Mercurial > repos > ebi-gxa > decoupler_pseudobulk
comparison decoupler_aucell_score.py @ 4:f321c60167d4 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 56273bcfbc0de8f6ab093f1131a7d22c05a70f25
| author | ebi-gxa |
|---|---|
| date | Thu, 16 Nov 2023 20:05:16 +0000 |
| parents | 4fa5f370599f |
| children | ed2a77422e00 |
comparison
equal
deleted
inserted
replaced
| 3:4fa5f370599f | 4:f321c60167d4 |
|---|---|
| 107 | 107 |
| 108 | 108 |
| 109 if __name__ == "__main__": | 109 if __name__ == "__main__": |
| 110 # Create command-line arguments parser | 110 # Create command-line arguments parser |
| 111 parser = argparse.ArgumentParser(description="Score genes using Aucell") | 111 parser = argparse.ArgumentParser(description="Score genes using Aucell") |
| 112 parser.add_argument("--input_file", type=str, help="Path to input AnnData file") | 112 parser.add_argument( |
| 113 parser.add_argument("--output_file", type=str, help="Path to output file") | 113 "--input_file", type=str, help="Path to input AnnData file", required=True |
| 114 ) | |
| 115 parser.add_argument( | |
| 116 "--output_file", type=str, help="Path to output file", required=True | |
| 117 ) | |
| 114 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) | 118 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False) |
| 115 # add argument for gene sets to score | 119 # add argument for gene sets to score |
| 116 parser.add_argument( | 120 parser.add_argument( |
| 117 "--gene_sets_to_score", | 121 "--gene_sets_to_score", |
| 118 type=str, | 122 type=str, |
| 119 required=False, | 123 required=False, |
| 120 help="Comma separated list of gene sets to score (the need to be in the gmt file)", | 124 help="Optional comma separated list of gene sets to score (the need to be in the gmt file)", |
| 121 ) | 125 ) |
| 122 # add argument for gene list (comma separated) to score | 126 # add argument for gene list (comma separated) to score |
| 123 parser.add_argument( | 127 parser.add_argument( |
| 124 "--gene_lists_to_score", | 128 "--gene_lists_to_score", |
| 125 type=str, | 129 type=str, |
| 135 ) | 139 ) |
| 136 parser.add_argument( | 140 parser.add_argument( |
| 137 "--gene_symbols_field", | 141 "--gene_symbols_field", |
| 138 type=str, | 142 type=str, |
| 139 help="Name of the gene symbols field in the AnnData object", | 143 help="Name of the gene symbols field in the AnnData object", |
| 144 required=True, | |
| 140 ) | 145 ) |
| 141 parser.add_argument("--use_raw", action="store_true", help="Use raw data") | 146 parser.add_argument("--use_raw", action="store_true", help="Use raw data") |
| 142 parser.add_argument( | 147 parser.add_argument( |
| 143 "--write_anndata", action="store_true", help="Write the modified AnnData object" | 148 "--write_anndata", action="store_true", help="Write the modified AnnData object" |
| 144 ) | 149 ) |
| 147 args = parser.parse_args() | 152 args = parser.parse_args() |
| 148 | 153 |
| 149 # Load input AnnData object | 154 # Load input AnnData object |
| 150 adata = anndata.read_h5ad(args.input_file) | 155 adata = anndata.read_h5ad(args.input_file) |
| 151 | 156 |
| 152 if args.gene_sets_to_score is not None and args.gmt_file is not None: | 157 if args.gmt_file is not None: |
| 153 # Load MSigDB file in GMT format | 158 # Load MSigDB file in GMT format |
| 154 msigdb = read_gmt(args.gmt_file) | 159 msigdb = read_gmt(args.gmt_file) |
| 155 | 160 |
| 156 gene_sets_to_score = args.gene_sets_to_score.split(",") | 161 gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else [] |
| 157 # Score genes by their ensembl ids using the score_genes_aucell function | 162 # Score genes by their ensembl ids using the score_genes_aucell function |
| 158 for _, row in msigdb.iterrows(): | 163 for _, row in msigdb.iterrows(): |
| 159 gene_set_name = row["gene_set_name"] | 164 gene_set_name = row["gene_set_name"] |
| 160 if gene_set_name in gene_sets_to_score: | 165 if not gene_sets_to_score or gene_set_name in gene_sets_to_score: |
| 161 genes = row["genes"].split(",") | 166 genes = row["genes"].split(",") |
| 162 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set | 167 # Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set |
| 163 ens_gene_ids = adata.var[ | 168 ens_gene_ids = adata.var[ |
| 164 adata.var[args.gene_symbols_field].isin(genes) | 169 adata.var[args.gene_symbols_field].isin(genes) |
| 165 ].index | 170 ].index |
