Mercurial > repos > ebi-gxa > decoupler_pathway_inference
annotate decoupler_aucell_score.py @ 2:82b7cd3e1bbd draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
author | ebi-gxa |
---|---|
date | Tue, 16 Apr 2024 11:49:19 +0000 |
parents | e9b06a8fb73a |
children | c6787c2aee46 |
rev | line source |
---|---|
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
1 import argparse |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
2 import os |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
3 import tempfile |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
4 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
5 import anndata |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
6 import decoupler as dc |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
7 import pandas as pd |
2
82b7cd3e1bbd
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents:
1
diff
changeset
|
8 import numba as nb |
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
9 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
10 |
2
82b7cd3e1bbd
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents:
1
diff
changeset
|
def read_gmt_long(gmt_file):
    """
    Reads a GMT file and produces a Pandas DataFrame in long format, ready to be passed to the AUCell method.

    Every non-blank line is split on tabs: the first field is taken as the gene
    set name, the second field is ignored, and all remaining fields are the
    genes of that set. If a gene set name appears on more than one line, the
    last occurrence replaces the earlier ones.

    Parameters
    ----------
    gmt_file : str
        Path to the GMT file.

    Returns
    -------
    pd.DataFrame
        A DataFrame with the gene sets. Each row represents a gene set to gene assignment, and the columns are "gene_set" and "gene".
        When the file contains no gene sets, an empty DataFrame with those two columns is returned.

    >>> line = "HALLMARK_NOTCH_SIGNALING\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_NOTCH_SIGNALING\\tJAG1\\tNOTCH3\\tNOTCH2\\tAPH1A\\tHES1\\tCCND1\\tFZD1\\tPSEN2\\tFZD7\\tDTX1\\tDLL1\\tFZD5\\tMAML2\\tNOTCH1\\tPSENEN\\tWNT5A\\tCUL1\\tWNT2\\tDTX4\\tSAP30\\tPPARD\\tKAT2A\\tHEYL\\tSKP1\\tRBX1\\tTCF7L2\\tARRB1\\tLFNG\\tPRKCA\\tDTX2\\tST3GAL6\\tFBXW11\\n"
    >>> line2 = "HALLMARK_APICAL_SURFACE\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_APICAL_SURFACE\\tB4GALT1\\tRHCG\\tMAL\\tLYPD3\\tPKHD1\\tATP6V0A4\\tCRYBG1\\tSHROOM2\\tSRPX\\tMDGA1\\tTMEM8B\\tTHY1\\tPCSK9\\tEPHB4\\tDCBLD2\\tGHRL\\tLYN\\tGAS1\\tFLOT2\\tPLAUR\\tAKAP7\\tATP8B1\\tEFNA5\\tSLC34A3\\tAPP\\tGSTM3\\tHSPB1\\tSLC2A4\\tIL2RB\\tRTN4RL1\\tNCOA6\\tSULF2\\tADAM10\\tBRCA1\\tGATA3\\tAFAP1L2\\tIL2RG\\tCD160\\tADIPOR2\\tSLC22A12\\tNTNG1\\tSCUBE1\\tCX3CL1\\tCROCC\\n"
    >>> temp_dir = tempfile.gettempdir()
    >>> temp_gmt = os.path.join(temp_dir, "temp_file.gmt")
    >>> with open(temp_gmt, "w") as f:
    ...     f.write(line)
    ...     f.write(line2)
    288
    380
    >>> df = read_gmt_long(temp_gmt)
    >>> df.shape[0]
    76
    >>> len(df.loc[df["gene_set"] == "HALLMARK_APICAL_SURFACE"].gene.tolist())
    44
    """
    # Map each gene set name to the list of genes that belong to it
    gene_sets = {}

    # Stream the GMT file line by line
    with open(gmt_file, "r") as f:
        for line in f:
            fields = line.strip().split("\t")
            # A blank (or whitespace-only) line yields a single empty field;
            # skip it so it does not register a gene set with an empty name.
            if not fields[0]:
                continue
            # fields[1] is the description/URL column and is not used
            gene_sets[fields[0]] = fields[2:]

    # pd.concat raises ValueError on an empty sequence, so an empty file is
    # answered with an empty frame that still has the expected columns.
    if not gene_sets:
        return pd.DataFrame(columns=["gene_set", "gene"])

    return pd.concat(pd.DataFrame({'gene_set': k, 'gene': v}) for k, v in gene_sets.items())
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
52 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
53 |
2
82b7cd3e1bbd
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents:
1
diff
changeset
|
def score_genes_aucell_mt(adata: anndata.AnnData, gene_set_gene: pd.DataFrame, use_raw=False, min_n_genes=5, var_gene_symbols_field=None):
    """Score genes using Aucell.

    Runs decoupler's ``run_aucell`` on ``adata`` with the given gene sets and
    then copies, for every gene set found in ``adata.obsm["aucell_estimate"]``,
    its values into ``adata.obs`` under the column name ``AUCell_<gene_set>``.

    Parameters
    ----------
    adata : anndata.AnnData
    gene_set_gene: pd.DataFrame with columns gene_set and gene
    use_raw : bool, optional, False by default.
    min_n_genes : int, optional, 5 by default.
    var_gene_symbols_field : str, optional, None by default. The field in var where gene symbols are stored

    Returns
    -------
    None
        ``adata`` is modified in place. The ``gene_set_gene`` frame passed by
        the caller is not modified: when ``var_gene_symbols_field`` is given, a
        merged copy is used instead.

    >>> import scanpy as sc
    >>> import decoupler as dc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> r_gene_list = adata.var[adata.var.index.str.startswith("RP")].index.tolist()
    >>> m_gene_list = adata.var[adata.var.index.str.startswith("M")].index.tolist()
    >>> gene_set = {}
    >>> gene_set["m"] = m_gene_list
    >>> gene_set["r"] = r_gene_list
    >>> gene_set_df = pd.concat(pd.DataFrame({'gene_set':k, 'gene':v}) for k, v in gene_set.items())
    >>> score_genes_aucell_mt(adata, gene_set_df, use_raw=False)
    >>> "AUCell_m" in adata.obs.columns
    True
    >>> "AUCell_r" in adata.obs.columns
    True
    """

    # if var_gene_symbols_field is provided, transform gene_set_gene df so that gene contains gene ids instead of gene symbols
    if var_gene_symbols_field:
        # Take an explicit copy: adding a column to a slice of adata.var is
        # chained assignment and triggers pandas' SettingWithCopyWarning.
        var_id_symbols = adata.var[[var_gene_symbols_field]].copy()
        var_id_symbols['gene_id'] = var_id_symbols.index

        # merge the index of var to gene_set_gene df based on var_gene_symbols_field
        gene_set_gene = gene_set_gene.merge(var_id_symbols, left_on='gene', right_on=var_gene_symbols_field, how='left')
        # this will still produce some empty gene_ids (genes in the gene_set_gene df that are not in the var df), fill those
        # with the original gene symbol from the gene_set to avoid deforming the AUCell calculation
        gene_set_gene['gene_id'] = gene_set_gene['gene_id'].fillna(gene_set_gene['gene'])
        gene_set_gene['gene'] = gene_set_gene['gene_id']

    # run decoupler's run_aucell
    dc.run_aucell(
        adata, net=gene_set_gene, source="gene_set", target="gene", use_raw=use_raw, min_n=min_n_genes
    )

    # Expose each available gene set estimate as its own obs column; gene sets
    # absent from the estimates are silently skipped.
    estimates = adata.obsm["aucell_estimate"]
    for gs in gene_set_gene.gene_set.unique():
        if gs in estimates.keys():
            adata.obs[f"AUCell_{gs}"] = estimates[gs]
0
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
100 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
101 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def run_for_genelists(
    adata, gene_lists, score_names, use_raw=False, gene_symbols_field=None, min_n_genes=5
):
    """Score each comma-separated gene list under its matching score name.

    ``gene_lists`` and ``score_names`` are paired positionally. Each gene list
    is turned into a single-gene-set long DataFrame (columns ``gene_set`` and
    ``gene``) and handed to ``score_genes_aucell_mt``, one call per pair.

    Parameters
    ----------
    adata : AnnData object passed through to ``score_genes_aucell_mt``.
    gene_lists : sequence of str, each a comma-separated list of genes.
    score_names : sequence of str, one gene set name per gene list.
    use_raw : bool, optional, False by default.
    gene_symbols_field : str, optional, None by default. Forwarded as
        ``var_gene_symbols_field``.
    min_n_genes : int, optional, 5 by default.

    Raises
    ------
    ValueError
        If ``gene_lists`` and ``score_names`` differ in length.
    """
    if len(gene_lists) != len(score_names):
        raise ValueError(
            "The number of gene lists (separated by :) and score names (separated by :) must be the same"
        )

    # A distinct loop variable name avoids shadowing the score_names parameter.
    for gene_list, score_name in zip(gene_lists, score_names):
        # Tolerate spaces around the commas and stray empty entries
        # (e.g. "A, B," -> ["A", "B"]) so gene names match exactly.
        genes = [gene.strip() for gene in gene_list.split(",") if gene.strip()]
        gene_set_gene_df = pd.DataFrame({'gene_set': score_name, 'gene': genes})

        score_genes_aucell_mt(
            adata,
            gene_set_gene_df,
            use_raw=use_raw,
            min_n_genes=min_n_genes,
            var_gene_symbols_field=gene_symbols_field,
        )
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
123 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
124 |
77d680b36e23
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff
changeset
|
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the AUCell scoring script.

    Gene sets can be supplied either through ``--gmt_file`` (optionally
    narrowed with ``--gene_sets_to_score``) or inline through
    ``--gene_lists_to_score`` together with ``--score_names``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Score genes using Aucell")
    parser.add_argument(
        "--input_file", type=str, help="Path to input AnnData file", required=True
    )
    parser.add_argument(
        "--output_file", type=str, help="Path to output file", required=True
    )
    parser.add_argument(
        "--gmt_file", type=str, help="Path to GMT file", required=False
    )
    # Gene sets (by name) to keep from the GMT file.
    parser.add_argument(
        "--gene_sets_to_score",
        type=str,
        required=False,
        help="Optional comma separated list of gene sets to score (they need to be in the gmt file)",
    )
    # Inline gene lists: genes separated by commas, lists separated by colons.
    parser.add_argument(
        "--gene_lists_to_score",
        type=str,
        required=False,
        help="Comma separated list of genes to score. You can have more than one set of genes, separated by colon :",
    )
    # One score name per inline gene list.
    parser.add_argument(
        "--score_names",
        type=str,
        required=False,
        help="Name of the score column when using the gene list. You can have more than one set of score names, separated by colon :. It should be the same length as the number of gene lists.",
    )
    parser.add_argument(
        "--gene_symbols_field",
        type=str,
        help="Name of the gene symbols field in the AnnData object",
        required=True,
    )
    # Minimum of targets per source. If less, sources are removed.
    parser.add_argument(
        "--min_n",
        type=int,
        required=False,
        default=5,
        help="Minimum of targets per source. If less, sources are removed.",
    )
    parser.add_argument("--use_raw", action="store_true", help="Use raw data")
    parser.add_argument(
        "--write_anndata", action="store_true", help="Write the modified AnnData object"
    )
    # Upper bound on concurrent threads.
    parser.add_argument(
        "--max_threads",
        type=int,
        required=False,
        default=1,
        help="Number of max concurrent threads",
    )
    return parser


def split_score_names(raw: str) -> list:
    """Split the ``--score_names`` value into individual score names.

    The option is documented as colon separated, so a colon is used as the
    separator whenever one is present. Values without a colon are split on
    commas, which keeps invocations written against the previous
    comma-splitting behaviour working.

    Args:
        raw: The raw ``--score_names`` string.

    Returns:
        The list of score names, in the order given.
    """
    separator = ":" if ":" in raw else ","
    return raw.split(separator)


if __name__ == "__main__":
    parser = build_arg_parser()
    args = parser.parse_args()

    # Fail early, before loading the input, when there is nothing to score:
    # otherwise the script would write an output containing no new scores.
    has_gene_lists = (
        args.gene_lists_to_score is not None and args.score_names is not None
    )
    if args.gmt_file is None and not has_gene_lists:
        parser.error(
            "Provide either --gmt_file, or both --gene_lists_to_score and --score_names"
        )

    nb.set_num_threads(n=args.max_threads)

    # Load input AnnData object
    adata = anndata.read_h5ad(args.input_file)

    if args.gmt_file is not None:
        # Load the gene sets from the GMT file
        msigdb = read_gmt_long(args.gmt_file)

        gene_sets_to_score = (
            args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
        )
        if gene_sets_to_score:
            # Restrict scoring to the gene sets named in --gene_sets_to_score
            msigdb = msigdb[msigdb["gene_set"].isin(gene_sets_to_score)]

        score_genes_aucell_mt(
            adata,
            msigdb,
            args.use_raw,
            args.min_n,
            var_gene_symbols_field=args.gene_symbols_field,
        )
    else:
        # The GMT file takes precedence; inline gene lists are only used
        # when no GMT file was given.
        gene_lists = args.gene_lists_to_score.split(":")
        score_names = split_score_names(args.score_names)
        run_for_genelists(
            adata,
            gene_lists,
            score_names,
            args.use_raw,
            args.gene_symbols_field,
            args.min_n,
        )

    # Save the modified AnnData object or generate a file with cells as rows
    # and the new score columns
    if args.write_anndata:
        adata.write_h5ad(args.output_file)
    else:
        # NOTE(review): presumably the scoring helpers prefix their obs
        # columns with "AUCell_" - confirm against their definitions.
        new_columns = [col for col in adata.obs.columns if col.startswith("AUCell_")]
        adata.obs[new_columns].to_csv(args.output_file, sep="\t", index=True)