annotate decoupler_aucell_score.py @ 4:515ac51db6e5 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
author ebi-gxa
date Tue, 16 Apr 2024 11:49:14 +0000
parents e887a7e8c5b4
children c9aaac87c583
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
1 import argparse
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
2 import os
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
3 import tempfile
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
4
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
5 import anndata
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
6 import decoupler as dc
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
7 import pandas as pd
4
515ac51db6e5 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents: 3
diff changeset
8 import numba as nb
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
9
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
10
4
515ac51db6e5 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents: 3
diff changeset
def read_gmt_long(gmt_file):
    """
    Reads a GMT file and produces a Pandas DataFrame in long format, ready to be passed to the AUCell method.

    Every non-blank line of the file is split on tabs: the first field is the gene set name,
    the second field (usually a description or URL) is ignored, and the remaining fields are
    the genes of the set. If the same gene set name appears on several lines, the last
    occurrence replaces the earlier ones.

    Parameters
    ----------
    gmt_file : str
        Path to the GMT file.

    Returns
    -------
    pd.DataFrame
        A DataFrame with the gene sets. Each row represents a gene set to gene assignment, and the columns are "gene_set" and "gene".
        The index is a plain 0..n-1 range. A file without any gene set yields an empty DataFrame with the same two columns.

    >>> line = "HALLMARK_NOTCH_SIGNALING\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_NOTCH_SIGNALING\\tJAG1\\tNOTCH3\\tNOTCH2\\tAPH1A\\tHES1\\tCCND1\\tFZD1\\tPSEN2\\tFZD7\\tDTX1\\tDLL1\\tFZD5\\tMAML2\\tNOTCH1\\tPSENEN\\tWNT5A\\tCUL1\\tWNT2\\tDTX4\\tSAP30\\tPPARD\\tKAT2A\\tHEYL\\tSKP1\\tRBX1\\tTCF7L2\\tARRB1\\tLFNG\\tPRKCA\\tDTX2\\tST3GAL6\\tFBXW11\\n"
    >>> line2 = "HALLMARK_APICAL_SURFACE\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_APICAL_SURFACE\\tB4GALT1\\tRHCG\\tMAL\\tLYPD3\\tPKHD1\\tATP6V0A4\\tCRYBG1\\tSHROOM2\\tSRPX\\tMDGA1\\tTMEM8B\\tTHY1\\tPCSK9\\tEPHB4\\tDCBLD2\\tGHRL\\tLYN\\tGAS1\\tFLOT2\\tPLAUR\\tAKAP7\\tATP8B1\\tEFNA5\\tSLC34A3\\tAPP\\tGSTM3\\tHSPB1\\tSLC2A4\\tIL2RB\\tRTN4RL1\\tNCOA6\\tSULF2\\tADAM10\\tBRCA1\\tGATA3\\tAFAP1L2\\tIL2RG\\tCD160\\tADIPOR2\\tSLC22A12\\tNTNG1\\tSCUBE1\\tCX3CL1\\tCROCC\\n"
    >>> temp_dir = tempfile.gettempdir()
    >>> temp_gmt = os.path.join(temp_dir, "temp_file.gmt")
    >>> with open(temp_gmt, "w") as f:
    ...     f.write(line)
    ...     f.write(line2)
    288
    380
    >>> df = read_gmt_long(temp_gmt)
    >>> df.shape[0]
    76
    >>> len(df.loc[df["gene_set"] == "HALLMARK_APICAL_SURFACE"].gene.tolist())
    44
    """
    # Map each gene set name to the list of its genes (insertion ordered).
    gene_sets = {}

    # Stream the GMT file line by line instead of polling readline().
    with open(gmt_file, "r") as f:
        for line in f:
            fields = line.strip().split("\t")
            # A blank line splits into a single empty field: nothing to record.
            if not fields[0]:
                continue
            # fields[1] is the description column; empty gene fields are dropped.
            gene_sets[fields[0]] = [gene for gene in fields[2:] if gene]

    # Build the long table in one go; unlike pd.concat over per-set frames this
    # also works when no gene set was read, and gives a unique index.
    rows = [(name, gene) for name, genes in gene_sets.items() for gene in genes]
    return pd.DataFrame(rows, columns=["gene_set", "gene"])
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
52
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
53
4
515ac51db6e5 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit b01245159f9cb67101497bb974b2c13bcee019b7
ebi-gxa
parents: 3
diff changeset
def score_genes_aucell_mt(adata: anndata.AnnData, gene_set_gene: pd.DataFrame, use_raw=False, min_n_genes=5, var_gene_symbols_field=None):
    """Score genes using Aucell.

    Runs decoupler's ``run_aucell`` on ``adata`` with the given gene sets and then copies
    every score found in ``adata.obsm["aucell_estimate"]`` into ``adata.obs`` under the
    name ``AUCell_<gene set>``. ``adata`` is modified in place and nothing is returned.

    Parameters
    ----------
    adata : anndata.AnnData
    gene_set_gene: pd.DataFrame with columns gene_set and gene
    use_raw : bool, optional, False by default.
    min_n_genes : int, optional, 5 by default.
    var_gene_symbols_field : str, optional, None by default. The field in var where gene symbols are stored

    >>> import scanpy as sc
    >>> import decoupler as dc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> r_gene_list = adata.var[adata.var.index.str.startswith("RP")].index.tolist()
    >>> m_gene_list = adata.var[adata.var.index.str.startswith("M")].index.tolist()
    >>> gene_set = {}
    >>> gene_set["m"] = m_gene_list
    >>> gene_set["r"] = r_gene_list
    >>> gene_set_df = pd.concat(pd.DataFrame({'gene_set':k, 'gene':v}) for k, v in gene_set.items())
    >>> score_genes_aucell_mt(adata, gene_set_df, use_raw=False)
    >>> "AUCell_m" in adata.obs.columns
    True
    >>> "AUCell_r" in adata.obs.columns
    True
    """

    # If var_gene_symbols_field is provided, transform gene_set_gene so that
    # "gene" contains gene ids (the var index) instead of gene symbols.
    if var_gene_symbols_field:
        # Stand-alone lookup table with private column names: it is not a slice
        # of adata.var (no chained-assignment warning) and cannot collide with
        # a var field or input column that happens to be called "gene_id".
        id_by_symbol = pd.DataFrame(
            {
                "_var_symbol": adata.var[var_gene_symbols_field].to_numpy(),
                "_var_gene_id": adata.var.index.to_numpy(),
            }
        )
        # merge() returns a new frame, so the caller's DataFrame is left alone.
        gene_set_gene = gene_set_gene.merge(
            id_by_symbol, left_on="gene", right_on="_var_symbol", how="left"
        )
        # Genes of the gene set that are absent from var get no id from the
        # left merge; keep their original symbol to avoid deforming the AUCell
        # calculation.
        gene_set_gene["gene"] = gene_set_gene["_var_gene_id"].fillna(gene_set_gene["gene"])
        gene_set_gene = gene_set_gene.drop(columns=["_var_symbol", "_var_gene_id"])

    # run decoupler's run_aucell
    dc.run_aucell(
        adata, net=gene_set_gene, source="gene_set", target="gene", use_raw=use_raw, min_n=min_n_genes
    )

    # Expose each computed score as an obs column; gene sets without an
    # estimate in obsm are skipped.
    estimates = adata.obsm["aucell_estimate"]
    for gs in gene_set_gene.gene_set.unique():
        if gs in estimates.keys():
            adata.obs[f"AUCell_{gs}"] = estimates[gs]
0
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
100
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
101
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
def run_for_genelists(
    adata, gene_lists, score_names, use_raw=False, gene_symbols_field=None, min_n_genes=5
):
    """Score each comma separated gene list under its matching score name.

    Parameters
    ----------
    adata : object passed through to score_genes_aucell_mt as its AnnData argument.
    gene_lists : sequence of str, each one a comma separated list of genes.
    score_names : sequence of str, one name per entry of gene_lists; used as the gene set name.
    use_raw : bool, optional, False by default. Forwarded to score_genes_aucell_mt.
    gene_symbols_field : str, optional, None by default. Forwarded as var_gene_symbols_field.
    min_n_genes : int, optional, 5 by default. Forwarded to score_genes_aucell_mt.

    Raises
    ------
    ValueError
        If gene_lists and score_names do not have the same length.
    """
    # Guard clause: every gene list needs exactly one score name.
    if len(gene_lists) != len(score_names):
        raise ValueError(
            "The number of gene lists (separated by :) and score names (separated by :) must be the same"
        )

    # A distinct loop name keeps the score_names parameter from being shadowed.
    for gene_list, score_name in zip(gene_lists, score_names):
        # Tolerate "A, B" style input and stray commas.
        genes = [gene.strip() for gene in gene_list.split(",")]
        genes = [gene for gene in genes if gene]
        # Long format expected by score_genes_aucell_mt: one row per gene, all
        # rows sharing the score name as gene set.
        gene_set_gene_df = pd.DataFrame({"gene_set": score_name, "gene": genes})

        score_genes_aucell_mt(
            adata,
            gene_set_gene_df,
            use_raw=use_raw,
            min_n_genes=min_n_genes,
            var_gene_symbols_field=gene_symbols_field,
        )
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
123
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
124
1e8697931d73 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit c8c39f14eeee6e7a6d097fd0cb9430b12793eb8b
ebi-gxa
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the options, score the input AnnData
    # object either from a GMT file or from gene lists given on the command
    # line, then write either the modified AnnData object or a tab-separated
    # table of the "AUCell_*" columns of adata.obs.
    parser = argparse.ArgumentParser(description="Score genes using Aucell")
    parser.add_argument(
        "--input_file", type=str, help="Path to input AnnData file", required=True
    )
    parser.add_argument(
        "--output_file", type=str, help="Path to output file", required=True
    )
    parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
    # Optional subset of the gene sets in the GMT file to score.
    parser.add_argument(
        "--gene_sets_to_score",
        type=str,
        required=False,
        help="Optional comma separated list of gene sets to score (they need to be in the gmt file)",
    )
    # Gene lists given directly on the command line (alternative to a GMT file).
    parser.add_argument(
        "--gene_lists_to_score",
        type=str,
        required=False,
        help="Comma separated list of genes to score. You can have more than one set of genes, separated by colon :",
    )
    # Score names for the gene lists. The value is split on "," further down,
    # so the help text describes a comma separated list.
    parser.add_argument(
        "--score_names",
        type=str,
        required=False,
        help="Name of the score column when using the gene list. You can have more than one score name, separated by comma. It should be the same length as the number of gene lists.",
    )
    parser.add_argument(
        "--gene_symbols_field",
        type=str,
        help="Name of the gene symbols field in the AnnData object",
        required=True,
    )
    # Minimum of targets per source. If less, sources are removed.
    parser.add_argument(
        "--min_n",
        type=int,
        required=False,
        default=5,
        help="Minimum of targets per source. If less, sources are removed.",
    )
    parser.add_argument("--use_raw", action="store_true", help="Use raw data")
    parser.add_argument(
        "--write_anndata", action="store_true", help="Write the modified AnnData object"
    )
    # Upper bound on the number of concurrent threads.
    parser.add_argument(
        "--max_threads",
        type=int,
        required=False,
        default=1,
        help="Number of max concurrent threads",
    )

    # Parse command-line arguments
    args = parser.parse_args()

    # Fail fast with a usage error when no scoring input was given; otherwise
    # neither branch below runs and an output without any scores is written.
    use_gene_lists = (
        args.gene_lists_to_score is not None and args.score_names is not None
    )
    if args.gmt_file is None and not use_gene_lists:
        parser.error(
            "provide either --gmt_file, or both --gene_lists_to_score and --score_names"
        )

    # NOTE(review): `nb` is expected to be bound by the file's imports
    # (presumably numba) -- confirm against the top of the file.
    nb.set_num_threads(n=args.max_threads)

    # Load input AnnData object
    adata = anndata.read_h5ad(args.input_file)

    if args.gmt_file is not None:
        # Load MSigDB file in GMT format
        msigdb = read_gmt_long(args.gmt_file)

        gene_sets_to_score = (
            args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
        )
        if gene_sets_to_score:
            # Restrict the table to the gene sets requested via --gene_sets_to_score.
            msigdb = msigdb[msigdb["gene_set"].isin(gene_sets_to_score)]

        score_genes_aucell_mt(
            adata,
            msigdb,
            args.use_raw,
            args.min_n,
            var_gene_symbols_field=args.gene_symbols_field,
        )
    elif use_gene_lists:
        # Gene lists are separated by ":"; score names are separated by ",".
        gene_lists = args.gene_lists_to_score.split(":")
        score_names = args.score_names.split(",")
        run_for_genelists(
            adata,
            gene_lists,
            score_names,
            args.use_raw,
            args.gene_symbols_field,
            args.min_n,
        )

    # Save the modified AnnData object or generate a file with cells as rows
    # and the new score columns.
    if args.write_anndata:
        adata.write_h5ad(args.output_file)
    else:
        new_columns = [col for col in adata.obs.columns if col.startswith("AUCell_")]
        adata.obs[new_columns].to_csv(args.output_file, sep="\t", index=True)