annotate decoupler_aucell_score.py @ 1:e9b06a8fb73a draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
author ebi-gxa
date Mon, 15 Apr 2024 13:20:27 +0000
parents 77d680b36e23
children 82b7cd3e1bbd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
1 import argparse
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
2 import os
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
3 import tempfile
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
4
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
5 import anndata
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
6 import decoupler as dc
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
7 import pandas as pd
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
8
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
9
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
def read_gmt(gmt_file):
    """
    Reads a GMT file into a Pandas DataFrame.

    Every non-blank line is split on tabs: the first field is used as the
    gene set name, the second field is ignored, and all remaining fields are
    joined with commas into a single "genes" string. Blank lines (for
    example a trailing empty line) are skipped instead of producing a gene
    set with an empty name.

    Parameters
    ----------
    gmt_file : str
        Path to the GMT file.

    Returns
    -------
    pd.DataFrame
        A DataFrame with the gene sets. Each row represents a gene set, and
        the columns are "gene_set_name" and "genes" (comma separated string).
        A file without gene sets gives an empty DataFrame with both columns.

    >>> line = "HALLMARK_NOTCH_SIGNALING\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_NOTCH_SIGNALING\\tJAG1\\tNOTCH3\\tNOTCH2\\tAPH1A\\tHES1\\tCCND1\\tFZD1\\tPSEN2\\tFZD7\\tDTX1\\tDLL1\\tFZD5\\tMAML2\\tNOTCH1\\tPSENEN\\tWNT5A\\tCUL1\\tWNT2\\tDTX4\\tSAP30\\tPPARD\\tKAT2A\\tHEYL\\tSKP1\\tRBX1\\tTCF7L2\\tARRB1\\tLFNG\\tPRKCA\\tDTX2\\tST3GAL6\\tFBXW11\\n"
    >>> line2 = "HALLMARK_APICAL_SURFACE\\thttp://www.gsea-msigdb.org/gsea/msigdb/human/geneset/HALLMARK_APICAL_SURFACE\\tB4GALT1\\tRHCG\\tMAL\\tLYPD3\\tPKHD1\\tATP6V0A4\\tCRYBG1\\tSHROOM2\\tSRPX\\tMDGA1\\tTMEM8B\\tTHY1\\tPCSK9\\tEPHB4\\tDCBLD2\\tGHRL\\tLYN\\tGAS1\\tFLOT2\\tPLAUR\\tAKAP7\\tATP8B1\\tEFNA5\\tSLC34A3\\tAPP\\tGSTM3\\tHSPB1\\tSLC2A4\\tIL2RB\\tRTN4RL1\\tNCOA6\\tSULF2\\tADAM10\\tBRCA1\\tGATA3\\tAFAP1L2\\tIL2RG\\tCD160\\tADIPOR2\\tSLC22A12\\tNTNG1\\tSCUBE1\\tCX3CL1\\tCROCC\\n"
    >>> temp_dir = tempfile.gettempdir()
    >>> temp_gmt = os.path.join(temp_dir, "temp_file.gmt")
    >>> with open(temp_gmt, "w") as f:
    ...     f.write(line)
    ...     f.write(line2)
    288
    380
    >>> df = read_gmt(temp_gmt)
    >>> df.shape[0]
    2
    >>> list(df.columns)
    ['gene_set_name', 'genes']
    >>> df.loc[df["gene_set_name"] == "HALLMARK_APICAL_SURFACE"].genes.tolist()[0].startswith("B4GALT1")
    True
    """
    # Collect one dictionary per gene set while streaming over the file
    gene_sets = []
    with open(gmt_file, "r") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # blank line: there is no gene set to record
                continue
            fields = stripped.split("\t")
            # fields[1] (description / URL) is intentionally dropped
            gene_sets.append(
                {"gene_set_name": fields[0], "genes": ",".join(fields[2:])}
            )

    # Fix the column set so that an empty input still has the expected schema
    return pd.DataFrame(gene_sets, columns=["gene_set_name", "genes"])
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
53
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
54
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
def score_genes_aucell(
    adata: anndata.AnnData, gene_list: list, score_name: str, use_raw=False, min_n_genes=5
):
    """Score genes using Aucell.

    Builds a one-gene-set network from ``gene_list``, runs decoupler's
    ``run_aucell`` on it and copies the resulting column of
    ``adata.obsm["aucell_estimate"]`` to ``adata.obs[score_name]``.
    If a ``ValueError`` is raised while doing so, a message is printed and
    the gene list is skipped; no score column is added in that case.

    Parameters
    ----------
    adata : anndata.AnnData
        Object to score; it is modified in place.
    gene_list : list
        Genes that make up the gene set (used as the "gene_id" targets).
    score_name : str
        Name of the gene set and of the column written to ``adata.obs``.
    use_raw : bool, optional
        Passed through to ``dc.run_aucell``.
    min_n_genes : int, optional
        Passed to ``dc.run_aucell`` as ``min_n``.
        NOTE(review): decoupler documents ``min_n`` as the minimum number of
        targets a source needs in order to be kept - confirm against the
        pinned decoupler version.

    >>> import scanpy as sc
    >>> import decoupler as dc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> gene_list = adata.var[adata.var.index.str.startswith("RP")].index.tolist()
    >>> score_genes_aucell(adata, gene_list, "ribosomal_aucell", use_raw=False)
    >>> "ribosomal_aucell" in adata.obs.columns
    True
    """
    # make a data.frame with two columns, geneset and gene_id, geneset filled
    # with score_name and gene_id with gene_list, one row per element
    geneset_df = pd.DataFrame(
        {
            "gene_id": gene_list,
            "geneset": score_name,
        }
    )
    # run decoupler's run_aucell; a ValueError is treated as "this gene list
    # cannot be scored" so that one bad list does not stop the others
    try:
        dc.run_aucell(
            adata,
            net=geneset_df,
            source="geneset",
            target="gene_id",
            use_raw=use_raw,
            # previously accepted by this function but never forwarded
            min_n=min_n_genes,
        )
        # copy .obsm['aucell_estimate'] matrix columns to adata.obs using the column names
        adata.obs[score_name] = adata.obsm["aucell_estimate"][score_name]
    except ValueError as ve:
        print(f"Gene list {score_name} failed, skipping: {str(ve)}")
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
92
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
93
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
def run_for_genelists(
    adata, gene_lists, score_names, use_raw=False, gene_symbols_field="gene_symbols", min_n_genes=5
):
    """Score several comma separated gene lists with AUCell.

    For every (gene list, score name) pair the gene symbols are looked up in
    ``adata.var[gene_symbols_field]``; the index labels of the matching rows
    are passed to ``score_genes_aucell`` with the score name prefixed by
    ``"AUCell_"``.

    Parameters
    ----------
    adata : anndata.AnnData
        Object to score. NOTE(review): presumably modified in place by
        ``score_genes_aucell`` - confirm against that function.
    gene_lists : list of str
        One comma separated string of gene symbols per gene list. Whitespace
        around a symbol is ignored and empty entries are dropped.
    score_names : list of str
        One score name per gene list.
    use_raw : bool, optional
        Passed through to ``score_genes_aucell``.
    gene_symbols_field : str, optional
        Column of ``adata.var`` that holds the gene symbols.
    min_n_genes : int, optional
        Passed through to ``score_genes_aucell``.

    Raises
    ------
    ValueError
        If ``gene_lists`` and ``score_names`` differ in length.
    """
    # validate first so that nothing is scored when the inputs do not pair up
    if len(gene_lists) != len(score_names):
        raise ValueError(
            "The number of gene lists (separated by :) and score names (separated by :) must be the same"
        )

    # a distinct loop name keeps the score_names parameter from being rebound
    for gene_list, score_name in zip(gene_lists, score_names):
        # tolerate "A, B" style input and stray commas
        genes = [symbol.strip() for symbol in gene_list.split(",") if symbol.strip()]
        # index labels of the var rows whose symbol is part of this gene list
        ens_gene_ids = adata.var[adata.var[gene_symbols_field].isin(genes)].index
        score_genes_aucell(
            adata,
            ens_gene_ids,
            f"AUCell_{score_name}",
            use_raw,
            min_n_genes,
        )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
112
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
113
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
114 if __name__ == "__main__":
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
115 # Create command-line arguments parser
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
116 parser = argparse.ArgumentParser(description="Score genes using Aucell")
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
117 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
118 "--input_file", type=str, help="Path to input AnnData file", required=True
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
119 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
120 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
121 "--output_file", type=str, help="Path to output file", required=True
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
122 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
123 parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
124 # add argument for gene sets to score
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
125 parser.add_argument(
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
126 "--gene_sets_to_score",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
127 type=str,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
128 required=False,
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
129 help="Optional comma separated list of gene sets to score (the need to be in the gmt file)",
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
130 )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Explicit gene lists given on the command line: genes inside one list are
# comma separated, and several lists are separated by a colon.
parser.add_argument(
    "--gene_lists_to_score",
    help="Comma separated list of genes to score. You can have more than one set of genes, separated by colon :",
    required=False,
    type=str,
)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Names of the score columns that go with the explicit gene lists
# (one name is expected per gene list).
parser.add_argument(
    "--score_names",
    help="Name of the score column when using the gene list. You can have more than one set of score names, separated by colon :. It should be the same length as the number of gene lists.",
    required=False,
    type=str,
)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Mandatory: the adata.var column holding gene symbols, used to look up the
# var index entries of the genes being scored.
parser.add_argument(
    "--gene_symbols_field",
    required=True,
    type=str,
    help="Name of the gene symbols field in the AnnData object",
)
1
e9b06a8fb73a planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 11fb36a94b8262ef8e78f1c6dd46c4146eb59341
ebi-gxa
parents: 0
diff changeset
# min_n: minimum number of targets a source must have; sources with fewer
# targets are removed. Defaults to 5 when the option is not given.
parser.add_argument(
    "--min_n",
    default=5,
    help="Minimum of targets per source. If less, sources are removed.",
    required=False,
    type=int,
)
0
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Boolean switches; both are False unless present on the command line.
parser.add_argument(
    "--use_raw",
    help="Use raw data",
    action="store_true",
)
parser.add_argument(
    "--write_anndata",
    help="Write the modified AnnData object",
    action="store_true",
)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
163
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Resolve the command line first; every later step is driven by `args`.
args = parser.parse_args()

# Read the input h5ad file into memory as the AnnData object to be scored.
adata = anndata.read_h5ad(args.input_file)
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
169
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Score either the gene sets of a GMT file or explicit gene lists. The GMT
# file takes precedence when both kinds of input are supplied; when neither
# is supplied nothing is scored.
if args.gmt_file is not None:
    # Load MSigDB file in GMT format
    msigdb = read_gmt(args.gmt_file)

    # Optional subset of gene set names to score; empty means "score all".
    # Built once as a set so the per-row membership test below is cheap.
    gene_sets_to_score = (
        set(args.gene_sets_to_score.split(",")) if args.gene_sets_to_score else set()
    )

    # Score genes by their ensembl ids using the score_genes_aucell function
    for _, row in msigdb.iterrows():
        gene_set_name = row["gene_set_name"]
        if gene_sets_to_score and gene_set_name not in gene_sets_to_score:
            # A subset was requested and this gene set is not part of it.
            continue

        # The "genes" column holds the members of the set as one
        # comma-joined string.
        genes = row["genes"].split(",")
        # Convert gene symbols to the adata.var index entries (ensembl ids)
        # by matching on the user-selected gene symbols column.
        ens_gene_ids = adata.var[
            adata.var[args.gene_symbols_field].isin(genes)
        ].index
        score_genes_aucell(
            adata, ens_gene_ids, f"AUCell_{gene_set_name}", args.use_raw, args.min_n
        )
elif args.gene_lists_to_score is not None and args.score_names is not None:
    # Gene lists are separated by ":"; the genes inside each list stay
    # comma-joined and are passed on as-is.
    gene_lists = args.gene_lists_to_score.split(":")
    # The --score_names help documents ":" as the separator, but this code
    # historically split on ",". Accept both so that either form yields one
    # score name per gene list.
    score_names = args.score_names.replace(":", ",").split(",")
    run_for_genelists(
        adata,
        gene_lists,
        score_names,
        args.use_raw,
        args.gene_symbols_field,
        args.min_n,
    )
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
195
77d680b36e23 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
ebi-gxa
parents:
diff changeset
# Output: either the whole AnnData object, or a tab-separated table with one
# row per cell (obs index kept) restricted to the "AUCell_"-prefixed columns.
if not args.write_anndata:
    new_columns = list(
        filter(lambda col: col.startswith("AUCell_"), adata.obs.columns)
    )
    score_table = adata.obs[new_columns]
    score_table.to_csv(args.output_file, sep="\t", index=True)
else:
    adata.write_h5ad(args.output_file)