Mercurial > repos > ebi-gxa > decoupler_pathway_inference
comparison decoupler_pathway_inference.py @ 0:77d680b36e23 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
author | ebi-gxa |
---|---|
date | Fri, 15 Mar 2024 12:17:49 +0000 |
parents | |
children | c6787c2aee46 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:77d680b36e23 |
---|---|
1 # import the necessary packages | |
2 import argparse | |
3 | |
4 import anndata as ad | |
5 import decoupler as dc | |
6 import pandas as pd | |
7 | |
# Activity inference methods this script can run. Each name NAME maps to the
# decoupler function ``run_NAME`` and to the ``NAME_estimate`` / ``NAME_pvals``
# entries that function leaves in ``adata.obsm``.
METHODS = ("mlm", "ulm")


def build_parser():
    """Build the command-line parser for the pathway inference script.

    Returns:
        argparse.ArgumentParser: parser exposing the input/output paths, the
        network column names, ``--use_raw``, ``--min_n`` and ``--method``.
    """
    parser = argparse.ArgumentParser()

    # AnnData input file
    parser.add_argument(
        "-i", "--input_anndata", help="AnnData input file", required=True
    )

    # network input file (tab-separated)
    parser.add_argument(
        "-n", "--input_network", help="Network input file", required=True
    )

    # Output file prefix. The script cannot produce anything without it, so
    # argparse enforces it instead of a late check after parsing.
    parser.add_argument(
        "-o", "--output", help="output files prefix", required=True
    )

    # optional path to save the activities AnnData file
    parser.add_argument(
        "-a", "--activities_path", help="Path to save Activities AnnData file", default=None
    )

    # column names in the network table
    parser.add_argument(
        "-s", "--source", help="Column name in net with source nodes.", default="source"
    )
    parser.add_argument(
        "-t", "--target", help="Column name in net with target nodes.", default="target"
    )
    parser.add_argument(
        "-w", "--weight", help="Column name in net with weights.", default="weight"
    )

    # whether to use the raw part of the AnnData object
    parser.add_argument(
        "--use_raw", action="store_true", default=False, help="Whether to use the raw part of the AnnData object"
    )

    # minimum number of targets per source
    parser.add_argument(
        "--min_n", help="Minimum of targets per source. If less, sources are removed.", default=5, type=int
    )

    # Activity inference method. `choices` rejects unsupported names up front;
    # previously an unknown method made the script exit successfully without
    # writing any output. The default now applies when -m is omitted.
    parser.add_argument(
        "-m", "--method", help="Activity inference method", default="mlm", choices=METHODS
    )
    return parser


def missing_columns(available, required):
    """Return the names in *required* that are absent from *available*.

    Args:
        available: iterable of column names present in the network table.
        required: iterable of column names the run needs.

    Returns:
        list: missing names, in the order given by *required*.
    """
    present = set(available)
    return [name for name in required if name not in present]


def main(argv=None):
    """Run the selected decoupler method and write its results.

    Writes ``<output>.tsv`` with the estimate and p-value tables concatenated
    column-wise and, when ``--activities_path`` is given, the activities
    AnnData object to that path.

    Args:
        argv: argument list to parse; ``None`` makes argparse use ``sys.argv``.

    Raises:
        ValueError: if the source, target or weight column is missing from the
            network file.
    """
    args = build_parser().parse_args(argv)

    # read in the AnnData input file
    adata = ad.read_h5ad(args.input_anndata)

    # read in the tab-separated network input file
    network = pd.read_csv(args.input_network, sep='\t')

    missing = missing_columns(
        network.columns, (args.source, args.target, args.weight)
    )
    if missing:
        raise ValueError(
            "Source, target, and weight columns must be present in the network; "
            f"missing: {', '.join(missing)}"
        )

    # `choices` guarantees args.method is one of METHODS, so dc.run_<method>
    # exists for every value that reaches this point.
    run_method = getattr(dc, f"run_{args.method}")
    run_method(
        mat=adata,
        net=network,
        source=args.source,
        target=args.target,
        weight=args.weight,
        verbose=True,
        min_n=args.min_n,
        use_raw=args.use_raw,
    )

    estimate_key = f"{args.method}_estimate"
    pvals_key = f"{args.method}_pvals"

    # write the estimates and p-values side by side to the output table
    # NOTE(review): both tables presumably share the same column labels, so the
    # TSV header may contain each label twice — confirm downstream expectations.
    combined_df = pd.concat([adata.obsm[estimate_key], adata.obsm[pvals_key]], axis=1)
    combined_df.to_csv(args.output + ".tsv", sep="\t")

    # if requested, build the activities AnnData from the estimates and save it
    if args.activities_path is not None:
        acts = dc.get_acts(adata, obsm_key=estimate_key)
        acts.write_h5ad(args.activities_path)


if __name__ == "__main__":
    main()