Mercurial > repos > ebi-gxa > decoupler_pseudobulk
comparison decoupler_pathway_inference.py @ 5:893ff9213a34 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1034a450c97dcbb77871050cf0c6d3da90dac823
| author | ebi-gxa |
|---|---|
| date | Fri, 15 Mar 2024 12:18:11 +0000 |
| parents | |
| children | 93f61ea19336 |
comparison
equal
deleted
inserted
replaced
| 4:f321c60167d4 | 5:893ff9213a34 |
|---|---|
# import the necessary packages
import argparse

import anndata as ad
import decoupler as dc
import pandas as pd

# Supported values for --method, mapped to the decoupler function that runs
# each one. After a run, this script reads the results from
# adata.obsm["<method>_estimate"] and adata.obsm["<method>_pvals"].
METHOD_RUNNERS = {
    "mlm": dc.run_mlm,
    "ulm": dc.run_ulm,
}


def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    # define arguments for the script
    parser = argparse.ArgumentParser()

    # add AnnData input file option
    parser.add_argument(
        "-i", "--input_anndata", help="AnnData input file", required=True
    )

    # add network input file option
    parser.add_argument(
        "-n", "--input_network", help="Network input file", required=True
    )

    # output file prefix; the script cannot do anything useful without it,
    # so argparse enforces its presence instead of a manual check later on
    parser.add_argument(
        "-o", "--output",
        help="output files prefix",
        required=True,
    )

    # path to save Activities AnnData file
    parser.add_argument(
        "-a", "--activities_path",
        help="Path to save Activities AnnData file",
        default=None,
    )

    # Column name in net with source nodes
    parser.add_argument(
        "-s", "--source",
        help="Column name in net with source nodes.",
        default="source",
    )

    # Column name in net with target nodes
    parser.add_argument(
        "-t", "--target",
        help="Column name in net with target nodes.",
        default="target",
    )

    # Column name in net with weights.
    parser.add_argument(
        "-w", "--weight",
        help="Column name in net with weights.",
        default="weight",
    )

    # add boolean argument for use_raw
    parser.add_argument(
        "--use_raw",
        action="store_true",
        default=False,
        help="Whether to use the raw part of the AnnData object",
    )

    # add argument for min_n
    parser.add_argument(
        "--min_n",
        help="Minimum of targets per source. If less, sources are removed.",
        default=5,
        type=int,
    )

    # add activity inference method option; it stays required (a default
    # would never be used), and `choices` rejects unsupported methods up
    # front instead of letting the script finish without writing anything
    parser.add_argument(
        "-m", "--method",
        help="Activity inference method",
        required=True,
        choices=sorted(METHOD_RUNNERS),
    )

    return parser.parse_args(argv)


def main(argv=None):
    """Run the selected activity inference method and write its results.

    Reads the AnnData file and the tab-separated network file, runs the
    method chosen with ``--method`` on them, writes the estimates and
    p-values side by side to ``<output>.tsv`` and, when ``--activities_path``
    is given, saves the activities AnnData object to that path.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        ValueError: If the source, target or weight column is missing from
            the network file.
    """
    args = parse_args(argv)

    # read in the AnnData input file
    adata = ad.read_h5ad(args.input_anndata)

    # read in the network input file
    network = pd.read_csv(args.input_network, sep="\t")

    # report exactly which of the requested columns are absent
    missing = [
        column
        for column in (args.source, args.target, args.weight)
        if column not in network.columns
    ]
    if missing:
        raise ValueError(
            "Column(s) not present in the network: " + ", ".join(missing)
        )

    # `choices` on --method guarantees the lookup succeeds
    run_method = METHOD_RUNNERS[args.method]
    run_method(
        mat=adata,
        net=network,
        source=args.source,
        target=args.target,
        weight=args.weight,
        verbose=True,
        min_n=args.min_n,
        use_raw=args.use_raw,
    )

    estimate_key = args.method + "_estimate"
    pvals_key = args.method + "_pvals"

    # write the estimates and the p-values side by side to the output file
    # NOTE(review): both frames presumably carry the same column names, so
    # the TSV header would repeat each name once per frame - confirm that
    # downstream consumers rely on column position.
    combined_df = pd.concat(
        [adata.obsm[estimate_key], adata.obsm[pvals_key]], axis=1
    )
    combined_df.to_csv(args.output + ".tsv", sep="\t")

    # if args.activities_path is specified, generate the activities AnnData
    # and save the AnnData object to the specified path
    if args.activities_path is not None:
        acts = dc.get_acts(adata, obsm_key=estimate_key)
        acts.write_h5ad(args.activities_path)


if __name__ == "__main__":
    main()
