annotate COBRAxy/ras_generator.py @ 401:6c7ddf68381a draft

Uploaded
author francesco_lapi
date Sun, 07 Sep 2025 20:29:23 +0000
parents e94735cb40fa
children ccccb731c953
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
1 from __future__ import division
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
2 # galaxy complains this ^^^ needs to be at the very beginning of the file, for some reason.
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
3 import sys
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
4 import argparse
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
5 import collections
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
6 import pandas as pd
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
7 import pickle as pk
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
8 import utils.general_utils as utils
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
9 import utils.rule_parsing as ruleUtils
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
10 from typing import Union, Optional, List, Dict, Tuple, TypeVar
309
38c9a958ea78 Uploaded
francesco_lapi
parents: 266
diff changeset
11 import os
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
12
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
# Module-level list; nothing in the visible part of this file appends to it.
# NOTE(review): presumably meant to collect error messages - confirm against the rest of the file.
ERRORS = []
########################## argparse ##########################################
# Bare annotation: declares the name and its type but binds no value here.
# Functions below read attributes such as ARGS.tool_dir and ARGS.out_log, so it
# must be assigned before they run (presumably from process_args() - confirm).
ARGS :argparse.Namespace
147
3fca9b568faf Uploaded
bimib
parents: 93
diff changeset
def process_args(args:List[str] = None) -> argparse.Namespace:
    """
    Build the command-line parser for this tool and parse the given arguments.

    Args:
        args (list): Argument strings to parse. When None, argparse falls back
            to the process arguments (sys.argv[1:]).

    Returns:
        Namespace: The parsed arguments (model_upload, model_upload_name, none,
        tool_dir, out_log, input, ras_output).
    """
    cli = argparse.ArgumentParser(
        usage = '%(prog)s [options]',
        description = "process some value's genes to create a comparison's map.")

    # (flags, keyword settings) pairs, registered in this exact order so the
    # generated help text keeps the same layout.
    option_specs = (
        (("-rl", "--model_upload"),
         dict(type = str, help = "path to input file with custom rules, if provided")),
        # The upload name is passed separately because Galaxy converts uploaded
        # files to .dat, which hides the original file extension.
        (("-rn", "--model_upload_name"),
         dict(type = str, help = "custom rules name")),
        (('-n', '--none'),
         dict(type = utils.Bool("none"), default = True, help = 'compute Nan values')),
        (('-td', '--tool_dir'),
         dict(type = str, required = True, help = 'your tool directory')),
        (('-ol', '--out_log'),
         dict(type = str, help = "Output log")),
        # This option was formerly called 'id' and has been renamed to 'in'.
        (('-in', '--input'),
         dict(type = str, help = 'input dataset')),
        (('-ra', '--ras_output'),
         dict(type = str, required = True, help = 'ras output')),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args(args)
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
63
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
64 ############################ dataset input ####################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def read_dataset(data :str, name :str) -> pd.DataFrame:
    """
    Read a tab-separated dataset (first row is the header) into a DataFrame.

    Args:
        data (str): Path to the tab-separated file containing the dataset.
        name (str): Name of the dataset, used in error messages.

    Returns:
        pandas.DataFrame: DataFrame containing the dataset.

    Raises:
        SystemExit: If the file is empty, cannot be parsed, or has fewer than
            two columns. pandas' own EmptyDataError / ParserError never reach
            the caller; they are converted into this abort.
    """
    wrong_format = 'Execution aborted: wrong format of ' + name + '\n'
    try:
        dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # A malformed file is reported the same way as an empty one instead of
        # surfacing a pandas traceback.
        sys.exit(wrong_format)

    # At least two columns are required; anything narrower is rejected.
    if len(dataset.columns) < 2:
        sys.exit(wrong_format)
    return dataset
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
87
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
88 ############################ load id e rules ##################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def load_id_rules(reactions :Dict[str, Dict[str, List[str]]]) -> Tuple[List[str], List[Dict[str, List[str]]]]:
    """
    Split a reactions dictionary into parallel lists of IDs and rules.

    Args:
        reactions (dict): A dictionary where keys are IDs and values are rules.

    Returns:
        tuple: (ids, rules). Both lists follow the dictionary's iteration
        order, so ids[i] is the key of rules[i].
    """
    # keys() and values() iterate in the same order, so the lists stay aligned.
    return (list(reactions.keys()), list(reactions.values()))
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
104
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
105 ############################ check_methods ####################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def gene_type(l :str, name :str) -> str:
    """
    Determine the type of a gene ID.

    The checks run in a fixed order (HGNC, Ensembl, symbol, Entrez) and the
    first one that accepts the identifier decides the result.

    Args:
        l (str): The gene identifier to check.
        name (str): The name of the dataset, used in error messages.

    Returns:
        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').

    Raises:
        SystemExit: If none of the checks accepts the identifier.
    """
    checks = (
        ('hugo_id', check_hgnc),
        ('ensembl_gene_id', check_ensembl),
        ('symbol', check_symbol),
        ('entrez_id', check_entrez),
    )
    for id_type, accepts in checks:
        if accepts(l):
            return id_type

    # Message fixed: it used to concatenate to "Supported IDtypes" (missing
    # space) and misspelled "Ensembl".
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensembl ID, HUGO symbol, Entrez ID\n')
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
132
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def check_hgnc(l :str) -> bool:
    """
    Tell whether a gene identifier has the HGNC form: 'HGNC:' followed by digits.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True when the identifier is longer than the prefix, starts with
        'HGNC:' (case-insensitive) and everything after the fifth character is
        made of digits; False otherwise.
    """
    prefix = 'HGNC:'
    # The identifier must carry something after the prefix.
    if len(l) <= len(prefix):
        return False
    if not l.upper().startswith(prefix):
        return False
    return l[len(prefix):].isdigit()
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
150
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def check_ensembl(l :str) -> bool:
    """
    Check if a gene identifier follows the Ensembl format.

    An identifier is accepted when, ignoring case, it starts with 'ENS',
    optionally followed by more letters (species / feature prefix), and then
    at least one digit. A bare 'ENS' prefix is not enough: HUGO symbols such
    as 'ENSA' start with the same letters and must not be classified as
    Ensembl IDs. Anything after the first digit (version suffix, '_PAR_Y',
    ...) is not inspected.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the Ensembl format, False otherwise.
    """
    import re  # local import: the module does not import `re` at file level

    return re.match(r'ENS[A-Z]*[0-9]', l.upper()) is not None
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
162
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
163
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def check_symbol(l :str) -> bool:
    """
    Check if a gene identifier follows the symbol format.

    A symbol starts with a letter; every following character must be a letter,
    a digit or a hyphen. Single-letter symbols and hyphenated symbols (for
    example 'HLA-A' or 'MT-CO1') are accepted.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the symbol format, False otherwise.
    """
    if not l or not l[0].isalpha():
        return False
    # all() over an empty tail is True, so a one-letter symbol is valid; the
    # previous ''.isalnum() test rejected it.
    return all(ch.isalnum() or ch == '-' for ch in l[1:])
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
181
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def check_entrez(l :str) -> bool:
    """
    Tell whether a gene identifier looks like an Entrez ID (digits only).

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True for a non-empty string made only of digits, False otherwise.
    """
    return l.isdigit() if l else False
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
196
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
197 ############################ gene #############################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]:
    """
    Normalise the gene IDs of a dataset, reject duplicated rule genes and
    convert the dataset to a dictionary.

    The first column of `gene` is rewritten in place: each ID is stripped of
    surrounding whitespace and cut at the first '.' (which drops a version
    suffix such as 'ENSG01.5' -> 'ENSG01'). The IDs must be strings.

    Args:
        gene (DataFrame): Dataset whose first column holds the gene IDs.
        type_gene (str): Type of gene ID (e.g., 'hugo_id', 'ensembl_gene_id', 'symbol', 'entrez_id').
        name (str): Name of the dataset, used in messages.
        gene_custom (dict or None): Genes used by custom rules. When None, the
            gene list is loaded from a pickle file under ARGS.tool_dir chosen
            by ARGS.rules_selector.

    Returns:
        dict: `gene.set_index(<first column>).to_dict()`, i.e. one entry per
        remaining column, each mapping gene ID -> value.

    Raises:
        SystemExit: If a gene ID occurring more than once in the dataset is
            marked 'ok' in the rule gene list, or if no built-in gene list
            exists for ARGS.rules_selector.
    """
    # One column-wide assignment instead of a per-row .iloc write.
    gene.iloc[:, 0] = [raw_id.strip().split('.')[0] for raw_id in gene.iloc[:, 0]]

    gene_dup = [item for item, count in
                collections.Counter(gene[gene.columns[0]]).items() if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]

    if gene_dup:
        if gene_custom is None:
            selector = str(ARGS.rules_selector)
            if selector not in ('HMRcore', 'Recon', 'ENGRO2'):
                # Previously this fell through with gene_in_rule = None and
                # crashed with an AttributeError on the .get() below.
                sys.exit('Execution aborted: no gene list available for rules '
                         + selector + '\n')

            # NOTE: pickle.load can execute arbitrary code; these files are
            # read from the tool directory and must be trusted.
            rules_file = ARGS.tool_dir + '/local/pickle files/' + selector + '_genes.p'
            with open(rules_file, 'rb') as handle:
                gene_in_rule = pk.load(handle)

            if selector == 'ENGRO2':
                # NOTE(review): the original nesting of this warning is not
                # recoverable from the annotated view; it is kept in the
                # ENGRO2 branch - confirm against the repository.
                utils.logWarning(f"{ARGS.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log)

            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom

        # Duplicates only matter for genes that the rules actually use.
        duplicated_in_rules = [gene_id for gene_id in gene_dup
                               if gene_in_rule.get(gene_id) == 'ok']
        if duplicated_in_rules:
            sys.exit('Execution aborted because gene ID '
                     + str(duplicated_in_rules) + ' in ' + name + ' is duplicated\n')

    if pat_dup: utils.logWarning(f"Warning: duplicated label\n{pat_dup} in {name}", ARGS.out_log)
    return (gene.set_index(gene.columns[0])).to_dict()
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
252
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
253 ############################ resolve ##########################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def replace_gene_value(l :list, d :dict) -> Tuple[list, list]:
    """
    Replace the gene identifiers of a (possibly nested) rule with their values.

    Args:
        l (list): Parsed rule: a list whose items are gene identifiers, the
            operators 'and' / 'or', or nested lists of the same shape.
        d (dict): Mapping from gene identifier to its value.

    Returns:
        tuple: (values, missing). `values` mirrors the structure of `l` with
        every item replaced by the result of replace_gene; `missing` is a flat
        list of the identifiers for which replace_gene returned None.
    """
    values, missing = [], []
    # `l or ()` keeps the old behaviour of returning ([], []) for any falsy
    # input; iterating directly avoids re-slicing the list on every step.
    for item in (l or ()):
        if isinstance(item, list):
            nested_values, nested_missing = replace_gene_value(item, d)
            values.append(nested_values)
            missing.extend(nested_missing)
            continue

        value = replace_gene(item, d)
        values.append(value)
        if value is None:
            missing.append(item)
    return (values, missing)
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
279
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def replace_gene(l :str, d :dict) -> Optional[Union[int, float, str]]:
    """
    Replace a single gene identifier with its corresponding value from a dictionary.

    Args:
        l (str): Gene identifier to replace, or one of the operators 'and' / 'or'.
        d (dict): Mapping from gene identifier to its value.

    Returns:
        The operator itself when `l` is 'and' or 'or'; otherwise the int/float
        stored for `l` in `d`, or None when `l` is not a key of `d`.

    Raises:
        SystemExit: If the value stored for `l` is neither None nor a number.
    """
    if l in ('and', 'or'):
        return l

    value = d.get(l, None)
    if value is not None and not isinstance(value, (int, float)):
        # f-string instead of '+': concatenating a non-string value raised a
        # TypeError and hid the intended abort message.
        sys.exit(f'Execution aborted: {value} value not valid\n')
    return value
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
301
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
T = TypeVar("T", bound = Optional[Union[int, float]])
def computes(val1 :T, op :str, val2 :T, cn :bool) -> T:
    """
    Combine two values with a rule operator.

    When both values are present, 'and' yields their minimum and any other
    operator yields their sum. When at least one value is missing (None):
    'and' yields the value that is present only if `cn` is True, otherwise
    None; any other operator yields whichever value is present. If both are
    missing the result is None.

    Args:
        val1(Optional(Union[float, int])): First value.
        op (str): Operator ('and' or 'or').
        val2(Optional(Union[float, int])): Second value.
        cn (bool): Whether 'and' may ignore a missing operand.

    Returns:
        Optional(Union[float, int]): Result of the computation.
    """
    has_first = val1 is not None
    has_second = val2 is not None

    if has_first and has_second:
        return min(val1, val2) if op == 'and' else val1 + val2

    # From here on at least one operand is missing.
    if op == 'and' and cn is not True:
        return None

    # val2 is None whenever it is missing, so this also covers "both missing".
    return val1 if has_first else val2
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
338
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
339 # ris should be Literal[None] but Literal is not supported in Python 3.7
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def control(ris, l :List[Union[int, float, list]], cn :bool) -> Union[bool, int, float]: #Union[Literal[False], int, float]:
    """
    Validate an expression and reduce it to a value.

    A one-item expression is resolved here: a number or None is returned as
    is, a nested list is evaluated recursively, anything else is invalid.
    Expressions with more than two items are handed to control_list. Empty
    and two-item expressions are invalid.

    Args:
        ris: Intermediate result, forwarded to control_list.
        l (list): Expression to control.
        cn (bool): Control boolean value, forwarded unchanged.

    Returns:
        Union[Literal[False], int, float]: False for an invalid expression,
        otherwise the value (possibly None) of the expression.
    """
    size = len(l)
    if size > 2:
        return control_list(ris, l, cn)
    if size != 1:
        return False

    only = l[0]
    if isinstance(only, list):
        # A nested expression starts with no intermediate result.
        return control(None, only, cn)
    if only is None or isinstance(only, (float, int)):
        return only
    return False
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
363
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def control_list(ris, l :List[Optional[Union[float, int, list]]], cn :bool) -> Optional[bool]: #Optional[Literal[False]]:
    """
    Control the format of a list of expressions and fold it into a single value.

    The tokens are consumed left to right. An `operand operator operand` triple is
    combined with computes; an `operator operand` pair is combined with the running
    result. Operands that are nested lists are first reduced through control.

    Args:
        ris: Intermediate result, used as the left operand of `operator operand` pairs.
        l (list): List of expressions to control.
        cn (bool): Control boolean value, forwarded unchanged to computes and control.

    Returns:
        Optional[Literal[False]]: The folded result once every token has been consumed
        (ris itself when l is empty), or False as soon as the tokens are malformed.
    """
    operators = ('and', 'or')

    def is_scalar(item) -> bool:
        return item is None or isinstance(item, (float, int))

    def is_operand(item) -> bool:
        return is_scalar(item) or isinstance(item, list)

    def evaluate(item):
        # Returns (value, is_valid); only nested lists can turn out to be invalid here.
        if isinstance(item, list):
            value = control(None, item, cn)
            return value, value is not False
        return item, True

    while l:
        if len(l) == 1:
            return False

        first, second = l[0], l[1]
        if second in operators and is_operand(first):
            # `operand operator` with nothing after it is malformed: report it as such
            # instead of failing with an IndexError on the missing right operand.
            if len(l) < 3 or not is_operand(l[2]):
                return False

            left, left_ok = evaluate(first)
            right, right_ok = evaluate(l[2])
            if not (left_ok and right_ok):
                return False

            ris = computes(left, second, right, cn)
            l = l[3:]
        elif first in operators:
            if not is_operand(second):
                return False

            right, right_ok = evaluate(second)
            if not right_ok:
                return False

            ris = computes(ris, first, right, cn)
            l = l[2:]
        else:
            return False
    return ris
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
424
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
ResolvedRules = Dict[str, List[Optional[Union[float, int]]]]
def resolve(genes: Dict[str, str], rules: List[str], ids: List[str], resolve_none: bool, name: str) -> Tuple[Optional[ResolvedRules], Optional[list]]:
    """
    Resolve rules using gene data to compute scores for each rule.

    Every rule is evaluated once per entry of genes: the entry's value is substituted
    into the rule by replace_gene_value and the outcome is reduced by control.

    Args:
        genes (dict): Gene data; the scores computed from each value are stored under its key.
        rules (list): List of rules to resolve. Empty rules score None.
        ids (list): List of IDs corresponding to the rules (not used by this function).
        resolve_none (bool): Flag indicating whether to resolve None values in the rules.
        name (str): Name of the dataset, only used in the warning message.

    Returns:
        tuple: The resolved rules as a dictionary together with the de-duplicated list of
        genes reported as missing by replace_gene_value, or (None, None) when not a
        single score could be computed.
    """
    resolve_rules = {}
    not_found = []
    any_score = False

    for key, value in genes.items():
        scores = []
        for rule in rules:
            if not rule:
                scores.append(None)
                continue

            expression, missing = replace_gene_value(rule, value)
            if missing:
                not_found.extend(missing)

            ris = control(None, expression, resolve_none)
            # control signals a malformed rule with False and an unknown score with None;
            # both are stored as None. A legitimate score of 0 must be kept.
            if ris is False or ris is None:
                scores.append(None)
            else:
                scores.append(ris)
                any_score = True

        resolve_rules[key] = scores

    if not any_score:
        utils.logWarning(
            f"Warning: no computable score (due to missing gene values) for class {name}, the class has been disregarded",
            ARGS.out_log)

        return (None, None)

    return (resolve_rules, list(set(not_found)))
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
469 ############################ create_ras #######################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def create_ras(resolve_rules: Optional[ResolvedRules], dataset_name: str, rules: List[str], ids: List[str], file: str) -> None:
    """
    Create a RAS (Reaction Activity Score) file from resolved rules.

    Args:
        resolve_rules (dict): Dictionary containing resolved rules. Its None scores are
            replaced in place by the string 'None'. When the dictionary itself is None a
            warning is logged and no file is written.
        dataset_name (str): Name of the dataset, only used in the warning message.
        rules (list): List of rules (not used by this function).
        ids (list): Reaction IDs, written as the leading 'Reactions' column.
        file (str): Path to the output RAS file.

    Returns:
        None
    """
    if resolve_rules is None:
        utils.logWarning(f"Couldn't generate RAS for current dataset: {dataset_name}", ARGS.out_log)
        # Nothing can be written; without this early exit the loop below fails on None.
        return

    for geni in resolve_rules.values():
        for i, valori in enumerate(geni):
            if valori is None:
                geni[i] = 'None'

    output_ras = pd.DataFrame.from_dict(resolve_rules)
    output_ras.insert(0, 'Reactions', ids)
    output_to_csv = output_ras.to_csv(sep = '\t', index = False)

    # The context manager closes the file even when the write fails.
    with open(file, "w") as text_file:
        text_file.write(output_to_csv)
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
500
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
501 ################################- NEW RAS COMPUTATION -################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
Expr = Optional[Union[int, float]]
Ras = Expr
def ras_for_cell_lines(dataset: pd.DataFrame, rules: Dict[str, ruleUtils.OpList]) -> Dict[str, Dict[str, Ras]]:
    """
    Generates the RAS scores for each cell line found in the dataset.

    Args:
        dataset (pd.DataFrame): Dataset containing gene values; its first column becomes the index.
        rules (dict): The dict containing reaction ids as keys and rules as values.

    Side effects:
        dataset : mut (re-indexed in place on its first column)

    Returns:
        dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary
        where each key corresponds to a reaction ID and each value is its computed RAS score.
    """
    # After re-indexing, every remaining column is one cell line.
    dataset.set_index(dataset.columns[0], inplace=True)

    return {
        cell_line_name: get_ras_values(rules, dataset[cell_line_name].to_dict())
        for cell_line_name in dataset.columns
    }
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
526
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
    """
    Computes the RAS (Reaction Activity Score) value of every rule in the given dict.

    Args:
        value_rules (dict): A dictionary where keys are reaction ids and values are OpLists.
        dataset : gene expression data of one cell line.

    Returns:
        dict: A dictionary where keys are reaction ids and values are the RAS values
        computed by ras_op_list for the matching rule.
    """
    ras_by_reaction = {}
    for reaction_id, op_list in value_rules.items():
        ras_by_reaction[reaction_id] = ras_op_list(op_list, dataset)

    return ras_by_reaction
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
539
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def get_gene_expr(dataset :Dict[str, Expr], name :str) -> Expr:
    """
    Looks up the expression value of the given gene in a cell line dataset.

    Args:
        dataset : gene expression data of one cell line.
        name : gene name.

    Side effects:
        ERRORS : mut (the gene name is appended whenever no value is available)

    Returns:
        Expr : the gene's expression value, None when the gene is absent or mapped to None.
    """
    expr = dataset.get(name)
    if expr is not None:
        return expr

    # Remember the gene so that the caller can report it once processing is over.
    ERRORS.append(name)
    return None
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
555
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def ras_op_list(op_list: ruleUtils.OpList, dataset: Dict[str, Expr]) -> Ras:
    """
    Recursively computes the RAS (Reaction Activity Score) value of the given OpList.

    Operands joined by OR are summed, any other operator takes the minimum. Operands
    without a value are skipped, except for an AND evaluated while ARGS.none is
    falsy: there a single missing operand makes the whole result None.

    Args:
        op_list (OpList): The OpList representing a rule with gene values.
        dataset : gene expression data of one cell line.

    Returns:
        Ras: The computed RAS value for the given OpList, None when no operand has a value.
    """
    op = op_list.op
    if not op:
        # No operator: the list holds a single gene name.
        return get_gene_expr(dataset, op_list[0])

    strict_and = op is ruleUtils.RuleOp.AND and not ARGS.none
    if strict_and and None in op_list:
        return None

    ras_value :Ras = None
    for item in op_list:
        if isinstance(item, ruleUtils.OpList):
            value = ras_op_list(item, dataset)
        else:
            value = get_gene_expr(dataset, item)

        if value is None:
            if strict_and:
                return None
            continue

        if ras_value is None:
            # First operand with a value seeds the accumulator.
            ras_value = value
        elif op is ruleUtils.RuleOp.OR:
            ras_value = ras_value + value
        else:
            ras_value = min(ras_value, value)

    return ras_value
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
590
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None:
    """
    Save computed ras scores as a tsv file at the path held in ARGS.ras_output.

    Args:
        rasScores : the computed ras scores; None scores are replaced in place by the string "None".
        reactions : values of the leading 'Reactions' column.

    Returns:
        None
    """
    # Patching the dicts by hand before building the frame is deliberate: the original
    # author found it a lot faster than the out-of-the-box dataframe method.
    for scores in rasScores.values():
        unscored = [reactId for reactId, score in scores.items() if score is None]
        for reactId in unscored:
            scores[reactId] = "None"

    ras_table = pd.DataFrame.from_dict(rasScores)
    ras_table.insert(0, 'Reactions', reactions)
    ras_table.to_csv(ARGS.ras_output, sep = '\t', index = False)
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
609
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
610 ############################ MAIN #############################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
611 #TODO: currently unused, but keep it: it will be needed once the new translator dicts are in use.
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def translateGene(geneName :str, encoding :str, geneTranslator :Dict[str, Dict[str, str]]) -> str:
    """
    Translate a gene from any supported encoding to HugoID.

    Args:
        geneName (str): the name of the gene in its current encoding.
        encoding (str): the encoding, used as key into geneTranslator.
        geneTranslator (Dict[str, Dict[str, str]]): the dict containing all supported gene names
        and encodings in the current model, mapping each to the corresponding HugoID encoding.

    Raises:
        ValueError: When the gene isn't supported in the model.

    Returns:
        str: the gene in HugoID encoding.
    """
    translations = geneTranslator[encoding]
    if geneName not in translations:
        raise ValueError(f"Gene \"{geneName}\" non trovato, verifica di star utilizzando il modello corretto!")

    return translations[geneName]
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
631
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
def _parse_rules_file(datFilePath, delimiter :str) -> Dict[str, ruleUtils.OpList]:
    """
    Reads the rules file with the given delimiter and parses one rule per row.

    Rows with fewer than three columns are skipped with a warning. Column 0 is used as
    the reaction ID and column 2 as the rule text; an empty rule text is stored as
    OpList([""]).
    """
    parsed = {}
    for line in utils.readCsv(datFilePath, delimiter = delimiter):
        if len(line) < 3:
            utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
            continue

        if line[2] == "":
            parsed[line[0]] = ruleUtils.OpList([""])
        else:
            parsed[line[0]] = ruleUtils.parseRuleToNestedList(line[2])

    return parsed


def load_custom_rules() -> Dict[str, ruleUtils.OpList]:
    """
    Opens the custom rules file (ARGS.model_upload) and parses the rules it contains.

    The file is read as tab-separated first; if that attempt raises, it is read again
    as comma-separated. Loading pre-parsed rules from a pickle file is currently disabled.

    Raises:
        ValueError: When the file cannot be parsed with either delimiter, or when it
        yields no rule at all.

    Returns:
        Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
    """
    datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat

    try:
        # Try the tab delimiter first.
        dict_rule = _parse_rules_file(datFilePath, "\t")

    except Exception as tab_err:
        # NOTE(review): the comma fallback only runs when the tab attempt raises. If
        # readCsv hands back comma-separated rows as single-column lines, they are
        # presumably skipped as malformed and the file is rejected below - confirm.
        try:
            dict_rule = _parse_rules_file(datFilePath, ",")
        except Exception as comma_err:
            raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {tab_err}, Comma: {comma_err}") from comma_err

    if not dict_rule:
        raise ValueError("No valid rules found in the uploaded file. Please check the file format.")

    return dict_rule
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
683
401
6c7ddf68381a Uploaded
francesco_lapi
parents: 400
diff changeset
684
147
3fca9b568faf Uploaded
bimib
parents: 93
diff changeset
685 def main(args:List[str] = None) -> None:
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
686 """
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
687 Initializes everything and sets the program in motion based on the fronted input arguments.
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
688
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
689 Returns:
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
690 None
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
691 """
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
692 # get args from frontend (related xml)
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
693 global ARGS
147
3fca9b568faf Uploaded
bimib
parents: 93
diff changeset
694 ARGS = process_args(args)
309
38c9a958ea78 Uploaded
francesco_lapi
parents: 266
diff changeset
695
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
696 # read dataset
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
697 dataset = read_dataset(ARGS.input, "dataset")
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
698 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
699
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
700 # remove versioning from gene names
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
701 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
702
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
703 rules = load_custom_rules()
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
704 reactions = list(rules.keys())
309
38c9a958ea78 Uploaded
francesco_lapi
parents: 266
diff changeset
705
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
706 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
707 if ERRORS: utils.logWarning(
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
708 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
709 ARGS.out_log)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
710
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
711
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
712 ############
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
713
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
714 # handle custom models
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
715 #model :utils.Model = ARGS.rules_selector
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
716
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
717 #if model is utils.Model.Custom:
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
718 # rules = load_custom_rules()
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
719 # reactions = list(rules.keys())
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
720
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
721 # save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
722 # if ERRORS: utils.logWarning(
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
723 # f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
724 # ARGS.out_log)
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
725
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
726 # return
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
727
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
728 # This is the standard flow of the ras_generator program, for non-custom models.
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
729 #name = "RAS Dataset"
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
730 #type_gene = gene_type(dataset.iloc[0, 0], name)
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
731
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
732 #rules = model.getRules(ARGS.tool_dir)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
733 #genes = data_gene(dataset, type_gene, name, None)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
734 #ids, rules = load_id_rules(rules.get(type_gene))
381
0a3ca20848f3 Uploaded
francesco_lapi
parents: 309
diff changeset
735
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
736 #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
737 #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
738
398
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
739 #if err: utils.logWarning(
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
740 # f"Warning: gene(s) {err} not found in class \"{name}\", " +
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
741 # "the expression level for this gene will be considered NaN",
09064ce8f095 Uploaded
francesco_lapi
parents: 381
diff changeset
742 # ARGS.out_log)
93
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
743
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
744 print("Execution succeded")
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
745
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
746 ###############################################################################
7e703e546998 Uploaded
luca_milaz
parents:
diff changeset
747 if __name__ == "__main__":
309
38c9a958ea78 Uploaded
francesco_lapi
parents: 266
diff changeset
748 main()