annotate COBRAxy/ras_generator.py @ 24:c5dbdbb64cef draft

Uploaded
author luca_milaz
date Thu, 19 Sep 2024 08:03:37 +0000
parents 41f35c2f0c7b
children a1ab05a70185
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
1 from __future__ import division
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
2 # galaxy complains this ^^^ needs to be at the very beginning of the file, for some reason.
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
3 import sys
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
4 import argparse
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
5 import collections
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
6 import pandas as pd
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
7 import pickle as pk
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
8 import utils.general_utils as utils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
9 import utils.rule_parsing as ruleUtils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
10 from typing import Union, Optional, List, Dict, Tuple, TypeVar
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
11
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
12 ERRORS = []
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
13 ########################## argparse ##########################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
14 ARGS :argparse.Namespace
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def process_args(args :Optional[List[str]] = None) -> argparse.Namespace:
    """
    Build the command-line parser for this tool and parse the arguments.

    Args:
        args (list): List of command-line arguments. When omitted (None),
            argparse falls back to sys.argv[1:], which is what the original
            zero-argument call did.

    Returns:
        Namespace: An object containing parsed arguments.
    """
    parser = argparse.ArgumentParser(
        usage = '%(prog)s [options]',
        description = "process some value's genes to create a comparison's map.")

    parser.add_argument(
        '-rs', '--rules_selector',
        type = utils.Model, default = utils.Model.HMRcore, choices = list(utils.Model),
        help = 'chose which type of dataset you want use')

    parser.add_argument("-rl", "--rule_list", type = str,
        help = "path to input file with custom rules, if provided")

    # Galaxy converts uploaded files into .dat, so the original file name
    # (and therefore its extension) has to be passed in separately.
    parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name")

    parser.add_argument(
        '-n', '--none',
        type = utils.Bool("none"), default = True,
        help = 'compute Nan values')

    parser.add_argument(
        '-td', '--tool_dir',
        type = str,
        required = True, help = 'your tool directory')

    parser.add_argument(
        '-ol', '--out_log',
        type = str,
        help = "Output log")

    parser.add_argument(
        '-in', '--input', # this option used to be called 'id'; it is now 'in'
        type = str,
        help = 'input dataset')

    parser.add_argument(
        '-ra', '--ras_output',
        type = str,
        required = True, help = 'ras output')

    return parser.parse_args(args)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
66
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
67 ############################ dataset input ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def read_dataset(data :str, name :str) -> pd.DataFrame:
    """
    Read a tab-separated dataset file and return it as a pandas DataFrame.

    Args:
        data (str): Path to the tab-separated file; its first row is the header.
        name (str): Name of the dataset, used in error messages.

    Returns:
        pandas.DataFrame: DataFrame containing the dataset (at least two columns).

    Raises:
        SystemExit: raised through sys.exit when the file is empty, cannot be
            parsed, or yields fewer than two columns. pandas' EmptyDataError
            and ParserError are caught here and never reach the caller.
    """
    wrong_format = 'Execution aborted: wrong format of ' + name + '\n'
    try:
        dataset = pd.read_csv(data, sep = '\t', header = 0, engine = 'python')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # Both mean "this is not a usable table"; report it the same way as
        # the column-count check below instead of leaking a traceback.
        sys.exit(wrong_format)
    if len(dataset.columns) < 2:
        sys.exit(wrong_format)
    return dataset
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
90
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
91 ############################ load ids and rules ###############################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def load_id_rules(reactions :Dict[str, Dict[str, List[str]]]) -> Tuple[List[str], List[Dict[str, List[str]]]]:
    """
    Split a dictionary of reactions into parallel lists of IDs and rules.

    Args:
        reactions (dict): A dictionary where keys are IDs and values are rules.

    Returns:
        tuple: (ids, rules) — two new lists in the dictionary's iteration
        order, so ids[i] is the key whose value is rules[i].
    """
    return (list(reactions.keys()), list(reactions.values()))
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
107
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
108 ############################ check_methods ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def gene_type(l :str, name :str) -> str:
    """
    Determine the type of gene ID.

    The checks run in a fixed order (HGNC, Ensembl, symbol, Entrez) and the
    first one that accepts the identifier wins.

    Args:
        l (str): The gene identifier to check.
        name (str): The name of the dataset, used in error messages.

    Returns:
        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').

    Raises:
        SystemExit: raised through sys.exit when none of the checks accepts
            the identifier.
    """
    if check_hgnc(l):
        return 'hugo_id'
    if check_ensembl(l):
        return 'ensembl_gene_id'
    if check_symbol(l):
        return 'symbol'
    if check_entrez(l):
        return 'entrez_id'

    # The original message concatenated 'Supported ID' and 'types are' with
    # no separator, printing "Supported IDtypes are".
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
135
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_hgnc(l :str) -> bool:
    """
    Check if a gene identifier follows the HGNC format.

    The identifier must start with 'HGNC:' (case-insensitive) and everything
    after the first five characters must be digits.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the HGNC format, False otherwise.
    """
    # ''.isdigit() is False, so a bare 'HGNC:' prefix is rejected without a
    # separate length check.
    return l.upper().startswith('HGNC:') and l[5:].isdigit()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
153
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_ensembl(l :str) -> bool:
    """
    Tell whether a gene identifier looks like an Ensembl ID.

    Only the prefix is inspected: the identifier is accepted when its
    upper-cased form begins with 'ENS'.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the identifier has the Ensembl prefix, False otherwise.
    """
    upper_cased = l.upper()
    return upper_cased[:3] == 'ENS'
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
165
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
166
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_symbol(l :str) -> bool:
    """
    Check if a gene identifier follows the symbol format.

    A symbol is accepted when its first character is a letter and the rest of
    the string is non-empty and alphanumeric.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the symbol format, False otherwise.
    """
    # NOTE(review): ''.isalnum() is False, so one-character identifiers are
    # rejected, as are symbols containing '-' or '.'. This matches the
    # previous behaviour; confirm whether that is intended.
    return bool(l) and l[0].isalpha() and l[1:].isalnum()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
184
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_entrez(l :str) -> bool:
    """
    Check if a gene identifier follows the Entrez ID format.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the identifier is a non-empty string of digits, False otherwise.
    """
    # str.isdigit() already returns False for the empty string.
    return l.isdigit()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
199
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
200 ############################ gene #############################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, dict]:
    """
    Process gene data to ensure correct formatting and handle duplicates.

    The first column of `gene` is rewritten in place: every identifier is
    stripped of surrounding whitespace and cut at its first '.'. Duplicated
    identifiers abort the run only when the rules mark them as 'ok' (i.e. the
    gene is used by the rules); duplicated column labels only produce a
    logged warning.

    Args:
        gene (DataFrame): DataFrame containing gene data; the first column holds the gene IDs.
        type_gene (str): Type of gene data (e.g., 'hugo_id', 'ensembl_gene_id', 'symbol', 'entrez_id').
        name (str): Name of the dataset.
        gene_custom (dict or None): Custom gene data dictionary if provided.

    Returns:
        dict: The result of DataFrame.to_dict() after indexing by the first
        column, i.e. {column label: {gene ID: value}}.

    Raises:
        SystemExit: raised through sys.exit when a duplicated gene ID is
            marked 'ok' in the rules, or when no built-in gene list matches
            the selected rules.
    """
    # A list (not a Series) is assigned so pandas does not try to align on
    # the index; positional access also works with duplicated column labels.
    gene.iloc[:, 0] = [gene_id.strip().split('.')[0] for gene_id in gene.iloc[:, 0]]

    gene_dup = [item for item, count in
                collections.Counter(gene.iloc[:, 0]).items() if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]

    if gene_dup:
        if gene_custom is None:
            # Arguments are only needed on this path, so parse them here.
            args = process_args()

            # NOTE(review): the parser builds rules_selector with
            # `type = utils.Model`, while this code used to compare it with
            # plain strings. utils.Model is not visible here, so the
            # selector is matched on itself, its .name and its .value —
            # confirm against utils.Model.
            selector = args.rules_selector
            aliases = (selector,
                       getattr(selector, 'name', None),
                       getattr(selector, 'value', None))
            model = next((m for m in ('HMRcore', 'Recon', 'ENGRO2') if m in aliases), None)
            if model is None:
                sys.exit('Execution aborted: no built-in gene list for rules '
                         + str(selector) + '\n')

            # NOTE: pickle.load can execute arbitrary code; these files are
            # expected to be the ones shipped inside the tool directory.
            with open(args.tool_dir + '/local/pickle files/' + model + '_genes.p', 'rb') as pickled:
                gene_in_rule = pk.load(pickled)

            gene_in_rule = gene_in_rule.get(type_gene)

        else:
            gene_in_rule = gene_custom

        tmp = [i for i in gene_dup if gene_in_rule.get(i) == 'ok']
        if tmp:
            sys.exit('Execution aborted because gene ID '
                     +str(tmp)+' in '+name+' is duplicated\n')

    # NOTE(review): ARGS is the module-level namespace, presumably populated
    # by the entry point before this function runs — confirm.
    if pat_dup: utils.logWarning(f"Warning: duplicated label\n{pat_dup} in {name}", ARGS.out_log)
    return (gene.set_index(gene.columns[0])).to_dict()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
249
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
250 ############################ resolve ##########################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def replace_gene_value(l :List[Union[str, list]], d :Dict[str, Union[int, float]]) -> Tuple[list, List[str]]:
    """
    Replace gene identifiers with corresponding values from a dictionary.

    Walks a (possibly nested) parsed rule and substitutes every token through
    replace_gene; nested lists are processed recursively and keep their
    nesting in the result.

    Args:
        l (list): Parsed rule: gene identifiers, 'and'/'or' operators and nested lists.
        d (dict): Mapping from gene identifier to its value.

    Returns:
        tuple: A tuple containing two lists: the first list contains replaced
        values (None where a gene has no value), and the second list contains
        the identifiers for which no value was found.
    """
    resolved = []
    missing = []
    if not l:
        # Same result the original `while l:` loop gave for any falsy input.
        return (resolved, missing)

    # Plain iteration replaces the original `l = l[1:]` re-slicing, which
    # copied the remaining list on every step.
    for token in l:
        if isinstance(token, list):
            nested_values, nested_missing = replace_gene_value(token, d)
            resolved.append(nested_values)
            missing.extend(nested_missing)
        else:
            value = replace_gene(token, d)
            resolved.append(value)
            if value is None:
                missing.append(token)
    return (resolved, missing)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
276
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def replace_gene(l :str, d :Dict[str, Union[int, float]]) -> Optional[Union[int, float, str]]:
    """
    Replace a single gene identifier with its corresponding value from a dictionary.

    Args:
        l (str): Gene identifier to replace, or one of the operators 'and' / 'or'.
        d (dict): Mapping from gene identifier to its value.

    Returns:
        The operator itself when `l` is 'and' or 'or'; otherwise the value
        from the dictionary if found, None otherwise.

    Raises:
        SystemExit: raised through sys.exit if the value associated with the
            gene identifier is neither None nor an int/float.
    """
    if l == 'and' or l == 'or':
        return l

    value = d.get(l, None)
    if value is not None and not isinstance(value, (int, float)):
        # The offending value is not a number and is not guaranteed to be a
        # str; plain `'...' + value` raised TypeError for anything else and
        # hid the real problem.
        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
    return value
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
298
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
T = TypeVar("T", bound = Optional[Union[int, float]])
def computes(val1 :T, op :str, val2 :T, cn :bool) -> T:
    """
    Compute the RAS value between two value and an operator ('and' or 'or').

    With both values present, 'and' yields the minimum and any other operator
    yields the sum. With a value missing (None), the surviving value is
    returned, except that 'and' yields None unless `cn` is True.

    Args:
        val1(Optional(Union[float, int])): First value.
        op (str): Operator ('and' or 'or').
        val2(Optional(Union[float, int])): Second value.
        cn (bool): Control boolean value; when True, 'and' tolerates a missing operand.

    Returns:
        Optional(Union[float, int]): Result of the computation.
    """
    if val1 is not None and val2 is not None:
        return min(val1, val2) if op == 'and' else val1 + val2

    # At least one operand is missing from here on.
    if op == 'and' and cn is not True:
        return None

    # Whichever operand is present; None when both are missing.
    return val1 if val1 is not None else val2
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
335
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
336 # ris should be Literal[None] but Literal is not supported in Python 3.7
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def control(ris, l :List[Union[int, float, list]], cn :bool) -> Optional[Union[bool, int, float]]: #Union[Literal[False], int, float]:
    """
    Control the format of the expression.

    A one-element expression is resolved directly: a number or None is
    returned as is, a nested list is unwrapped recursively. An expression
    with more than two elements is handed to control_list. Anything else
    (empty, two elements, or a lone non-numeric token) is malformed.

    Args:
        ris: Intermediate result.
        l (list): Expression to control.
        cn (bool): Control boolean value.

    Returns:
        Union[Literal[False], int, float]: Result of the control; False marks
        a malformed expression, and None can be returned when the only
        element is None.
    """
    if len(l) == 1:
        only = l[0]
        if only is None or isinstance(only, (float, int)):
            return only
        if isinstance(only, list):
            return control(None, only, cn)
        return False

    if len(l) > 2:
        return control_list(ris, l, cn)

    return False
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
360
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def control_list(ris, l :List[Optional[Union[float, int, list]]], cn :bool) -> Optional[bool]: #Optional[Literal[False]]:
    """
    Control the format of a list of expressions and fold it into a single value.

    The list is consumed left to right in chunks shaped either as
    ``<operand> <operator> <operand>`` or as ``<operator> <operand>``. An operator
    is the string 'and' or 'or'; an operand is a number, None, or a nested list
    (resolved through control()). In the second shape the running result is used
    as the left-hand operand.

    Args:
        ris: Intermediate result, used as left operand by chunks starting with an operator.
        l (list): List of expressions to control.
        cn (bool): Control boolean value, forwarded unchanged to control() and computes().

    Returns:
        False when the list is malformed (unexpected token, dangling trailing tokens, or a
        nested list rejected by control()); otherwise the last value produced by computes(),
        or ``ris`` itself when ``l`` is empty.
    """
    operators = ('and', 'or')

    def is_operand(token) -> bool:
        # Numbers and None are used as they are, nested lists get resolved later.
        return token is None or isinstance(token, (float, int, list))

    while l:
        if len(l) == 1:
            # A single leftover token can never form a chunk.
            return False

        if l[0] in operators:
            # "<operator> <operand>": carry on from the running result.
            left, operator, right = ris, l[0], l[1]
            consumed = 2
        elif is_operand(l[0]) and l[1] in operators:
            if len(l) < 3:
                # "<operand> <operator>" with nothing after it: reject it like any
                # other malformed expression instead of failing on l[2].
                return False
            left, operator, right = l[0], l[1], l[2]
            consumed = 3
        else:
            return False

        if not is_operand(right):
            return False

        # The running result is taken as is; only operands read from the list
        # may still be nested expressions.
        if consumed == 3 and isinstance(left, list):
            left = control(None, left, cn)
            if left is False:
                return False

        if isinstance(right, list):
            right = control(None, right, cn)
            if right is False:
                return False

        ris = computes(left, operator, right, cn)
        l = l[consumed:]

    return ris
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
421
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
ResolvedRules = Dict[str, List[Optional[Union[float, int]]]]
def resolve(genes: Dict[str, str], rules: List[str], ids: List[str], resolve_none: bool, name: str) -> Tuple[Optional[ResolvedRules], Optional[list]]:
    """
    Compute one score per rule for every entry of the gene data.

    Args:
        genes (dict): Gene data; each value is handed to replace_gene_value() together with
            every non-empty rule (presumably the gene values of one sample, verify against caller).
        rules (list): Rules to resolve; empty rules score None.
        ids (list): IDs corresponding to the rules (not used by this function).
        resolve_none (bool): Flag forwarded to control() to decide how None values are treated.
        name (str): Name of the dataset, only used in the warning message.

    Returns:
        tuple: (scores, not_found) where scores maps each key of ``genes`` to its list of
        per-rule scores and not_found lists, without duplicates, what replace_gene_value()
        reported as missing. (None, None) when no rule produced a score for any key.
    """
    scores_by_key = {}
    missing_genes = []
    any_score = False

    for key, value in genes.items():
        scores = []
        for rule in rules:
            if not rule:
                # nothing to evaluate for an empty rule
                scores.append(None)
                continue

            replaced, err = replace_gene_value(rule, value)
            if err:
                missing_genes.extend(err)

            outcome = control(None, replaced, resolve_none)
            if outcome is False or outcome == None:
                # malformed or non computable rule
                outcome = None
            else:
                any_score = True
            scores.append(outcome)

        scores_by_key[key] = scores

    if not any_score:
        utils.logWarning(
            f"Warning: no computable score (due to missing gene values) for class {name}, the class has been disregarded",
            ARGS.out_log)

        return (None, None)

    return (scores_by_key, list(set(missing_genes)))
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
466 ############################ create_ras #######################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def create_ras(resolve_rules: Optional[ResolvedRules], dataset_name: str, rules: List[str], ids: List[str], file: str) -> None:
    """
    Create a RAS (Reaction Activity Score) file from resolved rules.

    Args:
        resolve_rules (dict): Resolved rules, one list of scores per column of the output.
            None scores are replaced in place by the string 'None'. When the whole argument
            is None a warning is logged and no file is written.
        dataset_name (str): Name of the dataset, only used in the warning message.
        rules (list): List of rules (not used by this function).
        ids (list): Reaction IDs, written as the leading 'Reactions' column.
        file (str): Path to the output RAS file (tab separated).

    Returns:
        None
    """
    if resolve_rules is None:
        utils.logWarning(f"Couldn't generate RAS for current dataset: {dataset_name}", ARGS.out_log)
        # Nothing can be written: stop here instead of failing on None.values() below.
        return

    for geni in resolve_rules.values():
        for i, valori in enumerate(geni):
            if valori is None:
                geni[i] = 'None'

    output_ras = pd.DataFrame.from_dict(resolve_rules)
    output_ras.insert(0, 'Reactions', ids)
    output_to_csv = output_ras.to_csv(sep = '\t', index = False)

    # the context manager closes the file even when the write fails
    with open(file, "w") as text_file:
        text_file.write(output_to_csv)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
497
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
498 ################################- NEW RAS COMPUTATION -################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
Expr = Optional[Union[int, float]]
Ras = Expr
def ras_for_cell_lines(dataset: pd.DataFrame, rules: Dict[str, ruleUtils.OpList]) -> Dict[str, Dict[str, Ras]]:
    """
    Generates the RAS scores for each cell line found in the dataset.

    Args:
        dataset (pd.DataFrame): Dataset containing gene values; its first column is used as
            the gene identifier of each row, every other column is treated as a cell line.
        rules (dict): The dict containing reaction ids as keys and rules as values.

    Side effects:
        dataset : mut (its first column becomes the index)

    Returns:
        dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary
        where each key corresponds to a reaction ID and each value is its computed RAS score.
    """
    # Index the rows by the first column so that each remaining column converts
    # to a {row identifier: value} dict.
    dataset.set_index(dataset.columns[0], inplace=True)

    # set_index() drops the column it promotes, so dataset.columns now holds cell
    # lines only: skipping its first entry would lose a cell line.
    return {
        cell_line_name: get_ras_values(rules, dataset[cell_line_name].to_dict())
        for cell_line_name in dataset.columns
    }
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
523
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
    """
    Computes the RAS (Reaction Activity Score) of every rule in the given dict.

    Args:
        value_rules (dict): A dictionary where keys are reaction ids and values are OpLists.
        dataset : gene expression data of one cell line.

    Returns:
        dict: A dictionary with the same reaction ids as keys, each mapped to the value
        ras_op_list() computed for its rule.
    """
    ras_by_reaction = {}
    for reaction_id, rule in value_rules.items():
        ras_by_reaction[reaction_id] = ras_op_list(rule, dataset)

    return ras_by_reaction
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
536
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_gene_expr(dataset :Dict[str, Expr], name :str) -> Expr:
    """
    Looks up the expression of the given gene in a cell line dataset.

    Args:
        dataset : gene expression data of one cell line.
        name : gene name.

    Side effects:
        ERRORS : mut (the gene name is appended when no value is available for it)

    Returns:
        Expr : the gene's expression value, None when the gene is missing or its value is None.
    """
    value = dataset.get(name)
    if value is not None:
        return value

    # keep track of the gene so that the caller can report it
    ERRORS.append(name)
    return None
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
552
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def ras_op_list(op_list: ruleUtils.OpList, dataset: Dict[str, Expr]) -> Ras:
    """
    Recursively computes the RAS (Reaction Activity Score) of the given OpList.

    Operands of an OR rule are summed, operands of any other rule are combined with min().
    Operands without a value are skipped, unless the rule is an AND and ARGS.none is not
    set: in that case a single missing operand makes the whole rule evaluate to None.

    Args:
        op_list (OpList): The OpList representing a rule with gene values.
        dataset : gene expression data of one cell line.

    Returns:
        Ras: The computed RAS value for the given OpList, None when no operand had a value.
    """
    op = op_list.op
    if not op:
        # no operator: the rule is made of a single gene
        return get_gene_expr(dataset, op_list[0])

    none_voids_rule = op is ruleUtils.RuleOp.AND and not ARGS.none
    if none_voids_rule and None in op_list:
        return None

    accumulated :Ras = None
    for position in range(len(op_list)):
        operand = op_list[position]
        if isinstance(operand, ruleUtils.OpList):
            value = ras_op_list(operand, dataset)
        else:
            value = get_gene_expr(dataset, operand)

        if value is None:
            if none_voids_rule:
                return None
            continue

        if accumulated is None:
            # first operand with a value
            accumulated = value
        elif op is ruleUtils.RuleOp.OR:
            accumulated = accumulated + value
        else:
            accumulated = min(accumulated, value)

    return accumulated
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
587
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None:
    """
    Save computed ras scores as a tsv file, at the path stored in ARGS.ras_output.

    Args:
        rasScores : the computed ras scores, one dict of scores per output column.
            None scores are replaced in place by the string "None".
        reactions : the values of the leading 'Reactions' column.

    Returns:
        None
    """
    # replacing the missing scores by hand is actually a lot faster than using
    # the ootb dataframe method, sadly
    for scores in rasScores.values():
        scores.update({reactId: "None" for reactId, score in scores.items() if score is None})

    output_ras = pd.DataFrame.from_dict(rasScores)
    output_ras.insert(0, 'Reactions', reactions)
    output_ras.to_csv(ARGS.ras_output, sep = '\t', index = False)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
606
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
607 ############################ MAIN #############################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
#TODO: currently unused, keep it: it will be needed once the new translator dicts are in use.
def translateGene(geneName :str, encoding :str, geneTranslator :Dict[str, Dict[str, str]]) -> str:
    """
    Translate gene from any supported encoding to HugoID.

    Args:
        geneName (str): the name of the gene in its current encoding.
        encoding (str): the encoding, used as key into geneTranslator.
        geneTranslator (Dict[str, Dict[str, str]]): for each encoding, the dict mapping the
            supported gene names to the corresponding HugoID.

    Raises:
        ValueError: When the gene isn't found among those of the given encoding.
        KeyError: When the encoding isn't a key of geneTranslator.

    Returns:
        str: the gene in HugoID encoding.
    """
    notFound = object() # sentinel, can't clash with any stored translation
    hugoId = geneTranslator[encoding].get(geneName, notFound)
    if hugoId is notFound:
        raise ValueError(f"Gene \"{geneName}\" non trovato, verifica di star utilizzando il modello corretto!")

    return hugoId
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
628
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def load_custom_rules() -> Dict[str, ruleUtils.OpList]:
    """
    Opens custom rules file and extracts the rules. If the file is in .csv format an additional parsing step will be
    performed, significantly impacting the runtime.

    Raises:
        utils.PathErr: When ARGS.rules_name can't be turned into a file path.

    Returns:
        Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
    """
    datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat

    try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext
    except utils.PathErr as err:
        # filenamePath is still unbound when fromStrPath fails, so the error has to
        # carry the raw name instead.
        # NOTE(review): assumes PathErr accepts a plain string as path, confirm.
        raise utils.PathErr(ARGS.rules_name, f"Please make sure your file's name is a valid file path, {err.msg}") from err

    # NOTE(review): this unpickles a user supplied file, which can run arbitrary
    # code: only acceptable as long as the uploaded rules are trusted.
    if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)

    # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
    return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) }
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
647
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def main() -> None:
    """
    Reads the frontend arguments, loads the dataset and writes the RAS scores.

    Custom models are scored through ras_for_cell_lines() and saved with save_as_tsv();
    every other model goes through resolve() and create_ras().

    Returns:
        None
    """
    # the arguments come from the frontend (related xml) and are shared module-wide
    global ARGS
    ARGS = process_args()

    dataset = read_dataset(ARGS.input, "dataset")

    # gene names sit in the first column: force them to strings...
    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
    # ...and strip the versioning, i.e. whatever follows the first '.'
    dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]

    model :utils.Model = ARGS.rules_selector
    if model is utils.Model.Custom:
        # custom models carry their own rules
        custom_rules = load_custom_rules()
        reactions = list(custom_rules.keys())
        ras_scores = ras_for_cell_lines(dataset, custom_rules)
        save_as_tsv(ras_scores, reactions)

        if ERRORS:
            utils.logWarning(
                f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
                ARGS.out_log)

        return

    # Standard flow of the ras_generator program, for non-custom models.
    name = "RAS Dataset"
    type_gene = gene_type(dataset.iloc[0, 0], name)

    model_rules = model.getRules(ARGS.tool_dir)
    genes = data_gene(dataset, type_gene, name, None)
    ids, rules = load_id_rules(model_rules.get(type_gene))

    resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
    create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)

    if err:
        utils.logWarning(
            f"Warning: gene(s) {err} not found in class \"{name}\", " +
            "the expression level for this gene will be considered NaN",
            ARGS.out_log)

    print("Execution succeded")
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
696
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
697 ###############################################################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()