annotate COBRAxy/rps_generator.py @ 402:ccccb731c953 draft

Uploaded
author francesco_lapi
date Sun, 07 Sep 2025 21:16:26 +0000
parents 0a3ca20848f3
children 187cee1a00e2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
1 import math
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
2 import argparse
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
3
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
4 import numpy as np
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
5 import pickle as pk
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
6 import pandas as pd
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
7
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
8 from typing import Optional, List, Dict
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
9
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
10 import utils.general_utils as utils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
11 import utils.reaction_parsing as reactionUtils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
12
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
13 ########################## argparse ##########################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
14 ARGS :argparse.Namespace
147
3fca9b568faf Uploaded
bimib
parents: 4
diff changeset
def process_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Processes command-line arguments.

    Args:
        args (list): List of command-line arguments. When None, argparse falls
            back to the arguments of the running process (sys.argv[1:]).

    Returns:
        Namespace: An object containing parsed arguments.
    """
    parser = argparse.ArgumentParser(
        usage = '%(prog)s [options]',
        description = 'process some value\'s' +
                      ' abundances and reactions to create RPS scores.')

    # Path of the reactions file. Not marked as required here, although main()
    # reads it unconditionally.
    parser.add_argument(
        "-rl", "--model_upload",
        type = str,
        help = "path to input file containing the reactions")

    parser.add_argument(
        '-td', '--tool_dir',
        type = str,
        required = True,
        help = 'your tool directory')
    parser.add_argument(
        '-ol', '--out_log',
        help = "Output log")
    parser.add_argument(
        '-id', '--input',
        type = str,
        required = True,
        help = 'input dataset')
    parser.add_argument(
        '-rp', '--rps_output',
        type = str,
        required = True,
        help = 'rps output')

    return parser.parse_args(args)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
50
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
51 ############################ dataset name #####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def name_dataset(name_data :str, count :int) -> str:
    """
    Builds a unique dataset name from the one supplied by the user. "Dataset" is the default name,
    so when it was left unchanged f"_{count}" is appended to tell datasets apart; any other name
    is returned as is.

    Args:
        name_data : name associated with the dataset (from frontend input params)
        count : counter from 1 to make these names unique (external)

    Returns:
        str : the name made unique
    """
    label = str(name_data)
    if label != 'Dataset':
        return label

    # Default name: disambiguate it with the external counter.
    return label + '_' + str(count)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
67
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
68 ############################ get_abund_data ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_abund_data(dataset: pd.DataFrame, cell_line_index:int) -> Optional[pd.Series]:
    """
    Extracts abundance data and turns it into a series for a specific cell line from the dataset, whose rows are
    metabolites and columns are cell lines.

    Args:
        dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites.
        cell_line_index (int): The position of the cell line's column in the dataset.

    Returns:
        pd.Series or None: A series containing abundance values for the specified cell line.
                           The name of the series is the name of the cell line.
                           Returns None if the cell index is invalid.
    """
    # The index selects a column, so it must be validated against the number
    # of columns (not the number of rows).
    if cell_line_index < 0 or cell_line_index >= len(dataset.columns):
        print(f"Errore: This cell line index: '{cell_line_index}' is not valid.")
        return None

    cell_line_name = dataset.columns[cell_line_index]

    # The first row of the selected column is dropped ([1:]).
    return dataset[cell_line_name][1:]
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
91
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
92 ############################ clean_metabolite_name ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def clean_metabolite_name(name :str) -> str:
    """
    Normalizes a metabolite's name so that it can be matched against the dictionary of synonyms:
    punctuation, brackets and spaces are stripped and the result is lowercased.

    Args:
        name : the metabolite's name, as given in the dataset.

    Returns:
        str : a new string with the cleaned name.
    """
    ignored_chars = ",;-_'([{ }])"

    kept = []
    for ch in name:
        if ch in ignored_chars:
            continue
        kept.append(ch)

    return "".join(kept).lower()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
105
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
106 ############################ get_metabolite_id ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_metabolite_id(name :str, syn_dict :Dict[str, List[str]]) -> str:
    """
    Searches the dictionary of synonyms for the cleaned form of a metabolite's name.

    Args:
        name : the metabolite's name, as given in the dataset.
        syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values.

    Returns:
        str : the unique identifier (dictionary key) of the first entry whose synonyms contain the cleaned name.
        An empty string is returned if a match isn't found.
    """
    cleaned = clean_metabolite_name(name)

    # Lazily scan the entries in dictionary order; only the first hit is consumed.
    matches = (metab_id for metab_id, synonyms in syn_dict.items() if cleaned in synonyms)
    return next(matches, "")
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
125
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
126 ############################ check_missing_metab ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
    """
    Finds the metabolites that appear in the reactions but not in the abundances dictionary, and gives each of them
    a placeholder row of abundances.

    Parameters:
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and
                          stoichiometric coefficients as values.
        dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
        cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.

    Returns:
        list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1.

    Side effects:
        dataset_by_rows : mut
    """
    absent = []
    for substrates in reactions.values():
        for metab_name in substrates:
            if metab_name in dataset_by_rows:
                continue

            # One placeholder abundance (1) per cell line. Once inserted, the name
            # is no longer missing, so it is reported only once.
            dataset_by_rows[metab_name] = [1] * cell_lines_amt
            absent.append(metab_name)

    return absent
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
151
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
152 ############################ calculate_rps ####################################
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str], substrateFreqTable: Dict[str, int]) -> Dict[str, float]:
    """
    Calculate the Reaction Propensity scores (RPS) based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample.
    The score is computed as the product of the concentrations of the reacting substances, with each concentration raised to a power equal to its stoichiometric coefficient
    for each reaction using the provided coefficient and abundance values. The value is then normalized, based on how frequent the metabolite is in the selected model's reactions,
    and log-transformed. In practice the sum of log((abundance + eps) / frequency) * stoichiometry over the substrates is accumulated.

    Every substrate contributes to the sum, including blacklisted and missing ones; those two groups only
    differ in that they cannot make a reaction "significant". A reaction with no significant substrate scores NaN.

    Parameters:
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
        abundances (dict): A dictionary representing metabolite abundances where keys are metabolite names and values are their corresponding abundances.
        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
        missing_list (list): A list containing metabolite names that were missing in the original abundances dictionary and thus their values were set to 1.
        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).

    Returns:
        dict: A dictionary containing Reaction Propensity Scores (RPS) where keys are reaction names and values are the corresponding RPS scores.
    """
    # Hoisted out of the loops: constant-time membership instead of scanning two
    # lists per substrate, and a single lookup of the machine epsilon.
    not_significant = set(black_list) | set(missing_list)
    eps = np.finfo(float).eps

    rps_scores = {}

    for reaction_name, substrates in reactions.items():
        total_contribution = 0
        metab_significant = False
        for metabolite, stoichiometry in substrates.items():
            abundance = abundances[metabolite]
            if math.isnan(abundance):
                # NaN abundances are treated as 1.
                abundance = 1

            if metabolite not in not_significant:
                metab_significant = True

            # eps keeps the logarithm defined when the abundance is exactly 0.
            total_contribution += math.log((abundance + eps) / substrateFreqTable[metabolite]) * stoichiometry

        rps_scores[reaction_name] = total_contribution if metab_significant else math.nan

    return rps_scores
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
185
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
186 ############################ rps_for_cell_lines ####################################
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]], substrateFreqTable: Dict[str, int]) -> None:
    """
    Calculate Reaction Propensity Scores (RPS) for each cell line represented in the dataframe and creates an output file.

    The result is written as a tab-separated table (reactions as rows, cell lines as columns) to the path
    stored in the module-level ARGS.rps_output; missing scores are written as 'None'.

    Parameters:
        dataset : the dataset's data, by rows. The first row is the header (its first cell is skipped, the others are cell line names);
                  in every other row the first cell is the metabolite name and the remaining cells are its abundances.
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
        syn_dict (dict): A dictionary where keys are general metabolite names and values are lists of possible synonyms.
        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).

    Returns:
        None
    """
    cell_lines = dataset[0][1:]
    abundances_dict = {}

    for row in dataset[1:]:
        if not row:
            # An empty row has no metabolite name to look up.
            continue

        metab_id = get_metabolite_id(row[0], syn_dict)
        if metab_id:
            # Rows whose name has no known synonym are dropped.
            # NOTE(review): utils.Float() presumably builds a text-to-float converter - confirm in utils.general_utils.
            abundances_dict[metab_id] = list(map(utils.Float(), row[1:]))

    # Adds placeholder rows to abundances_dict for metabolites used by the reactions but absent from the dataset.
    missing_list = check_missing_metab(reactions, abundances_dict, len(cell_lines))

    rps_scores :Dict[str, Dict[str, float]] = {}
    for pos, cell_line_name in enumerate(cell_lines):
        # Column `pos` of every metabolite row: the abundances of this cell line only.
        sample_abundances = { metab : values[pos] for metab, values in abundances_dict.items() }

        rps_scores[cell_line_name] = calculate_rps(reactions, sample_abundances, black_list, missing_list, substrateFreqTable)

    df = pd.DataFrame.from_dict(rps_scores)
    # Put the rows in the same order as the reactions dictionary.
    df = df.loc[list(reactions.keys()),:]
    print(df.head(10))
    df.index.name = 'Reactions'
    df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
5dd2ab4637aa Uploaded
francesco_lapi
parents: 280
diff changeset
223
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
224 ############################ main ####################################
147
3fca9b568faf Uploaded
bimib
parents: 4
diff changeset
def main(args: Optional[List[str]] = None) -> None:
    """
    Initializes everything and sets the program in motion based on the frontend input arguments.

    Steps: parse the arguments into the global ARGS, load the black list and the synonyms dictionary from the
    tool directory, read the input dataset, parse the custom reactions, build the substrate frequency table,
    register the model's metabolite names as synonyms and finally compute and write the RPS table.

    Args:
        args (list): List of command-line arguments, forwarded to process_args.

    Returns:
        None
    """
    global ARGS
    ARGS = process_args(args)

    # TODO:use utils functions vvv
    # NOTE: pickle can execute arbitrary code while loading; these files must only
    # ever come from the trusted tool directory.
    with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl:
        black_list = pk.load(bl)

    with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
        syn_dict = pk.load(sd)

    dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)

    # NOTE(review): --model_upload is not a required argument, so this may receive None - confirm how
    # parse_custom_reactions handles that.
    reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)

    # Strip a trailing "_<char>" suffix from the metabolite ids of every reaction.
    # NOTE(review): the suffix looks like a compartment tag (e.g. "_c") - confirm. Two ids of the same reaction
    # that differ only by suffix collapse into one key, the last one winning.
    for substrates in reactions.values():
        for name in list(substrates):
            # The length guard avoids an IndexError on ids shorter than two characters.
            if len(name) >= 2 and name[-2] == '_':
                substrates[name[:-2]] = substrates.pop(name)

    # Number of reactions each (renamed) metabolite takes part in.
    substrateFreqTable = {}
    for substrates in reactions.values():
        for substrateName in substrates:
            substrateFreqTable[substrateName] = substrateFreqTable.get(substrateName, 0) + 1

    print(f"Reactions: {reactions}")
    print(f"Substrate Frequencies: {substrateFreqTable}")
    print(f"Synonyms: {syn_dict}")

    # Register the cleaned model names as synonyms, directly inside syn_dict, so that dataset rows using
    # them are recognized later on. The lists are mutated in place; the dictionary itself is not resized.
    for metabName in substrateFreqTable:
        tmp_metabName = clean_metabolite_name(metabName)
        for syn_key, syn_list in syn_dict.items():
            if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
                print(f"Mapping {tmp_metabName} to {syn_key}")
                if tmp_metabName not in syn_list:
                    syn_list.append(tmp_metabName)

    rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
    print('Execution succeded')
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
275
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
276 ##############################################################################
326
3dccdf56cb24 Uploaded
francesco_lapi
parents: 293
diff changeset
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()