Mercurial > repos > bimib > cobraxy
comparison COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 08 Sep 2025 14:44:15 +0000 |
| parents | ccccb731c953 |
| children | 97eea560a10f |
Comparison legend (line status): equal, deleted, inserted, replaced
| 405:716b1a638fb5 | 406:187cee1a00e2 |
|---|---|
| 23 Namespace: An object containing parsed arguments. | 23 Namespace: An object containing parsed arguments. |
| 24 """ | 24 """ |
| 25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', |
| 26 description = 'process some value\'s'+ | 26 description = 'process some value\'s'+ |
| 27 ' abundances and reactions to create RPS scores.') | 27 ' abundances and reactions to create RPS scores.') |
| 28 | 28 parser.add_argument('-rc', '--reaction_choice', |
| 29 parser.add_argument("-rl", "--model_upload", type = str, | 29 type = str, |
| 30 help = "path to input file containing the reactions") | 30 default = 'default', |
| 31 | 31 choices = ['default','custom'], |
| 32 # model_upload custom | 32 help = 'chose which type of reaction dataset you want use') |
| 33 parser.add_argument('-cm', '--custom', | |
| 34 type = str, | |
| 35 help='your dataset if you want custom reactions') | |
| 33 parser.add_argument('-td', '--tool_dir', | 36 parser.add_argument('-td', '--tool_dir', |
| 34 type = str, | 37 type = str, |
| 35 required = True, | 38 required = True, |
| 36 help = 'your tool directory') | 39 help = 'your tool directory') |
| 37 parser.add_argument('-ol', '--out_log', | 40 parser.add_argument('-ol', '--out_log', |
| 116 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. | 119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. |
| 117 An empty string is returned if a match isn't found. | 120 An empty string is returned if a match isn't found. |
| 118 """ | 121 """ |
| 119 name = clean_metabolite_name(name) | 122 name = clean_metabolite_name(name) |
| 120 for id, synonyms in syn_dict.items(): | 123 for id, synonyms in syn_dict.items(): |
| 121 if name in synonyms: | 124 if name in synonyms: return id |
| 122 return id | |
| 123 | 125 |
| 124 return "" | 126 return "" |
| 125 | 127 |
| 126 ############################ check_missing_metab #################################### | 128 ############################ check_missing_metab #################################### |
| 127 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: | 129 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: |
| 128 """ | 130 """ |
| 129 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. | 131 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. |
| 130 | 132 |
| 131 Parameters: | 133 Parameters: |
| 132 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and | 134 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. |
| 133 stoichiometric coefficients as values. | |
| 134 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. | 135 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. |
| 135 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. | 136 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. |
| 136 | 137 |
| 137 Returns: | 138 Returns: |
| 138 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. | 139 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. |
| 196 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). | 197 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). |
| 197 | 198 |
| 198 Returns: | 199 Returns: |
| 199 None | 200 None |
| 200 """ | 201 """ |
| 201 | |
| 202 cell_lines = dataset[0][1:] | 202 cell_lines = dataset[0][1:] |
| 203 abundances_dict = {} | 203 abundances_dict = {} |
| 204 | 204 |
| 205 translationIsApplied = ARGS.reaction_choice == "default" | |
| 205 for row in dataset[1:]: | 206 for row in dataset[1:]: |
| 206 id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0] | 207 id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] |
| 207 if id: | 208 if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) |
| 208 abundances_dict[id] = list(map(utils.Float(), row[1:])) | 209 |
| 209 | |
| 210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) | 210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) |
| 211 | 211 |
| 212 rps_scores :Dict[Dict[str, float]] = {} | 212 rps_scores :Dict[Dict[str, float]] = {} |
| 213 for pos, cell_line_name in enumerate(cell_lines): | 213 for pos, cell_line_name in enumerate(cell_lines): |
| 214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } | 214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } |
| 215 | |
| 216 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) | 215 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) |
| 217 | 216 |
| 218 df = pd.DataFrame.from_dict(rps_scores) | 217 df = pd.DataFrame.from_dict(rps_scores) |
| 219 df = df.loc[list(reactions.keys()),:] | 218 |
| 220 print(df.head(10)) | |
| 221 df.index.name = 'Reactions' | 219 df.index.name = 'Reactions' |
| 222 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) | 220 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) |
| 223 | 221 |
| 224 ############################ main #################################### | 222 ############################ main #################################### |
| 225 def main(args:List[str] = None) -> None: | 223 def main(args:List[str] = None) -> None: |
| 238 | 236 |
| 239 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: | 237 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: |
| 240 syn_dict = pk.load(sd) | 238 syn_dict = pk.load(sd) |
| 241 | 239 |
| 242 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) | 240 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) |
| 243 tmp_dict = None | 241 |
| 244 #if ARGS.reaction_choice == 'default': | 242 if ARGS.reaction_choice == 'default': |
| 245 # reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) | 243 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) |
| 246 # substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) | 244 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) |
| 247 | 245 |
| 248 #elif ARGS.reaction_choice == 'custom': | 246 elif ARGS.reaction_choice == 'custom': |
| 249 reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) | 247 reactions = reactionUtils.parse_custom_reactions(ARGS.custom) |
| 250 for r, s in reactions.items(): | 248 substrateFreqTable = {} |
| 251 tmp_list = list(s.keys()) | 249 for _, substrates in reactions.items(): |
| 252 for k in tmp_list: | 250 for substrateName, _ in substrates.items(): |
| 253 if k[-2] == '_': | 251 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 |
| 254 s[k[:-2]] = s.pop(k) | 252 substrateFreqTable[substrateName] += 1 |
| 255 substrateFreqTable = {} | |
| 256 for _, substrates in reactions.items(): | |
| 257 for substrateName, _ in substrates.items(): | |
| 258 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 | |
| 259 substrateFreqTable[substrateName] += 1 | |
| 260 | |
| 261 print(f"Reactions: {reactions}") | |
| 262 print(f"Substrate Frequencies: {substrateFreqTable}") | |
| 263 print(f"Synonyms: {syn_dict}") | |
| 264 tmp_dict = {} | |
| 265 for metabName, freq in substrateFreqTable.items(): | |
| 266 tmp_metabName = clean_metabolite_name(metabName) | |
| 267 for syn_key, syn_list in syn_dict.items(): | |
| 268 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): | |
| 269 print(f"Mapping {tmp_metabName} to {syn_key}") | |
| 270 tmp_dict[syn_key] = syn_list | |
| 271 tmp_dict[syn_key].append(tmp_metabName) | |
| 272 | 253 |
| 273 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) | 254 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) |
| 274 print('Execution succeded') | 255 print('Execution succeded') |
| 275 | 256 |
| 276 ############################################################################## | 257 ############################################################################## |
