Mercurial > repos > bimib > cobraxy
diff COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 14:44:15 +0000 |
parents | ccccb731c953 |
children |
line wrap: on
line diff
--- a/COBRAxy/rps_generator.py Mon Sep 08 13:52:58 2025 +0000 +++ b/COBRAxy/rps_generator.py Mon Sep 08 14:44:15 2025 +0000 @@ -25,11 +25,14 @@ parser = argparse.ArgumentParser(usage = '%(prog)s [options]', description = 'process some value\'s'+ ' abundances and reactions to create RPS scores.') - - parser.add_argument("-rl", "--model_upload", type = str, - help = "path to input file containing the reactions") - - # model_upload custom + parser.add_argument('-rc', '--reaction_choice', + type = str, + default = 'default', + choices = ['default','custom'], + help = 'chose which type of reaction dataset you want use') + parser.add_argument('-cm', '--custom', + type = str, + help='your dataset if you want custom reactions') parser.add_argument('-td', '--tool_dir', type = str, required = True, @@ -118,8 +121,7 @@ """ name = clean_metabolite_name(name) for id, synonyms in syn_dict.items(): - if name in synonyms: - return id + if name in synonyms: return id return "" @@ -129,8 +131,7 @@ Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. Parameters: - reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and - stoichiometric coefficients as values. + reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. @@ -198,26 +199,23 @@ Returns: None """ - cell_lines = dataset[0][1:] abundances_dict = {} + translationIsApplied = ARGS.reaction_choice == "default" for row in dataset[1:]: - id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0] - if id: - abundances_dict[id] = list(map(utils.Float(), row[1:])) - + id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] + if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) + missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) - + rps_scores :Dict[Dict[str, float]] = {} for pos, cell_line_name in enumerate(cell_lines): abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } - rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) df = pd.DataFrame.from_dict(rps_scores) - df = df.loc[list(reactions.keys()),:] - print(df.head(10)) + df.index.name = 'Reactions' df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) @@ -240,35 +238,18 @@ syn_dict = pk.load(sd) dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) - tmp_dict = None - #if ARGS.reaction_choice == 'default': - # reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) - # substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) + + if ARGS.reaction_choice == 'default': + reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) + substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) - #elif ARGS.reaction_choice == 'custom': - reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) - for r, s in reactions.items(): - tmp_list = list(s.keys()) - for k in tmp_list: - if k[-2] == '_': - s[k[:-2]] = s.pop(k) - substrateFreqTable = {} - for _, substrates in reactions.items(): - for substrateName, _ in substrates.items(): - if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 - substrateFreqTable[substrateName] += 1 - - print(f"Reactions: {reactions}") - print(f"Substrate Frequencies: {substrateFreqTable}") - print(f"Synonyms: {syn_dict}") - tmp_dict = {} - for metabName, freq in substrateFreqTable.items(): - tmp_metabName = clean_metabolite_name(metabName) - for syn_key, syn_list in syn_dict.items(): - if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): - print(f"Mapping {tmp_metabName} to {syn_key}") - tmp_dict[syn_key] = syn_list - tmp_dict[syn_key].append(tmp_metabName) + elif ARGS.reaction_choice == 'custom': + reactions = reactionUtils.parse_custom_reactions(ARGS.custom) + substrateFreqTable = {} + for _, substrates in reactions.items(): + for substrateName, _ in substrates.items(): + if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 + substrateFreqTable[substrateName] += 1 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) print('Execution succeded')