cobraxy: COBRAxy/rps_generator

comparison COBRAxy/rps_generator_beta.py @ 456:a6e45049c1b9 draft default tip

Uploaded

author	francesco_lapi
date	Fri, 12 Sep 2025 17:28:45 +0000
parents	187cee1a00e2
children

comparison

equal deleted inserted replaced

-:4e2bc80764b6
+:a6e45049c1b9
+"""
+Compute Reaction Propensity Scores (RPS) from metabolite abundances and reaction stoichiometry.
+Given a tabular dataset (metabolites x samples) and a reaction set, this script
+maps metabolite names via synonyms, fills missing metabolites, and computes RPS
+per reaction for each sample using a log-normalized formula.
+"""
 import math
 import argparse
 import numpy  as np
 import pickle as pk
 args (list): List of command-line arguments.
 Returns:
 Namespace: An object containing parsed arguments.
 """
-parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
+parser = argparse.ArgumentParser(
-description = 'process some value\'s'+
+usage='%(prog)s [options]',
-' abundances and reactions to create RPS scores.')
+description='Process abundances and reactions to create RPS scores.'
+)
 parser.add_argument("-rl", "--model_upload", type = str,
 help = "path to input file containing the reactions")
-# model_upload custom
 parser.add_argument('-td', '--tool_dir',
 type = str,
 required = True,
 help = 'your tool directory')
 parser.add_argument('-ol', '--out_log',
 def name_dataset(name_data :str, count :int) -> str:
 """
 Produces a unique name for a dataset based on what was provided by the user. The default name for any dataset is "Dataset", thus if the user didn't change it this function appends f"_{count}" to make it unique.
 Args:
-name_data : name associated with the dataset (from frontend input params)
+name_data: Name associated with the dataset (from frontend input params).
-count : counter from 1 to make these names unique (external)
+count: Counter starting at 1 to make names unique when default.
 Returns:
 str : the name made unique
 """
 if str(name_data) == 'Dataset':
 pd.Series or None: A series containing abundance values for the specified cell line.
 The name of the series is the name of the cell line.
 Returns None if the cell index is invalid.
 """
 if cell_line_index < 0 or cell_line_index >= len(dataset.index):
-print(f"Errore: This cell line index: '{cell_line_index}' is not valid.")
+print(f"Error: cell line index '{cell_line_index}' is not valid.")
 return None
 cell_line_name = dataset.columns[cell_line_index]
 abundances_series = dataset[cell_line_name][1:]
 Returns:
 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1.
 Side effects:
-dataset_by_rows : mut
+dataset_by_rows: mutated to include missing metabolites with default abundances.
 """
 missing_list = []
 for reaction in reactions.values():
 for metabolite in reaction.keys():
 if metabolite not in dataset_by_rows:
 cell_lines = dataset[0][1:]
 abundances_dict = {}
 for row in dataset[1:]:
-id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
+id = get_metabolite_id(row[0], syn_dict)
 if id:
 abundances_dict[id] = list(map(utils.Float(), row[1:]))
 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
 rps_scores :Dict[Dict[str, float]] = {}
 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
 df = pd.DataFrame.from_dict(rps_scores)
 df = df.loc[list(reactions.keys()),:]
-print(df.head(10))
+# Optional preview: first 10 rows
+# print(df.head(10))
 df.index.name = 'Reactions'
 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
 ############################ main ####################################
 def main(args:List[str] = None) -> None:
 None
 """
 global ARGS
 ARGS = process_args(args)
-# TODO:use utils functions vvv
+# Load support data (black list and synonyms)
 with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl:
 black_list = pk.load(bl)
 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
 syn_dict = pk.load(sd)
-dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
+dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader=False)
 tmp_dict = None
-#if ARGS.reaction_choice == 'default':
-#    reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
+# Parse custom reactions from uploaded file
-#    substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
-#elif ARGS.reaction_choice == 'custom':
 reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)
 for r, s in reactions.items():
 tmp_list = list(s.keys())
 for k in tmp_list:
 if k[-2] == '_':
 for _, substrates in reactions.items():
 for substrateName, _ in substrates.items():
 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
 substrateFreqTable[substrateName] += 1
-print(f"Reactions: {reactions}")
+# Debug prints (can be enabled during troubleshooting)
-print(f"Substrate Frequencies: {substrateFreqTable}")
+# print(f"Reactions: {reactions}")
-print(f"Synonyms: {syn_dict}")
+# print(f"Substrate Frequencies: {substrateFreqTable}")
+# print(f"Synonyms: {syn_dict}")
 tmp_dict = {}
 for metabName, freq in substrateFreqTable.items():
 tmp_metabName = clean_metabolite_name(metabName)
 for syn_key, syn_list in syn_dict.items():
 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
-print(f"Mapping {tmp_metabName} to {syn_key}")
+# print(f"Mapping {tmp_metabName} to {syn_key}")
 tmp_dict[syn_key] = syn_list
 tmp_dict[syn_key].append(tmp_metabName)
 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
-print('Execution succeded')
+print('Execution succeeded')
 ##############################################################################
 if __name__ == "__main__": main()

Mercurial > repos > bimib > cobraxy

comparison COBRAxy/rps_generator_beta.py @ 456:a6e45049c1b9 draft default tip