# HG changeset patch # User francesco_lapi # Date 1757504288 0 # Node ID 4a385fdb9e58d2943043a484ea1306bb758a5382 # Parent 00a78da611ba48fc70fcde99dad93cc7a4563373 Uploaded diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/custom_data_generator_beta.xml --- a/COBRAxy/custom_data_generator_beta.xml Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/custom_data_generator_beta.xml Wed Sep 10 11:38:08 2025 +0000 @@ -89,7 +89,7 @@ - + diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/marea_cluster.xml --- a/COBRAxy/marea_cluster.xml Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/marea_cluster.xml Wed Sep 10 11:38:08 2025 +0000 @@ -19,6 +19,7 @@ --out_log $log --best_cluster $best_cluster --cluster_type ${data.clust_type} + --scaling $scaling #if $data.clust_type == 'kmeans': --k_min ${data.k_min} --k_max ${data.k_max} @@ -40,7 +41,8 @@ - + + diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/ras_generator_beta.py --- a/COBRAxy/ras_generator_beta.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/ras_generator_beta.py Wed Sep 10 11:38:08 2025 +0000 @@ -649,30 +649,50 @@ dict_rule = {} try: + rows = utils.readCsv(datFilePath, delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + # Proviamo prima con delimitatore tab - for line in utils.readCsv(datFilePath, delimiter = "\t"): - if len(line) < 3: # Controlliamo che ci siano almeno 3 colonne + for line in rows[1:]: + if len(line) <= idx_gpr: utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) continue - if line[2] == "": - dict_rule[line[0]] = ruleUtils.OpList([""]) + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) else: - dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) except Exception as e: # Se fallisce con tab, proviamo con virgola try: - dict_rule = {} - for line in utils.readCsv(datFilePath, delimiter = ","): - if len(line) < 3: + rows = utils.readCsv(datFilePath, delimiter = ",", skipHeader=False) + + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + + # Proviamo prima con delimitatore tab + for line in rows[1:]: + if len(line) <= idx_gpr: utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) continue - if line[2] == "": - dict_rule[line[0]] = ruleUtils.OpList([""]) + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) else: - dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) + except Exception as e2: raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}") diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/utils/general_utils.py --- a/COBRAxy/utils/general_utils.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/utils/general_utils.py Wed Sep 10 11:38:08 2025 +0000 @@ -505,6 +505,39 @@ """ with open(path.show(), "r", newline = "") as fd: return list(csv.reader(fd, delimiter = delimiter))[skipHeader:] +def findIdxByName(header: List[str], name: str, colName="name") -> Optional[int]: + """ + Find the indices of the 'ReactionID' column and a user-specified column name + within the header row of a tabular file. + + Args: + header (List[str]): The header row, as a list of column names. + name (str): The name of the column to look for (e.g. 'GPR'). + colName (str, optional): Label used in error messages for clarity. Defaults to "name". + + Returns: + Tuple[int, int]: A tuple containing: + - The index of the 'ReactionID' column. + - The index of the requested column `name`. + + Raises: + ValueError: If 'ReactionID' or the requested column `name` is not found in the header. + + Notes: + Both 'ReactionID' and the requested column are mandatory for downstream processing. + """ + + col_index = {col_name: idx for idx, col_name in enumerate(header)} + + if name not in col_index or "ReactionID" not in col_index: + raise ValueError(f"Tabular file must contain 'ReactionID' and {name} columns.") + + id_idx = col_index["ReactionID"] + idx_gpr = col_index[name] + + return id_idx, idx_gpr + + def readSvg(path :FilePath, customErr :Optional[Exception] = None) -> ET.ElementTree: """ Reads the contents of a .svg file, which needs to exist at the given path. diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/utils/model_utils.py --- a/COBRAxy/utils/model_utils.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/utils/model_utils.py Wed Sep 10 11:38:08 2025 +0000 @@ -221,7 +221,7 @@ for idx, row in df.iterrows(): reaction_id = str(row['ReactionID']).strip() - reaction_formula = str(row['Reaction']).strip() + reaction_formula = str(row['Formula']).strip() # Salta reazioni senza formula if not reaction_formula or reaction_formula == 'nan': @@ -236,8 +236,8 @@ reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 # Aggiungi gene rule se presente - if pd.notna(row['Rule']) and str(row['Rule']).strip(): - reaction.gene_reaction_rule = str(row['Rule']).strip() + if pd.notna(row['GPR']) and str(row['GPR']).strip(): + reaction.gene_reaction_rule = str(row['GPR']).strip() # Parse della formula della reazione try: diff -r 00a78da611ba -r 4a385fdb9e58 COBRAxy/utils/reaction_parsing.py --- a/COBRAxy/utils/reaction_parsing.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/utils/reaction_parsing.py Wed Sep 10 11:38:08 2025 +0000 @@ -124,6 +124,22 @@ Returns: ReactionsDict : dictionary encoding custom reactions information. """ - reactionsData :Dict[str, str] = {row[0]: row[1] for row in utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t")} + try: + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + except Exception as e: + + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]} + return create_reaction_dict(reactionsData)