# HG changeset patch
# User francesco_lapi
# Date 1759160099 0
# Node ID 7a413a5ec5662d4d3a761a9853a0eb88d98bef78
# Parent c6ea189ea7e994c12150cee936b4dada77525d93
Uploaded
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/fromCSVtoCOBRA.py
--- a/COBRAxy/fromCSVtoCOBRA.py Mon Sep 29 15:13:21 2025 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-"""
-Convert a tabular (CSV/TSV/Tabular) description of a COBRA model into a COBRA file.
-
-Supported output formats: SBML, JSON, MATLAB (.mat), YAML.
-The script logs to a user-provided file for easier debugging in Galaxy.
-"""
-
-import os
-import cobra
-import argparse
-from typing import List
-import logging
-import utils.model_utils as modelUtils
-
-ARGS : argparse.Namespace
-def process_args(args: List[str] = None) -> argparse.Namespace:
- """
- Parse command-line arguments for the CSV-to-COBRA conversion tool.
-
- Returns:
- argparse.Namespace: Parsed arguments.
- """
- parser = argparse.ArgumentParser(
- usage="%(prog)s [options]",
- description="Convert a tabular/CSV file to a COBRA model"
- )
-
-
- parser.add_argument("--out_log", type=str, required=True,
- help="Output log file")
-
-
- parser.add_argument("--input", type=str, required=True,
- help="Input tabular file (CSV/TSV)")
-
-
- parser.add_argument("--format", type=str, required=True, choices=["sbml", "json", "mat", "yaml"],
- help="Model format (SBML, JSON, MATLAB, YAML)")
-
-
- parser.add_argument("--output", type=str, required=True,
- help="Output model file path")
-
-
- parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
- help="Tool directory (passed from Galaxy as $__tool_directory__)")
-
-
- return parser.parse_args(args)
-
-
-###############################- ENTRY POINT -################################
-
-def main(args: List[str] = None) -> None:
- """
- Entry point: parse arguments, build the COBRA model from a CSV/TSV file,
- and save it in the requested format.
-
- Returns:
- None
- """
- global ARGS
- ARGS = process_args(args)
-
- # configure logging to the requested log file (overwrite each run)
- logging.basicConfig(filename=ARGS.out_log,
- level=logging.DEBUG,
- format='%(asctime)s %(levelname)s: %(message)s',
- filemode='w')
-
- logging.info('Starting fromCSVtoCOBRA tool')
- logging.debug('Args: input=%s format=%s output=%s tool_dir=%s', ARGS.input, ARGS.format, ARGS.output, ARGS.tool_dir)
-
- try:
- # Basic sanity checks
- if not os.path.exists(ARGS.input):
- logging.error('Input file not found: %s', ARGS.input)
-
- out_dir = os.path.dirname(os.path.abspath(ARGS.output))
-
- if out_dir and not os.path.isdir(out_dir):
- try:
- os.makedirs(out_dir, exist_ok=True)
- logging.info('Created missing output directory: %s', out_dir)
- except Exception as e:
- logging.exception('Cannot create output directory: %s', out_dir)
-
- model = modelUtils.build_cobra_model_from_csv(ARGS.input)
-
- # Save model in requested format
- if ARGS.format == "sbml":
- cobra.io.write_sbml_model(model, ARGS.output)
- elif ARGS.format == "json":
- cobra.io.save_json_model(model, ARGS.output)
- elif ARGS.format == "mat":
- cobra.io.save_matlab_model(model, ARGS.output)
- elif ARGS.format == "yaml":
- cobra.io.save_yaml_model(model, ARGS.output)
- else:
- logging.error('Unknown format requested: %s', ARGS.format)
- print(f"ERROR: Unknown format: {ARGS.format}")
-
-
- logging.info('Model successfully written to %s (format=%s)', ARGS.output, ARGS.format)
-
- except Exception:
- # Log full traceback to the out_log so Galaxy users/admins can see what happened
- logging.exception('Unhandled exception in fromCSVtoCOBRA')
-
-
-if __name__ == '__main__':
- main()
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/fromCSVtoCOBRA.xml
--- a/COBRAxy/fromCSVtoCOBRA.xml Mon Sep 29 15:13:21 2025 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-
- Convert a tabular dataset to a COBRA model
-
-
-
- cobra
- numpy
- pandas
- lxml
-
-
-
-
- marea_macros.xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/metabolicModel2Tabular.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/metabolicModel2Tabular.py Mon Sep 29 15:34:59 2025 +0000
@@ -0,0 +1,368 @@
+"""
+Script that generates a tabular file of a metabolic model (built-in or custom).
+
+This script loads a COBRA model (built-in or custom), optionally applies
+medium and gene nomenclature settings, derives reaction-related metadata
+(GPR rules, formulas, bounds, objective coefficients, medium membership,
+and compartments for ENGRO2), and writes a tabular summary.
+"""
+
+import os
+import csv
+import cobra
+import argparse
+import pandas as pd
+import utils.general_utils as utils
+from typing import Optional, Tuple, List
+import utils.model_utils as modelUtils
+import logging
+from pathlib import Path
+
+
+ARGS : argparse.Namespace  # parsed CLI arguments; assigned inside main() via process_args()
+def process_args(args: List[str] = None) -> argparse.Namespace:
+ """
+ Parse the metabolicModel2Tabular command line (``args`` or, when None, sys.argv[1:]) into a Namespace.
+ """
+
+ parser = argparse.ArgumentParser(
+ usage="%(prog)s [options]",
+ description="Generate custom data from a given model"
+ )
+
+ parser.add_argument("--out_log", type=str, required=True,
+ help="Output log file")
+
+ parser.add_argument("--model", type=str,
+ help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")
+ parser.add_argument("--input", type=str,
+ help="Custom model file (JSON or XML)")
+ parser.add_argument("--name", type=str, required=True,
+ help="Model name (default or custom)")
+
+ parser.add_argument("--medium_selector", type=str, required=True,
+ help="Medium selection option")
+
+ parser.add_argument("--gene_format", type=str, default="Default",
+ help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")
+
+ parser.add_argument("--out_tabular", type=str,
+ help="Output file for the merged dataset (CSV or XLSX)")
+
+ parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
+ help="Tool directory (passed from Galaxy as $__tool_directory__)")
+
+
+ return parser.parse_args(args)
+
+################################- INPUT DATA LOADING -################################
+def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
+ """
+ Load a custom COBRA model from a JSON, XML, MAT or YML file using the matching cobra.io loader.
+
+ Args:
+ file_path : The path to the file containing the custom model.
+ ext : explicit file format; falls back to file_path.ext when not given. Necessary for standard use in Galaxy.
+
+ Raises:
+ DataErr : if a loader raises, or if the format is none of XML/JSON/MAT/YML (that message reports file_path.ext, not ext).
+
+ Returns:
+ cobra.Model : the model, if successfully opened.
+ """
+ ext = ext if ext else file_path.ext
+ try:
+ if ext is utils.FileFormat.XML:
+ return cobra.io.read_sbml_model(file_path.show())
+
+ if ext is utils.FileFormat.JSON:
+ return cobra.io.load_json_model(file_path.show())
+
+ if ext is utils.FileFormat.MAT:
+ return cobra.io.load_matlab_model(file_path.show())
+
+ if ext is utils.FileFormat.YML:
+ return cobra.io.load_yaml_model(file_path.show())
+
+ except Exception as e: raise utils.DataErr(file_path, e.__str__())
+ raise utils.DataErr(
+ file_path,
+ f"Unrecognized format '{file_path.ext}'. Only JSON, XML, MAT, YML are supported."
+ )
+
+
+###############################- FILE SAVING -################################
+def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
+ """
+ Writes a dictionary as a two-column, tab-separated file (csv "excel-tab" dialect) at the given FilePath.
+
+ Args:
+ data : the data to be written to the file, one row per (key, value) pair.
+ file_path : the destination path as a FilePath; file_path.show() is what gets opened (overwritten).
+ fieldNames : the header names; fieldNames[0] labels the keys, fieldNames[1] the values.
+
+ Returns:
+ None
+ """
+ with open(file_path.show(), 'w', newline='') as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+ writer.writeheader()
+
+ for key, value in data.items():
+ writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
+ """
+ Writes a dictionary as a two-column, tab-separated file (csv "excel-tab" dialect) at the given string path.
+
+ Args:
+ data : the data to be written to the file, one row per (key, value) pair.
+ file_path : the destination path as a plain string; the file is opened in 'w' mode (overwritten).
+ fieldNames : the header names; fieldNames[0] labels the keys, fieldNames[1] the values.
+
+ Returns:
+ None
+ """
+ with open(file_path, 'w', newline='') as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+ writer.writeheader()
+
+ for key, value in data.items():
+ writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
+ """
+ Save a pandas DataFrame as a tab-separated file, creating the parent directory if it is missing.
+
+ Args:
+ df: The DataFrame to write.
+ path: Destination file path (written tab-separated, without the index column).
+
+ Raises:
+ DataErr: If creating the directory or writing the file fails for any reason.
+
+ Returns:
+ None
+ """
+ try:
+ os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+ df.to_csv(path, sep="\t", index=False)
+ except Exception as e:
+ raise utils.DataErr(path, f"failed writing tabular output: {e}")
+
+def is_placeholder(gid) -> bool:
+ """Return True if gid is None or if str(gid), stripped and lower-cased, is one of '0', '', 'na', 'nan'."""
+ if gid is None:
+ return True
+ s = str(gid).strip().lower()
+ return s in {"0", "", "na", "nan"} # compared after lower(), so NA/NaN/NAN match as well
+
+def sample_valid_gene_ids(genes, limit=10):
+ """Return a list of up to `limit` gene IDs as str, skipping placeholders (e.g., the first 0 in RECON); each ID is g.id, else g.gene_id, else g itself."""
+ out = []
+ for g in genes:
+ gid = getattr(g, "id", getattr(g, "gene_id", g))
+ if not is_placeholder(gid):
+ out.append(str(gid))
+ if len(out) >= limit:
+ break
+ return out
+
+
+###############################- ENTRY POINT -################################
+def main(args:List[str] = None) -> None:
+ """
+ Load the requested model, optionally apply an ENGRO2 medium and a gene-ID translation,
+ then merge formulas, GPR rules, bounds, objective coefficients and medium membership
+ into one table written to --out_tabular. NOTE(review): --out_log is parsed but never read here.
+ Returns: None. Raises utils.ArgsErr / utils.DataErr on bad arguments, load or output failures.
+ """
+ # Parse args from frontend (Galaxy XML)
+ global ARGS
+ ARGS = process_args(args)
+
+
+ if ARGS.input:
+ # Load a custom model from file; its format is taken from the extension of --name, not --input
+ model = load_custom_model(
+ utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
+ else:
+ # Load a built-in model
+
+ try:
+ model_enum = utils.Model[ARGS.model] # e.g., Model['ENGRO2']
+ except KeyError:
+ raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)
+
+ # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models)
+ try:
+ model = model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
+ except Exception as e:
+ # Wrap/normalize load errors as DataErr for consistency
+ raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")
+
+ # Final model name: --name if set, otherwise --model. NOTE(review): model_name is not read again in this function.
+
+ model_name = ARGS.name if ARGS.name else ARGS.model
+
+ if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
+ df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
+ ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
+ medium = df_mediums[[ARGS.medium_selector]]
+ medium = medium[ARGS.medium_selector].to_dict()
+
+ # Reset all medium reactions lower bound to zero
+ for rxn_id, _ in model.medium.items():
+ model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
+
+ # Apply selected medium uptake bounds (negative for uptake). NOTE(review): only None is skipped; empty CSV cells presumably arrive as NaN - confirm
+ for reaction, value in medium.items():
+ if value is not None:
+ model.reactions.get_by_id(reaction).lower_bound = -float(value)
+
+ # Initialize translation_issues dictionary
+ translation_issues = {}
+
+ if (ARGS.name == "Recon" or ARGS.name == "ENGRO2") and ARGS.gene_format != "Default":
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ model, translation_issues = modelUtils.translate_model_genes(
+ model=model,
+ mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
+ target_nomenclature=ARGS.gene_format,
+ source_nomenclature='HGNC_symbol',
+ logger=logger
+ )
+
+ if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ tmp_check = []
+ for g in model.genes[1:5]: # samples the genes at indices 1..4 (at most 4 genes; index 0 is skipped)
+ tmp_check.append(modelUtils.gene_type(g.id, "Custom_model"))
+
+ if len(set(tmp_check)) > 1:
+ raise utils.DataErr("Custom_model", "The custom model contains genes with mixed or unrecognized nomenclature. Please ensure all genes use the same recognized nomenclature before applying gene_format conversion.")
+ else:
+ source_nomenclature = tmp_check[0]
+
+ if source_nomenclature != ARGS.gene_format:
+ model, translation_issues = modelUtils.translate_model_genes(
+ model=model,
+ mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
+ target_nomenclature=ARGS.gene_format,
+ source_nomenclature=source_nomenclature,
+ logger=logger
+ )
+
+
+
+ # NOTE(review): second Custom_model pass; its condition equals the one above, so both run unless the first raises.
+ if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
+ logger = logging.getLogger(__name__)
+
+ # Take a small, clean sample of gene IDs (skipping placeholders like 0)
+ ids_sample = sample_valid_gene_ids(model.genes, limit=10)
+ if not ids_sample:
+ raise utils.DataErr(
+ "Custom_model",
+ "No valid gene IDs found (many may be placeholders like 0)."
+ )
+
+ # Detect source nomenclature on the sample
+ types = []
+ for gid in ids_sample:
+ try:
+ t = modelUtils.gene_type(gid, "Custom_model")
+ except Exception as e:
+ # Keep it simple: skip problematic IDs
+ logger.debug(f"gene_type failed for {gid}: {e}")
+ t = None
+ if t:
+ types.append(t)
+
+ if not types:
+ raise utils.DataErr(
+ "Custom_model",
+ "Could not detect a known gene nomenclature from the sample."
+ )
+
+ unique_types = set(types)
+ if len(unique_types) > 1:
+ raise utils.DataErr(
+ "Custom_model",
+ "Mixed or inconsistent gene nomenclatures detected. "
+ "Please unify them before converting."
+ )
+
+ source_nomenclature = types[0]
+
+ # Convert only if needed
+ if source_nomenclature != ARGS.gene_format:
+ model, translation_issues = modelUtils.translate_model_genes(
+ model=model,
+ mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
+ target_nomenclature=ARGS.gene_format,
+ source_nomenclature=source_nomenclature,
+ logger=logger
+ )
+
+ # generate data
+ rules = modelUtils.generate_rules(model, asParsed = False)
+ reactions = modelUtils.generate_reactions(model, asParsed = False)
+ bounds = modelUtils.generate_bounds(model)
+ medium = modelUtils.get_medium(model)
+ objective_function = modelUtils.extract_objective_coefficients(model)
+
+ if ARGS.name == "ENGRO2":
+ compartments = modelUtils.generate_compartments(model)
+
+ df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "GPR"])
+ df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Formula"])
+
+ # Create DataFrame for translation issues
+ df_translation_issues = pd.DataFrame([
+ {"ReactionID": rxn_id, "TranslationIssues": issues}
+ for rxn_id, issues in translation_issues.items()
+ ])
+
+ df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
+ df_medium = medium.rename(columns = {"reaction": "ReactionID"})
+ df_medium["InMedium"] = True
+
+ merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
+ merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
+ merged = merged.merge(objective_function, on = "ReactionID", how = "outer")
+ if ARGS.name == "ENGRO2":
+ merged = merged.merge(compartments, on = "ReactionID", how = "outer")
+ merged = merged.merge(df_medium, on = "ReactionID", how = "left")
+
+ # Add translation issues column
+ if not df_translation_issues.empty:
+ merged = merged.merge(df_translation_issues, on = "ReactionID", how = "left")
+ merged["TranslationIssues"] = merged["TranslationIssues"].fillna("")
+ else:
+ # No translation issues were reported: the TranslationIssues column is
+ # left out of the output (the assignment that would add it empty is disabled).
+ pass
+
+ merged["InMedium"] = merged["InMedium"].fillna(False)
+
+ merged = merged.sort_values(by = "InMedium", ascending = False)
+
+ if not ARGS.out_tabular:
+ raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)
+ save_as_tabular_df(merged, ARGS.out_tabular)
+ expected = ARGS.out_tabular
+
+ # verify output exists and non-empty
+ if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
+ raise utils.DataErr(expected, "Output not created or empty")
+
+ print("Metabolic_model_setting: completed successfully")
+
+if __name__ == '__main__':  # run the tool only when this file is executed as a script
+
+ main()
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/metabolicModel2Tabular.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/metabolicModel2Tabular.xml Mon Sep 29 15:34:59 2025 +0000
@@ -0,0 +1,121 @@
+
+
+
+ numpy
+ pandas
+ cobra
+ lxml
+
+
+
+ marea_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/metabolic_model_setting.py
--- a/COBRAxy/metabolic_model_setting.py Mon Sep 29 15:13:21 2025 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-"""
-Scripts to generate a tabular file of a metabolic model (built-in or custom).
-
-This script loads a COBRA model (built-in or custom), optionally applies
-medium and gene nomenclature settings, derives reaction-related metadata
-(GPR rules, formulas, bounds, objective coefficients, medium membership,
-and compartments for ENGRO2), and writes a tabular summary.
-"""
-
-import os
-import csv
-import cobra
-import argparse
-import pandas as pd
-import utils.general_utils as utils
-from typing import Optional, Tuple, List
-import utils.model_utils as modelUtils
-import logging
-from pathlib import Path
-
-
-ARGS : argparse.Namespace
-def process_args(args: List[str] = None) -> argparse.Namespace:
- """
- Parse command-line arguments for metabolic_model_setting.
- """
-
- parser = argparse.ArgumentParser(
- usage="%(prog)s [options]",
- description="Generate custom data from a given model"
- )
-
- parser.add_argument("--out_log", type=str, required=True,
- help="Output log file")
-
- parser.add_argument("--model", type=str,
- help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")
- parser.add_argument("--input", type=str,
- help="Custom model file (JSON or XML)")
- parser.add_argument("--name", type=str, required=True,
- help="Model name (default or custom)")
-
- parser.add_argument("--medium_selector", type=str, required=True,
- help="Medium selection option")
-
- parser.add_argument("--gene_format", type=str, default="Default",
- help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")
-
- parser.add_argument("--out_tabular", type=str,
- help="Output file for the merged dataset (CSV or XLSX)")
-
- parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
- help="Tool directory (passed from Galaxy as $__tool_directory__)")
-
-
- return parser.parse_args(args)
-
-################################- INPUT DATA LOADING -################################
-def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
- """
- Loads a custom model from a file, either in JSON, XML, MAT, or YML format.
-
- Args:
- file_path : The path to the file containing the custom model.
- ext : explicit file extension. Necessary for standard use in galaxy because of its weird behaviour.
-
- Raises:
- DataErr : if the file is in an invalid format or cannot be opened for whatever reason.
-
- Returns:
- cobra.Model : the model, if successfully opened.
- """
- ext = ext if ext else file_path.ext
- try:
- if ext is utils.FileFormat.XML:
- return cobra.io.read_sbml_model(file_path.show())
-
- if ext is utils.FileFormat.JSON:
- return cobra.io.load_json_model(file_path.show())
-
- if ext is utils.FileFormat.MAT:
- return cobra.io.load_matlab_model(file_path.show())
-
- if ext is utils.FileFormat.YML:
- return cobra.io.load_yaml_model(file_path.show())
-
- except Exception as e: raise utils.DataErr(file_path, e.__str__())
- raise utils.DataErr(
- file_path,
- f"Unrecognized format '{file_path.ext}'. Only JSON, XML, MAT, YML are supported."
- )
-
-
-###############################- FILE SAVING -################################
-def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
- """
- Saves any dictionary-shaped data in a .csv file created at the given file_path as FilePath.
-
- Args:
- data : the data to be written to the file.
- file_path : the path to the .csv file.
- fieldNames : the names of the fields (columns) in the .csv file.
-
- Returns:
- None
- """
- with open(file_path.show(), 'w', newline='') as csvfile:
- writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
- writer.writeheader()
-
- for key, value in data.items():
- writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
-
-def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
- """
- Saves any dictionary-shaped data in a .csv file created at the given file_path as string.
-
- Args:
- data : the data to be written to the file.
- file_path : the path to the .csv file.
- fieldNames : the names of the fields (columns) in the .csv file.
-
- Returns:
- None
- """
- with open(file_path, 'w', newline='') as csvfile:
- writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
- writer.writeheader()
-
- for key, value in data.items():
- writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
-
-def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
- """
- Save a pandas DataFrame as a tab-separated file, creating directories as needed.
-
- Args:
- df: The DataFrame to write.
- path: Destination file path (will be written as TSV).
-
- Raises:
- DataErr: If writing the output fails for any reason.
-
- Returns:
- None
- """
- try:
- os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
- df.to_csv(path, sep="\t", index=False)
- except Exception as e:
- raise utils.DataErr(path, f"failed writing tabular output: {e}")
-
-def is_placeholder(gid) -> bool:
- """Return True if the gene id looks like a placeholder (e.g., 0/NA/NAN/empty)."""
- if gid is None:
- return True
- s = str(gid).strip().lower()
- return s in {"0", "", "na", "nan"} # lowercase for simple matching
-
-def sample_valid_gene_ids(genes, limit=10):
- """Yield up to `limit` valid gene IDs, skipping placeholders (e.g., the first 0 in RECON)."""
- out = []
- for g in genes:
- gid = getattr(g, "id", getattr(g, "gene_id", g))
- if not is_placeholder(gid):
- out.append(str(gid))
- if len(out) >= limit:
- break
- return out
-
-
-###############################- ENTRY POINT -################################
-def main(args:List[str] = None) -> None:
- """
- Initialize and generate custom data based on the frontend input arguments.
-
- Returns:
- None
- """
- # Parse args from frontend (Galaxy XML)
- global ARGS
- ARGS = process_args(args)
-
-
- if ARGS.input:
- # Load a custom model from file
- model = load_custom_model(
- utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
- else:
- # Load a built-in model
-
- try:
- model_enum = utils.Model[ARGS.model] # e.g., Model['ENGRO2']
- except KeyError:
- raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)
-
- # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models)
- try:
- model = model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
- except Exception as e:
- # Wrap/normalize load errors as DataErr for consistency
- raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")
-
- # Determine final model name: explicit --name overrides, otherwise use the model id
-
- model_name = ARGS.name if ARGS.name else ARGS.model
-
- if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
- df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
- ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
- medium = df_mediums[[ARGS.medium_selector]]
- medium = medium[ARGS.medium_selector].to_dict()
-
- # Reset all medium reactions lower bound to zero
- for rxn_id, _ in model.medium.items():
- model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
-
- # Apply selected medium uptake bounds (negative for uptake)
- for reaction, value in medium.items():
- if value is not None:
- model.reactions.get_by_id(reaction).lower_bound = -float(value)
-
- # Initialize translation_issues dictionary
- translation_issues = {}
-
- if (ARGS.name == "Recon" or ARGS.name == "ENGRO2") and ARGS.gene_format != "Default":
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- model, translation_issues = modelUtils.translate_model_genes(
- model=model,
- mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
- target_nomenclature=ARGS.gene_format,
- source_nomenclature='HGNC_symbol',
- logger=logger
- )
-
- if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- tmp_check = []
- for g in model.genes[1:5]: # check first 3 genes only
- tmp_check.append(modelUtils.gene_type(g.id, "Custom_model"))
-
- if len(set(tmp_check)) > 1:
- raise utils.DataErr("Custom_model", "The custom model contains genes with mixed or unrecognized nomenclature. Please ensure all genes use the same recognized nomenclature before applying gene_format conversion.")
- else:
- source_nomenclature = tmp_check[0]
-
- if source_nomenclature != ARGS.gene_format:
- model, translation_issues = modelUtils.translate_model_genes(
- model=model,
- mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
- target_nomenclature=ARGS.gene_format,
- source_nomenclature=source_nomenclature,
- logger=logger
- )
-
-
-
-
- if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
- logger = logging.getLogger(__name__)
-
- # Take a small, clean sample of gene IDs (skipping placeholders like 0)
- ids_sample = sample_valid_gene_ids(model.genes, limit=10)
- if not ids_sample:
- raise utils.DataErr(
- "Custom_model",
- "No valid gene IDs found (many may be placeholders like 0)."
- )
-
- # Detect source nomenclature on the sample
- types = []
- for gid in ids_sample:
- try:
- t = modelUtils.gene_type(gid, "Custom_model")
- except Exception as e:
- # Keep it simple: skip problematic IDs
- logger.debug(f"gene_type failed for {gid}: {e}")
- t = None
- if t:
- types.append(t)
-
- if not types:
- raise utils.DataErr(
- "Custom_model",
- "Could not detect a known gene nomenclature from the sample."
- )
-
- unique_types = set(types)
- if len(unique_types) > 1:
- raise utils.DataErr(
- "Custom_model",
- "Mixed or inconsistent gene nomenclatures detected. "
- "Please unify them before converting."
- )
-
- source_nomenclature = types[0]
-
- # Convert only if needed
- if source_nomenclature != ARGS.gene_format:
- model, translation_issues = modelUtils.translate_model_genes(
- model=model,
- mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
- target_nomenclature=ARGS.gene_format,
- source_nomenclature=source_nomenclature,
- logger=logger
- )
-
- # generate data
- rules = modelUtils.generate_rules(model, asParsed = False)
- reactions = modelUtils.generate_reactions(model, asParsed = False)
- bounds = modelUtils.generate_bounds(model)
- medium = modelUtils.get_medium(model)
- objective_function = modelUtils.extract_objective_coefficients(model)
-
- if ARGS.name == "ENGRO2":
- compartments = modelUtils.generate_compartments(model)
-
- df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "GPR"])
- df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Formula"])
-
- # Create DataFrame for translation issues
- df_translation_issues = pd.DataFrame([
- {"ReactionID": rxn_id, "TranslationIssues": issues}
- for rxn_id, issues in translation_issues.items()
- ])
-
- df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
- df_medium = medium.rename(columns = {"reaction": "ReactionID"})
- df_medium["InMedium"] = True
-
- merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
- merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
- merged = merged.merge(objective_function, on = "ReactionID", how = "outer")
- if ARGS.name == "ENGRO2":
- merged = merged.merge(compartments, on = "ReactionID", how = "outer")
- merged = merged.merge(df_medium, on = "ReactionID", how = "left")
-
- # Add translation issues column
- if not df_translation_issues.empty:
- merged = merged.merge(df_translation_issues, on = "ReactionID", how = "left")
- merged["TranslationIssues"] = merged["TranslationIssues"].fillna("")
- else:
- # Add empty TranslationIssues column if no issues found
- #merged["TranslationIssues"] = ""
- pass
-
- merged["InMedium"] = merged["InMedium"].fillna(False)
-
- merged = merged.sort_values(by = "InMedium", ascending = False)
-
- if not ARGS.out_tabular:
- raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)
- save_as_tabular_df(merged, ARGS.out_tabular)
- expected = ARGS.out_tabular
-
- # verify output exists and non-empty
- if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
- raise utils.DataErr(expected, "Output not created or empty")
-
- print("Metabolic_model_setting: completed successfully")
-
-if __name__ == '__main__':
-
- main()
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/metabolic_model_setting.xml
--- a/COBRAxy/metabolic_model_setting.xml Mon Sep 29 15:13:21 2025 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-
-
-
- numpy
- pandas
- cobra
- lxml
-
-
-
- marea_macros.xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/tabular2MetabolicModel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/tabular2MetabolicModel.py Mon Sep 29 15:34:59 2025 +0000
@@ -0,0 +1,112 @@
+"""
+Convert a tabular (CSV/TSV/Tabular) description of a COBRA model into a COBRA file.
+
+Supported output formats: SBML, JSON, MATLAB (.mat), YAML.
+The script logs to a user-provided file for easier debugging in Galaxy.
+"""
+
+import os
+import cobra
+import argparse
+from typing import List
+import logging
+import utils.model_utils as modelUtils
+
+ARGS : argparse.Namespace  # parsed CLI arguments; assigned inside main() via process_args()
+def process_args(args: List[str] = None) -> argparse.Namespace:
+ """
+ Parse command-line arguments for the tabular-to-COBRA conversion tool.
+
+ Args: args -- list of argument strings; None lets argparse read sys.argv[1:].
+ Returns: argparse.Namespace with out_log, input, format, output and tool_dir.
+ """
+ parser = argparse.ArgumentParser(
+ usage="%(prog)s [options]",
+ description="Convert a tabular/CSV file to a COBRA model"
+ )
+
+
+ parser.add_argument("--out_log", type=str, required=True,
+ help="Output log file")
+
+
+ parser.add_argument("--input", type=str, required=True,
+ help="Input tabular file (CSV/TSV)")
+
+
+ parser.add_argument("--format", type=str, required=True, choices=["sbml", "json", "mat", "yaml"],
+ help="Model format (SBML, JSON, MATLAB, YAML)")
+
+
+ parser.add_argument("--output", type=str, required=True,
+ help="Output model file path")
+
+
+ parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
+ help="Tool directory (passed from Galaxy as $__tool_directory__)")
+
+
+ return parser.parse_args(args)
+
+
+###############################- ENTRY POINT -################################
+
+def main(args: List[str] = None) -> None:
+ """
+ Entry point: parse arguments, build the COBRA model from a CSV/TSV file,
+ and save it in the requested format (sbml, json, mat or yaml).
+ Logging goes to --out_log, which is overwritten on every run; exceptions raised
+ while building or saving are logged there with a traceback and not re-raised.
+ Returns: None
+ """
+ global ARGS
+ ARGS = process_args(args)
+
+ # configure logging to the requested log file (overwrite each run)
+ logging.basicConfig(filename=ARGS.out_log,
+ level=logging.DEBUG,
+ format='%(asctime)s %(levelname)s: %(message)s',
+ filemode='w')
+
+ logging.info('Starting fromCSVtoCOBRA tool')
+ logging.debug('Args: input=%s format=%s output=%s tool_dir=%s', ARGS.input, ARGS.format, ARGS.output, ARGS.tool_dir)
+
+ try:
+ # Basic sanity checks (a missing input file is only logged by this check; it raises nothing itself)
+ if not os.path.exists(ARGS.input):
+ logging.error('Input file not found: %s', ARGS.input)
+
+ out_dir = os.path.dirname(os.path.abspath(ARGS.output))
+
+ if out_dir and not os.path.isdir(out_dir):
+ try:
+ os.makedirs(out_dir, exist_ok=True)
+ logging.info('Created missing output directory: %s', out_dir)
+ except Exception as e:
+ logging.exception('Cannot create output directory: %s', out_dir)
+
+ model = modelUtils.build_cobra_model_from_csv(ARGS.input)
+
+ # Save model in requested format
+ if ARGS.format == "sbml":
+ cobra.io.write_sbml_model(model, ARGS.output)
+ elif ARGS.format == "json":
+ cobra.io.save_json_model(model, ARGS.output)
+ elif ARGS.format == "mat":
+ cobra.io.save_matlab_model(model, ARGS.output)
+ elif ARGS.format == "yaml":
+ cobra.io.save_yaml_model(model, ARGS.output)
+ else:
+ logging.error('Unknown format requested: %s', ARGS.format)
+ print(f"ERROR: Unknown format: {ARGS.format}")
+ # argparse `choices` already rejects any other --format, so the else branch above is only a safeguard.
+
+ logging.info('Model successfully written to %s (format=%s)', ARGS.output, ARGS.format)
+
+ except Exception:
+ # Log full traceback to the out_log so Galaxy users/admins can see what happened. NOTE(review): not re-raised, so main() still returns normally.
+ logging.exception('Unhandled exception in fromCSVtoCOBRA')
+
+
+if __name__ == '__main__':  # run the tool only when this file is executed as a script
+ main()
diff -r c6ea189ea7e9 -r 7a413a5ec566 COBRAxy/tabular2MetabolicModel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/tabular2MetabolicModel.xml Mon Sep 29 15:34:59 2025 +0000
@@ -0,0 +1,69 @@
+
+ Convert a tabular dataset to a COBRA model
+
+
+
+ cobra
+ numpy
+ pandas
+ lxml
+
+
+
+
+ marea_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+