annotate COBRAxy/src/importMetabolicModel.py @ 546:01147e83f43c draft default tip

Uploaded
author luca_milaz
date Mon, 27 Oct 2025 12:33:08 +0000
parents fcdbc81feb45
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
540
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
1 """
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
2 Scripts to generate a tabular file of a metabolic model (built-in or custom).
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
3
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
4 This script loads a COBRA model (built-in or custom), optionally applies
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
5 medium and gene nomenclature settings, derives reaction-related metadata
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
6 (GPR rules, formulas, bounds, objective coefficients, medium membership,
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
7 and compartments for ENGRO2), and writes a tabular summary.
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
8 """
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
9
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
10 import os
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
11 import csv
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
12 import cobra
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
13 import argparse
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
14 import pandas as pd
542
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
15 try:
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
16 from .utils import general_utils as utils
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
17 from .utils import model_utils as modelUtils
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
18 except:
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
19 import utils.general_utils as utils
fcdbc81feb45 Uploaded
francesco_lapi
parents: 540
diff changeset
20 import utils.model_utils as modelUtils
540
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
21 from typing import Optional, Tuple, List
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
22 import logging
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
23 from pathlib import Path
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
24
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
25
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
26 ARGS : argparse.Namespace
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def process_args(args: List[str] = None) -> argparse.Namespace:
    """
    Build the command-line parser for this tool and parse ``args`` with it.

    Args:
        args : argument strings to parse; when None, argparse reads sys.argv.

    Returns:
        argparse.Namespace : the parsed options.
    """
    cli = argparse.ArgumentParser(
        usage="%(prog)s [options]",
        description="Generate custom data from a given model",
    )

    # One (flag, keyword-arguments) entry per option, registered in this order.
    option_specs = (
        ("--out_log", dict(type=str, required=True,
                           help="Output log file")),
        ("--model", dict(type=str,
                         help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")),
        ("--input", dict(type=str,
                         help="Custom model file (JSON, XML, MAT, YAML)")),
        # nargs='*' collects every word of the name into a list.
        ("--name", dict(nargs='*', required=True,
                        help="Model name (default or custom)")),
        ("--medium_selector", dict(type=str, default="Default",
                                   help="Medium selection option")),
        ("--gene_format", dict(type=str, default="Default",
                               help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")),
        ("--out_tabular", dict(type=str,
                               help="Output file for the merged dataset (CSV or XLSX)")),
        # Defaults to the directory that contains this module.
        ("--tool_dir", dict(type=str, default=os.path.dirname(os.path.abspath(__file__)),
                            help="Tool directory (default: auto-detected package location)")),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args(args)
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
61
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
62 ################################- INPUT DATA LOADING -################################
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def detect_file_format(file_path: str) -> utils.FileFormat:
    """
    Detect the format of a model file from its content, then from its extension.

    Content is inspected first so that Galaxy datasets (stored with a generic
    .dat extension) can still be recognised.

    Args:
        file_path : path to the model file.

    Returns:
        utils.FileFormat : the detected format; XML when nothing else matches.
    """
    try:
        # MAT files are binary, so sniff the header bytes before any text
        # decoding is attempted (MAT-file text headers start with "MATLAB").
        with open(file_path, 'rb') as raw:
            if raw.read(6) == b'MATLAB':
                return utils.FileFormat.MAT

        with open(file_path, 'r') as f:
            first_lines = ''.join(f.readline() for _ in range(5))

        # Check for XML (SBML)
        if '<?xml' in first_lines or '<sbml' in first_lines:
            return utils.FileFormat.XML

        # Check for JSON
        if first_lines.strip().startswith('{'):
            return utils.FileFormat.JSON

        # Check for YAML: a mapping key ending with ':' in the first three lines
        if any(line.strip().endswith(':') for line in first_lines.split('\n')[:3]):
            return utils.FileFormat.YML

    except (OSError, UnicodeError):
        # Unreadable or non-text content: defer to the extension below.
        # Only I/O and decoding failures are ignored, so interrupts and
        # programming errors are no longer silently swallowed.
        pass

    # Fall back to extension-based detection (case-insensitive)
    lowered = file_path.lower()
    if lowered.endswith(('.xml', '.sbml')):
        return utils.FileFormat.XML
    if lowered.endswith('.json'):
        return utils.FileFormat.JSON
    if lowered.endswith('.mat'):
        return utils.FileFormat.MAT
    if lowered.endswith(('.yml', '.yaml')):
        return utils.FileFormat.YML

    # Default to XML for unknown extensions
    return utils.FileFormat.XML
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
99
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
    """
    Loads a custom model from a file, either in JSON, XML, MAT, or YML format.

    Args:
        file_path : The path to the file containing the custom model.
        ext : explicit file format. When given (and truthy) it takes precedence
            over file_path.ext; needed in Galaxy, where the file extension does
            not reflect the content.

    Raises:
        DataErr : if the format is not one of the supported ones, or if the
            cobra loader fails for whatever reason.

    Returns:
        cobra.Model : the model, if successfully opened.
    """
    ext = ext if ext else file_path.ext
    try:
        if ext is utils.FileFormat.XML:
            return cobra.io.read_sbml_model(file_path.show())

        if ext is utils.FileFormat.JSON:
            return cobra.io.load_json_model(file_path.show())

        if ext is utils.FileFormat.MAT:
            return cobra.io.load_matlab_model(file_path.show())

        if ext is utils.FileFormat.YML:
            return cobra.io.load_yaml_model(file_path.show())

    except Exception as e:
        # Normalise any loader failure to DataErr, keeping the original cause.
        raise utils.DataErr(file_path, str(e)) from e

    # Report the format that was actually tested, which may be the explicit
    # `ext` override rather than the extension of the path.
    raise utils.DataErr(
        file_path,
        f"Unrecognized format '{ext}'. Only JSON, XML, MAT, YML are supported."
    )
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
133
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
134
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
135 ###############################- FILE SAVING -################################
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
    """
    Write a dictionary as a two-column, tab-separated file at a FilePath location.

    Args:
        data : mapping whose keys fill the first column and values the second.
        file_path : destination, given as a FilePath object.
        fieldNames : header names of the columns.

    Returns:
        None
    """
    # Same writer logic as the string-path variant; only the path type differs.
    save_as_csv(data, file_path.show(), fieldNames)
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
154
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
    """
    Write a dictionary as a two-column, tab-separated file at a string path.

    Args:
        data : mapping whose keys fill the first column and values the second.
        file_path : destination path, given as a string.
        fieldNames : header names of the columns.

    Returns:
        None
    """
    # Rows are produced lazily, one per dictionary item, while writing.
    rows = ({fieldNames[0]: entry_key, fieldNames[1]: entry_value}
            for entry_key, entry_value in data.items())

    with open(file_path, 'w', newline='') as handle:
        table = csv.DictWriter(handle, fieldnames=fieldNames, dialect="excel-tab")
        table.writeheader()
        table.writerows(rows)
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
173
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
    """
    Write a DataFrame to ``path`` as tab-separated text without the index.

    The parent directory of ``path`` is created first when it does not exist.

    Args:
        df: table to serialise.
        path: output file location.

    Raises:
        DataErr: when the directory cannot be created or the file cannot be written.

    Returns:
        None
    """
    try:
        target_dir = os.path.dirname(path)
        if not target_dir:
            # A bare file name has no directory component: use the current one.
            target_dir = "."
        os.makedirs(target_dir, exist_ok=True)
        df.to_csv(path, sep="\t", index=False)
    except Exception as write_error:
        raise utils.DataErr(path, f"failed writing tabular output: {write_error}")
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
193
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def is_placeholder(gid) -> bool:
    """Tell whether a gene id is a placeholder rather than a real identifier.

    None counts as a placeholder, as does any value whose string form, once
    stripped and lower-cased, is empty or one of "0", "na", "nan".
    """
    if gid is not None:
        normalized = str(gid).strip().lower()
        return normalized in ("", "0", "na", "nan")
    return True
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
200
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def sample_valid_gene_ids(genes, limit=10):
    """
    Return up to `limit` valid gene IDs as strings, skipping placeholders
    (e.g., the first 0 in RECON).

    Args:
        genes : iterable of gene objects or raw ids. For each item the `id`
            attribute is used, then `gene_id`, then the item itself.
        limit : maximum number of ids to collect; zero or less yields an
            empty list.

    Returns:
        list : the collected ids, in iteration order.
    """
    # Guard first: checking the limit only after appending would let one id
    # through even when no ids were requested.
    if limit <= 0:
        return []

    ids = []
    for gene in genes:
        gid = getattr(gene, "id", getattr(gene, "gene_id", gene))
        if is_placeholder(gid):
            continue
        ids.append(str(gid))
        if len(ids) >= limit:
            break
    return ids
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
211
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
212
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
213 ###############################- ENTRY POINT -################################
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
def _load_model() -> cobra.Model:
    """Load the custom model given by --input, else the built-in one named by --model."""
    if ARGS.input:
        # Load a custom model from file with auto-detected format
        detected_format = detect_file_format(ARGS.input)
        return load_custom_model(utils.FilePath.fromStrPath(ARGS.input), detected_format)

    if not ARGS.model:
        raise utils.ArgsErr("model", "either --model or --input must be provided", "None")

    try:
        model_enum = utils.Model[ARGS.model]  # e.g., Model['ENGRO2']
    except KeyError:
        raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)

    # Model.getCOBRAmodel uses tool_dir to locate local models
    try:
        return model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
    except Exception as e:
        # Wrap/normalize load errors as DataErr for consistency
        raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")


def _apply_medium(model) -> None:
    """Set the model's uptake bounds from the --medium_selector column of medium.csv."""
    medium_path = os.path.join(ARGS.tool_dir, "local", "medium", "medium.csv")
    df_mediums = pd.read_csv(medium_path, index_col=0)
    medium = df_mediums[ARGS.medium_selector].to_dict()

    # Reset all medium reactions lower bound to zero
    for rxn_id in model.medium:
        model.reactions.get_by_id(rxn_id).lower_bound = 0.0

    # Apply selected medium uptake bounds (negative for uptake)
    for reaction, value in medium.items():
        # Empty CSV cells are read as NaN rather than None; skip both so that
        # no NaN bound is ever assigned.
        if pd.notna(value):
            model.reactions.get_by_id(reaction).lower_bound = -float(value)


def _translate_genes(model, source_nomenclature, logger):
    """Translate the model genes to --gene_format using the bundled human gene mapping."""
    mapping_path = os.path.join(ARGS.tool_dir, "local", "mappings", "genes_human.csv")
    return modelUtils.translate_model_genes(
        model=model,
        mapping_df=pd.read_csv(mapping_path, dtype={'entrez_id': str}),
        target_nomenclature=ARGS.gene_format,
        source_nomenclature=source_nomenclature,
        logger=logger
    )


def _detect_source_nomenclature(model, logger):
    """Infer the gene nomenclature of a custom model from a small sample of its gene ids."""
    # Take a small, clean sample of gene IDs (skipping placeholders like 0)
    ids_sample = sample_valid_gene_ids(model.genes, limit=10)
    if not ids_sample:
        raise utils.DataErr(
            "Custom_model",
            "No valid gene IDs found (many may be placeholders like 0)."
        )

    types = []
    for gid in ids_sample:
        try:
            t = modelUtils.gene_type(gid, "Custom_model")
        except Exception as e:
            # Keep it simple: skip problematic IDs
            logger.debug(f"gene_type failed for {gid}: {e}")
            continue
        if t:
            types.append(t)

    if not types:
        raise utils.DataErr(
            "Custom_model",
            "Could not detect a known gene nomenclature from the sample."
        )

    if len(set(types)) > 1:
        raise utils.DataErr(
            "Custom_model",
            "Mixed or inconsistent gene nomenclatures detected. "
            "Please unify them before converting."
        )

    return types[0]


def main(args:List[str] = None) -> None:
    """
    Initialize and generate custom data based on the frontend input arguments.

    Steps: parse the arguments, load the model, optionally apply a medium and
    translate gene identifiers, export the model as a tabular file, then check
    that the output file exists and is not empty.

    Args:
        args : argument strings; when None, argparse reads sys.argv.

    Raises:
        ArgsErr : if neither --model nor --input is given, if --model is not a
            known built-in model, or if --out_tabular is missing.
        DataErr : if the model cannot be loaded, the gene nomenclature cannot
            be determined, or the output is missing or empty.

    Returns:
        None
    """
    # Parse args from frontend (Galaxy XML)
    global ARGS
    ARGS = process_args(args)

    # Convert name from list to string (handles names with spaces)
    if isinstance(ARGS.name, list):
        ARGS.name = ' '.join(ARGS.name)

    model = _load_model()

    if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
        _apply_medium(model)

    # Stays empty unless a gene translation is performed below
    translation_issues = {}

    if (ARGS.name == "Recon" or ARGS.name == "ENGRO2") and ARGS.gene_format != "Default":
        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger(__name__)

        model, translation_issues = _translate_genes(model, 'HGNC_symbol', logger)

    if ARGS.input and ARGS.gene_format != "Default":
        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger(__name__)

        # Detect source nomenclature on a sample of the model genes
        source_nomenclature = _detect_source_nomenclature(model, logger)

        # Convert only if needed
        if source_nomenclature != ARGS.gene_format:
            model, translation_issues = _translate_genes(model, source_nomenclature, logger)

    # generate data using unified function
    if not ARGS.out_tabular:
        raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)

    modelUtils.export_model_to_tabular(
        model=model,
        output_path=ARGS.out_tabular,
        translation_issues=translation_issues,
        include_objective=True,
        save_function=save_as_tabular_df
    )
    expected = ARGS.out_tabular

    # verify output exists and non-empty
    if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
        raise utils.DataErr(expected, "Output not created or empty")

    print("Completed successfully")
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
350
7d5b35c715e8 Uploaded
francesco_lapi
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()