annotate COBRAxy/metabolic_model_setting.py @ 490:c6ea189ea7e9 draft

Uploaded
author francesco_lapi
date Mon, 29 Sep 2025 15:13:21 +0000
parents 5b625d91bc7f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
1 """
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
2 Scripts to generate a tabular file of a metabolic model (built-in or custom).
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
3
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
4 This script loads a COBRA model (built-in or custom), optionally applies
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
5 medium and gene nomenclature settings, derives reaction-related metadata
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
6 (GPR rules, formulas, bounds, objective coefficients, medium membership,
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
7 and compartments for ENGRO2), and writes a tabular summary.
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
8 """
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
9
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
10 import os
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
11 import csv
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
12 import cobra
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
13 import argparse
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
14 import pandas as pd
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
15 import utils.general_utils as utils
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
16 from typing import Optional, Tuple, List
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
17 import utils.model_utils as modelUtils
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
18 import logging
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
19 from pathlib import Path
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
20
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
21
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
22 ARGS : argparse.Namespace
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
def process_args(args: List[str] = None) -> argparse.Namespace:
    """
    Build the command-line parser of metabolic_model_setting and parse `args`.

    Args:
        args : the argument strings to parse; when None, argparse falls back to sys.argv[1:].

    Returns:
        argparse.Namespace : the parsed options.
    """
    cli = argparse.ArgumentParser(
        usage="%(prog)s [options]",
        description="Generate custom data from a given model",
    )

    # One (flag, keyword arguments) entry per option, registered in declaration order.
    option_specs = [
        ("--out_log", dict(type=str, required=True,
                           help="Output log file")),
        ("--model", dict(type=str,
                         help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")),
        ("--input", dict(type=str,
                         help="Custom model file (JSON or XML)")),
        ("--name", dict(type=str, required=True,
                        help="Model name (default or custom)")),
        ("--medium_selector", dict(type=str, required=True,
                                   help="Medium selection option")),
        ("--gene_format", dict(type=str, default="Default",
                               help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")),
        ("--out_tabular", dict(type=str,
                               help="Output file for the merged dataset (CSV or XLSX)")),
        # Defaults to the directory containing this script when the flag is omitted.
        ("--tool_dir", dict(type=str, default=os.path.dirname(__file__),
                            help="Tool directory (passed from Galaxy as $__tool_directory__)")),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args(args)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
57
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
58 ################################- INPUT DATA LOADING -################################
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
    """
    Loads a custom model from a file, either in JSON, XML, MAT, or YML format.

    Args:
        file_path : The path to the file containing the custom model.
        ext : explicit file format. When provided it is used instead of file_path.ext
            (the original note says this is necessary for standard use in galaxy).

    Raises:
        DataErr : if the format is not one of the supported ones, or if the matching
            cobra loader raises for whatever reason (the original exception is chained as the cause).

    Returns:
        cobra.Model : the model, if successfully opened.
    """
    # The explicit override wins; otherwise fall back to the format carried by the path.
    ext = ext if ext else file_path.ext
    try:
        if ext is utils.FileFormat.XML:
            return cobra.io.read_sbml_model(file_path.show())

        if ext is utils.FileFormat.JSON:
            return cobra.io.load_json_model(file_path.show())

        if ext is utils.FileFormat.MAT:
            return cobra.io.load_matlab_model(file_path.show())

        if ext is utils.FileFormat.YML:
            return cobra.io.load_yaml_model(file_path.show())

    except Exception as e:
        # Normalize any loader failure to DataErr, keeping the original traceback as the cause.
        raise utils.DataErr(file_path, str(e)) from e

    # Report the format that was actually tested: with an explicit `ext` override,
    # file_path.ext may name a different format than the one that was rejected.
    raise utils.DataErr(
        file_path,
        f"Unrecognized format '{ext}'. Only JSON, XML, MAT, YML are supported."
    )
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
92
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
93
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
94 ###############################- FILE SAVING -################################
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
    """
    Writes a dictionary as a two-column, tab-separated file at the location given by a FilePath.

    Args:
        data : the data to be written; each key/value pair becomes one row.
        file_path : the destination, as a FilePath (its show() result is the path that gets opened).
        fieldNames : the two column names; the first labels the keys, the second the values.

    Returns:
        None
    """
    with open(file_path.show(), 'w', newline='') as handle:
        tsv = csv.DictWriter(handle, fieldnames = fieldNames, dialect="excel-tab")
        tsv.writeheader()
        # Header first, then one row per dictionary entry in iteration order.
        tsv.writerows(
            { fieldNames[0] : entry_key, fieldNames[1] : entry_value }
            for entry_key, entry_value in data.items()
        )
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
113
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
    """
    Writes a dictionary as a two-column, tab-separated file at the location given by a plain string path.

    Args:
        data : the data to be written; each key/value pair becomes one row.
        file_path : the destination path, as a string.
        fieldNames : the two column names; the first labels the keys, the second the values.

    Returns:
        None
    """
    with open(file_path, 'w', newline='') as handle:
        tsv = csv.DictWriter(handle, fieldnames = fieldNames, dialect="excel-tab")
        tsv.writeheader()
        # Header first, then one row per dictionary entry in iteration order.
        tsv.writerows(
            { fieldNames[0] : entry_key, fieldNames[1] : entry_value }
            for entry_key, entry_value in data.items()
        )
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
132
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
    """
    Write a pandas DataFrame to `path` as a tab-separated file without the index column.

    The parent directory of `path` is created first when it does not exist yet.

    Args:
        df: The DataFrame to write.
        path: Destination file path (will be written as TSV).

    Raises:
        DataErr: If creating the directory or writing the file fails for any reason.

    Returns:
        None
    """
    try:
        parent_dir = os.path.dirname(path)
        if not parent_dir:
            # A bare file name has no directory component: target the current directory.
            parent_dir = "."
        os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(path, sep="\t", index=False)
    except Exception as err:
        raise utils.DataErr(path, f"failed writing tabular output: {err}")
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
152
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
def is_placeholder(gid) -> bool:
    """Tell whether a gene id is a placeholder: None, or text equal to 0/NA/NAN/empty once stripped and lowercased."""
    # Tokens are stored lowercase because the candidate is lowercased before the lookup.
    placeholder_tokens = ("", "0", "na", "nan")
    return gid is None or str(gid).strip().lower() in placeholder_tokens
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
159
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
def sample_valid_gene_ids(genes, limit=10):
    """
    Return a list with at most `limit` valid gene IDs, skipping placeholders (e.g., the first 0 in RECON).

    Args:
        genes : iterable of gene objects or raw identifiers. For each item the `id` attribute
            is used when present, then `gene_id`, otherwise the item itself.
        limit : maximum number of IDs to collect; a value of 0 or less yields an empty list.

    Returns:
        list of str : the collected IDs, in iteration order.
    """
    out = []
    # Without this guard the size check below, which only runs after an append,
    # would still let one ID through for limit <= 0.
    if limit <= 0:
        return out

    for g in genes:
        gid = getattr(g, "id", getattr(g, "gene_id", g))
        if is_placeholder(gid):
            continue
        out.append(str(gid))
        # Stop as soon as the sample is full so the rest of `genes` is not scanned.
        if len(out) >= limit:
            break
    return out
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
170
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
171
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
172 ###############################- ENTRY POINT -################################
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
173 def main(args:List[str] = None) -> None:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
174 """
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
175 Initialize and generate custom data based on the frontend input arguments.
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
176
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
177 Returns:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
178 None
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
179 """
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
180 # Parse args from frontend (Galaxy XML)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
181 global ARGS
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
182 ARGS = process_args(args)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
183
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
184
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
185 if ARGS.input:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
186 # Load a custom model from file
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
187 model = load_custom_model(
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
188 utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
189 else:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
190 # Load a built-in model
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
191
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
192 try:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
193 model_enum = utils.Model[ARGS.model] # e.g., Model['ENGRO2']
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
194 except KeyError:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
195 raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
196
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
197 # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
198 try:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
199 model = model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
200 except Exception as e:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
201 # Wrap/normalize load errors as DataErr for consistency
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
202 raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
203
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
204 # Determine final model name: explicit --name overrides, otherwise use the model id
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
205
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
206 model_name = ARGS.name if ARGS.name else ARGS.model
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
207
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
208 if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
209 df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
210 ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
211 medium = df_mediums[[ARGS.medium_selector]]
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
212 medium = medium[ARGS.medium_selector].to_dict()
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
213
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
214 # Reset all medium reactions lower bound to zero
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
215 for rxn_id, _ in model.medium.items():
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
216 model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
217
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
218 # Apply selected medium uptake bounds (negative for uptake)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
219 for reaction, value in medium.items():
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
220 if value is not None:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
221 model.reactions.get_by_id(reaction).lower_bound = -float(value)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
222
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
223 # Initialize translation_issues dictionary
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
224 translation_issues = {}
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
225
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
226 if (ARGS.name == "Recon" or ARGS.name == "ENGRO2") and ARGS.gene_format != "Default":
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
227 logging.basicConfig(level=logging.INFO)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
228 logger = logging.getLogger(__name__)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
229
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
230 model, translation_issues = modelUtils.translate_model_genes(
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
231 model=model,
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
232 mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
233 target_nomenclature=ARGS.gene_format,
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
234 source_nomenclature='HGNC_symbol',
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
235 logger=logger
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
236 )
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
237
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
238 if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
239 logging.basicConfig(level=logging.INFO)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
240 logger = logging.getLogger(__name__)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
241
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
242 tmp_check = []
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
243 for g in model.genes[1:5]: # check first 3 genes only
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
244 tmp_check.append(modelUtils.gene_type(g.id, "Custom_model"))
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
245
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
246 if len(set(tmp_check)) > 1:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
247 raise utils.DataErr("Custom_model", "The custom model contains genes with mixed or unrecognized nomenclature. Please ensure all genes use the same recognized nomenclature before applying gene_format conversion.")
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
248 else:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
249 source_nomenclature = tmp_check[0]
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
250
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
251 if source_nomenclature != ARGS.gene_format:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
252 model, translation_issues = modelUtils.translate_model_genes(
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
253 model=model,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
254 mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
255 target_nomenclature=ARGS.gene_format,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
256 source_nomenclature=source_nomenclature,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
257 logger=logger
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
258 )
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
259
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
260
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
261
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
262
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
263 if ARGS.name == "Custom_model" and ARGS.gene_format != "Default":
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
264 logger = logging.getLogger(__name__)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
265
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
266 # Take a small, clean sample of gene IDs (skipping placeholders like 0)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
267 ids_sample = sample_valid_gene_ids(model.genes, limit=10)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
268 if not ids_sample:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
269 raise utils.DataErr(
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
270 "Custom_model",
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
271 "No valid gene IDs found (many may be placeholders like 0)."
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
272 )
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
273
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
274 # Detect source nomenclature on the sample
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
275 types = []
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
276 for gid in ids_sample:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
277 try:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
278 t = modelUtils.gene_type(gid, "Custom_model")
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
279 except Exception as e:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
280 # Keep it simple: skip problematic IDs
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
281 logger.debug(f"gene_type failed for {gid}: {e}")
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
282 t = None
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
283 if t:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
284 types.append(t)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
285
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
286 if not types:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
287 raise utils.DataErr(
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
288 "Custom_model",
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
289 "Could not detect a known gene nomenclature from the sample."
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
290 )
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
291
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
292 unique_types = set(types)
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
293 if len(unique_types) > 1:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
294 raise utils.DataErr(
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
295 "Custom_model",
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
296 "Mixed or inconsistent gene nomenclatures detected. "
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
297 "Please unify them before converting."
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
298 )
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
299
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
300 source_nomenclature = types[0]
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
301
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
302 # Convert only if needed
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
303 if source_nomenclature != ARGS.gene_format:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
304 model, translation_issues = modelUtils.translate_model_genes(
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
305 model=model,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
306 mapping_df= pd.read_csv(ARGS.tool_dir + "/local/mappings/genes_human.csv", dtype={'entrez_id': str}),
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
307 target_nomenclature=ARGS.gene_format,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
308 source_nomenclature=source_nomenclature,
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
309 logger=logger
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
310 )
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
311
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
312 # generate data
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
313 rules = modelUtils.generate_rules(model, asParsed = False)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
314 reactions = modelUtils.generate_reactions(model, asParsed = False)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
315 bounds = modelUtils.generate_bounds(model)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
316 medium = modelUtils.get_medium(model)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
317 objective_function = modelUtils.extract_objective_coefficients(model)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
318
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
319 if ARGS.name == "ENGRO2":
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
320 compartments = modelUtils.generate_compartments(model)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
321
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
322 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "GPR"])
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
323 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Formula"])
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
324
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
325 # Create DataFrame for translation issues
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
326 df_translation_issues = pd.DataFrame([
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
327 {"ReactionID": rxn_id, "TranslationIssues": issues}
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
328 for rxn_id, issues in translation_issues.items()
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
329 ])
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
330
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
331 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
332 df_medium = medium.rename(columns = {"reaction": "ReactionID"})
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
333 df_medium["InMedium"] = True
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
334
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
335 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
336 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
337 merged = merged.merge(objective_function, on = "ReactionID", how = "outer")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
338 if ARGS.name == "ENGRO2":
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
339 merged = merged.merge(compartments, on = "ReactionID", how = "outer")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
340 merged = merged.merge(df_medium, on = "ReactionID", how = "left")
490
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
341
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
342 # Add translation issues column
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
343 if not df_translation_issues.empty:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
344 merged = merged.merge(df_translation_issues, on = "ReactionID", how = "left")
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
345 merged["TranslationIssues"] = merged["TranslationIssues"].fillna("")
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
346 else:
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
347 # Add empty TranslationIssues column if no issues found
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
348 #merged["TranslationIssues"] = ""
c6ea189ea7e9 Uploaded
francesco_lapi
parents: 457
diff changeset
349 pass
457
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
350
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
351 merged["InMedium"] = merged["InMedium"].fillna(False)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
352
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
353 merged = merged.sort_values(by = "InMedium", ascending = False)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
354
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
355 if not ARGS.out_tabular:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
356 raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
357 save_as_tabular_df(merged, ARGS.out_tabular)
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
358 expected = ARGS.out_tabular
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
359
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
360 # verify output exists and non-empty
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
361 if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
362 raise utils.DataErr(expected, "Output not created or empty")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
363
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
364 print("Metabolic_model_setting: completed successfully")
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
365
5b625d91bc7f Uploaded
francesco_lapi
parents:
diff changeset
# Script entry point: run the tool only when this file is executed directly,
# so that importing the module has no side effects.
if __name__ == "__main__":
    main()