Comparison: COBRAxy/custom_data_generator.py @ 343:4165326259cc (draft)
| field | value |
|---|---|
| author | francesco_lapi |
| date | Thu, 04 Sep 2025 22:09:42 +0000 |
| parents | 3fca9b568faf |
| children | 25862d166f88 |
| description | Uploaded |
| 342:057909a104d9 (old) | 343:4165326259cc (new) |
|---|---|
| 8 import utils.rule_parsing as rulesUtils | 8 import utils.rule_parsing as rulesUtils |
| 9 from typing import Optional, Tuple, Union, List, Dict | 9 from typing import Optional, Tuple, Union, List, Dict |
| 10 import utils.reaction_parsing as reactionUtils | 10 import utils.reaction_parsing as reactionUtils |
| 11 | 11 |
| 12 ARGS : argparse.Namespace | 12 ARGS : argparse.Namespace |
| 13 def process_args(args:List[str] = None) -> argparse.Namespace: | 13 def process_args(args: List[str] = None) -> argparse.Namespace: |
| 14 """ | 14 """ |
| 15 Interfaces the script of a module with its frontend, making the user's choices for | 15 Parse command-line arguments for CustomDataGenerator. |
| 16 various parameters available as values in code. | 16 """ |
| 17 | 17 |
| 18 Args: | |
| 19 args : Always obtained (in file) from sys.argv | |
| 20 | |
| 21 Returns: | |
| 22 Namespace : An object containing the parsed arguments | |
| 23 """ | |
| 24 parser = argparse.ArgumentParser( | 18 parser = argparse.ArgumentParser( |
| 25 usage = "%(prog)s [options]", | 19 usage="%(prog)s [options]", |
| 26 description = "generate custom data from a given model") | 20 description="Generate custom data from a given model" |
| 27 | 21 ) |
| 28 parser.add_argument("-ol", "--out_log", type = str, required = True, help = "Output log") | 22 |
| 29 | 23 parser.add_argument("--out_log", type=str, required=True, |
| 30 parser.add_argument("-orules", "--out_rules", type = str, required = True, help = "Output rules") | 24 help="Output log file") |
| 31 parser.add_argument("-orxns", "--out_reactions", type = str, required = True, help = "Output reactions") | 25 parser.add_argument("--out_data", type=str, required=True, |
| 32 parser.add_argument("-omedium", "--out_medium", type = str, required = True, help = "Output medium") | 26 help="Single output dataset (CSV or Excel)") |
| 33 parser.add_argument("-obnds", "--out_bounds", type = str, required = True, help = "Output bounds") | 27 |
| 34 | 28 parser.add_argument("--model", type=str, |
| 35 parser.add_argument("-id", "--input", type = str, required = True, help = "Input model") | 29 help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)") |
| 36 parser.add_argument("-mn", "--name", type = str, required = True, help = "Input model name") | 30 parser.add_argument("--input", type=str, |
| 37 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in | 31 help="Custom model file (JSON or XML)") |
| 38 parser.add_argument('-idop', '--output_path', type = str, default='result', help = 'output path for maps') | 32 parser.add_argument("--name", type=str, required=True, |
| 39 argsNamespace = parser.parse_args(args) | 33 help="Model name (default or custom)") |
| 40 # ^ can't get this one to work from xml, there doesn't seem to be a way to get the directory attribute from the collection | 34 |
| 41 | 35 parser.add_argument("--medium_selector", type=str, required=True, |
| 42 return argsNamespace | 36 help="Medium selection option (default/custom)") |
| | 37 parser.add_argument("--medium", type=str, |
| | 38 help="Custom medium file if medium_selector=Custom") |
| | 39 |
| | 40 parser.add_argument("--output_format", type=str, choices=["tabular", "xlsx"], required=True, |
| | 41 help="Output format: CSV (tabular) or Excel (xlsx)") |
| | 42 |
| | 43 parser.add_argument('-idop', '--output_path', type = str, default='result', |
| | 44 help = 'output path for the result files (default: result)') |
| | 45 |
| | 46 |
| | 47 return parser.parse_args(args) |
| 43 | 48 |
| 44 ################################- INPUT DATA LOADING -################################ | 49 ################################- INPUT DATA LOADING -################################ |
| 45 def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model: | 50 def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model: |
| 46 """ | 51 """ |
| 47 Loads a custom model from a file, either in JSON or XML format. | 52 Loads a custom model from a file, either in JSON or XML format. |
| 193 # get args from frontend (related xml) | 198 # get args from frontend (related xml) |
| 194 global ARGS | 199 global ARGS |
| 195 ARGS = process_args(args) | 200 ARGS = process_args(args) |
| 196 | 201 |
| 197 # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this! | 202 # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this! |
| 198 if os.path.isdir(ARGS.output_path) == False: os.makedirs(ARGS.output_path) | 203 if os.path.isdir(ARGS.output_path) == False: |
| 199 | 204 os.makedirs(ARGS.output_path) |
| 200 # load custom model | 205 |
| 201 model = load_custom_model( | 206 if ARGS.input != "None": |
| 202 utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext) | 207 # load custom model |
| | 208 model = load_custom_model( |
| | 209 utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext) |
| | 210 else: |
| | 211 # load built-in model |
| | 212 |
| | 213 try: |
| | 214 model_enum = utils.Model[ARGS.model] # e.g., Model['ENGRO2'] |
| | 215 except KeyError: |
| | 216 raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model) |
| | 217 |
| | 218 # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models) |
| | 219 try: |
| | 220 model = model_enum.getCOBRAmodel(toolDir='../../COBRAxy') |
| | 221 except Exception as e: |
| | 222 # Wrap/normalize load errors as DataErr for consistency |
| | 223 raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}") |
| | 224 |
| | 225 # Determine final model name: explicit --name overrides, otherwise use the model id |
| | 226 model_name = ARGS.name if ARGS.name else ARGS.model |
| 203 | 227 |
| 204 # generate data | 228 # generate data |
| 205 rules = generate_rules(model, asParsed = False) | 229 rules = generate_rules(model, asParsed = False) |
| 206 reactions = generate_reactions(model, asParsed = False) | 230 reactions = generate_reactions(model, asParsed = False) |
| 207 bounds = generate_bounds(model) | 231 bounds = generate_bounds(model) |
| 208 medium = get_medium(model) | 232 medium = get_medium(model) |
| 209 | 233 |
| 210 # save files out of collection: path coming from xml | 234 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) |
| 211 save_as_csv(rules, ARGS.out_rules, ("ReactionID", "Rule")) | 235 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) |
| 212 save_as_csv(reactions, ARGS.out_reactions, ("ReactionID", "Reaction")) | 236 |
| 213 bounds.to_csv(ARGS.out_bounds, sep = '\t') | 237 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) |
| 214 medium.to_csv(ARGS.out_medium, sep = '\t') | 238 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) |
| | 239 df_medium["InMedium"] = True # flag indicating presence in the medium |
| | 240 |
| | 241 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") |
| | 242 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") |
| | 243 |
| | 244 merged = merged.merge(df_medium, on = "ReactionID", how = "left") |
| | 245 |
| | 246 merged["InMedium"] = merged["InMedium"].fillna(False) |
| | 247 |
| | 248 merged = merged.sort_values(by = "InMedium", ascending = False) |
| | 249 |
| | 250 out_file = os.path.join(ARGS.output_path, f"{os.path.basename(ARGS.name).split('.')[0]}_custom_data") |
| | 251 |
| | 252 #merged.to_csv(out_file, sep = '\t', index = False) |
| | 253 |
| | 254 |
| | 255 #### |
| | 256 out_data_path = out_file #ARGS.out_data |
| | 257 |
| | 258 # If Galaxy provided a .dat name, ensure a correct extension according to output_format |
| | 259 if ARGS.output_format == "xlsx": |
| | 260 if not out_data_path.lower().endswith(".xlsx"): |
| | 261 out_data_path = out_data_path + ".xlsx" |
| | 262 merged.to_excel(out_data_path, index=False) |
| | 263 else: |
| | 264 # 'tabular' -> tab-separated, extension .csv is fine and common for Galaxy tabular |
| | 265 if not (out_data_path.lower().endswith(".csv") or out_data_path.lower().endswith(".tsv")): |
| | 266 out_data_path = out_data_path + ".csv" |
| | 267 merged.to_csv(out_data_path, sep="\t", index=False) |
| | 268 |
| 215 | 269 |
| 216 if __name__ == '__main__': | 270 if __name__ == '__main__': |
| 217 main() | 271 main() |
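
The reworked `process_args` in revision 343 drops the four per-file outputs (`--out_rules`, `--out_reactions`, `--out_medium`, `--out_bounds`) in favour of a single `--out_data` dataset plus `--output_format`, and adds `--model`/`--medium_selector` for built-in models and media. The sketch below drives the new parser programmatically; the flag names come from the diff above, while the example values and the assumption that `custom_data_generator` and its `utils` package are importable are illustrative only.

```python
# Sketch only, not part of the changeset: exercising the revision-343 CLI.
# Assumes COBRAxy (custom_data_generator.py and its utils package) is on sys.path.
from custom_data_generator import process_args

argv = [
    "--out_log", "run.log",
    "--out_data", "ENGRO2_custom_data.tsv",
    "--model", "ENGRO2",            # built-in model id; pass --input/--name for a custom JSON/XML model
    "--name", "ENGRO2",
    "--medium_selector", "Default",
    "--output_format", "tabular",   # or "xlsx" for an Excel workbook
    "--output_path", "result",
]

args = process_args(argv)
print(args.model, args.output_format, args.output_path)
```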
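The new `main` likewise no longer writes four separate tabular files: it builds one table keyed on `ReactionID` (reaction, rule, bounds, and an `InMedium` flag) and writes it as tab-separated text or an Excel workbook depending on `--output_format`. The toy example below reproduces that merge pattern with hand-made frames; the bounds column names (`lower_bound`, `upper_bound`) are an assumption for illustration, since `generate_bounds` is not shown in this hunk.

```python
# Toy illustration (not from the changeset) of the outer-merge strategy in main().
import pandas as pd

df_rules = pd.DataFrame({"ReactionID": ["R1", "R2"], "Rule": ["gene_a or gene_b", ""]})
df_reactions = pd.DataFrame({"ReactionID": ["R1", "R2"], "Reaction": ["a --> b", "b <=> c"]})
# Column names below are assumed; the real frame comes from generate_bounds(model).
df_bounds = pd.DataFrame({"ReactionID": ["R1", "R2"],
                          "lower_bound": [0.0, -1000.0],
                          "upper_bound": [1000.0, 1000.0]})
df_medium = pd.DataFrame({"ReactionID": ["R2"]})
df_medium["InMedium"] = True  # flag indicating presence in the medium

merged = (df_reactions
          .merge(df_rules, on="ReactionID", how="outer")
          .merge(df_bounds, on="ReactionID", how="outer")
          .merge(df_medium, on="ReactionID", how="left"))
merged["InMedium"] = merged["InMedium"].fillna(False)
merged = merged.sort_values(by="InMedium", ascending=False)

merged.to_csv("ENGRO2_custom_data.csv", sep="\t", index=False)  # tabular branch; to_excel for xlsx
```

Rows present in the medium sort to the top, matching the `sort_values(by = "InMedium", ascending = False)` call in the diff.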
