# HG changeset patch
# User luca_milaz
# Date 1728819508 0
# Node ID f4f93df8c221afdc69b3eff86ef7d6e906bf58cb
# Parent a48b2e06ebe7566cee76220785843f2f5999f457
Uploaded
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/CBS_backend.py
--- a/cobraxy-9688ad27287b/COBRAxy/utils/CBS_backend.py Sun Oct 13 11:35:56 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,200 +0,0 @@
-from swiglpk import *
-import random
-import pandas as pd
-import numpy as np
-import cobra as cb
-
-# Initialize LP problem
-def initialize_lp_problem(S):
-
- len_vector=len(S.keys())
- values=list(S.values())
- indexes=list(S.keys())
- ia = intArray(len_vector+1);
- ja = intArray(len_vector+1);
- ar = doubleArray(len_vector+1);
-
- i=0
- ind_row=[indexes[i][0]+1 for i in range(0, len(values) )]
- ind_col=[indexes[i][1]+1 for i in range(0, len(values) )]
- for i in range(1, len(values) + 1):
- ia[i]=ind_row[i-1]
- ja[i]=ind_col[i-1]
- ar[i] = values[i-1]
-
- nrows=S.shape[0]
- ncol=S.shape[1]
-
- return len_vector, values, indexes, ia, ja, ar, nrows, ncol
-
-
-
-# Solve LP problem from the structure of the metabolic model
-def create_and_solve_lp_problem(lb,ub,nrows, ncol, len_vector, ia, ja, ar,
- obj_coefs,reactions,return_lp=False):
-
-
- lp = glp_create_prob();
- glp_set_prob_name(lp, "sample");
- glp_set_obj_dir(lp, GLP_MAX);
- glp_add_rows(lp, nrows);
- eps = 1e-16
- for i in range(nrows):
- glp_set_row_name(lp, i+1, "constrain_"+str(i+1));
- glp_set_row_bnds(lp, i+1, GLP_FX, 0.0, 0.0);
- glp_add_cols(lp, ncol);
- for i in range(ncol):
- glp_set_col_name(lp, i+1, "flux_"+str(i+1));
- glp_set_col_bnds(lp, i+1, GLP_DB,lb[i]-eps,ub[i]+eps);
- glp_load_matrix(lp, len_vector, ia, ja, ar);
-
- try:
- fluxes,Z=solve_lp_problem(lp,obj_coefs,reactions)
- if return_lp:
- return fluxes,Z,lp
- else:
- glp_delete_prob(lp);
- return fluxes,Z
- except Exception as e:
- glp_delete_prob(lp)
- raise Exception(e)
-
-
-# Solve LP problem from the structure of the metabolic model
-def solve_lp_problem(lp,obj_coefs,reactions):
-
- # Set the coefficients of the objective function
- i=1
- for ind_coef in obj_coefs:
- glp_set_obj_coef(lp, i, ind_coef);
- i+=1
-
- # Initialize the parameters
- params=glp_smcp()
- params.presolve=GLP_ON
- params.msg_lev = GLP_MSG_ALL
- params.tm_lim=4000
- glp_init_smcp(params)
-
- # Solve the problem
- glp_scale_prob(lp,GLP_SF_AUTO)
-
- value=glp_simplex(lp, params)
-
- Z = glp_get_obj_val(lp);
-
- if value == 0:
- fluxes = []
- for i in range(len(reactions)): fluxes.append(glp_get_col_prim(lp, i+1))
- return fluxes,Z
- else:
- raise Exception("error in LP problem. Problem:",str(value))
-
-
-# Create LP structure
-def create_lp_structure(model):
-
- reactions=[el.id for el in model.reactions]
- coefs_obj=[reaction.objective_coefficient for reaction in model.reactions]
-
- # Lower and upper bounds
- lb=[reaction.lower_bound for reaction in model.reactions]
- ub=[reaction.upper_bound for reaction in model.reactions]
-
- # Create S matrix
- S=cb.util.create_stoichiometric_matrix(model,array_type="dok")
-
- return S,lb,ub,coefs_obj,reactions
-
-# CBS sampling interface
-def randomObjectiveFunctionSampling(model, nsample, coefficients_df, df_sample):
-
- S,lb,ub,coefs_obj,reactions = create_lp_structure(model)
- len_vector, values, indexes, ia, ja, ar, nrow, ncol = initialize_lp_problem(S)
-
- for i in range(nsample):
-
- coefs_obj=coefficients_df.iloc[:,i].values
-
- if coefs_obj[-1]==1: #minimize
- coefs_obj= coefs_obj[0:-1] * -1
- else:
- coefs_obj=coefs_obj[0:-1]
-
- fluxes,Z = create_and_solve_lp_problem(lb,ub, nrow, ncol, len_vector,
- ia, ja, ar, coefs_obj,reactions,return_lp=False)
- df_sample.loc[i] = fluxes
- pass
-
-def randomObjectiveFunctionSampling_cobrapy(model, nsample, coefficients_df, df_sample):
-
- for i in range(nsample):
-
- dict_coeff={}
- if(coefficients_df.iloc[-1][i]==1):
- type_problem = -1 #minimize
- else:
- type_problem = 1
-
- for rxn in [reaction.id for reaction in model.reactions]:
- dict_coeff[model.reactions.get_by_id(rxn)] = coefficients_df.loc[rxn][i] * type_problem
-
- model.objective = dict_coeff
- solution = model.optimize().fluxes
- for rxn, flux in solution.items():
- df_sample.loc[i][rxn] = flux
-
- pass
-
-# Create random coefficients for CBS
-def randomObjectiveFunction(model, n_samples, df_fva, seed=0):
-
-
- #reactions = model.reactions
- reactions = [reaction.id for reaction in model.reactions]
- cont=seed
- list_ex=reactions.copy()
- list_ex.append("type_of_problem")
- coefficients_df = pd.DataFrame(index=list_ex,columns=[str(i) for i in range(n_samples)])
-
- for i in range(0, n_samples):
-
- cont=cont+1
- random.seed(cont)
-
- # Genera un numero casuale tra 0 e 1
- threshold = random.random() #coefficiente tra 0 e 1
-
- for reaction in reactions:
-
- cont=cont+1
- random.seed(cont)
-
- val=random.random()
-
- if val>threshold:
-
- cont=cont+1
- random.seed(cont)
-
- c=2*random.random()-1 #coefficiente tra -1 e 1
-
- val_max=np.max([df_fva.loc[reaction,"minimum"],df_fva.loc[reaction,"maximum"]])
-
- if val_max!=0: #solo se la fva è diversa da zero
- coefficients_df.loc[reaction,str(i)] = c/val_max #divido per la fva
- else:
- coefficients_df.loc[reaction,str(i)] = 0
-
- else:
- coefficients_df.loc[reaction,str(i)] = 0
-
- cont=cont+1
- random.seed(cont)
-
- if random.random()<0.5:
- coefficients_df.loc["type_of_problem",str(i)] = 0 #maximize
- else:
- coefficients_df.loc["type_of_problem",str(i)] = 1 #minimize
-
- return coefficients_df
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/GSOC project submission.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/GSOC project submission.html Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,65 @@
+
+
+
+
+
+ Google Summer of Code 2024 - COBRAxy: COBRA and MaREA4Galaxy
+
+
+
+
+
Google Summer of Code 2024
+
COBRAxy: COBRA and MaREA4Galaxy
+
National Resource for Network Biology (NRNB)
+
Mentors:
+
+
Alex Graudenzi, alex.graudenzi@unimib.it
+
Chiara Damiani, chiara.damiani@unimib.it
+
Marco Antoniotti, marco.antoniotti@unimib.it
+
+
Contributor:
+
+
Luca Milazzo (University of Milano-Bicocca) – lucmil2000@gmail.com, luca.milazzo@epfl.ch
+
+
+
Project Description
+
+ The project focused on developing an advanced Galaxy tool that enhances the data mapping capabilities of MaREA4Galaxy. The extension of this framework includes the analysis of fluxomics data, starting from a metabolic model and progressing to the representation of up-regulated fluxes on a metabolic map. This tool enables users to perform constraint-based enrichment analysis of metabolic pathways.
+
+
The primary goals of the project were:
+
+
Create a flux sampling and analysis interface to allow users to work with constraint-based metabolic models (e.g., sampling algorithms, FBA, pFBA, and FVA).
+
Adapt the existing clustering module to cluster fluxomics data and implement additional clustering algorithms (e.g., Leiden and Louvain).
+
Build upon the existing module for visualizing enriched reactions based on RAS to create a new module for enrichment analysis of metabolic pathways based on simulated fluxomics data, and visualize the results on the metabolic map.
+
+
+
What I Did
+
+
Updated all existing modules of MaREA4Galaxy to use recent versions of Python libraries, ensuring greater future compatibility.
+
Modified the "Custom Data Generator" tool to extract rules, reactions, bounds, and medium information from a COBRA model.
+
Developed the "RAS to Bound" tool, which generates metabolic reaction bounds based on the RAS matrix and a growth medium (either custom or one of 26 pre-defined settings), enabling the creation of cell-specific bounds from a generic metabolic model (e.g., ENGRO2 or a custom model).
+
Developed the "Flux Simulation" tool, allowing users to sample multiple metabolic models using cell-specific bounds, employing the CBS and OPTGP algorithms. This tool also supports flux analysis using FBA, pFBA, FVA, and biomass sensitivity analysis.
+
Developed the "Metabolic Flux Enrichment Analysis" tool, which visualizes up-regulated fluxes identified by the "Flux Simulation" tool, compares different sub-classes identified by the clustering tool over fluxomics data, and visualizes all results on the metabolic map.
+
+
+
Current State and Future Extensions
+
+ Currently, the updated MaREA4Galaxy tool allows users to perform constraint-based enrichment analysis of metabolic pathways using RNA-seq profiles by simulating fluxomics. Additionally, users can compare different sub-populations identified by the clustering tool. The architecture minimizes computational costs by handling cell-specific models through a set of bounds, without storing complete COBRA models, which would contain a large amount of redundant information.
+
+
+ The implementation of the "Metabolic Flux Enrichment Analysis" tool did not leave enough time to extend the clustering module to new algorithms such as HDBSCAN, Leiden, and Louvain. This is a potential future extension to consider. Moreover, implementing a more advanced clustering grid search could further optimize clustering results.
+
+
+
About the Code
+
+ I worked on the Mercurial repository of MaREA4Galaxy, where this document is stored. I committed all my changes, as shown by the repository history, though without using any Git-like merge operations due to the limitations of the Mercurial interface.
+
+
+
Conclusions
+
+ Over the past years, I have focused on biology-related subjects, particularly metabolic fluxes and other omics data such as gene expression datasets. Through this project, I was able to apply the knowledge I have gained in constraint-based modeling, flux sampling, and omics enrichment analysis by expanding the MaREA4Galaxy tool. This experience not only enhanced my programming skills but also deepened my understanding of the real needs of biologists when working with such omics data.
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/README.md Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,11 @@
+# Official repository for the COBRAxy toolset
+> COBRAxy (COBRApy in Galaxy) is a user-friendly tool that allows a user to characterize and to graphically compare simulated fluxomics coming from groups of samples with different transcriptional regulation of metabolism.
+It extends the MaREA 2 (Metabolic Reaction Enrichment Analysis) tool that enables users to compare groups in terms of RAS and RPS only. The tool is available as a plug-in for the widely-used Galaxy platform for comparative genomics and bioinformatics analyses.
+
+## Useful links:
+- COBRAxy Google Summer of Code 2024: https://summerofcode.withgoogle.com/programs/2024/projects/LSrCKfq7
+- COBRApy: https://opencobra.github.io/cobrapy/
+- MaREA4Galaxy: https://galaxyproject.org/use/marea4galaxy/
+- Galaxy project: https://usegalaxy.org/
+
+## Documentation:
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/custom_data_generator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/custom_data_generator.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,218 @@
+import os
+import csv
+import cobra
+import pickle
+import argparse
+import pandas as pd
+import utils.general_utils as utils
+import utils.rule_parsing as rulesUtils
+from typing import Optional, Tuple, Union, Dict
+import utils.reaction_parsing as reactionUtils
+
+ARGS : argparse.Namespace
+def process_args() -> argparse.Namespace:
+ """
+ Interfaces the script of a module with its frontend, making the user's choices for
+ various parameters available as values in code.
+
+ Args:
+ args : Always obtained (in file) from sys.argv
+
+ Returns:
+ Namespace : An object containing the parsed arguments
+ """
+ parser = argparse.ArgumentParser(
+ usage = "%(prog)s [options]",
+ description = "generate custom data from a given model")
+
+ parser.add_argument("-ol", "--out_log", type = str, required = True, help = "Output log")
+
+ parser.add_argument("-orules", "--out_rules", type = str, required = True, help = "Output rules")
+ parser.add_argument("-orxns", "--out_reactions", type = str, required = True, help = "Output reactions")
+ parser.add_argument("-omedium", "--out_medium", type = str, required = True, help = "Output medium")
+ parser.add_argument("-obnds", "--out_bounds", type = str, required = True, help = "Output bounds")
+
+ parser.add_argument("-id", "--input", type = str, required = True, help = "Input model")
+ parser.add_argument("-mn", "--name", type = str, required = True, help = "Input model name")
+ # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
+
+ argsNamespace = parser.parse_args()
+ argsNamespace.out_dir = "result"
+ # ^ can't get this one to work from xml, there doesn't seem to be a way to get the directory attribute from the collection
+
+ return argsNamespace
+
+################################- INPUT DATA LOADING -################################
+def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
+ """
+ Loads a custom model from a file, either in JSON or XML format.
+
+ Args:
+ file_path : The path to the file containing the custom model.
+ ext : explicit file extension. Necessary for standard use in galaxy because of its weird behaviour.
+
+ Raises:
+ DataErr : if the file is in an invalid format or cannot be opened for whatever reason.
+
+ Returns:
+ cobra.Model : the model, if successfully opened.
+ """
+ ext = ext if ext else file_path.ext
+ try:
+ if ext is utils.FileFormat.XML:
+ return cobra.io.read_sbml_model(file_path.show())
+
+ if ext is utils.FileFormat.JSON:
+ return cobra.io.load_json_model(file_path.show())
+
+ except Exception as e: raise utils.DataErr(file_path, e.__str__())
+ raise utils.DataErr(file_path,
+ f"Formato \"{file_path.ext}\" non riconosciuto, sono supportati solo file JSON e XML")
+
+################################- DATA GENERATION -################################
+ReactionId = str
+def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
+ """
+ Generates a dictionary mapping reaction ids to rules from the model.
+
+ Args:
+ model : the model to derive data from.
+ asParsed : if True parses the rules to an optimized runtime format, otherwise leaves them as strings.
+
+ Returns:
+ Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules.
+ Dict[ReactionId, str] : the generated dictionary of raw rules.
+ """
+ # Is the below approach convoluted? yes
+ # Ok but is it inefficient? probably
+ # Ok but at least I don't have to repeat the check at every rule (I'm clinically insane)
+ _ruleGetter = lambda reaction : reaction.gene_reaction_rule
+ ruleExtractor = (lambda reaction :
+ rulesUtils.parseRuleToNestedList(_ruleGetter(reaction))) if asParsed else _ruleGetter
+
+ return {
+ reaction.id : ruleExtractor(reaction)
+ for reaction in model.reactions
+ if reaction.gene_reaction_rule }
+
+def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
+ """
+ Generates a dictionary mapping reaction ids to reaction formulas from the model.
+
+ Args:
+ model : the model to derive data from.
+ asParsed : if True parses the reactions to an optimized runtime format, otherwise leaves them as they are.
+
+ Returns:
+ Dict[ReactionId, str] : the generated dictionary.
+ """
+
+ unparsedReactions = {
+ reaction.id : reaction.reaction
+ for reaction in model.reactions
+ if reaction.reaction
+ }
+
+ if not asParsed: return unparsedReactions
+
+ return reactionUtils.create_reaction_dict(unparsedReactions)
+
+def get_medium(model:cobra.Model) -> pd.DataFrame:
+ trueMedium=[]
+ for r in model.reactions:
+ positiveCoeff=0
+ for m in r.metabolites:
+ if r.get_coefficient(m.id)>0:
+ positiveCoeff=1;
+ if (positiveCoeff==0 and r.lower_bound<0):
+ trueMedium.append(r.id)
+
+ df_medium = pd.DataFrame()
+ df_medium["reaction"] = trueMedium
+ return df_medium
+
+def generate_bounds(model:cobra.Model) -> pd.DataFrame:
+
+ rxns = []
+ for reaction in model.reactions:
+ rxns.append(reaction.id)
+
+ bounds = pd.DataFrame(columns = ["lower_bound", "upper_bound"], index=rxns)
+
+ for reaction in model.reactions:
+ bounds.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound]
+ return bounds
+
+
+###############################- FILE SAVING -################################
+def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
+ """
+ Saves any dictionary-shaped data in a .csv file created at the given file_path as FilePath.
+
+ Args:
+ data : the data to be written to the file.
+ file_path : the path to the .csv file.
+ fieldNames : the names of the fields (columns) in the .csv file.
+
+ Returns:
+ None
+ """
+ with open(file_path.show(), 'w', newline='') as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+ writer.writeheader()
+
+ for key, value in data.items():
+ writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
+ """
+ Saves any dictionary-shaped data in a .csv file created at the given file_path as string.
+
+ Args:
+ data : the data to be written to the file.
+ file_path : the path to the .csv file.
+ fieldNames : the names of the fields (columns) in the .csv file.
+
+ Returns:
+ None
+ """
+ with open(file_path, 'w', newline='') as csvfile:
+ writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+ writer.writeheader()
+
+ for key, value in data.items():
+ writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+###############################- ENTRY POINT -################################
+def main() -> None:
+ """
+ Initializes everything and sets the program in motion based on the fronted input arguments.
+
+ Returns:
+ None
+ """
+ # get args from frontend (related xml)
+ global ARGS
+ ARGS = process_args()
+
+ # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this!
+ if os.path.isdir(ARGS.out_dir) == False: os.makedirs(ARGS.out_dir)
+
+ # load custom model
+ model = load_custom_model(
+ utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
+
+ # generate data
+ rules = generate_rules(model, asParsed = False)
+ reactions = generate_reactions(model, asParsed = False)
+ bounds = generate_bounds(model)
+ medium = get_medium(model)
+
+ # save files out of collection: path coming from xml
+ save_as_csv(rules, ARGS.out_rules, ("ReactionID", "Rule"))
+ save_as_csv(reactions, ARGS.out_reactions, ("ReactionID", "Reaction"))
+ bounds.to_csv(ARGS.out_bounds, sep = '\t')
+ medium.to_csv(ARGS.out_medium, sep = '\t')
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/custom_data_generator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/custom_data_generator.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,63 @@
+
+
+
+ marea_macros.xml
+
+
+
+ numpy
+ pandas
+ cobra
+ lxml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_simulation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_simulation.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,437 @@
+import argparse
+import utils.general_utils as utils
+from typing import Optional, List
+import os
+import numpy as np
+import pandas as pd
+import cobra
+import utils.CBS_backend as CBS_backend
+from joblib import Parallel, delayed, cpu_count
+from cobra.sampling import OptGPSampler
+import sys
+
+################################# process args ###############################
+def process_args(args :List[str]) -> argparse.Namespace:
+ """
+ Processes command-line arguments.
+
+ Args:
+ args (list): List of command-line arguments.
+
+ Returns:
+ Namespace: An object containing parsed arguments.
+ """
+ parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
+ description = 'process some value\'s')
+
+ parser.add_argument('-ol', '--out_log',
+ help = "Output log")
+
+ parser.add_argument('-td', '--tool_dir',
+ type = str,
+ required = True,
+ help = 'your tool directory')
+
+ parser.add_argument('-in', '--input',
+ required = True,
+ type=str,
+ help = 'inputs bounds')
+
+ parser.add_argument('-ni', '--names',
+ required = True,
+ type=str,
+ help = 'cell names')
+
+ parser.add_argument(
+ '-ms', '--model_selector',
+ type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom],
+ help = 'chose which type of model you want use')
+
+ parser.add_argument("-mo", "--model", type = str)
+
+ parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name")
+
+ parser.add_argument('-a', '--algorithm',
+ type = str,
+ choices = ['OPTGP', 'CBS'],
+ required = True,
+ help = 'choose sampling algorithm')
+
+ parser.add_argument('-th', '--thinning',
+ type = int,
+ default= 100,
+ required=False,
+ help = 'choose thinning')
+
+ parser.add_argument('-ns', '--n_samples',
+ type = int,
+ required = True,
+ help = 'choose how many samples')
+
+ parser.add_argument('-sd', '--seed',
+ type = int,
+ required = True,
+ help = 'seed')
+
+ parser.add_argument('-nb', '--n_batches',
+ type = int,
+ required = True,
+ help = 'choose how many batches')
+
+ parser.add_argument('-ot', '--output_type',
+ type = str,
+ required = True,
+ help = 'output type')
+
+ parser.add_argument('-ota', '--output_type_analysis',
+ type = str,
+ required = False,
+ help = 'output type analysis')
+
+ ARGS = parser.parse_args()
+ return ARGS
+
+########################### warning ###########################################
+def warning(s :str) -> None:
+ """
+ Log a warning message to an output log file and print it to the console.
+
+ Args:
+ s (str): The warning message to be logged and printed.
+
+ Returns:
+ None
+ """
+ with open(ARGS.out_log, 'a') as log:
+ log.write(s + "\n\n")
+ print(s)
+
+
+def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None:
+ dataset.index.name = 'Reactions'
+ dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = keep_index)
+
+############################ dataset input ####################################
+def read_dataset(data :str, name :str) -> pd.DataFrame:
+ """
+ Read a tab-separated file (first row as header, first column as index) into a pandas DataFrame.
+
+ Args:
+ data (str): Path to the tab-separated file containing the dataset.
+ name (str): Name of the dataset, used in error messages.
+
+ Returns:
+ pandas.DataFrame: DataFrame containing the dataset.
+
+ Raises:
+ SystemExit: If the file is empty (pd.errors.EmptyDataError is caught and turned into sys.exit).
+ SystemExit: If the parsed table has fewer than two columns besides the index column.
+ """
+ try:
+ dataset = pd.read_csv(data, sep = '\t', header = 0, index_col=0, engine='python')
+ except pd.errors.EmptyDataError:
+ sys.exit('Execution aborted: wrong format of ' + name + '\n')
+ if len(dataset.columns) < 2:
+ sys.exit('Execution aborted: wrong format of ' + name + '\n')
+ return dataset
+
+
+
+def OPTGP_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, thinning:int=100, n_batches:int=1, seed:int=0)-> None:
+ """
+ Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files.
+
+ Args:
+ model (cobra.Model): The COBRA model to sample from.
+ model_name (str): The name of the model, used in naming output files.
+ n_samples (int, optional): Number of samples per batch. Default is 1000.
+ thinning (int, optional): Thinning parameter for the sampler. Default is 100.
+ n_batches (int, optional): Number of batches to run. Default is 1.
+ seed (int, optional): Random seed for reproducibility. Default is 0.
+
+ Returns:
+ None
+ """
+
+ for i in range(0, n_batches):
+ optgp = OptGPSampler(model, thinning, seed)
+ samples = optgp.sample(n_samples)
+ samples.to_csv(ARGS.output_folder + model_name + '_'+ str(i)+'_OPTGP.csv', index=False)
+ seed+=1
+ samplesTotal = pd.DataFrame()
+ for i in range(0, n_batches):
+ samples_batch = pd.read_csv(ARGS.output_folder + model_name + '_'+ str(i)+'_OPTGP.csv')
+ samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True)
+
+ write_to_file(samplesTotal.T, model_name, True)
+
+ for i in range(0, n_batches):
+ os.remove(ARGS.output_folder + model_name + '_'+ str(i)+'_OPTGP.csv')
+ pass
+
+
+def CBS_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, n_batches:int=1, seed:int=0)-> None:
+ """
+ Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files.
+
+ Args:
+ model (cobra.Model): The COBRA model to sample from.
+ model_name (str): The name of the model, used in naming output files.
+ n_samples (int, optional): Number of samples per batch. Default is 1000.
+ n_batches (int, optional): Number of batches to run. Default is 1.
+ seed (int, optional): Random seed for reproducibility. Default is 0.
+
+ Returns:
+ None
+ """
+
+ df_FVA = cobra.flux_analysis.flux_variability_analysis(model,fraction_of_optimum=0).round(6)
+
+ df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples*n_batches, df_FVA, seed=seed)
+
+ for i in range(0, n_batches):
+ samples = pd.DataFrame(columns =[reaction.id for reaction in model.reactions], index = range(n_samples))
+ try:
+ CBS_backend.randomObjectiveFunctionSampling(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], samples)
+ except Exception as e:
+ utils.logWarning(
+ "Warning: GLPK solver has failed for " + model_name + ". Trying with COBRA interface. Error:" + str(e),
+ ARGS.out_log)
+ CBS_backend.randomObjectiveFunctionSampling_cobrapy(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples],
+ samples)
+ utils.logWarning(ARGS.output_folder + model_name + '_'+ str(i)+'_CBS.csv', ARGS.out_log)
+ samples.to_csv(ARGS.output_folder + model_name + '_'+ str(i)+'_CBS.csv', index=False)
+
+ samplesTotal = pd.DataFrame()
+ for i in range(0, n_batches):
+ samples_batch = pd.read_csv(ARGS.output_folder + model_name + '_'+ str(i)+'_CBS.csv')
+ samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True)
+
+ write_to_file(samplesTotal.T, model_name, True)
+
+ for i in range(0, n_batches):
+ os.remove(ARGS.output_folder + model_name + '_'+ str(i)+'_CBS.csv')
+ pass
+
+
+def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]:
+ """
+ Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm.
+
+ Args:
+ model_input_original (cobra.Model): The original COBRA model.
+ bounds_path (str): Path to the CSV file containing the bounds dataset.
+ cell_name (str): Name of the cell, used to generate filenames for output.
+
+ Returns:
+ List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
+ """
+
+ model_input = model_input_original.copy()
+ bounds_df = read_dataset(bounds_path, "bounds dataset")
+ for rxn_index, row in bounds_df.iterrows():
+ model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
+ model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
+
+ name = cell_name.split('.')[0]
+
+ if ARGS.algorithm == 'OPTGP':
+ OPTGP_sampler(model_input, name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
+
+ elif ARGS.algorithm == 'CBS':
+ CBS_sampler(model_input, name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)
+
+ df_mean, df_median, df_quantiles = fluxes_statistics(name, ARGS.output_types)
+
+ if("fluxes" not in ARGS.output_types):
+ os.remove(ARGS.output_folder + name + '.csv')
+
+ returnList = []
+ returnList.append(df_mean)
+ returnList.append(df_median)
+ returnList.append(df_quantiles)
+
+ df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, name, ARGS.output_type_analysis)
+
+ if("pFBA" in ARGS.output_type_analysis):
+ returnList.append(df_pFBA)
+ if("FVA" in ARGS.output_type_analysis):
+ returnList.append(df_FVA)
+ if("sensitivity" in ARGS.output_type_analysis):
+ returnList.append(df_sensitivity)
+
+ return returnList
+
+def fluxes_statistics(model_name: str, output_types:List)-> List[pd.DataFrame]:
+ """
+ Computes statistics (mean, median, quantiles) for the fluxes.
+
+ Args:
+ model_name (str): Name of the model, used in filename for input.
+ output_types (List[str]): Types of statistics to compute (mean, median, quantiles).
+
+ Returns:
+ List[pd.DataFrame]: List of DataFrames containing mean, median, and quantiles statistics.
+ """
+
+ df_mean = pd.DataFrame()
+ df_median= pd.DataFrame()
+ df_quantiles= pd.DataFrame()
+
+ df_samples = pd.read_csv(ARGS.output_folder + model_name + '.csv', sep = '\t', index_col = 0).T
+ df_samples = df_samples.round(8)
+
+ for output_type in output_types:
+ if(output_type == "mean"):
+ df_mean = df_samples.mean()
+ df_mean = df_mean.to_frame().T
+ df_mean = df_mean.reset_index(drop=True)
+ df_mean.index = [model_name]
+ elif(output_type == "median"):
+ df_median = df_samples.median()
+ df_median = df_median.to_frame().T
+ df_median = df_median.reset_index(drop=True)
+ df_median.index = [model_name]
+ elif(output_type == "quantiles"):
+ newRow = []
+ cols = []
+ for rxn in df_samples.columns:
+ quantiles = df_samples[rxn].quantile([0.25, 0.50, 0.75])
+ newRow.append(quantiles[0.25])
+ cols.append(rxn + "_q1")
+ newRow.append(quantiles[0.5])
+ cols.append(rxn + "_q2")
+ newRow.append(quantiles[0.75])
+ cols.append(rxn + "_q3")
+ df_quantiles = pd.DataFrame(columns=cols)
+ df_quantiles.loc[0] = newRow
+ df_quantiles = df_quantiles.reset_index(drop=True)
+ df_quantiles.index = [model_name]
+
+ return df_mean, df_median, df_quantiles
+
+def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]:
+ """
+ Performs flux analysis including pFBA, FVA, and sensitivity analysis.
+
+ Args:
+ model (cobra.Model): The COBRA model to analyze.
+ model_name (str): Name of the model, used in filenames for output.
+ output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity).
+
+ Returns:
+ List[pd.DataFrame]: List of DataFrames containing pFBA, FVA, and sensitivity analysis results.
+ """
+
+ df_pFBA = pd.DataFrame()
+ df_FVA= pd.DataFrame()
+ df_sensitivity= pd.DataFrame()
+
+ for output_type in output_types:
+ if(output_type == "pFBA"):
+ model.objective = "Biomass"
+ solution = cobra.flux_analysis.pfba(model)
+ fluxes = solution.fluxes
+ df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist()
+ df_pFBA = df_pFBA.reset_index(drop=True)
+ df_pFBA.index = [model_name]
+ df_pFBA = df_pFBA.astype(float).round(6)
+ elif(output_type == "FVA"):
+ fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
+ columns = []
+ for rxn in fva.index.to_list():
+ columns.append(rxn + "_min")
+ columns.append(rxn + "_max")
+ df_FVA= pd.DataFrame(columns = columns)
+ for index_rxn, row in fva.iterrows():
+ df_FVA.loc[0, index_rxn+ "_min"] = fva.loc[index_rxn, "minimum"]
+ df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"]
+ df_FVA = df_FVA.reset_index(drop=True)
+ df_FVA.index = [model_name]
+ df_FVA = df_FVA.astype(float).round(6)
+ elif(output_type == "sensitivity"):
+ model.objective = "Biomass"
+ solution_original = model.optimize().objective_value
+ reactions = model.reactions
+ single = cobra.flux_analysis.single_reaction_deletion(model)
+ newRow = []
+ df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name])
+ for rxn in reactions:
+ newRow.append(single.knockout[rxn.id].growth.values[0]/solution_original)
+ df_sensitivity.loc[model_name] = newRow
+ df_sensitivity = df_sensitivity.astype(float).round(6)
+ return df_pFBA, df_FVA, df_sensitivity
+
+############################# main ###########################################
+def main() -> None:
+ """
+ Initializes everything and sets the program in motion based on the fronted input arguments.
+
+ Returns:
+ None
+ """
+ if not os.path.exists('flux_simulation/'):
+ os.makedirs('flux_simulation/')
+
+ num_processors = cpu_count()
+
+ global ARGS
+ ARGS = process_args(sys.argv)
+
+ ARGS.output_folder = 'flux_simulation/'
+
+
+ model_type :utils.Model = ARGS.model_selector
+ if model_type is utils.Model.Custom:
+ model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
+ else:
+ model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
+
+ ARGS.bounds = ARGS.input.split(",")
+ ARGS.bounds_name = ARGS.names.split(",")
+ ARGS.output_types = ARGS.output_type.split(",")
+ ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
+
+
+ results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name))
+
+ all_mean = pd.concat([result[0] for result in results], ignore_index=False)
+ all_median = pd.concat([result[1] for result in results], ignore_index=False)
+ all_quantiles = pd.concat([result[2] for result in results], ignore_index=False)
+
+ if("mean" in ARGS.output_types):
+ all_mean = all_mean.fillna(0.0)
+ all_mean = all_mean.sort_index()
+ write_to_file(all_mean.T, "mean", True)
+
+ if("median" in ARGS.output_types):
+ all_median = all_median.fillna(0.0)
+ all_median = all_median.sort_index()
+ write_to_file(all_median.T, "median", True)
+
+ if("quantiles" in ARGS.output_types):
+ all_quantiles = all_quantiles.fillna(0.0)
+ all_quantiles = all_quantiles.sort_index()
+ write_to_file(all_quantiles.T, "quantiles", True)
+
+ index_result = 3
+ if("pFBA" in ARGS.output_type_analysis):
+ all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False)
+ all_pFBA = all_pFBA.sort_index()
+ write_to_file(all_pFBA.T, "pFBA", True)
+ index_result+=1
+ if("FVA" in ARGS.output_type_analysis):
+ all_FVA= pd.concat([result[index_result] for result in results], ignore_index=False)
+ all_FVA = all_FVA.sort_index()
+ write_to_file(all_FVA.T, "FVA", True)
+ index_result+=1
+ if("sensitivity" in ARGS.output_type_analysis):
+ all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False)
+ all_sensitivity = all_sensitivity.sort_index()
+ write_to_file(all_sensitivity.T, "sensitivity", True)
+
+ pass
+
+##############################################################################
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_simulation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_simulation.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,134 @@
+
+
+
+ marea_macros.xml
+
+
+
+ numpy
+ pandas
+ cobra
+ lxml
+ joblib
+ scipy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_to_map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_to_map.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,1055 @@
+from __future__ import division
+import csv
+from enum import Enum
+import re
+import sys
+import numpy as np
+import pandas as pd
+import itertools as it
+import scipy.stats as st
+import lxml.etree as ET
+import math
+import utils.general_utils as utils
+from PIL import Image
+import os
+import copy
+import argparse
+import pyvips
+from PIL import Image, ImageDraw, ImageFont
+from typing import Tuple, Union, Optional, List, Dict
+import matplotlib.pyplot as plt
+
+# IDs of the map elements that could not be styled; Arrow.applyTo appends to this list.
+ERRORS = []
+########################## argparse ##########################################
+# Module-level slot for the parsed command-line arguments: only declared here, not assigned.
+ARGS :argparse.Namespace
+def process_args() -> argparse.Namespace:
+    """
+    Declares the command-line interface of the tool and parses the arguments the script was launched with.
+
+    The function takes no parameters: argparse reads the arguments from sys.argv.
+
+    Returns:
+        Namespace : the parsed arguments, plus an extra attribute "net" that is always set to True
+    """
+    cli = argparse.ArgumentParser(
+        usage = "%(prog)s [options]",
+        description = "process some value's genes to create a comparison's map.")
+    add = cli.add_argument
+
+    # General:
+    add('-td', '--tool_dir', type = str, required = True, help = 'your tool directory')
+    add('-on', '--control', type = str)
+    add('-ol', '--out_log', help = "Output log")
+
+    # Computation details:
+    # NOTE(review): the default '1vs1' is not one of the listed choices and no branch of
+    # computeEnrichment handles it, so running without -co performs no comparison; confirm the intended default.
+    add('-co', '--comparison', type = str, default = '1vs1', choices = ['manyvsmany', 'onevsrest', 'onevsmany'])
+    add('-pv' ,'--pValue', type = float, default = 0.1, help = 'P-Value threshold (default: %(default)s)')
+    add('-fc', '--fChange', type = float, default = 1.5, help = 'Fold-Change threshold (default: %(default)s)')
+
+    # Inputs:
+    add('-op', '--option', type = str, choices = ['datasets', 'dataset_class'], help='dataset or dataset and class')
+    add('-idf', '--input_data_fluxes', type = str, help = 'input dataset fluxes')
+    add('-icf', '--input_class_fluxes', type = str, help = 'sample group specification fluxes')
+    add('-idsf', '--input_datas_fluxes', type = str, nargs = '+', help = 'input datasets fluxes')
+    add('-naf', '--names_fluxes', type = str, nargs = '+', help = 'input names fluxes')
+
+    # Output:
+    add("-gs", "--generate_svg", type = utils.Bool("generate_svg"), default = True, help = "choose whether to generate svg")
+    add("-gp", "--generate_pdf", type = utils.Bool("generate_pdf"), default = True, help = "choose whether to generate pdf")
+    add('-cm', '--custom_map', type = str, help='custom map to use')
+    add(
+        '-mc', '--choice_map',
+        type = utils.Model, default = utils.Model.HMRcore,
+        choices = [utils.Model.HMRcore, utils.Model.ENGRO2, utils.Model.Custom])
+    add('-colorm', '--color_map', type = str, choices = ["jet", "viridis"])
+
+    parsed :argparse.Namespace = cli.parse_args()
+    parsed.net = True
+    return parsed
+
+############################ dataset input ####################################
+def read_dataset(data :str, name :str) -> pd.DataFrame:
+    """
+    Loads a tab-separated file into a DataFrame, using its first row as the header.
+
+    Args:
+        data : path of the file to read
+        name : label of the dataset, only used to build the error message
+
+    Returns:
+        pd.DataFrame : the parsed dataset
+
+    Raises:
+        SystemExit : if the file holds no data (pd.errors.EmptyDataError) or the table has fewer than 2 columns
+    """
+    try:
+        table = pd.read_csv(data, sep = '\t', header = 0, engine='python')
+    except pd.errors.EmptyDataError:
+        table = None
+
+    # An empty file and a single-column table are reported with the same message.
+    if table is None or len(table.columns) < 2:
+        sys.exit('Execution aborted: wrong format of ' + name + '\n')
+
+    return table
+
+############################ dataset name #####################################
+def name_dataset(name_data :str, count :int) -> str:
+    """
+    Makes a dataset name unique. Only the literal name "Dataset" is altered, by appending f"_{count}";
+    any other name is returned as it is (converted to str).
+
+    Args:
+        name_data : name associated with the dataset
+        count : counter supplied by the caller, used as the suffix
+
+    Returns:
+        str : the resulting name
+    """
+    label = str(name_data)
+    return f"{label}_{count}" if label == 'Dataset' else label
+
+############################ map_methods ######################################
+FoldChange = Union[float, int, str] # Union[float, Literal[0, "-INF", "INF"]]
+def fold_change(avg1 :float, avg2 :float) -> FoldChange:
+    """
+    Computes the normalized difference between two averages: (avg1 - avg2) / (|avg1| + |avg2|).
+
+    Args:
+        avg1 : average value from one dataset
+        avg2 : average value from the other dataset
+
+    Returns:
+        FoldChange :
+            0 : when both input values are 0
+            "-INF" : when only avg1 is 0
+            "INF" : when only avg2 is 0
+            float : the normalized difference, for any other combination of values
+    """
+    if avg1 == 0:
+        return 0 if avg2 == 0 else '-INF'
+
+    if avg2 == 0:
+        return 'INF'
+
+    # Callers compare this value against (threshold_F_C - 1) / (abs(threshold_F_C) + 1), with threshold_F_C > 1
+    return (avg1 - avg2) / (abs(avg1) + abs(avg2))
+
+def fix_style(l :str, col :Optional[str], width :str, dash :str) -> str:
+    """
+    Rewrites a ';'-separated style string so that its "stroke", "stroke-width" and "stroke-dasharray"
+    declarations carry the given values. Declarations already present are replaced where they stand,
+    missing ones are appended at the end in that order.
+
+    Args:
+        l : current style string of an SVG element
+        col : new value for the "stroke" style property
+        width : new value for the "stroke-width" style property
+        dash : new value for the "stroke-dasharray" style property
+
+    Returns:
+        str : the rewritten style string
+    """
+    overrides = (('stroke', col), ('stroke-width', width), ('stroke-dasharray', dash))
+    declarations = l.split(';')
+    replaced = set()
+    for position, declaration in enumerate(declarations):
+        for prop, value in overrides:
+            # The trailing ':' keeps "stroke" from matching "stroke-width" and "stroke-dasharray".
+            if declaration.startswith(prop + ':'):
+                declarations[position] = prop + ':' + value
+                replaced.add(prop)
+
+    for prop, value in overrides:
+        if prop not in replaced:
+            declarations.append(prop + ':' + value)
+
+    return ';'.join(declarations)
+
+# TODO: the values of d are lists of mixed types, so their annotation loses precision; tuples would fix that.
+def fix_map(d :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, threshold_P_V :float, threshold_F_C :float, max_z_score :float) -> ET.ElementTree:
+    """
+    Edits the selected SVG map based on the p-value, fold change and z-score data (d) and some significance thresholds.
+
+    Args:
+        d : maps each reaction ID (without the "R_" prefix used by the SVG element IDs) to a list
+            holding, in this order, its p-value, fold change and z-score
+        core_map : SVG map to modify
+        threshold_P_V : threshold for a p-value to be considered significant
+        threshold_F_C : threshold for a fold change value to be considered significant
+        max_z_score : highest z-score (absolute value), used to scale the arrow width
+
+    Returns:
+        ET.ElementTree : the modified core_map
+
+    Side effects:
+        core_map : mut
+    """
+    maxT = 12
+    minT = 2
+    grey = '#BEBEBE'
+    blue = '#6495ed'
+    red = '#ecac68'
+    # Same scale as the values produced by fold_change.
+    foldChangeLimit = (threshold_F_C - 1) / (abs(threshold_F_C) + 1)
+
+    for el in core_map.iter():
+        el_id = str(el.get('id'))
+        if not el_id.startswith('R_'): continue
+
+        tmp = d.get(el_id[2:])
+        if tmp is None: continue
+
+        p_val :float = tmp[0]
+        f_c = tmp[1]
+        z_score = tmp[2]
+
+        # Defaults: grey and thin. Every branch below starts from them, so a colour can never be
+        # left unbound or leak from the previous element (this happened for a fold change of exactly 0).
+        col = grey
+        width = str(minT)
+        if p_val >= threshold_P_V:
+            dash = '5,5' # not significant: dashed arrow
+
+        else:
+            dash = 'none'
+            if isinstance(f_c, str):
+                # "-INF" / "INF": one of the two averages is 0, the arrow gets the maximum width.
+                if f_c == '-INF': col = blue
+                elif f_c == 'INF': col = red
+                width = str(maxT)
+
+            elif abs(f_c) >= foldChangeLimit:
+                if f_c < 0: col = blue
+                elif f_c > 0: col = red
+
+                # Scale the width on the z-score; without a usable maximum fall back to the widest arrow.
+                scaled = maxT
+                if max_z_score:
+                    ratio = (abs(z_score) * maxT) / max_z_score
+                    if math.isfinite(ratio): scaled = max(ratio, minT)
+                width = str(scaled)
+
+        el.set('style', fix_style(el.get('style', ""), col, width, dash))
+
+    return core_map
+
+def getElementById(reactionId :str, metabMap :ET.ElementTree) -> utils.Result[ET.Element, utils.Result.ResultErr]:
+    """
+    Looks up an element of the map by its exact ID. SVG does not enforce unique IDs: when more than
+    one element matches, only the first one is returned.
+
+    Args:
+        reactionId (str): exact ID of the requested element.
+        metabMap (ET.ElementTree): metabolic map containing the element.
+
+    Returns:
+        utils.Result[ET.Element, ResultErr]: result of the search, either the first match found or a ResultErr.
+    """
+    query = utils.Result.Ok(f"//*[@id=\"{reactionId}\"]")
+
+    # Presumably Result.map captures the IndexError raised when nothing matches - verify against utils.
+    firstMatch = query.map(lambda expression : metabMap.xpath(expression)[0])
+
+    # The captured error is discarded on purpose and replaced with a message that is useful to the user.
+    return firstMatch.mapErr(
+        lambda _ : utils.Result.ResultErr(f"No elements with ID \"{reactionId}\" found in map"))
+
+def styleMapElement(element :ET.Element, styleStr :str) -> None:
+    """
+    Appends styleStr to the "style" attribute of the element. When the current value ends with a
+    ";stroke:...;stroke-width:...;stroke-dasharray:..." triple, that triple is dropped first, so
+    that styling the same element repeatedly does not pile up declarations.
+
+    Args:
+        element : the SVG element to style.
+        styleStr : the style declarations to append.
+
+    Side effects:
+        element : mut
+    """
+    existing :str = element.get("style", "")
+    endsWithStrokeTriple = re.search(r";stroke:[^;]+;stroke-width:[^;]+;stroke-dasharray:[^;]+$", existing)
+    if endsWithStrokeTriple:
+        # Keep everything that comes before the last three ';'-separated declarations.
+        existing = existing.rsplit(';', 3)[0]
+
+    element.set("style", existing + styleStr)
+
+class ReactionDirection(Enum):
+    """
+    Direction suffix that a reaction ID may carry: "_F" (direct), "_B" (inverse) or none of them (unknown).
+    """
+    Unknown = ""
+    Direct = "_F"
+    Inverse = "_B"
+
+    @classmethod
+    def fromDir(cls, s :str) -> "ReactionDirection":
+        """
+        Maps a suffix to its variant; anything that is not exactly "_F" or "_B" yields Unknown.
+        """
+        for variant in (cls.Direct, cls.Inverse):
+            if s == variant.value: return variant
+
+        return cls.Unknown
+
+    @classmethod
+    def fromReactionId(cls, reactionId :str) -> "ReactionDirection":
+        """
+        Reads the direction from the last two characters of the reaction ID.
+        """
+        return cls.fromDir(reactionId[-2:])
+
+def getArrowBodyElementId(reactionId :str) -> str:
+    """
+    Builds the ID of the arrow body element for a reaction: "R_" followed by the reaction ID
+    stripped of a trailing "_RV" or of a trailing direction suffix ("_F" / "_B").
+
+    Args:
+        reactionId : the provided reaction ID.
+
+    Returns:
+        str : the ID of the arrow body.
+    """
+    if reactionId.endswith("_RV"): #TODO: standardize _RV
+        baseId = reactionId[:-3]
+    elif ReactionDirection.fromReactionId(reactionId) is ReactionDirection.Unknown:
+        baseId = reactionId
+    else:
+        baseId = reactionId[:-2]
+
+    return "R_" + baseId
+
+def getArrowHeadElementId(reactionId :str) -> Tuple[str, str]:
+    """
+    Builds the ID(s) of the arrow head element(s) for a reaction. When the reaction ID ends with a
+    direction suffix only the matching head is returned, otherwise both candidates are.
+
+    Args:
+        reactionId : the provided reaction ID.
+
+    Returns:
+        Tuple[str, str]: either the ID of the right arrow head followed by an empty string, or the
+        "F_" and "B_" candidates to try.
+    """
+    if reactionId.endswith("_RV"): #TODO: standardize _RV
+        baseId = reactionId[:-3]
+        return "F_" + baseId, "B_" + baseId
+
+    if ReactionDirection.fromReactionId(reactionId) is ReactionDirection.Unknown:
+        return "F_" + reactionId, "B_" + reactionId
+
+    # The suffix becomes the prefix, reversed: "Pyr_F" --> "F_Pyr"
+    suffix = reactionId[-2:]
+    return suffix[::-1] + reactionId[:-2], ""
+
+class ArrowColor(Enum):
+    """
+    Encodes possible arrow colors based on their meaning in the enrichment process.
+    """
+    Invalid = "#BEBEBE" # gray, fold-change under threshold
+    Transparent = "#ffffff00" # white with zero alpha, used to hide an element
+    UpRegulated = "#ecac68" # orange, up-regulated reaction
+    DownRegulated = "#6495ed" # light blue, down-regulated reaction
+
+    # vvv Pure red: up-regulated net value for a reversible reaction with conflicting enrichment
+    # in the two directions.
+    UpRegulatedInv = "#FF0000"
+
+    # vvv Pure blue: down-regulated net value for a reversible reaction with conflicting enrichment
+    # in the two directions.
+    DownRegulatedInv = "#0000FF"
+
+    @classmethod
+    def fromFoldChangeSign(cls, foldChange :float, *, useAltColor = False) -> "ArrowColor":
+        """
+        Picks the color for a fold change: the "down" pair for negative values, the "up" pair for
+        anything else (0 included); useAltColor selects the second color of the pair.
+        """
+        if foldChange < 0:
+            regular, alternative = cls.DownRegulated, cls.DownRegulatedInv
+        else:
+            regular, alternative = cls.UpRegulated, cls.UpRegulatedInv
+
+        return (regular, alternative)[useAltColor]
+
+    def __str__(self) -> str:
+        return self.value
+
+class Arrow:
+    """
+    Models the properties of a reaction arrow that change based on enrichment.
+    """
+    MIN_W = 2
+    MAX_W = 12
+
+    def __init__(self, width :float, col: ArrowColor, *, isDashed = False) -> None:
+        """
+        (Private) Initializes an instance of Arrow.
+
+        Args:
+            width : width of the arrow, ideally to be kept within Arrow.MIN_W and Arrow.MAX_W (not enforced).
+            col : color of the arrow.
+            isDashed : whether the arrow should be dashed, meaning the associated pValue resulted not significant.
+
+        Returns:
+            None : practically, an Arrow instance.
+        """
+        self.w = width
+        self.col = col
+        self.dash = isDashed
+
+    def applyTo(self, reactionId :str, metabMap :ET.ElementTree, styleStr :str) -> None:
+        """
+        Applies styleStr to the map element with the given ID; when the element cannot be found
+        the ID is recorded in the module-level ERRORS list.
+
+        Args:
+            reactionId : ID of the element, as encoded in the map.
+            metabMap : the metabolic map to edit.
+            styleStr : the style declarations to apply.
+        """
+        if getElementById(reactionId, metabMap).map(lambda el : styleMapElement(el, styleStr)).isErr:
+            ERRORS.append(reactionId)
+
+    def styleReactionElements(self, metabMap :ET.ElementTree, reactionId :str, *, mindReactionDir = True) -> None:
+        """
+        Styles the elements of a reaction with this arrow: only its body when mindReactionDir is
+        False, only its head(s) otherwise.
+
+        Args:
+            metabMap : the metabolic map to edit.
+            reactionId : the reaction ID, as encoded in the dataset.
+            mindReactionDir : selects between the arrow head(s) (True) and the arrow body (False).
+        """
+        if not mindReactionDir:
+            return self.applyTo(getArrowBodyElementId(reactionId), metabMap, self.toStyleStr())
+
+        # Now we style the arrow head(s):
+        idOpt1, idOpt2 = getArrowHeadElementId(reactionId)
+        self.applyTo(idOpt1, metabMap, self.toStyleStr(downSizedForTips = True))
+        if idOpt2: self.applyTo(idOpt2, metabMap, self.toStyleStr(downSizedForTips = True))
+
+    def styleReactionElementsMeanMedian(self, metabMap :ET.ElementTree, reactionId :str, isNegative:bool) -> None:
+        """
+        Styles the body and one head of a reaction with this arrow and hides the other head:
+        the second head returned by getArrowHeadElementId is the visible one when isNegative is
+        True, the first one otherwise.
+
+        Args:
+            metabMap : the metabolic map to edit.
+            reactionId : the reaction ID, as encoded in the dataset.
+            isNegative : selects which of the two heads stays visible.
+
+        Side effects:
+            self : the color of this arrow is left set to ArrowColor.Transparent.
+        """
+        self.applyTo(getArrowBodyElementId(reactionId), metabMap, self.toStyleStr())
+        idOpt1, idOpt2 = getArrowHeadElementId(reactionId)
+        visibleTip, hiddenTip = (idOpt2, idOpt1) if isNegative else (idOpt1, idOpt2)
+
+        self.applyTo(visibleTip, metabMap, self.toStyleStr(downSizedForTips = True))
+        self.col = ArrowColor.Transparent
+        self.applyTo(hiddenTip, metabMap, self.toStyleStr(downSizedForTips = True)) # transparent
+
+    def getMapReactionId(self, reactionId :str, mindReactionDir :bool) -> str:
+        """
+        Computes the reaction ID as encoded in the map for a given reaction ID from the dataset.
+
+        Args:
+            reactionId: the reaction ID, as encoded in the dataset.
+            mindReactionDir: if True forward (F_) and backward (B_) directions will be encoded in the result.
+
+        Returns:
+            str : the ID of an arrow's body or tips in the map.
+        """
+        # we assume the reactionIds also don't encode reaction dir if they don't mind it when styling the map.
+        if not mindReactionDir: return "R_" + reactionId
+
+        #TODO: this is clearly something we need to make consistent in fluxes
+        # The direction is encoded by the LAST two characters, which become the prefix in reverse
+        # order: "Pyr_F" --> "F_Pyr". IDs without a direction suffix default to forward.
+        if reactionId[-2:] in ["_F", "_B"]: return reactionId[:-3:-1] + reactionId[:-2]
+        return f"F_{reactionId}"
+
+    def toStyleStr(self, *, downSizedForTips = False) -> str:
+        """
+        Collapses the styles of this Arrow into a str, ready to be applied as part of the "style" property on an svg element.
+
+        Args:
+            downSizedForTips : if True the width is multiplied by 0.8, as done for arrow heads.
+
+        Returns:
+            str : the styles string.
+        """
+        width = self.w
+        if downSizedForTips: width *= 0.8
+        return f";stroke:{self.col};stroke-width:{width};stroke-dasharray:{'5,5' if self.dash else 'none'}"
+
+# vvv Shared Arrow instances. They cannot be class attributes of Arrow because the class is not
+# available yet while its own body is being executed, so they are defined right after it.
+INVALID_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid)
+INSIGNIFICANT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid, isDashed = True)
+
+def applyFluxesEnrichmentToMap(fluxesEnrichmentRes :Dict[str, Tuple[float, FoldChange, float, float, float]], metabMap :ET.ElementTree, maxNumericZScore :float) -> None:
+    """
+    Applies fluxes enrichment results to the provided metabolic map.
+
+    Args:
+        fluxesEnrichmentRes : fluxes enrichment results; for each reaction ID, in this order: p-value,
+            fold change, z-score and the averages of the two compared datasets.
+        metabMap : the metabolic map to edit.
+        maxNumericZScore : biggest finite z-score value found.
+
+    Side effects:
+        metabMap : mut
+
+    Returns:
+        None
+    """
+    foldChangeLimit = (ARGS.fChange - 1) / (abs(ARGS.fChange) + 1)
+    for reactionId, values in fluxesEnrichmentRes.items():
+        pValue = values[0]
+        foldChange = values[1]
+        z_score = values[2]
+
+        if isinstance(foldChange, str): foldChange = float(foldChange)
+        if pValue >= ARGS.pValue: # pValue above thresh: dashed arrow
+            INSIGNIFICANT_ARROW.styleReactionElements(metabMap, reactionId)
+            INSIGNIFICANT_ARROW.styleReactionElements(metabMap, reactionId, mindReactionDir = False)
+            continue
+
+        if abs(foldChange) < foldChangeLimit:
+            INVALID_ARROW.styleReactionElements(metabMap, reactionId)
+            INVALID_ARROW.styleReactionElements(metabMap, reactionId, mindReactionDir = False)
+            continue
+
+        # The width is scaled on the z-score. Numpy floats do not raise ZeroDivisionError (they yield
+        # inf or nan), so a missing maximum and non-finite results are checked explicitly: in both
+        # cases the arrow keeps the maximum width.
+        width = Arrow.MAX_W
+        if not math.isinf(foldChange) and maxNumericZScore:
+            scaledWidth = abs(z_score * Arrow.MAX_W) / maxNumericZScore
+            if math.isfinite(scaledWidth): width = max(scaledWidth, Arrow.MIN_W)
+
+        inversionScore = (values[3] < 0) + (values[4] < 0) # Compacts the signs of averages into 1 easy to check score
+        if inversionScore == 2: foldChange *= -1
+        # ^^^ Style the inverse direction with the opposite sign netValue
+
+        # If the score is 1 (opposite signs) we use alternative colors vvv
+        arrow = Arrow(width, ArrowColor.fromFoldChangeSign(foldChange, useAltColor = inversionScore == 1))
+
+        if ARGS.net: # style the arrow head of the net direction and hide the opposite one:
+            arrow.styleReactionElements(metabMap, reactionId + ("_B" if inversionScore == 2 else "_F"))
+            arrow.applyTo(("F_" if inversionScore == 2 else "B_") + reactionId, metabMap, f";stroke:{ArrowColor.Transparent};stroke-width:0;stroke-dasharray:None")
+
+        # The arrow body is styled in any case:
+        arrow.styleReactionElements(metabMap, reactionId, mindReactionDir = False)
+
+
+############################ split class ######################################
+def split_class(classes :pd.DataFrame, resolve_rules :Dict[str, List[float]]) -> Dict[str, List[List[float]]]:
+    """
+    Generates a :dict that groups together data from a :DataFrame based on classes the data is related to.
+
+    The table is read in a single pass and is not modified.
+
+    Args:
+        classes : a :DataFrame whose first column holds the keys to query the resolve_rules :dict
+            and whose second column holds the class of each key; rows with a null class are ignored
+        resolve_rules : a :dict containing :float data
+
+    Returns:
+        dict : for each class (in order of first appearance) the data of its samples, transposed so
+            that the i-th inner list collects the i-th value of every sample. Classes for which no
+            sample has data are left out and reported with a warning.
+    """
+    samplesByClass :Dict[str, List[List[float]]] = {}
+    for sampleId, className in zip(classes.iloc[:, 0], classes.iloc[:, 1]):
+        if pd.isnull(className): continue
+
+        # The class is registered even when the sample has no data, so that empty classes can be reported.
+        classSamples = samplesByClass.setdefault(className, [])
+        sampleData = resolve_rules.get(sampleId)
+        if sampleData is not None: classSamples.append(sampleData)
+
+    class_pat :Dict[str, List[List[float]]] = {}
+    for className, classSamples in samplesByClass.items():
+        if classSamples:
+            class_pat[className] = list(map(list, zip(*classSamples)))
+            continue
+
+        utils.logWarning(
+            f"Warning: no sample found in class \"{className}\", the class has been disregarded", ARGS.out_log)
+
+    return class_pat
+
+############################ conversion ##############################################
+# Conversion from SVG to PNG
+def svg_to_png_with_background(svg_path :utils.FilePath, png_path :utils.FilePath, dpi :int = 72, scale :int = 1, size :Optional[float] = None) -> None:
+    """
+    Internal utility to convert an SVG to PNG (forced opaque) to aid in PDF conversion.
+
+    Args:
+        svg_path : path to SVG file
+        png_path : path for new PNG file
+        dpi : dots per inch of the generated PNG
+        scale : scaling factor for the generated PNG, ignored and recomputed when a size is provided
+        size : final effective width of the generated PNG
+
+    Returns:
+        None
+    """
+    # With a target width the SVG is loaded unscaled and resized afterwards to reach that width.
+    image = pyvips.Image.new_from_file(svg_path.show(), dpi=dpi, scale=1 if size else scale)
+    if size:
+        scale = size / image.width
+        image = image.resize(scale)
+
+    # NOTE(review): the backdrop is created with the final image size and then scaled again by
+    # "scale" - confirm this is intended when scale != 1.
+    backdrop = pyvips.Image.black(image.width, image.height).new_from_image([255, 255, 255])
+    backdrop = backdrop.affine([scale, 0, 0, scale])
+    if backdrop.bands != image.bands:
+        backdrop = backdrop.extract_band(0)
+
+    backdrop.composite2(image, 'over').write_to_file(png_path.show())
+
+# Single function: the files are handled by the caller and passed in as input
+# Conversion from PNG to PDF
+def convert_png_to_pdf(png_file :utils.FilePath, pdf_file :utils.FilePath) -> None:
+    """
+    Internal utility to convert a PNG to PDF, second step of the conversion from SVG.
+
+    Args:
+        png_file : path to PNG file
+        pdf_file : path to new PDF file
+
+    Returns:
+        None
+    """
+    # The context manager closes the PNG file once it has been converted; convert() returns an
+    # independent in-memory image, so saving does not need the source file anymore.
+    with Image.open(png_file.show()) as png:
+        rgbImage = png.convert("RGB")
+
+    rgbImage.save(pdf_file.show(), "PDF", resolution=100.0)
+
+# Shared by every caller that needs a PDF map, to avoid repeating the two conversion steps
+def convert_to_pdf(file_svg :utils.FilePath, file_png :utils.FilePath, file_pdf :utils.FilePath) -> None:
+    """
+    Converts the SVG map at the provided path to PDF, going through an intermediate PNG file.
+
+    Args:
+        file_svg : path to SVG file
+        file_png : path to the intermediate PNG file
+        file_pdf : path to new PDF file
+
+    Returns:
+        None
+
+    Raises:
+        utils.DataErr : if the conversion from PNG to PDF fails
+    """
+    # Errors of the first step (SVG to PNG) are not wrapped and reach the caller as they are.
+    svg_to_png_with_background(file_svg, file_png)
+
+    try:
+        convert_png_to_pdf(file_png, file_pdf)
+        print(f'PDF file {file_pdf.filePath} successfully generated.')
+
+    except Exception as conversionErr:
+        raise utils.DataErr(file_pdf.show(), f'Error generating PDF file: {conversionErr}')
+
+############################ map ##############################################
+def buildOutputPath(dataset1Name :str, dataset2Name = "rest", *, details = "", ext :utils.FileFormat) -> utils.FilePath:
+    """
+    Builds a FilePath instance from the names of confronted datasets ready to point to a location in the
+    "result/" folder, used by this tool for output files in collections.
+
+    Args:
+        dataset1Name : name of the first dataset of the comparison.
+        dataset2Name : name of the second dataset of the comparison. Defaults to "rest".
+        details : optional description of the file, appended to the file name between parentheses.
+        ext : format of the file.
+
+    Returns:
+        utils.FilePath : path named "<dataset1Name>_vs_<dataset2Name>", plus " (<details>)" when
+            details are given, with the given format and the "result" prefix.
+    """
+    # This function returns a util data structure but is extremely specific to this module.
+    # TODO: port it to utils once more tools with multiple outputs need it.
+    fileName = f"{dataset1Name}_vs_{dataset2Name}"
+    if details:
+        fileName += f" ({details})"
+
+    return utils.FilePath(fileName, ext, prefix = "result")
+
+FIELD_NOT_AVAILABLE = '/'
+def writeToCsv(rows: List[list], fieldNames :List[str], outPath :utils.FilePath) -> None:
+    """
+    Writes the rows to a tab-separated file, after a header line made of the field names.
+
+    Rows shorter than the header are padded with FIELD_NOT_AVAILABLE, values beyond the last
+    field are left out. The rows passed by the caller are not modified.
+
+    Args:
+        rows : the rows to write, one list of values each.
+        fieldNames : the column names, in output order.
+        outPath : path of the file to write.
+
+    Returns:
+        None
+    """
+    fieldsAmt = len(fieldNames)
+    with open(outPath.show(), "w", newline = "") as fd:
+        writer = csv.writer(fd, delimiter = '\t')
+        writer.writerow(fieldNames)
+
+        for row in rows:
+            # A negative amount of missing fields yields no padding at all.
+            padding = [FIELD_NOT_AVAILABLE] * (fieldsAmt - len(row))
+            writer.writerow((list(row) + padding)[:fieldsAmt])
+
+OldEnrichedScores = Dict[str, List[Union[float, FoldChange]]] #TODO: try to use Tuple whenever possible
+def writeTabularResult(enrichedScores : OldEnrichedScores, outPath :utils.FilePath) -> None:
+    """
+    Writes the enrichment results as a tab-separated table, one row per reaction.
+
+    Args:
+        enrichedScores : for each reaction ID, in this order: p-value, fold change, z-score and the
+            averages of the two compared datasets (as built by compareDatasetPair).
+        outPath : path of the file to write.
+
+    Returns:
+        None
+    """
+    # One header per value: without the "z-score" column the z-score would be written under
+    # "average_1", the first average under "average_2" and the second average would be lost.
+    fieldNames = ["ids", "P_Value", "fold change", "z-score", "average_1", "average_2"]
+
+    writeToCsv([ [reactId] + list(values) for reactId, values in enrichedScores.items() ], fieldNames, outPath)
+
+def temp_thingsInCommon(tmp :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, max_z_score :float, dataset1Name :str, dataset2Name = "rest") -> None:
+    """
+    Runs the steps shared by every comparison mode once the enrichment is computed: writes the
+    tabular result, then styles the map.
+
+    Args:
+        tmp : enrichment results of the comparison, by reaction ID.
+        core_map : the metabolic map to edit.
+        max_z_score : z-score used to scale the arrow widths.
+        dataset1Name : name of the first dataset of the comparison.
+        dataset2Name : name of the second dataset of the comparison. Defaults to "rest".
+
+    Side effects:
+        tmp : mut (every value is replaced by a tuple)
+        core_map : mut
+    """
+    # TODO: organize, name better.
+    tabularPath = buildOutputPath(dataset1Name, dataset2Name, details = "Tabular Result", ext = utils.FileFormat.TSV)
+    writeTabularResult(tmp, tabularPath)
+
+    # Only existing keys are reassigned, so updating the dict while looping over it is safe.
+    for reactId in tmp:
+        tmp[reactId] = tuple(tmp[reactId])
+
+    applyFluxesEnrichmentToMap(tmp, core_map, max_z_score)
+
+def computePValue(dataset1Data: List[float], dataset2Data: List[float]) -> Tuple[float, float]:
+    """
+    Computes the statistical significance score (P-value) and the Z-score of the comparison
+    between the values of the same reaction in two datasets.
+
+    Args:
+        dataset1Data : data from the 1st dataset.
+        dataset2Data : data from the 2nd dataset.
+
+    Returns:
+        tuple: (P-value, Z-score)
+            - P-value from a two-sample Kolmogorov-Smirnov test on the provided data.
+            - Z-score of the difference between the means of the two datasets, computed with the
+              sample standard deviations (ddof = 1).
+    """
+    # Only the P-value of the Kolmogorov-Smirnov test is used, its statistic is discarded.
+    _, p_value = st.ks_2samp(dataset1Data, dataset2Data)
+
+    meanDifference = np.mean(dataset1Data) - np.mean(dataset2Data)
+
+    # Squared standard error of each mean: variance over sample size.
+    squaredErr1 = np.std(dataset1Data, ddof=1)**2 / len(dataset1Data)
+    squaredErr2 = np.std(dataset2Data, ddof=1)**2 / len(dataset2Data)
+
+    z_score = meanDifference / np.sqrt(squaredErr1 + squaredErr2)
+    return p_value, z_score
+
+def compareDatasetPair(dataset1Data :List[List[float]], dataset2Data :List[List[float]], ids :List[str]) -> Tuple[Dict[str, List[Union[float, FoldChange]]], float]:
+    """
+    Compares two datasets reaction by reaction.
+
+    Args:
+        dataset1Data : data of the 1st dataset, one list of values per reaction.
+        dataset2Data : data of the 2nd dataset, one list of values per reaction.
+        ids : reaction IDs, in the same order as the data; reactions with an empty ID are skipped.
+
+    Returns:
+        Tuple:
+            - for each reaction ID, in this order: p-value, fold change, z-score and the averages of
+              the two datasets. Reactions whose data raises a TypeError or a ZeroDivisionError are left out.
+            - the biggest finite z-score (absolute value) found, 0 when there is none.
+    """
+    #TODO: the following code still suffers from "dumbvarnames-osis"
+    tmp :Dict[str, List[Union[float, FoldChange]]] = {}
+    max_z_score = 0
+
+    for index, (l1, l2) in enumerate(zip(dataset1Data, dataset2Data)):
+        reactId = ids[index]
+        if not reactId: continue # we skip ids that have already been processed
+
+        try:
+            p_value, z_score = computePValue(l1, l2)
+            avg1 = sum(l1) / len(l1)
+            avg2 = sum(l2) / len(l2)
+            avg = fold_change(avg1, avg2)
+
+            # Only finite z-scores take part in the maximum: it is used to scale the arrow widths and
+            # an infinite (or NaN) value would flatten or corrupt the width of every other reaction.
+            if not isinstance(z_score, str) and math.isfinite(z_score) and max_z_score < abs(z_score):
+                max_z_score = abs(z_score)
+
+            tmp[reactId] = [float(p_value), avg, z_score, avg1, avg2]
+
+        except (TypeError, ZeroDivisionError): continue
+
+    return tmp, max_z_score
+
+def computeEnrichment(metabMap :ET.ElementTree, class_pat :Dict[str, List[List[float]]], ids :List[str]) -> None:
+    """
+    Compares clustered data based on the comparison mode in ARGS.comparison and applies
+    enrichment-based styling on the provided metabolic map.
+
+    Args:
+        metabMap : SVG map to modify.
+        class_pat : the clustered data: for each class, one list of values per reaction.
+        ids : ids for data association.
+
+    Returns:
+        None
+
+    Raises:
+        sys.exit : if there are less than 2 classes for comparison, or if the control class of
+            the "onevsmany" mode is not among the classes
+
+    Side effects:
+        metabMap : mut
+    """
+    class_pat = { k.strip() : v for k, v in class_pat.items() }
+    #TODO: stop using sys.exit (raise the correct utils error)
+    if len(class_pat) < 2: sys.exit('Execution aborted: classes provided for comparisons are less than two\n')
+
+    if ARGS.comparison == "manyvsmany":
+        for i, j in it.combinations(class_pat.keys(), 2):
+            #TODO: these 2 functions are always called in pair and in this order and need common data,
+            # some clever refactoring would be appreciated.
+            comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids)
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, i, j)
+
+    elif ARGS.comparison == "onevsrest":
+        for single_cluster, clusterData in class_pat.items():
+            others = [data for name, data in class_pat.items() if name != single_cluster]
+
+            # The values of every other class are merged reaction by reaction. Chaining the classes one
+            # after the other would pair each reaction with the data of the first other class only,
+            # since the comparison walks the two datasets in lockstep.
+            rest :List[List[float]] = [
+                [value for classValues in perReaction for value in classValues]
+                for perReaction in zip(*others)]
+
+            comparisonDict, max_z_score = compareDatasetPair(clusterData, rest, ids)
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, single_cluster)
+
+    elif ARGS.comparison == "onevsmany":
+        controlItems = class_pat.get(ARGS.control)
+        if controlItems is None:
+            sys.exit(f'Execution aborted: control class "{ARGS.control}" not found among the provided classes\n')
+
+        for otherDataset in class_pat.keys():
+            if otherDataset == ARGS.control: continue
+
+            comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids)
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, ARGS.control, otherDataset)
+
+def createOutputMaps(dataset1Name :str, dataset2Name :str, core_map :ET.ElementTree) -> None:
+    """
+    Writes the map of a comparison to the output files selected by the user (ARGS.generate_svg,
+    ARGS.generate_pdf).
+
+    Args:
+        dataset1Name : name of the first dataset of the comparison.
+        dataset2Name : name of the second dataset of the comparison.
+        core_map : the map to write.
+
+    Returns:
+        None
+    """
+    def pathFor(details :str, ext :utils.FileFormat) -> utils.FilePath:
+        return buildOutputPath(dataset1Name, dataset2Name, details = details, ext = ext)
+
+    # The SVG is always written, since the PDF is generated from it...
+    svgFilePath = pathFor("SVG Map", utils.FileFormat.SVG)
+    utils.writeSvg(svgFilePath, core_map)
+
+    if ARGS.generate_pdf:
+        pngPath = pathFor("PNG Map", utils.FileFormat.PNG)
+        pdfPath = pathFor("PDF Map", utils.FileFormat.PDF)
+        convert_to_pdf(svgFilePath, pngPath, pdfPath)
+
+    # ...and deleted afterwards when the user did not ask for it.
+    if not ARGS.generate_svg:
+        os.remove(svgFilePath.show())
+
+ClassPat = Dict[str, List[List[float]]]
+def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]:
+    """
+    Loads the input data according to ARGS.option: either one dataset per class ("datasets") or a
+    single dataset plus a table assigning a class to each sample ("dataset_class").
+
+    Args:
+        datasetsPaths : paths of the datasets, used by the "datasets" option.
+        datasetPath : path of the single dataset, used by the "dataset_class" option.
+        classPath : path of the class table, used by the "dataset_class" option.
+        names : names of the datasets, used by the "datasets" option.
+
+    Returns:
+        Tuple[List[str], ClassPat] : the IDs read from the first column of the dataset (of the last
+            one with the "datasets" option) and the data grouped by class. Both are empty when
+            nothing is loaded.
+    """
+    # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate,
+    # for the sake of everyone's sanity.
+    class_pat :ClassPat = {}
+    ids :List[str] = [] # bound upfront: no branch below is guaranteed to run
+    if ARGS.option == 'datasets':
+        for num, (path, name) in enumerate(zip(datasetsPaths, names), start = 1):
+            name = name_dataset(name, num)
+            resolve_rules_float, ids = getDatasetValues(path, name)
+            if resolve_rules_float is not None:
+                class_pat[name] = list(map(list, zip(*resolve_rules_float.values())))
+
+    elif ARGS.option == "dataset_class":
+        classes = read_dataset(classPath, "class")
+        classes = classes.astype(str)
+
+        resolve_rules_float, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)")
+        if resolve_rules_float is not None: class_pat = split_class(classes, resolve_rules_float)
+
+    return ids, class_pat
+    #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs)
+
+#TODO: create these args as FilePath objects
+def getDatasetValues(datasetPath :str, datasetName :str) -> Tuple[ClassPat, List[str]]:
+    """
+    Opens the dataset at the given path and extracts the values (expected nullable numerics) and the IDs.
+
+    Args:
+        datasetPath : path to the dataset
+        datasetName (str): dataset name, used in error reporting
+
+    Returns:
+        Tuple[ClassPat, List[str]]: the values of every column but the first, by column name, and
+            the IDs read from the first column (as strings)
+    """
+    table = read_dataset(datasetPath, datasetName)
+    firstColumn = table.columns[0]
+    IDs = table.iloc[:, 0].astype(str).tolist()
+
+    valuesByColumn = {}
+    for columnName, rawValues in table.drop(firstColumn, axis = "columns").to_dict("list").items():
+        # One converter per column, applied to each of its values.
+        toFloat = utils.Float("Dataset values, not an argument")
+        valuesByColumn[columnName] = [toFloat(rawValue) for rawValue in rawValues]
+
+    return valuesByColumn, IDs
+
+def rgb_to_hex(rgb):
+    """
+    Convert RGB values (0-1 range) to hexadecimal color format.
+
+    Args:
+        rgb (numpy.ndarray): An array of color components (in the range [0, 1]); only the first
+            three are used.
+
+    Returns:
+        str: The color in hexadecimal format (e.g., '#ff0000' for red).
+    """
+    # Scale to 0-255; the conversion to int truncates the decimals.
+    channels = (np.array(rgb) * 255).astype(int)
+    red, green, blue = channels[0], channels[1], channels[2]
+    return f'#{red:02x}{green:02x}{blue:02x}'
+
+
+
+def save_colormap_image(min_value: float, max_value: float, path: utils.FilePath, colorMap:str="viridis"):
+    """
+    Create and save an image of the colormap showing the gradient and its range.
+
+    Args:
+        min_value (float): The minimum value of the colormap range, written at the left of the gradient.
+        max_value (float): The maximum value of the colormap range, written at the right of the gradient.
+        path (utils.FilePath): Where the image is saved.
+        colorMap (str): The name of the matplotlib colormap to draw.
+    """
+    cmap = plt.get_cmap(colorMap)
+
+    fig, ax = plt.subplots(figsize=(6, 1))
+    fig.subplots_adjust(bottom=0.5)
+
+    # Two identical rows going from 0 to 1: drawn with the colormap they show its whole gradient.
+    ramp = np.linspace(0, 1, 256)
+    gradient = np.vstack((ramp, ramp))
+
+    # Range labels (rounded to 3 decimals) at the two ends of the gradient.
+    labelStyle = dict(va='center', transform=ax.transAxes, fontsize=12, color='black')
+    ax.text(0, 0.5, f'{np.round(min_value, 3)}', ha='right', **labelStyle)
+    ax.text(1, 0.5, f'{np.round(max_value, 3)}', ha='left', **labelStyle)
+
+    ax.imshow(gradient, aspect='auto', cmap=cmap)
+    ax.set_axis_off()
+
+    plt.savefig(path.show(), bbox_inches='tight', pad_inches=0)
+    plt.close()
+
+def min_nonzero_abs(arr):
+ # Flatten the array and filter out zeros, then find the minimum of the remaining values
+ non_zero_elements = np.abs(arr)[np.abs(arr) > 0]
+ return np.min(non_zero_elements) if non_zero_elements.size > 0 else None
+
+def computeEnrichmentMeanMedian(metabMap: ET.ElementTree, class_pat: Dict[str, List[List[float]]], ids: List[str], colormap:str) -> None:
+ """
+ Compute and visualize the metabolic map based on mean and median of the input fluxes.
+ The fluxes are normalised across classes/datasets and visualised using the given colormap.
+
+ Args:
+ metabMap (ET.ElementTree): An XML tree representing the metabolic map.
+ class_pat (Dict[str, List[List[float]]]): A dictionary where keys are class names and values are lists of enrichment values.
+ ids (List[str]): A list of reaction IDs to be used for coloring arrows.
+
+ Returns:
+ None
+ """
+ # Create copies only if they are needed
+ metabMap_mean = copy.deepcopy(metabMap)
+ metabMap_median = copy.deepcopy(metabMap)
+
+ # Compute medians and means
+ medians = {key: np.round(np.median(np.array(value), axis=1), 6) for key, value in class_pat.items()}
+ means = {key: np.round(np.mean(np.array(value), axis=1),6) for key, value in class_pat.items()}
+
+ # Normalize medians and means
+ max_flux_medians = max(np.max(np.abs(arr)) for arr in medians.values())
+ max_flux_means = max(np.max(np.abs(arr)) for arr in means.values())
+
+ min_flux_medians = min(min_nonzero_abs(arr) for arr in medians.values())
+ min_flux_means = min(min_nonzero_abs(arr) for arr in means.values())
+
+ medians = {key: median/max_flux_medians for key, median in medians.items()}
+ means = {key: mean/max_flux_means for key, mean in means.items()}
+
+ save_colormap_image(min_flux_medians, max_flux_medians, utils.FilePath("Color map median", ext=utils.FileFormat.PNG, prefix="result"), colormap)
+ save_colormap_image(min_flux_means, max_flux_means, utils.FilePath("Color map mean", ext=utils.FileFormat.PNG, prefix="result"), colormap)
+
+ cmap = plt.get_cmap(colormap)
+
+ for key in class_pat:
+ # Create color mappings for median and mean
+ colors_median = {
+ rxn_id: rgb_to_hex(cmap(abs(medians[key][i]))) if medians[key][i] != 0 else '#bebebe' #grey blocked
+ for i, rxn_id in enumerate(ids)
+ }
+
+ colors_mean = {
+ rxn_id: rgb_to_hex(cmap(abs(means[key][i]))) if means[key][i] != 0 else '#bebebe' #grey blocked
+ for i, rxn_id in enumerate(ids)
+ }
+
+ for i, rxn_id in enumerate(ids):
+ isNegative = medians[key][i] < 0
+
+ # Apply median arrows
+ apply_arrow(metabMap_median, rxn_id, colors_median[rxn_id], isNegative)
+
+ isNegative = means[key][i] < 0
+ # Apply mean arrows
+ apply_arrow(metabMap_mean, rxn_id, colors_mean[rxn_id], isNegative)
+
+ # Save and convert the SVG files
+ save_and_convert(metabMap_mean, "mean", key)
+ save_and_convert(metabMap_median, "median", key)
+
+def apply_arrow(metabMap, rxn_id, color, isNegative):
+ """
+ Apply an arrow to a specific reaction in the metabolic map with a given color.
+
+ Args:
+ metabMap (ET.ElementTree): An XML tree representing the metabolic map.
+ rxn_id (str): The ID of the reaction to which the arrow will be applied.
+ color (str): The color of the arrow in hexadecimal format.
+
+ Returns:
+ None
+ """
+ arrow = Arrow(width=5, col=color)
+ arrow.styleReactionElementsMeanMedian(metabMap, rxn_id, isNegative)
+ pass
+
+def save_and_convert(metabMap, map_type, key):
+ """
+ Save the metabolic map as an SVG file and optionally convert it to PNG and PDF formats.
+
+ The behaviour is driven by the module-level ARGS: generate_pdf triggers the conversion,
+ generate_svg decides whether the SVG written here is kept.
+
+ Args:
+ metabMap (ET.ElementTree): An XML tree representing the metabolic map.
+ map_type (str): The type of map ('mean' or 'median').
+ key (str): The key identifying the specific map.
+
+ Returns:
+ None
+ """
+ # The SVG is always written first: it is also the input of the optional conversion below
+ svgFilePath = utils.FilePath(f"SVG Map {map_type} - {key}", ext=utils.FileFormat.SVG, prefix="result")
+ utils.writeSvg(svgFilePath, metabMap)
+ if ARGS.generate_pdf:
+ pngPath = utils.FilePath(f"PNG Map {map_type} - {key}", ext=utils.FileFormat.PNG, prefix="result")
+ pdfPath = utils.FilePath(f"PDF Map {map_type} - {key}", ext=utils.FileFormat.PDF, prefix="result")
+ convert_to_pdf(svgFilePath, pngPath, pdfPath)
+ # NOTE(review): indentation was lost in this patch. The cleanup below presumably sits at
+ # function level (independent of generate_pdf) - confirm against the original file.
+ if not ARGS.generate_svg:
+ os.remove(svgFilePath.show())
+
+
+
+
+############################ MAIN #############################################
+def main() -> None:
+ """
+ Initializes everything and sets the program in motion based on the fronted input arguments.
+
+ Returns:
+ None
+
+ Raises:
+ sys.exit : if a user-provided custom map is in the wrong format (ET.XMLSyntaxError, ET.XMLSchemaParseError)
+ """
+
+ global ARGS
+ ARGS = process_args()
+
+ if os.path.isdir('result') == False: os.makedirs('result')
+
+ core_map :ET.ElementTree = ARGS.choice_map.getMap(
+ ARGS.tool_dir,
+ utils.FilePath.fromStrPath(ARGS.custom_map) if ARGS.custom_map else None)
+ # TODO: ^^^ ugly but fine for now, the argument is None if the model isn't custom because no file was given.
+ # getMap will None-check the customPath and panic when the model IS custom but there's no file (good). A cleaner
+ # solution can be derived from my comment in FilePath.fromStrPath
+
+ ids, class_pat = getClassesAndIdsFromDatasets(ARGS.input_datas_fluxes, ARGS.input_data_fluxes, ARGS.input_class_fluxes, ARGS.names_fluxes)
+
+ if(ARGS.choice_map == utils.Model.HMRcore):
+ temp_map = utils.Model.HMRcore_no_legend
+ computeEnrichmentMeanMedian(temp_map.getMap(ARGS.tool_dir), class_pat, ids, ARGS.color_map)
+ elif(ARGS.choice_map == utils.Model.ENGRO2):
+ temp_map = utils.Model.ENGRO2_no_legend
+ computeEnrichmentMeanMedian(temp_map.getMap(ARGS.tool_dir), class_pat, ids, ARGS.color_map)
+ else:
+ computeEnrichmentMeanMedian(core_map, class_pat, ids, ARGS.color_map)
+
+
+ computeEnrichment(core_map, class_pat, ids)
+
+ # create output files: TODO: this is the same comparison happening in "maps", find a better way to organize this
+ if ARGS.comparison == "manyvsmany":
+ for i, j in it.combinations(class_pat.keys(), 2): createOutputMaps(i, j, core_map)
+ return
+
+ if ARGS.comparison == "onevsrest":
+ for single_cluster in class_pat.keys(): createOutputMaps(single_cluster, "rest", core_map)
+ return
+
+ for otherDataset in class_pat.keys():
+ if otherDataset != ARGS.control: createOutputMaps(i, j, core_map)
+
+ if not ERRORS: return
+ utils.logWarning(
+ f"The following reaction IDs were mentioned in the dataset but weren't found in the map: {ERRORS}",
+ ARGS.out_log)
+
+ print('Execution succeded')
+
+###############################################################################
+# Run the tool only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_to_map.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/flux_to_map.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,244 @@
+
+
+ marea_macros.xml
+
+
+
+ numpy
+ pandas
+ seaborn
+ scipy
+ svglib
+ pyvips
+ cairosvg
+ cobra
+ lxml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/ENGRO2_rules.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/ENGRO2_rules.csv Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,458 @@
+,0
+EX_lac__L_e,
+EX_glc__D_e,
+EX_glu__L_e,
+EX_gln__L_e,
+EX_asp__L_e,
+EX_co2_e,
+EX_h_e,
+EX_h2o_e,
+EX_pi_e,
+EX_nh4_e,
+EX_o2_e,
+EX_arg__L_e,
+EX_pro__L_e,
+EX_leu__L_e,
+EX_ile__L_e,
+EX_met__L_e,
+EX_gly_e,
+EX_phe__L_e,
+EX_ser__L_e,
+EX_ala__L_e,
+EX_asn__L_e,
+EX_fol_e,
+EX_urea_e,
+EX_pyr_e,
+PYRt2,ENSG00000118596 or ENSG00000141526 or ENSG00000155380
+EX_gudac_e,
+EX_hdca_e,
+EX_ptrc_e,
+EX_biomassx,
+EX_gthox_e,
+EX_2obut_e,
+EX_val__L_e,
+EX_spmd_e,
+EX_sprm_e,
+EX_5mthf_e,
+EX_crtn_e,
+EX_his__L_e,
+EX_lys__L_e,
+EX_cys__L_e,
+EX_so3_e,
+EX_thr__L_e,
+EX_trp__L_e,
+EX_anth_e,
+EX_tyr__L_e,
+EX_Lcystin_e,
+EX_gthrd_e,
+HEX1,ENSG00000106633 or ENSG00000156510 or ENSG00000156515 or ENSG00000159399 or ENSG00000160883
+G6PP,ENSG00000131482 or ENSG00000141349 or ENSG00000152254
+PGI,ENSG00000105220
+PFK,ENSG00000067057 or ENSG00000141959 or ENSG00000152556
+FBP,ENSG00000130957 or ENSG00000165140
+FBA,ENSG00000109107 or ENSG00000136872 or ENSG00000149925
+TPI,ENSG00000111669
+GAPD,ENSG00000105679 or ENSG00000111640
+PGK,ENSG00000102144 or ENSG00000170950
+DPGM,ENSG00000164708 or ENSG00000171314 or ENSG00000172331
+DPGase,ENSG00000164708 or ENSG00000171314 or ENSG00000172331
+PGM,ENSG00000164708 or ENSG00000171314 or ENSG00000172331
+ENO,ENSG00000074800 or ENSG00000108515 or ENSG00000111674
+PYK,ENSG00000067225 or ENSG00000143627
+LDH_L,ENSG00000111716 or ENSG00000134333 or ENSG00000151116 or ENSG00000166796 or ENSG00000166800 or ENSG00000171989
+r0407,ENSG00000109107 or ENSG00000136872 or ENSG00000149925
+PRPPS,ENSG00000101911 or ENSG00000147224 or ENSG00000229937
+G6PDH2r,ENSG00000160211
+PGL,ENSG00000049239 or ENSG00000130313
+GND,ENSG00000142657
+RPI,ENSG00000153574
+TKT2,ENSG00000007350 or ENSG00000151005 or ENSG00000163931
+RPE,ENSG00000197713 or ENSG00000235376
+TKT1,ENSG00000007350 or ENSG00000151005 or ENSG00000163931
+TALA,ENSG00000177156
+r0408,ENSG00000067057 or ENSG00000141959 or ENSG00000152556
+PDHm,ENSG00000091140 and ENSG00000110435 and ENSG00000150768 and ENSG00000168291 and (ENSG00000131828 or ENSG00000163114)
+PCm,ENSG00000173599
+PEPCK_re,ENSG00000124253
+CSm,ENSG00000062485
+ACONTm,ENSG00000100412 or ENSG00000122729
+ICDHxm,ENSG00000067829 and ENSG00000101365 and ENSG00000166411
+ICDHym,ENSG00000182054
+AKGDm,ENSG00000091140 and ENSG00000105953 and ENSG00000119689
+SUCOAS1m,ENSG00000163541 and ENSG00000172340
+SUCD1m,ENSG00000073578 and ENSG00000117118 and ENSG00000143252 and ENSG00000204370
+FUMm,ENSG00000091483
+MDHm,ENSG00000146701
+ME1m,ENSG00000082212
+ME2m,ENSG00000151376
+ME2,ENSG00000065833
+ACITL,ENSG00000131473
+FUM,ENSG00000091483
+MDH,ENSG00000014641 or ENSG00000138400
+AKGMALtm,ENSG00000108528 or ENSG00000183048
+ACONT,ENSG00000122729
+ICDHyr,ENSG00000138413
+PPA,ENSG00000107902 or ENSG00000180817
+Complex1ROS,ENSG00000004779 and ENSG00000023228 and ENSG00000065518 and ENSG00000090266 and ENSG00000099795 and ENSG00000109390 and ENSG00000110717 and ENSG00000115286 and ENSG00000119013 and ENSG00000119421 and ENSG00000125356 and ENSG00000128609 and ENSG00000130414 and ENSG00000131495 and ENSG00000136521 and ENSG00000139180 and ENSG00000140990 and ENSG00000147123 and ENSG00000147684 and ENSG00000151366 and ENSG00000158864 and ENSG00000165264 and ENSG00000166136 and ENSG00000167792 and ENSG00000168653 and ENSG00000170906 and ENSG00000174886 and ENSG00000178127 and ENSG00000183648 and ENSG00000184983 and ENSG00000186010 and ENSG00000198695 and ENSG00000198763 and ENSG00000198786 and ENSG00000198840 and ENSG00000198886 and ENSG00000198888 and ENSG00000212907 and ENSG00000213619
+FADH2ETC,ENSG00000171503 and ENSG00000105379 and ENSG00000140374
+CYOR_u10mi,ENSG00000010256 and ENSG00000127540 and ENSG00000140740 and ENSG00000156467 and ENSG00000164405 and ENSG00000169021 and ENSG00000173660 and ENSG00000179091 and ENSG00000184076 and ENSG00000198727
+CYOOm2i,ENSG00000127184 and ENSG00000135940 and ENSG00000164919 and ENSG00000178741 and ENSG00000198712 and ENSG00000198804 and ENSG00000198938 and (ENSG00000111775 or ENSG00000156885) and (ENSG00000112695 or ENSG00000161281) and (ENSG00000126267 or ENSG00000160471) and (ENSG00000131055 or ENSG00000131143) and (ENSG00000131174 or ENSG00000170516) and (ENSG00000176340 or ENSG00000187581)
+ATPS4mi,ENSG00000099624 and ENSG00000110955 and ENSG00000116459 and ENSG00000124172 and ENSG00000152234 and ENSG00000154723 and ENSG00000165629 and ENSG00000167283 and ENSG00000167863 and ENSG00000169020 and ENSG00000198899 and ENSG00000228253 and ENSG00000241468 and ENSG00000241837 and (ENSG00000135390 or ENSG00000154518 or ENSG00000159199)
+GLUCYS,ENSG00000001084 and ENSG00000023909
+GTHS,ENSG00000100983
+SPODMm,ENSG00000112096
+GTHPi,ENSG00000117450 or ENSG00000167468 or ENSG00000167815 or ENSG00000176153 or ENSG00000233276
+GTHPm,ENSG00000165672 or ENSG00000167468 or ENSG00000233276
+GDR,ENSG00000104687
+GTHOr,ENSG00000104687
+GDRm,ENSG00000104687
+GTHOm,ENSG00000104687
+r0885,ENSG00000183048
+PYRt2m,ENSG00000060762 or ENSG00000143158 or ENSG00000155380
+HMR_4964,ENSG00000100075
+r2420,ENSG00000075415
+ATPtm,ENSG00000005022 or ENSG00000151729 or ENSG00000169100
+FUMtm,ENSG00000183048
+CO2tm,
+O2tm,
+r0801,ENSG00000169100
+THD1m,ENSG00000112992
+H2Otm,ENSG00000103375
+NH4tm,
+HCO3E,ENSG00000074410 or ENSG00000104267 or ENSG00000107159 or ENSG00000118298 or ENSG00000131686 or ENSG00000133742 or ENSG00000164879 or ENSG00000167434 or ENSG00000168748 or ENSG00000185015
+HCO3Em,ENSG00000169239 or ENSG00000174990
+GLUN,ENSG00000115419
+GLNS,ENSG00000135821 or ENSG00000146166
+GLUt2m,ENSG00000177542 or ENSG00000182902
+GLNtm_1,
+GLUNm,ENSG00000115419 or ENSG00000135423
+GLUPRT,ENSG00000128059
+PRAGSr,ENSG00000159131
+GARFT,ENSG00000159131
+PRFGS,ENSG00000178921
+PRAIS,ENSG00000159131
+AIRCr,ENSG00000128050
+PRASCSi,ENSG00000128050
+ADSL2r,ENSG00000239900
+AICART,ENSG00000138363
+IMPC,ENSG00000138363
+ADSS,ENSG00000035687 or ENSG00000185100
+ADSL1r,ENSG00000239900
+IMPDH2,ENSG00000106348 or ENSG00000178035
+GMPS,ENSG00000163655
+GMPS2,ENSG00000163655
+GK1,ENSG00000143774
+RNDR2,ENSG00000167325 and (ENSG00000048392 or ENSG00000171848)
+ADK1,ENSG00000004455 or ENSG00000106992 or ENSG00000140057 or ENSG00000154027
+RNDR1,ENSG00000167325 and (ENSG00000048392 or ENSG00000171848)
+CBPS,ENSG00000084774
+ASPCT,ENSG00000084774
+DHORTS,ENSG00000084774
+DHORD9,ENSG00000102967
+ORPT,ENSG00000114491
+OMPDC,ENSG00000114491
+UMPK,ENSG00000162368
+RNDR4,ENSG00000167325 and (ENSG00000048392 or ENSG00000171848)
+TMDS,ENSG00000176890
+URIDK2r,ENSG00000168393
+NDPK2,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+NDPK3,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+CTPS1,ENSG00000047230 or ENSG00000171793
+RNDR3,ENSG00000167325 and (ENSG00000048392 or ENSG00000171848)
+GDHm,ENSG00000148672 or ENSG00000182890
+GLUDym,ENSG00000148672 or ENSG00000182890
+ASPTA,ENSG00000120053
+ASPTAm,ENSG00000125166
+ASPGLUm,ENSG00000004864 or ENSG00000115840
+ASPT,ENSG00000091483
+CBPSam,ENSG00000021826
+OCBTm,ENSG00000036473
+ORNt4m,ENSG00000102743 or ENSG00000120329
+ARGSS,ENSG00000130707
+ARGSL,ENSG00000126522
+ARGN,ENSG00000118520
+ORNDC,ENSG00000115758
+SPMS,ENSG00000116649
+SPRMS,ENSG00000102172 or ENSG00000116649
+ADMDC,ENSG00000123505
+MTAP,ENSG00000099810
+ADPT,ENSG00000198931
+MTRI,
+MDRPD,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+DKMPPD,
+SPMDtex2,
+SPRMti,ENSG00000175003
+GLYAMDTRc,ENSG00000171766
+ARGDr,
+r0074,ENSG00000159423
+GLU5SAtmc,
+G5SADs,ENSG00000159423
+PRO1x,ENSG00000143811
+P5CR,ENSG00000143811
+PROD2,ENSG00000250799
+TransportFAD,
+ASNS1,ENSG00000070669
+ASNN,ENSG00000162174 or ENSG00000166183
+METS,ENSG00000116984
+METAT,ENSG00000151224 or (ENSG00000038274 and ENSG00000168906)
+HMR_3915,ENSG00000150540
+AHCi,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+ADNK1,ENSG00000156110
+CYSTS,ENSG00000160200
+CYSTGL,ENSG00000116761
+MTHFR3,ENSG00000177000
+UNK2,
+UNK3,
+5MTHFt,ENSG00000110195 or ENSG00000110203
+ILETA,ENSG00000060982
+3MOPt2im,
+ILETAm,ENSG00000105552
+LEUTA,ENSG00000060982
+4MOPt2im,
+LEUTAm,ENSG00000105552
+OIVD1m,ENSG00000083123 and ENSG00000091140 and ENSG00000137992 and ENSG00000248098
+ACOAD8m,ENSG00000128928
+MCCCrm,ENSG00000078070 and ENSG00000131844
+MGCHrm,ENSG00000148090
+HMGCOAtm,
+VALTA,ENSG00000060982
+3MOBt2im,
+VALTAim,ENSG00000105552
+OIVD2m,ENSG00000083123 and ENSG00000091140 and ENSG00000137992 and ENSG00000248098
+ACOAD9m,ENSG00000117054 or ENSG00000151498
+ECOAH12m,ENSG00000127884 or (ENSG00000084754 and ENSG00000138029)
+3HBCOAHLm,
+HIBDm,ENSG00000106049
+r0643,ENSG00000072210 or ENSG00000111275 or ENSG00000137124 or ENSG00000143149 or ENSG00000164904
+MMSAD1m,ENSG00000119711
+MMTSADm,
+PCC,
+MMALH,
+MMEm,ENSG00000124370
+MMMm,ENSG00000146085
+ILEtmi,
+LEUt5m,
+VALt5m,
+PGCD,ENSG00000092621
+PSERT,ENSG00000135069
+PSP_L,ENSG00000146733
+GHMT2r,ENSG00000176974
+SERD_L,ENSG00000135094 or ENSG00000139410
+ALATA_L,ENSG00000166123 or ENSG00000167701
+r1435,
+GLYtm,
+GHMT2rm,ENSG00000182199
+MLTHFtm,
+r0962,ENSG00000164933
+FORtm,
+r0514,ENSG00000228716
+DHFRim,ENSG00000228716
+FTHFLmi,ENSG00000100714 or ENSG00000120254
+MTHFCm,ENSG00000065911
+MTHFDm,ENSG00000065911 or ENSG00000100714
+FOLR2,ENSG00000228716
+DHFR,ENSG00000228716
+FTHFLi,ENSG00000100714
+FTHFDH,ENSG00000136010 or ENSG00000144908
+MTHFC,ENSG00000100714
+MTHFD,ENSG00000100714 or ENSG00000177000
+MTHFD2i,ENSG00000065911 or ENSG00000163738
+ACACT1r,ENSG00000120437
+HMGCOAS,ENSG00000112972
+HMGCOAR,ENSG00000113161
+MEVK1,ENSG00000110921
+PMEVK,ENSG00000163344
+IPDDI,ENSG00000067064 or ENSG00000148377
+DPMVD,ENSG00000167508
+DMATT,ENSG00000152904
+GRTT,ENSG00000152904
+HMR_1465,ENSG00000079459
+HMR_1467,ENSG00000079459
+SMO,ENSG00000104549
+LNSTLS,ENSG00000160285
+HMR_1477,ENSG00000001630
+HMR_1478,ENSG00000001630
+HMR_1479,ENSG00000001630
+C14STR,ENSG00000143815 or ENSG00000149809
+HMR_1490,ENSG00000052802 or ENSG00000170271
+HMR_1493,ENSG00000052802 or ENSG00000170271
+HMR_1494,ENSG00000052802 or ENSG00000170271
+C3STDH1,ENSG00000147383
+HMR_1495,ENSG00000147383 or ENSG00000183305
+HMR_1500,ENSG00000132196
+HMR_1502,ENSG00000052802 or ENSG00000170271
+HMR_1503,ENSG00000052802 or ENSG00000170271
+HMR_1504,ENSG00000052802 or ENSG00000170271
+HMR_1505,ENSG00000147383
+HMR_1509,ENSG00000132196
+CHLSTI_1,ENSG00000147155
+HMR_1516,ENSG00000109929
+RE2410C,
+DSMSTOLR,ENSG00000116133
+ACCOAC,ENSG00000076555 or ENSG00000278540
+FASN,ENSG00000169710
+MCAT,ENSG00000169710
+AcetoacetylACPsynthesis,ENSG00000169710
+r0691,ENSG00000169710
+r0681,ENSG00000169710
+r0682,ENSG00000169710
+r0760,ENSG00000169710
+r0761,ENSG00000169710
+r0762,ENSG00000169710
+r0763,ENSG00000169710
+r0764,ENSG00000169710
+r0694,ENSG00000169710
+r0695,ENSG00000169710
+r0765,ENSG00000169710
+r0766,ENSG00000169710
+r0692,ENSG00000169710
+r0693,ENSG00000169710
+r0767,ENSG00000169710
+r0768,ENSG00000169710
+r0769,ENSG00000169710
+r0770,ENSG00000169710
+r0712,ENSG00000169710
+r0713,ENSG00000169710
+r0701,ENSG00000169710
+r0702,ENSG00000169710
+r0771,ENSG00000169710
+r0772,ENSG00000169710
+r0696,ENSG00000169710
+r0697,ENSG00000169710
+r0773,ENSG00000169710
+FA160ACPH,ENSG00000152463 or ENSG00000169710
+palmitateActivation,ENSG00000068366 or ENSG00000123983 or ENSG00000151726 or ENSG00000164398
+carnitineAcylTransferaseI,ENSG00000110090 or ENSG00000169169 or ENSG00000205560
+CARN160t_m,ENSG00000178537
+carnitineAcylTransferaseII,ENSG00000157184
+betaOxidation,ENSG00000072778 and ENSG00000084754 and ENSG00000105379 and ENSG00000115361 and ENSG00000117054 and ENSG00000127884 and ENSG00000138029 and ENSG00000140374 and ENSG00000161533 and ENSG00000167315
+Biomass,
+GLCt1,ENSG00000059804 or ENSG00000100170 or ENSG00000100191 or ENSG00000105641 or ENSG00000109667 or ENSG00000115665 or ENSG00000117394 or ENSG00000117834 or ENSG00000133460 or ENSG00000136856 or ENSG00000138074 or ENSG00000140675 or ENSG00000142583 or ENSG00000146411 or ENSG00000148942 or ENSG00000151229 or ENSG00000154025 or ENSG00000158865 or ENSG00000160326 or ENSG00000163581 or ENSG00000173262 or ENSG00000181856 or ENSG00000197241 or ENSG00000197496 or ENSG00000198743 or ENSG00000256870
+O2t,
+gln_L_t,(ENSG00000130876 and ENSG00000168003) or ENSG00000017483 or ENSG00000188338 or (ENSG00000130876 and ENSG00000168003) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000149150 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003) or ENSG00000111371 or ENSG00000268104 or ENSG00000134294 or ENSG00000139209 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234)
+r0963,ENSG00000110195 or ENSG00000110203 or ENSG00000165457
+arg_L_t,ENSG00000268104 or ENSG00000103064 or ENSG00000021488 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003) or ENSG00000139514 or ENSG00000003989 or ENSG00000165349 or ENSG00000103064 or (ENSG00000168003 and ENSG00000155465) or (ENSG00000103064 and ENSG00000168003)
+phe_L_t,ENSG00000134294 or ENSG00000165349 or ENSG00000268104 or ENSG00000003989 or ENSG00000111371 or ENSG00000139514 or ENSG00000139209 or (ENSG00000174358 and (ENSG00000147003 or ENSG00000130234)) or ENSG00000112394 or ENSG00000278550 or ENSG00000149150 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or ENSG00000092068 or (ENSG00000164363 and ENSG00000147003)
+ile_L_t,ENSG00000268104 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000149150 or ENSG00000278550 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+leu_L_t,ENSG00000111371 or ENSG00000165349 or ENSG00000168003 or ENSG00000003989 or ENSG00000139514 or ENSG00000268104 or ENSG00000139209 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000149150 or ENSG00000278550 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or ENSG00000149150 or (ENSG00000164363 and ENSG00000147003)
+val_L_t,ENSG00000268104 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000149150 or ENSG00000278550 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+met_L_t,ENSG00000111371 or ENSG00000268104 or ENSG00000134294 or ENSG00000197375 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000149150 or ENSG00000278550 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+ser_L_t,ENSG00000111371 or ENSG00000268104 or ENSG00000134294 or ENSG00000139209 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000017483 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000123643 or ENSG00000149150 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+gly_t,(ENSG00000130876 and ENSG00000168003) or ENSG00000111371 or ENSG00000268104 or ENSG00000134294 or ENSG00000139209 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000196517 or ENSG00000196517 or (ENSG00000130876 and ENSG00000168003) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000165970 or ENSG00000017483 or (ENSG00000164363 and ENSG00000147003) or ENSG00000186335 or ENSG00000123643
+asn_L_t,ENSG00000111371 or ENSG00000268104 or ENSG00000134294 or ENSG00000139209 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000017483 or ENSG00000188338 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000149150 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+pro_L_t,ENSG00000139209 or ENSG00000111371 or ENSG00000134294 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000011083 or (ENSG00000163817 and ENSG00000147003) or ENSG00000180773 or ENSG00000186335 or ENSG00000123643 or ENSG00000011083
+HDCAt,ENSG00000125166 or ENSG00000130304 or ENSG00000135218 or ENSG00000167114
+GTHRDt2,
+DmLact,ENSG00000100156 or ENSG00000118596 or ENSG00000141526 or ENSG00000155380 or ENSG00000256870
+UREAt,ENSG00000132874 or ENSG00000141469
+DmBiomass,
+NH4t,
+PTRCtex2,
+GUDACtr2,
+Dm2oxobutyrate,
+H2Ot,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+PHLACHt,
+CO2t,
+DmGSSG,
+glu_L_t,ENSG00000105143 or ENSG00000106688 or ENSG00000079215 or ENSG00000110436 or ENSG00000162383 or ENSG00000188338 or ENSG00000137204 or (ENSG00000164363 and ENSG00000147003)
+ala_L_t,ENSG00000134294 or ENSG00000003989 or ENSG00000268104 or ENSG00000139514 or ENSG00000111371 or ENSG00000139209 or ENSG00000165349 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000017483 or ENSG00000188338 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or ENSG00000115902 or (ENSG00000164363 and ENSG00000147003) or ENSG00000180773 or ENSG00000186335 or ENSG00000123643 or ENSG00000017483
+asp_L_t,ENSG00000105143 or ENSG00000106688 or ENSG00000079215 or ENSG00000110436 or ENSG00000162383 or ENSG00000188338
+ATPM,
+GACMTRc,ENSG00000130005
+CKc_cho,ENSG00000104879 or ENSG00000166165
+CRTNsyn_cho,
+CRTNtr,
+NDPK1,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+NDPK5,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+NDPK8,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+NDPK7,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+DTMPK,ENSG00000168393
+NDPK4,ENSG00000103024 or ENSG00000143156 or ENSG00000172113 or (ENSG00000239672 and ENSG00000243678)
+his_L_t,ENSG00000268104 or ENSG00000134294 or ENSG00000139209 or ENSG00000188338 or ENSG00000103064 or ENSG00000103064 or ENSG00000021488 or ENSG00000017483 or ENSG00000268104 or ENSG00000196517
+PTRCOX1,ENSG00000002726 or ENSG00000131471 or ENSG00000131480
+ABUTD,ENSG00000143149
+r0465_1,ENSG00000172508
+ABUTH,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+GLUDC,ENSG00000136750 or ENSG00000128683
+HISDr,ENSG00000084110
+URCN,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+IZPN,ENSG00000139344
+GluForTx,ENSG00000160282
+FTCD,ENSG00000160282
+NBAHH_ir,ENSG00000103375 or ENSG00000135517 or ENSG00000161798 or ENSG00000167580 or ENSG00000171885 or ENSG00000240583
+r0283,ENSG00000172508
+ASP1DC,ENSG00000128683
+lys_L_t,ENSG00000268104 or ENSG00000197375 or ENSG00000139514 or ENSG00000003989 or ENSG00000139209 or ENSG00000165349 or ENSG00000103064 or ENSG00000103064 or ENSG00000021488 or ENSG00000268104
+LYStm,ENSG00000102743 or ENSG00000120329
+SACCD3m,ENSG00000008311
+r0525,ENSG00000008311
+AASAD3m,
+r0450,ENSG00000109576
+2OXOADOXm,ENSG00000091140 and ENSG00000105953 and ENSG00000110435 and ENSG00000119689
+GLUTCOADHm,ENSG00000105607
+3HBCDm,ENSG00000121310
+HACD1m,ENSG00000138796 or ENSG00000072506 or (ENSG00000084754 and ENSG00000138029)
+HMGCOASm,ENSG00000134240
+cys_L_t,ENSG00000268104 or ENSG00000134294 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000278550 or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or ENSG00000017483 or (ENSG00000164363 and ENSG00000147003)
+CYSO,ENSG00000129596
+3SALATAi,ENSG00000120053
+3SALAASPm,ENSG00000004864 or ENSG00000115840
+3SALATAim,ENSG00000125166
+3SPYRSP,
+3SPYRSPm,
+HMR_3951,
+ExSulfitem,
+tyr_L_t,ENSG00000112394 or ENSG00000268104 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or (ENSG00000164363 and ENSG00000147003)
+TYRTA,ENSG00000120053 or ENSG00000198650
+34HPPOR,ENSG00000158104
+HGNTOR,ENSG00000113924
+MACACI,ENSG00000100577
+FUMAC,ENSG00000103876
+AACOAT,ENSG00000081760
+thr_L_t,ENSG00000268104 or ENSG00000111371 or ENSG00000134294 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000149150 or ENSG00000268104
+THRD_L,ENSG00000135094 or ENSG00000139410
+OBDHc,
+PPCOAtm,
+trp_L_t,ENSG00000268104 or (ENSG00000174358 and ENSG00000147003) or (ENSG00000174358 and ENSG00000130234) or ENSG00000103257 or ENSG00000103064 or ENSG00000103064 or ENSG00000103064 or ENSG00000268104 or ENSG00000103257 or (ENSG00000164363 and ENSG00000147003) or ENSG00000180773 or ENSG00000112394
+TRPO2,ENSG00000131203 or ENSG00000188676 or ENSG00000151790
+FKYNH,ENSG00000183077
+KYN,ENSG00000115919
+ANTHte,
+KYN3OX,ENSG00000117009
+HKYNH,ENSG00000115919
+3HAO,ENSG00000162882
+PCLAD,ENSG00000153086
+AM6SAD,
+AMCOXO,
+2OXOADPTm,ENSG00000183032
+CystinePyruvate,
+r0027,ENSG00000184470 or ENSG00000198431
+HMR_3996,
+CYSGLTH,
+ACACT1m,ENSG00000075239 or (ENSG00000084754 and ENSG00000138029)
+G3PD1ir,ENSG00000167588
+GLYC3Ptm,
+G3PDm,
+DHAPtm,
+Transport_ala_B_c_e,
+EX_ala_B_e,
+TMDK1,ENSG00000166548 or ENSG00000167900
+THYMDt1,ENSG00000112759 or ENSG00000174669
+EX_thymd_e,
+Transport_HC00576_c_e,
+EX_HC00576_e,
+Transport_4abut_c_e,
+EX_4abut_e,
+GLUVESSEC,ENSG00000091664 or ENSG00000179520 or ENSG00000104888
+EX_chsterol_e,
+r1050,
+EX_gal_e,
+GALt1r,ENSG00000136856 or ENSG00000117394 or ENSG00000059804 or ENSG00000163581 or ENSG00000197496
+GALK,ENSG00000108479 or ENSG00000156958
+UGLT,ENSG00000213930
+PGMT,ENSG00000079739 or ENSG00000169299
+UDPG4E,ENSG00000117308
+t_Lcystin_ala__L,ENSG00000021488 and ENSG00000138079
+t_Lcystin_glu__L,ENSG00000151012 and ENSG00000168003
+t_Lcystin_leu__L,ENSG00000021488 and ENSG00000138079
+t_Lcystin_ser__L,ENSG00000138079 and ENSG00000021488
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/desktop.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/desktop.ini Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,6 @@
+[.ShellClassInfo]
+IconResource=C:\WINDOWS\System32\SHELL32.dll,4
+[ViewState]
+Mode=
+Vid=
+FolderType=Generic
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/medium/medium.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/medium/medium.csv Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,35 @@
+engro2_name,RPMI 1640,DMEM,EMEM,DMEM:F12 = 1:1,McCoy's 5A,IMDM,MEM,GMEM,Leibovitz's L-15,F12,F10,AMEM,Waymouth MB 7521 medium,F12K,William's E Medium,Medium 199,MCDB 105,NEAA,RPMI:F12 = 1:1,RPMI:MEM = 1:1,RPMI:EMEM = 1:1,EMEM:F12 = 1:1,DMEM:RPMI = 2:1,DMEM:IMDM = 1:1,MCDB 105:Medium 199 = 1:1,allOpen
+EX_Lcystin_e,0.20766774,0.20127796,0.09904154,0.09996805,0.0,0.29201278,0.09904154,0.09904154,0.0,0.0,0.0,0.09904154,0.0625,0.0,0.08329073,0.108333334,0.0,0.0,0.10383387,0.15335464,0.15335464,0.04952077,0.20340788666666665,0.24664537000000003,0.054166667,1000
+EX_ala__L_e,0.0,0.0,0.0,0.049999997,0.15617977,0.28089887,0.0,0.0,2.52809,0.099999994,0.101123594,0.28089887,0.0,0.20224719,1.011236,0.28089887,0.030337078,10.0,0.049999997,0.0,0.0,0.049999997,0.0,0.140449435,0.15561797400000002,1000
+EX_arg__L_e,1.1494253,0.39810428,0.5971564,0.69905216,0.19952606,0.39810428,0.5971564,0.19905214,2.8735633,1.0,1.0,0.49763033,0.35545024,2.0,0.28735632,0.33175355,0.29952607,0.0,1.07471265,0.8732908500000001,0.8732908500000001,0.7985782,0.64854462,0.39810428,0.31563980999999997,1000
+EX_asn__L_e,0.37878788,0.0,0.0,0.05,0.3409091,0.18939394,0.0,0.0,1.8939394,0.10006667,0.1,0.33333334,0.0,0.2,0.13333334,0.0,0.1,10.0,0.239427275,0.18939394,0.18939394,0.050033335,0.12626262666666668,0.09469697,0.05,1000
+EX_asp__L_e,0.15037593,0.0,0.0,0.05,0.15015037,0.22556391,0.0,0.0,0.0,0.1,0.09774436,0.22556391,0.45112783,0.2,0.22556391,0.22556391,0.1,10.0,0.125187965,0.075187965,0.075187965,0.05,0.05012531,0.112781955,0.162781955,1000
+EX_chsterol_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00051679584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00025839792,1000
+EX_cys__L_e,0.0,0.0,0.0,0.09977272,0.2603306,0.0,0.0,0.0,0.9917355,0.19954544,0.20661157,0.5681818,0.5041322,0.39772728,0.3305785,0.0005681818,0.0,0.0,0.09977272,0.0,0.0,0.09977272,0.0,0.0,0.0002840909,1000
+EX_fol_e,0.0022675737,0.009070295,0.0022675737,0.0060090707,0.022675738,0.009070295,0.0022675737,0.0045351475,0.0022675737,0.0029478457,0.0029478457,0.0022675737,0.0011337869,0.0029478457,0.0022675737,2.2675737e-05,0.001171875,0.0,0.0026077097,0.0022675737,0.0022675737,0.0026077097,0.006802721233333334,0.009070295,0.0005972753685,1000
+EX_gal_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000
+EX_glc__D_e,11.111111,25.0,5.5555553,17.505556,16.666666,25.0,5.5555553,25.0,0.0,10.011111,6.111111,5.5555553,27.777779,7.0,11.111111,5.5555553,5.5555553,0.0,10.561111,8.33333315,8.33333315,7.78333315,20.370370333333334,25.0,5.5555553,1000
+EX_gln__L_e,2.0547945,3.9726028,2.0,2.5,1.5013698,4.0,2.0,2.0,2.0547945,1.0,1.0,2.0,2.3972602,2.0,0.0,0.6849315,0.0,0.0,1.52739725,2.02739725,2.02739725,1.5,3.333333366666667,3.9863014000000003,0.34246575,1000
+EX_glu__L_e,0.13605443,0.0,0.0,0.05,0.15034014,0.5102041,0.0,0.0,0.0,0.1,0.1,0.5102041,1.0204082,0.19727892,0.34013605,0.5102041,0.029931974,10.0,0.118027215,0.068027215,0.068027215,0.05,0.04535147666666667,0.25510205,0.27006803700000004,1000
+EX_gly_e,0.13333334,0.4,0.0,0.25,0.1,0.4,0.0,0.0,2.6666667,0.1,0.1,0.6666667,0.6666667,0.2,0.6666667,0.6666667,0.030666666,10.0,0.11666667,0.06666667,0.06666667,0.05,0.31111111333333336,0.4,0.348666683,1000
+EX_gthrd_e,0.0032573289,0.0,0.0,0.0,0.0016286644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048859935,0.0,0.00016286645,0.00016286645,0.0,0.0,0.00162866445,0.00162866445,0.00162866445,0.0,0.0010857763,0.0,8.1433225e-05,1000
+EX_hdca_e,0.0,0.0,0.0,0.00014999999,0.0,0.0,0.0,0.0,0.0,0.00029999999,0.0,0.0,0.0,0.0,0.00010169491,0.0,0.0,0.0,0.000149999995,0.0,0.0,0.000149999995,0.0,0.0,0.0,1000
+EX_his__L_e,0.09677419,0.2,0.2,0.14990476,0.09980952,0.2,0.2,0.1,1.6129032,0.1,0.10952381,0.2,0.82580644,0.21809523,0.09677419,0.10419047,0.2,0.0,0.09838709500000001,0.148387095,0.148387095,0.15000000000000002,0.16559139666666667,0.2,0.152095235,1000
+EX_ile__L_e,0.3816794,0.8015267,0.39694658,0.41580153,0.300458,0.8015267,0.39694658,0.39694658,1.908397,0.030534351,0.019847328,0.4,0.1908397,0.060152672,0.3816794,0.3053435,0.5038168,0.0,0.2061068755,0.38931298999999997,0.38931298999999997,0.2137404655,0.6615776000000001,0.8015267,0.40458014999999997,1000
+EX_leu__L_e,0.3816794,0.8015267,0.39694658,0.45076334,0.300458,0.8015267,0.39694658,0.39694658,0.9541985,0.1,0.099236645,0.39694658,0.3816794,0.2,0.57251906,0.45801526,1.0,0.0,0.2408397,0.38931298999999997,0.38931298999999997,0.24847329,0.6615776000000001,0.8015267,0.72900763,1000
+EX_lys__L_e,0.2739726,0.7978142,0.3989071,0.4986339,0.19945355,0.7978142,0.3989071,0.3989071,0.51369864,0.19945355,0.15846995,0.3989071,1.3114754,0.3989071,0.47792348,0.38251367,0.9945355,0.0,0.236713075,0.33643985,0.33643985,0.299180325,0.6232003333333334,0.7978142,0.688524585,1000
+EX_met__L_e,0.10067114,0.20134228,0.10067114,0.11570469,0.099999994,0.20134228,0.10067114,0.10067114,0.5033557,0.030201342,0.030201342,0.10067114,0.33557048,0.06013423,0.10067114,0.10067114,0.10067114,0.0,0.065436241,0.10067114,0.10067114,0.065436241,0.16778523333333337,0.20134228,0.10067114,1000
+EX_phe__L_e,0.09090909,0.4,0.19393939,0.2150303,0.1,0.4,0.19393939,0.2,0.75757575,0.030303031,0.030303031,0.19393939,0.3030303,0.060121212,0.15151516,0.15151516,0.2,0.0,0.0606060605,0.14242423999999998,0.14242423999999998,0.1121212105,0.29696969666666667,0.4,0.17575758000000002,1000
+EX_pi_e,5.633803,0.91558444,1.0144928,0.9530394099999999,4.2028985,0.9057971,1.0144928,0.89855075,1.77920467,1.0,1.6926885,1.0144928,2.7001757,1.23784074,1.0144928,1.0144928,0.5,0.0,3.3169015,3.3241479000000003,3.3241479000000003,1.0072464,2.4883239600000002,0.91069077,0.7572464,1000
+EX_pro__L_e,0.17391305,0.0,0.0,0.15,0.15043478,0.3478261,0.0,0.0,0.0,0.3,0.1,0.3478261,0.4347826,0.6,0.26086956,0.3478261,0.1,10.0,0.236956525,0.086956525,0.086956525,0.15,0.05797101666666667,0.17391305,0.22391305,1000
+EX_ptrc_e,0.0,0.0,0.0,0.0005031056,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0019875776,0.0,0.0,1.242236e-06,0.0,0.0005,0.0,0.0,0.0005,0.0,0.0,6.21118e-07,1000
+EX_pyr_e,0.0,0.0,0.0,0.5,0.0,1.0,0.0,0.0,5.0,1.0,1.0,1.0,0.0,2.0,0.22727273,0.0,1.0,0.0,0.5,0.0,0.0,0.5,0.0,0.5,0.5,1000
+EX_ser__L_e,0.2857143,0.4,0.0,0.25,0.25047618,0.4,0.0,0.0,1.9047619,0.1,0.1,0.23809524,0.0,0.2,0.0952381,0.23809524,0.30476192,10.0,0.19285714999999998,0.14285715,0.14285715,0.05,0.36190476666666666,0.4,0.27142858000000003,1000
+EX_thr__L_e,0.16806723,0.79831934,0.40336135,0.44915968,0.15042016,0.79831934,0.40336135,0.39999998,2.5210085,0.099999994,0.0302521,0.40336135,0.6302521,0.19327731,0.33613446,0.25210086,0.10084034,0.0,0.134033612,0.28571429000000004,0.28571429000000004,0.251680672,0.5882353033333333,0.79831934,0.17647059999999998,1000
+EX_thymd_e,0.0,0.0,0.0,0.0015082645,0.0,0.0,0.0,0.0,0.0,0.002892562,0.002892562,0.041322313,0.0,0.002892562,0.0,0.0,9.917356e-05,0.0,0.001446281,0.0,0.0,0.001446281,0.0,0.0,4.958678e-05,1000
+EX_trp__L_e,0.024509804,0.078431375,0.04901961,0.04421569,0.0151960775,0.078431375,0.04901961,0.039215688,0.09803922,0.01,0.0029411765,0.04901961,0.19607843,0.020098038,0.04901961,0.04901961,0.020098038,0.0,0.017254902,0.036764707,0.036764707,0.029509805,0.060457517999999995,0.078431375,0.034558824,1000
+EX_tyr__L_e,0.11111111,0.39779004,0.19923371,0.21375479,0.1,0.46222222,0.19923371,0.19923371,1.6574585,0.02980916,0.010038313,0.23111111,0.22099447,0.051526718,0.19406131,0.22222222,0.1,0.0,0.07046013500000001,0.15517241,0.15517241,0.114521435,0.3022303966666667,0.43000613,0.16111111,1000
+EX_val__L_e,0.17094018,0.8034188,0.3931624,0.4517094,0.15042736,0.8034188,0.3931624,0.4,0.85470086,0.1,0.02991453,0.3931624,0.5555556,0.1965812,0.42735043,0.21367522,1.0,0.0,0.13547009,0.28205129,0.28205129,0.2465812,0.5925925933333334,0.8034188,0.60683761,1000
+EX_o2_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
+EX_h_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
+EX_h2o_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/models/ENGRO2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/models/ENGRO2.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,11558 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
gpr_old: ENSG00000017483 or ENSG00000111371 or ENSG00000134294 or ENSG00000139209 or ENSG00000149150 or ENSG00000174358 or ENSG00000188338 or ENSG00000268104 or (ENSG00000147003 and ENSG00000164363)
gpr_old: ENSG00000017483 or ENSG00000111371 or ENSG00000123643 or ENSG00000134294 or ENSG00000139209 or ENSG00000149150 or ENSG00000268104 or (ENSG00000130234 and ENSG00000174358) or (ENSG00000147003 and ENSG00000164363) or (ENSG00000147003 and ENSG00000174358)
gpr_old: ENSG00000017483 or ENSG00000111371 or ENSG00000123643 or ENSG00000134294 or ENSG00000139209 or ENSG00000165970 or ENSG00000186335 or ENSG00000196517 or ENSG00000268104 or (ENSG00000130234 and ENSG00000174358) or (ENSG00000147003 and ENSG00000164363) or (ENSG00000147003 and ENSG00000174358)
gpr_old: ENSG00000017483 or ENSG00000111371 or ENSG00000134294 or ENSG00000139209 or ENSG00000149150 or ENSG00000188338 or ENSG00000268104 or (ENSG00000130234 and ENSG00000174358) or (ENSG00000147003 and ENSG00000164363) or (ENSG00000147003 and ENSG00000174358)
gpr_old: ENSG00000003989 or ENSG00000017483 or ENSG00000111371 or ENSG00000123643 or ENSG00000134294 or ENSG00000139209 or ENSG00000139514 or ENSG00000165349 or ENSG00000180773 or ENSG00000186335 or ENSG00000188338 or ENSG00000268104 or (ENSG00000130234 and ENSG00000174358) or (ENSG00000147003 and ENSG00000164363) or (ENSG00000147003 and ENSG00000174358)
+
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_genes.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_genes.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_genes.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_genes.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_rules.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/ENGRO2_rules.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_genes.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_genes.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_genes.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_genes.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_rules.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/HMRcore_rules.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/RECON_genes.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/RECON_genes.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/Recon_genes.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/Recon_genes.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/Recon_rules.p
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/Recon_rules.p has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/black_list.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/black_list.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/reactions.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/reactions.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/synonyms.pickle
Binary file cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/pickle files/synonyms.pickle has changed
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/readme.txt Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,9 @@
+I file sono codificati con Pickle in esadecimale, contengono rispettivamente i geni e le regole salvati in dizionari.
+
+Geni:
+{keys = possibili codifiche dei geni : value = { keys = nome dei geni nella codifica corrispondente : 'ok' } }
+
+Regole:
+{keys = possibili codifiche dei geni : value = { keys = nome della reazione/metabolita : [ lista di stringhe contenente la regola nella codifica corrispondente alla keys ] } }
+
+README DA RIVEDERE
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/ENGRO2_map.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/ENGRO2_map.svg Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,20105 @@
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/ENGRO2_no_legend_map.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/ENGRO2_no_legend_map.svg Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,20019 @@
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/HMRcore_map.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/HMRcore_map.svg Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,7702 @@
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/HMRcore_no_legend_map.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/local/svg metabolic maps/HMRcore_no_legend_map.svg Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,7654 @@
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,925 @@
+from __future__ import division
+import csv
+from enum import Enum
+import re
+import sys
+import numpy as np
+import pandas as pd
+import itertools as it
+import scipy.stats as st
+import lxml.etree as ET
+import math
+import utils.general_utils as utils
+from PIL import Image
+import os
+import argparse
+import pyvips
+from typing import Tuple, Union, Optional, List, Dict
+
+# Reaction IDs that could not be found in the map while styling it (filled by Arrow.applyTo, reported by main).
+ERRORS = []
+########################## argparse ##########################################
+# Parsed command-line arguments: only declared here, main() assigns the actual value.
+ARGS :argparse.Namespace
+def process_args() -> argparse.Namespace:
+    """
+    Interfaces the script of a module with its frontend, making the user's choices for various parameters available as values in code.
+
+    The arguments are always read from sys.argv, the function itself takes no parameters.
+
+    Returns:
+        Namespace : An object containing the parsed arguments
+    """
+    parser = argparse.ArgumentParser(
+        usage = "%(prog)s [options]",
+        description = "process some value's genes to create a comparison's map.")
+
+    #General:
+    parser.add_argument(
+        '-td', '--tool_dir',
+        type = str,
+        required = True,
+        help = 'your tool directory')
+
+    parser.add_argument('-on', '--control', type = str)
+    parser.add_argument('-ol', '--out_log', help = "Output log")
+
+    #Computation details:
+    parser.add_argument(
+        '-co', '--comparison',
+        type = str,
+        # The default has to be one of the choices: argparse does not check defaults against them and
+        # the previous value ('1vs1') matched none of the comparison modes handled by this module.
+        default = 'manyvsmany',
+        choices = ['manyvsmany', 'onevsrest', 'onevsmany'])
+
+    parser.add_argument(
+        '-pv' ,'--pValue',
+        type = float,
+        default = 0.1,
+        help = 'P-Value threshold (default: %(default)s)')
+
+    parser.add_argument(
+        '-fc', '--fChange',
+        type = float,
+        default = 1.5,
+        help = 'Fold-Change threshold (default: %(default)s)')
+
+    parser.add_argument(
+        "-ne", "--net",
+        type = utils.Bool("net"), default = False,
+        help = "choose if you want net enrichment for RPS")
+
+    parser.add_argument(
+        '-op', '--option',
+        type = str,
+        choices = ['datasets', 'dataset_class'],
+        help='dataset or dataset and class')
+
+    #RAS:
+    parser.add_argument(
+        "-ra", "--using_RAS",
+        type = utils.Bool("using_RAS"), default = True,
+        help = "choose whether to use RAS datasets.")
+
+    parser.add_argument(
+        '-id', '--input_data',
+        type = str,
+        help = 'input dataset')
+
+    parser.add_argument(
+        '-ic', '--input_class',
+        type = str,
+        help = 'sample group specification')
+
+    parser.add_argument(
+        '-ids', '--input_datas',
+        type = str,
+        nargs = '+',
+        help = 'input datasets')
+
+    parser.add_argument(
+        '-na', '--names',
+        type = str,
+        nargs = '+',
+        help = 'input names')
+
+    #RPS:
+    parser.add_argument(
+        "-rp", "--using_RPS",
+        type = utils.Bool("using_RPS"), default = False,
+        help = "choose whether to use RPS datasets.")
+
+    parser.add_argument(
+        '-idr', '--input_data_rps',
+        type = str,
+        help = 'input dataset rps')
+
+    parser.add_argument(
+        '-icr', '--input_class_rps',
+        type = str,
+        help = 'sample group specification rps')
+
+    parser.add_argument(
+        '-idsr', '--input_datas_rps',
+        type = str,
+        nargs = '+',
+        help = 'input datasets rps')
+
+    parser.add_argument(
+        '-nar', '--names_rps',
+        type = str,
+        nargs = '+',
+        help = 'input names rps')
+
+    #Output:
+    parser.add_argument(
+        "-gs", "--generate_svg",
+        type = utils.Bool("generate_svg"), default = True,
+        help = "choose whether to keep the generated SVG maps in the output.")
+
+    parser.add_argument(
+        "-gp", "--generate_pdf",
+        type = utils.Bool("generate_pdf"), default = True,
+        help = "choose whether to also convert the generated maps to PDF.")
+
+    parser.add_argument(
+        '-cm', '--custom_map',
+        type = str,
+        help='custom map to use')
+
+    parser.add_argument(
+        '-mc', '--choice_map',
+        type = utils.Model, default = utils.Model.HMRcore,
+        choices = [utils.Model.HMRcore, utils.Model.ENGRO2, utils.Model.Custom])
+
+    args :argparse.Namespace = parser.parse_args()
+    # net enrichment is only meaningful with RPS data: when only RAS is in use the flag is forced off.
+    if args.using_RAS and not args.using_RPS: args.net = False
+
+    return args
+
+############################ dataset input ####################################
+def read_dataset(data :str, name :str) -> pd.DataFrame:
+    """
+    Loads a tab-separated file (first line used as header) into a DataFrame, terminating the program
+    when the file cannot be used as a dataset.
+
+    Args:
+        data : filepath of the dataset (from frontend input params or literals upon calling)
+        name : name associated with the dataset, only used in the abort message
+
+    Returns:
+        pd.DataFrame : the dataset in a runtime operable shape
+
+    Raises:
+        sys.exit : if the file holds no data (pd.errors.EmptyDataError) or if it has less than 2 columns
+    """
+    def abort() -> None:
+        # same message for both failure modes, built only when actually needed
+        sys.exit('Execution aborted: wrong format of ' + name + '\n')
+
+    try:
+        table = pd.read_csv(data, sep = '\t', header = 0, engine='python')
+    except pd.errors.EmptyDataError:
+        abort()
+
+    if len(table.columns) >= 2:
+        return table
+    abort()
+
+############################ dataset name #####################################
+def name_dataset(name_data :str, count :int) -> str:
+    """
+    Makes a dataset name unique. "Dataset" is the default name of every dataset, so when the user left
+    it unchanged the counter is appended as f"_{count}"; any other name is returned as it is.
+
+    Args:
+        name_data : name associated with the dataset (from frontend input params)
+        count : counter from 1 to make these names unique (external)
+
+    Returns:
+        str : the name made unique
+    """
+    label = str(name_data)
+    return label + '_' + str(count) if label == 'Dataset' else label
+
+############################ map_methods ######################################
+FoldChange = Union[float, int, str] # Union[float, Literal[0, "-INF", "INF"]]
+def fold_change(avg1 :float, avg2 :float) -> FoldChange:
+    """
+    Calculates the fold change between two average expression values.
+
+    Args:
+        avg1 : average expression value from one dataset
+        avg2 : average expression value from the other dataset
+
+    Returns:
+        FoldChange :
+            0 : when both input values are 0
+            "-INF" : when only avg1 is 0
+            "INF" : when only avg2 is 0
+            float : (avg1 - avg2) / (|avg1| + |avg2|) for any other combination of values
+    """
+    if avg1 == 0:
+        return 0 if avg2 == 0 else '-INF'
+    if avg2 == 0:
+        return 'INF'
+
+    # The result is a normalized difference: callers compare it against
+    # (threshold_F_C - 1) / (abs(threshold_F_C) + 1), with threshold_F_C > 1.
+    difference = avg1 - avg2
+    magnitude = abs(avg1) + abs(avg2)
+    return difference / magnitude
+
+def fix_style(l :str, col :Optional[str], width :str, dash :str) -> str:
+    """
+    Rewrites the style string of a reaction arrow in the SVG map so that its "stroke", "stroke-width"
+    and "stroke-dasharray" properties hold the given values. Properties already present are replaced
+    in place, missing ones are appended at the end (in that order).
+
+    Args:
+        l : current style string of an SVG element
+        col : new value for the "stroke" style property
+        width : new value for the "stroke-width" style property
+        dash : new value for the "stroke-dasharray" style property
+
+    Returns:
+        str : the fixed style string
+    """
+    properties = (('stroke:', col), ('stroke-width:', width), ('stroke-dasharray:', dash))
+    pieces = l.split(';')
+    found = set()
+    for idx, piece in enumerate(pieces):
+        for prefix, value in properties:
+            if piece.startswith(prefix):
+                pieces[idx] = prefix + value
+                found.add(prefix)
+                break # the three prefixes are mutually exclusive, no need to test the others
+
+    for prefix, value in properties:
+        if prefix not in found: pieces.append(prefix + value)
+
+    return ';'.join(pieces)
+
+# TODO: the type of d values is collapsed, losing precision, because the dict contains lists instead of tuples.
+def fix_map(d :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, threshold_P_V :float, threshold_F_C :float, max_z_score :float) -> ET.ElementTree:
+    """
+    Edits the selected SVG map based on the p-value and fold change data (d) and some significance thresholds also passed as inputs.
+
+    Every element whose ID starts with "R_" and has an entry in d is restyled:
+        - p-value not below threshold_P_V: thin, grey, dashed arrow;
+        - fold change (numeric) under the cut-off: thin, grey, solid arrow;
+        - otherwise: solid arrow colored by the sign of the fold change, as thick as its z-score is large.
+
+    Args:
+        d : dictionary mapping [p-value, fold-change, z-score] (values) to each reaction ID as encoded in the SVG map (keys)
+        core_map : SVG map to modify
+        threshold_P_V : threshold for a p-value to be considered significant
+        threshold_F_C : threshold for a fold change value to be considered significant
+        max_z_score : highest z-score (absolute value)
+
+    Returns:
+        ET.ElementTree : the modified core_map
+
+    Side effects:
+        core_map : mut
+    """
+    maxT = 12
+    minT = 2
+    grey = '#BEBEBE'
+    blue = '#6495ed'
+    red = '#ecac68'
+    # the user threshold is mapped onto the scale of the normalized fold change before comparing
+    f_c_cutoff = (threshold_F_C - 1) / (abs(threshold_F_C) + 1)
+
+    for el in core_map.iter():
+        el_id = str(el.get('id'))
+        if not el_id.startswith('R_'): continue
+
+        tmp = d.get(el_id[2:])
+        if tmp is None: continue
+
+        p_val :float = tmp[0]
+        f_c = tmp[1]
+        z_score = tmp[2]
+
+        # Thin grey arrow unless proven otherwise: this also covers a fold change of exactly 0 that is
+        # not under the cut-off, which used to leave the color unset (or inherited from the previous arrow).
+        col = grey
+        width = str(minT)
+        if p_val < threshold_P_V:
+            dash = 'none'
+            if isinstance(f_c, str):
+                if f_c == '-INF': col = blue
+                elif f_c == 'INF': col = red
+                width = str(maxT)
+
+            elif abs(f_c) >= f_c_cutoff:
+                if f_c < 0: col = blue
+                elif f_c > 0: col = red
+                # with no usable maximum the width can't be scaled, the arrow gets the maximum width
+                # (same fallback used when styling RPS arrows)
+                if max_z_score: width = str(max((abs(z_score) * maxT) / max_z_score, minT))
+                else: width = str(maxT)
+        else:
+            dash = '5,5'
+
+        el.set('style', fix_style(el.get('style', ""), col, width, dash))
+
+    return core_map
+
+def getElementById(reactionId :str, metabMap :ET.ElementTree) -> utils.Result[ET.Element, utils.Result.ResultErr]:
+ """
+ Finds any element in the given map with the given ID. ID uniqueness in an svg file is recommended but
+ not enforced, if more than one element with the exact ID is found only the first will be returned.
+
+ The ID is interpolated as it is inside a double-quoted XPath literal, so an ID containing a double
+ quote would produce a malformed expression.
+
+ Args:
+ reactionId (str): exact ID of the requested element.
+ metabMap (ET.ElementTree): metabolic map containing the element.
+
+ Returns:
+ utils.Result[ET.Element, ResultErr]: result of the search, either the first match found or a ResultErr.
+ """
+ # The XPath query is wrapped in a Result and evaluated inside map: taking item [0] of an empty match
+ # list fails, and mapErr replaces that failure with a readable message.
+ # NOTE(review): this presumes utils.Result.map captures exceptions raised by the mapper - confirm in utils.
+ return utils.Result.Ok(
+ f"//*[@id=\"{reactionId}\"]").map(
+ lambda xPath : metabMap.xpath(xPath)[0]).mapErr(
+ lambda _ : utils.Result.ResultErr(f"No elements with ID \"{reactionId}\" found in map"))
+ # ^^^ we shamelessly ignore the contents of the IndexError, it offers nothing to the user.
+
+def styleMapElement(element :ET.Element, styleStr :str) -> None:
+    """
+    Appends styleStr to the "style" attribute of the given element. When the current value already ends
+    with a stroke / stroke-width / stroke-dasharray triplet (as appended by a previous call) that
+    triplet is dropped first, so repeated styling doesn't pile up.
+
+    Args:
+        element : the SVG element to style.
+        styleStr : the styles to append, expected to start with ';'.
+
+    Side effects:
+        element : mut
+    """
+    previous :str = element.get("style", "")
+    endsWithStrokeTriplet = re.search(r";stroke:[^;]+;stroke-width:[^;]+;stroke-dasharray:[^;]+$", previous)
+    if endsWithStrokeTriplet:
+        # cut at the third ';' from the end, discarding the last three properties
+        previous = previous.rsplit(';', 3)[0]
+
+    element.set("style", previous + styleStr)
+
+class ReactionDirection(Enum):
+    """
+    Direction of a reaction as encoded by the two-character suffix of its ID.
+    """
+    Unknown = ""
+    Direct = "_F"
+    Inverse = "_B"
+
+    @classmethod
+    def fromDir(cls, s :str) -> "ReactionDirection":
+        """
+        Returns the variant whose suffix is exactly s, Unknown for anything else.
+        """
+        for direction in (ReactionDirection.Direct, ReactionDirection.Inverse):
+            if s == direction.value: return direction
+
+        return ReactionDirection.Unknown
+
+    @classmethod
+    def fromReactionId(cls, reactionId :str) -> "ReactionDirection":
+        """
+        Reads the direction from the last two characters of a reaction ID.
+        """
+        suffix = reactionId[-2:]
+        return ReactionDirection.fromDir(suffix)
+
+def getArrowBodyElementId(reactionId :str) -> str:
+    """
+    Computes the ID of the arrow body element for a reaction: the "_RV" suffix or, failing that, the
+    direction suffix ("_F" / "_B") is removed and "R_" is prepended.
+
+    Args:
+        reactionId : the provided reaction ID.
+
+    Returns:
+        str : the ID of the arrow body in the map.
+    """
+    if reactionId.endswith("_RV"): #TODO: standardize _RV
+        return f"R_{reactionId[:-3]}"
+
+    hasDirectionSuffix = ReactionDirection.fromReactionId(reactionId) is not ReactionDirection.Unknown
+    return f"R_{reactionId[:-2] if hasDirectionSuffix else reactionId}"
+
+def getArrowHeadElementId(reactionId :str) -> Tuple[str, str]:
+    """
+    Computes the ID(s) of the arrow head element(s) for a reaction. When the ID carries a direction
+    suffix ("_F" / "_B") only that head is returned, otherwise both candidates are.
+
+    Args:
+        reactionId : the provided reaction ID.
+
+    Returns:
+        Tuple[str, str]: either a single str ID for the correct arrow head followed by an empty string or both options to try.
+    """
+    if reactionId.endswith("_RV"): #TODO: standardize _RV
+        baseId = reactionId[:-3]
+        return f"F_{baseId}", f"B_{baseId}"
+
+    if ReactionDirection.fromReactionId(reactionId) is ReactionDirection.Unknown:
+        return f"F_{reactionId}", f"B_{reactionId}"
+
+    # the reversed two-character suffix becomes the prefix: "Pyr_F" --> "F_Pyr"
+    return reactionId[:-3:-1] + reactionId[:-2], ""
+
+class ArrowColor(Enum):
+    """
+    Encodes possible arrow colors based on their meaning in the enrichment process.
+    """
+    Invalid = "#BEBEBE" # gray, fold-change under threshold
+    UpRegulated = "#ecac68" # up-regulated reaction
+    DownRegulated = "#6495ed" # down-regulated reaction
+
+    # vvv Alternative colors (pure red and pure blue), used for the net value of a reversible reaction
+    # with conflicting enrichment in the two directions.
+    UpRegulatedInv = "#FF0000" # up-regulated net value
+    DownRegulatedInv = "#0000FF" # down-regulated net value
+
+    @classmethod
+    def fromFoldChangeSign(cls, foldChange :float, *, useAltColor = False) -> "ArrowColor":
+        """
+        Picks the color for a fold change: down-regulated when negative, up-regulated otherwise,
+        in the alternative shade when useAltColor is set.
+        """
+        if foldChange < 0:
+            return cls.DownRegulatedInv if useAltColor else cls.DownRegulated
+
+        return cls.UpRegulatedInv if useAltColor else cls.UpRegulated
+
+    def __str__(self) -> str: return self.value
+
+class Arrow:
+    """
+    Models the properties of a reaction arrow that change based on enrichment.
+    """
+    MIN_W = 2
+    MAX_W = 12
+
+    def __init__(self, width :int, col: ArrowColor, *, isDashed = False) -> None:
+        """
+        (Private) Initializes an instance of Arrow.
+
+        Args:
+            width : width of the arrow, ideally to be kept within Arrow.MIN_W and Arrow.MAX_W (not enforced).
+            col : color of the arrow.
+            isDashed : whether the arrow should be dashed, meaning the associated pValue resulted not significant.
+
+        Returns:
+            None : practically, a Arrow instance.
+        """
+        self.w = width
+        self.col = col
+        self.dash = isDashed
+
+    def applyTo(self, reactionId :str, metabMap :ET.ElementTree, styleStr :str) -> None:
+        """
+        Applies the given styles to the map element with the given ID. IDs that can't be found in the
+        map are recorded in the module-level ERRORS list instead of raising.
+
+        Args:
+            reactionId : ID of the element, as encoded in the map.
+            metabMap : the metabolic map to edit.
+            styleStr : the styles to apply.
+
+        Side effects:
+            metabMap : mut
+            ERRORS : mut
+        """
+        if getElementById(reactionId, metabMap).map(lambda el : styleMapElement(el, styleStr)).isErr:
+            ERRORS.append(reactionId)
+
+    def styleReactionElements(self, metabMap :ET.ElementTree, reactionId :str, *, mindReactionDir = True) -> None:
+        """
+        Styles the map elements of a reaction with this arrow: the arrow head(s) by default, the arrow
+        body alone when mindReactionDir is False.
+
+        Args:
+            metabMap : the metabolic map to edit.
+            reactionId : the reaction ID, as encoded in the dataset.
+            mindReactionDir : whether the direction of the reaction matters.
+
+        Side effects:
+            metabMap : mut
+        """
+        # If We're dealing with RAS data or in general don't care about the direction of the reaction we only style the arrow body
+        if not mindReactionDir:
+            return self.applyTo(getArrowBodyElementId(reactionId), metabMap, self.toStyleStr())
+
+        # Now we style the arrow head(s):
+        idOpt1, idOpt2 = getArrowHeadElementId(reactionId)
+        self.applyTo(idOpt1, metabMap, self.toStyleStr(downSizedForTips = True))
+        if idOpt2: self.applyTo(idOpt2, metabMap, self.toStyleStr(downSizedForTips = True))
+
+    def getMapReactionId(self, reactionId :str, mindReactionDir :bool) -> str:
+        """
+        Computes the reaction ID as encoded in the map for a given reaction ID from the dataset.
+
+        Args:
+            reactionId: the reaction ID, as encoded in the dataset.
+            mindReactionDir: if True forward (F_) and backward (B_) directions will be encoded in the result.
+
+        Returns:
+            str : the ID of an arrow's body or tips in the map.
+        """
+        # we assume the reactionIds also don't encode reaction dir if they don't mind it when styling the map.
+        if not mindReactionDir: return "R_" + reactionId
+
+        #TODO: this is clearly something we need to make consistent in RPS
+        # The direction is the SUFFIX of the ID (its last two characters): when present it is reversed
+        # and moved in front of the rest of the ID, otherwise the forward direction is assumed.
+        if reactionId[-2:] in ["_F", "_B"]: return reactionId[:-3:-1] + reactionId[:-2] # "Pyr_F" --> "F_Pyr"
+        return f"F_{reactionId}"
+
+    def toStyleStr(self, *, downSizedForTips = False) -> str:
+        """
+        Collapses the styles of this Arrow into a str, ready to be applied as part of the "style" property on an svg element.
+
+        Args:
+            downSizedForTips : if True the width is scaled to 80%, as arrow tips are drawn thinner than the body.
+
+        Returns:
+            str : the styles string.
+        """
+        width = self.w
+        if downSizedForTips: width *= 0.8
+        return f";stroke:{self.col};stroke-width:{width};stroke-dasharray:{'5,5' if self.dash else 'none'}"
+
+# vvv These constants could be static properties of the class itself, but a Python class
+# cannot be instantiated inside its own body, so they are defined right after it instead.
+INVALID_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid)
+INSIGNIFICANT_ARROW = Arrow(Arrow.MIN_W, ArrowColor.Invalid, isDashed = True)
+
+def applyRpsEnrichmentToMap(rpsEnrichmentRes :Dict[str, Union[Tuple[float, FoldChange], Tuple[float, FoldChange, float, float]]], metabMap :ET.ElementTree, maxNumericZScore :float) -> None:
+ """
+ Applies RPS enrichment results to the provided metabolic map.
+
+ Each value of rpsEnrichmentRes is read by position: index 0 is the p-value, 1 the fold change, 2 the
+ z-score and, only for IDs ending in "_RV", 3 and 4 are the two averages whose signs are inspected.
+ NOTE(review): the type annotation only lists 2- and 4-item tuples, while the code reads index 2 of
+ every entry and index 4 of "_RV" entries - the annotation should be confirmed against the producer.
+
+ Args:
+ rpsEnrichmentRes : RPS enrichment results.
+ metabMap : the metabolic map to edit.
+ maxNumericZScore : biggest finite z-score value found.
+
+ Side effects:
+ metabMap : mut
+
+ Returns:
+ None
+ """
+ for reactionId, values in rpsEnrichmentRes.items():
+ pValue = values[0]
+ foldChange = values[1]
+ z_score = values[2]
+
+ # "INF" and "-INF" fold changes become actual float infinities here
+ if isinstance(foldChange, str): foldChange = float(foldChange)
+ if pValue >= ARGS.pValue: # pValue above tresh: dashed arrow
+ INSIGNIFICANT_ARROW.styleReactionElements(metabMap, reactionId)
+ continue
+
+ # the user threshold is mapped onto the scale of the normalized fold change before comparing
+ if abs(foldChange) < (ARGS.fChange - 1) / (abs(ARGS.fChange) + 1):
+ INVALID_ARROW.styleReactionElements(metabMap, reactionId)
+ continue
+
+ # infinite fold changes keep the maximum width, finite ones are scaled by their z-score
+ width = Arrow.MAX_W
+ if not math.isinf(foldChange):
+ try: width = max(abs(z_score * Arrow.MAX_W) / maxNumericZScore, Arrow.MIN_W)
+ except ZeroDivisionError: pass
+
+ if not reactionId.endswith("_RV"): # RV stands for reversible reactions
+ Arrow(width, ArrowColor.fromFoldChangeSign(foldChange)).styleReactionElements(metabMap, reactionId)
+ continue
+
+ reactionId = reactionId[:-3] # Remove "_RV"
+
+ # 0: both averages non-negative, 1: opposite signs, 2: both negative
+ inversionScore = (values[3] < 0) + (values[4] < 0) # Compacts the signs of averages into 1 easy to check score
+ if inversionScore == 2: foldChange *= -1
+ # ^^^ Style the inverse direction with the opposite sign netValue
+
+ # If the score is 1 (opposite signs) we use alternative colors vvv
+ arrow = Arrow(width, ArrowColor.fromFoldChangeSign(foldChange, useAltColor = inversionScore == 1))
+
+ # vvv These 2 if statements can both be true and can both happen
+ if ARGS.net: # style arrow head(s):
+ arrow.styleReactionElements(metabMap, reactionId + ("_B" if inversionScore == 2 else "_F"))
+
+ if not ARGS.using_RAS: # style arrow body
+ arrow.styleReactionElements(metabMap, reactionId, mindReactionDir = False)
+
+############################ split class ######################################
+def split_class(classes :pd.DataFrame, resolve_rules :Dict[str, List[float]]) -> Dict[str, List[List[float]]]:
+ """
+ Generates a :dict that groups together data from a :DataFrame based on classes the data is related to.
+
+ The first column of classes is used as the key to query resolve_rules, the second column as the
+ class name. The rows collected for a class are transposed before being stored, so each stored list
+ holds the values found at the same position in every row of that class.
+
+ Args:
+ classes : a :DataFrame of only string values, containing class information (rows) and keys to query the resolve_rules :dict
+ resolve_rules : a :dict containing :float data
+
+ Returns:
+ dict : the dict with data grouped by class
+
+ Side effects:
+ classes : mut
+ """
+ class_pat :Dict[str, List[List[float]]] = {}
+ for i in range(len(classes)):
+ classe :str = classes.iloc[i, 1]
+ # null classes are rows already consumed by a previous iteration (see below) or rows without a class
+ if pd.isnull(classe): continue
+
+ l :List[List[float]] = []
+ # collect every remaining row of the same class, starting from the current one
+ for j in range(i, len(classes)):
+ if classes.iloc[j, 1] == classe:
+ pat_id :str = classes.iloc[j, 0]
+ tmp = resolve_rules.get(pat_id, None)
+ if tmp != None:
+ l.append(tmp)
+ # the row is marked as consumed by erasing its class, this is the mutation of the input
+ classes.iloc[j, 1] = None
+
+ if l:
+ # transpose: from one list per matched row to one list per position
+ class_pat[classe] = list(map(list, zip(*l)))
+ continue
+
+ utils.logWarning(
+ f"Warning: no sample found in class \"{classe}\", the class has been disregarded", ARGS.out_log)
+
+ return class_pat
+
+############################ conversion ##############################################
+#conversion from svg to png
+def svg_to_png_with_background(svg_path :utils.FilePath, png_path :utils.FilePath, dpi :int = 72, scale :int = 1, size :Optional[float] = None) -> None:
+ """
+ Internal utility to convert an SVG to PNG (forced opaque) to aid in PDF conversion.
+
+ Args:
+ svg_path : path to SVG file
+ png_path : path for new PNG file
+ dpi : dots per inch of the generated PNG
+ scale : scaling factor for the generated PNG, computed internally when a size is provided
+ size : final effective width of the generated PNG
+
+ Returns:
+ None
+ """
+ if size:
+ # the image is first loaded unscaled to learn its width, then resized to the requested one
+ image = pyvips.Image.new_from_file(svg_path.show(), dpi=dpi, scale=1)
+ scale = size / image.width
+ image = image.resize(scale)
+ else:
+ image = pyvips.Image.new_from_file(svg_path.show(), dpi=dpi, scale=scale)
+
+ # a white image as big as the (already scaled) rendered one, used as the opaque background
+ white_background = pyvips.Image.black(image.width, image.height).new_from_image([255, 255, 255])
+ # NOTE(review): the background was sized on the scaled image and is scaled again here, confirm this is intended
+ white_background = white_background.affine([scale, 0, 0, scale])
+
+ if white_background.bands != image.bands:
+ white_background = white_background.extract_band(0)
+
+ # the rendered map is drawn over the white background and the result is saved
+ composite_image = white_background.composite2(image, 'over')
+ composite_image.write_to_file(png_path.show())
+
+#single function: the files are kept outside of it and passed as inputs
+#conversion from png to pdf
+def convert_png_to_pdf(png_file :utils.FilePath, pdf_file :utils.FilePath) -> None:
+    """
+    Internal utility to convert a PNG to PDF to aid from SVG conversion.
+
+    Args:
+        png_file : path to PNG file
+        pdf_file : path to new PDF file
+
+    Returns:
+        None
+    """
+    # The image is opened in a with block so the PNG file is closed as soon as the PDF is
+    # written instead of staying open until garbage collection.
+    with Image.open(png_file.show()) as image:
+        image.convert("RGB").save(pdf_file.show(), "PDF", resolution=100.0)
+
+#function called to reduce redundancy in the code
+def convert_to_pdf(file_svg :utils.FilePath, file_png :utils.FilePath, file_pdf :utils.FilePath) -> None:
+ """
+ Converts the SVG map at the provided path to PDF, going through an intermediate PNG file.
+
+ Args:
+ file_svg : path to SVG file
+ file_png : path to PNG file
+ file_pdf : path to new PDF file
+
+ Returns:
+ None
+
+ Raises:
+ utils.DataErr : if anything goes wrong while converting the PNG to PDF
+ """
+ # the SVG to PNG step is outside of the try block: its errors are propagated unchanged
+ svg_to_png_with_background(file_svg, file_png)
+ try:
+ convert_png_to_pdf(file_png, file_pdf)
+ print(f'PDF file {file_pdf.filePath} successfully generated.')
+
+ except Exception as e:
+ raise utils.DataErr(file_pdf.show(), f'Error generating PDF file: {e}')
+
+############################ map ##############################################
+def buildOutputPath(dataset1Name :str, dataset2Name = "rest", *, details = "", ext :utils.FileFormat) -> utils.FilePath:
+ """
+ Builds a FilePath instance from the names of confronted datasets ready to point to a location in the
+ "result/" folder, used by this tool for output files in collections.
+
+ The file name has the form "<dataset1Name>_vs_<dataset2Name>", followed by " (<details>)" when
+ details is not empty.
+
+ Args:
+ dataset1Name : name of the first dataset in the comparison.
+ dataset2Name : name of the second dataset in the comparison. Defaults to "rest".
+ details : optional description of the file content, appended to the file name in parentheses.
+ ext : format of the file, passed on to utils.FilePath.
+
+ Returns:
+ utils.FilePath : the path built with the "result" prefix.
+ """
+ # This function returns a util data structure but is extremely specific to this module.
+ # RAS also uses collections as output and as such might benefit from a method like this, but I'd wait
+ # TODO: until a third tool with multiple outputs appears before porting this to utils.
+ return utils.FilePath(
+ f"{dataset1Name}_vs_{dataset2Name}" + (f" ({details})" if details else ""),
+ # ^^^ yes this string is built every time even if the form is the same for the same 2 datasets in
+ # all output files: I don't care, this was never the performance bottleneck of the tool and
+ # there is no other net gain in saving and re-using the built string.
+ ext,
+ prefix = "result")
+
+FIELD_NOT_AVAILABLE = '/'
+def writeToCsv(rows: List[list], fieldNames :List[str], outPath :utils.FilePath) -> None:
+    """
+    Writes the given rows to a tab-separated file with a header line. Rows shorter than the header are
+    padded (in place) with FIELD_NOT_AVAILABLE, items beyond the header length are not written.
+
+    Args:
+        rows : the rows to write, each a list of values in the same order as fieldNames.
+        fieldNames : the column names.
+        outPath : path of the file to write.
+
+    Side effects:
+        rows : mut (padding of short rows)
+    """
+    expectedLen = len(fieldNames)
+    with open(outPath.show(), "w", newline = "") as fd:
+        writer = csv.DictWriter(fd, fieldnames = fieldNames, delimiter = '\t')
+        writer.writeheader()
+
+        for row in rows:
+            missing = expectedLen - len(row)
+            if missing > 0: row.extend([FIELD_NOT_AVAILABLE] * missing)
+            writer.writerow(dict(zip(fieldNames, row)))
+
+OldEnrichedScores = Dict[str, List[Union[float, FoldChange]]] #TODO: try to use Tuple whenever possible
+def writeTabularResult(enrichedScores : OldEnrichedScores, ras_enrichment: bool, outPath :utils.FilePath) -> None:
+    """
+    Writes the enrichment scores of a comparison to a tab-separated file, one row per reaction ID.
+
+    Args:
+        enrichedScores : for each reaction ID [p-value, fold change, z-score], followed by the two averages
+            for the entries that carry them.
+        ras_enrichment : whether the scores come from RAS data, in which case no average columns are written.
+        outPath : path of the file to write.
+    """
+    # The scores always hold the z-score in third position: without its own column every value after the
+    # fold change would end up under the wrong header (z-score as "average_1", the first average as
+    # "average_2") and the second average would be lost.
+    fieldNames = ["ids", "P_Value", "fold change", "z-score"]
+    if not ras_enrichment: fieldNames.extend(["average_1", "average_2"])
+
+    writeToCsv([ [reactId] + list(values) for reactId, values in enrichedScores.items() ], fieldNames, outPath)
+
+def temp_thingsInCommon(tmp :Dict[str, List[Union[float, FoldChange]]], core_map :ET.ElementTree, max_z_score :float, dataset1Name :str, dataset2Name = "rest", ras_enrichment = True) -> None:
+ """
+ Writes the tabular result of a comparison and applies the enrichment styling to the map.
+
+ Args:
+ tmp : enrichment scores of the comparison, by reaction ID.
+ core_map : SVG map to modify.
+ max_z_score : highest z-score (absolute value) found in the comparison.
+ dataset1Name : name of the first dataset, used in the output file name.
+ dataset2Name : name of the second dataset, used in the output file name. Defaults to "rest".
+ ras_enrichment : whether the scores come from RAS data (fix_map styling) or not (RPS styling).
+
+ Side effects:
+ core_map : mut
+ tmp : mut (values are turned into tuples when ras_enrichment is False)
+ """
+ # this function compiles the things always in common between comparison modes after enrichment.
+ # TODO: organize, name better.
+ writeTabularResult(tmp, ras_enrichment, buildOutputPath(dataset1Name, dataset2Name, details = "Tabular Result", ext = utils.FileFormat.TSV))
+
+ if ras_enrichment:
+ fix_map(tmp, core_map, ARGS.pValue, ARGS.fChange, max_z_score)
+ return
+
+ # the RPS styling function is typed on tuples, the lists are converted in place
+ for reactId, enrichData in tmp.items(): tmp[reactId] = tuple(enrichData)
+ applyRpsEnrichmentToMap(tmp, core_map, max_z_score)
+
+def computePValue(dataset1Data: List[float], dataset2Data: List[float]) -> Tuple[float, float]:
+    """
+    Computes the statistical significance score (P-value) of the comparison between coherent data
+    from two datasets, together with the Z-score of the difference between their means.
+    The data is supposed to, in both datasets:
+    - be related to the same reaction ID;
+    - be ordered by sample, such that the item at position i in both lists is related to the
+      same sample or cell line.
+
+    Args:
+        dataset1Data : data from the 1st dataset.
+        dataset2Data : data from the 2nd dataset.
+
+    Returns:
+        tuple: (P-value, Z-score)
+            - P-value from a Kolmogorov-Smirnov test on the provided data.
+            - Z-score of the difference between means of the two datasets.
+    """
+    # only the P-value of the Kolmogorov-Smirnov test is of interest, the statistic is discarded
+    _, pValue = st.ks_2samp(dataset1Data, dataset2Data)
+
+    def squaredStandardError(sample :List[float]) -> float:
+        # sample variance (ddof = 1) over the sample size
+        return np.std(sample, ddof=1)**2 / len(sample)
+
+    meansDifference = np.mean(dataset1Data) - np.mean(dataset2Data)
+    zScore = meansDifference / np.sqrt(squaredStandardError(dataset1Data) + squaredStandardError(dataset2Data))
+
+    return pValue, zScore
+
+def compareDatasetPair(dataset1Data :List[List[float]], dataset2Data :List[List[float]], ids :List[str]) -> Tuple[Dict[str, List[Union[float, FoldChange]]], float]:
+    """
+    Compares two datasets reaction by reaction, computing P-value, fold change and Z-score of each.
+
+    When net scores are requested (ARGS.net) or RAS datasets aren't used, the two directions of a
+    reaction (IDs ending in "_F" and "_B") are also combined in a net score, stored under the ID with
+    the "_RV" suffix along with the two net averages.
+
+    Args:
+        dataset1Data : data from the 1st dataset, one list of values per reaction.
+        dataset2Data : data from the 2nd dataset, one list of values per reaction.
+        ids : reaction IDs, in the same order as the lists of both datasets.
+
+    Returns:
+        Tuple[Dict[str, List[Union[float, FoldChange]]], float] : the scores by reaction ID and the highest
+        Z-score (absolute value) found among the regular (non-net) scores.
+
+    Side effects:
+        ids : mut (when ARGS.net is set, complementary directional ids are set to None once processed)
+    """
+    #TODO: the following code still suffers from "dumbvarnames-osis"
+    tmp :Dict[str, List[Union[float, FoldChange]]] = {}
+    max_z_score = 0
+
+    for count, (l1, l2) in enumerate(zip(dataset1Data, dataset2Data)):
+        reactId = ids[count]
+        if not reactId: continue # we skip ids that have already been processed
+
+        try: #TODO: identify the source of these errors and minimize code in the try block
+            reactDir = ReactionDirection.fromReactionId(reactId)
+            # Net score is computed only for reversible reactions when user wants it on arrow tips or when RAS datasets aren't used
+            if (ARGS.net or not ARGS.using_RAS) and reactDir is not ReactionDirection.Unknown:
+                try: position = ids.index(reactId[:-1] + ('B' if reactDir is ReactionDirection.Direct else 'F'))
+                except ValueError: continue # we look for the complementary id, if not found we skip
+
+                nets1 = np.subtract(l1, dataset1Data[position])
+                nets2 = np.subtract(l2, dataset2Data[position])
+
+                p_value, z_score = computePValue(nets1, nets2)
+                avg1 = sum(nets1) / len(nets1)
+                avg2 = sum(nets2) / len(nets2)
+                net = fold_change(avg1, avg2)
+
+                # "INF" and "-INF" fold changes are strings: math.isnan would raise a TypeError on them
+                # and the reaction would be silently discarded by the except clause below.
+                if not isinstance(net, str) and math.isnan(net): continue
+                tmp[reactId[:-1] + "RV"] = [p_value, net, z_score, avg1, avg2]
+
+                # vvv complementary directional ids are set to None once processed if net is to be applied to tips
+                if ARGS.net:
+                    ids[position] = None
+                    continue
+
+            # fallthrough is intended, regular scores need to be computed when tips aren't net but RAS datasets aren't used
+            p_value, z_score = computePValue(l1, l2)
+            avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2))
+            if not isinstance(z_score, str) and max_z_score < abs(z_score): max_z_score = abs(z_score)
+            tmp[reactId] = [float(p_value), avg, z_score]
+
+        # reactions whose data can't be processed (non-numeric or missing values) are skipped
+        except (TypeError, ZeroDivisionError): continue
+
+    return tmp, max_z_score
+
+def computeEnrichment(metabMap :ET.ElementTree, class_pat :Dict[str, List[List[float]]], ids :List[str], *, fromRAS = True) -> None:
+    """
+    Compares clustered data based on a given comparison mode and applies enrichment-based styling on the
+    provided metabolic map.
+
+    Args:
+        metabMap : SVG map to modify.
+        class_pat : the clustered data, for each class one list of values per reaction.
+        ids : ids for data association.
+        fromRAS : whether the data to enrich consists of RAS scores.
+
+    Returns:
+        None
+
+    Raises:
+        sys.exit : if there are less than 2 classes for comparison
+
+    Side effects:
+        metabMap : mut
+        ids : mut
+    """
+    class_pat = { k.strip() : v for k, v in class_pat.items() }
+    #TODO: simplfy this stuff vvv and stop using sys.exit (raise the correct utils error)
+    if len(class_pat) < 2: sys.exit('Execution aborted: classes provided for comparisons are less than two\n')
+
+    if ARGS.comparison == "manyvsmany":
+        for i, j in it.combinations(class_pat.keys(), 2):
+            #TODO: these 2 functions are always called in pair and in this order and need common data,
+            # some clever refactoring would be appreciated.
+            comparisonDict, max_z_score = compareDatasetPair(class_pat.get(i), class_pat.get(j), ids)
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, i, j, fromRAS)
+
+    elif ARGS.comparison == "onevsrest":
+        for single_cluster in class_pat.keys():
+            others = [ data for name, data in class_pat.items() if name != single_cluster ]
+
+            # The data of every class is organized by reaction, so "the rest" is built by joining, for each
+            # reaction, the values of all the other classes. Joining the classes one after the other instead
+            # would only append more reactions at the end, which are never reached by the pairwise comparison.
+            rest :List[List[float]] = [ list(it.chain.from_iterable(reactionData)) for reactionData in zip(*others) ]
+
+            comparisonDict, max_z_score = compareDatasetPair(class_pat.get(single_cluster), rest, ids)
+            # fromRAS must be passed by name: the next positional parameter is the name of the 2nd dataset
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, single_cluster, ras_enrichment = fromRAS)
+
+    elif ARGS.comparison == "onevsmany":
+        controlItems = class_pat.get(ARGS.control)
+        for otherDataset in class_pat.keys():
+            if otherDataset == ARGS.control: continue
+
+            comparisonDict, max_z_score = compareDatasetPair(controlItems, class_pat.get(otherDataset), ids)
+            temp_thingsInCommon(comparisonDict, metabMap, max_z_score, ARGS.control, otherDataset, fromRAS)
+def createOutputMaps(dataset1Name :str, dataset2Name :str, core_map :ET.ElementTree) -> None:
+ """
+ Writes the map of a comparison to the output folder as SVG and, based on the user's choices
+ (ARGS.generate_pdf, ARGS.generate_svg), converts it to PDF and removes the SVG file.
+
+ Args:
+ dataset1Name : name of the first dataset, used in the output file names.
+ dataset2Name : name of the second dataset, used in the output file names.
+ core_map : the map to write.
+ """
+ # the SVG file is always written first, since the PDF conversion starts from it
+ svgFilePath = buildOutputPath(dataset1Name, dataset2Name, details = "SVG Map", ext = utils.FileFormat.SVG)
+ utils.writeSvg(svgFilePath, core_map)
+
+ if ARGS.generate_pdf:
+ pngPath = buildOutputPath(dataset1Name, dataset2Name, details = "PNG Map", ext = utils.FileFormat.PNG)
+ pdfPath = buildOutputPath(dataset1Name, dataset2Name, details = "PDF Map", ext = utils.FileFormat.PDF)
+ convert_to_pdf(svgFilePath, pngPath, pdfPath)
+
+ # the SVG was only needed as an intermediate step if the user didn't ask for it
+ if not ARGS.generate_svg: os.remove(svgFilePath.show())
+
+ClassPat = Dict[str, List[List[float]]]
+def getClassesAndIdsFromDatasets(datasetsPaths :List[str], datasetPath :str, classPath :str, names :List[str]) -> Tuple[List[str], ClassPat]:
+ """
+ Reads the input data according to ARGS.option and groups it by class.
+
+ With 'datasets' every dataset in datasetsPaths is a class of its own (named after names), with
+ 'dataset_class' the single dataset at datasetPath is split in classes following the file at classPath.
+
+ Args:
+ datasetsPaths : paths to the datasets, used by the 'datasets' option.
+ datasetPath : path to the single dataset, used by the 'dataset_class' option.
+ classPath : path to the class specification file, used by the 'dataset_class' option.
+ names : names of the datasets, used by the 'datasets' option.
+
+ Returns:
+ Tuple[List[str], ClassPat] : the IDs read from the first column of the dataset and the data grouped by class.
+ """
+ # TODO: I suggest creating dicts with ids as keys instead of keeping class_pat and ids separate,
+ # for the sake of everyone's sanity.
+ class_pat :ClassPat = {}
+ if ARGS.option == 'datasets':
+ num = 1 #TODO: the dataset naming function could be a generator
+ for path, name in zip(datasetsPaths, names):
+ name = name_dataset(name, num)
+ # NOTE(review): ids is overwritten at every iteration, the ones returned belong to the last dataset
+ resolve_rules_float, ids = getDatasetValues(path, name)
+ if resolve_rules_float != None:
+ # transpose: from one list per column of the dataset to one list per row
+ class_pat[name] = list(map(list, zip(*resolve_rules_float.values())))
+
+ num += 1
+
+ elif ARGS.option == "dataset_class":
+ classes = read_dataset(classPath, "class")
+ classes = classes.astype(str)
+
+ resolve_rules_float, ids = getDatasetValues(datasetPath, "Dataset Class (not actual name)")
+ if resolve_rules_float != None: class_pat = split_class(classes, resolve_rules_float)
+
+ # NOTE(review): ids is never assigned when ARGS.option matches neither branch (or no dataset is given)
+ return ids, class_pat
+ #^^^ TODO: this could be a match statement over an enum, make it happen future marea dev with python 3.12! (it's why I kept the ifs)
+
+#TODO: create these damn args as FilePath objects
+def getDatasetValues(datasetPath :str, datasetName :str) -> Tuple[ClassPat, List[str]]:
+ """
+ Opens the dataset at the given path and extracts the values (expected nullable numerics) and the IDs.
+
+ The IDs are the values of the first column (as strings), the values are returned as a dict with
+ the name of every other column as key and the list of its converted values as value.
+ NOTE(review): the dict built here maps each column to a flat list of values, while the annotated
+ ClassPat type describes lists of lists - confirm which one is intended.
+
+ Args:
+ datasetPath : path to the dataset
+ datasetName (str): dataset name, used in error reporting
+
+ Returns:
+ Tuple[ClassPat, List[str]]: values and IDs extracted from the dataset
+ """
+ dataset = read_dataset(datasetPath, datasetName)
+ IDs = pd.Series.tolist(dataset.iloc[:, 0].astype(str))
+
+ # the ID column is dropped, what's left is turned into { column name : list of column values }
+ dataset = dataset.drop(dataset.columns[0], axis = "columns").to_dict("list")
+ # every value goes through utils.Float, presumably a converter that also validates the value - confirm in utils
+ return { id : list(map(utils.Float("Dataset values, not an argument"), values)) for id, values in dataset.items() }, IDs
+
+############################ MAIN #############################################
+def main() -> None:
+    """
+    Initializes everything and sets the program in motion based on the frontend input arguments.
+
+    Returns:
+        None
+
+    Raises:
+        sys.exit : if a user-provided custom map is in the wrong format (ET.XMLSyntaxError, ET.XMLSchemaParseError),
+            presumably raised while loading the map - confirm in utils.
+    """
+
+    global ARGS
+    ARGS = process_args()
+
+    os.makedirs('result', exist_ok = True)
+
+    core_map :ET.ElementTree = ARGS.choice_map.getMap(
+        ARGS.tool_dir,
+        utils.FilePath.fromStrPath(ARGS.custom_map) if ARGS.custom_map else None)
+    # TODO: ^^^ ugly but fine for now, the argument is None if the model isn't custom because no file was given.
+    # getMap will None-check the customPath and panic when the model IS custom but there's no file (good). A cleaner
+    # solution can be derived from my comment in FilePath.fromStrPath
+
+    if ARGS.using_RAS:
+        ids, class_pat = getClassesAndIdsFromDatasets(ARGS.input_datas, ARGS.input_data, ARGS.input_class, ARGS.names)
+        computeEnrichment(core_map, class_pat, ids)
+
+    if ARGS.using_RPS:
+        ids, class_pat = getClassesAndIdsFromDatasets(ARGS.input_datas_rps, ARGS.input_data_rps, ARGS.input_class_rps, ARGS.names_rps)
+        computeEnrichment(core_map, class_pat, ids, fromRAS = False)
+
+    # create output files: TODO: this is the same comparison happening in "maps", find a better way to organize this
+    if ARGS.comparison == "manyvsmany":
+        for i, j in it.combinations(class_pat.keys(), 2): createOutputMaps(i, j, core_map)
+
+    elif ARGS.comparison == "onevsrest":
+        for single_cluster in class_pat.keys(): createOutputMaps(single_cluster, "rest", core_map)
+
+    else:
+        # every dataset other than the control one is compared against the control
+        for otherDataset in class_pat.keys():
+            if otherDataset != ARGS.control: createOutputMaps(ARGS.control, otherDataset, core_map)
+
+    # The missing IDs are reported for every comparison mode, and the run is declared successful
+    # whether or not there was something to report.
+    if ERRORS:
+        utils.logWarning(
+            f"The following reaction IDs were mentioned in the dataset but weren't found in the map: {ERRORS}",
+            ARGS.out_log)
+
+    print('Execution succeded')
+###############################################################################
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,325 @@
+
+
+ marea_macros.xml
+
+
+
+ numpy
+ pandas
+ lxml
+ scipy
+ svglib
+ pyvips
+ cairosvg
+ cobra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_cluster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_cluster.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,534 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 3 19:51:00 2019
+@author: Narger
+"""
+
+import sys
+import argparse
+import os
+import numpy as np
+import pandas as pd
+from sklearn.datasets import make_blobs
+from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
+from sklearn.metrics import silhouette_samples, silhouette_score, cluster
+import matplotlib
+matplotlib.use('agg')
+import matplotlib.pyplot as plt
+import scipy.cluster.hierarchy as shc
+import matplotlib.cm as cm
+from typing import Optional, Dict, List
+
+################################# process args ###############################
+def process_args(args :List[str]) -> argparse.Namespace:
+ """
+ Build the argument parser of the clustering tool and parse the command line.
+
+ Args:
+ args (list): Currently unused: parse_args() is called without it and so reads sys.argv.
+
+ Returns:
+ Namespace: An object containing parsed arguments.
+ """
+ parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
+ description = 'process some value\'s' +
+ ' genes to create class.')
+
+ parser.add_argument('-ol', '--out_log',
+ help = "Output log")
+
+ parser.add_argument('-in', '--input',
+ type = str,
+ help = 'input dataset')
+
+ parser.add_argument('-cy', '--cluster_type',
+ type = str,
+ choices = ['kmeans', 'dbscan', 'hierarchy'],
+ default = 'kmeans',
+ help = 'choose clustering algorythm')
+
+ parser.add_argument('-k1', '--k_min',
+ type = int,
+ default = 2,
+ help = 'choose minimun cluster number to be generated')
+
+ parser.add_argument('-k2', '--k_max',
+ type = int,
+ default = 7,
+ help = 'choose maximum cluster number to be generated')
+
+ parser.add_argument('-el', '--elbow',
+ type = str,
+ default = 'false',
+ choices = ['true', 'false'],
+ help = 'choose if you want to generate an elbow plot for kmeans')
+
+ parser.add_argument('-si', '--silhouette',
+ type = str,
+ default = 'false',
+ choices = ['true', 'false'],
+ help = 'choose if you want silhouette plots')
+
+ parser.add_argument('-td', '--tool_dir',
+ type = str,
+ required = True,
+ help = 'your tool directory')
+
+ parser.add_argument('-ms', '--min_samples',
+ type = float,
+ help = 'min samples for dbscan (optional)')
+
+ parser.add_argument('-ep', '--eps',
+ type = float,
+ help = 'eps for dbscan (optional)')
+
+ parser.add_argument('-bc', '--best_cluster',
+ type = str,
+ help = 'output of best cluster tsv')
+
+
+
+ args = parser.parse_args() # NOTE: rebinds `args`; the list received as parameter is never read
+ return args
+
+########################### warning ###########################################
+def warning(s :str) -> None:
+    """
+    Append a warning message to the run log and echo it on standard output.
+
+    Args:
+        s (str): Text of the warning.
+
+    Returns:
+        None
+    """
+    log_path = process_args(sys.argv).out_log
+    with open(log_path, 'a') as log_file:
+        log_file.write(s + "\n\n")
+    print(s)
+
+########################## read dataset ######################################
+def read_dataset(dataset :str) -> pd.DataFrame:
+    """
+    Load a tab-separated file, whose first row is the header, into a DataFrame.
+
+    Args:
+        dataset (str): Path of the file to load.
+
+    Returns:
+        pandas.DataFrame: The parsed table.
+
+    Raises:
+        SystemExit: If the file is empty or the table has fewer than 2 columns.
+    """
+    wrong_format = 'Execution aborted: wrong format of dataset\n'
+    try:
+        table = pd.read_csv(dataset, sep = '\t', header = 0)
+    except pd.errors.EmptyDataError:
+        sys.exit(wrong_format)
+    if len(table.columns) >= 2:
+        return table
+    sys.exit(wrong_format)
+
+############################ rewrite_input ###################################
+def rewrite_input(dataset :pd.DataFrame) -> Dict[str, List[Optional[float]]]:
+    """
+    Turn every column of the dataset into a list of floats, modifying it in place.
+
+    The 'Reactions' entry, if present, is removed. Within each remaining column the
+    string 'None' becomes None and every other value is converted with float().
+
+    Args:
+        dataset (pandas.DataFrame): The dataset to be rewritten.
+            NOTE(review): the body uses pop(key, default) and item assignment, which
+            matches a dict of lists rather than a DataFrame -- confirm with callers.
+
+    Returns:
+        dict: The same object that was passed in, with its columns rewritten.
+    """
+    # The 'Reactions' entry is discarded before the numeric conversion.
+    dataset.pop('Reactions', None)
+
+    # Only values of existing keys are replaced, so assigning while iterating is safe.
+    for column, raw_values in dataset.items():
+        dataset[column] = [
+            None if raw == 'None' else float(raw)
+            for raw in raw_values
+        ]
+
+    return dataset
+
+############################## write to csv ##################################
+def write_to_csv (dataset :pd.DataFrame, labels :List[str], name :str) -> None:
+    """
+    Save the cluster assigned to each row of the dataset as a tab-separated file.
+
+    The file has a 'Patient_ID' column (the dataset index) and a 'Class' column
+    (the label plus one).
+
+    Args:
+        dataset (pandas.DataFrame): The clustered dataset; only its index is used.
+        labels (list): Numeric label of each row, in index order.
+        name (str): Path of the file to write.
+
+    Returns:
+        None
+    """
+    # Labels are shifted by one, so a label of 0 is written as class 1.
+    rows = [(patient, label + 1) for patient, label in zip(dataset.index, labels)]
+    table = pd.DataFrame(rows).astype(str)
+    table.to_csv(name, sep = '\t', index = False,
+                 header = ['Patient_ID', 'Class'])
+
+########################### trova il massimo in lista ########################
+def max_index (lista :List[int]) -> int:
+    """
+    Find the index of the maximum value in a list.
+
+    Args:
+        lista (list): The values to search; any mutually comparable numbers.
+
+    Returns:
+        int: Index of the first occurrence of the maximum, or 0 for an empty list.
+    """
+    # Start below every real number: a starting value of -1 would make the
+    # function return 0 whenever all the values are <= -1.
+    best = float('-inf')
+    best_index = 0
+    for index, value in enumerate(lista):
+        if value > best:
+            best, best_index = value, index
+    return best_index
+
+################################ kmeans #####################################
+def kmeans (k_min: int, k_max: int, dataset: pd.DataFrame, elbow: str, silhouette: str, best_cluster: str) -> None:
+    """
+    Run k-means for every k in [k_min, k_max] and save the resulting partitions.
+
+    Each partition is written to 'clustering/kmeans_with_<k>_clusters.tsv'. The k
+    with the highest average silhouette score gets a '_BEST' marker in its file
+    names and its partition is also written to best_cluster. Optionally, one
+    silhouette plot per k and an elbow plot of the inertia are produced.
+
+    The 'clustering' directory is created when it does not exist.
+
+    Args:
+        k_min (int): The minimum number of clusters to consider.
+        k_max (int): The maximum number of clusters to consider.
+        dataset (pandas.DataFrame): The dataset to perform clustering on.
+        elbow (str): Whether to generate an elbow plot for kmeans ('true' or 'false').
+        silhouette (str): Whether to generate silhouette plots ('true' or 'false').
+        best_cluster (str): The file path to save the output of the best cluster.
+
+    Returns:
+        None
+    """
+    if not os.path.exists('clustering'):
+        os.makedirs('clustering')
+
+    # Both flags are the strings 'true' / 'false', not booleans.
+    draw_elbow = elbow == 'true'
+    draw_silhouette = silhouette == 'true'
+
+    range_n_clusters = list(range(k_min, k_max + 1))
+    distortions = []
+    scores = []
+    all_labels = []
+
+    # k = 1 is always fitted: its inertia is the first point of the elbow plot.
+    single_cluster = KMeans(n_clusters=1, random_state=10)
+    distortions.append(single_cluster.fit(dataset).inertia_)
+
+    for n_clusters in range_n_clusters:
+        clusterer = KMeans(n_clusters=n_clusters, random_state=10)
+        cluster_labels = clusterer.fit_predict(dataset)
+        all_labels.append(cluster_labels)
+
+        # The silhouette score is not defined for a single cluster.
+        if n_clusters == 1:
+            silhouette_avg = 0
+        else:
+            silhouette_avg = silhouette_score(dataset, cluster_labels)
+        scores.append(silhouette_avg)
+
+        # Read the inertia of the model fitted above: fitting a second time
+        # yields the same value (the random_state is fixed) at twice the
+        # cost.
+        distortions.append(clusterer.inertia_)
+
+    best = max_index(scores) + k_min
+
+    for n_clusters, cluster_labels in zip(range_n_clusters, all_labels):
+        is_best = n_clusters == best
+        prefix = '_BEST' if is_best else ''
+
+        write_to_csv(
+            dataset, cluster_labels,
+            'clustering/kmeans_with_' + str(n_clusters) + prefix + '_clusters.tsv')
+
+        if is_best:
+            # Same table as the per-k file, saved where the caller asked for it.
+            write_to_csv(dataset, cluster_labels, best_cluster)
+
+        if draw_silhouette:
+            silhouette_draw(
+                dataset, cluster_labels, n_clusters,
+                'clustering/silhouette_with_' + str(n_clusters) + prefix + '_clusters.png')
+
+    if draw_elbow:
+        elbow_plot(distortions, k_min, k_max)
+
+
+
+
+
+############################## elbow_plot ####################################
+def elbow_plot (distortions: List[float], k_min: int, k_max: int) -> None:
+    """
+    Draw the elbow plot of the clustering distortion and save it as a PNG image.
+
+    The x axis holds k = 1 followed by every k from k_min to k_max, so distortions
+    is expected to hold one value per each of those k, in that order.
+
+    Args:
+        distortions (list): Distortion measured for each number of clusters.
+        k_min (int): The minimum number of clusters considered.
+        k_max (int): The maximum number of clusters considered.
+
+    Returns:
+        None
+    """
+    cluster_counts = [1, *range(k_min, k_max + 1)]
+    plt.figure(0)
+    plt.plot(cluster_counts, distortions, marker = 'o')
+    plt.xlabel('Number of clusters (k)')
+    plt.ylabel('Distortion')
+
+    figure = plt.gcf()
+    figure.set_size_inches(18.5, 10.5, forward = True)
+    figure.savefig('clustering/elbow_plot.png', dpi=100)
+
+
+############################## silhouette plot ###############################
+def silhouette_draw(dataset: pd.DataFrame, labels: List[str], n_clusters: int, path:str) -> None:
+ """
+ Generate a silhouette plot for the clustering results. Nothing is drawn when n_clusters is 1.
+ The silhouette coefficient is a measure used to evaluate the quality of clusters obtained from a clustering algorithm and it quantifies how similar an object is to its own cluster compared to other clusters.
+ The silhouette coefficient ranges from -1 to 1, where:
+ - A value close to +1 indicates that the object is well matched to its own cluster and poorly matched to neighboring clusters. This implies that the object is in a dense, well-separated cluster.
+ - A value close to 0 indicates that the object is close to the decision boundary between two neighboring clusters.
+ - A value close to -1 indicates that the object may have been assigned to the wrong cluster.
+
+ Args:
+ dataset (pandas.DataFrame): The dataset used for clustering.
+ labels (list): The cluster labels assigned to each data point.
+ n_clusters (int): The number of clusters.
+ path (str): The path to save the silhouette plot image.
+
+ Returns:
+ None
+ """
+ if n_clusters == 1:
+ return None
+
+ silhouette_avg = silhouette_score(dataset, labels)
+ warning("For n_clusters = " + str(n_clusters) +
+ " The average silhouette_score is: " + str(silhouette_avg))
+
+ plt.close('all')
+ # Create a figure with a single subplot
+ fig, (ax1) = plt.subplots(1, 1)
+
+ fig.set_size_inches(18, 7)
+
+ # The only subplot is the silhouette plot.
+ # The silhouette coefficient ranges from -1 to 1, so the x axis covers
+ # that whole interval.
+ ax1.set_xlim([-1, 1])
+ # The (n_clusters+1)*10 is for inserting blank space between silhouette
+ # plots of individual clusters, to demarcate them clearly.
+ ax1.set_ylim([0, len(dataset) + (n_clusters + 1) * 10])
+
+ # Compute the silhouette scores for each sample
+ sample_silhouette_values = silhouette_samples(dataset, labels)
+
+ y_lower = 10
+ for i in range(n_clusters):
+ # Aggregate the silhouette scores for samples belonging to
+ # cluster i, and sort them
+ ith_cluster_silhouette_values = \
+ sample_silhouette_values[labels == i] # NOTE(review): assumes labels is a NumPy array (element-wise ==) -- confirm
+
+ ith_cluster_silhouette_values.sort()
+
+ size_cluster_i = ith_cluster_silhouette_values.shape[0]
+ y_upper = y_lower + size_cluster_i
+
+ color = cm.nipy_spectral(float(i) / n_clusters)
+ ax1.fill_betweenx(np.arange(y_lower, y_upper),
+ 0, ith_cluster_silhouette_values,
+ facecolor=color, edgecolor=color, alpha=0.7)
+
+ # Label the silhouette plots with their cluster numbers at the middle
+ ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
+
+ # Compute the new y_lower for next plot
+ y_lower = y_upper + 10 # leave a gap of 10 units before the next cluster
+
+ ax1.set_title("The silhouette plot for the various clusters.")
+ ax1.set_xlabel("The silhouette coefficient values")
+ ax1.set_ylabel("Cluster label")
+
+ # The vertical line for average silhouette score of all the values
+ ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
+
+ ax1.set_yticks([]) # Clear the yaxis labels / ticks
+ ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
+
+
+ plt.suptitle(("Silhouette analysis for clustering on sample data "
+ "with n_clusters = " + str(n_clusters) + "\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight='bold')
+
+
+ plt.savefig(path, bbox_inches='tight')
+
+######################## dbscan ##############################################
+def dbscan(dataset: pd.DataFrame, eps: float, min_samples: float, best_cluster: str) -> None:
+    """
+    Cluster the dataset with DBSCAN, a density-based algorithm, and save the labels.
+
+    Points that DBSCAN marks as noise (label -1) are written as class 0; real
+    clusters are numbered from 1.
+
+    Args:
+        dataset (pandas.DataFrame): The dataset to be clustered.
+        eps (float): The maximum distance between two samples for one to be considered
+            as in the neighborhood of the other. None keeps the library default.
+        min_samples (float): The number of samples in a neighborhood for a point to be
+            considered as a core point. None keeps the library default.
+        best_cluster (str): The file path to save the output of the best cluster.
+
+    Returns:
+        None
+    """
+    if not os.path.exists('clustering'):
+        os.makedirs('clustering')
+
+    # Forward each option only when it was given, independently of the other:
+    # None is not a valid value for either DBSCAN parameter.
+    options = {}
+    if eps is not None:
+        options['eps'] = eps
+    if min_samples is not None:
+        # The value is a float but scikit-learn expects an integer count;
+        # rounding up keeps the meaning of "at least min_samples neighbours".
+        options['min_samples'] = int(np.ceil(min_samples))
+
+    labels = DBSCAN(**options).fit(dataset).labels_
+
+    # Shared writer: shifts the labels by one and saves 'Patient_ID' / 'Class'.
+    write_to_csv(dataset, labels, best_cluster)
+
+
+########################## hierachical #######################################
+def hierachical_agglomerative(dataset: pd.DataFrame, k_min: int, k_max: int, best_cluster: str, silhouette: str) -> None:
+    """
+    Perform hierarchical agglomerative (Ward) clustering for every k in [k_min, k_max].
+
+    A dendrogram of the dataset is saved first. Each partition is written to
+    'clustering/hierarchical_with_<k>_clusters.tsv', and the one with the highest
+    average silhouette score is also written to best_cluster.
+
+    Args:
+        dataset (pandas.DataFrame): The dataset to be clustered.
+        k_min (int): The minimum number of clusters to consider.
+        k_max (int): The maximum number of clusters to consider.
+        best_cluster (str): The file path to save the output of the best cluster.
+        silhouette (str): Whether to generate silhouette plots ('true' or 'false').
+
+    Returns:
+        None
+    """
+    if not os.path.exists('clustering'):
+        os.makedirs('clustering')
+
+    plt.figure(figsize=(10, 7))
+    plt.title("Customer Dendograms")
+    shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist())
+    fig = plt.gcf()
+    fig.savefig('clustering/dendogram.png', dpi=200)
+
+    range_n_clusters = list(range(k_min, k_max + 1))
+    scores = []
+    all_labels = []
+
+    for n_clusters in range_n_clusters:
+        # Ward linkage only supports Euclidean distances, which is also the default,
+        # so the distance argument (renamed across scikit-learn releases) is omitted.
+        clusterer = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
+        cluster_labels = clusterer.fit_predict(dataset)
+        all_labels.append(cluster_labels)
+
+        # One score per k is required to pick the best k: with an empty list the
+        # first k always wins. The score is not defined for one cluster, hence the 0.
+        scores.append(0 if n_clusters == 1 else silhouette_score(dataset, cluster_labels))
+        write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
+
+    best = max_index(scores) + k_min
+
+    for n_clusters, cluster_labels in zip(range_n_clusters, all_labels):
+        prefix = '_BEST' if n_clusters == best else ''
+        if silhouette == 'true':
+            silhouette_draw(dataset, cluster_labels, n_clusters, 'clustering/silhouette_with_' + str(n_clusters) + prefix + '_clusters.png')
+
+        if n_clusters == best:
+            write_to_csv(dataset, cluster_labels, best_cluster)
+
+
+############################# main ###########################################
+def main() -> None:
+    """
+    Initializes everything and sets the program in motion based on the frontend input arguments.
+
+    Returns:
+        None
+    """
+    if not os.path.exists('clustering'):
+        os.makedirs('clustering')
+
+    args = process_args(sys.argv)
+
+    # Data read: the table is turned into a dict of float lists and then rebuilt
+    # with one row per original column.
+    X = read_dataset(args.input)
+    X = pd.DataFrame.to_dict(X, orient='list')
+    X = rewrite_input(X)
+    X = pd.DataFrame.from_dict(X, orient = 'index')
+
+    # Drop the columns whose first row holds None. The first row is taken by
+    # position (.iloc): X[i][0] relies on the integer-key positional fallback of
+    # label-indexed Series, which pandas has deprecated.
+    # NOTE(review): missing values may already have been turned into NaN by
+    # pandas at this point, in which case nothing is dropped -- confirm.
+    to_drop = [i for i in X.columns if X[i].iloc[0] is None]
+    X = X.drop(columns=to_drop)
+
+    if args.k_max is not None:
+        numero_classi = X.shape[0]
+        while args.k_max >= numero_classi:
+            err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset'
+            warning(err)
+            args.k_max = args.k_max - 1
+
+    if args.cluster_type == 'kmeans':
+        kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster)
+
+    if args.cluster_type == 'dbscan':
+        dbscan(X, args.eps, args.min_samples, args.best_cluster)
+
+    if args.cluster_type == 'hierarchy':
+        hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster, args.silhouette)
+
+##############################################################################
+if __name__ == "__main__":  # entry point: main() runs only when the file is executed as a script
+ main()
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_cluster.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_cluster.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,151 @@
+
+
+
+ marea_macros.xml
+
+
+ numpy
+ pandas
+ scipy
+ scikit-learn
+ matplotlib
+ lxml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"
+
+
+
+
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/marea_macros.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,195 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
++--------------------+-------------------------------+
+| id | rule (with entrez-id) |
++====================+===============================+
+| SHMT1 | 155060 or 10357 |
++--------------------+-------------------------------+
+| NIT2 | 155060 or 100134869 |
++--------------------+-------------------------------+
+| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 |
++--------------------+-------------------------------+
+
+|
+
+
+
+
+
++------------+------------+------------+------------+
+| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 |
++============+============+============+============+
+| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 |
++------------+------------+------------+------------+
+| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 |
++------------+------------+------------+------------+
+| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 |
++------------+------------+------------+------------+
+| HGNC:9 | 0.456788 | 0.876543 | 0.876542 |
++------------+------------+------------+------------+
+| HGNC:23 | 0.876543 | 0.786543 | 0.897654 |
++------------+------------+------------+------------+
+
+|
+
+
+
+
+
++-------------+------------+------------+------------+
+| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 |
++=============+============+============+============+
+| A1BG | 0.523167 | 0.371355 | 0.925661 |
++-------------+------------+------------+------------+
+| A1CF | 0.568765 | 0.765567 | 0.456789 |
++-------------+------------+------------+------------+
+| A2M | 0.876545 | 0.768933 | 0.987654 |
++-------------+------------+------------+------------+
+| A4GALT | 0.456788 | 0.876543 | 0.876542 |
++-------------+------------+------------+------------+
+| M664Y65 | 0.876543 | 0.786543 | 0.897654 |
++-------------+------------+------------+------------+
+
+|
+
+
+
+
+
+This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_.
+
+.. _BIMIB: https://bimib.disco.unimib.it/index.php/Home
+.. _Department of Informatics, Systems and Communications: https://www.disco.unimib.it/en
+.. _University of Milan - Bicocca: https://en.unimib.it/
+
+
+
+
+
+
+ @article{graudenzi2018integration,
+ title={Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power},
+ author={Graudenzi, Alex and Maspero, Davide and Di Filippo, Marzia and Gnugnoli, Marco and Isella, Claudio and Mauri, Giancarlo and Medico, Enzo and Antoniotti, Marco and Damiani, Chiara},
+ journal={Journal of biomedical informatics},
+ volume={87},
+ pages={37--49},
+ year={2018},
+ publisher={Elsevier},
+ url = {https://doi.org/10.1016/j.jbi.2018.09.010},
+ }
+
+
+ @article{damiani2020marea4galaxy,
+ title={MaREA4Galaxy: Metabolic reaction enrichment analysis and visualization of RNA-seq data within Galaxy},
+ author={Damiani, Chiara and Rovida, Lorenzo and Maspero, Davide and Sala, Irene and Rosato, Luca and Di Filippo, Marzia and Pescini, Dario and Graudenzi, Alex and Antoniotti, Marco and Mauri, Giancarlo},
+ journal={Computational and Structural Biotechnology Journal},
+ volume={18},
+ pages={993},
+ year={2020},
+ publisher={Research Network of Computational and Structural Biotechnology},
+ url = {https://doi.org/10.1016/j.csbj.2020.04.008},
+ }
+
+
+ @article{ebrahim2013cobrapy,
+ title={COBRApy: constraints-based reconstruction and analysis for python},
+ author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R},
+ journal={BMC systems biology},
+ volume={7},
+ pages={1--6},
+ year={2013},
+ publisher={Springer}
+ }
+
+
+
+
+
+
+
+ @article{galuzzi2024adjusting,
+ title={Adjusting for false discoveries in constraint-based differential metabolic flux analysis},
+ author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara},
+ journal={Journal of Biomedical Informatics},
+ volume={150},
+ pages={104597},
+ year={2024},
+ publisher={Elsevier}
+ }
+
+
+ @inproceedings{galuzzi2022best,
+ title={Best practices in flux sampling of constrained-based models},
+ author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara},
+ booktitle={International Conference on Machine Learning, Optimization, and Data Science},
+ pages={234--248},
+ year={2022},
+ organization={Springer}
+ }
+
+
+ @article{ebrahim2013cobrapy,
+ title={COBRApy: constraints-based reconstruction and analysis for python},
+ author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R},
+ journal={BMC systems biology},
+ volume={7},
+ pages={1--6},
+ year={2013},
+ publisher={Springer}
+ }
+
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_generator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_generator.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,701 @@
+from __future__ import division
+# galaxy complains this ^^^ needs to be at the very beginning of the file, for some reason.
+import sys
+import argparse
+import collections
+import pandas as pd
+import pickle as pk
+import utils.general_utils as utils
+import utils.rule_parsing as ruleUtils
+from typing import Union, Optional, List, Dict, Tuple, TypeVar
+
+ERRORS = []
+########################## argparse ##########################################
+ARGS :argparse.Namespace
+def process_args() -> argparse.Namespace:
+ """
+ Processes command-line arguments.
+
+ Takes no parameters: parse_args() is called without an explicit list and
+ therefore reads the arguments from sys.argv.
+
+ Returns:
+ Namespace: An object containing parsed arguments.
+ """
+ parser = argparse.ArgumentParser(
+ usage = '%(prog)s [options]',
+ description = "process some value's genes to create a comparison's map.")
+
+ parser.add_argument(
+ '-rs', '--rules_selector',
+ type = utils.Model, default = utils.Model.HMRcore, choices = list(utils.Model),
+ help = 'chose which type of dataset you want use')
+
+ parser.add_argument("-rl", "--rule_list", type = str,
+ help = "path to input file with custom rules, if provided")
+
+ parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name")
+ # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
+
+ parser.add_argument(
+ '-n', '--none',
+ type = utils.Bool("none"), default = True,
+ help = 'compute Nan values')
+
+ parser.add_argument(
+ '-td', '--tool_dir',
+ type = str,
+ required = True, help = 'your tool directory')
+
+ parser.add_argument(
+ '-ol', '--out_log',
+ type = str,
+ help = "Output log")
+
+ parser.add_argument(
+ '-in', '--input', # the "id" option was renamed to "in"
+ type = str,
+ help = 'input dataset')
+
+ parser.add_argument(
+ '-ra', '--ras_output',
+ type = str,
+ required = True, help = 'ras output')
+
+ return parser.parse_args()
+
+############################ dataset input ####################################
+def read_dataset(data :str, name :str) -> pd.DataFrame:
+    """
+    Load a tab-separated file, whose first row is the header, into a DataFrame.
+
+    Args:
+        data (str): Path of the file to load.
+        name (str): Name of the dataset, used in error messages.
+
+    Returns:
+        pandas.DataFrame: The parsed table.
+
+    Raises:
+        SystemExit: If the file is empty or the table has fewer than 2 columns;
+            the message names the dataset.
+    """
+    try:
+        table = pd.read_csv(data, sep = '\t', header = 0, engine='python')
+    except pd.errors.EmptyDataError:
+        sys.exit('Execution aborted: wrong format of ' + name + '\n')
+    if len(table.columns) >= 2:
+        return table
+    sys.exit('Execution aborted: wrong format of ' + name + '\n')
+
+############################ load id e rules ##################################
+def load_id_rules(reactions :Dict[str, Dict[str, List[str]]]) -> Tuple[List[str], List[Dict[str, List[str]]]]:
+    """
+    Split a dictionary of reactions into parallel lists of IDs and rules.
+
+    Args:
+        reactions (dict): A dictionary where keys are IDs and values are rules.
+
+    Returns:
+        tuple: (ids, rules), two lists in the dictionary's iteration order, so that
+            rules[i] is the rule stored under ids[i].
+    """
+    # keys() and values() iterate in the same order, which keeps the lists aligned.
+    ids = list(reactions.keys())
+    rules = list(reactions.values())
+    return (ids, rules)
+
+############################ check_methods ####################################
+def gene_type(l :str, name :str) -> str:
+    """
+    Determine the type of gene ID.
+
+    Args:
+        l (str): The gene identifier to check.
+        name (str): The name of the dataset, used in error messages.
+
+    Returns:
+        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').
+
+    Raises:
+        SystemExit: If the gene ID type is not supported, the execution is aborted.
+    """
+    if check_hgnc(l):
+        return 'hugo_id'
+    if check_ensembl(l):
+        return 'ensembl_gene_id'
+    if check_symbol(l):
+        return 'symbol'
+    if check_entrez(l):
+        return 'entrez_id'
+    # Keep the space at the seam of the two literals ("Supported ID types").
+    sys.exit('Execution aborted:\n' +
+             'gene ID type in ' + name + ' not supported. Supported ID ' +
+             'types are: HUGO ID, Ensembl ID, HUGO symbol, Entrez ID\n')
+
+def check_hgnc(l :str) -> bool:
+    """
+    Tell whether a gene identifier follows the HGNC format.
+
+    The format is the prefix 'HGNC:' (in any letter case) followed by one or
+    more digits, e.g. 'HGNC:24086'.
+
+    Args:
+        l (str): The gene identifier to check.
+
+    Returns:
+        bool: True if the gene identifier follows the HGNC format, False otherwise.
+    """
+    # Anything of 5 characters or fewer cannot hold the prefix plus a digit.
+    has_suffix = len(l) > 5
+    has_prefix = l.upper().startswith('HGNC:')
+    return has_suffix and has_prefix and l[5:].isdigit()
+
+def check_ensembl(l :str) -> bool:
+    """
+    Tell whether a gene identifier follows the Ensembl format.
+
+    Args:
+        l (str): The gene identifier to check.
+
+    Returns:
+        bool: True if l starts with 'ENS' (in any letter case), False otherwise.
+    """
+    return 'ENS' == l.upper()[:3]
+
+
+def check_symbol(l :str) -> bool:
+    """
+    Tell whether a gene identifier follows the symbol format.
+
+    A symbol is a letter followed by one or more letters or digits, so it is
+    at least two characters long.
+
+    Args:
+        l (str): The gene identifier to check.
+
+    Returns:
+        bool: True if the gene identifier follows the symbol format, False otherwise.
+    """
+    if not l:
+        return False
+    first, rest = l[0], l[1:]
+    return first.isalpha() and rest.isalnum()
+
+def check_entrez(l :str) -> bool:
+    """
+    Tell whether a gene identifier follows the Entrez ID format.
+
+    Args:
+        l (str): The gene identifier to check.
+
+    Returns:
+        bool: True if the identifier is non-empty and made only of digits, False otherwise.
+    """
+    # str.isdigit() is already False for the empty string; the explicit test
+    # only makes that case visible.
+    non_empty = len(l) > 0
+    return non_empty and l.isdigit()
+
+############################ gene #############################################
+def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]:
+ """
+ Normalize the gene IDs of a dataset, abort on relevant duplicates and return the data as a dictionary.
+
+ Args:
+ gene (DataFrame): Gene data, IDs in the first column; those IDs are stripped and cut at the first '.' in place.
+ type_gene (str): Type of gene data (e.g., 'hugo_id', 'ensembl_gene_id', 'symbol', 'entrez_id').
+ name (str): Name of the dataset.
+ gene_custom (dict or None): Custom gene data dictionary if provided.
+
+ Returns:
+ dict: gene.set_index(<first column>).to_dict(), i.e. one entry per remaining column, each mapping gene ID to value.
+ """
+ args = process_args() # NOTE(review): parses the command line again although the module-level ARGS is also used below
+ for i in range(len(gene)):
+ tmp = gene.iloc[i, 0]
+ gene.iloc[i, 0] = tmp.strip().split('.')[0]
+
+ gene_dup = [item for item, count in
+ collections.Counter(gene[gene.columns[0]]).items() if count > 1]
+ pat_dup = [item for item, count in
+ collections.Counter(list(gene.columns)).items() if count > 1]
+
+ if gene_dup:
+ if gene_custom == None:
+ if args.rules_selector == 'HMRcore': # NOTE(review): rules_selector is parsed with type utils.Model; confirm it compares equal to these strings
+ gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb')) # NOTE(review): the file object is never closed explicitly
+
+ elif args.rules_selector == 'Recon':
+ gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/Recon_genes.p', 'rb'))
+
+ elif args.rules_selector == 'ENGRO2':
+ gene_in_rule = pk.load(open(args.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb'))
+ print(f"{args.tool_dir}'/local/pickle files/ENGRO2_genes.p'") # NOTE(review): looks like leftover debug output -- confirm
+ utils.logWarning(f"{args.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log)
+ print(args.rules_selector)
+ gene_in_rule = gene_in_rule.get(type_gene) # NOTE(review): gene_in_rule may be unbound here if no selector branch matched -- confirm
+
+ else:
+ gene_in_rule = gene_custom
+ tmp = []
+ for i in gene_dup:
+ if gene_in_rule.get(i) == 'ok': # only duplicated IDs flagged 'ok' in the rules' gene table abort the run
+ tmp.append(i)
+ if tmp:
+ sys.exit('Execution aborted because gene ID '
+ +str(tmp)+' in '+name+' is duplicated\n')
+
+ if pat_dup: utils.logWarning(f"Warning: duplicated label\n{pat_dup} in {name}", ARGS.out_log)
+ return (gene.set_index(gene.columns[0])).to_dict()
+
+############################ resolve ##########################################
+def replace_gene_value(l :list, d :dict) -> Tuple[list, list]:
+    """
+    Replace the gene identifiers of a parsed rule with their values.
+
+    Args:
+        l (list): Gene identifiers, 'and' / 'or' operators and nested lists of these.
+        d (dict): Maps each gene identifier to its value.
+
+    Returns:
+        tuple: (the rule with values in place of identifiers, identifiers without a value).
+    """
+    values, missing = [], []
+    # Iterate directly: stepping with "l = l[1:]" copies the rest of the list
+    # at every element. "or ()" treats an empty or None rule as no elements.
+    for item in l or ():
+        if isinstance(item, list):
+            nested_values, nested_missing = replace_gene_value(item, d)
+            values.append(nested_values)
+            missing.extend(nested_missing)
+            continue
+        value = replace_gene(item, d)
+        values.append(value)
+        if value is None:
+            missing.append(item)
+    return (values, missing)
+
+def replace_gene(l :str, d :dict) -> Optional[Union[str, int, float]]:
+    """
+    Replace a single gene identifier with its corresponding value from a dictionary.
+
+    Args:
+        l (str): Gene identifier to replace, or one of the operators 'and' / 'or'.
+        d (dict): Maps each gene identifier to its value.
+
+    Returns:
+        The operator itself for 'and' / 'or'; otherwise the value found in d, or None if absent.
+
+    Raises:
+        SystemExit: If the value associated with the gene identifier is not an int or a float.
+    """
+    if l in ('and', 'or'):
+        return l
+    value = d.get(l, None)
+    if value is not None and not isinstance(value, (int, float)):
+        # str() keeps the abort message working for values that are not strings.
+        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
+    return value
+
+T = TypeVar("T", bound = Optional[Union[int, float]])
+def computes(val1 :T, op :str, val2 :T, cn :bool) -> T:
+    """
+    Combine two RAS operands with a logical operator.
+
+    Args:
+        val1: First operand, possibly None.
+        op (str): 'and' selects the minimum of the operands; any other value sums them.
+        val2: Second operand, possibly None.
+        cn (bool): When exactly True, an 'and' with a missing operand falls back to the operand
+            that is present; otherwise such an 'and' yields None.
+
+    Returns:
+        The combined value, the only available operand, or None.
+    """
+    if val1 != None and val2 != None:
+        return min(val1, val2) if op == 'and' else val1 + val2
+
+    # From here on at least one operand is missing.
+    if op == 'and' and cn is not True:
+        return None
+
+    if val1 != None:
+        return val1
+    return val2 if val2 != None else None
+
+# ris should be Literal[None] but Literal is not supported in Python 3.7
+def control(ris, l :List[Union[int, float, list]], cn :bool) -> Union[bool, int, float]: #Union[Literal[False], int, float]:
+    """
+    Validate the shape of an expression and evaluate it.
+
+    Args:
+        ris: Intermediate result, forwarded to control_list for expressions longer than two elements.
+        l (list): Expression to check.
+        cn (bool): Control flag forwarded to the nested evaluation.
+
+    Returns:
+        The single value of a one-element expression (evaluated recursively if it is a list), the
+        result of control_list for longer expressions, or False when the expression is malformed
+        (empty, two elements long, or a single element of an unsupported type).
+    """
+    size = len(l)
+    if size > 2:
+        return control_list(ris, l, cn)
+    if size != 1:
+        return False
+
+    only = l[0]
+    if isinstance(only, (float, int)) or only == None:
+        return only
+    if isinstance(only, list):
+        return control(None, only, cn)
+    return False
+
+def control_list(ris, l :List[Optional[Union[float, int, list]]], cn :bool) -> Optional[bool]: #Optional[Literal[False]]:
+ """
+ Validate and evaluate a flat expression made of operands and 'and'/'or' operators.
+
+ The list is consumed from the left: each step combines operands through computes() and drops
+ the tokens it has used, until the list is empty.
+
+ Args:
+ ris: Running result; it is used as the left operand when a step starts with an operator.
+ l (list): Expression tokens: numbers, None, nested lists (evaluated through control()) and the
+ strings 'and' / 'or'.
+ cn (bool): Control flag forwarded to computes() and control().
+
+ Returns:
+ The running result once every token has been consumed, or False as soon as the expression
+ is found to be malformed.
+ """
+ while l:
+ # A single leftover token cannot form "operand operator operand" nor "operator operand".
+ if len(l) == 1:
+ return False
+ # Case 1: value, operator, then a value or a nested list -> consumes three tokens.
+ elif (isinstance(l[0], (float, int)) or
+ l[0] == None) and l[1] in ['and', 'or']:
+ # NOTE(review): l[2] is read without a length check; a two-token remainder reaching this
+ # branch would presumably raise IndexError - confirm whether inputs can get here.
+ if isinstance(l[2], (float, int)) or l[2] == None:
+ ris = computes(l[0], l[1], l[2], cn)
+ elif isinstance(l[2], list):
+ tmp = control(None, l[2], cn)
+ if tmp is False:
+ return False
+ else:
+ ris = computes(l[0], l[1], tmp, cn)
+ else:
+ return False
+ l = l[3:]
+ # Case 2: operator first -> the running result is the left operand; consumes two tokens.
+ elif l[0] in ['and', 'or']:
+ if isinstance(l[1], (float, int)) or l[1] == None:
+ ris = computes(ris, l[0], l[1], cn)
+ elif isinstance(l[1], list):
+ tmp = control(None,l[1], cn)
+ if tmp is False:
+ return False
+ else:
+ ris = computes(ris, l[0], tmp, cn)
+ else:
+ return False
+ l = l[2:]
+ # Case 3: nested list, operator, then a value or another nested list -> consumes three tokens.
+ elif isinstance(l[0], list) and l[1] in ['and', 'or']:
+ if isinstance(l[2], (float, int)) or l[2] == None:
+ tmp = control(None, l[0], cn)
+ if tmp is False:
+ return False
+ else:
+ ris = computes(tmp, l[1], l[2], cn)
+ elif isinstance(l[2], list):
+ tmp = control(None, l[0], cn)
+ tmp2 = control(None, l[2], cn)
+ if tmp is False or tmp2 is False:
+ return False
+ else:
+ ris = computes(tmp, l[1], tmp2, cn)
+ else:
+ return False
+ l = l[3:]
+ # Any other token sequence is not a valid expression.
+ else:
+ return False
+ return ris
+
+ResolvedRules = Dict[str, List[Optional[Union[float, int]]]]
+def resolve(genes: Dict[str, str], rules: List[str], ids: List[str], resolve_none: bool, name: str) -> Tuple[Optional[ResolvedRules], Optional[list]]:
+ """
+ Resolve every rule against every entry of the gene data, producing one score list per entry.
+
+ Args:
+ genes (dict): Gene data; each value is passed to replace_gene_value as the lookup used to
+ replace the gene identifiers of a rule.
+ rules (list): Rules to resolve; empty rules yield None.
+ ids (list): IDs corresponding to the rules (not read by this function).
+ resolve_none (bool): Flag forwarded to control() to decide how missing values are handled.
+ name (str): Name of the dataset, used in the warning message.
+
+ Returns:
+ tuple: (dict mapping each key of genes to its list of scores, de-duplicated list of gene
+ IDs not found in the data), or (None, None) when no score could be computed.
+ """
+ resolve_rules = {}
+ not_found = []
+ # NOTE(review): flag presumably records that at least one rule produced a usable score; the
+ # exact nesting of "flag = True" cannot be read from this view - confirm.
+ flag = False
+ for key, value in genes.items():
+ tmp_resolve = []
+ for i in range(len(rules)):
+ tmp = rules[i]
+ if tmp:
+ tmp, err = replace_gene_value(tmp, value)
+ if err:
+ not_found.extend(err)
+ ris = control(None, tmp, resolve_none)
+ # False marks a malformed rule, None a rule without a computable value: both are stored as None.
+ if ris is False or ris == None:
+ tmp_resolve.append(None)
+ else:
+ tmp_resolve.append(ris)
+ flag = True
+ else:
+ tmp_resolve.append(None)
+ resolve_rules[key] = tmp_resolve
+
+ if flag is False:
+ utils.logWarning(
+ f"Warning: no computable score (due to missing gene values) for class {name}, the class has been disregarded",
+ ARGS.out_log)
+
+ return (None, None)
+
+ # set() removes the duplicates accumulated while the same rules are resolved for every entry.
+ return (resolve_rules, list(set(not_found)))
+############################ create_ras #######################################
+def create_ras(resolve_rules: Optional[ResolvedRules], dataset_name: str, rules: List[str], ids: List[str], file: str) -> None:
+    """
+    Create a RAS (Reaction Activity Score) file from resolved rules.
+
+    Args:
+        resolve_rules (dict): Resolved rules, one list of scores per column of the output. When None,
+            a warning is logged and no file is written.
+        dataset_name (str): Name of the dataset, used in the warning message.
+        rules (list): List of rules (not read by this function).
+        ids (list): Reaction IDs, written as the first column ('Reactions').
+        file (str): Path to the output RAS file (tab separated).
+
+    Side effects:
+        resolve_rules : mut (None scores are replaced in place by the string 'None')
+
+    Returns:
+        None
+    """
+    if resolve_rules is None:
+        utils.logWarning(f"Couldn't generate RAS for current dataset: {dataset_name}", ARGS.out_log)
+        # Nothing can be written; going on would fail on None.values().
+        return
+
+    for geni in resolve_rules.values():
+        for i, valori in enumerate(geni):
+            if valori is None:
+                geni[i] = 'None'
+
+    output_ras = pd.DataFrame.from_dict(resolve_rules)
+    output_ras.insert(0, 'Reactions', ids)
+    output_to_csv = output_ras.to_csv(sep = '\t', index = False)
+
+    # The context manager closes the file even if the write fails.
+    with open(file, "w") as text_file:
+        text_file.write(output_to_csv)
+
+################################- NEW RAS COMPUTATION -################################
+Expr = Optional[Union[int, float]]
+Ras = Expr
+def ras_for_cell_lines(dataset: pd.DataFrame, rules: Dict[str, ruleUtils.OpList]) -> Dict[str, Dict[str, Ras]]:
+    """
+    Generates the RAS scores for each cell line found in the dataset.
+
+    Args:
+        dataset (pd.DataFrame): Dataset containing gene values; its first column holds the gene
+            identifiers and every other column is one cell line.
+        rules (dict): The dict containing reaction ids as keys and rules as values.
+
+    Side effects:
+        dataset : mut (its first column becomes the index)
+
+    Returns:
+        dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary
+        where each key corresponds to a reaction ID and each value is its computed RAS score.
+    """
+    ras_values_by_cell_line = {}
+    dataset.set_index(dataset.columns[0], inplace=True)
+    # set_index already moved the gene identifiers out of the columns, so every remaining
+    # column is a cell line: slicing with [1:] here would drop the first cell line.
+    for cell_line_name in dataset.columns:
+        cell_line = dataset[cell_line_name].to_dict()
+        ras_values_by_cell_line[cell_line_name] = get_ras_values(rules, cell_line)
+    return ras_values_by_cell_line
+
+def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
+    """
+    Computes the RAS (Reaction Activity Score) of every rule for one cell line.
+
+    Args:
+        value_rules (dict): Maps reaction ids to the OpList describing their rule.
+        dataset : gene expression data of one cell line.
+
+    Returns:
+        dict: Maps each reaction id to the value returned by ras_op_list for its rule.
+    """
+    ras_by_reaction = {}
+    for reaction_id, op_list in value_rules.items():
+        ras_by_reaction[reaction_id] = ras_op_list(op_list, dataset)
+    return ras_by_reaction
+
+def get_gene_expr(dataset :Dict[str, Expr], name :str) -> Expr:
+    """
+    Looks up the expression value of a gene in a cell line dataset.
+
+    Args:
+        dataset : gene expression data of one cell line.
+        name : gene name.
+
+    Side effects:
+        ERRORS : mut (the gene name is appended when no value is available)
+
+    Returns:
+        Expr : the gene's expression value, None when it is missing.
+    """
+    expr = dataset.get(name, None)
+    if expr is not None:
+        return expr
+
+    ERRORS.append(name)
+    return expr
+
+def ras_op_list(op_list: ruleUtils.OpList, dataset: Dict[str, Expr]) -> Ras:
+    """
+    Recursively computes the RAS (Reaction Activity Score) of an OpList.
+
+    Items are combined with a sum for OR and with the minimum otherwise; missing values are skipped,
+    unless the operator is AND and ARGS.none is falsy, in which case the whole result is None.
+
+    Args:
+        op_list (OpList): The OpList representing a rule with gene values.
+        dataset : gene expression data of one cell line.
+
+    Returns:
+        Ras: The computed RAS value for the given OpList.
+    """
+    op = op_list.op
+    # Without an operator the list is resolved through its first element only.
+    if not op: return get_gene_expr(dataset, op_list[0])
+
+    strict_and = op is ruleUtils.RuleOp.AND and not ARGS.none
+    if strict_and and None in op_list: return None
+
+    ras_value :Ras = None
+    for i in range(len(op_list)):
+        item = op_list[i]
+        if isinstance(item, ruleUtils.OpList):
+            item = ras_op_list(item, dataset)
+        else:
+            item = get_gene_expr(dataset, item)
+
+        if item is None:
+            if strict_and: return None
+            continue
+
+        if ras_value is None:
+            ras_value = item
+        elif op is ruleUtils.RuleOp.OR:
+            ras_value = ras_value + item
+        else:
+            ras_value = min(ras_value, item)
+
+    return ras_value
+
+def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None:
+    """
+    Save computed ras scores as a tsv file at the path stored in ARGS.ras_output.
+
+    Args:
+        rasScores : the computed ras scores, one dict of reaction scores per column.
+        reactions : the reaction ids, written as the first column ('Reactions').
+
+    Side effects:
+        rasScores : mut (None scores are replaced in place by the string "None")
+
+    Returns:
+        None
+    """
+    # Replacing None by hand is kept from the original, which notes it is a lot faster
+    # than the out-of-the-box dataframe method.
+    for scores in rasScores.values():
+        unscored = [reactId for reactId, score in scores.items() if score is None]
+        for reactId in unscored:
+            scores[reactId] = "None"
+
+    output_ras = pd.DataFrame.from_dict(rasScores)
+    output_ras.insert(0, 'Reactions', reactions)
+    output_ras.to_csv(ARGS.ras_output, sep = '\t', index = False)
+
+############################ MAIN #############################################
+#TODO: not used but keep, it will be when the new translator dicts will be used.
+def translateGene(geneName :str, encoding :str, geneTranslator :Dict[str, Dict[str, str]]) -> str:
+    """
+    Translate a gene name from one of the supported encodings to its HugoID.
+
+    Args:
+        geneName (str): the name of the gene in its current encoding.
+        encoding (str): the encoding, used as key into geneTranslator.
+        geneTranslator (Dict[str, Dict[str, str]]): for each encoding, the mapping from the gene
+            names supported by the current model to the corresponding HugoID.
+
+    Raises:
+        ValueError: When the gene isn't present in the mapping of the given encoding.
+
+    Returns:
+        str: the gene in HugoID encoding.
+    """
+    supportedGenesInEncoding = geneTranslator[encoding]
+    if geneName not in supportedGenesInEncoding:
+        raise ValueError(f"Gene \"{geneName}\" non trovato, verifica di star utilizzando il modello corretto!")
+
+    return supportedGenesInEncoding[geneName]
+
+def load_custom_rules() -> Dict[str, ruleUtils.OpList]:
+    """
+    Opens custom rules file and extracts the rules. Rules that are not in pickle format go through
+    an additional parsing step (one rule per csv line).
+
+    Returns:
+        Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
+
+    Raises:
+        utils.PathErr: When ARGS.rules_name cannot be interpreted as a file path.
+    """
+    datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat
+
+    try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext
+    except utils.PathErr as err:
+        # filenamePath is still unbound when fromStrPath fails, so the raw input string is reported.
+        # NOTE(review): assumes utils.PathErr accepts a plain string as its path argument - confirm.
+        raise utils.PathErr(ARGS.rules_name, f"Please make sure your file's name is a valid file path, {err.msg}")
+
+    if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
+
+    # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
+    return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) }
+
+def main() -> None:
+ """
+ Entry point of the RAS generator: parses the frontend arguments, reads the dataset and writes
+ the RAS scores, following a dedicated flow when a custom model is selected.
+
+ Returns:
+ None
+ """
+ # get args from frontend (related xml)
+ global ARGS
+ ARGS = process_args()
+ print(ARGS.rules_selector)
+ # read dataset
+ dataset = read_dataset(ARGS.input, "dataset")
+ # the first column is handled as text from here on
+ dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
+
+ # remove versioning from gene names
+ dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]
+
+ # handle custom models
+ model :utils.Model = ARGS.rules_selector
+ if model is utils.Model.Custom:
+ rules = load_custom_rules()
+ reactions = list(rules.keys())
+
+ save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
+ # ERRORS is filled by get_gene_expr with every gene that had no value in the dataset
+ if ERRORS: utils.logWarning(
+ f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
+ ARGS.out_log)
+
+ return
+
+ # This is the standard flow of the ras_generator program, for non-custom models.
+ name = "RAS Dataset"
+ # the gene encoding is detected from the first gene identifier of the dataset
+ type_gene = gene_type(dataset.iloc[0, 0], name)
+
+ rules = model.getRules(ARGS.tool_dir)
+ genes = data_gene(dataset, type_gene, name, None)
+ ids, rules = load_id_rules(rules.get(type_gene))
+
+ resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
+ create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
+
+ if err: utils.logWarning(
+ f"Warning: gene(s) {err} not found in class \"{name}\", " +
+ "the expression level for this gene will be considered NaN",
+ ARGS.out_log)
+
+ print("Execution succeded")
+
+###############################################################################
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_generator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_generator.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,109 @@
+
+ - Reaction Activity Scores computation
+
+ marea_macros.xml
+
+
+ numpy
+ pandas
+ lxml
+ cobra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_to_bounds.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_to_bounds.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,273 @@
+import argparse
+import utils.general_utils as utils
+from typing import Optional, List
+import os
+import numpy as np
+import pandas as pd
+import cobra
+import sys
+import csv
+from joblib import Parallel, delayed, cpu_count
+
+################################# process args ###############################
+def process_args(args :List[str]) -> argparse.Namespace:
+    """
+    Builds the command-line parser of this tool and parses the process arguments.
+
+    Args:
+        args (list): Accepted by the signature but not forwarded to the parser: parse_args() is
+            called without arguments, so the process command line is what gets parsed.
+
+    Returns:
+        Namespace: An object containing parsed arguments.
+    """
+    parser = argparse.ArgumentParser(
+        usage = '%(prog)s [options]',
+        description = 'process some value\'s')
+
+    # model selection
+    parser.add_argument(
+        '-ms', '--model_selector',
+        type = utils.Model,
+        default = utils.Model.ENGRO2,
+        choices = [utils.Model.ENGRO2, utils.Model.Custom],
+        help = 'chose which type of model you want use')
+    parser.add_argument(
+        "-mo", "--model",
+        type = str,
+        help = "path to input file with custom rules, if provided")
+    parser.add_argument(
+        "-mn", "--model_name",
+        type = str,
+        help = "custom mode name")
+
+    # medium selection
+    parser.add_argument(
+        '-mes', '--medium_selector',
+        default = "allOpen",
+        help = 'chose which type of medium you want use')
+    parser.add_argument(
+        "-meo", "--medium",
+        type = str,
+        help = "path to input file with custom medium, if provided")
+
+    # logging and tool location
+    parser.add_argument(
+        '-ol', '--out_log',
+        help = "Output log")
+    parser.add_argument(
+        '-td', '--tool_dir',
+        type = str,
+        required = True,
+        help = 'your tool directory')
+
+    # RAS inputs and class outputs
+    parser.add_argument(
+        '-ir', '--input_ras',
+        type = str,
+        required = False,
+        help = 'input ras')
+    parser.add_argument(
+        '-rs', '--ras_selector',
+        required = True,
+        type = utils.Bool("using_RAS"),
+        help = 'ras selector')
+    parser.add_argument(
+        '-c', '--classes',
+        type = str,
+        required = False,
+        help = 'input classes')
+    parser.add_argument(
+        '-cc', '--cell_class',
+        type = str,
+        help = 'output of cell class')
+
+    return parser.parse_args()
+
+########################### warning ###########################################
+def warning(s :str) -> None:
+    """
+    Append a warning message to the output log file and echo it on the console.
+
+    Args:
+        s (str): The warning message; it is written to ARGS.out_log followed by an empty line.
+
+    Returns:
+        None
+    """
+    with open(ARGS.out_log, 'a') as log_file:
+        log_file.write(s + "\n\n")
+
+    print(s)
+
+############################ dataset input ####################################
+def read_dataset(data :str, name :str) -> pd.DataFrame:
+    """
+    Load a tab-separated dataset with a header row into a pandas DataFrame.
+
+    Args:
+        data (str): Path to the file containing the dataset.
+        name (str): Name of the dataset, used in error messages.
+
+    Returns:
+        pandas.DataFrame: DataFrame containing the dataset.
+
+    Raises:
+        SystemExit: If the file is empty or holds fewer than two columns.
+    """
+    def abort() -> None:
+        # single exit point shared by both format checks
+        sys.exit('Execution aborted: wrong format of ' + name + '\n')
+
+    try:
+        dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
+    except pd.errors.EmptyDataError:
+        abort()
+
+    if len(dataset.columns) < 2:
+        abort()
+    return dataset
+
+
+def apply_ras_bounds(model, ras_row, rxns_ids):
+    """
+    Scale the non-zero bounds of the model's reactions by their RAS value.
+
+    Args:
+        model (cobra.Model): The metabolic model to be modified.
+        ras_row (pd.Series): Scaling factors indexed by reaction ID; reactions without an entry are left untouched.
+        rxns_ids (list of str): IDs of the reactions to process.
+
+    Returns:
+        None
+    """
+    for rxn_id in rxns_ids:
+        if rxn_id not in ras_row.index:
+            continue
+
+        factor = ras_row[rxn_id]
+        rxn = model.reactions.get_by_id(rxn_id)
+        lower, upper = rxn.lower_bound, rxn.upper_bound
+        scaled_upper = float(upper * factor)
+        scaled_lower = float(lower * factor)
+
+        # A bound equal to zero is never rewritten; when both are non-zero the lower one goes first.
+        if lower != 0:
+            rxn.lower_bound = scaled_lower
+        if upper != 0:
+            rxn.upper_bound = scaled_upper
+
+def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder):
+    """
+    Apply the RAS values of one cell to a copy of the model and save the resulting bounds.
+
+    Args:
+        cellName (str): The name of the RAS cell (used for naming the output file).
+        ras_row (pd.Series): Scaling factors for the reaction bounds of this cell.
+        model (cobra.Model): The reference model; only a copy of it is modified.
+        rxns_ids (list of str): Reaction IDs to scale, also used as row labels of the output.
+        output_folder (str): Prefix prepended to the file name; it is concatenated as-is, so it has to end with a path separator.
+
+    Returns:
+        None
+    """
+    scaled_model = model.copy()
+    apply_ras_bounds(scaled_model, ras_row, rxns_ids)
+
+    rows = [(rxn.lower_bound, rxn.upper_bound) for rxn in scaled_model.reactions]
+    bounds = pd.DataFrame(rows, index=rxns_ids, columns=["lower_bound", "upper_bound"])
+
+    destination = output_folder + cellName + ".csv"
+    bounds.to_csv(destination, sep='\t', index=True)
+
+def generate_bounds(model: cobra.Model, medium: dict, ras=None, output_folder='output/') -> pd.DataFrame:
+ """
+ Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments.
+
+ The bounds are written to tab-separated files: one "<cellName>.csv" per row of ras when it is
+ provided, a single "bounds.csv" otherwise.
+
+ Args:
+ model (cobra.Model): The metabolic model for which bounds will be generated; its bounds are modified in place.
+ medium (dict): A dictionary where keys are reaction IDs and values are the medium conditions;
+ None values are skipped.
+ ras (pd.DataFrame, optional): RAS pandas dataframe, processed one row at a time. Defaults to None.
+ output_folder (str, optional): Prefix prepended to the output file names. Defaults to 'output/'.
+
+ Returns:
+ None: despite the annotation, the function has no return statement; results are only written to disk.
+ """
+ rxns_ids = [rxn.id for rxn in model.reactions]
+
+ # Set medium conditions
+ for reaction, value in medium.items():
+ if value is not None:
+ # the medium value is applied with inverted sign as the lower bound of the reaction
+ model.reactions.get_by_id(reaction).lower_bound = -float(value)
+
+ # Perform Flux Variability Analysis (FVA)
+ df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
+
+ # Set FVA bounds
+ for reaction in rxns_ids:
+ rxn = model.reactions.get_by_id(reaction)
+ rxn.lower_bound = float(df_FVA.loc[reaction, "minimum"])
+ rxn.upper_bound = float(df_FVA.loc[reaction, "maximum"])
+
+ if ras is not None:
+ # one job per row of the RAS dataframe, using as many workers as cpu_count() reports
+ Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(cellName, ras_row, model, rxns_ids, output_folder) for cellName, ras_row in ras.iterrows())
+ else:
+ model_new = model.copy()
+ # a scaling factor of 1 for every reaction leaves the FVA bounds unchanged
+ apply_ras_bounds(model_new, pd.Series([1]*len(rxns_ids), index=rxns_ids), rxns_ids)
+ bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model_new.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
+ bounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
+ pass
+
+
+
+############################# main ###########################################
+def main() -> None:
+    """
+    Entry point of the tool: parses the frontend arguments, loads RAS data, model and medium,
+    then generates the reaction bounds inside the 'ras_to_bounds/' folder.
+
+    Returns:
+        None
+    """
+    os.makedirs('ras_to_bounds', exist_ok=True)
+
+    global ARGS
+    ARGS = process_args(sys.argv)
+
+    ARGS.output_folder = 'ras_to_bounds/'
+
+    if(ARGS.ras_selector == True):
+        ras_file_list = ARGS.input_ras.split(",")
+        if(len(ras_file_list)>1):
+            ras_class_names = [cls.strip() for cls in ARGS.classes.split(',')]
+        else:
+            ras_class_names = ["placeHolder"]
+        ras_list = []
+        # Rows are collected in a list and turned into a DataFrame once: DataFrame.append was
+        # removed in pandas 2.0 and copied the whole frame at every call.
+        class_records = []
+        for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
+            ras = read_dataset(ras_matrix, "ras dataset")
+            ras.replace("None", None, inplace=True)
+            ras.set_index("Reactions", drop=True, inplace=True)
+            ras = ras.T
+            ras = ras.astype(float)
+            ras_list.append(ras)
+            class_records.extend(
+                {"Patient_ID": patient_id, "Class": ras_class_name} for patient_id in ras.index)
+
+        class_assignments = pd.DataFrame(class_records, columns=["Patient_ID", "Class"])
+
+        # Concatenate all ras DataFrames into a single DataFrame
+        # NOTE(review): each frame was transposed above, so axis=1 places the datasets side by
+        # side along the reaction columns - confirm this is intended when several files are given.
+        ras_combined = pd.concat(ras_list, axis=1)
+        # Normalize the RAS values by max RAS
+        ras_combined = ras_combined.div(ras_combined.max(axis=0))
+        ras_combined = ras_combined.fillna(0)
+
+    model_type :utils.Model = ARGS.model_selector
+    if model_type is utils.Model.Custom:
+        model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
+    else:
+        model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
+
+    if(ARGS.medium_selector == "Custom"):
+        medium = read_dataset(ARGS.medium, "medium dataset")
+        medium.set_index(medium.columns[0], inplace=True)
+        medium = medium.astype(float)
+        medium = medium[medium.columns[0]].to_dict()
+    else:
+        df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
+        # medium names reach the tool with underscores in place of spaces
+        ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
+        medium = df_mediums[[ARGS.medium_selector]]
+        medium = medium[ARGS.medium_selector].to_dict()
+
+    if(ARGS.ras_selector == True):
+        generate_bounds(model, medium, ras = ras_combined, output_folder=ARGS.output_folder)
+        # class assignments are only written when more than one RAS file was provided
+        if(len(ras_list)>1):
+            class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False)
+    else:
+        generate_bounds(model, medium, output_folder=ARGS.output_folder)
+
+##############################################################################
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_to_bounds.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/ras_to_bounds.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,113 @@
+
+
+
+ marea_macros.xml
+
+
+
+ numpy
+ pandas
+ cobra
+ lxml
+ joblib
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/rps_generator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/rps_generator.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,255 @@
+import re
+import sys
+import csv
+import math
+import argparse
+
+import numpy as np
+import pickle as pk
+import pandas as pd
+
+from enum import Enum
+from typing import Optional, List, Dict, Tuple
+
+import utils.general_utils as utils
+import utils.reaction_parsing as reactionUtils
+
+########################## argparse ##########################################
+ARGS :argparse.Namespace
+def process_args() -> argparse.Namespace:
+    """
+    Builds the command-line parser of the RPS generator and parses the process arguments.
+
+    Returns:
+        Namespace: An object containing parsed arguments.
+    """
+    parser = argparse.ArgumentParser(
+        usage = '%(prog)s [options]',
+        description = 'process some value\'s' + ' abundances and reactions to create RPS scores.')
+
+    # (flags, options) pairs, registered in this order
+    arguments = [
+        (('-rc', '--reaction_choice'), dict(
+            type = str,
+            default = 'default',
+            choices = ['default','custom'],
+            help = 'chose which type of reaction dataset you want use')),
+        (('-cm', '--custom'), dict(
+            type = str,
+            help = 'your dataset if you want custom reactions')),
+        (('-td', '--tool_dir'), dict(
+            type = str,
+            required = True,
+            help = 'your tool directory')),
+        (('-ol', '--out_log'), dict(
+            help = "Output log")),
+        (('-id', '--input'), dict(
+            type = str,
+            help = 'input dataset')),
+        (('-rp', '--rps_output'), dict(
+            type = str,
+            required = True,
+            help = 'rps output')),
+    ]
+    for flags, options in arguments:
+        parser.add_argument(*flags, **options)
+
+    return parser.parse_args()
+
+############################ dataset name #####################################
+def name_dataset(name_data :str, count :int) -> str:
+    """
+    Makes a dataset name unique: the default name "Dataset" gets f"_{count}" appended, any other
+    name is returned as it is (converted to a string).
+
+    Args:
+        name_data : name associated with the dataset (from frontend input params)
+        count : counter used as suffix for datasets left with the default name
+
+    Returns:
+        str : the name made unique
+    """
+    label = str(name_data)
+    return label + '_' + str(count) if label == 'Dataset' else label
+
+
+############################ get_abund_data ####################################
+def get_abund_data(dataset: pd.DataFrame, cell_line_index:int) -> Optional[pd.Series]:
+    """
+    Extracts the abundance data of one cell line from the dataset, whose rows are metabolites and
+    whose columns are cell lines.
+
+    Args:
+        dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites.
+        cell_line_index (int): Position of the cell line of interest among the dataset's columns.
+
+    Returns:
+        pd.Series or None: The values of the selected column from the second row on; the name of the
+        series is the name of the cell line.
+        Returns None if the cell index is invalid.
+    """
+    # The index selects a column, so it has to be validated against the number of columns
+    # (checking the number of rows rejected valid columns and let invalid ones through).
+    if cell_line_index < 0 or cell_line_index >= len(dataset.columns):
+        print(f"Errore: This cell line index: '{cell_line_index}' is not valid.")
+        return None
+
+    cell_line_name = dataset.columns[cell_line_index]
+    abundances_series = dataset[cell_line_name][1:]
+
+    return abundances_series
+
+
+############################ clean_metabolite_name ####################################
+def clean_metabolite_name(name :str) -> str:
+    """
+    Normalizes a metabolite's name to simplify the search of a match in the dictionary of synonyms:
+    punctuation, brackets and spaces are dropped and the result is made lowercase.
+
+    Args:
+        name : the metabolite's name, as given in the dataset.
+
+    Returns:
+        str : a new string with the cleaned name.
+    """
+    dropped_chars = str.maketrans("", "", ",;-_'([{ }])")
+    return name.translate(dropped_chars).lower()
+
+
+############################ get_metabolite_id ####################################
+def get_metabolite_id(name :str, syn_dict :Dict[str, List[str]]) -> str:
+    """
+    Finds the identifier of a metabolite by matching its cleaned name against a dictionary of synonyms.
+
+    Args:
+        name : the metabolite's name, as given in the dataset.
+        syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values.
+
+    Returns:
+        str : the identifier of the first entry whose synonyms contain the cleaned name.
+        An empty string is returned if a match isn't found.
+    """
+    cleaned = clean_metabolite_name(name)
+    matches = (met_id for met_id, synonyms in syn_dict.items() if cleaned in synonyms)
+    return next(matches, "")
+
+############################ check_missing_metab ####################################
+def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
+    """
+    Adds to the abundances every metabolite that appears in the reactions but has no abundance data.
+
+    Parameters:
+        reactions (dict): Maps reaction names to dictionaries of metabolite names and stoichiometric coefficients.
+        dataset_by_rows (dict): Maps metabolite names to their abundances for all cell lines.
+        cell_lines_amt : amount of cell lines, i.e. the length of the list created for each missing metabolite.
+
+    Returns:
+        list[str] : the metabolites that were missing, in order of first appearance; their abundances were set to 1.
+
+    Side effects:
+        dataset_by_rows : mut
+    """
+    missing_list = []
+    for substrates in reactions.values():
+        for metabolite in substrates:
+            if metabolite in dataset_by_rows:
+                continue
+
+            dataset_by_rows[metabolite] = [1] * cell_lines_amt
+            missing_list.append(metabolite)
+
+    return missing_list
+
+############################ calculate_rps ####################################
+def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str]) -> Dict[str, float]:
+ """
+ Calculate the Reaction Propensity Scores (RPS) of one sample, based on the availability of reaction substrates.
+ The score of a reaction is the product of the abundances of its substrates, each raised to a power equal to
+ its stoichiometric coefficient; NaN abundances count as 1.
+
+ Parameters:
+ reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
+ abundances (dict): A dictionary representing metabolite abundances where keys are metabolite names and values are their corresponding abundances.
+ black_list (list): A list containing metabolite names that do not make a reaction significant.
+ missing_list (list): A list containing metabolite names that were missing in the original abundances dictionary and thus their values were set to 1.
+
+ Returns:
+ dict: A dictionary containing Reaction Propensity Scores (RPS) where keys are reaction names and values are the corresponding RPS scores;
+ reactions without any significant metabolite get NaN.
+ """
+ rps_scores = {}
+
+ for reaction_name, substrates in reactions.items():
+ total_contribution = 1
+ # becomes True once a substrate is neither black-listed nor missing
+ metab_significant = False
+ for metabolite, stoichiometry in substrates.items():
+ # a NaN abundance is treated as the neutral factor 1
+ temp = 1 if math.isnan(abundances[metabolite]) else abundances[metabolite]
+ if metabolite not in black_list and metabolite not in missing_list:
+ metab_significant = True
+ # NOTE(review): indentation is not visible here, so it is unclear whether this product is
+ # restricted to significant metabolites or applied to every substrate - confirm.
+ total_contribution *= temp ** stoichiometry
+
+ rps_scores[reaction_name] = total_contribution if metab_significant else math.nan
+
+ return rps_scores
+
+
+############################ rps_for_cell_lines ####################################
+def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]]) -> None:
+    """
+    Calculate Reaction Propensity Scores (RPS) for each cell line of the dataset and write them to ARGS.rps_output.
+
+    Parameters:
+        dataset : the dataset's data, by rows; the first row holds the cell line names, the first column the metabolite names.
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
+        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
+        syn_dict (dict): A dictionary where keys are general metabolite names and values are lists of possible synonyms.
+
+    Returns:
+        None
+    """
+    cell_lines = dataset[0][1:]
+
+    # metabolite names are only translated through the synonyms when the default reactions are used
+    translationIsApplied = ARGS.reaction_choice == "default"
+    abundances_dict = {}
+    for row in dataset[1:]:
+        raw_name = row[0]
+        metab_id = get_metabolite_id(raw_name, syn_dict) if translationIsApplied else raw_name
+        if not metab_id: continue
+        abundances_dict[metab_id] = list(map(utils.Float(), row[1:]))
+
+    missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
+
+    rps_scores :Dict[Dict[str, float]] = {}
+    for pos, cell_line_name in enumerate(cell_lines):
+        sample_abundances = {}
+        for metab, values in abundances_dict.items():
+            sample_abundances[metab] = values[pos]
+        rps_scores[cell_line_name] = calculate_rps(reactions, sample_abundances, black_list, missing_list)
+
+    df = pd.DataFrame.from_dict(rps_scores)
+    df.rename(columns={'Unnamed: 0': 'Reactions'}, inplace=True)
+    # NOTE(review): the reaction names are the index of df and index = False leaves them out of
+    # the file - confirm that consumers of this output do not need them.
+    df.to_csv(ARGS.rps_output, sep = '\t', na_rep = "None", index = False)
+
+############################ main ####################################
+def main() -> None:
+    """
+    Entry point of the RPS generator: parses the frontend arguments, loads the support data and
+    the dataset, then computes and saves the RPS scores.
+
+    Returns:
+        None
+    """
+    global ARGS
+    ARGS = process_args()
+
+    # NOTE: pickle must only be used on trusted files; the ones loaded here come from the tool's own directory.
+    # TODO:use utils functions vvv
+    with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl:
+        black_list = pk.load(bl)
+
+    with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
+        syn_dict = pk.load(sd)
+
+    dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
+
+    if ARGS.reaction_choice == 'default':
+        # opened through a context manager like the other pickles, so the handle is always closed
+        with open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb') as rx:
+            reactions = pk.load(rx)
+
+    elif ARGS.reaction_choice == 'custom':
+        reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
+
+    rps_for_cell_lines(dataset, reactions, black_list, syn_dict)
+    print('Execution succeded')
+
+##############################################################################
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/rps_generator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/rps_generator.xml Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,79 @@
+
+ - Reaction Propensity Scores computation
+
+ marea_macros.xml
+
+
+ numpy
+ pandas
+ lxml
+ cobra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/testing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/testing.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,806 @@
+# This is a general-purpose "testing utilities" module for the COBRAxy tool.
+# This code was written entirely by m.ferrari133@campus.unimib.it and then (hopefully) many
+# more people contributed by writing tests for this tool's modules, feel free to send an email for
+# any questions.
+
+# How the testing module works:
+# The testing module allows you to easily set up unit tests for functions in a module, obtaining
+# information on what each method returns, when and how it fails and so on.
+
+# How do I test a module?
+# - create a function at the very bottom, before the __main__
+# - import the stuff you need
+# - create a UnitTester instance, follow the documentation
+# - fill it up with UnitTest instances, follow the documentation
+# - each UnitTest tests the function by passing specific parameters to it and by verifying the correctness
+# of the output via a CheckingMode instance
+# - call testModule() on the UnitTester
+
+# TODO(s):
+# - This module was written before the utilities were introduced, it may want to use some of those functions.
+# - I never got around to writing a CheckingMode for methods you WANT to fail in certain scenarios, I
+# like the name "MustPanic".
+# - It's good practice to enforce boolean arguments of a function to be passed as kwargs and I did it a lot
+# in the code I wrote for this tool's modules, but the current implementation of UnitTest doesn't allow
+# you to pass kwargs to the functions you test.
+# - Implement integration tests as well, maybe!
+
+## Imports:
+from typing import Dict, Callable, Type, List
+from enum import Enum, auto
+from collections.abc import Iterable
+
+## Generic utilities:
+class TestResult:
+    """
+    Represents the result of a test and contains all the relevant information about it. Loosely models two variants:
+    - Ok: The test passed, no further information is saved besides the target's name.
+    - Err: The test failed, an error message and further contextual details are also saved.
+
+    This class does not ensure a static proof of the two states' behaviour, their meaning or mutual exclusivity outside
+    of the :bool property "isPass", meant for outside reads.
+    """
+    def __init__(self, isPass :bool, targetName :str, errMsg = "", details = "") -> None:
+        """
+        (Private) Initializes an instance of TestResult.
+
+        Args:
+            isPass : distinction between TestResult.Ok (True) and TestResult.Err (False).
+            targetName : the name of the target object / property / function / module being tested, not always set
+                to a meaningful value at this stage.
+
+            errMsg : concise error message explaining the test's failure, ignored when isPass is True.
+            details : contextual details about the error, ignored when isPass is True.
+
+        Returns:
+            None : practically, a TestResult instance.
+        """
+        self.isPass = isPass
+        self.isFail = not isPass # Convenience above all
+
+        self.targetName = targetName
+
+        # Both attributes are always defined (empty on Ok results) so that log() can read them on any instance:
+        self.errMsg = "" if isPass else errMsg
+        self.details = "" if isPass else details
+
+    @classmethod
+    def Ok(cls, targetName = "") -> "TestResult":
+        """
+        Factory method for TestResult.Ok, where all we need to know is that our test passed.
+
+        Args:
+            targetName : the name of the target object / property / function / module being tested, not always set
+                to a meaningful value at this stage.
+
+        Returns:
+            TestResult : a new Ok instance.
+        """
+        return cls(True, targetName)
+
+    @classmethod
+    def Err(cls, errMsg :str, details :str, targetName = "") -> "TestResult":
+        """
+        Factory method for TestResult.Err, where we store relevant error information.
+
+        Args:
+            errMsg : concise error message explaining the test's failure.
+            details : contextual details about the error.
+            targetName : the name of the target object / property / function / module being tested, not always set
+                to a meaningful value at this stage.
+
+        Returns:
+            TestResult : a new Err instance.
+        """
+        return cls(False, targetName, errMsg, details)
+
+    def log(self, isCompact = True) -> str:
+        """
+        Dumps all the available information in a :str, ready for logging.
+
+        Args:
+            isCompact : if True limits the amount of information displayed to the targetName.
+
+        Returns:
+            str : information about this test result.
+
+        """
+        if isCompact:
+            return f"{TestResult.__name__}::{'Ok' if self.isPass else 'Err'}(Unit test on {self.targetName})"
+
+        logMsg = f"Unit test on {self.targetName} {'passed' if self.isPass else f'failed because {self.errMsg}'}"
+        if self.details: logMsg += f", {self.details}"
+        return logMsg
+
+    def throw(self) -> None:
+        #TODO: finer Exception typing would be desirable
+        """
+        Logs the result information and panics.
+
+        Raises:
+            Exception : an error containing log information about the test result.
+
+        Returns:
+            None
+
+        """
+        raise Exception(self.log())
+
+class CheckingMode:
+    """
+    (Private) Base class for every way of checking a value for correctness, in the context of "testing" it.
+    Subclasses supply the actual criterion.
+    """
+
+    def __init__(self) -> None:
+        """
+        (Private) Overridden by child classes, initializes an instance of CheckingMode.
+
+        Returns:
+            None : practically, a CheckingMode instance.
+        """
+        self.logMsg = "CheckingMode base class should not be used directly"
+
+    def __checkPasses__(self, _) -> bool:
+        """
+        (Private) Overridden by child classes, performs the actual correctness check on a received value.
+        The base implementation accepts anything.
+
+        Returns:
+            bool : True if the check passed, False if it failed.
+        """
+        return True
+
+    def check(self, value) -> TestResult:
+        """
+        Wraps the :bool outcome of __checkPasses__ in a TestResult.
+
+        Args:
+            value : the value to check.
+
+        Returns:
+            TestResult : Ok when the check passes, otherwise an Err carrying this mode's logMsg and the value received.
+        """
+        if self.__checkPasses__(value):
+            return TestResult.Ok()
+
+        return TestResult.Err(self.logMsg, f"got {value} instead")
+
+    def __repr__(self) -> str:
+        """
+        (Private) Formats :object as :str, child classes extend the class name with their own details.
+        """
+        return type(self).__name__
+
+class ExactValue(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value needs to be equal (==) to a reference value.
+    """
+
+    #I suggest solving the more complex equality checking edge cases with the "Satisfies" and "MatchingShape" variants.
+    def __init__(self, value) -> None:
+        """
+        (Private) Initializes an instance of ExactValue.
+
+        Args:
+            value : the reference value checked values are compared against.
+        """
+        self.logMsg = f"value needed to match {value} exactly"
+        self.value = value
+
+    def __checkPasses__(self, value) -> bool:
+        """
+        (Private) Compares the reference value with the received one through ==.
+        """
+        expected = self.value
+        return expected == value
+
+    def __repr__(self) -> str:
+        className = super().__repr__()
+        return f"{className}({self.value})"
+
+class AcceptedValues(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value needs to be one of a given set of accepted values.
+    """
+    def __init__(self, *values) -> None:
+        """
+        (Private) Initializes an instance of AcceptedValues.
+
+        Args:
+            values : every value that is considered acceptable, stored as a tuple.
+        """
+        self.logMsg = f"value needed to be one of these: {values}"
+        self.values = values
+
+    def __checkPasses__(self, value) -> bool:
+        """
+        (Private) Membership test of the received value in the accepted ones.
+        """
+        accepted = self.values
+        return value in accepted
+
+    def __repr__(self) -> str:
+        className = super().__repr__()
+        return f"{className}{self.values}"
+
+class SatisfiesPredicate(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value needs to satisfy a given predicate, meaning
+    the predicate returns True when called with it.
+    """
+    def __init__(self, pred :Callable[..., bool], predName = "") -> None:
+        """
+        (Private) Initializes an instance of SatisfiesPredicate.
+
+        Args:
+            pred : the predicate to call on checked values.
+            predName : optional label for the predicate, mentioned in the failure message when not empty.
+        """
+        self.pred = pred
+
+        nameSuffix = f' called {predName}' if predName else ''
+        self.logMsg = f"value needed to verify a predicate{nameSuffix}"
+
+    def __checkPasses__(self, *params) -> bool:
+        """
+        (Private) Forwards everything it receives to the predicate and returns its answer.
+        """
+        predicate = self.pred
+        return predicate(*params)
+
+    def __repr__(self) -> str:
+        className = super().__repr__()
+        return f"{className}(T) -> bool"
+
+class IsOfType(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value needs to be an instance of a certain type.
+    """
+    def __init__(self, type :Type) -> None:
+        """
+        (Private) Initializes an instance of IsOfType.
+
+        Args:
+            type : the type checked values must be instances of (the parameter name shadows the builtin on purpose
+                of keeping the existing interface).
+        """
+        typeName = type.__name__
+        self.logMsg = f"value needed to be of type {typeName}"
+        self.type = type
+
+    def __checkPasses__(self, value :Type) -> bool:
+        """
+        (Private) isinstance test of the received value against the stored type.
+        """
+        requiredType = self.type
+        return isinstance(value, requiredType)
+
+    def __repr__(self) -> str:
+        className = super().__repr__()
+        return f"{className}:{self.type.__name__}"
+
+class Exists(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value needs to exist (or not!). The check ignores
+    the value it receives and simply answers with the "exists" flag, which makes Exists() a quick default check
+    that always passes; MatchingShape gives it its real meaning when looking for properties in objects without
+    much concern on what value they contain.
+    """
+    def __init__(self, exists = True) -> None:
+        """
+        (Private) Initializes an instance of Exists.
+
+        Args:
+            exists : True when the value is expected to exist, False when it is expected to be missing.
+        """
+        self.exists = exists
+
+        negation = '' if exists else 'not '
+        self.logMsg = f"value needed to {negation}exist"
+
+    def __checkPasses__(self, _) -> bool:
+        return self.exists
+
+    def __repr__(self) -> str:
+        if self.exists:
+            return f"{super().__repr__()}"
+
+        return 'IsMissing'
+
+class MatchingShape(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value is an object that needs to have a certain shape,
+    as in to possess properties with a given name and value. Each property is checked for existence and correctness with
+    its own given CheckingMode.
+    """
+    def __init__(self, props :Dict[str, CheckingMode], objName = "") -> None:
+        """
+        (Private) Initializes an instance of MatchingShape.
+
+        Args:
+            props : :dict using property names as keys and checking modes for the property's value as values.
+            objName : label for the object we're testing the shape of.
+
+        Returns:
+            None : practically, a MatchingShape instance.
+        """
+        self.props = props
+        self.objName = objName
+
+        self.shapeRepr = " {\n" + "\n".join([f" {propName} : {prop}" for propName, prop in props.items()]) + "\n}"
+
+    def check(self, obj :object) -> TestResult:
+        """
+        Checks every expected property of the object, in order, stopping at the first failure.
+
+        Args:
+            obj : the object to check, dicts are inspected by key and anything else by attribute.
+
+        Returns:
+            TestResult : Ok if all properties pass their check, otherwise the Err describing the first failure.
+        """
+        objIsDict = isinstance(obj, dict) # Python forces us to distinguish between object properties and dict keys
+        for propName, checkingMode in self.props.items():
+            propExists = (propName in obj) if objIsDict else hasattr(obj, propName)
+            if not propExists:
+                # A missing property is only fine when the check asks for it to be missing, and in that case the
+                # remaining properties still need to be checked. Any other checking mode (Exists() included)
+                # requires the property to be there.
+                if isinstance(checkingMode, Exists) and not checkingMode.exists: continue
+                return TestResult.Err(
+                    f"property \"{propName}\" doesn't exist on object {self.objName}", "", self.objName)
+
+            # The property exists: its value is handed to the checking mode (Exists(False) fails here by design).
+            checkRes = checkingMode.check(obj[propName] if objIsDict else getattr(obj, propName))
+            if checkRes.isPass: continue
+
+            checkRes.targetName = self.objName
+            return TestResult.Err(
+                f"property \"{propName}\" failed check {checkingMode} on shape {obj}",
+                checkRes.log(isCompact = False),
+                self.objName)
+
+        return TestResult.Ok(self.objName)
+
+    def __repr__(self) -> str:
+        return super().__repr__() + self.shapeRepr
+
+class Many(CheckingMode):
+    """
+    CheckingMode subclass variant to be used when the checked value is an Iterable we want to check item by item.
+    """
+    def __init__(self, *values :CheckingMode) -> None:
+        """
+        (Private) Initializes an instance of Many.
+
+        Args:
+            values : one checking mode per expected item, in the order the items are expected in.
+        """
+        self.values = values
+        self.shapeRepr = " [\n" + "\n".join([f" {value}" for value in values]) + "\n]"
+
+    def check(self, coll :Iterable) -> TestResult:
+        """
+        Checks the collection item by item, pairing each item with the checking mode in the same position.
+
+        Args:
+            coll : the collection to check, any iterable is accepted.
+
+        Returns:
+            TestResult : Ok if the amounts match and all items pass, otherwise the Err describing the first problem.
+        """
+        # Materializing the collection allows generators and other unsized iterables to be checked as well,
+        # while a value that isn't iterable at all becomes a failed check instead of an uncaught TypeError.
+        try: items = list(coll)
+        except TypeError: return TestResult.Err(
+            "value needed to be iterable", f"got {coll} instead")
+
+        amt = len(items)
+        expectedAmt = len(self.values)
+        # Length equality is forced:
+        if amt != expectedAmt: return TestResult.Err(
+            "items' quantities don't match", f"expected {expectedAmt} items, but got {amt}")
+
+        # Items in the given collection value are paired in order with the corresponding checkingMode meant for each of them
+        for item, checkingMode in zip(items, self.values):
+            checkRes = checkingMode.check(item)
+            if checkRes.isFail: return TestResult.Err(
+                f"item in list failed check {checkingMode}",
+                checkRes.log(isCompact = False))
+
+        return TestResult.Ok()
+
+    def __repr__(self) -> str:
+        return super().__repr__() + self.shapeRepr
+
+class LogMode(Enum):
+    """
+    Represents the level of detail of a logged message. Models 4 variants, in order of increasing detail:
+    - Minimal : Logs the overall test result for the entire module.
+    - Default : Also logs all single test fails, in compact mode.
+    - Detailed : Logs all function test results, in compact mode.
+    - Pedantic : Also logs all single test results in detailed mode.
+
+    The variants' values are assigned by auto() in declaration order, which is what makes them comparable.
+    """
+    Minimal = auto()
+    Default = auto()
+    Detailed = auto()
+    Pedantic = auto()
+
+    def isMoreVerbose(self, requiredMode :"LogMode") -> bool:
+        """
+        Compares the instance's level of detail with that of another.
+
+        Args:
+            requiredMode : the other instance.
+
+        Returns:
+            bool : True if the caller instance is at least as detailed as the other (equal levels count too).
+        """
+        ownLevel, requiredLevel = self.value, requiredMode.value
+        return ownLevel >= requiredLevel
+
+## Specific Unit Testing utilities:
+class UnitTest:
+    """
+    Represents a unit test, the test of a single function's isolated correctness.
+    """
+    def __init__(self, func :Callable, inputParams :list, expectedRes :CheckingMode) -> None:
+        """
+        (Private) Initializes an instance of UnitTest.
+
+        Args:
+            func : the function to test.
+            inputParams : list of parameters to pass as inputs to the function, in order.
+            expectedRes : checkingMode to test the function's return value for correctness.
+
+        Returns:
+            None : practically, a UnitTest instance.
+        """
+        self.func = func
+        self.inputParams = inputParams
+        self.expectedRes = expectedRes
+
+        # Not every callable has a __name__ (functools.partial objects and callable instances don't),
+        # their repr is used as a label in that case:
+        self.funcName = getattr(func, "__name__", repr(func))
+
+    def test(self) -> TestResult:
+        """
+        Tests the function: calls it with the input parameters and checks what it returns.
+
+        Returns:
+            TestResult : the result of the expected-result check, or an Err if the function raised an Exception.
+                Either way the result's targetName is the function's name.
+        """
+        try: result = self.func(*self.inputParams)
+        except Exception as e:
+            # The exception is rendered to text together with its type, since the message alone is often
+            # not enough to tell what happened (a KeyError's message is just the missing key).
+            return TestResult.Err(
+                "the function panicked at runtime", f"{type(e).__name__}: {e}", self.funcName)
+
+        checkRes = self.expectedRes.check(result)
+        checkRes.targetName = self.funcName
+        return checkRes
+
+class UnitTester:
+    """
+    Manager class for unit testing an entire module: groups the single UnitTests by tested function and executes
+    them one function at a time (tests about the same function are executed consecutively), giving back as much
+    information as the selected logMode allows.
+    """
+    def __init__(self, moduleName :str, logMode = LogMode.Default, stopOnFail = True, *funcTests :'UnitTest') -> None:
+        """
+        (Private) initializes an instance of UnitTester.
+
+        Args:
+            moduleName : name of the tested module.
+            logMode : level of detail applied to all messages logged during the test.
+            stopOnFail : if True, the test stops entirely after one unit test fails.
+            funcTests : the unit tests to perform on the module.
+
+        Returns:
+            None : practically, a UnitTester instance.
+        """
+        self.moduleName = moduleName
+        self.logMode = logMode
+        self.stopOnFail = stopOnFail
+
+        # Grouping the tests by function name is what ensures the per-function order:
+        self.funcTests :Dict[str, List[UnitTest]]= {}
+        for test in funcTests:
+            self.funcTests.setdefault(test.funcName, []).append(test)
+
+    def logTestResult(self, testRes :TestResult) -> None:
+        """
+        Prints the formatted result information of a unit test.
+
+        Args:
+            testRes : the result of the test.
+
+        Returns:
+            None
+        """
+        if testRes.isPass:
+            self.log("Passed!", LogMode.Detailed, indent = 2)
+            return
+
+        failMsg = "Failed! "
+        # The result's own log is only computed when the log mode is verbose enough to show it
+        if self.logMode.isMoreVerbose(LogMode.Detailed):
+            # Pedantic is the only variant that gets the non-compact form
+            isCompact = self.logMode is not LogMode.Pedantic
+            failMsg += testRes.log(isCompact)
+
+        self.log(failMsg, indent = 2)
+
+    def log(self, msg :str, minRequiredMode = LogMode.Default, indent = 0) -> None:
+        """
+        Prints and formats a message only when the UnitTester instance is set to a level of detail at least equal
+        to a minimum requirement, given as input.
+
+        Args:
+            msg : the message to print.
+            minRequiredMode : minimum detail requirement.
+            indent : indentation level, the message is prefixed by the indentation string repeated this many times.
+
+        Returns:
+            None
+        """
+        if not self.logMode.isMoreVerbose(minRequiredMode): return
+
+        padding = " " * indent
+        print(padding + msg)
+
+    def testFunction(self, name :str) -> TestResult:
+        """
+        Perform all unit tests relative to the same function, plus the surrounding logs and checks.
+
+        Args:
+            name : the name of the tested function.
+
+        Returns :
+            TestResult : Ok if every executed test passed, otherwise a generic Err (the single failures have already
+                been logged). With stopOnFail the function's remaining tests are skipped after the first failure,
+                without it they all run.
+        """
+        self.log(f"Unit testing {name}...", indent = 1)
+
+        anyFailed = False
+        for unitTest in self.funcTests[name]:
+            testRes = unitTest.test()
+            self.logTestResult(testRes)
+
+            if testRes.isFail:
+                anyFailed = True
+                if self.stopOnFail: break
+
+        self.log("", LogMode.Detailed) # Provides one extra newline of space when needed, to better format the output
+        if not anyFailed:
+            return TestResult.Ok(name)
+
+        if self.logMode is LogMode.Default:
+            self.log("")
+
+        return TestResult.Err("Unlogged err", "unit test failed", name)
+
+    def testModule(self) -> None:
+        """
+        Runs all the provided unit tests in order but on a per-function basis, then logs how many functions
+        had problems. With stopOnFail the run is interrupted at the first function that fails.
+
+        Returns:
+            None
+        """
+        self.log(f"Unit testing module {self.moduleName}...", LogMode.Minimal)
+
+        fails = 0
+        testStatusMsg = "complete"
+        for funcName in self.funcTests:
+            funcRes = self.testFunction(funcName)
+            if funcRes.isPass: continue
+
+            fails += 1
+            if self.stopOnFail:
+                testStatusMsg = "interrupted"
+                break
+
+        plural = "" if fails == 1 else "s"
+        self.log(f"Testing {testStatusMsg}: {fails} problem{plural} found.\n", LogMode.Minimal)
+        # ^^^ Manually applied an extra newline of space.
+
+## Unit testing all the modules:
+def unit_cobraxy() -> None:
+    """
+    Unit tests for the cobraxy module. Builds a UnitTester (LogMode.Pedantic, stopOnFail = False) with tests for
+    the module's functions and runs it right away with testModule().
+
+    Returns:
+        None
+    """
+    import cobraxy as m
+    import math
+    import lxml.etree as ET # NOTE(review): ET is never referenced in this function
+    import utils.general_utils as utils
+
+    #m.ARGS = m.process_args()
+
+    # Shared fixtures: 5 reaction ids, the ENGRO2 map and two datasets with one row of 7 values per reaction id.
+    ids = ["react1", "react2", "react3", "react4", "react5"]
+    metabMap = utils.Model.ENGRO2.getMap()
+    class_pat = {
+        "dataset1" :[
+            [2.3, 4, 7, 0, 0.01, math.nan, math.nan],
+            [math.nan, math.nan, math.nan, math.nan, math.nan, math.nan, math.nan],
+            [2.3, 4, 7, 0, 0.01, 5, 9],
+            [math.nan, math.nan, 2.3, 4, 7, 0, 0.01],
+            [2.3, 4, 7, math.nan, 2.3, 0, 0.01]],
+
+        "dataset2" :[
+            [2.3, 4, 7, math.nan, 2.3, 0, 0.01],
+            [2.3, 4, 7, 0, 0.01, math.nan, math.nan],
+            [math.nan, math.nan, 2.3, 4, 7, 0, 0.01],
+            [2.3, 4, 7, 0, 0.01, 5, 9],
+            [math.nan, math.nan, math.nan, math.nan, math.nan, math.nan, math.nan]]
+    }
+
+    unitTester = UnitTester("cobraxy", LogMode.Pedantic, False,
+        UnitTest(m.name_dataset, ["customName", 12], ExactValue("customName")),
+        UnitTest(m.name_dataset, ["Dataset", 12], ExactValue("Dataset_12")),
+
+        UnitTest(m.fold_change, [0.5, 0.5], ExactValue(0.0)),
+        UnitTest(m.fold_change, [0, 0.35], ExactValue("-INF")),
+        UnitTest(m.fold_change, [0.5, 0], ExactValue("INF")),
+        UnitTest(m.fold_change, [0, 0], ExactValue(0)),
+
+        # Bound methods are tested by passing the method of a freshly built instance as the function:
+        UnitTest(
+            m.Arrow(m.Arrow.MAX_W, m.ArrowColor.DownRegulated, isDashed = True).toStyleStr, [],
+            ExactValue(";stroke:#0000FF;stroke-width:12;stroke-dasharray:5,5")),
+
+        UnitTest(m.computeEnrichment, [metabMap, class_pat, ids], ExactValue(None)),
+
+        UnitTest(m.computePValue, [class_pat["dataset1"][0], class_pat["dataset2"][0]], SatisfiesPredicate(math.isnan)),
+
+        UnitTest(m.reactionIdIsDirectional, ["reactId"], ExactValue(m.ReactionDirection.Unknown)),
+        UnitTest(m.reactionIdIsDirectional, ["reactId_F"], ExactValue(m.ReactionDirection.Direct)),
+        UnitTest(m.reactionIdIsDirectional, ["reactId_B"], ExactValue(m.ReactionDirection.Inverse)),
+
+        UnitTest(m.ArrowColor.fromFoldChangeSign, [-2], ExactValue(m.ArrowColor.DownRegulated)),
+        UnitTest(m.ArrowColor.fromFoldChangeSign, [2], ExactValue(m.ArrowColor.UpRegulated)),
+
+        UnitTest(
+            m.Arrow(m.Arrow.MAX_W, m.ArrowColor.UpRegulated).styleReactionElements,
+            [metabMap, "reactId"],
+            ExactValue(None)),
+
+        UnitTest(m.getArrowBodyElementId, ["reactId"], ExactValue("R_reactId")),
+        UnitTest(m.getArrowBodyElementId, ["reactId_F"], ExactValue("R_reactId")),
+
+        UnitTest(
+            m.getArrowHeadElementId, ["reactId"],
+            Many(ExactValue("F_reactId"), ExactValue("B_reactId"))),
+
+        UnitTest(
+            m.getArrowHeadElementId, ["reactId_F"],
+            Many(ExactValue("F_reactId"), ExactValue(""))),
+
+        UnitTest(
+            m.getArrowHeadElementId, ["reactId_B"],
+            Many(ExactValue("B_reactId"), ExactValue(""))),
+
+        UnitTest(
+            m.getElementById, ["reactId_F", metabMap],
+            SatisfiesPredicate(lambda res : res.isErr and isinstance(res.value, utils.Result.ResultErr))),
+
+        UnitTest(
+            m.getElementById, ["F_tyr_L_t", metabMap],
+            SatisfiesPredicate(lambda res : res.isOk and res.unwrap().get("id") == "F_tyr_L_t")),
+    ).testModule()
+
+def unit_rps_generator() -> None:
+    """
+    Unit tests for the rps_generator module. Builds a UnitTester (LogMode.Pedantic, stopOnFail = False) and runs
+    it right away with testModule().
+
+    NOTE(review): this function is not called by this file's __main__ section, and some of the calls below
+    don't match the rps_generator code shipped in the same changeset (see the notes on the single tests).
+
+    Returns:
+        None
+    """
+    import rps_generator as rps
+    import math
+    import pandas as pd
+    import utils.general_utils as utils
+    # Two cell lines ("normal" and "cancer"), one column per metabolite, names are deliberately dirty:
+    dataset = pd.DataFrame({
+        "cell lines" : ["normal", "cancer"],
+        "pyru_vate" : [5.3, 7.01],
+        "glu,cose" : [8.2, 4.0],
+        "unknown" : [3.0, 3.97],
+        "()atp" : [7.05, 8.83],
+    })
+
+    # NOTE(review): abundancesNormalRaw is not used by any of the tests below
+    abundancesNormalRaw = {
+        "pyru_vate" : 5.3,
+        "glu,cose" : 8.2,
+        "unknown" : 3.0,
+        "()atp" : 7.05,
+    }
+
+    # Same values as the "normal" cell line, keyed by what are presumably the translated metabolite ids:
+    abundancesNormal = {
+        "pyr" : 5.3,
+        "glc__D" : 8.2,
+        "atp" : 7.05,
+    }
+
+    # TODO: this currently doesn't work due to "the pickle extension problem", see FileFormat class for details.
+    synsDict = utils.readPickle(utils.FilePath("synonyms", utils.FileFormat.PICKLE, prefix = "./local/pickle files"))
+
+    # Reaction id -> { metabolite id -> coefficient }, the expected scores below use the coefficients as exponents:
+    reactionsDict = {
+        "r1" : {
+            "glc__D" : 1
+        },
+
+        "r2" : {
+            "co2" : 2,
+            "pyr" : 3,
+        },
+
+        "r3" : {
+            "atp" : 2,
+            "glc__D" : 4,
+        },
+
+        "r4" : {
+            "atp" : 3,
+        }
+    }
+
+    # abundancesNormal plus the metabolite that appears in the reactions but not in the dataset, set to 1:
+    abundancesNormalEdited = {
+        "pyr" : 5.3,
+        "glc__D" : 8.2,
+        "atp" : 7.05,
+        "co2" : 1,
+    }
+
+    blackList = ["atp"] # No jokes allowed!
+    missingInDataset = ["co2"]
+
+    # Expected scores for the "normal" cell line, r4 is expected to be NaN:
+    normalRpsShape = MatchingShape({
+        "r1" : ExactValue(8.2 ** 1),
+        "r2" : ExactValue((1 ** 2) * (5.3 ** 3)),
+        "r3" : ExactValue((8.2 ** 4) * (7.05 ** 2)),
+        "r4" : SatisfiesPredicate(lambda n : math.isnan(n))
+    }, "rps dict")
+
+    UnitTester("rps_generator", LogMode.Pedantic, False,
+        UnitTest(rps.get_abund_data, [dataset, 0], MatchingShape({
+            "pyru_vate" : ExactValue(5.3),
+            "glu,cose" : ExactValue(8.2),
+            "unknown" : ExactValue(3.0),
+            "()atp" : ExactValue(7.05),
+            "name" : ExactValue("normal")
+        }, "abundance series")),
+
+        UnitTest(rps.get_abund_data, [dataset, 1], MatchingShape({
+            "pyru_vate" : ExactValue(7.01),
+            "glu,cose" : ExactValue(4.0),
+            "unknown" : ExactValue(3.97),
+            "()atp" : ExactValue(8.83),
+            "name" : ExactValue("cancer")
+        }, "abundance series")),
+
+        UnitTest(rps.get_abund_data, [dataset, -1], ExactValue(None)),
+
+        # NOTE(review): rps_generator.rps_for_cell_lines calls check_missing_metab with three arguments
+        # (reactions, abundances, number of cell lines) while two are passed here, verify the signature.
+        UnitTest(rps.check_missing_metab, [reactionsDict, abundancesNormal.copy()], Many(MatchingShape({
+            "pyr" : ExactValue(5.3),
+            "glc__D" : ExactValue(8.2),
+            "atp" : ExactValue(7.05),
+            "co2" : ExactValue(1)
+        }, "updated abundances"), Many(ExactValue("co2")))),
+
+        UnitTest(rps.clean_metabolite_name, ["4,4'-diphenylmethane diisocyanate"], ExactValue("44diphenylmethanediisocyanate")),
+
+        UnitTest(rps.get_metabolite_id, ["tryptophan", synsDict], ExactValue("trp__L")),
+
+        UnitTest(rps.calculate_rps, [reactionsDict, abundancesNormalEdited, blackList, missingInDataset], normalRpsShape),
+
+        # NOTE(review): rps_generator.main calls rps_for_cell_lines with four arguments
+        # (dataset, reactions, black_list, syn_dict) while six are passed here, verify the signature.
+        UnitTest(rps.rps_for_cell_lines, [dataset, reactionsDict, blackList, synsDict, "", True], Many(normalRpsShape, MatchingShape({
+            "r1" : ExactValue(4.0 ** 1),
+            "r2" : ExactValue((1 ** 2) * (7.01 ** 3)),
+            "r3" : ExactValue((4.0 ** 4) * (8.83 ** 2)),
+            "r4" : SatisfiesPredicate(lambda n : math.isnan(n))
+        }, "rps dict"))),
+
+        #UnitTest(rps.main, [], ExactValue(None)) # Complains about sys argvs
+    ).testModule()
+
+def unit_custom_data_generator() -> None:
+    """
+    Placeholder unit tests for the custom_data_generator module: the module is imported but none of its
+    functions are exercised, the only test is a trivially passing one.
+
+    Returns:
+        None
+    """
+    import custom_data_generator as cdg
+
+    UnitTester("custom data generator", LogMode.Pedantic, False,
+        UnitTest(lambda :True, [], ExactValue(True)), # No tests can be done without a model at hand!
+    ).testModule()
+
+def unit_utils() -> None:
+    """
+    Unit tests for the utils package (general_utils, rule_parsing and reaction_parsing). Builds a UnitTester
+    (LogMode.Pedantic, stopOnFail = False), runs it right away with testModule() and finally prints how a
+    sample rule gets parsed.
+
+    NOTE(review): the tests commented with "should panic" / "expected panic" use Exists(False), but UnitTest.test
+    turns any raised exception into a failed result, so these presumably get reported as failures until a
+    CheckingMode for expected panics exists - confirm.
+
+    Returns:
+        None
+    """
+    import utils.general_utils as utils
+    import utils.rule_parsing as ruleUtils
+    import utils.reaction_parsing as reactionUtils
+
+    UnitTester("utils", LogMode.Pedantic, False,
+        UnitTest(utils.CustomErr, ["myMsg", "more details"], MatchingShape({
+            "details" : ExactValue("more details"),
+            "msg" : ExactValue("myMsg"),
+            "id" : ExactValue(0) # this will fail if any custom errors happen anywhere else before!
+        })),
+
+        UnitTest(utils.CustomErr, ["myMsg", "more details", 42], MatchingShape({
+            "details" : ExactValue("more details"),
+            "msg" : ExactValue("myMsg"),
+            "id" : ExactValue(42)
+        })),
+
+        UnitTest(utils.Bool("someArg").check, ["TrUe"], ExactValue(True)),
+        UnitTest(utils.Bool("someArg").check, ["FALse"], ExactValue(False)),
+        UnitTest(utils.Bool("someArg").check, ["foo"], Exists(False)), # should panic!
+
+        UnitTest(utils.Model.ENGRO2.getRules, ["."], IsOfType(dict)),
+        UnitTest(utils.Model.Custom.getRules, [".", ""], Exists(False)), # expected panic
+
+        # rule utilities tests:
+        UnitTest(ruleUtils.parseRuleToNestedList, ["A"], Many(ExactValue("A"))),
+        UnitTest(ruleUtils.parseRuleToNestedList, ["A or B"], Many(ExactValue("A"), ExactValue("B"))),
+        UnitTest(ruleUtils.parseRuleToNestedList, ["A and B"], Many(ExactValue("A"), ExactValue("B"))),
+        UnitTest(ruleUtils.parseRuleToNestedList, ["A foo B"], Exists(False)), # expected panic
+        UnitTest(ruleUtils.parseRuleToNestedList, ["A)"], Exists(False)), # expected panic
+
+        # The parsed rule is also checked for its "op" property:
+        UnitTest(
+            ruleUtils.parseRuleToNestedList, ["A or B"],
+            MatchingShape({ "op" : ExactValue(ruleUtils.RuleOp.OR)})),
+
+        UnitTest(
+            ruleUtils.parseRuleToNestedList, ["A and B"],
+            MatchingShape({ "op" : ExactValue(ruleUtils.RuleOp.AND)})),
+
+        UnitTest(
+            ruleUtils.parseRuleToNestedList, ["A or B and C"],
+            MatchingShape({ "op" : ExactValue(ruleUtils.RuleOp.OR)})),
+
+        UnitTest(
+            ruleUtils.parseRuleToNestedList, ["A or B and C or (D and E)"],
+            Many(
+                ExactValue("A"),
+                Many(ExactValue("B"), ExactValue("C")),
+                Many(ExactValue("D"), ExactValue("E"))
+            )),
+
+        # The lambdas wrap the RuleOp constructor so that it can be tested like a function:
+        UnitTest(lambda s : ruleUtils.RuleOp(s), ["or"], ExactValue(ruleUtils.RuleOp.OR)),
+        UnitTest(lambda s : ruleUtils.RuleOp(s), ["and"], ExactValue(ruleUtils.RuleOp.AND)),
+        UnitTest(lambda s : ruleUtils.RuleOp(s), ["foo"], Exists(False)), # expected panic
+
+        UnitTest(ruleUtils.RuleOp.isOperator, ["or"], ExactValue(True)),
+        UnitTest(ruleUtils.RuleOp.isOperator, ["and"], ExactValue(True)),
+        UnitTest(ruleUtils.RuleOp.isOperator, ["foo"], ExactValue(False)),
+
+        # reaction utilities tests:
+        UnitTest(reactionUtils.ReactionDir.fromReaction, ["atp <=> adp + pi"], ExactValue(reactionUtils.ReactionDir.REVERSIBLE)),
+        UnitTest(reactionUtils.ReactionDir.fromReaction, ["atp --> adp + pi"], ExactValue(reactionUtils.ReactionDir.FORWARD)),
+        UnitTest(reactionUtils.ReactionDir.fromReaction, ["atp <-- adp + pi"], ExactValue(reactionUtils.ReactionDir.BACKWARD)),
+        UnitTest(reactionUtils.ReactionDir.fromReaction, ["atp ??? adp + pi"], Exists(False)), # should panic
+
+        # A reversible reaction is expected to be split in a "_B" and a "_F" entry, a forward one keeps its id:
+        UnitTest(
+            reactionUtils.create_reaction_dict,
+            [{'shdgd': '2 pyruvate + 1 h2o <=> 1 h2o + 2 acetate', 'sgwrw': '2 co2 + 6 h2o --> 3 atp'}],
+            MatchingShape({
+                "shdgd_B" : MatchingShape({
+                    "acetate" : ExactValue(2),
+                    "h2o" : ExactValue(1),
+                }),
+
+                "shdgd_F" : MatchingShape({
+                    "pyruvate" : ExactValue(2),
+                    "h2o" : ExactValue(1)
+                }),
+
+                "sgwrw" : MatchingShape({
+                    "co2" : ExactValue(2),
+                    "h2o" : ExactValue(6),
+                })
+            }, "reaction dict")),
+    ).testModule()
+
+    # Not a test, just a visual aid: prints the parsed form of a more complex rule.
+    rule = "A and B or C or D and (E or F and G) or H"
+    print(f"rule \"{rule}\" should comes out as: {ruleUtils.parseRuleToNestedList(rule)}")
+
+def unit_ras_generator() -> None:
+    """
+    Unit tests for the ras_generator module. Initializes the module's ARGS through process_args(), then builds
+    a UnitTester (LogMode.Pedantic, stopOnFail = False) and runs it right away with testModule().
+
+    Returns:
+        None
+    """
+    import ras_generator as ras
+    import utils.rule_parsing as ruleUtils
+
+    # Making an alias to mask the name of the inner function and separate the 2 tests:
+    def opListAlias(op_list, dataset):
+        # Same call as the first test, but with the module's "none" option turned off beforehand
+        ras.ARGS.none = False
+        return ras.ras_op_list(op_list, dataset)
+
+    ras.ARGS = ras.process_args()
+    # An AND rule over three genes, one of which has no value in the dataset:
+    rule = ruleUtils.OpList(ruleUtils.RuleOp.AND)
+    rule.extend(["foo", "bar", "baz"])
+
+    dataset = { "foo" : 5, "bar" : 2, "baz" : None }
+
+    UnitTester("ras generator", LogMode.Pedantic, False,
+        UnitTest(ras.ras_op_list, [rule, dataset], ExactValue(2)),
+        UnitTest(opListAlias, [rule, dataset], ExactValue(None)),
+    ).testModule()
+
+if __name__ == "__main__":
+    # Runs the test suites in order when the file is executed as a script.
+    # NOTE: unit_rps_generator() is defined in this file but is not run here.
+    unit_cobraxy()
+    unit_custom_data_generator()
+    unit_utils()
+    unit_ras_generator()
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/CBS_backend.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/CBS_backend.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,200 @@
+from swiglpk import *
+import random
+import pandas as pd
+import numpy as np
+import cobra as cb
+
+# Initialize LP problem
+def initialize_lp_problem(S):
+    """
+    Converts a sparse matrix into the index / value arrays later handed to glp_load_matrix.
+
+    Args:
+        S : matrix exposing keys() -> (row, column) index pairs, values() and shape
+            (presumably the "dok" stoichiometric matrix built by create_lp_structure - confirm).
+
+    Returns:
+        tuple : (len_vector, values, indexes, ia, ja, ar, nrows, ncol), where len_vector is the amount of stored
+            entries, values and indexes are their plain lists, ia / ja / ar are the swiglpk arrays with row
+            indices, column indices and coefficients, nrows and ncol are the matrix's dimensions.
+    """
+    indexes = list(S.keys())
+    values = list(S.values())
+    len_vector = len(indexes)
+
+    # One extra slot per array: entries are written from position 1 onwards, position 0 is never used.
+    ia = intArray(len_vector + 1)
+    ja = intArray(len_vector + 1)
+    ar = doubleArray(len_vector + 1)
+
+    for pos, ((row, col), value) in enumerate(zip(indexes, values), start = 1):
+        # Matrix indices start from 0, the arrays store them starting from 1.
+        ia[pos] = row + 1
+        ja[pos] = col + 1
+        ar[pos] = value
+
+    nrows, ncol = S.shape[0], S.shape[1]
+
+    return len_vector, values, indexes, ia, ja, ar, nrows, ncol
+
+
+
+# Solve LP problem from the structure of the metabolic model
+def create_and_solve_lp_problem(lb,ub,nrows, ncol, len_vector, ia, ja, ar,
+                                obj_coefs,reactions,return_lp=False):
+    """
+    Builds a GLPK maximization problem from the arrays produced by initialize_lp_problem and solves it.
+
+    Args:
+        lb, ub : lower and upper bounds, one per column.
+        nrows, ncol : amount of rows (all fixed to 0) and columns of the problem.
+        len_vector, ia, ja, ar : amount of matrix entries and their row index / column index / value arrays.
+        obj_coefs : objective coefficients, one per column.
+        reactions : list of reactions, its length is the amount of flux values read back.
+        return_lp : if True the problem object is also returned and NOT deleted, the caller owns it.
+
+    Raises:
+        Exception : whatever is raised while building or solving the problem, re-raised unchanged after the
+            problem object has been deleted.
+
+    Returns:
+        tuple : (fluxes, Z) or (fluxes, Z, lp) when return_lp is True.
+    """
+    lp = glp_create_prob()
+    try:
+        glp_set_prob_name(lp, "sample")
+        glp_set_obj_dir(lp, GLP_MAX)
+        glp_add_rows(lp, nrows)
+        eps = 1e-16
+        for i in range(nrows):
+            glp_set_row_name(lp, i+1, "constrain_"+str(i+1))
+            glp_set_row_bnds(lp, i+1, GLP_FX, 0.0, 0.0)
+        glp_add_cols(lp, ncol)
+        for i in range(ncol):
+            glp_set_col_name(lp, i+1, "flux_"+str(i+1))
+            glp_set_col_bnds(lp, i+1, GLP_DB,lb[i]-eps,ub[i]+eps)
+        glp_load_matrix(lp, len_vector, ia, ja, ar)
+
+        fluxes, Z = solve_lp_problem(lp, obj_coefs, reactions)
+    except Exception:
+        # The problem object is released on any failure (setup included); the bare raise keeps the original
+        # exception type and traceback instead of wrapping them in a new generic Exception.
+        glp_delete_prob(lp)
+        raise
+
+    if return_lp:
+        return fluxes, Z, lp
+
+    glp_delete_prob(lp)
+    return fluxes, Z
+
+
+# Solve LP problem from the structure of the metabolic model
+def solve_lp_problem(lp,obj_coefs,reactions):
+    """
+    Sets the objective coefficients on an existing GLPK problem and solves it with the simplex method.
+
+    Args:
+        lp : the GLPK problem object.
+        obj_coefs : objective coefficients, assigned to the columns in order starting from column 1.
+        reactions : list of reactions, its length is the amount of column values read back.
+
+    Raises:
+        Exception : if glp_simplex returns a non-zero code.
+
+    Returns:
+        tuple : (fluxes, Z), the primal values of the first len(reactions) columns and the objective value.
+    """
+    # Set the coefficients of the objective function
+    for col, coef in enumerate(obj_coefs, start = 1):
+        glp_set_obj_coef(lp, col, coef)
+
+    # Initialize the parameters. glp_init_smcp resets every field to its default value, so it has to be called
+    # BEFORE the custom settings are applied, otherwise they are silently discarded.
+    params = glp_smcp()
+    glp_init_smcp(params)
+    params.presolve = GLP_ON
+    params.msg_lev = GLP_MSG_ALL
+    params.tm_lim = 4000
+
+    # Solve the problem
+    glp_scale_prob(lp, GLP_SF_AUTO)
+
+    value = glp_simplex(lp, params)
+
+    Z = glp_get_obj_val(lp)
+
+    if value == 0:
+        fluxes = [glp_get_col_prim(lp, i + 1) for i in range(len(reactions))]
+        return fluxes, Z
+
+    raise Exception("error in LP problem. Problem:",str(value))
+
+
+# Create LP structure
+def create_lp_structure(model):
+    """
+    Extracts from a model everything needed to build the LP problem.
+
+    Args:
+        model : the metabolic model; its reactions must expose id, objective_coefficient, lower_bound and
+            upper_bound.
+
+    Returns:
+        tuple : (S, lb, ub, coefs_obj, reactions), the stoichiometric matrix in "dok" format followed by the
+            reactions' lower bounds, upper bounds, objective coefficients and ids (all in the model's order).
+    """
+    reactions, coefs_obj = [], []
+    lb, ub = [], []
+    for reaction in model.reactions:
+        reactions.append(reaction.id)
+        coefs_obj.append(reaction.objective_coefficient)
+
+        # Lower and upper bounds
+        lb.append(reaction.lower_bound)
+        ub.append(reaction.upper_bound)
+
+    # Create S matrix
+    S = cb.util.create_stoichiometric_matrix(model, array_type="dok")
+
+    return S, lb, ub, coefs_obj, reactions
+
+# CBS sampling interface
+def randomObjectiveFunctionSampling(model, nsample, coefficients_df, df_sample):
+    """
+    CBS sampling through the GLPK backend: solves one LP problem per sample, each with its own objective
+    function, and stores the resulting fluxes.
+
+    Args:
+        model : the metabolic model.
+        nsample : amount of samples, column i of coefficients_df is used for sample i.
+        coefficients_df : objective coefficients, one column per sample; the last entry of a column is a flag
+            and a value of 1 there means "minimize" (the coefficients get negated).
+        df_sample : dataframe filled in place, row i receives the fluxes of sample i.
+
+    Returns:
+        None
+    """
+    S, lb, ub, _, reactions = create_lp_structure(model)
+    len_vector, _, _, ia, ja, ar, nrow, ncol = initialize_lp_problem(S)
+
+    for sample in range(nsample):
+        column = coefficients_df.iloc[:, sample].values
+
+        # The last entry is the flag, everything before it is a coefficient
+        objective = column[0:-1]
+        if column[-1] == 1: #minimize
+            objective = objective * -1
+
+        fluxes, _ = create_and_solve_lp_problem(
+            lb, ub, nrow, ncol, len_vector, ia, ja, ar, objective, reactions, return_lp=False)
+
+        df_sample.loc[sample] = fluxes
+
+def randomObjectiveFunctionSampling_cobrapy(model, nsample, coefficients_df, df_sample):
+    """
+    CBS sampling through the model's own optimize(): for each sample sets the model's objective from the
+    sample's coefficients, optimizes and stores the resulting fluxes.
+
+    Args:
+        model : the metabolic model, its objective is overwritten at every sample.
+        nsample : amount of samples, column i (by position) of coefficients_df is used for sample i.
+        coefficients_df : objective coefficients indexed by reaction id, one column per sample; the last row
+            is a flag and a value of 1 there means "minimize" (the coefficients get negated).
+        df_sample : dataframe filled in place, the cell at row i and column <reaction id> receives the flux.
+
+    Returns:
+        None
+    """
+    for i in range(nsample):
+        # The sample's column is selected by position, which doesn't depend on how the columns are labelled
+        # (integer lookups on string-labelled Series rely on a deprecated positional fallback).
+        sample_coefs = coefficients_df.iloc[:, i]
+
+        type_problem = -1 if sample_coefs.iloc[-1] == 1 else 1 # -1 means minimize
+
+        model.objective = {
+            reaction : sample_coefs.loc[reaction.id] * type_problem
+            for reaction in model.reactions
+        }
+
+        solution = model.optimize().fluxes
+        for rxn, flux in solution.items():
+            # Single .loc call: the chained form df.loc[i][rxn] = ... may assign to a temporary copy
+            # and leave df_sample untouched.
+            df_sample.loc[i, rxn] = flux
+
+# Create random coefficients for CBS
+def randomObjectiveFunction(model, n_samples, df_fva, seed=0):
+    """
+    Creates the random objective coefficients used by CBS.
+
+    For every sample a random threshold is drawn; each reaction then gets a non-zero coefficient only when its
+    own random draw exceeds the threshold. Every single draw re-seeds the random module with a counter that
+    starts from "seed" and grows by 1 each time, so the output only depends on the inputs.
+
+    Args:
+        model : the metabolic model, only the ids of its reactions are used.
+        n_samples : amount of samples (columns) to generate.
+        df_fva : dataframe indexed by reaction id with the columns "minimum" and "maximum".
+        seed : starting value of the seeding counter.
+
+    Returns:
+        pd.DataFrame : one row per reaction id plus a final "type_of_problem" row (0 = maximize, 1 = minimize),
+            one column per sample labelled "0", "1", ...
+    """
+    #reactions = model.reactions
+    reactions = [reaction.id for reaction in model.reactions]
+    row_labels = reactions + ["type_of_problem"]
+    coefficients_df = pd.DataFrame(index=row_labels, columns=[str(i) for i in range(n_samples)])
+
+    cont = seed
+
+    def next_draw():
+        # Advances the counter, re-seeds with it and draws a number in [0, 1)
+        nonlocal cont
+        cont = cont + 1
+        random.seed(cont)
+        return random.random()
+
+    for i in range(n_samples):
+        column = str(i)
+
+        # Random threshold between 0 and 1 for this sample
+        threshold = next_draw()
+
+        for reaction in reactions:
+            coefficient = 0
+
+            if next_draw() > threshold:
+                c = 2 * next_draw() - 1 # coefficient between -1 and 1
+
+                val_max = np.max([df_fva.loc[reaction, "minimum"], df_fva.loc[reaction, "maximum"]])
+
+                if val_max != 0: # only when the fva value is not zero
+                    coefficient = c / val_max # scaled by the fva value
+
+            coefficients_df.loc[reaction, column] = coefficient
+
+        # 0 = maximize, 1 = minimize
+        coefficients_df.loc["type_of_problem", column] = 0 if next_draw() < 0.5 else 1
+
+    return coefficients_df
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/general_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/general_utils.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,573 @@
+import math
+import re
+import sys
+import csv
+import pickle
+import lxml.etree as ET
+
+from enum import Enum
+from itertools import count
+from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union
+
+import pandas as pd
+import cobra
+
+# FILES
+class FileFormat(Enum):
+ """
+ Encodes possible file extensions to conditionally save data in a different format.
+ """
+ DAT = ("dat",) # this is how galaxy treats all your files!
+ CSV = ("csv",) # this is how most editable input data is written
+ TSV = ("tsv",) # this is how most editable input data is ACTUALLY written TODO:more support pls!!
+
+ SVG = ("svg",) # this is how most metabolic maps are written
+ PNG = ("png",) # this is a common output format for images (such as metabolic maps)
+ PDF = ("pdf",) # this is also a common output format for images, as it's required in publications.
+
+ XML = ("xml",) # this is one main way cobra models appear in
+ JSON = ("json",) # this is the other
+
+ PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved
+ #TODO: we're in a pickle (ba dum tss), there's no point in supporting many extensions internally. The
+ # issue will never be solved for user-uploaded files and those are saved as .dat by galaxy anyway so it
+ # doesn't matter as long as we CAN recognize these 3 names as valid pickle extensions. We must however
+ # agree on an internal standard and use only that one, otherwise constructing usable paths becomes a nightmare.
+ @classmethod
+ def fromExt(cls, ext :str) -> "FileFormat":
+ """
+ Converts a file extension string to a FileFormat instance.
+
+ Args:
+ ext : The file extension as a string, matched case-insensitively against variant names first, then aliases.
+ Raises: ValueErr if ext matches neither a variant name nor one of the aliases stored in the variants' values.
+ Returns:
+ FileFormat: The FileFormat instance corresponding to the file extension.
+ """
+ variantName = ext.upper()
+ if variantName in FileFormat.__members__: return FileFormat[variantName]
+
+ variantName = variantName.lower()
+ for member in cls:
+ if variantName in member.value: return member
+
+ raise ValueErr("ext", "a valid FileFormat file extension", ext)
+
+ def __str__(self) -> str:
+ """
+ (Private) converts to str representation. Good practice for usage with argparse.
+
+ Returns:
+ str : the string representation of the file extension (the LAST alias in the variant's value tuple).
+ """
+ return self.value[-1] #TODO: fix, the last alias is used so PICKLE is rendered as "p" (the dumb pickle thing)
+
+class FilePath():
+    """
+    Represents a file path. View this as an attempt to standardize file-related operations by expecting
+    values of this type in any process requesting a file path.
+    """
+    def __init__(self, filePath :str, ext :FileFormat, *, prefix = "") -> None:
+        """
+        (Private) Initializes an instance of FilePath.
+
+        Args:
+            filePath : the end of the path, containing the file name (show() appends ".<ext>" to it).
+            ext : the file's extension.
+            prefix : anything before filePath, if the last '/' isn't there it's added by the code.
+
+        Returns:
+            None : practically, a FilePath instance.
+        """
+        self.ext = ext
+        self.filePath = filePath
+
+        if prefix and prefix[-1] != '/': prefix += '/'
+        self.prefix = prefix
+
+    @classmethod
+    def fromStrPath(cls, path :str) -> "FilePath":
+        """
+        Factory method to parse a string from which to obtain, if possible, a valid FilePath instance.
+
+        Args:
+            path : the string containing the path
+
+        Raises:
+            PathErr : if the provided string doesn't represent a valid path.
+
+        Returns:
+            FilePath : the constructed instance.
+        """
+        # This method is often used to construct FilePath instances from ARGS UI arguments, only for the callers
+        # to call show() immediately to bring back the string and open the file it points to.
+        # TODO: this is an indication that the arguments SHOULD BE OF TYPE FilePath if they are filepaths, this ENSURES
+        # their correctness when modifying the UI and avoids the pointless back-and-forth.
+        #
+        # Named groups: "prefix" is everything up to and including the last '/', "ext" is whatever follows the
+        # last '.', "name" is what lies in between. The group names must match the keys read from the match below.
+        result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path)
+        if not result or not result["name"] or not result["ext"]:
+            raise PathErr(path, "cannot recognize folder structure or extension in path")
+
+        prefix = result["prefix"] if result["prefix"] else ""
+        return cls(result["name"], FileFormat.fromExt(result["ext"]), prefix = prefix)
+
+    def show(self) -> str:
+        """
+        Shows the path as a string.
+
+        Returns:
+            str : the path shown as a string, built as "<prefix><filePath>.<ext>".
+        """
+        return f"{self.prefix}{self.filePath}.{self.ext}"
+
+    def __str__(self) -> str: return self.show()
+
+# ERRORS
+def terminate(msg :str) -> None:
+ """
+ Terminate the execution of the script with an error message.
+
+ Args:
+ msg (str): The error message to be displayed.
+
+ Raises:
+ SystemExit : always (through sys.exit), carrying the formatted message; the function never returns normally.
+ """
+ sys.exit(f"Execution aborted: {msg}\n")
+
+def logWarning(msg :str, loggerPath :str) -> None:
+ """
+ Append a warning message to an output log file (nothing is printed to the console). The final period and a
+ newline are added by the function.
+
+ Args:
+ msg (str): The warning message to be logged.
+ loggerPath : The file path of the output log file. Given as a string, parsed to a FilePath and
+ immediately read back (beware relative expensive operation, log with caution).
+
+ Returns:
+ None
+ """
+ # building the path and then reading it immediately seems useless, but it's actually a way of
+ # validating that reduces repetition on the caller's side. Besides, logging a message by writing
+ # to a file is supposed to be computationally expensive anyway, so this is also a good deterrent from
+ # mindlessly logging whenever something comes up, log at the very end and tell the user everything
+ # that went wrong. If you don't like it: implement a persistent runtime buffer that gets dumped to
+ # the file only at the end of the program's execution.
+ with open(FilePath.fromStrPath(loggerPath).show(), 'a') as log: log.write(f"{msg}.\n")
+
+class CustomErr(Exception):
+    """
+    Custom error class to handle exceptions in a structured way, with a unique identifier and a message.
+    """
+    __idGenerator = count()
+    errName = "Custom Error"
+    def __init__(self, msg :str, details = "", explicitErrCode = -1) -> None:
+        """
+        (Private) Initializes an instance of CustomErr.
+
+        Args:
+            msg (str): Error message to be displayed.
+            details (str): Informs the user more about the error encountered. Defaults to "".
+            explicitErrCode (int): Explicit error code, used only when larger than the running counter. Defaults to -1.
+
+        Returns:
+            None : practically, a CustomErr instance.
+        """
+        self.msg = msg
+        self.details = details
+
+        self.id = max(explicitErrCode, next(CustomErr.__idGenerator))
+
+    def throw(self, loggerPath = "") -> None:
+        """
+        Raises the current CustomErr instance, logging a warning message before doing so.
+
+        Args:
+            loggerPath (str): Path of the log file; when empty nothing is logged. Defaults to "".
+
+        Raises:
+            self: The current CustomErr instance.
+        """
+        if loggerPath: logWarning(str(self), loggerPath)
+        raise self
+
+    def abort(self) -> None:
+        """
+        Aborts the execution of the script.
+
+        Returns:
+            None
+        """
+        terminate(str(self))
+
+    def __str__(self) -> str:
+        """
+        (Private) Returns a string representing the current CustomErr instance.
+
+        Returns:
+            str: A string representing the current instance, labelled with the errName of its own (sub)class.
+        """
+        return f"{type(self).errName} #{self.id}: {self.msg}, {self.details}."
+
+class ArgsErr(CustomErr):
+ """
+ CustomErr subclass for UI arguments errors: the message reports the argument's name, what was expected and what was received; msg becomes the error details.
+ """
+ errName = "Args Error"
+ def __init__(self, argName :str, expected :Any, actual :Any, msg = "no further details provided") -> None:
+ super().__init__(f"argument \"{argName}\" expected {expected} but got {actual}", msg)
+
+class DataErr(CustomErr):
+ """
+ CustomErr subclass for data formatting errors: the message names the offending file, msg becomes the error details.
+ """
+ errName = "Data Format Error"
+ def __init__(self, fileName :str, msg = "no further details provided") -> None:
+ super().__init__(f"file \"{fileName}\" contains malformed data", msg)
+
+class PathErr(CustomErr):
+ """
+ CustomErr subclass for filepath formatting errors. The path is only interpolated into the message, so plain strings work as well as FilePath instances.
+ """
+ errName = "Path Error"
+ def __init__(self, path :Union[FilePath, str], msg = "no further details provided") -> None:
+ super().__init__(f"path \"{path}\" is invalid", msg)
+
+class ValueErr(CustomErr):
+ """
+ CustomErr subclass for any value error. An empty valueName is left out of the message (the quoted name is multiplied by bool(valueName)).
+ """
+ errName = "Value Error"
+ def __init__(self, valueName: str, expected :Any, actual :Any, msg = "no further details provided") -> None:
+ super().__init__("value " + f"\"{valueName}\" " * bool(valueName) + f"was supposed to be {expected}, but got {actual} instead", msg)
+
+# RESULT
+T = TypeVar('T')
+E = TypeVar('E', bound = CustomErr) # should bind to Result.ResultErr but python happened!
+class Result(Generic[T, E]):
+    """
+    Class to handle the result of an operation, with a value and a boolean flag to indicate
+    whether the operation was successful or not.
+    """
+    class ResultErr(CustomErr):
+        """
+        CustomErr subclass for all Result errors.
+        """
+        errName = "Result Error"
+        def __init__(self, msg = "no further details provided") -> None:
+            super().__init__(msg)
+    def __init__(self, value :Union[T, E], isOk :bool) -> None:
+        """
+        (Private) Initializes an instance of Result.
+
+        Args:
+            value (Union[T, E]): The value to be stored in the Result instance.
+            isOk (bool): A boolean flag to indicate whether the operation was successful or not.
+
+        Returns:
+            None : practically, a Result instance.
+        """
+        self.isOk = isOk
+        self.isErr = not isOk
+        self.value = value
+
+    @classmethod
+    def Ok(cls, value :T) -> "Result":
+        """
+        Constructs a new Result instance with a successful operation.
+
+        Args:
+            value (T): The value to be stored in the Result instance, set as successful.
+
+        Returns:
+            Result: A new Result instance with a successful operation.
+        """
+        return Result(value, isOk = True)
+
+    @classmethod
+    def Err(cls, value :E) -> "Result":
+        """
+        Constructs a new Result instance with a failed operation.
+
+        Args:
+            value (E): The value to be stored in the Result instance, set as failed.
+
+        Returns:
+            Result: A new Result instance with a failed operation.
+        """
+        return Result(value, isOk = False)
+
+    def unwrap(self) -> T:
+        """
+        Unwraps the value of the Result instance, if the operation was successful.
+
+        Raises:
+            ResultErr: If the operation was not successful.
+
+        Returns:
+            T: The value of the Result instance, if the operation was successful.
+        """
+        if self.isOk: return self.value
+        raise Result.ResultErr(f"Unwrapped Result.Err : {self.value}")
+
+    def unwrapOr(self, default :T) -> T:
+        """
+        Unwraps the value of the Result instance, if the operation was successful, otherwise
+        it returns a default value.
+
+        Args:
+            default (T): The default value to be returned if the operation was not successful.
+
+        Returns:
+            T: The value of the Result instance, if the operation was successful,
+            otherwise the default value.
+        """
+        return self.value if self.isOk else default
+
+    def expect(self, err :"Result.ResultErr") -> T:
+        """
+        Expects that the value of the Result instance is successful, otherwise it raises an error.
+
+        Args:
+            err (Exception): The error to be raised if the operation was not successful.
+
+        Raises:
+            err: The error raised if the operation was not successful.
+
+        Returns:
+            T: The value of the Result instance, if the operation was successful.
+        """
+        if self.isOk: return self.value
+        raise err
+
+    U = TypeVar("U")
+    def map(self, mapper: Callable[[T], U]) -> "Result[U, E]":
+        """
+        Maps the value of the current Result to whatever is returned by the mapper function.
+        If the Result contained an unsuccessful operation to begin with it remains unchanged
+        (a reference to the current instance is returned).
+        If the mapper function panics the returned result instance will be of the error kind.
+
+        Args:
+            mapper (Callable[[T], U]): The mapper operation to be applied to the Result value.
+
+        Returns:
+            Result[U, E]: The result of the mapper operation applied to the Result value.
+        """
+        if self.isErr: return self
+        try: return Result.Ok(mapper(self.value))
+        except Exception as e: return Result.Err(e)
+
+    D = TypeVar("D", bound = "Result.ResultErr")
+    def mapErr(self, mapper :Callable[[E], D]) -> "Result[T, D]":
+        """
+        Maps the error of the current Result to whatever is returned by the mapper function.
+        If the Result contained a successful operation it remains unchanged
+        (a reference to the current instance is returned).
+        If the mapper function panics this method does as well.
+
+        Args:
+            mapper (Callable[[E], D]): The mapper operation to be applied to the Result error.
+
+        Returns:
+            Result[T, D]: The result of the mapper operation applied to the Result error.
+        """
+        if self.isOk: return self
+        return Result.Err(mapper(self.value))
+
+    def __str__(self):
+        return f"Result::{'Ok' if self.isOk else 'Err'}({self.value})"
+
+# FILES
+def read_dataset(path :FilePath, datasetName = "Dataset (not actual file name!)") -> pd.DataFrame:
+    """
+    Reads a .csv or .tsv file and returns it as a Pandas DataFrame. Tab-separated parsing (no header row) is tried first, comma-separated parsing (first row as header) is the fallback.
+
+    Args:
+        path : the path to the dataset file.
+        datasetName : the name of the dataset.
+
+    Raises:
+        DataErr: If anything goes wrong when trying to open the file, if pandas thinks the dataset is empty or if
+        it has less than 2 columns.
+
+    Returns:
+        pandas.DataFrame: The dataset loaded as a Pandas DataFrame.
+    """
+    # I advise against the use of this function. This is an attempt at standardizing bad legacy code rather than
+    # removing / replacing it to avoid introducing as many bugs as possible in the tools still relying on this code.
+    # First off, this is not the best way to distinguish between .csv and .tsv files and Galaxy itself makes it really
+    # hard to implement anything better. Also, this function's name advertizes it as a dataset-specific operation and
+    # contains dubious responsibility (how many columns..) while being a file-opening function instead. My suggestion is
+    # TODO: stop using dataframes ever at all in anything and find a way to have tight control over file extensions.
+    try: dataset = pd.read_csv(path.show(), sep = '\t', header = None, engine = "python")
+    except Exception: # not a bare except: KeyboardInterrupt / SystemExit must not trigger the fallback
+        try: dataset = pd.read_csv(path.show(), sep = ',', header = 0, engine = "python")
+        except Exception as err: raise DataErr(datasetName, f"encountered empty or wrongly formatted data: {err}") from err
+
+    if len(dataset.columns) < 2: raise DataErr(datasetName, "a dataset is always meant to have at least 2 columns")
+    return dataset
+
+def readPickle(path :FilePath) -> Any:
+ """
+ Reads the contents of a .pickle file, which needs to exist at the given path. NOTE(review): unpickling can run arbitrary code, only feed trusted files.
+
+ Args:
+ path : the path to the .pickle file.
+
+ Returns:
+ Any : the data inside a pickle file, could be anything.
+ """
+ with open(path.show(), "rb") as fd: return pickle.load(fd)
+
+def writePickle(path :FilePath, data :Any) -> None:
+ """
+ Serializes any data with pickle into the file at the given path, which is opened in "wb" mode (created, or overwritten if it exists).
+
+ Args:
+ path : the path to the .pickle file.
+ data : the data to be written to the file.
+
+ Returns:
+ None
+ """
+ with open(path.show(), "wb") as fd: pickle.dump(data, fd)
+
+def readCsv(path :FilePath, delimiter = ',', *, skipHeader = True) -> List[List[str]]:
+ """
+ Reads the contents of a .csv file, which needs to exist at the given path.
+
+ Args:
+ path : the path to the .csv file.
+ delimiter : allows other subformats such as .tsv to be opened by the same method (\\t delimiter).
+ skipHeader : whether the first row of the file is a header and should be skipped (the bool is used as the slice start).
+
+ Returns:
+ List[List[str]] : list of rows from the file, each parsed as a list of strings originally separated by the delimiter.
+ """
+ with open(path.show(), "r", newline = "") as fd: return list(csv.reader(fd, delimiter = delimiter))[skipHeader:]
+
+def readSvg(path :FilePath, customErr :Optional[Exception] = None) -> ET.ElementTree:
+ """
+ Reads the contents of a .svg file, which needs to exist at the given path.
+
+ Args:
+ path : the path to the .svg file.
+ customErr : if given, raised in place of lxml's own error when parsing fails.
+ Raises:
+ customErr if given, otherwise the original lxml XMLSyntaxError / XMLSchemaParseError.
+
+ Returns:
+ ET.ElementTree : the parsed document.
+ """
+ try: return ET.parse(path.show())
+ except (ET.XMLSyntaxError, ET.XMLSchemaParseError) as err:
+ raise customErr if customErr else err
+
+def writeSvg(path :FilePath, data:ET.ElementTree) -> None:
+ """
+ Serializes svg data opened with lxml.etree (through ET.tostring) and writes the bytes to the file at the given path.
+
+ Args:
+ path : the path to the .svg file.
+ data : the data to be written to the file.
+
+ Returns:
+ None
+ """
+ with open(path.show(), "wb") as fd: fd.write(ET.tostring(data))
+
+# UI ARGUMENTS
+class Bool: # callable validator: turns "true" / "false" strings (any case) into a bool
+ def __init__(self, argName :str) -> None:
+ self.argName = argName # only used to label the ArgsErr raised on invalid input
+
+ def __call__(self, s :str) -> bool: return self.check(s)
+
+ def check(self, s :str) -> bool:
+ s = s.lower()
+ if s == "true" : return True
+ if s == "false": return False
+ raise ArgsErr(self.argName, "boolean string (true or false, not case sensitive)", f"\"{s}\"")
+
+class Float: # callable validator: parses a float, mapping "nan" / "none" (any case) to math.nan
+ def __init__(self, argName = "Dataset values, not an argument") -> None:
+ self.argName = argName # only used to label the ArgsErr raised on invalid input
+
+ def __call__(self, s :str) -> float: return self.check(s)
+
+ def check(self, s :str) -> float:
+ try: return float(s)
+ except ValueError:
+ s = s.lower()
+ if s == "nan" or s == "none": return math.nan
+ raise ArgsErr(self.argName, "numeric string or \"None\" or \"NaN\" (not case sensitive)", f"\"{s}\"")
+
+# MODELS
+OldRule = List[Union[str, "OldRule"]]
+class Model(Enum):
+    """
+    Represents a metabolic model, either custom or locally supported. Custom models don't point
+    to valid file paths.
+    """
+
+    Recon = "Recon"
+    ENGRO2 = "ENGRO2"
+    ENGRO2_no_legend = "ENGRO2_no_legend"
+    HMRcore = "HMRcore"
+    HMRcore_no_legend = "HMRcore_no_legend"
+    Custom = "Custom" # Exists as a valid variant in the UI, but doesn't point to valid file paths.
+
+    def __raiseMissingPathErr(self, path :Optional[FilePath]) -> None:
+        if not path: raise PathErr("<>", "it's necessary to provide a custom path when retrieving files from a custom model")
+
+    def getRules(self, toolDir :str, customPath :Optional[FilePath] = None) -> Dict[str, Dict[str, OldRule]]:
+        """
+        Open "rules" pickle file for this model (customPath for Model.Custom, a bundled file under toolDir otherwise).
+
+        Returns:
+            Dict[str, Dict[str, OldRule]] : the rules for this model.
+        """
+        path = customPath if self is Model.Custom else FilePath(f"{self.name}_rules", FileFormat.PICKLE, prefix = f"{toolDir}/local/pickle files/")
+        self.__raiseMissingPathErr(path)
+        return readPickle(path)
+
+    def getTranslator(self, toolDir :str, customPath :Optional[FilePath] = None) -> Dict[str, Dict[str, str]]:
+        """
+        Open "gene translator (old: gene_in_rule)" pickle file for this model (same path selection as getRules).
+
+        Returns:
+            Dict[str, Dict[str, str]] : the translator dict for this model.
+        """
+        path = customPath if self is Model.Custom else FilePath(f"{self.name}_genes", FileFormat.PICKLE, prefix = f"{toolDir}/local/pickle files/")
+        self.__raiseMissingPathErr(path)
+        return readPickle(path)
+
+    def getMap(self, toolDir = ".", customPath :Optional[FilePath] = None) -> ET.ElementTree:
+        path = customPath if self is Model.Custom else FilePath(f"{self.name}_map", FileFormat.SVG, prefix = f"{toolDir}/local/svg metabolic maps/")
+        self.__raiseMissingPathErr(path)
+        return readSvg(path, customErr = DataErr(path, f"custom map in wrong format"))
+
+    def getCOBRAmodel(self, toolDir = ".", customPath :Optional[FilePath] = None, customExtension :Optional[FileFormat]=None)->cobra.Model:
+        if self is not Model.Custom: # locally supported models are read from the bundled SBML files
+            return cobra.io.read_sbml_model(FilePath(f"{self.name}", FileFormat.XML, prefix = f"{toolDir}/local/models/").show())
+        self.__raiseMissingPathErr(customPath) # same guard as the other getters, instead of failing on a None path below
+        return self.load_custom_model(customPath, customExtension)
+
+    def load_custom_model(self, file_path :FilePath, ext :Optional[FileFormat] = None) -> cobra.Model:
+        ext = ext if ext else file_path.ext # an explicit extension overrides the one stored in the path
+        try:
+            if ext is FileFormat.XML:
+                return cobra.io.read_sbml_model(file_path.show())
+
+            if ext is FileFormat.JSON:
+                return cobra.io.load_json_model(file_path.show())
+
+        except Exception as e: raise DataErr(file_path, e.__str__()) from e
+        raise DataErr(file_path, # report the extension that was actually checked, which may be the override
+            f"Format \"{ext}\" is not recognized, only JSON and XML files are supported.")
+
+    def __str__(self) -> str: return self.value
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/reaction_parsing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/reaction_parsing.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,130 @@
+from enum import Enum
+import utils.general_utils as utils
+from typing import Dict
+import csv
+import re
+
+# Reaction direction encoding:
+class ReactionDir(Enum):
+ """
+ A reaction can go forwards, backwards or be reversible (able to proceed in both directions).
+ Models created / managed with cobrapy encode this information within the reaction's
+ formula using the arrows this enum keeps as values.
+ """
+ FORWARD = "-->"
+ BACKWARD = "<--"
+ REVERSIBLE = "<=>"
+
+ @classmethod
+ def fromReaction(cls, reaction :str) -> 'ReactionDir':
+ """
+ Takes a whole reaction formula string and looks for one of the arrows (as a substring, members are
+ tested in declaration order and the first hit wins), returning the corresponding reaction direction.
+
+ Args:
+ reaction : the reaction's formula.
+
+ Raises:
+ ValueError : if no valid arrow is found.
+
+ Returns:
+ ReactionDir : the corresponding reaction direction.
+ """
+ for member in cls:
+ if member.value in reaction: return member
+
+ raise ValueError("No valid arrow found within reaction string.")
+
+ReactionsDict = Dict[str, Dict[str, float]]
+
+
+def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
+ """
+ Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id
+ (key) and a dict (value) matching each substrate in the reaction to its stoichiometric coefficient.
+ Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id)
+ appears without an explicit coeff, the value 1.0 will be used instead.
+
+ Args:
+ reactionsDict : dictionary encoding custom reactions information.
+ rId : unique reaction id.
+ reaction : the reaction's formula (one side only, blank formulas add nothing).
+
+ Returns:
+ None
+
+ Side effects:
+ reactionsDict : mut
+ """
+ reaction = reaction.strip()
+ if not reaction: return
+
+ reactionsDict[rId] = {}
+ # We assume the '+' separating consecutive metabs in a reaction is spaced from them,
+ # to avoid confusing it for electrical charge:
+ for word in reaction.split(" + "):
+ metabId, stoichCoeff = word, 1.0
+ # Implicit stoichiometric coeff is equal to 1, some coeffs are floats.
+ # NOTE(review): re.search is not anchored, while the slice below assumes the match starts the word - confirm.
+ # Accepted coeffs can be integer or floats with a dot (.) decimal separator
+ # and must be separated from the metab with a space:
+ foundCoeff = re.search(r"\d+(\.\d+)? ", word)
+ if foundCoeff:
+ wholeMatch = foundCoeff.group(0)
+ metabId = word[len(wholeMatch):].strip()
+ stoichCoeff = float(wholeMatch.strip())
+
+ reactionsDict[rId][metabId] = stoichCoeff
+
+ if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed.
+
+
+def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
+ """
+ Parses the given dictionary into the correct format.
+
+ Args:
+ unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings.
+ Raises: ValueError if a formula has no valid arrow or doesn't split into exactly two sides around " <arrow> ".
+ Returns:
+ ReactionsDict: The correctly parsed dict.
+ """
+ reactionsDict :ReactionsDict = {}
+ for rId, reaction in unparsed_reactions.items():
+ reactionDir = ReactionDir.fromReaction(reaction)
+ left, right = reaction.split(f" {reactionDir.value} ")
+
+ # Reversible reactions are split into distinct reactions, one for each direction.
+ # In general we only care about substrates, the product information is lost.
+ reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
+ if reactionDir is not ReactionDir.BACKWARD:
+ add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left)
+
+ if reactionDir is not ReactionDir.FORWARD:
+ add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right)
+
+ # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
+ # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
+ # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
+ # through with a similar convention on ALL reactions and correctly encode direction based on their
+ # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
+ # which involves re-writing the "reactions" dictionary.
+
+ return reactionsDict
+
+
+def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
+ """
+ Creates a custom dictionary encoding reactions information from a csv file containing
+ data about these reactions, the path of which is given as input. The first row is skipped as a header (readCsv default); column 0 is the reaction id, column 1 its formula.
+
+ Args:
+ customReactionsPath : path to the reactions information file.
+
+ Returns:
+ ReactionsDict : dictionary encoding custom reactions information.
+ """
+ reactionsData :Dict[str, str] = {row[0]: row[1] for row in utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath))}
+
+ return create_reaction_dict(reactionsData)
+
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/rule_parsing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cobraxy-9688ad27287b/COBRAxy/utils/cobraxy-9688ad27287b/COBRAxy/utils/rule_parsing.py Sun Oct 13 11:38:28 2024 +0000
@@ -0,0 +1,243 @@
+from enum import Enum
+import utils.general_utils as utils
+from typing import List, Union, Optional
+
+class RuleErr(utils.CustomErr):
+ """
+ CustomErr subclass for rule syntax errors: msg goes into the main message next to the rule, the details text is fixed.
+ """
+ errName = "Rule Syntax Error"
+ def __init__(self, rule :str, msg = "no further details provided") -> None:
+ super().__init__(
+ f"rule \"{rule}\" is malformed, {msg}",
+ "please verify your input follows the validity guidelines")
+
+class RuleOp(Enum):
+ """
+ Encodes all operators valid in gene rules.
+ """
+ OR = "or"
+ AND = "and"
+
+ @classmethod
+ def isOperator(cls, op :str) -> bool: # case-insensitive, unlike RuleOp(op) which only accepts the lowercase values
+ return op.upper() in cls.__members__
+
+ def __str__(self) -> str: return self.value
+
+class OpList(List[Union[str, "OpList"]]):
+ """
+ Represents a parsed rule and each of its nesting levels, including the operator that level uses.
+ """
+ def __init__(self, op :Optional[RuleOp] = None) -> None:
+ """
+ (Private) Initializes an instance of OpList.
+
+ Args:
+ op (Optional[RuleOp]): Operator to be assigned to the OpList. Defaults to None (not known yet).
+
+ Returns:
+ None : practically, an OpList instance.
+ """
+ self.op = op
+
+ def setOpIfMissing(self, op :RuleOp) -> None:
+ """
+ Sets the operator of the OpList if it's missing (the current one is falsy).
+
+ Args:
+ op (RuleOp): Operator to be assigned to the OpList.
+
+ Returns:
+ None
+ """
+ if not self.op: self.op = op
+
+ def __repr__(self, indent = "") -> str:
+ """
+ (Private) Returns a string representation of the current OpList instance.
+
+ Args:
+ indent (str): Indentation prefix of the current nesting level, nested OpLists get a longer one. Defaults to "".
+
+ Returns:
+ str: A string representation of the current OpList instance.
+ """
+ nextIndent = indent + " "
+ return f"<{self.op}>[\n" + ",\n".join([
+ f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}"
+ for item in self ]) + f"\n{indent}]"
+
+class RuleStack:
+ """
+ FILO stack structure to save the intermediate representation of a Rule during parsing, with the
+ current nesting level at the top of the stack.
+ """
+ def __init__(self) -> None:
+ """
+ (Private) initializes an instance of RuleStack.
+
+ Returns:
+ None : practically, a RuleStack instance.
+ """
+ self.__stack = [OpList()] # the stack starts out with the result list already allocated
+ self.__updateCurrent()
+
+ def pop(self) -> None:
+ """
+ Removes the OpList on top of the stack, also flattening it once when possible: if it holds a single
+ nested OpList, that inner list replaces the last item of the new top (presumably the popped list itself).
+ Side Effects:
+ self : mut
+
+ Returns:
+ None
+ """
+ oldTop = self.__stack.pop()
+ if len(oldTop) == 1 and isinstance(oldTop[0], OpList): self.__stack[-1][-1] = oldTop[0]
+ self.__updateCurrent()
+
+ def push(self, operator = "") -> None:
+ """
+ Adds a new nesting level, in the form of a new OpList on top of the stack.
+
+ Args:
+ operator : the operator assigned to the new OpList (the falsy default means "not known yet").
+
+ Side Effects:
+ self : mut
+
+ Returns:
+ None
+ """
+ newLevel = OpList(operator)
+ self.current.append(newLevel)
+ self.__stack.append(newLevel)
+ self.__updateCurrent()
+
+ def popForward(self) -> None:
+ """
+ Moves the last "actual" item from the 2nd to last list to the beginning of the top list, as per
+ the example below:
+ stack : [list_a, list_b]
+ list_a : [item1, item2, list_b] --> [item1, list_b]
+ list_b : [item3, item4] --> [item2, item3, item4]
+
+ This is essentially a "give back as needed" operation.
+
+ Side Effects:
+ self : mut
+
+ Returns:
+ None
+ """
+ self.current.insert(0, self.__stack[-2].pop(-2))
+
+ def currentIsAnd(self) -> bool:
+ """
+ Checks if the current OpList's assigned operator is "and".
+
+ Returns:
+ bool : True if the current OpList's assigned operator is "and", False otherwise.
+ """
+ return self.current.op is RuleOp.AND
+
+ def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]:
+ """
+ Obtains the first OpList on the stack, only if it's the only element.
+
+ Args:
+ err : The error to raise if obtaining the result is not possible.
+
+ Side Effects:
+ self : mut
+
+ Raises:
+ err: If given, otherwise None is returned.
+
+ Returns:
+ Optional[OpList]: The first OpList on the stack, only if it's the only element.
+ """
+ # on success the list is popped, which leaves the stack empty
+ if len(self.__stack) == 1: return self.__stack.pop()
+ if err: raise err
+ return None
+
+ def __updateCurrent(self) -> None:
+ """
+ (Private) Updates the current OpList to the one on top of the stack.
+
+ Side Effects:
+ self : mut
+
+ Returns:
+ None
+ """
+ self.current = self.__stack[-1]
+
+def parseRuleToNestedList(rule :str) -> OpList:
+    """
+    Parse a single rule from its string representation to an OpList, making all priority explicit
+    through nesting levels. Operators are recognized regardless of case, as RuleOp.isOperator does.
+
+    Args:
+        rule : the string representation of a rule to be parsed.
+
+    Raises:
+        RuleErr : whenever something goes wrong during parsing.
+
+    Returns:
+        OpList : the parsed rule.
+    """
+    source = iter(rule
+        .replace("(", "( ").replace(")", " )") # Single out parens as words
+        .strip()  # remove whitespace at extremities
+        .split()) # split by spaces
+
+    stack = RuleStack()
+    nestingErr = RuleErr(rule, "mismatch between open and closed parentheses")
+    try:
+        while True: # keep reading until source ends
+            while True:
+                operand = next(source, None) # expected name or rule opening
+                if operand is None: raise RuleErr(rule, "found trailing open parentheses")
+                if RuleOp.isOperator(operand) or operand == ")": # found operator instead, panic
+                    raise RuleErr(rule, f"found \"{operand}\" in unexpected position")
+
+                if operand != "(": break # found name
+
+                # found rule opening, we add new nesting level but don't know the operator
+                stack.push()
+
+            stack.current.append(operand)
+
+            while True: # keep reading until operator is found or source ends
+                operator = next(source, None) # expected operator or rule closing
+                if operator and operator != ")": break # found operator
+
+                if stack.currentIsAnd(): stack.pop() # we close the "and" chain
+
+                if not operator: break
+                stack.pop() # we close the parentheses
+
+            # we proceed with operator:
+            if not operator: break # there is no such thing as a double loop break.. yet
+
+            if not RuleOp.isOperator(operator): raise RuleErr(
+                rule, f"found \"{operator}\" in unexpected position, expected operator")
+
+            operator = RuleOp(operator.lower()) # enum values are lowercase, isOperator accepts any case
+            if operator is RuleOp.OR and stack.currentIsAnd():
+                stack.pop()
+
+            elif operator is RuleOp.AND and not stack.currentIsAnd():
+                stack.push(operator)
+                stack.popForward()
+
+            stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had
+
+    except RuleErr as err: raise err # bubble up proper errors
+    except Exception as err: raise nestingErr from err # any other failure is interpreted as a nesting error.
+
+    parsedRule = stack.obtain(nestingErr)
+    return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/general_utils.py
--- a/cobraxy-9688ad27287b/COBRAxy/utils/general_utils.py Sun Oct 13 11:35:56 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,573 +0,0 @@
-import math
-import re
-import sys
-import csv
-import pickle
-import lxml.etree as ET
-
-from enum import Enum
-from itertools import count
-from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union
-
-import pandas as pd
-import cobra
-
-# FILES
-class FileFormat(Enum):
- """
- Encodes possible file extensions to conditionally save data in a different format.
- """
- DAT = ("dat",) # this is how galaxy treats all your files!
- CSV = ("csv",) # this is how most editable input data is written
- TSV = ("tsv",) # this is how most editable input data is ACTUALLY written TODO:more support pls!!
-
- SVG = ("svg",) # this is how most metabolic maps are written
- PNG = ("png",) # this is a common output format for images (such as metabolic maps)
- PDF = ("pdf",) # this is also a common output format for images, as it's required in publications.
-
- XML = ("xml",) # this is one main way cobra models appear in
- JSON = ("json",) # this is the other
-
- PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved
- #TODO: we're in a pickle (ba dum tss), there's no point in supporting many extensions internally. The
- # issue will never be solved for user-uploaded files and those are saved as .dat by galaxy anyway so it
- # doesn't matter as long as we CAN recognize these 3 names as valid pickle extensions. We must however
- # agree on an internal standard and use only that one, otherwise constructing usable paths becomes a nightmare.
- @classmethod
- def fromExt(cls, ext :str) -> "FileFormat":
- """
- Converts a file extension string to a FileFormat instance.
-
- Args:
- ext : The file extension as a string.
-
- Returns:
- FileFormat: The FileFormat instance corresponding to the file extension.
- """
- variantName = ext.upper()
- if variantName in FileFormat.__members__: return FileFormat[variantName]
-
- variantName = variantName.lower()
- for member in cls:
- if variantName in member.value: return member
-
- raise ValueErr("ext", "a valid FileFormat file extension", ext)
-
- def __str__(self) -> str:
- """
- (Private) converts to str representation. Good practice for usage with argparse.
-
- Returns:
- str : the string representation of the file extension.
- """
- return self.value[-1] #TODO: fix, it's the dumb pickle thing
-
-class FilePath():
- """
- Represents a file path. View this as an attempt to standardize file-related operations by expecting
- values of this type in any process requesting a file path.
- """
- def __init__(self, filePath :str, ext :FileFormat, *, prefix = "") -> None:
- """
- (Private) Initializes an instance of FilePath.
-
- Args:
- path : the end of the path, containing the file name.
- ext : the file's extension.
- prefix : anything before path, if the last '/' isn't there it's added by the code.
-
- Returns:
- None : practically, a FilePath instance.
- """
- self.ext = ext
- self.filePath = filePath
-
- if prefix and prefix[-1] != '/': prefix += '/'
- self.prefix = prefix
-
- @classmethod
- def fromStrPath(cls, path :str) -> "FilePath":
- """
- Factory method to parse a string from which to obtain, if possible, a valid FilePath instance.
-
- Args:
- path : the string containing the path
-
- Raises:
- PathErr : if the provided string doesn't represent a valid path.
-
- Returns:
- FilePath : the constructed instance.
- """
- # This method is often used to construct FilePath instances from ARGS UI arguments. These arguments *should*
- # always be correct paths and could be used as raw strings, however most if not all functions that work with
- # file paths request the FilePath objects specifically, which is a very good thing in any case other than this.
- # What ends up happening is we spend time parsing a string into a FilePath so that the function accepts it, only
- # to call show() immediately to bring back the string and open the file it points to.
- # TODO: this is an indication that the arguments SHOULD BE OF TYPE FilePath if they are filepaths, this ENSURES
- # their correctness when modifying the UI and avoids the pointless back-and-forth.
- result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path)
- if not result or not result["name"] or not result["ext"]:
- raise PathErr(path, "cannot recognize folder structure or extension in path")
-
- prefix = result["prefix"] if result["prefix"] else ""
- return cls(result["name"], FileFormat.fromExt(result["ext"]), prefix = prefix)
-
- def show(self) -> str:
- """
- Shows the path as a string.
-
- Returns:
- str : the path shown as a string.
- """
- return f"{self.prefix}{self.filePath}.{self.ext}"
-
- def __str__(self) -> str: return self.show()
-
-# ERRORS
-def terminate(msg :str) -> None:
- """
- Terminate the execution of the script with an error message.
-
- Args:
- msg (str): The error message to be displayed.
-
- Returns:
- None
- """
- sys.exit(f"Execution aborted: {msg}\n")
-
-def logWarning(msg :str, loggerPath :str) -> None:
- """
- Log a warning message to an output log file and print it to the console. The final period and a
- newline is added by the function.
-
- Args:
- s (str): The warning message to be logged and printed.
- loggerPath : The file path of the output log file. Given as a string, parsed to a FilePath and
- immediately read back (beware relative expensive operation, log with caution).
-
- Returns:
- None
- """
- # building the path and then reading it immediately seems useless, but it's actually a way of
- # validating that reduces repetition on the caller's side. Besides, logging a message by writing
- # to a file is supposed to be computationally expensive anyway, so this is also a good deterrent from
- # mindlessly logging whenever something comes up, log at the very end and tell the user everything
- # that went wrong. If you don't like it: implement a persistent runtime buffer that gets dumped to
- # the file only at the end of the program's execution.
- with open(FilePath.fromStrPath(loggerPath).show(), 'a') as log: log.write(f"{msg}.\n")
-
-class CustomErr(Exception):
- """
- Custom error class to handle exceptions in a structured way, with a unique identifier and a message.
- """
- __idGenerator = count()
- errName = "Custom Error"
- def __init__(self, msg :str, details = "", explicitErrCode = -1) -> None:
- """
- (Private) Initializes an instance of CustomErr.
-
- Args:
- msg (str): Error message to be displayed.
- details (str): Informs the user more about the error encountered. Defaults to "".
- explicitErrCode (int): Explicit error code to be used. Defaults to -1.
-
- Returns:
- None : practically, a CustomErr instance.
- """
- self.msg = msg
- self.details = details
-
- self.id = max(explicitErrCode, next(CustomErr.__idGenerator))
-
- def throw(self, loggerPath = "") -> None:
- """
- Raises the current CustomErr instance, logging a warning message before doing so.
-
- Raises:
- self: The current CustomErr instance.
-
- Returns:
- None
- """
- if loggerPath: logWarning(str(self), loggerPath)
- raise self
-
- def abort(self) -> None:
- """
- Aborts the execution of the script.
-
- Returns:
- None
- """
- terminate(str(self))
-
- def __str__(self) -> str:
- """
- (Private) Returns a string representing the current CustomErr instance.
-
- Returns:
- str: A string representing the current CustomErr instance.
- """
- return f"{CustomErr.errName} #{self.id}: {self.msg}, {self.details}."
-
-class ArgsErr(CustomErr):
- """
- CustomErr subclass for UI arguments errors.
- """
- errName = "Args Error"
- def __init__(self, argName :str, expected :Any, actual :Any, msg = "no further details provided") -> None:
- super().__init__(f"argument \"{argName}\" expected {expected} but got {actual}", msg)
-
-class DataErr(CustomErr):
- """
- CustomErr subclass for data formatting errors.
- """
- errName = "Data Format Error"
- def __init__(self, fileName :str, msg = "no further details provided") -> None:
- super().__init__(f"file \"{fileName}\" contains malformed data", msg)
-
-class PathErr(CustomErr):
- """
- CustomErr subclass for filepath formatting errors.
- """
- errName = "Path Error"
- def __init__(self, path :FilePath, msg = "no further details provided") -> None:
- super().__init__(f"path \"{path}\" is invalid", msg)
-
-class ValueErr(CustomErr):
- """
- CustomErr subclass for any value error.
- """
- errName = "Value Error"
- def __init__(self, valueName: str, expected :Any, actual :Any, msg = "no further details provided") -> None:
- super().__init__("value " + f"\"{valueName}\" " * bool(valueName) + f"was supposed to be {expected}, but got {actual} instead", msg)
-
-# RESULT
-T = TypeVar('T')
-E = TypeVar('E', bound = CustomErr) # should bind to Result.ResultErr but python happened!
-class Result(Generic[T, E]):
- class ResultErr(CustomErr):
- """
- CustomErr subclass for all Result errors.
- """
- errName = "Result Error"
- def __init__(self, msg = "no further details provided") -> None:
- super().__init__(msg)
- """
- Class to handle the result of an operation, with a value and a boolean flag to indicate
- whether the operation was successful or not.
- """
- def __init__(self, value :Union[T, E], isOk :bool) -> None:
- """
- (Private) Initializes an instance of Result.
-
- Args:
- value (Union[T, E]): The value to be stored in the Result instance.
- isOk (bool): A boolean flag to indicate whether the operation was successful or not.
-
- Returns:
- None : practically, a Result instance.
- """
- self.isOk = isOk
- self.isErr = not isOk
- self.value = value
-
- @classmethod
- def Ok(cls, value :T) -> "Result":
- """
- Constructs a new Result instance with a successful operation.
-
- Args:
- value (T): The value to be stored in the Result instance, set as successful.
-
- Returns:
- Result: A new Result instance with a successful operation.
- """
- return Result(value, isOk = True)
-
- @classmethod
- def Err(cls, value :E) -> "Result":
- """
- Constructs a new Result instance with a failed operation.
-
- Args:
- value (E): The value to be stored in the Result instance, set as failed.
-
- Returns:
- Result: A new Result instance with a failed operation.
- """
- return Result(value, isOk = False)
-
- def unwrap(self) -> T:
- """
- Unwraps the value of the Result instance, if the operation was successful.
-
- Raises:
- ResultErr: If the operation was not successful.
-
- Returns:
- T: The value of the Result instance, if the operation was successful.
- """
- if self.isOk: return self.value
- raise Result.ResultErr(f"Unwrapped Result.Err : {self.value}")
-
- def unwrapOr(self, default :T) -> T:
- """
- Unwraps the value of the Result instance, if the operation was successful, otherwise
- it returns a default value.
-
- Args:
- default (T): The default value to be returned if the operation was not successful.
-
- Returns:
- T: The value of the Result instance, if the operation was successful,
- otherwise the default value.
- """
- return self.value if self.isOk else default
-
- def expect(self, err :"Result.ResultErr") -> T:
- """
- Expects that the value of the Result instance is successful, otherwise it raises an error.
-
- Args:
- err (Exception): The error to be raised if the operation was not successful.
-
- Raises:
- err: The error raised if the operation was not successful.
-
- Returns:
- T: The value of the Result instance, if the operation was successful.
- """
- if self.isOk: return self.value
- raise err
-
- U = TypeVar("U")
- def map(self, mapper: Callable[[T], U]) -> "Result[U, E]":
- """
- Maps the value of the current Result to whatever is returned by the mapper function.
- If the Result contained an unsuccessful operation to begin with it remains unchanged
- (a reference to the current instance is returned).
- If the mapper function panics the returned result instance will be of the error kind.
-
- Args:
- mapper (Callable[[T], U]): The mapper operation to be applied to the Result value.
-
- Returns:
- Result[U, E]: The result of the mapper operation applied to the Result value.
- """
- if self.isErr: return self
- try: return Result.Ok(mapper(self.value))
- except Exception as e: return Result.Err(e)
-
- D = TypeVar("D", bound = "Result.ResultErr")
- def mapErr(self, mapper :Callable[[E], D]) -> "Result[T, D]":
- """
- Maps the error of the current Result to whatever is returned by the mapper function.
- If the Result contained a successful operation it remains unchanged
- (a reference to the current instance is returned).
- If the mapper function panics this method does as well.
-
- Args:
- mapper (Callable[[E], D]): The mapper operation to be applied to the Result error.
-
- Returns:
- Result[U, E]: The result of the mapper operation applied to the Result error.
- """
- if self.isOk: return self
- return Result.Err(mapper(self.value))
-
- def __str__(self):
- return f"Result::{'Ok' if self.isOk else 'Err'}({self.value})"
-
-# FILES
-def read_dataset(path :FilePath, datasetName = "Dataset (not actual file name!)") -> pd.DataFrame:
- """
- Reads a .csv or .tsv file and returns it as a Pandas DataFrame.
-
- Args:
- path : the path to the dataset file.
- datasetName : the name of the dataset.
-
- Raises:
- DataErr: If anything goes wrong when trying to open the file, if pandas thinks the dataset is empty or if
- it has less than 2 columns.
-
- Returns:
- pandas.DataFrame: The dataset loaded as a Pandas DataFrame.
- """
- # I advise against the use of this function. This is an attempt at standardizing bad legacy code rather than
- # removing / replacing it to avoid introducing as many bugs as possible in the tools still relying on this code.
- # First off, this is not the best way to distinguish between .csv and .tsv files and Galaxy itself makes it really
- # hard to implement anything better. Also, this function's name advertizes it as a dataset-specific operation and
- # contains dubious responsibility (how many columns..) while being a file-opening function instead. My suggestion is
- # TODO: stop using dataframes ever at all in anything and find a way to have tight control over file extensions.
- try: dataset = pd.read_csv(path.show(), sep = '\t', header = None, engine = "python")
- except:
- try: dataset = pd.read_csv(path.show(), sep = ',', header = 0, engine = "python")
- except Exception as err: raise DataErr(datasetName, f"encountered empty or wrongly formatted data: {err}")
-
- if len(dataset.columns) < 2: raise DataErr(datasetName, "a dataset is always meant to have at least 2 columns")
- return dataset
-
-def readPickle(path :FilePath) -> Any:
- """
- Reads the contents of a .pickle file, which needs to exist at the given path.
-
- Args:
- path : the path to the .pickle file.
-
- Returns:
- Any : the data inside a pickle file, could be anything.
- """
- with open(path.show(), "rb") as fd: return pickle.load(fd)
-
-def writePickle(path :FilePath, data :Any) -> None:
- """
- Saves any data in a .pickle file, created at the given path.
-
- Args:
- path : the path to the .pickle file.
- data : the data to be written to the file.
-
- Returns:
- None
- """
- with open(path.show(), "wb") as fd: pickle.dump(data, fd)
-
-def readCsv(path :FilePath, delimiter = ',', *, skipHeader = True) -> List[List[str]]:
- """
- Reads the contents of a .csv file, which needs to exist at the given path.
-
- Args:
- path : the path to the .csv file.
- delimiter : allows other subformats such as .tsv to be opened by the same method (\\t delimiter).
- skipHeader : whether the first row of the file is a header and should be skipped.
-
- Returns:
- List[List[str]] : list of rows from the file, each parsed as a list of strings originally separated by commas.
- """
- with open(path.show(), "r", newline = "") as fd: return list(csv.reader(fd, delimiter = delimiter))[skipHeader:]
-
-def readSvg(path :FilePath, customErr :Optional[Exception] = None) -> ET.ElementTree:
- """
- Reads the contents of a .svg file, which needs to exist at the given path.
-
- Args:
- path : the path to the .svg file.
-
- Raises:
- DataErr : if the map is malformed.
-
- Returns:
- Any : the data inside a svg file, could be anything.
- """
- try: return ET.parse(path.show())
- except (ET.XMLSyntaxError, ET.XMLSchemaParseError) as err:
- raise customErr if customErr else err
-
-def writeSvg(path :FilePath, data:ET.ElementTree) -> None:
- """
- Saves svg data opened with lxml.etree in a .svg file, created at the given path.
-
- Args:
- path : the path to the .svg file.
- data : the data to be written to the file.
-
- Returns:
- None
- """
- with open(path.show(), "wb") as fd: fd.write(ET.tostring(data))
-
-# UI ARGUMENTS
-class Bool:
- def __init__(self, argName :str) -> None:
- self.argName = argName
-
- def __call__(self, s :str) -> bool: return self.check(s)
-
- def check(self, s :str) -> bool:
- s = s.lower()
- if s == "true" : return True
- if s == "false": return False
- raise ArgsErr(self.argName, "boolean string (true or false, not case sensitive)", f"\"{s}\"")
-
-class Float:
- def __init__(self, argName = "Dataset values, not an argument") -> None:
- self.argName = argName
-
- def __call__(self, s :str) -> float: return self.check(s)
-
- def check(self, s :str) -> float:
- try: return float(s)
- except ValueError:
- s = s.lower()
- if s == "nan" or s == "none": return math.nan
- raise ArgsErr(self.argName, "numeric string or \"None\" or \"NaN\" (not case sensitive)", f"\"{s}\"")
-
-# MODELS
-OldRule = List[Union[str, "OldRule"]]
-class Model(Enum):
- """
- Represents a metabolic model, either custom or locally supported. Custom models don't point
- to valid file paths.
- """
-
- Recon = "Recon"
- ENGRO2 = "ENGRO2"
- ENGRO2_no_legend = "ENGRO2_no_legend"
- HMRcore = "HMRcore"
- HMRcore_no_legend = "HMRcore_no_legend"
- Custom = "Custom" # Exists as a valid variant in the UI, but doesn't point to valid file paths.
-
- def __raiseMissingPathErr(self, path :Optional[FilePath]) -> None:
- if not path: raise PathErr("<>", "it's necessary to provide a custom path when retrieving files from a custom model")
-
- def getRules(self, toolDir :str, customPath :Optional[FilePath] = None) -> Dict[str, Dict[str, OldRule]]:
- """
- Open "rules" file for this model.
-
- Returns:
- Dict[str, Dict[str, OldRule]] : the rules for this model.
- """
- path = customPath if self is Model.Custom else FilePath(f"{self.name}_rules", FileFormat.PICKLE, prefix = f"{toolDir}/local/pickle files/")
- self.__raiseMissingPathErr(path)
- return readPickle(path)
-
- def getTranslator(self, toolDir :str, customPath :Optional[FilePath] = None) -> Dict[str, Dict[str, str]]:
- """
- Open "gene translator (old: gene_in_rule)" file for this model.
-
- Returns:
- Dict[str, Dict[str, str]] : the translator dict for this model.
- """
- path = customPath if self is Model.Custom else FilePath(f"{self.name}_genes", FileFormat.PICKLE, prefix = f"{toolDir}/local/pickle files/")
- self.__raiseMissingPathErr(path)
- return readPickle(path)
-
- def getMap(self, toolDir = ".", customPath :Optional[FilePath] = None) -> ET.ElementTree:
- path = customPath if self is Model.Custom else FilePath(f"{self.name}_map", FileFormat.SVG, prefix = f"{toolDir}/local/svg metabolic maps/")
- self.__raiseMissingPathErr(path)
- return readSvg(path, customErr = DataErr(path, f"custom map in wrong format"))
-
- def getCOBRAmodel(self, toolDir = ".", customPath :Optional[FilePath] = None, customExtension :Optional[FilePath]=None)->cobra.Model:
- if(self is Model.Custom):
- return self.load_custom_model(customPath, customExtension)
- else:
- return cobra.io.read_sbml_model(FilePath(f"{self.name}", FileFormat.XML, prefix = f"{toolDir}/local/models/").show())
-
- def load_custom_model(self, file_path :FilePath, ext :Optional[FileFormat] = None) -> cobra.Model:
- ext = ext if ext else file_path.ext
- try:
- if ext is FileFormat.XML:
- return cobra.io.read_sbml_model(file_path.show())
-
- if ext is FileFormat.JSON:
- return cobra.io.load_json_model(file_path.show())
-
- except Exception as e: raise DataErr(file_path, e.__str__())
- raise DataErr(file_path,
- f"Fomat \"{file_path.ext}\" is not recognized, only JSON and XML files are supported.")
-
- def __str__(self) -> str: return self.value
\ No newline at end of file
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/reaction_parsing.py
--- a/cobraxy-9688ad27287b/COBRAxy/utils/reaction_parsing.py Sun Oct 13 11:35:56 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-from enum import Enum
-import utils.general_utils as utils
-from typing import Dict
-import csv
-import re
-
-# Reaction direction encoding:
-class ReactionDir(Enum):
- """
- A reaction can go forwards, backwards or be reversible (able to proceed in both directions).
- Models created / managed with cobrapy encode this information within the reaction's
- formula using the arrows this enum keeps as values.
- """
- FORWARD = "-->"
- BACKWARD = "<--"
- REVERSIBLE = "<=>"
-
- @classmethod
- def fromReaction(cls, reaction :str) -> 'ReactionDir':
- """
- Takes a whole reaction formula string and looks for one of the arrows, returning the
- corresponding reaction direction.
-
- Args:
- reaction : the reaction's formula.
-
- Raises:
- ValueError : if no valid arrow is found.
-
- Returns:
- ReactionDir : the corresponding reaction direction.
- """
- for member in cls:
- if member.value in reaction: return member
-
- raise ValueError("No valid arrow found within reaction string.")
-
-ReactionsDict = Dict[str, Dict[str, float]]
-
-
-def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
- """
- Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id
- (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient.
- Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id)
- appears without an explicit coeff, the value 1.0 will be used instead.
-
- Args:
- reactionsDict : dictionary encoding custom reactions information.
- rId : unique reaction id.
- reaction : the reaction's formula.
-
- Returns:
- None
-
- Side effects:
- reactionsDict : mut
- """
- reaction = reaction.strip()
- if not reaction: return
-
- reactionsDict[rId] = {}
- # We assume the '+' separating consecutive metabs in a reaction is spaced from them,
- # to avoid confusing it for electrical charge:
- for word in reaction.split(" + "):
- metabId, stoichCoeff = word, 1.0
- # Implicit stoichiometric coeff is equal to 1, some coeffs are floats.
-
- # Accepted coeffs can be integer or floats with a dot (.) decimal separator
- # and must be separated from the metab with a space:
- foundCoeff = re.search(r"\d+(\.\d+)? ", word)
- if foundCoeff:
- wholeMatch = foundCoeff.group(0)
- metabId = word[len(wholeMatch):].strip()
- stoichCoeff = float(wholeMatch.strip())
-
- reactionsDict[rId][metabId] = stoichCoeff
-
- if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed.
-
-
-def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
- """
- Parses the given dictionary into the correct format.
-
- Args:
- unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings.
-
- Returns:
- ReactionsDict: The correctly parsed dict.
- """
- reactionsDict :ReactionsDict = {}
- for rId, reaction in unparsed_reactions.items():
- reactionDir = ReactionDir.fromReaction(reaction)
- left, right = reaction.split(f" {reactionDir.value} ")
-
- # Reversible reactions are split into distinct reactions, one for each direction.
- # In general we only care about substrates, the product information is lost.
- reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
- if reactionDir is not ReactionDir.BACKWARD:
- add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left)
-
- if reactionDir is not ReactionDir.FORWARD:
- add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right)
-
- # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
- # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
- # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
- # through with a similar convention on ALL reactions and correctly encode direction based on their
- # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
- # which involves re-writing the "reactions" dictionary.
-
- return reactionsDict
-
-
-def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
- """
- Creates a custom dictionary encoding reactions information from a csv file containing
- data about these reactions, the path of which is given as input.
-
- Args:
- customReactionsPath : path to the reactions information file.
-
- Returns:
- ReactionsDict : dictionary encoding custom reactions information.
- """
- reactionsData :Dict[str, str] = {row[0]: row[1] for row in utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath))}
-
- return create_reaction_dict(reactionsData)
-
diff -r a48b2e06ebe7 -r f4f93df8c221 cobraxy-9688ad27287b/COBRAxy/utils/rule_parsing.py
--- a/cobraxy-9688ad27287b/COBRAxy/utils/rule_parsing.py Sun Oct 13 11:35:56 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,243 +0,0 @@
-from enum import Enum
-import utils.general_utils as utils
-from typing import List, Union, Optional
-
-class RuleErr(utils.CustomErr):
- """
- CustomErr subclass for rule syntax errors.
- """
- errName = "Rule Syntax Error"
- def __init__(self, rule :str, msg = "no further details provided") -> None:
- super().__init__(
- f"rule \"{rule}\" is malformed, {msg}",
- "please verify your input follows the validity guidelines")
-
-class RuleOp(Enum):
- """
- Encodes all operators valid in gene rules.
- """
- OR = "or"
- AND = "and"
-
- @classmethod
- def isOperator(cls, op :str) -> bool:
- return op.upper() in cls.__members__
-
- def __str__(self) -> str: return self.value
-
-class OpList(List[Union[str, "OpList"]]):
- """
- Represents a parsed rule and each of its nesting levels, including the operator that level uses.
- """
- def __init__(self, op :Optional[RuleOp] = None) -> None:
- """
- (Private) Initializes an instance of OpList.
-
- Args:
- op (str): Operator to be assigned to the OpList. Defaults to "".
-
- Returns:
- None : practically, an OpList instance.
- """
- self.op = op
-
- def setOpIfMissing(self, op :RuleOp) -> None:
- """
- Sets the operator of the OpList if it's missing.
-
- Args:
- op (str): Operator to be assigned to the OpList.
-
- Returns:
- None
- """
- if not self.op: self.op = op
-
- def __repr__(self, indent = "") -> str:
- """
- (Private) Returns a string representation of the current OpList instance.
-
- Args:
- indent (str): Indentation level . Defaults to "".
-
- Returns:
- str: A string representation of the current OpList instance.
- """
- nextIndent = indent + " "
- return f"<{self.op}>[\n" + ",\n".join([
- f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}"
- for item in self ]) + f"\n{indent}]"
-
-class RuleStack:
- """
- FILO stack structure to save the intermediate representation of a Rule during parsing, with the
- current nesting level at the top of the stack.
- """
- def __init__(self) -> None:
- """
- (Private) initializes an instance of RuleStack.
-
- Returns:
- None : practically, a RuleStack instance.
- """
- self.__stack = [OpList()] # the stack starts out with the result list already allocated
- self.__updateCurrent()
-
- def pop(self) -> None:
- """
- Removes the OpList on top of the stack, also flattening it once when possible.
-
- Side Effects:
- self : mut
-
- Returns:
- None
- """
- oldTop = self.__stack.pop()
- if len(oldTop) == 1 and isinstance(oldTop[0], OpList): self.__stack[-1][-1] = oldTop[0]
- self.__updateCurrent()
-
- def push(self, operator = "") -> None:
- """
- Adds a new nesting level, in the form of a new OpList on top of the stack.
-
- Args:
- operator : the operator assigned to the new OpList.
-
- Side Effects:
- self : mut
-
- Returns:
- None
- """
- newLevel = OpList(operator)
- self.current.append(newLevel)
- self.__stack.append(newLevel)
- self.__updateCurrent()
-
- def popForward(self) -> None:
- """
- Moves the last "actual" item from the 2nd to last list to the beginning of the top list, as per
- the example below:
- stack : [list_a, list_b]
- list_a : [item1, item2, list_b] --> [item1, list_b]
- list_b : [item3, item4] --> [item2, item3, item4]
-
- This is essentially a "give back as needed" operation.
-
- Side Effects:
- self : mut
-
- Returns:
- None
- """
- self.current.insert(0, self.__stack[-2].pop(-2))
-
- def currentIsAnd(self) -> bool:
- """
- Checks if the current OpList's assigned operator is "and".
-
- Returns:
- bool : True if the current OpList's assigned operator is "and", False otherwise.
- """
- return self.current.op is RuleOp.AND
-
- def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]:
- """
- Obtains the first OpList on the stack, only if it's the only element.
-
- Args:
- err : The error to raise if obtaining the result is not possible.
-
- Side Effects:
- self : mut
-
- Raises:
- err: If given, otherwise None is returned.
-
- Returns:
- Optional[OpList]: The first OpList on the stack, only if it's the only element.
- """
-
- if len(self.__stack) == 1: return self.__stack.pop()
- if err: raise err
- return None
-
- def __updateCurrent(self) -> None:
- """
- (Private) Updates the current OpList to the one on top of the stack.
-
- Side Effects:
- self : mut
-
- Returns:
- None
- """
- self.current = self.__stack[-1]
-
-def parseRuleToNestedList(rule :str) -> OpList:
- """
- Parse a single rule from its string representation to an OpList, making all priority explicit
- through nesting levels.
-
- Args:
- rule : the string representation of a rule to be parsed.
-
- Raises:
- RuleErr : whenever something goes wrong during parsing.
-
- Returns:
- OpList : the parsed rule.
- """
- source = iter(rule
- .replace("(", "( ").replace(")", " )") # Single out parens as words
- .strip() # remove whitespace at extremities
- .split()) # split by spaces
-
- stack = RuleStack()
- nestingErr = RuleErr(rule, "mismatch between open and closed parentheses")
- try:
- while True: # keep reading until source ends
- while True:
- operand = next(source, None) # expected name or rule opening
- if operand is None: raise RuleErr(rule, "found trailing open parentheses")
- if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic
- raise RuleErr(rule, f"found \"{operand}\" in unexpected position")
-
- if operand != "(": break # found name
-
- # found rule opening, we add new nesting level but don't know the operator
- stack.push()
-
- stack.current.append(operand)
-
- while True: # keep reading until operator is found or source ends
- operator = next(source, None) # expected operator or rule closing
- if operator and operator != ")": break # found operator
-
- if stack.currentIsAnd(): stack.pop() # we close the "and" chain
-
- if not operator: break
- stack.pop() # we close the parentheses
-
- # we proceed with operator:
- if not operator: break # there is no such thing as a double loop break.. yet
-
- if not RuleOp.isOperator(operator): raise RuleErr(
- rule, f"found \"{operator}\" in unexpected position, expected operator")
-
- operator = RuleOp(operator)
- if operator is RuleOp.OR and stack.currentIsAnd():
- stack.pop()
-
- elif operator is RuleOp.AND and not stack.currentIsAnd():
- stack.push(operator)
- stack.popForward()
-
- stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had
-
- except RuleErr as err: raise err # bubble up proper errors
- except: raise nestingErr # everything else is interpreted as a nesting error.
-
- parsedRule = stack.obtain(nestingErr)
- return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule
\ No newline at end of file