changeset 406:187cee1a00e2 draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 14:44:15 +0000
parents 716b1a638fb5
children 6619f237aebe
files COBRAxy/custom_data_generator.py COBRAxy/custom_data_generator.xml COBRAxy/custom_data_generator_beta.py COBRAxy/custom_data_generator_beta.xml COBRAxy/ras_generator.py COBRAxy/ras_generator.xml COBRAxy/ras_generator_beta.py COBRAxy/ras_generator_beta.xml COBRAxy/ras_to_bounds.xml COBRAxy/ras_to_bounds_beta.py COBRAxy/ras_to_bounds_beta.xml COBRAxy/rps_generator.py COBRAxy/rps_generator.xml COBRAxy/rps_generator_beta.py COBRAxy/rps_generator_beta.xml
diffstat 15 files changed, 2216 insertions(+), 374 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/custom_data_generator.py	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/custom_data_generator.py	Mon Sep 08 14:44:15 2025 +0000
@@ -10,40 +10,36 @@
 import utils.reaction_parsing as reactionUtils
 
 ARGS : argparse.Namespace
-def process_args(args: List[str] = None) -> argparse.Namespace:
+def process_args(args:List[str] = None) -> argparse.Namespace:
     """
-    Parse command-line arguments for CustomDataGenerator.
-    """
+    Interfaces the script of a module with its frontend, making the user's choices for
+    various parameters available as values in code.
 
-    parser = argparse.ArgumentParser(
-        usage="%(prog)s [options]",
-        description="Generate custom data from a given model"
-    )
-
-    parser.add_argument("--out_log", type=str, required=True,
-                        help="Output log file")
+    Args:
+        args : Always obtained (in file) from sys.argv
 
-    parser.add_argument("--model", type=str,
-                        help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")
-    parser.add_argument("--input", type=str,
-                        help="Custom model file (JSON or XML)")
-    parser.add_argument("--name", type=str, required=True,
-                        help="Model name (default or custom)")
+    Returns:
+        Namespace : An object containing the parsed arguments
+    """
+    parser = argparse.ArgumentParser(
+        usage = "%(prog)s [options]",
+        description = "generate custom data from a given model")
     
-    parser.add_argument("--medium_selector", type=str, required=True,
-                        help="Medium selection option")
+    parser.add_argument("-ol", "--out_log", type = str, required = True, help = "Output log")
 
-    parser.add_argument("--gene_format", type=str, default="Default",
-                        help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")
-    
-    parser.add_argument("--out_tabular", type=str,
-                        help="Output file for the merged dataset (CSV or XLSX)")
-    
-    parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
-                        help="Tool directory (passed from Galaxy as $__tool_directory__)")
+    parser.add_argument("-orules", "--out_rules", type = str, required = True, help = "Output rules")
+    parser.add_argument("-orxns", "--out_reactions", type = str, required = True, help = "Output reactions")
+    parser.add_argument("-omedium", "--out_medium", type = str, required = True, help = "Output medium")
+    parser.add_argument("-obnds", "--out_bounds", type = str, required = True, help = "Output bounds")
 
+    parser.add_argument("-id", "--input",   type = str, required = True, help = "Input model")
+    parser.add_argument("-mn", "--name",    type = str, required = True, help = "Input model name")
+    # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
+    parser.add_argument('-idop', '--output_path', type = str, default='result', help = 'output path for maps')
+    argsNamespace = parser.parse_args(args)
+    # ^ can't get this one to work from xml, there doesn't seem to be a way to get the directory attribute from the collection
 
-    return parser.parse_args(args)
+    return argsNamespace
 
 ################################- INPUT DATA LOADING -################################
 def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
@@ -147,52 +143,6 @@
     return bounds
 
 
-
-def generate_compartments(model: cobra.Model) -> pd.DataFrame:
-    """
-    Generates a DataFrame containing compartment information for each reaction.
-    Creates columns for each compartment position (Compartment_1, Compartment_2, etc.)
-    
-    Args:
-        model: the COBRA model to extract compartment data from.
-        
-    Returns:
-        pd.DataFrame: DataFrame with ReactionID and compartment columns
-    """
-    pathway_data = []
-
-    # First pass: determine the maximum number of pathways any reaction has
-    max_pathways = 0
-    reaction_pathways = {}
-
-    for reaction in model.reactions:
-        # Get unique pathways from all metabolites in the reaction
-        if type(reaction.annotation['pathways']) == list:
-            reaction_pathways[reaction.id] = reaction.annotation['pathways']
-            max_pathways = max(max_pathways, len(reaction.annotation['pathways']))
-        else:
-            reaction_pathways[reaction.id] = [reaction.annotation['pathways']]
-
-    # Create column names for pathways
-    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]
-
-    # Second pass: create the data
-    for reaction_id, pathways in reaction_pathways.items():
-        row = {"ReactionID": reaction_id}
-        
-        # Fill pathway columns
-        for i in range(max_pathways):
-            col_name = pathway_columns[i]
-            if i < len(pathways):
-                row[col_name] = pathways[i]
-            else:
-                row[col_name] = None  # or "" if you prefer empty strings
-
-        pathway_data.append(row)
-
-    return pd.DataFrame(pathway_data)
-
-
 ###############################- FILE SAVING -################################
 def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
     """
@@ -232,14 +182,6 @@
         for key, value in data.items():
             writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
 
-def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
-    try:
-        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
-        df.to_csv(path, sep="\t", index=False)
-    except Exception as e:
-        raise utils.DataErr(path, f"failed writing tabular output: {e}")
-
-
 ###############################- ENTRY POINT -################################
 def main(args:List[str] = None) -> None:
     """
@@ -252,92 +194,24 @@
     global ARGS
     ARGS = process_args(args)
 
-
-    if ARGS.input:
-        # load custom model
-        model = load_custom_model(
-            utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
-    else:
-        # load built-in model
-
-        try:
-            model_enum = utils.Model[ARGS.model]  # e.g., Model['ENGRO2']
-        except KeyError:
-            raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)
-
-        # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models)
-        try:
-            model = model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
-        except Exception as e:
-            # Wrap/normalize load errors as DataErr for consistency
-            raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")
+    # ensure the output directory exists before writing any result files
+    if os.path.isdir(ARGS.output_path) == False: os.makedirs(ARGS.output_path)
 
-    # Determine final model name: explicit --name overrides, otherwise use the model id
-    
-    model_name = ARGS.name if ARGS.name else ARGS.model
-    
-    if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
-        df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
-        ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
-        medium = df_mediums[[ARGS.medium_selector]]
-        medium = medium[ARGS.medium_selector].to_dict()
-
-        # Set all reactions to zero in the medium
-        for rxn_id, _ in model.medium.items():
-            model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
-        
-        # Set medium conditions
-        for reaction, value in medium.items():
-            if value is not None:
-                model.reactions.get_by_id(reaction).lower_bound = -float(value)
-
-    if ARGS.name == "ENGRO2" and ARGS.gene_format != "Default":
-
-        model = utils.convert_genes(model, ARGS.gene_format.replace("HGNC_", "HGNC "))
+    # load custom model
+    model = load_custom_model(
+        utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
 
     # generate data
     rules = generate_rules(model, asParsed = False)
     reactions = generate_reactions(model, asParsed = False)
     bounds = generate_bounds(model)
     medium = get_medium(model)
-    if ARGS.name == "ENGRO2":
-        compartments = generate_compartments(model)
 
-    df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"])
-    df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"])
-
-    df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
-    df_medium = medium.rename(columns = {"reaction": "ReactionID"})
-    df_medium["InMedium"] = True # flag per indicare la presenza nel medium
-
-    merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
-    merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
-    if ARGS.name == "ENGRO2": 
-        merged = merged.merge(compartments, on = "ReactionID", how = "outer")
-    merged = merged.merge(df_medium, on = "ReactionID", how = "left")
-
-    merged["InMedium"] = merged["InMedium"].fillna(False)
-
-    merged = merged.sort_values(by = "InMedium", ascending = False)
-
-    #out_file = os.path.join(ARGS.output_path, f"{os.path.basename(ARGS.name).split('.')[0]}_custom_data")
-
-    #merged.to_csv(out_file, sep = '\t', index = False)
-
-
-    ####
-
-
-    if not ARGS.out_tabular:
-        raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)
-    save_as_tabular_df(merged, ARGS.out_tabular)
-    expected = ARGS.out_tabular
-
-    # verify output exists and non-empty
-    if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
-        raise utils.DataErr(expected, "Output non creato o vuoto")
-
-    print("CustomDataGenerator: completed successfully")
+    # save files out of collection: path coming from xml
+    save_as_csv(rules, ARGS.out_rules, ("ReactionID", "Rule"))
+    save_as_csv(reactions, ARGS.out_reactions, ("ReactionID", "Reaction"))
+    bounds.to_csv(ARGS.out_bounds, sep = '\t')
+    medium.to_csv(ARGS.out_medium, sep = '\t')
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
--- a/COBRAxy/custom_data_generator.xml	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/custom_data_generator.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -1,4 +1,8 @@
 <tool id="CustomDataGenerator" name="Custom Data Generator" version="2.0.0">
+    
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
 
 	<requirements>
         <requirement type="package" version="1.24.4">numpy</requirement>
@@ -7,88 +11,29 @@
         <requirement type="package" version="5.2.2">lxml</requirement>
 	</requirements>
 
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-
     <command detect_errors="exit_code">
         <![CDATA[
       	python $__tool_directory__/custom_data_generator.py
-        --tool_dir $__tool_directory__
-        --medium_selector $cond_model.cond_medium.medium_selector
-        #if $cond_model.model_selector == 'Custom_model'
-            --input $cond_model.input
-            --name $cond_model.name
-        #else
-            --model $cond_model.model_selector
-            --name $cond_model.model_selector
-        #end if
-
-        --gene_format $cond_model.gene_format
-        
+        --input $input
+        --name $input.element_identifier
         --out_log $log
-        --out_tabular $out_tabular
+        --out_rules $rules
+        --out_reactions $reactions
+        --out_bounds $bounds
+        --out_medium $medium
         ]]>
     </command>
     <inputs>
-        <conditional name="cond_model">
-            <expand macro="options_model"/>
-            
-            <!-- ENGRO2 -->
-            <when value="ENGRO2">
-                <param name="name" argument="--name" type="text" value="ENGRO2" hidden="true" />
-                <conditional name="cond_medium">
-                    <expand macro="options_ras_to_bounds_medium"/>
-                </conditional>
-
-                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
-                    <option value="Default" selected="true">Keep original gene nomenclature</option>
-                    <option value="ENSG">ENSNG (Ensembl Gene ID)</option>
-                    <option value="HGNC_symbol">HGNC Symbol</option>
-                    <option value="HGNC_ID">HGNC ID</option>
-                    <option value="entrez_id">Entrez Gene ID</option>
-                </param>
-            </when>
-
-            <!-- Recon -->
-            <when value="Recon">
-                <param name="name" argument="--name" type="text" value="Recon" hidden="true" />
-                <conditional name="cond_medium">
-                    <param name="medium_selector" argument="--medium_selector" type="select" label="Medium">
-                        <option value="Default" selected="true">Default (Recon built-in medium)</option>
-                    </param>
-                    <when value="Default">
-                        <!-- Nessun parametro aggiuntivo necessario -->
-                    </when>
-                </conditional>
-                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
-                    <option value="Default" selected="true">Keep original gene nomenclature</option>
-                </param>
-            </when>
-
-            <!-- Custom model -->
-            <when value="Custom_model">
-                <param name="input" argument="--input" type="data" format="json,xml" label="Custom model file:" />
-                <param name="name" argument="--name" type="text" label="Model's name:" value="CustomModel" />
-                <conditional name="cond_medium">
-                    <param name="medium_selector" argument="--medium_selector" type="select" label="Medium">
-                        <option value="Default" selected="true">Don't use a separate medium file (use model defaults)</option>
-                    </param>
-                    <when value="Default">
-                        <!-- Nessun parametro aggiuntivo necessario -->
-                    </when>
-                </conditional>
-                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
-                    <option value="Default" selected="true">Keep original gene nomenclature</option>
-                </param>
-            </when>
-        </conditional>
-
+        <param name="input" argument="--input" type="data" format="xml, json" label="Custom model:" />
+        <param name="name" argument="--name" type="text" label="Model's name:" value="Model" help="Default: Model" />
     </inputs>
 
     <outputs>
-        <data name="log" format="txt" label="CustomDataGenerator - Log" />
-        <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_data_tabular" optional="true"/>
+        <data format="txt" name="log" label="${tool.name} - Log" />
+        <data format="tabular" name="rules" label="${name}_Rules" />
+        <data format="tabular" name="reactions" label="${name}_Reactions" />
+        <data format="tabular" name="bounds" label="${name}_Bounds" />
+        <data format="tabular" name="medium" label="${name}_Medium" />
     </outputs>
 
     <help>
@@ -100,7 +45,8 @@
 Reactions and rules can be used as inputs for the RAS and RPS generator tools.
 
 Accepted files:
-    - A model: JSON, XML, MAT or YAML (.yml) file reporting reactions and rules contained in the model. Supported compressed formats: .zip, .gz and .bz2. Filename must follow the pattern: {model_name}.{extension}.[zip|gz|bz2]
+    - A model: JSON or XML file reporting reactions and rules contained in the model.   
+
 
 Output:
 -------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/custom_data_generator_beta.py	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,343 @@
+import os
+import csv
+import cobra
+import pickle
+import argparse
+import pandas as pd
+import utils.general_utils as utils
+import utils.rule_parsing  as rulesUtils
+from typing import Optional, Tuple, Union, List, Dict
+import utils.reaction_parsing as reactionUtils
+
+ARGS : argparse.Namespace
+def process_args(args: List[str] = None) -> argparse.Namespace:
+    """
+    Parse command-line arguments for CustomDataGenerator.
+    """
+
+    parser = argparse.ArgumentParser(
+        usage="%(prog)s [options]",
+        description="Generate custom data from a given model"
+    )
+
+    parser.add_argument("--out_log", type=str, required=True,
+                        help="Output log file")
+
+    parser.add_argument("--model", type=str,
+                        help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)")
+    parser.add_argument("--input", type=str,
+                        help="Custom model file (JSON or XML)")
+    parser.add_argument("--name", type=str, required=True,
+                        help="Model name (default or custom)")
+    
+    parser.add_argument("--medium_selector", type=str, required=True,
+                        help="Medium selection option")
+
+    parser.add_argument("--gene_format", type=str, default="Default",
+                        help="Gene nomenclature format: Default (original), ENSNG, HGNC_SYMBOL, HGNC_ID, ENTREZ")
+    
+    parser.add_argument("--out_tabular", type=str,
+                        help="Output file for the merged dataset (CSV or XLSX)")
+    
+    parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__),
+                        help="Tool directory (passed from Galaxy as $__tool_directory__)")
+
+
+    return parser.parse_args(args)
+
+################################- INPUT DATA LOADING -################################
+def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model:
+    """
+    Loads a custom model from a file, either in JSON or XML format.
+
+    Args:
+        file_path : The path to the file containing the custom model.
+        ext : explicit file extension. Necessary for standard use in galaxy because of its weird behaviour.
+
+    Raises:
+        DataErr : if the file is in an invalid format or cannot be opened for whatever reason.    
+    
+    Returns:
+        cobra.Model : the model, if successfully opened.
+    """
+    ext = ext if ext else file_path.ext
+    try:
+        if ext is utils.FileFormat.XML:
+            return cobra.io.read_sbml_model(file_path.show())
+        
+        if ext is utils.FileFormat.JSON:
+            return cobra.io.load_json_model(file_path.show())
+
+    except Exception as e: raise utils.DataErr(file_path, e.__str__())
+    raise utils.DataErr(file_path,
+        f"Formato \"{file_path.ext}\" non riconosciuto, sono supportati solo file JSON e XML")
+
+################################- DATA GENERATION -################################
+ReactionId = str
+def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
+    """
+    Generates a dictionary mapping reaction ids to rules from the model.
+
+    Args:
+        model : the model to derive data from.
+        asParsed : if True parses the rules to an optimized runtime format, otherwise leaves them as strings.
+
+    Returns:
+        Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules.
+        Dict[ReactionId, str] : the generated dictionary of raw rules.
+    """
+    # Is the below approach convoluted? yes
+    # Ok but is it inefficient? probably
+    # Ok but at least I don't have to repeat the check at every rule (I'm clinically insane)
+    _ruleGetter   =  lambda reaction : reaction.gene_reaction_rule
+    ruleExtractor = (lambda reaction :
+        rulesUtils.parseRuleToNestedList(_ruleGetter(reaction))) if asParsed else _ruleGetter
+
+    return {
+        reaction.id : ruleExtractor(reaction)
+        for reaction in model.reactions
+        if reaction.gene_reaction_rule }
+
+def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
+    """
+    Generates a dictionary mapping reaction ids to reaction formulas from the model.
+
+    Args:
+        model : the model to derive data from.
+        asParsed : if True parses the reactions to an optimized runtime format, otherwise leaves them as they are.
+
+    Returns:
+        Dict[ReactionId, str] : the generated dictionary.
+    """
+
+    unparsedReactions = {
+        reaction.id : reaction.reaction
+        for reaction in model.reactions
+        if reaction.reaction 
+    }
+
+    if not asParsed: return unparsedReactions
+    
+    return reactionUtils.create_reaction_dict(unparsedReactions)
+
+def get_medium(model:cobra.Model) -> pd.DataFrame:
+    trueMedium=[]
+    for r in model.reactions:
+        positiveCoeff=0
+        for m in r.metabolites:
+            if r.get_coefficient(m.id)>0:
+                positiveCoeff=1;
+        if (positiveCoeff==0 and r.lower_bound<0):
+            trueMedium.append(r.id)
+
+    df_medium = pd.DataFrame()
+    df_medium["reaction"] = trueMedium
+    return df_medium
+
+def generate_bounds(model:cobra.Model) -> pd.DataFrame:
+
+    rxns = []
+    for reaction in model.reactions:
+        rxns.append(reaction.id)
+
+    bounds = pd.DataFrame(columns = ["lower_bound", "upper_bound"], index=rxns)
+
+    for reaction in model.reactions:
+        bounds.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound]
+    return bounds
+
+
+
+def generate_compartments(model: cobra.Model) -> pd.DataFrame:
+    """
+    Generates a DataFrame containing compartment information for each reaction.
+    Creates columns for each compartment position (Compartment_1, Compartment_2, etc.)
+    
+    Args:
+        model: the COBRA model to extract compartment data from.
+        
+    Returns:
+        pd.DataFrame: DataFrame with ReactionID and compartment columns
+    """
+    pathway_data = []
+
+    # First pass: determine the maximum number of pathways any reaction has
+    max_pathways = 0
+    reaction_pathways = {}
+
+    for reaction in model.reactions:
+        # Get unique pathways from all metabolites in the reaction
+        if type(reaction.annotation['pathways']) == list:
+            reaction_pathways[reaction.id] = reaction.annotation['pathways']
+            max_pathways = max(max_pathways, len(reaction.annotation['pathways']))
+        else:
+            reaction_pathways[reaction.id] = [reaction.annotation['pathways']]
+
+    # Create column names for pathways
+    pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]
+
+    # Second pass: create the data
+    for reaction_id, pathways in reaction_pathways.items():
+        row = {"ReactionID": reaction_id}
+        
+        # Fill pathway columns
+        for i in range(max_pathways):
+            col_name = pathway_columns[i]
+            if i < len(pathways):
+                row[col_name] = pathways[i]
+            else:
+                row[col_name] = None  # or "" if you prefer empty strings
+
+        pathway_data.append(row)
+
+    return pd.DataFrame(pathway_data)
+
+
+###############################- FILE SAVING -################################
+def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None:
+    """
+    Saves any dictionary-shaped data in a .csv file created at the given file_path as FilePath.
+
+    Args:
+        data : the data to be written to the file.
+        file_path : the path to the .csv file.
+        fieldNames : the names of the fields (columns) in the .csv file.
+    
+    Returns:
+        None
+    """
+    with open(file_path.show(), 'w', newline='') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+        writer.writeheader()
+
+        for key, value in data.items():
+            writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None:
+    """
+    Saves any dictionary-shaped data in a .csv file created at the given file_path as string.
+
+    Args:
+        data : the data to be written to the file.
+        file_path : the path to the .csv file.
+        fieldNames : the names of the fields (columns) in the .csv file.
+    
+    Returns:
+        None
+    """
+    with open(file_path, 'w', newline='') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab")
+        writer.writeheader()
+
+        for key, value in data.items():
+            writer.writerow({ fieldNames[0] : key, fieldNames[1] : value })
+
+def save_as_tabular_df(df: pd.DataFrame, path: str) -> None:
+    try:
+        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+        df.to_csv(path, sep="\t", index=False)
+    except Exception as e:
+        raise utils.DataErr(path, f"failed writing tabular output: {e}")
+
+
+###############################- ENTRY POINT -################################
+def main(args:List[str] = None) -> None:
+    """
+    Initializes everything and sets the program in motion based on the frontend input arguments.
+    
+    Returns:
+        None
+    """
+    # get args from frontend (related xml)
+    global ARGS
+    ARGS = process_args(args)
+
+
+    if ARGS.input:
+        # load custom model
+        model = load_custom_model(
+            utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext)
+    else:
+        # load built-in model
+
+        try:
+            model_enum = utils.Model[ARGS.model]  # e.g., Model['ENGRO2']
+        except KeyError:
+            raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model)
+
+        # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models)
+        try:
+            model = model_enum.getCOBRAmodel(toolDir=ARGS.tool_dir)
+        except Exception as e:
+            # Wrap/normalize load errors as DataErr for consistency
+            raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}")
+
+    # Determine final model name: explicit --name overrides, otherwise use the model id
+    
+    model_name = ARGS.name if ARGS.name else ARGS.model
+    
+    if ARGS.name == "ENGRO2" and ARGS.medium_selector != "Default":
+        df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
+        ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
+        medium = df_mediums[[ARGS.medium_selector]]
+        medium = medium[ARGS.medium_selector].to_dict()
+
+        # Set all reactions to zero in the medium
+        for rxn_id, _ in model.medium.items():
+            model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
+        
+        # Set medium conditions
+        for reaction, value in medium.items():
+            if value is not None:
+                model.reactions.get_by_id(reaction).lower_bound = -float(value)
+
+    if ARGS.name == "ENGRO2" and ARGS.gene_format != "Default":
+
+        model = utils.convert_genes(model, ARGS.gene_format.replace("HGNC_", "HGNC "))
+
+    # generate data
+    rules = generate_rules(model, asParsed = False)
+    reactions = generate_reactions(model, asParsed = False)
+    bounds = generate_bounds(model)
+    medium = get_medium(model)
+    if ARGS.name == "ENGRO2":
+        compartments = generate_compartments(model)
+
+    df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"])
+    df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"])
+
+    df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
+    df_medium = medium.rename(columns = {"reaction": "ReactionID"})
+    df_medium["InMedium"] = True # flag indicating presence in the medium
+
+    merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
+    merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
+    if ARGS.name == "ENGRO2": 
+        merged = merged.merge(compartments, on = "ReactionID", how = "outer")
+    merged = merged.merge(df_medium, on = "ReactionID", how = "left")
+
+    merged["InMedium"] = merged["InMedium"].fillna(False)
+
+    merged = merged.sort_values(by = "InMedium", ascending = False)
+
+    #out_file = os.path.join(ARGS.output_path, f"{os.path.basename(ARGS.name).split('.')[0]}_custom_data")
+
+    #merged.to_csv(out_file, sep = '\t', index = False)
+
+
+    ####
+
+
+    if not ARGS.out_tabular:
+        raise utils.ArgsErr("out_tabular", "output path (--out_tabular) is required when output_format == tabular", ARGS.out_tabular)
+    save_as_tabular_df(merged, ARGS.out_tabular)
+    expected = ARGS.out_tabular
+
+    # verify output exists and non-empty
+    if not expected or not os.path.exists(expected) or os.path.getsize(expected) == 0:
+        raise utils.DataErr(expected, "Output non creato o vuoto")
+
+    print("CustomDataGenerator: completed successfully")
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/custom_data_generator_beta.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,117 @@
<tool id="custom_data_generator_beta" name="Custom Data Generator - BETA" version="2.0.0">
    <!-- FIX: Galaxy tool ids must be space-free tokens; "CustomDataGenerator - Beta" is not a valid id. -->

    <requirements>
        <requirement type="package" version="1.24.4">numpy</requirement>
        <requirement type="package" version="2.0.3">pandas</requirement>
        <requirement type="package" version="0.29.0">cobra</requirement>
        <requirement type="package" version="5.2.2">lxml</requirement>
    </requirements>

    <macros>
        <import>marea_macros.xml</import>
    </macros>

    <command detect_errors="exit_code">
        <![CDATA[
      	python $__tool_directory__/custom_data_generator_beta.py
        --tool_dir $__tool_directory__
        --medium_selector $cond_model.cond_medium.medium_selector
        #if $cond_model.model_selector == 'Custom_model'
            --input $cond_model.input
            --name $cond_model.name
        #else
            --model $cond_model.model_selector
            --name $cond_model.model_selector
        #end if

        --gene_format $cond_model.gene_format

        --out_log $log
        --out_tabular $out_tabular
        ]]>
    </command>
    <inputs>
        <conditional name="cond_model">
            <expand macro="options_model"/>

            <!-- ENGRO2 -->
            <when value="ENGRO2">
                <param name="name" argument="--name" type="text" value="ENGRO2" hidden="true" />
                <conditional name="cond_medium">
                    <expand macro="options_ras_to_bounds_medium"/>
                </conditional>

                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
                    <option value="Default" selected="true">Keep original gene nomenclature</option>
                    <!-- FIX: label typo "ENSNG" corrected to "ENSG" -->
                    <option value="ENSG">ENSG (Ensembl Gene ID)</option>
                    <option value="HGNC_symbol">HGNC Symbol</option>
                    <option value="HGNC_ID">HGNC ID</option>
                    <option value="entrez_id">Entrez Gene ID</option>
                </param>
            </when>

            <!-- Recon -->
            <when value="Recon">
                <param name="name" argument="--name" type="text" value="Recon" hidden="true" />
                <conditional name="cond_medium">
                    <param name="medium_selector" argument="--medium_selector" type="select" label="Medium">
                        <option value="Default" selected="true">Default (Recon built-in medium)</option>
                    </param>
                    <when value="Default">
                        <!-- No additional parameters needed -->
                    </when>
                </conditional>
                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
                    <option value="Default" selected="true">Keep original gene nomenclature</option>
                </param>
            </when>

            <!-- Custom model -->
            <when value="Custom_model">
                <param name="input" argument="--input" type="data" format="json,xml" label="Custom model file:" />
                <param name="name" argument="--name" type="text" label="Model's name:" value="CustomModel" />
                <conditional name="cond_medium">
                    <param name="medium_selector" argument="--medium_selector" type="select" label="Medium">
                        <option value="Default" selected="true">Don't use a separate medium file (use model defaults)</option>
                    </param>
                    <when value="Default">
                        <!-- No additional parameters needed -->
                    </when>
                </conditional>
                <param name="gene_format" argument="--gene_format" type="select" label="Gene nomenclature format:">
                    <option value="Default" selected="true">Keep original gene nomenclature</option>
                </param>
            </when>
        </conditional>

    </inputs>

    <outputs>
        <data name="log" format="txt" label="CustomDataGenerator - Log" />
        <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_data_tabular" optional="true"/>
    </outputs>

    <help>
    <![CDATA[
What it does
-------------

This tool generates a single tabular file containing reactions, rules, reaction bounds and medium composition, starting from a built-in model or a custom model in JSON or XML format.
Reactions and rules can be used as inputs for the RAS and RPS generator tools.

Accepted files:
    - A model: JSON, XML, MAT or YAML (.yml) file reporting reactions and rules contained in the model. Supported compressed formats: .zip, .gz and .bz2. Filename must follow the pattern: {model_name}.{extension}.[zip|gz|bz2]

Output:
-------------

The tool generates:
    - a tabular file reporting, for each reaction of the chosen model: its GPR rule, reaction formula, lower and upper bounds, and whether it belongs to the medium. Format: tsv (tab separated).
    - a log file (.txt).
    ]]>
    </help>
    <expand macro="citations" />
</tool>
\ No newline at end of file
--- a/COBRAxy/ras_generator.py	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/ras_generator.py	Mon Sep 08 14:44:15 2025 +0000
@@ -27,10 +27,15 @@
         usage = '%(prog)s [options]',
         description = "process some value's genes to create a comparison's map.")
     
-    parser.add_argument("-rl", "--model_upload", type = str,
-        help = "path to input file containing the rules")
+    parser.add_argument(
+        '-rs', '--rules_selector', 
+        type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model),
+        help = 'chose which type of dataset you want use')
+    
+    parser.add_argument("-rl", "--rule_list", type = str,
+        help = "path to input file with custom rules, if provided")
 
-    parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name")
+    parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name")
     # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
     
     parser.add_argument(
@@ -518,8 +523,8 @@
     """
     ras_values_by_cell_line = {}
     dataset.set_index(dataset.columns[0], inplace=True)
-    
-    for cell_line_name in dataset.columns: #[1:]:
+    # Considera tutte le colonne tranne la prima in cui ci sono gli hugo quindi va scartata
+    for cell_line_name in dataset.columns[1:]:
         cell_line = dataset[cell_line_name].to_dict()
         ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line)
     return ras_values_by_cell_line
@@ -637,50 +642,16 @@
     Returns:
         Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
     """
-    datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat
-
-    #try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext
-    #except utils.PathErr as err:      
-    #    utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log)
-    #    filenamePath = None
+    datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat
+    
+    try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext
+    except utils.PathErr as err:
+        raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}")
      
-    #if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
-
-    dict_rule = {}
+    if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
 
-    try:
-        # Proviamo prima con delimitatore tab
-        for line in utils.readCsv(datFilePath, delimiter = "\t"):
-            if len(line) < 3:  # Controlliamo che ci siano almeno 3 colonne
-                utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
-                continue
-            
-            if line[2] == "":
-                dict_rule[line[0]] = ruleUtils.OpList([""])
-            else:
-                dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
-                
-    except Exception as e:
-        # Se fallisce con tab, proviamo con virgola
-        try:
-            dict_rule = {}
-            for line in utils.readCsv(datFilePath, delimiter = ","):
-                if len(line) < 3:
-                    utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
-                    continue
-                
-                if line[2] == "":
-                    dict_rule[line[0]] = ruleUtils.OpList([""])
-                else:
-                    dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
-        except Exception as e2:
-            raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")
-
-    if not dict_rule:
-            raise ValueError("No valid rules found in the uploaded file. Please check the file format.")
     # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
-    return dict_rule
-
+    return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) }
 
 def main(args:List[str] = None) -> None:
     """
@@ -700,46 +671,35 @@
     # remove versioning from gene names
     dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]
 
-    rules = load_custom_rules()
-    reactions = list(rules.keys())
+    # handle custom models
+    model :utils.Model = ARGS.rules_selector
 
-    save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
-    if ERRORS: utils.logWarning(
-        f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
-        ARGS.out_log)  
-
-
-    ############
+    if model is utils.Model.Custom:
+        rules = load_custom_rules()
+        reactions = list(rules.keys())
 
-    # handle custom models
-    #model :utils.Model = ARGS.rules_selector
-
-    #if model is utils.Model.Custom:
-    #    rules = load_custom_rules()
-    #    reactions = list(rules.keys())
-
-    #    save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
-    #    if ERRORS: utils.logWarning(
-    #        f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
-    #        ARGS.out_log)
+        save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
+        if ERRORS: utils.logWarning(
+            f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
+            ARGS.out_log)
         
-    #    return
+        return
     
     # This is the standard flow of the ras_generator program, for non-custom models.
-    #name = "RAS Dataset"
-    #type_gene = gene_type(dataset.iloc[0, 0], name)
-
-    #rules      = model.getRules(ARGS.tool_dir)
-    #genes      = data_gene(dataset, type_gene, name, None)
-    #ids, rules = load_id_rules(rules.get(type_gene))
+    name = "RAS Dataset"
+    type_gene = gene_type(dataset.iloc[0, 0], name)
 
-    #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
-    #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
+    rules      = model.getRules(ARGS.tool_dir)
+    genes      = data_gene(dataset, type_gene, name, None)
+    ids, rules = load_id_rules(rules.get(type_gene))
     
-    #if err: utils.logWarning(
-    #    f"Warning: gene(s) {err} not found in class \"{name}\", " +
-    #    "the expression level for this gene will be considered NaN",
-    #    ARGS.out_log)
+    resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
+    create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
+    
+    if err: utils.logWarning(
+        f"Warning: gene(s) {err} not found in class \"{name}\", " +
+        "the expression level for this gene will be considered NaN",
+        ARGS.out_log)
     
     print("Execution succeded")
 
--- a/COBRAxy/ras_generator.xml	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/ras_generator.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -12,23 +12,27 @@
     <command detect_errors="exit_code">
         <![CDATA[
       	python $__tool_directory__/ras_generator.py
-        --tool_dir $__tool_directory__
-        --model_upload $model_upload
-        --model_upload_name $model_upload.element_identifier
+      	--rules_selector $cond_rule.rules_selector
         --input $input
         --none $none
-
+        --tool_dir $__tool_directory__
         --out_log $log
         --ras_output $ras_output
-        
+        #if $cond_rule.rules_selector == 'Custom'
+            --rule_list $rule_list
+            --rules_name $rule_list.element_identifier
+        #end if
         ]]>
     </command>
     <inputs>
-        <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" 
-                label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />
-        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Gene Expression dataset:" />
-        <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" 
-                help="Default: Dataset_RAS. Do not use white spaces or special symbols." />
+        <conditional name="cond_rule">
+            <expand macro="options"/>
+            <when value="Custom">
+                <param name="rule_list" argument="--rule_list" type="data" format="tabular, csv, pickle, p, pk" label="Custom rules" />
+            </when>
+        </conditional>
+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Gene Expression dataset:" />
+        <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" help="Default: Dataset_RAS. Do not use white spaces or special symbols." />
         <param name="none" argument="--none" type="boolean" checked="true" label="(A and NaN) solved as (A)?" /> 
     </inputs>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/ras_generator_beta.py	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,748 @@
+from __future__ import division
+# galaxy complains this ^^^ needs to be at the very beginning of the file, for some reason.
+import sys
+import argparse
+import collections
+import pandas as pd
+import pickle as pk
+import utils.general_utils as utils
+import utils.rule_parsing as ruleUtils
+from typing import Union, Optional, List, Dict, Tuple, TypeVar
+import os
+
+ERRORS = []
+########################## argparse ##########################################
+ARGS :argparse.Namespace
def process_args(args:List[str] = None) -> argparse.Namespace:
    """
    Build the command-line interface and parse the given argument list.

    Args:
        args (list): Argument strings to parse (defaults to sys.argv).

    Returns:
        Namespace: Parsed argument values.
    """
    parser = argparse.ArgumentParser(
        usage = '%(prog)s [options]',
        description = "process some value's genes to create a comparison's map.")

    parser.add_argument("-rl", "--model_upload", type=str,
                        help="path to input file containing the rules")
    # Galaxy stores uploads as .dat; the original name preserves the real extension.
    parser.add_argument("-rn", "--model_upload_name", type=str,
                        help="custom rules name")
    parser.add_argument('-n', '--none', type=utils.Bool("none"), default=True,
                        help='compute Nan values')
    parser.add_argument('-td', '--tool_dir', type=str, required=True,
                        help='your tool directory')
    parser.add_argument('-ol', '--out_log', type=str,
                        help="Output log")
    parser.add_argument('-in', '--input', type=str,  # "-id" was renamed to "-in"
                        help='input dataset')
    parser.add_argument('-ra', '--ras_output', type=str, required=True,
                        help='ras output')

    return parser.parse_args(args)
+
+############################ dataset input ####################################
def read_dataset(data :str, name :str) -> pd.DataFrame:
    """
    Load a tab-separated dataset into a pandas DataFrame.

    Args:
        data (str): Path to the TSV file.
        name (str): Dataset name, used only in error messages.

    Returns:
        pandas.DataFrame: The loaded table.

    Raises:
        SystemExit: When the file is empty or has fewer than two columns.
    """
    bad_format_msg = 'Execution aborted: wrong format of ' + name + '\n'
    try:
        table = pd.read_csv(data, sep = '\t', header = 0, engine='python')
    except pd.errors.EmptyDataError:
        sys.exit(bad_format_msg)
    # A usable dataset needs at least a gene column plus one sample column.
    if len(table.columns) < 2:
        sys.exit(bad_format_msg)
    return table
+
+############################ load id e rules ##################################
def load_id_rules(reactions :Dict[str, Dict[str, List[str]]]) -> Tuple[List[str], List[Dict[str, List[str]]]]:
    """
    Split a reaction dictionary into two parallel lists: IDs and rules.

    Args:
        reactions (dict): A dictionary where keys are reaction IDs and values are rules.

    Returns:
        tuple: (list of IDs, list of rules) in the dictionary's insertion order.
    """
    # dicts preserve insertion order, so keys() and values() stay aligned.
    return (list(reactions.keys()), list(reactions.values()))
+
+############################ check_methods ####################################
def gene_type(l :str, name :str) -> str:
    """
    Determine the type of a gene identifier by trying each known format in turn.

    Args:
        l (str): The gene identifier to check.
        name (str): The name of the dataset, used in error messages.

    Returns:
        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').

    Raises:
        SystemExit: If the gene ID type is not supported, the execution is aborted.
    """
    if check_hgnc(l):
        return 'hugo_id'
    elif check_ensembl(l):
        return 'ensembl_gene_id'
    elif check_symbol(l):
        return 'symbol'
    elif check_entrez(l):
        return 'entrez_id'
    else:
        # FIX: the two concatenated literals previously rendered "Supported IDtypes"
        # (missing space between 'ID' and 'types').
        sys.exit('Execution aborted:\n' +
                 'gene ID type in ' + name + ' not supported. Supported ID '+
                 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
+
def check_hgnc(l :str) -> bool:
    """
    Tell whether the identifier follows the HGNC format: the (case-insensitive)
    prefix 'HGNC:' followed by at least one digit.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True for a well-formed HGNC identifier, False otherwise.
    """
    # Shortest valid form is "HGNC:" plus one digit, i.e. length 6.
    if len(l) <= 5:
        return False
    if not l.upper().startswith('HGNC:'):
        return False
    return l[5:].isdigit()
+
def check_ensembl(l :str) -> bool:
    """
    Tell whether the identifier follows the Ensembl format: a case-insensitive
    'ENS' prefix.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True for an Ensembl-style identifier, False otherwise.
    """
    return l[:3].upper() == 'ENS'
+ 
+
def check_symbol(l :str) -> bool:
    """
    Check if a gene identifier follows the symbol format: a letter followed by
    zero or more alphanumeric characters.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the symbol format, False otherwise.
    """
    if not l:
        return False
    # BUG FIX: single-letter symbols (e.g. "A") were rejected because the empty
    # remainder ""[1:].isalnum() evaluates to False.
    return l[0].isalpha() and (len(l) == 1 or l[1:].isalnum())
+
def check_entrez(l :str) -> bool:
    """
    Tell whether the identifier follows the Entrez format: a non-empty string
    made only of digits.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True for an Entrez-style identifier, False otherwise.
    """
    return bool(l) and l.isdigit()
+
+############################ gene #############################################
def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]:
    """
    Normalize gene identifiers, validate duplicates and convert the table to a dict.

    Args:
        gene (DataFrame): DataFrame containing gene data; first column holds gene IDs.
        type_gene (str): Type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', 'entrez_id').
        name (str): Name of the dataset (used in messages).
        gene_custom (dict or None): Custom gene-validation dictionary, if provided.

    Returns:
        dict: {column label: {gene ID: value}} mapping (duplicated IDs keep the last value).

    Raises:
        SystemExit: When a duplicated gene ID is marked 'ok' in the validation map,
            or when no gene list is available for the selected model.

    Side effects:
        gene : mut (IDs are stripped and version suffixes removed in place)
    """
    # Strip whitespace and drop version suffixes ("ENSG....12" -> "ENSG...").
    for i in range(len(gene)):
        gene.iloc[i, 0] = gene.iloc[i, 0].strip().split('.')[0]

    gene_dup = [item for item, count in
               collections.Counter(gene[gene.columns[0]]).items() if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]

    gene_in_rule = None

    if gene_dup:
        if gene_custom is None:
            # BUG FIX: this file's argument parser defines --model_upload, not
            # --rules_selector, so a missing attribute must not crash the run.
            model_name = str(getattr(ARGS, 'rules_selector', ''))
            pickle_by_model = {
                'HMRcore': '/local/pickle files/HMRcore_genes.p',
                'Recon': '/local/pickle files/Recon_genes.p',
                'ENGRO2': '/local/pickle files/ENGRO2_genes.p',
            }
            rel_path = pickle_by_model.get(model_name)
            if rel_path is None:
                # BUG FIX: previously gene_in_rule stayed None here and the code
                # crashed later on gene_in_rule.get(...).
                sys.exit('Execution aborted: no known gene list for model "'
                         + model_name + '", cannot validate duplicated gene IDs\n')
            full_path = ARGS.tool_dir + rel_path
            # BUG FIX: close the pickle file handle (was left open).
            with open(full_path, 'rb') as fh:
                gene_in_rule = pk.load(fh)
            # BUG FIX: log the path actually loaded (was hard-coded to ENGRO2).
            utils.logWarning(full_path, ARGS.out_log)
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom

        # Abort only for duplicates that the model actually uses ('ok').
        tmp = [g for g in gene_dup if gene_in_rule and gene_in_rule.get(g) == 'ok']
        if tmp:
            sys.exit('Execution aborted because gene ID '
                     + str(tmp) + ' in ' + name + ' is duplicated\n')

    if pat_dup: utils.logWarning(f"Warning: duplicated label\n{pat_dup} in {name}", ARGS.out_log)
    return (gene.set_index(gene.columns[0])).to_dict()
+
+############################ resolve ##########################################
def replace_gene_value(l :str, d :str) -> Tuple[Union[int, float], list]:
    """
    Recursively substitute every gene identifier in a parsed rule with its
    expression value, preserving the nested structure and operator tokens.

    Args:
        l (str): Parsed rule: a (possibly nested) list of gene ids and operators.
        d (str): Mapping of gene ids to expression values.

    Returns:
        tuple: (rule with values substituted, list of gene ids that had no value).
    """
    values = []
    errors = []
    for token in l:
        if isinstance(token, list):
            # nested sub-expression: recurse and merge its misses
            sub_values, sub_errors = replace_gene_value(token, d)
            values.append(sub_values)
            errors.extend(sub_errors)
        else:
            value = replace_gene(token, d)
            values.append(value)
            if value is None:
                errors.append(token)
    return (values, errors)
+
def replace_gene(l :str, d :str) -> Union[int, float]:
    """
    Map a single rule token to its value: operators pass through unchanged,
    gene ids are looked up in the expression mapping.

    Args:
        l (str): Rule token: 'and', 'or', or a gene identifier.
        d (str): Mapping of gene ids to expression values.

    Returns:
        float/int: The token's value, or None when the gene is absent.

    Raises:
        SystemExit: When the stored value is neither numeric nor None.
    """
    if l in ('and', 'or'):
        return l
    value = d.get(l, None)
    # Anything other than a number or a missing entry means corrupt input.
    if value is not None and not isinstance(value, (int, float)):
        sys.exit('Execution aborted: ' + value + ' value not valid\n')
    return value
+
T = TypeVar("T", bound = Optional[Union[int, float]])
def computes(val1 :T, op :str, val2 :T, cn :bool) -> T:
    """
    Combine two expression values with a rule operator.

    'and' takes the minimum of the two values, 'or' their sum. When one operand
    is missing (None), 'or' propagates the known value; 'and' does so only when
    `cn` is True, otherwise it yields None.

    Args:
        val1(Optional(Union[float, int])): First value.
        op (str): Operator ('and' or 'or').
        val2(Optional(Union[float, int])): Second value.
        cn (bool): Whether 'and' may resolve with a single known operand.

    Returns:
        Optional(Union[float, int]): Result of the computation.
    """
    if val1 is not None and val2 is not None:
        return min(val1, val2) if op == 'and' else val1 + val2
    # From here on, at least one operand is missing.
    if op == 'and' and not cn:
        return None
    # 'or' (any cn), or 'and' with cn=True: use whichever value exists.
    if val1 is not None:
        return val1
    return val2
+
# ris should be Literal[None] but Literal is not supported in Python 3.7
def control(ris, l :List[Union[int, float, list]], cn :bool) -> Union[bool, int, float]: #Union[Literal[False], int, float]:
    """
    Validate and evaluate a parsed rule expression.

    Args:
        ris: Intermediate result carried into `control_list` (None on the first call).
        l (list): Parsed expression: values (int/float/None), nested lists and
            'and'/'or' operator tokens.
        cn (bool): When True, 'and' with one missing operand resolves to the
            known value (forwarded to `computes`).

    Returns:
        Union[Literal[False], int, float]: The computed score, None when it
        cannot be resolved, or False when the expression is malformed.
    """
    if len(l) == 1:
        # Single element: either a literal value or a nested sub-expression.
        if isinstance(l[0], (float, int)) or l[0] == None:
            return l[0]
        elif isinstance(l[0], list):
            return control(None, l[0], cn)
        else:
            return False  # a lone operator token is malformed
    elif len(l) > 2:
        # Well-formed expressions look like: value op value [op value ...]
        return control_list(ris, l, cn)
    else:
        return False  # exactly two elements can never be well-formed
+
def control_list(ris, l :List[Optional[Union[float, int, list]]], cn :bool) -> Optional[bool]: #Optional[Literal[False]]:
    """
    Evaluate a 'value op value [op value ...]' expression left to right.

    Consumes the token list in steps of two or three, dispatching on the shape
    of the leading tokens and accumulating the partial score in `ris`.

    Args:
        ris: Accumulated partial result (None at the start of an expression).
        l (list): Expression tokens: values (int/float/None), nested lists and
            'and'/'or' operator strings.
        cn (bool): Forwarded to `computes`; controls 'and' with a missing operand.

    Returns:
        Optional[Literal[False]]: The accumulated score, None if unresolvable,
        or False as soon as a malformed token pattern is met.
    """
    while l:
        if len(l) == 1:
            return False  # trailing token with no operator: malformed
        elif (isinstance(l[0], (float, int)) or
              l[0] == None) and l[1] in ['and', 'or']:
            # shape: value op <value|sublist> ...
            if isinstance(l[2], (float, int)) or l[2] == None:
                ris = computes(l[0], l[1], l[2], cn)            
            elif isinstance(l[2], list):
                tmp = control(None, l[2], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(l[0], l[1], tmp, cn)
            else:
                return False
            l = l[3:]
        elif l[0] in ['and', 'or']:
            # shape: op <value|sublist> ... (continues from the accumulated ris)
            if isinstance(l[1], (float, int)) or l[1] == None:
                ris = computes(ris, l[0], l[1], cn)
            elif isinstance(l[1], list):
                tmp = control(None,l[1], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(ris, l[0], tmp, cn)
            else:
                return False
            l = l[2:]
        elif isinstance(l[0], list) and l[1] in ['and', 'or']:
            # shape: sublist op <value|sublist> ...
            if isinstance(l[2], (float, int)) or l[2] == None:
                tmp = control(None, l[0], cn)
                if tmp is False:
                    return False
                else:
                    ris = computes(tmp, l[1], l[2], cn)
            elif isinstance(l[2], list):
                tmp = control(None, l[0], cn)
                tmp2 = control(None, l[2], cn)
                if tmp is False or tmp2 is False:
                    return False
                else:
                    ris = computes(tmp, l[1], tmp2, cn)
            else:
                return False
            l = l[3:]
        else:
            return False
    return ris
+
ResolvedRules = Dict[str, List[Optional[Union[float, int]]]]
def resolve(genes: Dict[str, str], rules: List[str], ids: List[str], resolve_none: bool, name: str) -> Tuple[Optional[ResolvedRules], Optional[list]]:
    """
    Resolve every rule against every sample's gene values to compute scores.

    Args:
        genes (dict): {sample label: {gene ID: expression value}} mapping.
        rules (list): Parsed rules (nested lists of gene ids and 'and'/'or' tokens).
        ids (list): Reaction IDs parallel to `rules` (unused here, kept for the caller).
        resolve_none (bool): Whether 'and' with one missing operand resolves to
            the known value (forwarded to `control`).
        name (str): Dataset name, used only in warnings.

    Returns:
        tuple: ({sample: list of scores parallel to `rules`}, deduplicated list of
        gene IDs not found), or (None, None) when no rule at all could be scored.
    """
    resolve_rules = {}
    not_found = []
    flag = False  # becomes True as soon as any rule yields a usable score
    for key, value in genes.items():
        tmp_resolve = []
        for i in range(len(rules)):
            tmp = rules[i]
            if tmp:
                # substitute gene ids with expression values, collecting misses
                tmp, err = replace_gene_value(tmp, value)
                if err:
                    not_found.extend(err)
                ris = control(None, tmp, resolve_none)
                # False (malformed) and None (unresolvable) both map to None
                if ris is False or ris == None:
                    tmp_resolve.append(None)
                else:
                    tmp_resolve.append(ris)
                    flag = True
            else:
                tmp_resolve.append(None)    
        resolve_rules[key] = tmp_resolve
    
    if flag is False:
        utils.logWarning(
            f"Warning: no computable score (due to missing gene values) for class {name}, the class has been disregarded",
            ARGS.out_log)
        
        return (None, None)
    
    return (resolve_rules, list(set(not_found)))
+############################ create_ras #######################################
def create_ras(resolve_rules: Optional["ResolvedRules"], dataset_name: str, rules: List[str], ids: List[str], file: str) -> None:
    """
    Write a RAS (Reaction Activity Score) table to a TSV file.

    Args:
        resolve_rules (dict): {sample: list of scores parallel to `ids`}, or None.
        dataset_name (str): Name of the dataset (used in the warning only).
        rules (list): List of rules (unused, kept for interface compatibility).
        ids (list): Reaction IDs, one per row of the output.
        file (str): Path to the output RAS file.

    Returns:
        None
    """
    if resolve_rules is None:
        utils.logWarning(f"Couldn't generate RAS for current dataset: {dataset_name}", ARGS.out_log)
        # BUG FIX: previously fell through and crashed iterating None below.
        return

    # Serialize missing scores as the literal string 'None' (mutates the input lists).
    for geni in resolve_rules.values():
        for i, valori in enumerate(geni):
            if valori is None:
                geni[i] = 'None'

    output_ras = pd.DataFrame.from_dict(resolve_rules)
    output_ras.insert(0, 'Reactions', ids)
    output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)

    # BUG FIX: context manager guarantees the handle is closed even on error.
    with open(file, "w") as text_file:
        text_file.write(output_to_csv)
+
################################- NEW RAS COMPUTATION -################################
# Type aliases for the new computation path: a gene expression level may be
# missing (None), and a RAS score inherits that same optionality.
Expr = Optional[Union[int, float]]
Ras  = Expr
def ras_for_cell_lines(dataset: pd.DataFrame, rules: Dict[str, ruleUtils.OpList]) -> Dict[str, Dict[str, Ras]]:
    """
    Compute RAS scores for every cell line (column) found in the dataset.

    Args:
        dataset: Gene expression table; the first column holds gene names and
            every remaining column is one cell line.
        rules: Mapping from reaction id to its parsed rule (OpList).

    Side effects:
        dataset : mut (the gene-name column becomes the index, in place)

    Returns:
        Mapping from cell line name to a {reaction id: RAS score} dictionary.
    """
    # Index rows by gene name so each column becomes a gene -> expression lookup.
    dataset.set_index(dataset.columns[0], inplace=True)
    return {
        cell_line: get_ras_values(rules, dataset[cell_line].to_dict())
        for cell_line in dataset.columns
    }
+
def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
    """
    Evaluate every rule against one cell line's expression data.

    Args:
        value_rules: Mapping from reaction id to its parsed rule (OpList).
        dataset: Gene expression data of one cell line (gene name -> value).

    Returns:
        Mapping from reaction id to the computed RAS value for that rule.
    """
    scores = {}
    for reaction_id, op_list in value_rules.items():
        scores[reaction_id] = ras_op_list(op_list, dataset)
    return scores
+
def get_gene_expr(dataset :Dict[str, Expr], name :str) -> Expr:
    """
    Look up one gene's expression value in a cell line dataset.

    Args:
        dataset: Gene expression data of one cell line (gene name -> value).
        name: Gene name.

    Returns:
        The gene's expression value, or None when it is absent; a miss is
        recorded in the module-level ERRORS list so main() can warn about it.
    """
    expr = dataset.get(name)
    if expr is None:
        # NOTE(review): a gene present with an explicit None value is recorded
        # too, matching the original behavior.
        ERRORS.append(name)
    return expr
+
def ras_op_list(op_list: ruleUtils.OpList, dataset: Dict[str, Expr]) -> Ras:
    """
    Recursively compute the RAS value for the given OpList.

    Missing-value (None) behavior is controlled by the frontend flag ARGS.none:
    when False, any None operand of an AND rule makes the whole rule None;
    when True, None operands are simply skipped.

    Args:
        op_list (OpList): The OpList representing a rule with gene values.
        dataset : gene expression data of one cell line.

    Returns:
        Ras: The computed RAS value for the given OpList (OR sums operands,
        AND takes their minimum), or None when it cannot be computed.
    """
    op = op_list.op
    ras_value :Ras = None
    # A rule with no operator is a single gene: return its expression directly.
    if not op: return get_gene_expr(dataset, op_list[0])
    # Strict AND: a literal None operand poisons the whole rule up front.
    if op is ruleUtils.RuleOp.AND and not ARGS.none and None in op_list: return None

    for i in range(len(op_list)):
        item = op_list[i]
        # Operands are either nested sub-rules (recurse) or gene names (look up).
        if isinstance(item, ruleUtils.OpList):
            item = ras_op_list(item, dataset)

        else:
          item = get_gene_expr(dataset, item)

        if item is None:
          # Strict AND fails on any missing operand; otherwise skip it.
          if op is ruleUtils.RuleOp.AND and not ARGS.none: return None
          continue

        if ras_value is None:
          ras_value = item
        else:
          # OR accumulates by sum; AND keeps the minimum expression seen.
          ras_value = ras_value + item if op is ruleUtils.RuleOp.OR else min(ras_value, item)

    return ras_value
+
def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None:
    """
    Write the computed RAS scores to ARGS.ras_output as a tab-separated file.

    Args:
        rasScores: Computed RAS scores, keyed by cell line then by reaction id.
        reactions: Reaction ids, written as the first column of the output.

    Returns:
        None
    """
    # Replace missing scores with the literal string "None" up front; mutating
    # the dicts directly is faster than post-processing the DataFrame.
    for scores in rasScores.values():
        for reactId in scores:
            if scores[reactId] is None:
                scores[reactId] = "None"

    output_ras = pd.DataFrame.from_dict(rasScores)
    output_ras.insert(0, 'Reactions', reactions)
    output_ras.to_csv(ARGS.ras_output, sep = '\t', index = False)
+
+############################ MAIN #############################################
+#TODO: not used but keep, it will be when the new translator dicts will be used.
def translateGene(geneName :str, encoding :str, geneTranslator :Dict[str, Dict[str, str]]) -> str:
    """
    Translate a gene from any supported encoding to its HugoID.

    Args:
        geneName (str): the name of the gene in its current encoding.
        encoding (str): the encoding the gene name is expressed in.
        geneTranslator (Dict[str, Dict[str, str]]): nested mapping
            {encoding: {gene name: HugoID}} covering all supported genes and
            encodings in the current model.

    Raises:
        ValueError: When the gene isn't supported in the model.

    Returns:
        str: the gene in HugoID encoding.
    """
    supported = geneTranslator[encoding]
    if geneName not in supported:
        raise ValueError(f"Gene \"{geneName}\" non trovato, verifica di star utilizzando il modello corretto!")
    return supported[geneName]
+
def load_custom_rules() -> Dict[str, ruleUtils.OpList]:
    """
    Open the uploaded custom rules file (ARGS.model_upload) and extract the rules.

    Parsing first assumes tab-separated columns and falls back to comma-separated
    on failure. Column 0 is the reaction id and column 2 the GPR rule; an empty
    rule is stored as a trivial OpList.

    Returns:
        Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.

    Raises:
        ValueError: when neither delimiter yields parseable lines, or no valid
        rule is found at all.
    """
    datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat

    #try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext
    #except utils.PathErr as err:      
    #    utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log)
    #    filenamePath = None
     
    #if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)

    dict_rule = {}

    try:
        # Try the tab delimiter first
        for line in utils.readCsv(datFilePath, delimiter = "\t"):
            if len(line) < 3:  # need at least 3 columns: line[0] = reaction id, line[2] = GPR rule
                utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
                continue
            
            if line[2] == "":
                dict_rule[line[0]] = ruleUtils.OpList([""])
            else:
                dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
                
    except Exception as e:
        # Tab parsing failed: retry from scratch with a comma delimiter
        try:
            dict_rule = {}
            for line in utils.readCsv(datFilePath, delimiter = ","):
                if len(line) < 3:
                    utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
                    continue
                
                if line[2] == "":
                    dict_rule[line[0]] = ruleUtils.OpList([""])
                else:
                    dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
        except Exception as e2:
            raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")

    if not dict_rule:
            raise ValueError("No valid rules found in the uploaded file. Please check the file format.")
    # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
    return dict_rule
+
+
def main(args:List[str] = None) -> None:
    """
    Entry point: parse the frontend (XML wrapper) arguments, read the gene
    expression dataset, evaluate the uploaded rules and write the RAS scores
    to ARGS.ras_output.

    Returns:
        None
    """
    # get args from frontend (related xml)
    global ARGS
    ARGS = process_args(args)

    # read dataset; force the gene-name column to string
    dataset = read_dataset(ARGS.input, "dataset")
    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)

    # remove versioning from gene names (keep only the part before the first '.')
    dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]

    rules = load_custom_rules()
    reactions = list(rules.keys())

    save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
    if ERRORS: utils.logWarning(
        f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
        ARGS.out_log)  


    ############

    # Legacy flow for built-in (non-custom) models, kept for reference while in beta.
    # handle custom models
    #model :utils.Model = ARGS.rules_selector

    #if model is utils.Model.Custom:
    #    rules = load_custom_rules()
    #    reactions = list(rules.keys())

    #    save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
    #    if ERRORS: utils.logWarning(
    #        f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
    #        ARGS.out_log)
        
    #    return
    
    # This is the standard flow of the ras_generator program, for non-custom models.
    #name = "RAS Dataset"
    #type_gene = gene_type(dataset.iloc[0, 0], name)

    #rules      = model.getRules(ARGS.tool_dir)
    #genes      = data_gene(dataset, type_gene, name, None)
    #ids, rules = load_id_rules(rules.get(type_gene))

    #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
    #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
    
    #if err: utils.logWarning(
    #    f"Warning: gene(s) {err} not found in class \"{name}\", " +
    #    "the expression level for this gene will be considered NaN",
    #    ARGS.out_log)
    
    print("Execution succeded")

###############################################################################
if __name__ == "__main__":
    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/ras_generator_beta.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,105 @@
+<tool id="MaREA RAS Generator - Beta" name="Expression2RAS - BETA" version="2.0.0">
+    <description>- Reaction Activity Scores computation</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.24.4">numpy</requirement>
+        <requirement type="package" version="2.0.3">pandas</requirement>
+        <requirement type="package" version="5.2.2">lxml</requirement>
+		<requirement type="package" version="0.29.0">cobra</requirement>
+	</requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+      	python $__tool_directory__/ras_generator_beta.py
+        --tool_dir $__tool_directory__
+        --model_upload $model_upload
+        --model_upload_name $model_upload.element_identifier
+        --input $input
+        --none $none
+
+        --out_log $log
+        --ras_output $ras_output
+        
+        ]]>
+    </command>
+    <inputs>
+        <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" 
+                label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />
+        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Gene Expression dataset:" />
+        <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" 
+                help="Default: Dataset_RAS. Do not use white spaces or special symbols." />
+        <param name="none" argument="--none" type="boolean" checked="true" label="(A and NaN) solved as (A)?" /> 
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="Expression2RAS - $name - Log" />
+        <data format="tabular" name="ras_output" label='$name'/>
+    </outputs>
+
+    <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool computes Reaction Activity Scores from gene expression (RNA-seq) dataset(s), as described in Graudenzi et al. Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power. Journal of Biomedical Informatics, 2018, 87: 37-49.
+ 
+Accepted files:
+    - A gene expression dataset
+ 
+Format:
+Tab-separated text file reporting the normalized expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). All values must be positive to correctly compute the RAS.
+Column header: sample ID.
+Row header: gene ID.
+ 
+ 
+Optional files:
+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
+
+	* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
+	* .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2).
+ 
+Computation option ‘(A and NaN) solved as (A)’:
+In case of a missing expression value, referred to as NaN (Not a Number), for a gene joined with an AND operator in a given GPR rule, the rule ‘A and NaN’ is resolved as follows:
+ 
+If YES is selected: the GPR will be solved as A.
+ 
+If NO is selected: the GPR will be disregarded tout-court (i.e., treated as NaN).
+
+Example input
+-------------
+
+Custom GPR rules:
+
++------------+--------------------------------------+   
+| id         |         rule (with entrez-id         |   
++============+======================================+   
+| r1642      |             155060 or 10357          |   
++------------+--------------------------------------+    
+| r1643      |        155060 or 100134869           |    
++------------+--------------------------------------+    
+| r1640      |     155060 and 100134869 or 10357    |   
++------------+--------------------------------------+
+
+RNA-seq dataset:
+
++------------+----------------+----------------+----------------+ 
+| Hugo_ID    |   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
++============+================+================+================+
+| HGNC:24086 |    0.523167    |    0.371355    |    0.925661    |  
++------------+----------------+----------------+----------------+    
+| HGNC:24086 |    0.568765    |    0.765567    |    0.456789    |    
++------------+----------------+----------------+----------------+    
+| HGNC:9876  |    0.876545    |    0.768933    |    0.987654    |  
++------------+----------------+----------------+----------------+
+| HGNC:9     |    0.456788    |    0.876543    |    0.876542    |    
++------------+----------------+----------------+----------------+    
+| HGNC:23    |    0.876543    |    0.786543    |    0.897654    |   
++------------+----------------+----------------+----------------+
+
+]]>
+    </help>
+<expand macro="citations" />
+</tool>
+	
--- a/COBRAxy/ras_to_bounds.xml	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/ras_to_bounds.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -87,7 +87,7 @@
 By default, all reactions in model.medium that are not present in the medium file have lower bound set to 0.0 and not set to the default model value.
 
 Accepted files:
-    - A model: JSON, XML, MAT or YAML (.yml) file reporting reactions and rules contained in the model. Supported compressed formats: .zip, .gz and .bz2. Filename must follow the pattern: {model_name}.{extension}.[zip|gz|bz2]
+    - A model: JSON or XML file reporting reactions and rules contained in the model.   
     - RAS matrix: tab-separated RAS file as returned by RAS generator. Multiple RAS files having different file name can be uploaded too (e.g. one RAS matrix for normal cells and one for cancer cells). Note that if multiple RAs matrices are uploaded, the bounds are normalzed across all cells.
     - Medium: tab-separated file containing lower and upper-bounds of medium reactions.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/ras_to_bounds_beta.py	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,287 @@
+import argparse
+import utils.general_utils as utils
+from typing import Optional, List
+import os
+import numpy as np
+import pandas as pd
+import cobra
+import sys
+import csv
+from joblib import Parallel, delayed, cpu_count
+
+################################# process args ###############################
def process_args(args :List[str] = None) -> argparse.Namespace:
    """
    Parse the command-line arguments supplied by the Galaxy frontend.

    Args:
        args (list): List of command-line arguments (None means sys.argv).

    Returns:
        Namespace: An object containing parsed arguments.
    """
    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                     description = 'process some value\'s')
    
    # Model selection: a built-in model (ENGRO2) or a user-supplied custom one.
    parser.add_argument(
        '-ms', '--model_selector', 
        type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom],
        help = 'chose which type of model you want use')
    
    parser.add_argument("-mo", "--model", type = str,
        help = "path to input file with custom rules, if provided")
    
    parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name")

    # Growth medium selection: a named built-in medium or a custom file.
    parser.add_argument(
        '-mes', '--medium_selector', 
        default = "allOpen",
        help = 'chose which type of medium you want use')
    
    parser.add_argument("-meo", "--medium", type = str,
        help = "path to input file with custom medium, if provided")

    parser.add_argument('-ol', '--out_log', 
                        help = "Output log")
    
    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        required = True,
                        help = 'your tool directory')
    
    # Comma-separated list of RAS matrix paths (used only when --ras_selector is true).
    parser.add_argument('-ir', '--input_ras',
                        type=str,
                        required = False,
                        help = 'input ras')
    
    # Comma-separated list of RAS matrix file names, parallel to --input_ras.
    parser.add_argument('-rn', '--name',
                type=str,
                help = 'ras class names')
    
    parser.add_argument('-rs', '--ras_selector',
                        required = True,
                        type=utils.Bool("using_RAS"),
                        help = 'ras selector')

    parser.add_argument('-cc', '--cell_class',
                    type = str,
                    help = 'output of cell class')
    parser.add_argument(
        '-idop', '--output_path', 
        type = str,
        default='ras_to_bounds/',
        help = 'output path for maps')
    
    
    ARGS = parser.parse_args(args)
    return ARGS
+
+########################### warning ###########################################
def warning(s :str) -> None:
    """
    Append a warning message to the log file (ARGS.out_log) and echo it to stdout.

    Args:
        s (str): The warning message to be logged and printed.

    Returns:
      None
    """
    with open(ARGS.out_log, 'a') as log_file:
        log_file.write(s + "\n\n")
    print(s)
+
+############################ dataset input ####################################
def read_dataset(data :str, name :str) -> pd.DataFrame:
    """
    Load a tab-separated dataset into a pandas DataFrame.

    Args:
        data (str): Path to the file containing the dataset.
        name (str): Name of the dataset, used in error messages.

    Returns:
        pandas.DataFrame: DataFrame containing the dataset.

    Raises:
        SystemExit: When the file is empty or has fewer than two columns,
        the execution is aborted with a message.
    """
    abort_msg = 'Execution aborted: wrong format of ' + name + '\n'
    try:
        frame = pd.read_csv(data, sep = '\t', header = 0, engine='python')
    except pd.errors.EmptyDataError:
        sys.exit(abort_msg)
    if len(frame.columns) < 2:
        sys.exit(abort_msg)
    return frame
+
+
def apply_ras_bounds(bounds, ras_row):
    """
    Scale the model's reaction bounds by one cell's RAS values.

    Args:
        bounds (pd.DataFrame): Model bounds, indexed by reaction id with
            'lower_bound' and 'upper_bound' columns.
        ras_row (pd.Series): RAS scaling factors per reaction; NaN entries
            leave the corresponding bounds untouched.

    Returns:
        new_bounds (pd.DataFrame): a scaled copy of the bounds (input untouched).
    """
    new_bounds = bounds.copy()
    for rxn in ras_row.index:
        factor = ras_row[rxn]
        if np.isnan(factor):
            continue
        lb = bounds.loc[rxn, "lower_bound"]
        ub = bounds.loc[rxn, "upper_bound"]
        # Only non-zero bounds are scaled; a zero bound stays pinned at zero.
        if ub != 0:
            new_bounds.loc[rxn, "upper_bound"] = float(ub * factor)
        if lb != 0:
            new_bounds.loc[rxn, "lower_bound"] = float(lb * factor)
    return new_bounds
+
def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder):
    """
    Apply one cell's RAS scaling factors to the model bounds and save them to CSV.

    Args:
        cellName (str): The name of the RAS cell (used for naming the output file).
        ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds.
        model (cobra.Model): The metabolic model providing the baseline bounds.
        rxns_ids (list of str): List of reaction IDs, used as the bounds index.
        output_folder (str): Folder path where the output CSV file will be saved.

    Returns:
        None
    """
    bounds = pd.DataFrame(
        [(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions],
        index=rxns_ids, columns=["lower_bound", "upper_bound"])
    new_bounds = apply_ras_bounds(bounds, ras_row)
    # os.path.join tolerates output_folder with or without a trailing separator
    # (the original string concatenation silently required the trailing '/').
    new_bounds.to_csv(os.path.join(output_folder, cellName + ".csv"), sep='\t', index=True)
+
def generate_bounds(model: cobra.Model, medium: dict, ras=None, output_folder='output/') -> None:
    """
    Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments.
    
    Args:
        model (cobra.Model): The metabolic model for which bounds will be generated.
        medium (dict): A dictionary where keys are reaction IDs and values are the medium conditions.
        ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None.
        output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'.

    Returns:
        None: bounds are written as CSV files into output_folder (one file per
        cell when ras is given, a single 'bounds.csv' otherwise).
    """
    rxns_ids = [rxn.id for rxn in model.reactions]

    # Set all reactions to zero in the medium
    for rxn_id, _ in model.medium.items():
        model.reactions.get_by_id(rxn_id).lower_bound = float(0.0)
    
    # Set medium conditions (negative lower bound allows uptake up to `value`)
    for reaction, value in medium.items():
        if value is not None:
            model.reactions.get_by_id(reaction).lower_bound = -float(value)
            
            
    # Perform Flux Variability Analysis (FVA) on this medium
    df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
    
    # Set FVA bounds: constrain every reaction to its feasible flux range
    for reaction in rxns_ids:
        model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"])
        model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"])

    if ras is not None:
        # One bounds file per cell line, scaled by that cell's RAS row (parallel across cores).
        Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(cellName, ras_row, model, rxns_ids, output_folder) for cellName, ras_row in ras.iterrows())
    else:
        # No RAS provided: emit a single bounds file with scaling factor 1 everywhere.
        bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
        newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids))
        newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
    pass
+
+
+
+############################# main ###########################################
def main(args:List[str] = None) -> None:
    """
    Entry point: parse the frontend arguments, optionally load and normalize the
    RAS matrices, load the model and medium, then write the bounds files.

    Returns:
        None
    """
    # NOTE(review): hard-coded folder name; ARGS.output_path (default
    # 'ras_to_bounds/') is expected to point at the same location -- confirm
    # against the XML wrapper.
    if not os.path.exists('ras_to_bounds'):
        os.makedirs('ras_to_bounds')


    global ARGS
    ARGS = process_args(args)

    if(ARGS.ras_selector == True):
        ras_file_list = ARGS.input_ras.split(",")
        ras_file_names = ARGS.name.split(",")
        # Class names are derived from file names, so duplicates would collide.
        if len(ras_file_names) != len(set(ras_file_names)):
            error_message = "Duplicated file names in the uploaded RAS matrices."
            warning(error_message)
            raise ValueError(error_message)
            pass  # unreachable after the raise (kept as-is)
        ras_class_names = []
        for file in ras_file_names:
            # strip the file extension to obtain the class name
            ras_class_names.append(file.rsplit(".", 1)[0])
        ras_list = []
        class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
        for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
            # Load one RAS matrix: reactions as rows, cells as columns; transpose
            # so each row becomes a cell and 'None' strings become real NaNs.
            ras = read_dataset(ras_matrix, "ras dataset")
            ras.replace("None", None, inplace=True)
            ras.set_index("Reactions", drop=True, inplace=True)
            ras = ras.T
            ras = ras.astype(float)
            if(len(ras_file_list)>1):
                #append class name to patient id (dataframe index)
                ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
            else:
                ras.index = [f"{idx}" for idx in ras.index]
            ras_list.append(ras)
            for patient_id in ras.index:
                class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
        
        
        # Concatenate all ras DataFrames into a single DataFrame
        ras_combined = pd.concat(ras_list, axis=0)
        # Normalize the RAS values by max RAS (per reaction, across all cells)
        ras_combined = ras_combined.div(ras_combined.max(axis=0))
        ras_combined.dropna(axis=1, how='all', inplace=True)


    
    model_type :utils.Model = ARGS.model_selector
    if model_type is utils.Model.Custom:
        model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
    else:
        model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)

    if(ARGS.medium_selector == "Custom"):
        medium = read_dataset(ARGS.medium, "medium dataset")
        medium.set_index(medium.columns[0], inplace=True)
        medium = medium.astype(float)
        medium = medium[medium.columns[0]].to_dict()
    else:
        # Built-in media table shipped with the tool; selector names use '_' for spaces.
        df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
        ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
        medium = df_mediums[[ARGS.medium_selector]]
        medium = medium[ARGS.medium_selector].to_dict()

    if(ARGS.ras_selector == True):
        generate_bounds(model, medium, ras = ras_combined, output_folder=ARGS.output_path)
        class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False)
    else:
        generate_bounds(model, medium, output_folder=ARGS.output_path)

    pass
        
##############################################################################
if __name__ == "__main__":
    main()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/ras_to_bounds_beta.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,121 @@
+<tool id="MaREA RAS to bounds - Beta" name="RAStoBounds - BETA" version="2.0.0">
+    
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+
+	<requirements>
+        <requirement type="package" version="1.24.4">numpy</requirement>
+        <requirement type="package" version="2.0.3">pandas</requirement>
+		<requirement type="package" version="0.29.0">cobra</requirement>
+        <requirement type="package" version="5.2.2">lxml</requirement>
+        <requirement type="package" version="1.4.2">joblib</requirement>
+	</requirements>
+
+    <command detect_errors="exit_code">
+        <![CDATA[
+      	python $__tool_directory__/ras_to_bounds_beta.py
+        --tool_dir $__tool_directory__
+        --model_selector $cond_model.model_selector
+        --cell_class $cell_class
+        #if $cond_model.model_selector == 'Custom'
+            --model $model
+            --model_name $model.element_identifier
+        #end if
+        --medium_selector $cond_medium.medium_selector
+        #if $cond_medium.medium_selector == 'Custom'
+            --medium $medium
+        #end if
+        --ras_selector $cond_ras.ras_choice
+        #set $names = ""
+        #if $cond_ras.ras_choice == "True"
+            --input_ras "${",".join(map(str, $cond_ras.input_ras))}"
+            #for $input_temp in $cond_ras.input_ras:
+                #set $names = $names + $input_temp.element_identifier + ","
+            #end for
+        #end if
+        --name "$names"
+        --out_log $log
+        ]]>
+    </command>
+    <inputs>
+        <conditional name="cond_model">
+            <expand macro="options_ras_to_bounds_model"/>
+            <when value="Custom">
+                <param name="model" argument="--model" type="data" format="json, xml" label="Custom model" />
+            </when>
+        </conditional> 
+
+        <conditional name="cond_ras">
+			<param name="ras_choice" argument="--ras_choice" type="select" label="Do want to use RAS?">
+                	<option value="True" selected="true">Yes</option>
+                	<option value="False">No</option>
+        	</param>
+            <when value="True">
+                <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" />
+            </when>
+        </conditional>  
+        
+        <conditional name="cond_medium">
+            <expand macro="options_ras_to_bounds_medium"/>
+            <when value="Custom">
+                <param name="medium" argument="--medium" type="data" format="tabular, csv, tsv" label="Custom medium" />
+            </when>
+        </conditional> 
+
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="RAStoBounds- Log" />
+        <data format="tabular" name="cell_class" label="RAStoBounds - Cells class" />
+        <collection name="ras_to_bounds" type="list" label="Ras to Bounds">
+            <discover_datasets name = "collection" pattern="__name_and_ext__" directory="ras_to_bounds"/>
+        </collection>
+
+    </outputs>
+
+    <help>
+
+    <![CDATA[
+
+What it does
+-------------
+
+This tool generates the reactions bounds for a given metabolic model (JSON or XML format) both with and without the use of the Reaction Activity Scores (RAS) matrix generated by RAS generator.
+Moreover, it enables the use of custom/pre-defined growth media to constrain exchange reactions. For a custom medium, it is suggested to use the template file returned by the Custom Data Generator tool.
+If the RAS matrix, generated by the RAS generator tool, is used, then a bounds file is generated for each cell. Otherwise, a single bounds file is returned.
+By default, all reactions in model.medium that are not present in the medium file have lower bound set to 0.0 and not set to the default model value.
+
+Accepted files:
+    - A model: JSON, XML, MAT or YAML (.yml) file reporting reactions and rules contained in the model. Supported compressed formats: .zip, .gz and .bz2. Filename must follow the pattern: {model_name}.{extension}.[zip|gz|bz2]
+    - RAS matrix: tab-separated RAS file as returned by RAS generator. Multiple RAS files having different file names can be uploaded too (e.g. one RAS matrix for normal cells and one for cancer cells). Note that if multiple RAS matrices are uploaded, the bounds are normalized across all cells.
+    - Medium: tab-separated file containing lower and upper-bounds of medium reactions.
+
+Example of custom growth medium file:
+
+
++------------+----------------+----------------+
+| Reaction ID|   lower_bound  |   upper_bound  |  
++============+================+================+
+| r1         |    0.123167    |    0.371355    | 
++------------+----------------+----------------+   
+| r2         |    0.268765    |    0.765567    |  
++------------+----------------+----------------+   
+
+
+Example for multiple RAS matrices:
+    - cancer.csv and normal.csv generated by RAS generator tool (the two class names are 'cancer' and 'normal').
+    - This tool returns one unique collection of bounds files for both cancer and normal cells (normalization is performed across all cells).
+    - The association cell-class is reported in the 'cell_class' file, which is useful to perform flux enrichment analysis based on class membership.
+
+Output:
+-------------
+
+The tool generates:
+    - bounds: reporting the bounds of the model, or cells if RAS is used. Format: tab-separated.
+    - Classes: a file containing the class of each cell (only if multiple RAS matrices were uploaded). The class name of a RAS matrix corresponds to its file name. Format: tab-separated.
+    - a log file (.txt).
+    ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- a/COBRAxy/rps_generator.py	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/rps_generator.py	Mon Sep 08 14:44:15 2025 +0000
@@ -25,11 +25,14 @@
     parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                      description = 'process some value\'s'+
                                      ' abundances and reactions to create RPS scores.')
-    
-    parser.add_argument("-rl", "--model_upload", type = str,
-        help = "path to input file containing the reactions")
-
-    # model_upload custom
+    parser.add_argument('-rc', '--reaction_choice', 
+                        type = str,
+                        default = 'default',
+                        choices = ['default','custom'], 
+                        help = 'chose which type of reaction dataset you want use')
+    parser.add_argument('-cm', '--custom',
+                        type = str,
+                        help='your dataset if you want custom reactions')
     parser.add_argument('-td', '--tool_dir',
                         type = str,
                         required = True,
@@ -118,8 +121,7 @@
     """
     name = clean_metabolite_name(name)
     for id, synonyms in syn_dict.items():
-        if name in synonyms:
-            return id
+        if name in synonyms: return id
     
     return ""
 
@@ -129,8 +131,7 @@
     Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
 
     Parameters:
-        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and 
-                          stoichiometric coefficients as values.
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
         dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
         cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
 
@@ -198,26 +199,23 @@
     Returns:
         None
     """
-
     cell_lines = dataset[0][1:]
     abundances_dict = {}
 
+    translationIsApplied = ARGS.reaction_choice == "default"
     for row in dataset[1:]:
-        id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
-        if id: 
-            abundances_dict[id] = list(map(utils.Float(), row[1:]))
-
+        id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0]
+        if id: abundances_dict[id] = list(map(utils.Float(), row[1:]))
+    
     missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
-
+    
     rps_scores :Dict[Dict[str, float]] = {}
     for pos, cell_line_name in enumerate(cell_lines):
         abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
-
         rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
     
     df = pd.DataFrame.from_dict(rps_scores)
-    df = df.loc[list(reactions.keys()),:]
-    print(df.head(10))
+    
     df.index.name = 'Reactions'
     df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
 
@@ -240,35 +238,18 @@
         syn_dict = pk.load(sd)
 
     dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
-    tmp_dict = None
-    #if ARGS.reaction_choice == 'default':
-    #    reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
-    #    substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
+
+    if ARGS.reaction_choice == 'default':
+        reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
+        substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
     
-    #elif ARGS.reaction_choice == 'custom':
-    reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)
-    for r, s in reactions.items():
-        tmp_list = list(s.keys())
-        for k in tmp_list:
-            if k[-2] == '_':
-                s[k[:-2]] = s.pop(k)
-    substrateFreqTable = {}
-    for _, substrates in reactions.items():
-        for substrateName, _ in substrates.items():
-            if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
-            substrateFreqTable[substrateName] += 1
-
-        print(f"Reactions: {reactions}")
-        print(f"Substrate Frequencies: {substrateFreqTable}")
-        print(f"Synonyms: {syn_dict}")
-        tmp_dict = {}
-        for metabName, freq in substrateFreqTable.items():
-            tmp_metabName = clean_metabolite_name(metabName)
-            for syn_key, syn_list in syn_dict.items():
-                if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
-                    print(f"Mapping {tmp_metabName} to {syn_key}")
-                    tmp_dict[syn_key] = syn_list
-                    tmp_dict[syn_key].append(tmp_metabName)
+    elif ARGS.reaction_choice == 'custom':
+        reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
+        substrateFreqTable = {}
+        for _, substrates in reactions.items():
+            for substrateName, _ in substrates.items():
+                if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
+                substrateFreqTable[substrateName] += 1
 
     rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
     print('Execution succeded')
--- a/COBRAxy/rps_generator.xml	Mon Sep 08 13:52:58 2025 +0000
+++ b/COBRAxy/rps_generator.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -12,19 +12,29 @@
     <command detect_errors="exit_code">
         <![CDATA[
       	python $__tool_directory__/rps_generator.py
-        --input $input	
+        --input $input
+      	--reaction_choice $cond_reactions.reaction_choice	
         --tool_dir $__tool_directory__
         --out_log $log	
         --rps_output $rps_output
-        --model_upload $model_upload
+        #if $cond_reactions.reaction_choice == 'custom'
+        	--custom $cond_reactions.Custom_react
+        #end if
         ]]>
     </command>
-    <inputs> 
-        <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" 
-                label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />
-
+    <inputs>
         <param name="input" argument="--input" type="data" format="tabular, tsv, csv" label="Abundance dataset:" />
         <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RPS" help="Default: Dataset_RPS. Do not use white spaces or special symbols." />
+        
+        <conditional name="cond_reactions">
+			<param name="reaction_choice" argument="--reaction_choice" type="select" label="Choose reaction dataset:">
+                	<option value="default" selected="true">ENGRO2 reaction dataset </option>
+                	<option value="custom">Custom reaction dataset</option>
+        	</param>
+            <when value="custom">
+                <param name="Custom_react" type="data" format="csv" label="Custom reactions" />
+            </when>
+        </conditional>  
     </inputs>
 
     <outputs>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/rps_generator_beta.py	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,277 @@
+import math
+import argparse
+
+import numpy  as np
+import pickle as pk
+import pandas as pd
+
+from typing import Optional, List, Dict
+
+import utils.general_utils as utils
+import utils.reaction_parsing as reactionUtils
+
+########################## argparse ##########################################
+ARGS :argparse.Namespace
+def process_args(args:List[str] = None) -> argparse.Namespace:
+    """
+    Processes command-line arguments.
+
+    Args:
+        args (list): List of command-line arguments.
+
+    Returns:
+        Namespace: An object containing parsed arguments.
+    """
+    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
+                                     description = 'process some value\'s'+
+                                     ' abundances and reactions to create RPS scores.')
+    
+    parser.add_argument("-rl", "--model_upload", type = str,
+        help = "path to input file containing the reactions")
+
+    # model_upload custom
+    parser.add_argument('-td', '--tool_dir',
+                        type = str,
+                        required = True,
+                        help = 'your tool directory')
+    parser.add_argument('-ol', '--out_log', 
+                        help = "Output log")    
+    parser.add_argument('-id', '--input',
+                        type = str,
+                        required = True,
+                        help = 'input dataset')
+    parser.add_argument('-rp', '--rps_output',
+                        type = str,
+                        required = True,
+                        help = 'rps output')
+    
+    args = parser.parse_args(args)
+    return args
+
+############################ dataset name #####################################
+def name_dataset(name_data :str, count :int) -> str:
+    """
+    Produces a unique name for a dataset based on what was provided by the user. The default name for any dataset is "Dataset", thus if the user didn't change it this function appends f"_{count}" to make it unique.
+
+    Args:
+        name_data : name associated with the dataset (from frontend input params)
+        count : counter from 1 to make these names unique (external)
+
+    Returns:
+        str : the name made unique
+    """
+    if str(name_data) == 'Dataset':
+        return str(name_data) + '_' + str(count)
+    else:
+        return str(name_data)
+
+############################ get_abund_data ####################################
+def get_abund_data(dataset: pd.DataFrame, cell_line_index:int) -> Optional[pd.Series]:
+    """
+    Extracts abundance data and turns it into a series for a specific cell line from the dataset, which rows are
+    metabolites and columns are cell lines.
+
+    Args:
+        dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites.
+        cell_line_index (int): The index of the cell line of interest in the dataset.
+
+    Returns:
+        pd.Series or None: A series containing abundance values for the specified cell line.
+                           The name of the series is the name of the cell line.
+                           Returns None if the cell index is invalid.
+    """
+    if cell_line_index < 0 or cell_line_index >= len(dataset.index):
+        print(f"Errore: This cell line index: '{cell_line_index}' is not valid.")
+        return None
+
+    cell_line_name = dataset.columns[cell_line_index]
+    abundances_series = dataset[cell_line_name][1:]
+
+    return abundances_series
+
+############################ clean_metabolite_name ####################################
+def clean_metabolite_name(name :str) -> str:
+    """
+    Removes some characters from a metabolite's name, provided as input, and makes it lowercase in order to simplify
+    the search of a match in the dictionary of synonyms.
+
+    Args:
+        name : the metabolite's name, as given in the dataset.
+    
+    Returns:
+        str : a new string with the cleaned name.
+    """
+    return "".join(ch for ch in name if ch not in ",;-_'([{ }])").lower()
+
+############################ get_metabolite_id ####################################
+def get_metabolite_id(name :str, syn_dict :Dict[str, List[str]]) -> str:
+    """
+    Looks through a dictionary of synonyms to find a match for a given metabolite's name.
+
+    Args:
+        name : the metabolite's name, as given in the dataset.
+        syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values.
+    
+    Returns:
+        str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use.
+        An empty string is returned if a match isn't found.
+    """
+    name = clean_metabolite_name(name)
+    for id, synonyms in syn_dict.items():
+        if name in synonyms:
+            return id
+    
+    return ""
+
+############################ check_missing_metab ####################################
+def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
+    """
+    Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
+
+    Parameters:
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and 
+                          stoichiometric coefficients as values.
+        dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
+        cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
+
+    Returns:
+        list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1.
+
+    Side effects:
+        dataset_by_rows : mut
+    """
+    missing_list = []
+    for reaction in reactions.values():
+        for metabolite in reaction.keys():
+          if metabolite not in dataset_by_rows:
+            dataset_by_rows[metabolite] = [1] * cell_lines_amt
+            missing_list.append(metabolite)
+
+    return missing_list
+
+############################ calculate_rps ####################################
+def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str], substrateFreqTable: Dict[str, int]) -> Dict[str, float]:
+    """
+    Calculate the Reaction Propensity scores (RPS) based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample.
+    The score is computed as the product of the concentrations of the reacting substances, with each concentration raised to a power equal to its stoichiometric coefficient
+    for each reaction using the provided coefficient and abundance values. The value is then normalized, based on how frequent the metabolite is in the selected model's reactions,
+    and log-transformed.
+
+    Parameters:
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
+        abundances (dict): A dictionary representing metabolite abundances where keys are metabolite names and values are their corresponding abundances.
+        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
+        missing_list (list): A list containing metabolite names that were missing in the original abundances dictionary and thus their values were set to 1.
+        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).
+        
+    Returns:
+        dict: A dictionary containing Reaction Propensity Scores (RPS) where keys are reaction names and values are the corresponding RPS scores.
+    """
+    rps_scores = {}
+
+    for reaction_name, substrates in reactions.items():
+        total_contribution = 0
+        metab_significant  = False
+        for metabolite, stoichiometry in substrates.items():
+            abundance = 1 if math.isnan(abundances[metabolite]) else abundances[metabolite]
+            if metabolite not in black_list and metabolite not in missing_list:
+                metab_significant = True
+            
+            total_contribution += math.log((abundance + np.finfo(float).eps) / substrateFreqTable[metabolite]) * stoichiometry
+        
+        rps_scores[reaction_name] = total_contribution if metab_significant else math.nan
+    
+    return rps_scores
+
+############################ rps_for_cell_lines ####################################
+def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]], substrateFreqTable: Dict[str, int]) -> None:
+    """
+    Calculate Reaction Propensity Scores (RPS) for each cell line represented in the dataframe and creates an output file.
+
+    Parameters:
+        dataset : the dataset's data, by rows
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
+        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
+        syn_dict (dict): A dictionary where keys are general metabolite names and values are lists of possible synonyms.
+        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).
+
+    Returns:
+        None
+    """
+
+    cell_lines = dataset[0][1:]
+    abundances_dict = {}
+
+    for row in dataset[1:]:
+        id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
+        if id: 
+            abundances_dict[id] = list(map(utils.Float(), row[1:]))
+
+    missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
+
+    rps_scores :Dict[Dict[str, float]] = {}
+    for pos, cell_line_name in enumerate(cell_lines):
+        abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
+
+        rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
+    
+    df = pd.DataFrame.from_dict(rps_scores)
+    df = df.loc[list(reactions.keys()),:]
+    print(df.head(10))
+    df.index.name = 'Reactions'
+    df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
+
+############################ main ####################################
+def main(args:List[str] = None) -> None:
+    """
+    Initializes everything and sets the program in motion based on the fronted input arguments.
+
+    Returns:
+        None
+    """
+    global ARGS
+    ARGS = process_args(args)
+
+    # TODO:use utils functions vvv
+    with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl:
+        black_list = pk.load(bl)
+
+    with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
+        syn_dict = pk.load(sd)
+
+    dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
+    tmp_dict = None
+    #if ARGS.reaction_choice == 'default':
+    #    reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
+    #    substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
+    
+    #elif ARGS.reaction_choice == 'custom':
+    reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)
+    for r, s in reactions.items():
+        tmp_list = list(s.keys())
+        for k in tmp_list:
+            if k[-2] == '_':
+                s[k[:-2]] = s.pop(k)
+    substrateFreqTable = {}
+    for _, substrates in reactions.items():
+        for substrateName, _ in substrates.items():
+            if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
+            substrateFreqTable[substrateName] += 1
+
+        print(f"Reactions: {reactions}")
+        print(f"Substrate Frequencies: {substrateFreqTable}")
+        print(f"Synonyms: {syn_dict}")
+        tmp_dict = {}
+        for metabName, freq in substrateFreqTable.items():
+            tmp_metabName = clean_metabolite_name(metabName)
+            for syn_key, syn_list in syn_dict.items():
+                if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
+                    print(f"Mapping {tmp_metabName} to {syn_key}")
+                    tmp_dict[syn_key] = syn_list
+                    tmp_dict[syn_key].append(tmp_metabName)
+
+    rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
+    print('Execution succeded')
+
+##############################################################################
+if __name__ == "__main__": main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COBRAxy/rps_generator_beta.xml	Mon Sep 08 14:44:15 2025 +0000
@@ -0,0 +1,69 @@
+<tool id="MaREA RPS Generator - Beta" name="Expression2RPS - BETA" version="2.0.0">
+    <description>- Reaction Propensity Scores computation</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.24.4">numpy</requirement>
+        <requirement type="package" version="2.0.3">pandas</requirement>
+        <requirement type="package" version="5.2.2">lxml</requirement>
+		<requirement type="package" version="0.29.0">cobra</requirement>
+	</requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+      	python $__tool_directory__/rps_generator_beta.py
+        --input $input	
+        --tool_dir $__tool_directory__
+        --out_log $log	
+        --rps_output $rps_output
+        --model_upload $model_upload
+        ]]>
+    </command>
+    <inputs> 
+        <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" 
+                label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." />
+
+        <param name="input" argument="--input" type="data" format="tabular, tsv, csv" label="Abundance dataset:" />
+        <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RPS" help="Default: Dataset_RPS. Do not use white spaces or special symbols." />
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="Expression2RPS - $name - Log" />
+        <data format="tabular" name="rps_output" label="$name"/>
+    </outputs>
+
+    <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool computes Reaction Propensity Scores based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample. 
+The score is computed as the product of the concentrations of the reacting substances, with each concentration raised to a power equal to its stoichiometric coefficient. According to the mass action law, the rate of any chemical reaction is indeed proportional to this product.
+This assumption holds as long as the substrate is in significant excess over the enzyme constant KM.
+If a metabolite is either missing in the model provided with respect to its reactions or it is present in our "black list", the RPS score is set to NaN.
+This "black list" of metabolites contains those substrates that are present in too many reactions to be significant. It is defined in the file black_list.pickle and cannot be modified by the user. 
+ 
+Accepted files:
+    - An abundance dataset: Tab-separated text file reporting the abundance value of each metabolite for each cell line in the dataset. 
+                            Column header: cell line ID. 
+                            Row header: metabolite ID.    
+
+ 
+Optional files:
+    - Custom reaction dataset: .csv file specifying for each reaction ID the corresponding formula.
+                                First column: reaction ID
+                                Second column: reaction formula.
+
+
+Output:
+-------------
+
+The tool generates:
+    - a tab-separated file (.csv): reporting the RPS values for each reaction and each cell line in the dataset.
+    - a log file (.txt).
+]]>
+    </help>
+<expand macro="citations" />
+</tool>
+