| 
418
 | 
     1 import os
 | 
| 
 | 
     2 import csv
 | 
| 
 | 
     3 import cobra
 | 
| 
 | 
     4 import pickle
 | 
| 
 | 
     5 import argparse
 | 
| 
 | 
     6 import pandas as pd
 | 
| 
 | 
     7 from typing import Optional, Tuple, Union, List, Dict
 | 
| 
 | 
     8 import utils.general_utils as utils
 | 
| 
 | 
     9 import utils.rule_parsing  as rulesUtils
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 ################################- DATA GENERATION -################################
 | 
| 
 | 
# Alias used in the annotations of this module: reaction identifiers are plain strings.
ReactionId = str
 | 
| 
 | 
    13 def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
 | 
| 
 | 
    14     """
 | 
| 
 | 
    15     Generates a dictionary mapping reaction ids to rules from the model.
 | 
| 
 | 
    16 
 | 
| 
 | 
    17     Args:
 | 
| 
 | 
    18         model : the model to derive data from.
 | 
| 
 | 
    19         asParsed : if True parses the rules to an optimized runtime format, otherwise leaves them as strings.
 | 
| 
 | 
    20 
 | 
| 
 | 
    21     Returns:
 | 
| 
 | 
    22         Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules.
 | 
| 
 | 
    23         Dict[ReactionId, str] : the generated dictionary of raw rules.
 | 
| 
 | 
    24     """
 | 
| 
 | 
    25     # Is the below approach convoluted? yes
 | 
| 
 | 
    26     # Ok but is it inefficient? probably
 | 
| 
 | 
    27     # Ok but at least I don't have to repeat the check at every rule (I'm clinically insane)
 | 
| 
 | 
    28     _ruleGetter   =  lambda reaction : reaction.gene_reaction_rule
 | 
| 
 | 
    29     ruleExtractor = (lambda reaction :
 | 
| 
 | 
    30         rulesUtils.parseRuleToNestedList(_ruleGetter(reaction))) if asParsed else _ruleGetter
 | 
| 
 | 
    31 
 | 
| 
 | 
    32     return {
 | 
| 
 | 
    33         reaction.id : ruleExtractor(reaction)
 | 
| 
 | 
    34         for reaction in model.reactions
 | 
| 
 | 
    35         if reaction.gene_reaction_rule }
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]:
 | 
| 
 | 
    38     """
 | 
| 
 | 
    39     Generates a dictionary mapping reaction ids to reaction formulas from the model.
 | 
| 
 | 
    40 
 | 
| 
 | 
    41     Args:
 | 
| 
 | 
    42         model : the model to derive data from.
 | 
| 
 | 
    43         asParsed : if True parses the reactions to an optimized runtime format, otherwise leaves them as they are.
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     Returns:
 | 
| 
 | 
    46         Dict[ReactionId, str] : the generated dictionary.
 | 
| 
 | 
    47     """
 | 
| 
 | 
    48 
 | 
| 
 | 
    49     unparsedReactions = {
 | 
| 
 | 
    50         reaction.id : reaction.reaction
 | 
| 
 | 
    51         for reaction in model.reactions
 | 
| 
 | 
    52         if reaction.reaction 
 | 
| 
 | 
    53     }
 | 
| 
 | 
    54 
 | 
| 
 | 
    55     if not asParsed: return unparsedReactions
 | 
| 
 | 
    56     
 | 
| 
 | 
    57     return reactionUtils.create_reaction_dict(unparsedReactions)
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 def get_medium(model:cobra.Model) -> pd.DataFrame:
 | 
| 
 | 
    60     trueMedium=[]
 | 
| 
 | 
    61     for r in model.reactions:
 | 
| 
 | 
    62         positiveCoeff=0
 | 
| 
 | 
    63         for m in r.metabolites:
 | 
| 
 | 
    64             if r.get_coefficient(m.id)>0:
 | 
| 
 | 
    65                 positiveCoeff=1;
 | 
| 
 | 
    66         if (positiveCoeff==0 and r.lower_bound<0):
 | 
| 
 | 
    67             trueMedium.append(r.id)
 | 
| 
 | 
    68 
 | 
| 
 | 
    69     df_medium = pd.DataFrame()
 | 
| 
 | 
    70     df_medium["reaction"] = trueMedium
 | 
| 
 | 
    71     return df_medium
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 def generate_bounds(model:cobra.Model) -> pd.DataFrame:
 | 
| 
 | 
    74 
 | 
| 
 | 
    75     rxns = []
 | 
| 
 | 
    76     for reaction in model.reactions:
 | 
| 
 | 
    77         rxns.append(reaction.id)
 | 
| 
 | 
    78 
 | 
| 
 | 
    79     bounds = pd.DataFrame(columns = ["lower_bound", "upper_bound"], index=rxns)
 | 
| 
 | 
    80 
 | 
| 
 | 
    81     for reaction in model.reactions:
 | 
| 
 | 
    82         bounds.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound]
 | 
| 
 | 
    83     return bounds
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 def generate_compartments(model: cobra.Model) -> pd.DataFrame:
 | 
| 
 | 
    88     """
 | 
| 
 | 
    89     Generates a DataFrame containing compartment information for each reaction.
 | 
| 
 | 
    90     Creates columns for each compartment position (Compartment_1, Compartment_2, etc.)
 | 
| 
 | 
    91     
 | 
| 
 | 
    92     Args:
 | 
| 
 | 
    93         model: the COBRA model to extract compartment data from.
 | 
| 
 | 
    94         
 | 
| 
 | 
    95     Returns:
 | 
| 
 | 
    96         pd.DataFrame: DataFrame with ReactionID and compartment columns
 | 
| 
 | 
    97     """
 | 
| 
 | 
    98     pathway_data = []
 | 
| 
 | 
    99 
 | 
| 
 | 
   100     # First pass: determine the maximum number of pathways any reaction has
 | 
| 
 | 
   101     max_pathways = 0
 | 
| 
 | 
   102     reaction_pathways = {}
 | 
| 
 | 
   103 
 | 
| 
 | 
   104     for reaction in model.reactions:
 | 
| 
 | 
   105         # Get unique pathways from all metabolites in the reaction
 | 
| 
 | 
   106         if type(reaction.annotation['pathways']) == list:
 | 
| 
 | 
   107             reaction_pathways[reaction.id] = reaction.annotation['pathways']
 | 
| 
 | 
   108             max_pathways = max(max_pathways, len(reaction.annotation['pathways']))
 | 
| 
 | 
   109         else:
 | 
| 
 | 
   110             reaction_pathways[reaction.id] = [reaction.annotation['pathways']]
 | 
| 
 | 
   111 
 | 
| 
 | 
   112     # Create column names for pathways
 | 
| 
 | 
   113     pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]
 | 
| 
 | 
   114 
 | 
| 
 | 
   115     # Second pass: create the data
 | 
| 
 | 
   116     for reaction_id, pathways in reaction_pathways.items():
 | 
| 
 | 
   117         row = {"ReactionID": reaction_id}
 | 
| 
 | 
   118         
 | 
| 
 | 
   119         # Fill pathway columns
 | 
| 
 | 
   120         for i in range(max_pathways):
 | 
| 
 | 
   121             col_name = pathway_columns[i]
 | 
| 
 | 
   122             if i < len(pathways):
 | 
| 
 | 
   123                 row[col_name] = pathways[i]
 | 
| 
 | 
   124             else:
 | 
| 
 | 
   125                 row[col_name] = None  # or "" if you prefer empty strings
 | 
| 
 | 
   126 
 | 
| 
 | 
   127         pathway_data.append(row)
 | 
| 
 | 
   128 
 | 
| 
 | 
   129     return pd.DataFrame(pathway_data) |