| 418 | 1 import os | 
|  | 2 import csv | 
|  | 3 import cobra | 
|  | 4 import pickle | 
|  | 5 import argparse | 
|  | 6 import pandas as pd | 
|  | 7 from typing import Optional, Tuple, Union, List, Dict | 
|  | 8 import utils.general_utils as utils | 
|  | 9 import utils.rule_parsing  as rulesUtils | 
|  | 10 | 
|  | 11 ################################- DATA GENERATION -################################ | 
# Alias used in annotations to mark strings that are reaction identifiers.
ReactionId = str


def generate_rules(model: cobra.Model, *, asParsed=True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]:
    """
    Builds a dictionary mapping reaction ids to their gene-reaction rules.

    Reactions whose ``gene_reaction_rule`` is empty (falsy) are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True each rule is passed through
            ``rulesUtils.parseRuleToNestedList``, otherwise the rule is kept
            exactly as stored on the reaction.

    Returns:
        Dict[ReactionId, rulesUtils.OpList] : parsed rules, when asParsed is True.
        Dict[ReactionId, str] : raw rule strings, when asParsed is False.
    """
    rules_by_id = {}
    for rxn in model.reactions:
        raw_rule = rxn.gene_reaction_rule
        if not raw_rule:
            # Nothing to record for reactions without a rule.
            continue

        rules_by_id[rxn.id] = (
            rulesUtils.parseRuleToNestedList(raw_rule) if asParsed else raw_rule
        )

    return rules_by_id
|  | 36 | 
def generate_reactions(model: cobra.Model, *, asParsed=True) -> Dict[ReactionId, str]:
    """
    Builds a dictionary mapping reaction ids to reaction formulas.

    Reactions whose ``reaction`` attribute is empty (falsy) are left out.

    Args:
        model : the model to derive data from.
        asParsed : if True the raw dictionary is handed to
            ``reactionUtils.create_reaction_dict`` and its result is returned,
            otherwise the raw formulas are returned as they are.

    Returns:
        Dict[ReactionId, str] : the raw formulas when asParsed is False; otherwise
        whatever ``reactionUtils.create_reaction_dict`` produces.
    """
    formulas_by_id = {}
    for rxn in model.reactions:
        formula = rxn.reaction
        if formula:
            formulas_by_id[rxn.id] = formula

    if asParsed:
        # NOTE(review): `reactionUtils` is not among the imports visible at the
        # top of this file — confirm it is brought into scope, otherwise this
        # (default) path raises NameError.
        return reactionUtils.create_reaction_dict(formulas_by_id)

    return formulas_by_id
|  | 58 | 
def get_medium(model: cobra.Model) -> pd.DataFrame:
    """
    Lists the reactions that make up the model's medium.

    A reaction is selected when none of its metabolites has a positive
    coefficient and its lower bound is negative (presumably the uptake
    direction of an exchange reaction — confirm against the model convention).

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : a single-column DataFrame ("reaction") with the selected
        reaction ids, in model order.
    """
    medium_ids = []
    for rxn in model.reactions:
        has_positive_coeff = any(
            rxn.get_coefficient(met.id) > 0 for met in rxn.metabolites
        )
        if not has_positive_coeff and rxn.lower_bound < 0:
            medium_ids.append(rxn.id)

    medium_table = pd.DataFrame()
    medium_table["reaction"] = medium_ids
    return medium_table
|  | 72 | 
def generate_bounds(model: cobra.Model) -> pd.DataFrame:
    """
    Builds a table with the flux bounds of every reaction in the model.

    The table is assembled in a single constructor call so that the bound
    columns get a numeric dtype; filling a pre-allocated NaN frame row by row
    would leave them as ``object`` and cost one pandas assignment per reaction.

    Args:
        model : the model to derive data from.

    Returns:
        pd.DataFrame : indexed by reaction id, with the columns "lower_bound"
        and "upper_bound", rows in model order.
    """
    reactions = list(model.reactions)
    return pd.DataFrame(
        {
            "lower_bound": [rxn.lower_bound for rxn in reactions],
            "upper_bound": [rxn.upper_bound for rxn in reactions],
        },
        index=[rxn.id for rxn in reactions],
    )
|  | 84 | 
|  | 85 | 
|  | 86 | 
def generate_compartments(model: cobra.Model) -> pd.DataFrame:
    """
    Generates a DataFrame listing the pathway annotations of each reaction.

    Despite the function name, the data is read from
    ``reaction.annotation['pathways']``: one column is created per pathway
    position (Pathway_1, Pathway_2, ...), sized to the reaction with the most
    pathways. Reactions with fewer pathways are padded with None.

    The annotation may be a list/tuple of pathways or a single value; a single
    value counts as one pathway. A reaction without a 'pathways' annotation
    (or with it set to None) is kept in the table with every pathway column
    empty.

    Args:
        model: the COBRA model to extract the annotations from.

    Returns:
        pd.DataFrame: DataFrame with a "ReactionID" column followed by the
        "Pathway_N" columns, one row per reaction in model order.
    """
    # First pass: normalise every annotation to a list of pathways.
    reaction_pathways = {}
    for reaction in model.reactions:
        annotated = reaction.annotation.get("pathways")
        if annotated is None:
            pathways = []
        elif isinstance(annotated, (list, tuple)):
            pathways = list(annotated)
        else:
            # A lone value is a reaction with exactly one pathway.
            pathways = [annotated]
        reaction_pathways[reaction.id] = pathways

    # The widest reaction decides how many pathway columns exist; single-value
    # annotations count too, so they are never dropped from the output.
    max_pathways = max(map(len, reaction_pathways.values()), default=0)
    pathway_columns = [f"Pathway_{i}" for i in range(1, max_pathways + 1)]

    # Second pass: one row per reaction, padded to the common width.
    rows = []
    for reaction_id, pathways in reaction_pathways.items():
        padded = pathways + [None] * (max_pathways - len(pathways))
        row = {"ReactionID": reaction_id}
        row.update(zip(pathway_columns, padded))
        rows.append(row)

    # Explicit columns keep the layout stable even for a model with no reactions.
    return pd.DataFrame(rows, columns=["ReactionID", *pathway_columns])