comparison COBRAxy/utils/model_utils.py @ 503:8dd07e59f631 draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 18:01:30 +0000
parents 9bfd1ec3ae6f
children 96f512dff490
comparison
equal deleted inserted replaced
502:054c872e3880 503:8dd07e59f631
238 max_pathways = 0 238 max_pathways = 0
239 reaction_pathways = {} 239 reaction_pathways = {}
240 240
241 for reaction in model.reactions: 241 for reaction in model.reactions:
242 # Get unique pathways from all metabolites in the reaction 242 # Get unique pathways from all metabolites in the reaction
243 if type(reaction.annotation['pathways']) == list: 243 if 'pathways' in reaction.annotation:
244 reaction_pathways[reaction.id] = reaction.annotation['pathways'] 244 if type(reaction.annotation['pathways']) == list:
245 max_pathways = max(max_pathways, len(reaction.annotation['pathways'])) 245 reaction_pathways[reaction.id] = reaction.annotation['pathways']
246 max_pathways = max(max_pathways, len(reaction.annotation['pathways']))
247 else:
248 reaction_pathways[reaction.id] = [reaction.annotation['pathways']]
246 else: 249 else:
247 reaction_pathways[reaction.id] = [reaction.annotation['pathways']] 250 # No pathway annotation - use empty list
251 reaction_pathways[reaction.id] = []
248 252
249 # Create column names for pathways 253 # Create column names for pathways
250 pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)] 254 pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)]
251 255
252 # Second pass: create the data 256 # Second pass: create the data
615 logger.warning("Mapping dataframe is empty for the requested source genes; skipping uniqueness validation.") 619 logger.warning("Mapping dataframe is empty for the requested source genes; skipping uniqueness validation.")
616 return 620 return
617 621
618 # normalize temporary columns for grouping (without altering the original df) 622 # normalize temporary columns for grouping (without altering the original df)
619 tmp = mapping_df[[source_col, target_col]].copy() 623 tmp = mapping_df[[source_col, target_col]].copy()
620 tmp['_src_norm'] = tmp[source_col].astype(str).map(_normalize_gene_id) 624 tmp['_src_norm'] = tmp[source_col].astype(str).apply(_normalize_gene_id)
621 tmp['_tgt_norm'] = tmp[target_col].astype(str).str.strip() 625 tmp['_tgt_norm'] = tmp[target_col].astype(str).str.strip()
622 626
623 # optionally filter to the set of model source genes 627 # optionally filter to the set of model source genes
624 if model_source_genes is not None: 628 if model_source_genes is not None:
625 tmp = tmp[tmp['_src_norm'].isin(model_source_genes)] 629 tmp = tmp[tmp['_src_norm'].isin(model_source_genes)]
883 887
884 model_source_genes = { _normalize_gene_id(g.id) for g in model.genes } 888 model_source_genes = { _normalize_gene_id(g.id) for g in model.genes }
885 logger.info(f"Filtering mapping to {len(model_source_genes)} source genes present in model (normalized).") 889 logger.info(f"Filtering mapping to {len(model_source_genes)} source genes present in model (normalized).")
886 890
887 tmp_map = mapping_df[[col_for_src, col_for_tgt]].dropna().copy() 891 tmp_map = mapping_df[[col_for_src, col_for_tgt]].dropna().copy()
888 tmp_map[col_for_src + "_norm"] = tmp_map[col_for_src].astype(str).map(_normalize_gene_id) 892 tmp_map[col_for_src + "_norm"] = tmp_map[col_for_src].astype(str).apply(_normalize_gene_id)
889 893
890 filtered_map = tmp_map[tmp_map[col_for_src + "_norm"].isin(model_source_genes)].copy() 894 filtered_map = tmp_map[tmp_map[col_for_src + "_norm"].isin(model_source_genes)].copy()
891 895
892 if filtered_map.empty: 896 if filtered_map.empty:
893 logger.warning("No mapping rows correspond to source genes present in the model after filtering. Proceeding with empty mapping (no translation will occur).") 897 logger.warning("No mapping rows correspond to source genes present in the model after filtering. Proceeding with empty mapping (no translation will occur).")
953 Build mapping dict: source_id -> list of target_ids 957 Build mapping dict: source_id -> list of target_ids
954 Normalizes IDs (removes prefixes like 'HGNC:' etc). 958 Normalizes IDs (removes prefixes like 'HGNC:' etc).
955 """ 959 """
956 df = mapping_df[[source_col, target_col]].dropna().copy() 960 df = mapping_df[[source_col, target_col]].dropna().copy()
957 # normalize to string 961 # normalize to string
958 df[source_col] = df[source_col].astype(str).map(_normalize_gene_id) 962 df[source_col] = df[source_col].astype(str).apply(_normalize_gene_id)
959 df[target_col] = df[target_col].astype(str).str.strip() 963 df[target_col] = df[target_col].astype(str).str.strip()
960 964
961 df = df.drop_duplicates() 965 df = df.drop_duplicates()
962 966
963 logger.info(f"Creating mapping from {len(df)} rows") 967 logger.info(f"Creating mapping from {len(df)} rows")