Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/model_utils.py @ 503:8dd07e59f631 draft
Uploaded
author | francesco_lapi |
---|---|
date | Tue, 30 Sep 2025 18:01:30 +0000 |
parents | 9bfd1ec3ae6f |
children | 96f512dff490 |
comparison
equal
deleted
inserted
replaced
502:054c872e3880 | 503:8dd07e59f631 |
---|---|
238 max_pathways = 0 | 238 max_pathways = 0 |
239 reaction_pathways = {} | 239 reaction_pathways = {} |
240 | 240 |
241 for reaction in model.reactions: | 241 for reaction in model.reactions: |
242 # Get unique pathways from all metabolites in the reaction | 242 # Get unique pathways from all metabolites in the reaction |
243 if type(reaction.annotation['pathways']) == list: | 243 if 'pathways' in reaction.annotation: |
244 reaction_pathways[reaction.id] = reaction.annotation['pathways'] | 244 if type(reaction.annotation['pathways']) == list: |
245 max_pathways = max(max_pathways, len(reaction.annotation['pathways'])) | 245 reaction_pathways[reaction.id] = reaction.annotation['pathways'] |
246 max_pathways = max(max_pathways, len(reaction.annotation['pathways'])) | |
247 else: | |
248 reaction_pathways[reaction.id] = [reaction.annotation['pathways']] | |
246 else: | 249 else: |
247 reaction_pathways[reaction.id] = [reaction.annotation['pathways']] | 250 # No pathway annotation - use empty list |
251 reaction_pathways[reaction.id] = [] | |
248 | 252 |
249 # Create column names for pathways | 253 # Create column names for pathways |
250 pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)] | 254 pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)] |
251 | 255 |
252 # Second pass: create the data | 256 # Second pass: create the data |
615 logger.warning("Mapping dataframe is empty for the requested source genes; skipping uniqueness validation.") | 619 logger.warning("Mapping dataframe is empty for the requested source genes; skipping uniqueness validation.") |
616 return | 620 return |
617 | 621 |
618 # normalize temporary columns for grouping (without altering the original df) | 622 # normalize temporary columns for grouping (without altering the original df) |
619 tmp = mapping_df[[source_col, target_col]].copy() | 623 tmp = mapping_df[[source_col, target_col]].copy() |
620 tmp['_src_norm'] = tmp[source_col].astype(str).map(_normalize_gene_id) | 624 tmp['_src_norm'] = tmp[source_col].astype(str).apply(_normalize_gene_id) |
621 tmp['_tgt_norm'] = tmp[target_col].astype(str).str.strip() | 625 tmp['_tgt_norm'] = tmp[target_col].astype(str).str.strip() |
622 | 626 |
623 # optionally filter to the set of model source genes | 627 # optionally filter to the set of model source genes |
624 if model_source_genes is not None: | 628 if model_source_genes is not None: |
625 tmp = tmp[tmp['_src_norm'].isin(model_source_genes)] | 629 tmp = tmp[tmp['_src_norm'].isin(model_source_genes)] |
883 | 887 |
884 model_source_genes = { _normalize_gene_id(g.id) for g in model.genes } | 888 model_source_genes = { _normalize_gene_id(g.id) for g in model.genes } |
885 logger.info(f"Filtering mapping to {len(model_source_genes)} source genes present in model (normalized).") | 889 logger.info(f"Filtering mapping to {len(model_source_genes)} source genes present in model (normalized).") |
886 | 890 |
887 tmp_map = mapping_df[[col_for_src, col_for_tgt]].dropna().copy() | 891 tmp_map = mapping_df[[col_for_src, col_for_tgt]].dropna().copy() |
888 tmp_map[col_for_src + "_norm"] = tmp_map[col_for_src].astype(str).map(_normalize_gene_id) | 892 tmp_map[col_for_src + "_norm"] = tmp_map[col_for_src].astype(str).apply(_normalize_gene_id) |
889 | 893 |
890 filtered_map = tmp_map[tmp_map[col_for_src + "_norm"].isin(model_source_genes)].copy() | 894 filtered_map = tmp_map[tmp_map[col_for_src + "_norm"].isin(model_source_genes)].copy() |
891 | 895 |
892 if filtered_map.empty: | 896 if filtered_map.empty: |
893 logger.warning("No mapping rows correspond to source genes present in the model after filtering. Proceeding with empty mapping (no translation will occur).") | 897 logger.warning("No mapping rows correspond to source genes present in the model after filtering. Proceeding with empty mapping (no translation will occur).") |
953 Build mapping dict: source_id -> list of target_ids | 957 Build mapping dict: source_id -> list of target_ids |
954 Normalizes IDs (removes prefixes like 'HGNC:' etc). | 958 Normalizes IDs (removes prefixes like 'HGNC:' etc). |
955 """ | 959 """ |
956 df = mapping_df[[source_col, target_col]].dropna().copy() | 960 df = mapping_df[[source_col, target_col]].dropna().copy() |
957 # normalize to string | 961 # normalize to string |
958 df[source_col] = df[source_col].astype(str).map(_normalize_gene_id) | 962 df[source_col] = df[source_col].astype(str).apply(_normalize_gene_id) |
959 df[target_col] = df[target_col].astype(str).str.strip() | 963 df[target_col] = df[target_col].astype(str).str.strip() |
960 | 964 |
961 df = df.drop_duplicates() | 965 df = df.drop_duplicates() |
962 | 966 |
963 logger.info(f"Creating mapping from {len(df)} rows") | 967 logger.info(f"Creating mapping from {len(df)} rows") |