Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/model_utils.py @ 501:9bfd1ec3ae6f draft
Uploaded
| author | francesco_lapi | 
|---|---|
| date | Tue, 30 Sep 2025 17:06:37 +0000 | 
| parents | 4e7e67693ce7 | 
| children | 8dd07e59f631 | 
comparison
Legend (row types in the table below): equal, deleted, inserted, replaced
| 500:4e7e67693ce7 | 501:9bfd1ec3ae6f | 
|---|---|
| 277 | 277 | 
| 278 Returns: | 278 Returns: | 
| 279 cobra.Model: The constructed COBRApy model. | 279 cobra.Model: The constructed COBRApy model. | 
| 280 """ | 280 """ | 
| 281 | 281 | 
| 282 df = pd.read_csv(csv_path, sep='\t') | 282 # Try to detect separator | 
| 283 with open(csv_path, 'r') as f: | |
| 284 first_line = f.readline() | |
| 285 sep = '\t' if '\t' in first_line else ',' | |
| 286 | |
| 287 df = pd.read_csv(csv_path, sep=sep) | |
| 288 | |
| 289 # Check required columns | |
| 290 required_cols = ['ReactionID', 'Formula'] | |
| 291 missing_cols = [col for col in required_cols if col not in df.columns] | |
| 292 if missing_cols: | |
| 293 raise ValueError(f"Missing required columns: {missing_cols}. Available columns: {list(df.columns)}") | |
| 283 | 294 | 
| 284 model = cobraModel(model_id) | 295 model = cobraModel(model_id) | 
| 285 | 296 | 
| 286 metabolites_dict = {} | 297 metabolites_dict = {} | 
| 287 compartments_dict = {} | 298 compartments_dict = {} | 
| 385 | 396 | 
| 386 Returns the IDs including the compartment suffix exactly as written. | 397 Returns the IDs including the compartment suffix exactly as written. | 
| 387 """ | 398 """ | 
| 388 pattern = re.compile( | 399 pattern = re.compile( | 
| 389 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) | 400 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) | 
| 390 r'(?:\d+(?:\.\d+)?\s*)?' # optional coefficient | 401 r'(?:\d+(?:\.\d+)?\s+)?' # optional coefficient (requires space after) | 
| 391 r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment | 402 r'([A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment (can start with number) | 
| 392 ) | 403 ) | 
| 393 return {m.group(1) for m in pattern.finditer(reaction_formula)} | 404 return {m.group(1) for m in pattern.finditer(reaction_formula)} | 
| 394 | 405 | 
| 395 | 406 | 
| 396 | 407 | 
| 405 | 416 | 
| 406 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | 417 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | 
| 407 """Parse a reaction formula and set metabolites with their coefficients.""" | 418 """Parse a reaction formula and set metabolites with their coefficients.""" | 
| 408 | 419 | 
| 409 if '<=>' in formula: | 420 if '<=>' in formula: | 
| 410 left, right = formula.split('<=>') | 421 parts = formula.split('<=>') | 
| 411 reversible = True | 422 reversible = True | 
| 412 elif '<--' in formula: | 423 elif '<--' in formula: | 
| 413 left, right = formula.split('<--') | 424 parts = formula.split('<--') | 
| 414 reversible = False | 425 reversible = False | 
| 415 elif '-->' in formula: | 426 elif '-->' in formula: | 
| 416 left, right = formula.split('-->') | 427 parts = formula.split('-->') | 
| 417 reversible = False | 428 reversible = False | 
| 418 elif '<-' in formula: | 429 elif '<-' in formula: | 
| 419 left, right = formula.split('<-') | 430 parts = formula.split('<-') | 
| 420 reversible = False | 431 reversible = False | 
| 421 else: | 432 else: | 
| 422 raise ValueError(f"Unrecognized reaction format: {formula}") | 433 raise ValueError(f"Unrecognized reaction format: {formula}") | 
| 423 | 434 | 
| 424 reactants = parse_metabolites_side(left.strip()) | 435 # Handle cases where one side might be empty (exchange reactions) | 
| 425 products = parse_metabolites_side(right.strip()) | 436 if len(parts) != 2: | 
| 437 raise ValueError(f"Invalid reaction format, expected 2 parts: {formula}") | |
| 438 | |
| 439 left, right = parts[0].strip(), parts[1].strip() | |
| 440 | |
| 441 reactants = parse_metabolites_side(left) if left else {} | |
| 442 products = parse_metabolites_side(right) if right else {} | |
| 426 | 443 | 
| 427 metabolites_to_add = {} | 444 metabolites_to_add = {} | 
| 428 | 445 | 
| 429 for met_id, coeff in reactants.items(): | 446 for met_id, coeff in reactants.items(): | 
| 430 if met_id in metabolites_dict: | 447 if met_id in metabolites_dict: | 
| 447 for term in terms: | 464 for term in terms: | 
| 448 term = term.strip() | 465 term = term.strip() | 
| 449 if not term: | 466 if not term: | 
| 450 continue | 467 continue | 
| 451 | 468 | 
| 452 # optional coefficient + id ending with _<compartment> | 469 # First check if term has space-separated coefficient and metabolite | 
| 453 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | 470 parts = term.split() | 
| 454 if match: | 471 if len(parts) == 2: | 
| 455 coeff_str, met_id = match.groups() | 472 # Two parts: potential coefficient + metabolite | 
| 456 coeff = float(coeff_str) if coeff_str else 1.0 | 473 try: | 
| 457 metabolites[met_id] = coeff | 474 coeff = float(parts[0]) | 
| 475 met_id = parts[1] | |
| 476 # Verify the second part looks like a metabolite with compartment | |
| 477 if re.match(r'[A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', met_id): | |
| 478 metabolites[met_id] = coeff | |
| 479 continue | |
| 480 except ValueError: | |
| 481 pass | |
| 482 | |
| 483 # Single term - check if it's a metabolite (no coefficient) | |
| 484 # Updated pattern to include metabolites starting with numbers | |
| 485 if re.match(r'[A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', term): | |
| 486 metabolites[term] = 1.0 | |
| 487 else: | |
| 488 print(f"Warning: Could not parse metabolite term: '{term}'") | |
| 458 | 489 | 
| 459 return metabolites | 490 return metabolites | 
| 460 | 491 | 
| 461 | 492 | 
| 462 | 493 | 
| 485 | 516 | 
| 486 | 517 | 
| 487 | 518 | 
| 488 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): | 519 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): | 
| 489 """Set the medium based on the 'InMedium' column in the dataframe.""" | 520 """Set the medium based on the 'InMedium' column in the dataframe.""" | 
| 521 if 'InMedium' not in df.columns: | |
| 522 print("No 'InMedium' column found, skipping medium setup") | |
| 523 return | |
| 524 | |
| 490 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | 525 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | 
| 491 | 526 | 
| 492 medium_dict = {} | 527 medium_dict = {} | 
| 493 for rxn_id in medium_reactions: | 528 for rxn_id in medium_reactions: | 
| 494 if rxn_id in [r.id for r in model.reactions]: | 529 if rxn_id in [r.id for r in model.reactions]: | 
| 495 reaction = model.reactions.get_by_id(rxn_id) | 530 reaction = model.reactions.get_by_id(rxn_id) | 
| 496 if reaction.lower_bound < 0: # Solo reazioni di uptake | 531 if reaction.lower_bound < 0: | 
| 497 medium_dict[rxn_id] = abs(reaction.lower_bound) | 532 medium_dict[rxn_id] = abs(reaction.lower_bound) | 
| 498 | 533 | 
| 499 if medium_dict: | 534 if medium_dict: | 
| 500 model.medium = medium_dict | 535 model.medium = medium_dict | 
| 501 print(f"Medium set with {len(medium_dict)} components") | 536 print(f"Medium set with {len(medium_dict)} components") | 
| 502 | 537 else: | 
| 503 | 538 print("No medium components found") | 
| 504 def validate_model(model: cobraModel) -> Dict[str, any]: | 539 def validate_model(model: cobraModel) -> Dict[str, any]: | 
| 505 """Validate the model and return basic statistics.""" | 540 """Validate the model and return basic statistics.""" | 
| 506 validation = { | 541 validation = { | 
| 507 'num_reactions': len(model.reactions), | 542 'num_reactions': len(model.reactions), | 
| 508 'num_metabolites': len(model.metabolites), | 543 'num_metabolites': len(model.metabolites), | 
