Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/model_utils.py @ 501:9bfd1ec3ae6f draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Tue, 30 Sep 2025 17:06:37 +0000 |
| parents | 4e7e67693ce7 |
| children | 8dd07e59f631 |
comparison
equal
deleted
inserted
replaced
| 500:4e7e67693ce7 | 501:9bfd1ec3ae6f |
|---|---|
| 277 | 277 |
| 278 Returns: | 278 Returns: |
| 279 cobra.Model: The constructed COBRApy model. | 279 cobra.Model: The constructed COBRApy model. |
| 280 """ | 280 """ |
| 281 | 281 |
| 282 df = pd.read_csv(csv_path, sep='\t') | 282 # Try to detect separator |
| | 283 with open(csv_path, 'r') as f: |
| | 284 first_line = f.readline() |
| | 285 sep = '\t' if '\t' in first_line else ',' |
| | 286 |
| | 287 df = pd.read_csv(csv_path, sep=sep) |
| | 288 |
| | 289 # Check required columns |
| | 290 required_cols = ['ReactionID', 'Formula'] |
| | 291 missing_cols = [col for col in required_cols if col not in df.columns] |
| | 292 if missing_cols: |
| | 293 raise ValueError(f"Missing required columns: {missing_cols}. Available columns: {list(df.columns)}") |
| 283 | 294 |
| 284 model = cobraModel(model_id) | 295 model = cobraModel(model_id) |
| 285 | 296 |
| 286 metabolites_dict = {} | 297 metabolites_dict = {} |
| 287 compartments_dict = {} | 298 compartments_dict = {} |
| 385 | 396 |
| 386 Returns the IDs including the compartment suffix exactly as written. | 397 Returns the IDs including the compartment suffix exactly as written. |
| 387 """ | 398 """ |
| 388 pattern = re.compile( | 399 pattern = re.compile( |
| 389 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) | 400 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) |
| 390 r'(?:\d+(?:\.\d+)?\s*)?' # optional coefficient | 401 r'(?:\d+(?:\.\d+)?\s+)?' # optional coefficient (requires space after) |
| 391 r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment | 402 r'([A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment (can start with number) |
| 392 ) | 403 ) |
| 393 return {m.group(1) for m in pattern.finditer(reaction_formula)} | 404 return {m.group(1) for m in pattern.finditer(reaction_formula)} |
| 394 | 405 |
| 395 | 406 |
| 396 | 407 |
| 405 | 416 |
| 406 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | 417 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): |
| 407 """Parse a reaction formula and set metabolites with their coefficients.""" | 418 """Parse a reaction formula and set metabolites with their coefficients.""" |
| 408 | 419 |
| 409 if '<=>' in formula: | 420 if '<=>' in formula: |
| 410 left, right = formula.split('<=>') | 421 parts = formula.split('<=>') |
| 411 reversible = True | 422 reversible = True |
| 412 elif '<--' in formula: | 423 elif '<--' in formula: |
| 413 left, right = formula.split('<--') | 424 parts = formula.split('<--') |
| 414 reversible = False | 425 reversible = False |
| 415 elif '-->' in formula: | 426 elif '-->' in formula: |
| 416 left, right = formula.split('-->') | 427 parts = formula.split('-->') |
| 417 reversible = False | 428 reversible = False |
| 418 elif '<-' in formula: | 429 elif '<-' in formula: |
| 419 left, right = formula.split('<-') | 430 parts = formula.split('<-') |
| 420 reversible = False | 431 reversible = False |
| 421 else: | 432 else: |
| 422 raise ValueError(f"Unrecognized reaction format: {formula}") | 433 raise ValueError(f"Unrecognized reaction format: {formula}") |
| 423 | 434 |
| 424 reactants = parse_metabolites_side(left.strip()) | 435 # Handle cases where one side might be empty (exchange reactions) |
| 425 products = parse_metabolites_side(right.strip()) | 436 if len(parts) != 2: |
| | 437 raise ValueError(f"Invalid reaction format, expected 2 parts: {formula}") |
| | 438 |
| | 439 left, right = parts[0].strip(), parts[1].strip() |
| | 440 |
| | 441 reactants = parse_metabolites_side(left) if left else {} |
| | 442 products = parse_metabolites_side(right) if right else {} |
| 426 | 443 |
| 427 metabolites_to_add = {} | 444 metabolites_to_add = {} |
| 428 | 445 |
| 429 for met_id, coeff in reactants.items(): | 446 for met_id, coeff in reactants.items(): |
| 430 if met_id in metabolites_dict: | 447 if met_id in metabolites_dict: |
| 447 for term in terms: | 464 for term in terms: |
| 448 term = term.strip() | 465 term = term.strip() |
| 449 if not term: | 466 if not term: |
| 450 continue | 467 continue |
| 451 | 468 |
| 452 # optional coefficient + id ending with _<compartment> | 469 # First check if term has space-separated coefficient and metabolite |
| 453 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | 470 parts = term.split() |
| 454 if match: | 471 if len(parts) == 2: |
| 455 coeff_str, met_id = match.groups() | 472 # Two parts: potential coefficient + metabolite |
| 456 coeff = float(coeff_str) if coeff_str else 1.0 | 473 try: |
| 457 metabolites[met_id] = coeff | 474 coeff = float(parts[0]) |
| | 475 met_id = parts[1] |
| | 476 # Verify the second part looks like a metabolite with compartment |
| | 477 if re.match(r'[A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', met_id): |
| | 478 metabolites[met_id] = coeff |
| | 479 continue |
| | 480 except ValueError: |
| | 481 pass |
| | 482 |
| | 483 # Single term - check if it's a metabolite (no coefficient) |
| | 484 # Updated pattern to include metabolites starting with numbers |
| | 485 if re.match(r'[A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', term): |
| | 486 metabolites[term] = 1.0 |
| | 487 else: |
| | 488 print(f"Warning: Could not parse metabolite term: '{term}'") |
| 458 | 489 |
| 459 return metabolites | 490 return metabolites |
| 460 | 491 |
| 461 | 492 |
| 462 | 493 |
| 485 | 516 |
| 486 | 517 |
| 487 | 518 |
| 488 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): | 519 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): |
| 489 """Set the medium based on the 'InMedium' column in the dataframe.""" | 520 """Set the medium based on the 'InMedium' column in the dataframe.""" |
| | 521 if 'InMedium' not in df.columns: |
| | 522 print("No 'InMedium' column found, skipping medium setup") |
| | 523 return |
| | 524 |
| 490 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | 525 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() |
| 491 | 526 |
| 492 medium_dict = {} | 527 medium_dict = {} |
| 493 for rxn_id in medium_reactions: | 528 for rxn_id in medium_reactions: |
| 494 if rxn_id in [r.id for r in model.reactions]: | 529 if rxn_id in [r.id for r in model.reactions]: |
| 495 reaction = model.reactions.get_by_id(rxn_id) | 530 reaction = model.reactions.get_by_id(rxn_id) |
| 496 if reaction.lower_bound < 0: # Solo reazioni di uptake | 531 if reaction.lower_bound < 0: |
| 497 medium_dict[rxn_id] = abs(reaction.lower_bound) | 532 medium_dict[rxn_id] = abs(reaction.lower_bound) |
| 498 | 533 |
| 499 if medium_dict: | 534 if medium_dict: |
| 500 model.medium = medium_dict | 535 model.medium = medium_dict |
| 501 print(f"Medium set with {len(medium_dict)} components") | 536 print(f"Medium set with {len(medium_dict)} components") |
| 502 | 537 else: |
| 503 | 538 print("No medium components found") |
| 504 def validate_model(model: cobraModel) -> Dict[str, any]: | 539 def validate_model(model: cobraModel) -> Dict[str, any]: |
| 505 """Validate the model and return basic statistics.""" | 540 """Validate the model and return basic statistics.""" |
| 506 validation = { | 541 validation = { |
| 507 'num_reactions': len(model.reactions), | 542 'num_reactions': len(model.reactions), |
| 508 'num_metabolites': len(model.metabolites), | 543 'num_metabolites': len(model.metabolites), |
