Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/model_utils.py @ 501:9bfd1ec3ae6f draft
Uploaded
author | francesco_lapi |
---|---|
date | Tue, 30 Sep 2025 17:06:37 +0000 |
parents | 4e7e67693ce7 |
children | 8dd07e59f631 |
comparison
equal
deleted
inserted
replaced
500:4e7e67693ce7 | 501:9bfd1ec3ae6f |
---|---|
277 | 277 |
278 Returns: | 278 Returns: |
279 cobra.Model: The constructed COBRApy model. | 279 cobra.Model: The constructed COBRApy model. |
280 """ | 280 """ |
281 | 281 |
282 df = pd.read_csv(csv_path, sep='\t') | 282 # Try to detect separator |
283 with open(csv_path, 'r') as f: | |
284 first_line = f.readline() | |
285 sep = '\t' if '\t' in first_line else ',' | |
286 | |
287 df = pd.read_csv(csv_path, sep=sep) | |
288 | |
289 # Check required columns | |
290 required_cols = ['ReactionID', 'Formula'] | |
291 missing_cols = [col for col in required_cols if col not in df.columns] | |
292 if missing_cols: | |
293 raise ValueError(f"Missing required columns: {missing_cols}. Available columns: {list(df.columns)}") | |
283 | 294 |
284 model = cobraModel(model_id) | 295 model = cobraModel(model_id) |
285 | 296 |
286 metabolites_dict = {} | 297 metabolites_dict = {} |
287 compartments_dict = {} | 298 compartments_dict = {} |
385 | 396 |
386 Returns the IDs including the compartment suffix exactly as written. | 397 Returns the IDs including the compartment suffix exactly as written. |
387 """ | 398 """ |
388 pattern = re.compile( | 399 pattern = re.compile( |
389 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) | 400 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) |
390 r'(?:\d+(?:\.\d+)?\s*)?' # optional coefficient | 401 r'(?:\d+(?:\.\d+)?\s+)?' # optional coefficient (requires space after) |
391 r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment | 402 r'([A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment (can start with number) |
392 ) | 403 ) |
393 return {m.group(1) for m in pattern.finditer(reaction_formula)} | 404 return {m.group(1) for m in pattern.finditer(reaction_formula)} |
394 | 405 |
395 | 406 |
396 | 407 |
405 | 416 |
406 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | 417 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): |
407 """Parse a reaction formula and set metabolites with their coefficients.""" | 418 """Parse a reaction formula and set metabolites with their coefficients.""" |
408 | 419 |
409 if '<=>' in formula: | 420 if '<=>' in formula: |
410 left, right = formula.split('<=>') | 421 parts = formula.split('<=>') |
411 reversible = True | 422 reversible = True |
412 elif '<--' in formula: | 423 elif '<--' in formula: |
413 left, right = formula.split('<--') | 424 parts = formula.split('<--') |
414 reversible = False | 425 reversible = False |
415 elif '-->' in formula: | 426 elif '-->' in formula: |
416 left, right = formula.split('-->') | 427 parts = formula.split('-->') |
417 reversible = False | 428 reversible = False |
418 elif '<-' in formula: | 429 elif '<-' in formula: |
419 left, right = formula.split('<-') | 430 parts = formula.split('<-') |
420 reversible = False | 431 reversible = False |
421 else: | 432 else: |
422 raise ValueError(f"Unrecognized reaction format: {formula}") | 433 raise ValueError(f"Unrecognized reaction format: {formula}") |
423 | 434 |
424 reactants = parse_metabolites_side(left.strip()) | 435 # Handle cases where one side might be empty (exchange reactions) |
425 products = parse_metabolites_side(right.strip()) | 436 if len(parts) != 2: |
437 raise ValueError(f"Invalid reaction format, expected 2 parts: {formula}") | |
438 | |
439 left, right = parts[0].strip(), parts[1].strip() | |
440 | |
441 reactants = parse_metabolites_side(left) if left else {} | |
442 products = parse_metabolites_side(right) if right else {} | |
426 | 443 |
427 metabolites_to_add = {} | 444 metabolites_to_add = {} |
428 | 445 |
429 for met_id, coeff in reactants.items(): | 446 for met_id, coeff in reactants.items(): |
430 if met_id in metabolites_dict: | 447 if met_id in metabolites_dict: |
447 for term in terms: | 464 for term in terms: |
448 term = term.strip() | 465 term = term.strip() |
449 if not term: | 466 if not term: |
450 continue | 467 continue |
451 | 468 |
452 # optional coefficient + id ending with _<compartment> | 469 # First check if term has space-separated coefficient and metabolite |
453 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | 470 parts = term.split() |
454 if match: | 471 if len(parts) == 2: |
455 coeff_str, met_id = match.groups() | 472 # Two parts: potential coefficient + metabolite |
456 coeff = float(coeff_str) if coeff_str else 1.0 | 473 try: |
457 metabolites[met_id] = coeff | 474 coeff = float(parts[0]) |
475 met_id = parts[1] | |
476 # Verify the second part looks like a metabolite with compartment | |
477 if re.match(r'[A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', met_id): | |
478 metabolites[met_id] = coeff | |
479 continue | |
480 except ValueError: | |
481 pass | |
482 | |
483 # Single term - check if it's a metabolite (no coefficient) | |
484 # Updated pattern to include metabolites starting with numbers | |
485 if re.match(r'[A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', term): | |
486 metabolites[term] = 1.0 | |
487 else: | |
488 print(f"Warning: Could not parse metabolite term: '{term}'") | |
458 | 489 |
459 return metabolites | 490 return metabolites |
460 | 491 |
461 | 492 |
462 | 493 |
485 | 516 |
486 | 517 |
487 | 518 |
488 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): | 519 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): |
489 """Set the medium based on the 'InMedium' column in the dataframe.""" | 520 """Set the medium based on the 'InMedium' column in the dataframe.""" |
521 if 'InMedium' not in df.columns: | |
522 print("No 'InMedium' column found, skipping medium setup") | |
523 return | |
524 | |
490 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | 525 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() |
491 | 526 |
492 medium_dict = {} | 527 medium_dict = {} |
493 for rxn_id in medium_reactions: | 528 for rxn_id in medium_reactions: |
494 if rxn_id in [r.id for r in model.reactions]: | 529 if rxn_id in [r.id for r in model.reactions]: |
495 reaction = model.reactions.get_by_id(rxn_id) | 530 reaction = model.reactions.get_by_id(rxn_id) |
496 if reaction.lower_bound < 0: # Solo reazioni di uptake | 531 if reaction.lower_bound < 0: |
497 medium_dict[rxn_id] = abs(reaction.lower_bound) | 532 medium_dict[rxn_id] = abs(reaction.lower_bound) |
498 | 533 |
499 if medium_dict: | 534 if medium_dict: |
500 model.medium = medium_dict | 535 model.medium = medium_dict |
501 print(f"Medium set with {len(medium_dict)} components") | 536 print(f"Medium set with {len(medium_dict)} components") |
502 | 537 else: |
503 | 538 print("No medium components found") |
504 def validate_model(model: cobraModel) -> Dict[str, any]: | 539 def validate_model(model: cobraModel) -> Dict[str, any]: |
505 """Validate the model and return basic statistics.""" | 540 """Validate the model and return basic statistics.""" |
506 validation = { | 541 validation = { |
507 'num_reactions': len(model.reactions), | 542 'num_reactions': len(model.reactions), |
508 'num_metabolites': len(model.metabolites), | 543 'num_metabolites': len(model.metabolites), |