comparison COBRAxy/utils/model_utils.py @ 501:9bfd1ec3ae6f draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 17:06:37 +0000
parents 4e7e67693ce7
children 8dd07e59f631
comparison
equal deleted inserted replaced
500:4e7e67693ce7 501:9bfd1ec3ae6f
277 277
278 Returns: 278 Returns:
279 cobra.Model: The constructed COBRApy model. 279 cobra.Model: The constructed COBRApy model.
280 """ 280 """
281 281
282 df = pd.read_csv(csv_path, sep='\t') 282 # Try to detect separator
283 with open(csv_path, 'r') as f:
284 first_line = f.readline()
285 sep = '\t' if '\t' in first_line else ','
286
287 df = pd.read_csv(csv_path, sep=sep)
288
289 # Check required columns
290 required_cols = ['ReactionID', 'Formula']
291 missing_cols = [col for col in required_cols if col not in df.columns]
292 if missing_cols:
293 raise ValueError(f"Missing required columns: {missing_cols}. Available columns: {list(df.columns)}")
283 294
284 model = cobraModel(model_id) 295 model = cobraModel(model_id)
285 296
286 metabolites_dict = {} 297 metabolites_dict = {}
287 compartments_dict = {} 298 compartments_dict = {}
385 396
386 Returns the IDs including the compartment suffix exactly as written. 397 Returns the IDs including the compartment suffix exactly as written.
387 """ 398 """
388 pattern = re.compile( 399 pattern = re.compile(
389 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :) 400 r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))' # left boundary (start, space, +, comma, =, :)
390 r'(?:\d+(?:\.\d+)?\s*)?' # optional coefficient 401 r'(?:\d+(?:\.\d+)?\s+)?' # optional coefficient (requires space after)
391 r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment 402 r'([A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))' # metabolite + compartment (can start with number)
392 ) 403 )
393 return {m.group(1) for m in pattern.finditer(reaction_formula)} 404 return {m.group(1) for m in pattern.finditer(reaction_formula)}
394 405
395 406
396 407
405 416
406 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): 417 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]):
407 """Parse a reaction formula and set metabolites with their coefficients.""" 418 """Parse a reaction formula and set metabolites with their coefficients."""
408 419
409 if '<=>' in formula: 420 if '<=>' in formula:
410 left, right = formula.split('<=>') 421 parts = formula.split('<=>')
411 reversible = True 422 reversible = True
412 elif '<--' in formula: 423 elif '<--' in formula:
413 left, right = formula.split('<--') 424 parts = formula.split('<--')
414 reversible = False 425 reversible = False
415 elif '-->' in formula: 426 elif '-->' in formula:
416 left, right = formula.split('-->') 427 parts = formula.split('-->')
417 reversible = False 428 reversible = False
418 elif '<-' in formula: 429 elif '<-' in formula:
419 left, right = formula.split('<-') 430 parts = formula.split('<-')
420 reversible = False 431 reversible = False
421 else: 432 else:
422 raise ValueError(f"Unrecognized reaction format: {formula}") 433 raise ValueError(f"Unrecognized reaction format: {formula}")
423 434
424 reactants = parse_metabolites_side(left.strip()) 435 # Handle cases where one side might be empty (exchange reactions)
425 products = parse_metabolites_side(right.strip()) 436 if len(parts) != 2:
437 raise ValueError(f"Invalid reaction format, expected 2 parts: {formula}")
438
439 left, right = parts[0].strip(), parts[1].strip()
440
441 reactants = parse_metabolites_side(left) if left else {}
442 products = parse_metabolites_side(right) if right else {}
426 443
427 metabolites_to_add = {} 444 metabolites_to_add = {}
428 445
429 for met_id, coeff in reactants.items(): 446 for met_id, coeff in reactants.items():
430 if met_id in metabolites_dict: 447 if met_id in metabolites_dict:
447 for term in terms: 464 for term in terms:
448 term = term.strip() 465 term = term.strip()
449 if not term: 466 if not term:
450 continue 467 continue
451 468
452 # optional coefficient + id ending with _<compartment> 469 # First check if term has space-separated coefficient and metabolite
453 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) 470 parts = term.split()
454 if match: 471 if len(parts) == 2:
455 coeff_str, met_id = match.groups() 472 # Two parts: potential coefficient + metabolite
456 coeff = float(coeff_str) if coeff_str else 1.0 473 try:
457 metabolites[met_id] = coeff 474 coeff = float(parts[0])
475 met_id = parts[1]
476 # Verify the second part looks like a metabolite with compartment
477 if re.match(r'[A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', met_id):
478 metabolites[met_id] = coeff
479 continue
480 except ValueError:
481 pass
482
483 # Single term - check if it's a metabolite (no coefficient)
484 # Updated pattern to include metabolites starting with numbers
485 if re.match(r'[A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', term):
486 metabolites[term] = 1.0
487 else:
488 print(f"Warning: Could not parse metabolite term: '{term}'")
458 489
459 return metabolites 490 return metabolites
460 491
461 492
462 493
485 516
486 517
487 518
488 def set_medium_from_data(model: cobraModel, df: pd.DataFrame): 519 def set_medium_from_data(model: cobraModel, df: pd.DataFrame):
489 """Set the medium based on the 'InMedium' column in the dataframe.""" 520 """Set the medium based on the 'InMedium' column in the dataframe."""
521 if 'InMedium' not in df.columns:
522 print("No 'InMedium' column found, skipping medium setup")
523 return
524
490 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() 525 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist()
491 526
492 medium_dict = {} 527 medium_dict = {}
493 for rxn_id in medium_reactions: 528 for rxn_id in medium_reactions:
494 if rxn_id in [r.id for r in model.reactions]: 529 if rxn_id in [r.id for r in model.reactions]:
495 reaction = model.reactions.get_by_id(rxn_id) 530 reaction = model.reactions.get_by_id(rxn_id)
496 if reaction.lower_bound < 0: # Solo reazioni di uptake 531 if reaction.lower_bound < 0:
497 medium_dict[rxn_id] = abs(reaction.lower_bound) 532 medium_dict[rxn_id] = abs(reaction.lower_bound)
498 533
499 if medium_dict: 534 if medium_dict:
500 model.medium = medium_dict 535 model.medium = medium_dict
501 print(f"Medium set with {len(medium_dict)} components") 536 print(f"Medium set with {len(medium_dict)} components")
502 537 else:
503 538 print("No medium components found")
504 def validate_model(model: cobraModel) -> Dict[str, any]: 539 def validate_model(model: cobraModel) -> Dict[str, any]:
505 """Validate the model and return basic statistics.""" 540 """Validate the model and return basic statistics."""
506 validation = { 541 validation = {
507 'num_reactions': len(model.reactions), 542 'num_reactions': len(model.reactions),
508 'num_metabolites': len(model.metabolites), 543 'num_metabolites': len(model.metabolites),