comparison COBRAxy/utils/model_utils.py @ 500:4e7e67693ce7 draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 16:39:30 +0000
parents a2f7a6dd9d0b
children 9bfd1ec3ae6f
comparison
equal deleted inserted replaced
499:a2f7a6dd9d0b 500:4e7e67693ce7
372 # pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[A-Za-z0-9]+)' 372 # pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[A-Za-z0-9]+)'
373 # matches = re.findall(pattern, reaction_formula) 373 # matches = re.findall(pattern, reaction_formula)
374 # metabolites.update(matches) 374 # metabolites.update(matches)
375 # return metabolites 375 # return metabolites
376 376
377 import re 377
378 from typing import Set
379
380 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore e [comp])
def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]:
    """
    Extract metabolite IDs from a reaction formula.

    Handles:
    - optional stoichiometric coefficients (integers or decimals)
    - compartment tags at the end of the metabolite, either [c] or _c
    - metabolite IDs that start with digits (e.g. ``2pg_c``, ``13dpg_c``)

    A leading number is treated as a coefficient only when it is separated
    from the metabolite by whitespace (``2 h2o_c``) or when it is a decimal
    directly attached to it (``2.5h2o_c``); IDs cannot contain a dot, so the
    latter is unambiguous. A bare integer glued to the ID is kept as part of
    the ID.

    Args:
        reaction_formula: Reaction string such as ``"2.0 h_c + 2pg_c --> pep[c]"``.

    Returns:
        The set of IDs including the compartment suffix exactly as written.
    """
    pattern = re.compile(
        r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))'  # left boundary (start, space, +, comma, =, :)
        r'(?:\d+(?:\.\d+)?\s+|\d+\.\d+)?'  # optional coefficient (spaced, or attached decimal)
        r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))'  # metabolite + compartment
    )
    return {m.group(1) for m in pattern.finditer(reaction_formula)}
395 394
396 395
396
def extract_compartment_from_metabolite(metabolite_id: str) -> str:
    """
    Extract the compartment from a metabolite ID.

    Two single-character suffix notations are recognised:
    - underscore form, e.g. ``h2o_c`` -> ``c``
    - bracket form, e.g. ``h2o[c]`` -> ``c``

    Args:
        metabolite_id: Metabolite identifier, optionally ending in a
            compartment suffix.

    Returns:
        The compartment taken from the suffix, or ``'c'`` (cytoplasm) when no
        suffix is recognised, including for empty or very short IDs.
    """
    # Slices instead of indexes: an ID shorter than the suffix yields '' here
    # rather than raising IndexError, and falls through to the default.
    if metabolite_id[-2:-1] == '_':
        return metabolite_id.split('_')[-1]
    if metabolite_id.endswith(']') and metabolite_id[-3:-2] == '[':
        return metabolite_id[-2]
    return 'c'  # default cytoplasm
404 404