diff get_infos.py @ 10:dc1167469d62 draft

planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 94f89a44e330ccfccc8d94b5e7acf583c9d39343
author tduigou
date Thu, 27 Mar 2025 09:27:06 +0000
parents 6a2871e89352
children 062f51695ae0
line wrap: on
line diff
--- a/get_infos.py	Wed Feb 14 15:25:38 2024 +0000
+++ b/get_infos.py	Thu Mar 27 09:27:06 2025 +0000
@@ -1,179 +1,140 @@
 from argparse import ArgumentParser
-from libsbml import (
-    readSBMLFromFile
-)
+from libsbml import readSBMLFromFile
 from taxonid import get_taxonid
+from requests import get as r_get
 
 
 def get_biomass_rxn(sbml_doc):
-    '''
+    """
     Returns the biomass reaction of the model
-    
+
     Parameters
     ----------
     sbml_doc: libsbml.SBMLDocument
         SBML model
-        
+
     Returns
     -------
     biomass_rxn: libsbml.Reaction
         Biomass reaction
-    '''
+    """
     reactions = sbml_doc.getModel().getListOfReactions()
     # Search for 'biomass' keyword in reaction name
     for rxn in reactions:
-        if 'biomass' in rxn.getName().lower():
+        if "biomass" in rxn.getName().lower():
             return rxn
     # Search for 'biomass' keyword in products
     # AND not in reactants
     for rxn in reactions:
         in_reactants = False
         for reac in rxn.getListOfReactants():
-            if 'biomass' in reac.getSpecies().lower():
+            if "biomass" in reac.getSpecies().lower():
                 in_reactants = True
                 break
         if not in_reactants:
             for prod in rxn.getListOfProducts():
-                if 'biomass' in prod.getSpecies().lower():
+                if "biomass" in prod.getSpecies().lower():
                     return rxn
     return None
 
 
 def args():
-    parser = ArgumentParser('Returns cell informations')
-    parser.add_argument(
-        'infile',
-        type=str,
-        help='SBML input file (xml)'
-    )
-    # argument to tag file from BiGG
-    parser.add_argument(
-        '--bigg',
-        action='store_true',
-        help='Tag file from BiGG'
-    )
-    parser.add_argument(
-        '--comp',
-        type=str,
-        help='Path to store cell compartments'
-    )
-    parser.add_argument(
-        '--biomass',
-        type=str,
-        help='Path to store biomass reaction ID'
-    )
-    parser.add_argument(
-        '--biomass-id',
-        type=str,
-        help='ID of biomass reaction'
-    )
-    parser.add_argument(
-        '--hostname',
-        type=str,
-        help='Name of the host organism'
-    )
-    parser.add_argument(
-        '--taxid',
-        type=str,
-        help='Path to store host taxonomy ID'
-    )
+    parser = ArgumentParser("Returns cell informations")
+    parser.add_argument("infile", type=str, help="SBML input file (xml)")
+    parser.add_argument("--hostname-or-id", type=str, help="Hostname or model ID")
+    parser.add_argument("--comp", type=str, help="Path to store cell compartments")
+    parser.add_argument("--biomass", type=str, help="Path to store biomass reaction ID")
+    parser.add_argument("--biomass-id", type=str, help="ID of biomass reaction")
+    parser.add_argument("--taxid", type=str, help="Path to store host taxonomy ID")
     params = parser.parse_args()
     return params
 
 
-def get_taxon_id(hostid: str, bigg: bool):
-    '''
-    Returns the taxonomy ID of the host organism
-    
-    Parameters
-    ----------
-    hostid: str
-        Extended name of the host organism or host ID if from BiGG
-    bigg: bool
-        True if the model is from BiGG
-        
-    Returns
-    -------
-    taxid: str
-        Taxonomy ID of the host organism
-    '''
-    if not bigg:
-        return get_taxonid(hostid)
+def get_organism_from_bigg_model(model_id):
+    """Try to retrieve organism info from BiGG Models for a given model ID."""
+    url = f"http://bigg.ucsd.edu/api/v2/models/{model_id}"
+    try:
+        response = r_get(url)
+        if response.status_code == 200:
+            data = response.json()
+            organism = data.get("organism")
+            return organism
+    except Exception as e:
+        print(f"Error querying BiGG: {e}")
+    return None
 
-    hostname = ''
-    # Extended Name
-    server = 'http://bigg.ucsd.edu/api/v2/models/'
-    ext = hostid
-    r = r_get(server+ext, headers={ "Content-Type" : "application/json"})
-    if not r.ok:
-        print(f"Warning: unable to retrieve host name for id {hostid}")
-    else:
-        try:
-            hostname = r.json()["organism"]
-        except KeyError:
-            print(f"Warning: unable to retrieve host name for id {hostid}")
-    if not hostname:
-        taxid = ''
-    else:
-        # TAXON ID
-        server = 'https://rest.ensembl.org'
-        ext = f'/taxonomy/id/{hostname}?'
-        r = r_get(server+ext, headers={ "Content-Type" : "application/json"})
-        if not r.ok:
-            print(f"Warning: unable to retrieve taxonomy ID for host organism {hostname}")
-        else:
-            try:
-                taxid = r.json()["id"]
-            except KeyError:
-                print(f"Warning: unable to retrieve taxonomy ID for host organism {hostname}")
-                taxid = ''
-    return taxid
+def get_taxon_id(input_name):
+    """Try BiGG model name first, then NCBI directly."""
+    print(f"Trying input: {input_name}")
+    
+    # Try resolving as a BiGG model
+    organism = get_organism_from_bigg_model(input_name)
+    if organism:
+        print(f"Model '{input_name}' maps to organism: {organism}")
+        taxon_id = get_taxonid(organism)
+        if taxon_id:
+            return taxon_id
+
+    # If not a model, try directly as an organism name
+    print(f"Trying NCBI search with input: {input_name}")
+    return get_taxonid(input_name)
 
 
 def entry_point():
+    params = args()
 
-    params = args()
+    # test if the file exists
+    with open(params.infile):
+        pass
 
     sbml_doc = readSBMLFromFile(params.infile)
 
     compartments = sbml_doc.getModel().getListOfCompartments()
-    comp_str = ''
+    comp_str = ""
     for comp in compartments:
-        comp_str += f'{comp.getId()}\t{comp.getName()}\n'
+        comp_str += f"{comp.getId()}\t{comp.getName()}\n"
+    print("Compartments:")
+    for comp in compartments:
+        print(f"{comp.getId()}\t{comp.getName()}".replace("\n", " | "))
     if params.comp:
-        with open(params.comp, 'w') as f:
-            f.write('#ID\tNAME\n')
+        with open(params.comp, "w") as f:
+            f.write("#ID\tNAME\n")
             f.write(comp_str)
-    else:
-        print('Compartments:')
-        for comp in compartments:
-            print(f'{comp.getId()}\t{comp.getName()}'.replace('\n', ' | '))
 
     if params.biomass_id:
         biomass_rxn = sbml_doc.getModel().getReaction(params.biomass_id)
     else:
         biomass_rxn = get_biomass_rxn(sbml_doc)
     if not biomass_rxn:
-        print('Warning: unable to retrieve biomass reaction')
-        biomass_id = ''
+        print("Warning: unable to retrieve biomass reaction")
+        biomass_id = ""
     else:
         biomass_id = biomass_rxn.getId()
+    print(f"Biomass reaction ID: {biomass_id}")
     if params.biomass:
-        with open(params.biomass, 'w') as f:
-            f.write('#ID\n')
-            f.write(f'{biomass_id}\n')
+        with open(params.biomass, "w") as f:
+            f.write("#ID\n")
+            f.write(f"{biomass_id}\n")
+
+    if params.hostname_or_id:
+        taxid = get_taxon_id(params.hostname_or_id)
     else:
-        print(f'Biomass reaction ID: {biomass_id}')
-
-    taxid = get_taxon_id(params.hostname, params.bigg)
+        model_id = sbml_doc.getModel().getId()
+        taxid = -1
+        if model_id:
+            taxid = get_taxon_id(sbml_doc.getModel().getId())
+        if taxid == -1:
+            # Try with model name
+            model_name = sbml_doc.getModel().getName()
+            if model_name:
+                taxid = get_taxon_id(sbml_doc.getModel().getName())
+    print(f"Taxonomy ID: {taxid}")
 
     if params.taxid:
-        with open(params.taxid, 'w') as f:
-            f.write('#ID\n')
-            f.write(f'{taxid}\n')
-    else:
-        print(f'Taxonomy ID: {taxid}')
+        with open(params.taxid, "w") as f:
+            f.write("#ID\n")
+            f.write(f"{taxid}\n")
 
 
 if __name__ == "__main__":
-    entry_point()
\ No newline at end of file
+    entry_point()