Mercurial > repos > tduigou > get_sbml_model
changeset 10:dc1167469d62 draft
planemo upload for repository https://github.com/brsynth/synbiocad-galaxy-wrappers commit 94f89a44e330ccfccc8d94b5e7acf583c9d39343
author | tduigou |
---|---|
date | Thu, 27 Mar 2025 09:27:06 +0000 |
parents | 6a2871e89352 |
children | 062f51695ae0 |
files | get_infos.py get_sbml_model.xml |
diffstat | 2 files changed, 182 insertions(+), 191 deletions(-) [+] |
line wrap: on
line diff
--- a/get_infos.py Wed Feb 14 15:25:38 2024 +0000 +++ b/get_infos.py Thu Mar 27 09:27:06 2025 +0000 @@ -1,179 +1,140 @@ from argparse import ArgumentParser -from libsbml import ( - readSBMLFromFile -) +from libsbml import readSBMLFromFile from taxonid import get_taxonid +from requests import get as r_get def get_biomass_rxn(sbml_doc): - ''' + """ Returns the biomass reaction of the model - + Parameters ---------- sbml_doc: libsbml.SBMLDocument SBML model - + Returns ------- biomass_rxn: libsbml.Reaction Biomass reaction - ''' + """ reactions = sbml_doc.getModel().getListOfReactions() # Search for 'biomass' keyword in reaction name for rxn in reactions: - if 'biomass' in rxn.getName().lower(): + if "biomass" in rxn.getName().lower(): return rxn # Search for 'biomass' keyword in products # AND not in reactants for rxn in reactions: in_reactants = False for reac in rxn.getListOfReactants(): - if 'biomass' in reac.getSpecies().lower(): + if "biomass" in reac.getSpecies().lower(): in_reactants = True break if not in_reactants: for prod in rxn.getListOfProducts(): - if 'biomass' in prod.getSpecies().lower(): + if "biomass" in prod.getSpecies().lower(): return rxn return None def args(): - parser = ArgumentParser('Returns cell informations') - parser.add_argument( - 'infile', - type=str, - help='SBML input file (xml)' - ) - # argument to tag file from BiGG - parser.add_argument( - '--bigg', - action='store_true', - help='Tag file from BiGG' - ) - parser.add_argument( - '--comp', - type=str, - help='Path to store cell compartments' - ) - parser.add_argument( - '--biomass', - type=str, - help='Path to store biomass reaction ID' - ) - parser.add_argument( - '--biomass-id', - type=str, - help='ID of biomass reaction' - ) - parser.add_argument( - '--hostname', - type=str, - help='Name of the host organism' - ) - parser.add_argument( - '--taxid', - type=str, - help='Path to store host taxonomy ID' - ) + parser = ArgumentParser("Returns cell informations") + parser.add_argument("infile", type=str, help="SBML input file (xml)") + parser.add_argument("--hostname-or-id", type=str, help="Hostname or model ID") + parser.add_argument("--comp", type=str, help="Path to store cell compartments") + parser.add_argument("--biomass", type=str, help="Path to store biomass reaction ID") + parser.add_argument("--biomass-id", type=str, help="ID of biomass reaction") + parser.add_argument("--taxid", type=str, help="Path to store host taxonomy ID") params = parser.parse_args() return params -def get_taxon_id(hostid: str, bigg: bool): - ''' - Returns the taxonomy ID of the host organism - - Parameters - ---------- - hostid: str - Extended name of the host organism or host ID if from BiGG - bigg: bool - True if the model is from BiGG - - Returns - ------- - taxid: str - Taxonomy ID of the host organism - ''' - if not bigg: - return get_taxonid(hostid) +def get_organism_from_bigg_model(model_id): + """Try to retrieve organism info from BiGG Models for a given model ID.""" + url = f"http://bigg.ucsd.edu/api/v2/models/{model_id}" + try: + response = r_get(url) + if response.status_code == 200: + data = response.json() + organism = data.get("organism") + return organism + except Exception as e: + print(f"Error querying BiGG: {e}") + return None - hostname = '' - # Extended Name - server = 'http://bigg.ucsd.edu/api/v2/models/' - ext = hostid - r = r_get(server+ext, headers={ "Content-Type" : "application/json"}) - if not r.ok: - print(f"Warning: unable to retrieve host name for id {hostid}") - else: - try: - hostname = r.json()["organism"] - except KeyError: - print(f"Warning: unable to retrieve host name for id {hostid}") - if not hostname: - taxid = '' - else: - # TAXON ID - server = 'https://rest.ensembl.org' - ext = f'/taxonomy/id/{hostname}?' - r = r_get(server+ext, headers={ "Content-Type" : "application/json"}) - if not r.ok: - print(f"Warning: unable to retrieve taxonomy ID for host organism {hostname}") - else: - try: - taxid = r.json()["id"] - except KeyError: - print(f"Warning: unable to retrieve taxonomy ID for host organism {hostname}") - taxid = '' - return taxid +def get_taxon_id(input_name): + """Try BiGG model name first, then NCBI directly.""" + print(f"Trying input: {input_name}") + + # Try resolving as a BiGG model + organism = get_organism_from_bigg_model(input_name) + if organism: + print(f"Model '{input_name}' maps to organism: {organism}") + taxon_id = get_taxonid(organism) + if taxon_id: + return taxon_id + + # If not a model, try directly as an organism name + print(f"Trying NCBI search with input: {input_name}") + return get_taxonid(input_name) def entry_point(): + params = args() - params = args() + # test if the file exists + with open(params.infile): + pass sbml_doc = readSBMLFromFile(params.infile) compartments = sbml_doc.getModel().getListOfCompartments() - comp_str = '' + comp_str = "" for comp in compartments: - comp_str += f'{comp.getId()}\t{comp.getName()}\n' + comp_str += f"{comp.getId()}\t{comp.getName()}\n" + print("Compartments:") + for comp in compartments: + print(f"{comp.getId()}\t{comp.getName()}".replace("\n", " | ")) if params.comp: - with open(params.comp, 'w') as f: - f.write('#ID\tNAME\n') + with open(params.comp, "w") as f: + f.write("#ID\tNAME\n") f.write(comp_str) - else: - print('Compartments:') - for comp in compartments: - print(f'{comp.getId()}\t{comp.getName()}'.replace('\n', ' | ')) if params.biomass_id: biomass_rxn = sbml_doc.getModel().getReaction(params.biomass_id) else: biomass_rxn = get_biomass_rxn(sbml_doc) if not biomass_rxn: - print('Warning: unable to retrieve biomass reaction') - biomass_id = '' + print("Warning: unable to retrieve biomass reaction") + biomass_id = "" else: biomass_id = biomass_rxn.getId() + print(f"Biomass reaction ID: {biomass_id}") if params.biomass: - with open(params.biomass, 'w') as f: - f.write('#ID\n') - f.write(f'{biomass_id}\n') + with open(params.biomass, "w") as f: + f.write("#ID\n") + f.write(f"{biomass_id}\n") + + if params.hostname_or_id: + taxid = get_taxon_id(params.hostname_or_id) else: - print(f'Biomass reaction ID: {biomass_id}') - - taxid = get_taxon_id(params.hostname, params.bigg) + model_id = sbml_doc.getModel().getId() + taxid = -1 + if model_id: + taxid = get_taxon_id(sbml_doc.getModel().getId()) + if taxid == -1: + # Try with model name + model_name = sbml_doc.getModel().getName() + if model_name: + taxid = get_taxon_id(sbml_doc.getModel().getName()) + print(f"Taxonomy ID: {taxid}") if params.taxid: - with open(params.taxid, 'w') as f: - f.write('#ID\n') - f.write(f'{taxid}\n') - else: - print(f'Taxonomy ID: {taxid}') + with open(params.taxid, "w") as f: + f.write("#ID\n") + f.write(f"{taxid}\n") if __name__ == "__main__": - entry_point() \ No newline at end of file + entry_point()
--- a/get_sbml_model.xml Wed Feb 14 15:25:38 2024 +0000 +++ b/get_sbml_model.xml Thu Mar 27 09:27:06 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="get_sbml_model" name="Pick SBML Model" version="0.2.0" profile="21.09" license="MIT"> +<tool id="get_sbml_model" name="Pick SBML Model" version="0.3.0" profile="21.09" license="MIT"> <description>Get an SBML model (BiGG)</description> <requirements> <requirement type="package" version="7.81.0">curl</requirement> @@ -8,17 +8,15 @@ <requirement type="package" version="0.1.1">taxonid</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ + #import re #if str($cond_src.from_src) == 'from_bigg' - curl -o - 'http://bigg.ucsd.edu/static/models/${cond_src.hostid}.xml.gz' | gunzip > '$model' && + curl -o - 'http://bigg.ucsd.edu/static/models/${cond_src.hostid.hostid}.xml.gz' | gunzip > '$model' && + #else + #set model=$cond_src.input_file #end if python '$__tool_directory__/'get_infos.py - #if str($cond_src.from_src) == 'from_bigg' - '$model' - --bigg - #else - '${cond_src.input_file}' - #end if - --hostname '${cond_src.hostid}' + '$model' + --hostname-or-id '$cond_src.hostid.hostid' --taxid '$taxid' --comp '$compartments' --biomass '$biomass' @@ -30,92 +28,124 @@ <option value="from_history">Select file from the History</option> </param> <when value="from_history"> - <param name="hostid" type="text" label="Model's fullname" help="Model extended name"> - <validator type="empty_field" message="Organism extended name required"/> - </param> <param name="input_file" type="data" format="sbml,xml" label="SBML model" help="An SBML file is expected"/> + <section name="hostid" title="Model's ID" expanded="false"> + <param name="hostid" type="text" label="Model's hostname or ID" help="Model extended name or ID" optional="True"/> + </section> </when> <when value="from_bigg"> - <param name="hostid" type="select" label="Strain"> - <option value="iCN718">Acinetobacter baumannii AYE (iCN718)</option> - <option value="iYO844">Bacillus subtilis subsp. subtilis str. 168 (iYO844)</option> - <option value="iRC1080">Chlamydomonas reinhardtii (iRC1080)</option> - <option value="iCN900">Clostridioides difficile 630 (iCN900)</option> - <option value="iHN637">Clostridium ljungdahlii DSM 13528 (iHN637)</option> - <option value="iCHOv1_DG44">Cricetulus griseus (iCHOv1_DG44)</option> - <option value="iCHOv1">Cricetulus griseus (iCHOv1)</option> - <option value="iAF1260b">Escherichia coli str. K-12 substr. MG1655 (iAF1260b)</option> - <option value="iAF1260">Escherichia coli str. K-12 substr. MG1655 (iAF1260)</option> - <option value="iML1515" selected="true">Escherichia coli str. K-12 substr. MG1655 (iML1515)</option> - <option value="iJO1366">Escherichia coli str. K-12 substr. MG1655 (iJO1366)</option> - <option value="iJR904">Escherichia coli str. K-12 substr. MG1655 (iJR904)</option> - <option value="e_coli_core">Escherichia coli str. K-12 substr. MG1655 (e_coli_core)</option> - <option value="iAF987">Geobacter metallireducens GS-15 (iAF987)</option> - <option value="iIT341">Helicobacter pylori 26695 (iIT341)</option> - <option value="iAT_PLT_636">Homo sapiens (iAT_PLT_636)</option> - <option value="Recon3D">Homo sapiens (Recon3D)</option> - <option value="iAB_RBC_283">Homo sapiens (iAB_RBC_283)</option> - <option value="RECON1">Homo sapiens (RECON1)</option> - <option value="iYL1228">Klebsiella pneumoniae subsp. pneumoniae MGH 78578 (iYL1228)</option> - <option value="iNF517">Lactococcus lactis subsp. cremoris MG1363 (iNF517)</option> - <option value="iAF692">Methanosarcina barkeri str. Fusaro (iAF692)</option> - <option value="iMM1415">Mus musculus (iMM1415)</option> - <option value="iNJ661">Mycobacterium tuberculosis H37Rv (iNJ661)</option> - <option value="iEK1008">Mycobacterium tuberculosis H37Rv (iEK1008)</option> - <option value="iLB1027_lipid">Phaeodactylum tricornutum CCAP 1055/1 (iLB1027_lipid)</option> - <option value="iAM_Pb448">Plasmodium berghei (iAM_Pb448)</option> - <option value="iAM_Pc455">Plasmodium cynomolgi strain B (iAM_Pc455)</option> - <option value="iAM_Pf480">Plasmodium falciparum 3D7 (iAM_Pf480)</option> - <option value="iAM_Pk459">Plasmodium knowlesi strain H (iAM_Pk459)</option> - <option value="iAM_Pv461">Plasmodium vivax Sal-1 (iAM_Pv461)</option> - <option value="iJN746">Pseudomonas putida KT2440 (iJN746)</option> - <option value="iJN1463">Pseudomonas putida KT2440 (iJN1463)</option> - <option value="iND750">Saccharomyces cerevisiae S288C (iND750)</option> - <option value="iMM904">Saccharomyces cerevisiae S288C (iMM904)</option> - <option value="STM_v1_0">Salmonella enterica subsp. enterica serovar Typhimurium str. LT2 (STM_v1_0)</option> - <option value="iYS1720">Salmonella pan-reactome (iYS1720)</option> - <option value="iSB619">Staphylococcus aureus subsp. aureus N315 (iSB619)</option> - <option value="iYS854">Staphylococcus aureus subsp. aureus USA300_TCH1516 (iYS854)</option> - <option value="iJB785">Synechococcus elongatus PCC 7942 (iJB785)</option> - <option value="iJN678">Synechocystis sp. PCC 6803 (iJN678)</option> - <option value="iSynCJ816">Synechocystis sp. PCC 6803 (iSynCJ816)</option> - <option value="iLJ478">Thermotoga maritima MSB8 (iLJ478)</option> - <option value="iIS312">Trypanosoma cruzi Dm28c (iIS312)</option> - <option value="iIS312_Trypomastigote">Trypanosoma cruzi Dm28c (iIS312_Trypomastigote)</option> - <option value="iIS312_Epimastigote">Trypanosoma cruzi Dm28c (iIS312_Epimastigote)</option> - <option value="iIS312_Amastigote">Trypanosoma cruzi Dm28c (iIS312_Amastigote)</option> - </param> + <section name="hostid" title="Model's ID" expanded="true"> + <param name="hostid" type="select" label="Strain"> + <!-- Be careful, the text name is used for the hostname variable --> + <option value="iCN718">Acinetobacter baumannii AYE (iCN718)</option> + <option value="iYO844">Bacillus subtilis subsp. subtilis str. 168 (iYO844)</option> + <option value="iRC1080">Chlamydomonas reinhardtii (iRC1080)</option> + <option value="iCN900">Clostridioides difficile 630 (iCN900)</option> + <option value="iHN637">Clostridium ljungdahlii DSM 13528 (iHN637)</option> + <option value="iCHOv1_DG44">Cricetulus griseus (iCHOv1_DG44)</option> + <option value="iCHOv1">Cricetulus griseus (iCHOv1)</option> + <option value="iAF1260b">Escherichia coli str. K-12 substr. MG1655 (iAF1260b)</option> + <option value="iAF1260">Escherichia coli str. K-12 substr. MG1655 (iAF1260)</option> + <option value="iML1515" selected="true">Escherichia coli str. K-12 substr. MG1655 (iML1515)</option> + <option value="iJO1366">Escherichia coli str. K-12 substr. MG1655 (iJO1366)</option> + <option value="iJR904">Escherichia coli str. K-12 substr. MG1655 (iJR904)</option> + <option value="e_coli_core">Escherichia coli str. K-12 substr. MG1655 (e_coli_core)</option> + <option value="iAF987">Geobacter metallireducens GS-15 (iAF987)</option> + <option value="iIT341">Helicobacter pylori 26695 (iIT341)</option> + <option value="iAT_PLT_636">Homo sapiens (iAT_PLT_636)</option> + <option value="Recon3D">Homo sapiens (Recon3D)</option> + <option value="iAB_RBC_283">Homo sapiens (iAB_RBC_283)</option> + <option value="RECON1">Homo sapiens (RECON1)</option> + <option value="iYL1228">Klebsiella pneumoniae subsp. pneumoniae MGH 78578 (iYL1228)</option> + <option value="iNF517">Lactococcus lactis subsp. cremoris MG1363 (iNF517)</option> + <option value="iAF692">Methanosarcina barkeri str. Fusaro (iAF692)</option> + <option value="iMM1415">Mus musculus (iMM1415)</option> + <option value="iNJ661">Mycobacterium tuberculosis H37Rv (iNJ661)</option> + <option value="iEK1008">Mycobacterium tuberculosis H37Rv (iEK1008)</option> + <option value="iLB1027_lipid">Phaeodactylum tricornutum CCAP 1055/1 (iLB1027_lipid)</option> + <option value="iAM_Pb448">Plasmodium berghei (iAM_Pb448)</option> + <option value="iAM_Pc455">Plasmodium cynomolgi strain B (iAM_Pc455)</option> + <option value="iAM_Pf480">Plasmodium falciparum 3D7 (iAM_Pf480)</option> + <option value="iAM_Pk459">Plasmodium knowlesi strain H (iAM_Pk459)</option> + <option value="iAM_Pv461">Plasmodium vivax Sal-1 (iAM_Pv461)</option> + <option value="iJN746">Pseudomonas putida KT2440 (iJN746)</option> + <option value="iJN1463">Pseudomonas putida KT2440 (iJN1463)</option> + <option value="iND750">Saccharomyces cerevisiae S288C (iND750)</option> + <option value="iMM904">Saccharomyces cerevisiae S288C (iMM904)</option> + <option value="STM_v1_0">Salmonella enterica subsp. enterica serovar Typhimurium str. LT2 (STM_v1_0)</option> + <option value="iYS1720">Salmonella pan-reactome (iYS1720)</option> + <option value="iSB619">Staphylococcus aureus subsp. aureus N315 (iSB619)</option> + <option value="iYS854">Staphylococcus aureus subsp. aureus USA300_TCH1516 (iYS854)</option> + <option value="iJB785">Synechococcus elongatus PCC 7942 (iJB785)</option> + <option value="iJN678">Synechocystis sp. PCC 6803 (iJN678)</option> + <option value="iSynCJ816">Synechocystis sp. PCC 6803 (iSynCJ816)</option> + <option value="iLJ478">Thermotoga maritima MSB8 (iLJ478)</option> + <option value="iIS312">Trypanosoma cruzi Dm28c (iIS312)</option> + <option value="iIS312_Trypomastigote">Trypanosoma cruzi Dm28c (iIS312_Trypomastigote)</option> + <option value="iIS312_Epimastigote">Trypanosoma cruzi Dm28c (iIS312_Epimastigote)</option> + <option value="iIS312_Amastigote">Trypanosoma cruzi Dm28c (iIS312_Amastigote)</option> + </param> + </section> </when> </conditional> </inputs> <outputs> - <data name="model" format="sbml" label="${cond_src.hostid}"> + <data name="model" format="sbml" label="${cond_src.hostid.hostid}"> <filter>cond_src['from_src'] == 'from_bigg'</filter> </data> - <data name="taxid" format="tsv" label="${cond_src.hostid} (taxon id)" /> - <data name="compartments" format="tsv" label="${cond_src.hostid} (compartments)" /> - <data name="biomass" format="tsv" label="${cond_src.hostid} (biomass reactions)" /> + <data name="taxid" format="tsv" label="${cond_src.hostid.hostid} (taxon id)" /> + <data name="compartments" format="tsv" label="${cond_src.hostid.hostid} (compartments)" /> + <data name="biomass" format="tsv" label="${cond_src.hostid.hostid} (biomass reactions)" /> </outputs> <tests> - <test> - <param name="from_src" value="from_bigg" /> - <param name="hostid" value="iML1515" /> + <test expect_num_outputs="4"> + <conditional name="cond_src"> + <param name="from_src" value="from_bigg" /> + <section name="hostid"> + <param name="hostid" value="iML1515" /> + </section> + </conditional> <output name="model" md5="9bf81d20cab5476700697ded95b716d1" /> - <output name="taxid" md5="7c62bd818057838b3557c8d87cca47fc" /> + <output name="taxid" md5="6b35ad8a1c2b640af4ba738c2f5f876e" /> <output name="compartments" md5="e93a875a2d8efc10a880ae3ac0018236" /> - <output name="biomass" md5="cffb2fbdb07d1301dfdb7bb284fb7e06" /> + <output name="biomass" md5="95c0255bd5dd8753c6dde53c0f2958ea" /> </test> - <test> + <test expect_num_outputs="3"> <conditional name="cond_src"> <param name="from_src" value="from_history" /> - <param name="hostname" value="Escherichia coli str. K-12 substr. MG1655" /> - <param name="hostid" value="e_coli_core" /> + <section name="hostid"> + <param name="hostid" value="Escherichia coli str. K-12 substr. MG1655" /> + </section> + <param name="input_file" value="e_coli_core.xml" /> + </conditional> + <output name="taxid" md5="6b35ad8a1c2b640af4ba738c2f5f876e" /> + <output name="compartments" md5="71dc18974a12e9bb75bb2c4cffd13edf" /> + <output name="biomass" md5="d10baa335181450c7bffa9b4ca01754a" /> + </test> + <test expect_num_outputs="3"> + <conditional name="cond_src"> + <param name="from_src" value="from_history" /> + <section name="hostid"> + <param name="hostid" value="e_coli_core" /> + </section> <param name="input_file" value="e_coli_core.xml" /> </conditional> - <output name="taxid" md5="ee08b511771328e6254775a51779d421" /> + <output name="taxid" md5="6b35ad8a1c2b640af4ba738c2f5f876e" /> <output name="compartments" md5="71dc18974a12e9bb75bb2c4cffd13edf" /> <output name="biomass" md5="d10baa335181450c7bffa9b4ca01754a" /> - </test> + </test> + <test expect_num_outputs="3"> + <conditional name="cond_src"> + <param name="from_src" value="from_history" /> + <section name="hostid"> + <param name="hostid" value="iMM1415" /> + </section> + <param name="input_file" value="e_coli_core.xml" /> + </conditional> + <output name="taxid" md5="9c5ebe6ee91a632cbfb244435db7ce7e" /> + <output name="compartments" md5="71dc18974a12e9bb75bb2c4cffd13edf" /> + <output name="biomass" md5="d10baa335181450c7bffa9b4ca01754a" /> + </test> </tests> <help><![CDATA[ Pick SBML Model