ncbi_eutils_efetch: __efetch_build

comparison __efetch_build_options.py @ 0:71bcf87a7031 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db

author	iuc
date	Thu, 07 Jul 2016 02:39:36 -0400
parents
children	0fc65a60436f

comparison

equal deleted inserted replaced

--1:000000000000
+:71bcf87a7031
+#!/usr/bin/env python
+# Daniel Blankenberg
+# Creates the options for tool interface
+import re
+# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
+db_list = '''
+<DbName>annotinfo</DbName>
+<DbName>assembly</DbName>
+<DbName>bioproject</DbName>
+<DbName>biosample</DbName>
+<DbName>biosystems</DbName>
+<DbName>blastdbinfo</DbName>
+<DbName>books</DbName>
+<DbName>cdd</DbName>
+<DbName>clinvar</DbName>
+<DbName>clone</DbName>
+<DbName>dbvar</DbName>
+<DbName>gap</DbName>
+<DbName>gapplus</DbName>
+<DbName>gds</DbName>
+<DbName>gencoll</DbName>
+<DbName>gene</DbName>
+<DbName>genome</DbName>
+<DbName>geoprofiles</DbName>
+<DbName>grasp</DbName>
+<DbName>gtr</DbName>
+<DbName>homologene</DbName>
+<DbName>medgen</DbName>
+<DbName>mesh</DbName>
+<DbName>ncbisearch</DbName>
+<DbName>nlmcatalog</DbName>
+<DbName>nuccore</DbName>
+<DbName>nucest</DbName>
+<DbName>nucgss</DbName>
+<DbName>nucleotide</DbName>
+<DbName>omim</DbName>
+<DbName>orgtrack</DbName>
+<DbName>pcassay</DbName>
+<DbName>pccompound</DbName>
+<DbName>pcsubstance</DbName>
+<DbName>pmc</DbName>
+<DbName>popset</DbName>
+<DbName>probe</DbName>
+<DbName>protein</DbName>
+<DbName>proteinclusters</DbName>
+<DbName>pubmed</DbName>
+<DbName>pubmedhealth</DbName>
+<DbName>seqannot</DbName>
+<DbName>snp</DbName>
+<DbName>sra</DbName>
+<DbName>structure</DbName>
+<DbName>taxonomy</DbName>
+<DbName>unigene</DbName>'''.replace( "<DbName>", "").replace( "</DbName>", "").split("\n")
+help = '''  (all)
+docsum             xml      Document Summary
+docsum             json     Document Summary
+full               text     Full Document
+uilist             xml      Unique Identifier List
+uilist             text     Unique Identifier List
+full               xml      Full Document
+bioproject
+native                      BioProject Report
+native             xml      RecordSet
+biosample
+native                      BioSample Report
+native             xml      BioSampleSet
+biosystems
+native             xml      Sys-set
+gds
+native             xml      RecordSet
+summary            text     Summary
+gene
+gene_table         xml      Gene Table
+native             text     Gene Report
+native             asn.1    Entrezgene
+native             xml      Entrezgene-Set
+tabular            tabular  Tabular Report
+homologene
+alignmentscores    text     Alignment Scores
+fasta              fasta    FASTA
+homologene         text     Homologene Report
+native             text     Homologene List
+native             asn.1    HG-Entry
+native             xml      Entrez-Homologene-Set
+mesh
+full               text     Full Record
+native             text     MeSH Report
+native             xml      RecordSet
+nlmcatalog
+native             text     Full Record
+native             xml      NLMCatalogRecordSet
+pmc
+medline            text     MEDLINE
+native             xml      pmc-articleset
+pubmed
+abstract           xml      Abstract
+medline            text     MEDLINE
+native             asn.1    Pubmed-entry
+native             xml      PubmedArticleSet
+(sequences)
+acc                text     Accession Number
+est                xml      EST Report
+fasta              fasta    FASTA
+fasta              xml      TinySeq
+fasta_cds_aa       fasta    CDS Products
+fasta_cds_na       fasta    Coding Regions
+ft                 text     Feature Table
+gb                 text     GenBank Flatfile
+gb                 xml      GBSet
+gbc                xml      INSDSet
+gbwithparts        text     GenBank with Contig Sequences
+gene_fasta         fasta    FASTA of Gene
+gp                 text     GenPept Flatfile
+gp                 xml      GBSet
+gpc                xml      INSDSet
+gss                text     GSS Report
+ipg                text     Identical Protein Report
+ipg                xml      IPGReportSet
+native             text     Seq-entry
+native             xml      Bioseq-set
+seqid              asn.1    Seq-id
+snp
+chr                text     Chromosome Report
+docset             text     Summary
+fasta              fasta    FASTA
+flt                text     Flat File
+native             asn.1    Rs
+native             xml      ExchangeSet
+rsr                tabular  RS Cluster Report
+ssexemplar         text     SS Exemplar List
+sra
+native             xml      EXPERIMENT_PACKAGE_SET
+runinfo            xml      SraRunInfo
+structure
+mmdb               asn.1    Ncbi-mime-asn1 strucseq
+native             text     MMDB Report
+native             xml      RecordSet
+taxonomy
+native             text     Taxonomy List
+native             xml      TaxaSet'''.split("\n")
+db = {}
+for db_name in db_list:
+db[db_name] = []
+section = None
+for line in help:
+line = re.split('\s{2,}', line.strip())
+# Ignore empties
+if len(line) == 0:
+continue
+# Section headers have one item
+elif len(line) == 1:
+section = line[0]
+db[section] = []
+# Format lines have 2+
+elif len(line) == 2:
+parent_format = line[0]
+description = line[1]
+if parent_format not in db[section]:
+db[section].append((parent_format, None, description))
+elif len(line) == 3:
+parent_format = line[0]
+format_modifier = line[1]
+description = line[2]
+if parent_format not in db[section]:
+db[section].append((parent_format, format_modifier, description))
+all_formats = db['(all)']
+del db['(all)']
+sequences_formats = db['(sequences)']
+del db['(sequences)']
+del db['']
+for key in db:
+db[key] += all_formats
+for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
+db[key] += sequences_formats
+MACRO_TPL = '''
+'''
+WHEN_TPL = '''      <when value="{format}">
+<param name="output_format" type="select" label="Output Format">
+{format_options}
+</param>
+</when>'''
+FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''
+format_names = {}
+print '''  <xml name="db">
+<conditional name="db">
+<expand macro="dbselect" />'''
+for key in sorted(db):
+format_options = []
+for (parent_format, format_modifier, description) in sorted(db[key]):
+name_human = description
+if format_modifier:
+name_human += ' (%s)' % format_modifier
+format_string = '%s-%s' % (parent_format, format_modifier)
+format_options.append(FORMAT_OPTION_TPL.format(
+name_type=format_string,
+name_type_human=name_human,
+))
+format_names[format_string] = format_modifier
+print WHEN_TPL.format(
+format=key,
+format_options='\n          '.join(format_options)
+)
+print '''    </conditional>
+</xml>'''
+CHANGE_FORMAT_TPL = '''
+<xml name="efetch_formats">
+<change_format>
+{formats}
+</change_format>
+</xml>
+'''
+CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''
+# Format options
+whens = []
+for (k, v) in format_names.items():
+if v is None:
+v = 'text'
+elif v == 'asn.1':
+v = 'asn1'
+whens.append(CHANGE_FORMAT_WHEN_TPL.format(
+key=k, value=v
+))
+print CHANGE_FORMAT_TPL.format(formats='\n      '.join(whens))

Mercurial > repos > iuc > ncbi_eutils_efetch

comparison __efetch_build_options.py @ 0:71bcf87a7031 draft