Mercurial > repos > iuc > ncbi_eutils_esummary
changeset 0:92bd8a680b9d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
author | iuc |
---|---|
date | Thu, 07 Jul 2016 02:41:02 -0400 |
parents | |
children | c8d4ea6376a7 |
files | README.rst __efetch_build_options.py ecitmatch.py efetch.py egquery.py einfo.py elink.py epost.py esearch.py esummary.py esummary.xml eutils.py eutils.pyc macros.xml test-data/ecitmatch.results.tsv test-data/ecitmatch.tsv test-data/egquery.1.xml test-data/esearch.pubmed.2014-01-pnas.xml test-data/esearch.pubmed.xml test-data/esummary.tax.xml test-data/example.history.json test-data/pm-tax-neighbor.xml test-data/pubmed.metadata.xml test-data/viruses.tax.xml tool_dependencies.xml |
diffstat | 25 files changed, 1785 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,38 @@ +Galaxy NCBI Entrez Tools +======================== + +This repo requires a readme as administrators should be very aware of some +restrictions NCBI places on the use of the Entrez service. + +NCBI requests that you please limit large jobs to either weekends or +between 9:00 PM and 5:00 AM Eastern time during weekdays. This is not a +request that the Galaxy tool can easily service, so we've included it in +the disclaimer on every tool quite prominently. + +Failure to comply with NCBI's policies may result in a block until +you/the user contacts NCBI and registers the tool ID and their email. + +Note that these are *IP* level blocks so the Galaxy tools use a +concatenation of the administrator's emails, and the user email, in +hopes that NCBI will contact all relevant parties should their system be +abused. + +Additionally, since these are IP level blocks, the Galaxy tool author +(@erasche) recommends using the following ``job_conf.xml`` snippet in +order to place a system-wide restriction of 1 concurrent Entrez job +amongst all users. + +.. code:: xml + + <destination id="entrez" runner="local"> + </destination> + <limit type="concurrent_jobs" id="entrez">1</limit> + <tools> + <tool id="ncbi_eutils_efetch" destination="entrez" /> + <tool id="ncbi_eutils_esearch" destination="entrez" /> + <tool id="ncbi_eutils_epost" destination="entrez" /> + <tool id="ncbi_eutils_elink" destination="entrez" /> + <tool id="ncbi_eutils_einfo" destination="entrez" /> + <tool id="ncbi_eutils_esummary" destination="entrez" /> + </tools> +
#!/usr/bin/env python
# File: __efetch_build_options.py (added in changeset 0:92bd8a680b9d)
# Daniel Blankenberg
# Creates the options for tool interface
"""Generate the ``db`` and ``efetch_formats`` Galaxy macros for EFetch.

Running the script prints two XML fragments to stdout:

* an ``<xml name="db">`` macro holding one ``<when>`` per NCBI database,
  each listing the output formats that database supports, and
* an ``<xml name="efetch_formats">`` macro holding one ``<change_format>``
  rule per distinct ``<rettype>-<retmode>`` option value.

The data comes from the two tables embedded below.
"""
import re

# Database names, kept in the XML form returned by
# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
# so the list can be refreshed by pasting the service output.
DB_LIST_XML = '''
<DbName>annotinfo</DbName>
<DbName>assembly</DbName>
<DbName>bioproject</DbName>
<DbName>biosample</DbName>
<DbName>biosystems</DbName>
<DbName>blastdbinfo</DbName>
<DbName>books</DbName>
<DbName>cdd</DbName>
<DbName>clinvar</DbName>
<DbName>clone</DbName>
<DbName>dbvar</DbName>
<DbName>gap</DbName>
<DbName>gapplus</DbName>
<DbName>gds</DbName>
<DbName>gencoll</DbName>
<DbName>gene</DbName>
<DbName>genome</DbName>
<DbName>geoprofiles</DbName>
<DbName>grasp</DbName>
<DbName>gtr</DbName>
<DbName>homologene</DbName>
<DbName>medgen</DbName>
<DbName>mesh</DbName>
<DbName>ncbisearch</DbName>
<DbName>nlmcatalog</DbName>
<DbName>nuccore</DbName>
<DbName>nucest</DbName>
<DbName>nucgss</DbName>
<DbName>nucleotide</DbName>
<DbName>omim</DbName>
<DbName>orgtrack</DbName>
<DbName>pcassay</DbName>
<DbName>pccompound</DbName>
<DbName>pcsubstance</DbName>
<DbName>pmc</DbName>
<DbName>popset</DbName>
<DbName>probe</DbName>
<DbName>protein</DbName>
<DbName>proteinclusters</DbName>
<DbName>pubmed</DbName>
<DbName>pubmedhealth</DbName>
<DbName>seqannot</DbName>
<DbName>snp</DbName>
<DbName>sra</DbName>
<DbName>structure</DbName>
<DbName>taxonomy</DbName>
<DbName>unigene</DbName>'''

DB_NAMES = re.findall(r'<DbName>([^<]+)</DbName>', DB_LIST_XML)

# Format table. A line with a single column starts a section (a database
# name, or the pseudo-sections "(all)" and "(sequences)"). Every other line
# is "<format>  [<modifier>]  <description>". Columns MUST be separated by
# two or more spaces; descriptions may contain single spaces.
FORMAT_TABLE = '''
(all)
    docsum            xml      Document Summary
    docsum            json     Document Summary
    full              text     Full Document
    uilist            xml      Unique Identifier List
    uilist            text     Unique Identifier List
    full              xml      Full Document

bioproject
    native                     BioProject Report
    native            xml      RecordSet

biosample
    native                     BioSample Report
    native            xml      BioSampleSet

biosystems
    native            xml      Sys-set

gds
    native            xml      RecordSet
    summary           text     Summary

gene
    gene_table        xml      Gene Table
    native            text     Gene Report
    native            asn.1    Entrezgene
    native            xml      Entrezgene-Set
    tabular           tabular  Tabular Report

homologene
    alignmentscores   text     Alignment Scores
    fasta             fasta    FASTA
    homologene        text     Homologene Report
    native            text     Homologene List
    native            asn.1    HG-Entry
    native            xml      Entrez-Homologene-Set

mesh
    full              text     Full Record
    native            text     MeSH Report
    native            xml      RecordSet

nlmcatalog
    native            text     Full Record
    native            xml      NLMCatalogRecordSet

pmc
    medline           text     MEDLINE
    native            xml      pmc-articleset

pubmed
    abstract          xml      Abstract
    medline           text     MEDLINE
    native            asn.1    Pubmed-entry
    native            xml      PubmedArticleSet

(sequences)
    acc               text     Accession Number
    est               xml      EST Report
    fasta             fasta    FASTA
    fasta             xml      TinySeq
    fasta_cds_aa      fasta    CDS Products
    fasta_cds_na      fasta    Coding Regions
    ft                text     Feature Table
    gb                text     GenBank Flatfile
    gb                xml      GBSet
    gbc               xml      INSDSet
    gbwithparts       text     GenBank with Contig Sequences
    gene_fasta        fasta    FASTA of Gene
    gp                text     GenPept Flatfile
    gp                xml      GBSet
    gpc               xml      INSDSet
    gss               text     GSS Report
    ipg               text     Identical Protein Report
    ipg               xml      IPGReportSet
    native            text     Seq-entry
    native            xml      Bioseq-set
    seqid             asn.1    Seq-id

snp
    chr               text     Chromosome Report
    docset            text     Summary
    fasta             fasta    FASTA
    flt               text     Flat File
    native            asn.1    Rs
    native            xml      ExchangeSet
    rsr               tabular  RS Cluster Report
    ssexemplar        text     SS Exemplar List

sra
    native            xml      EXPERIMENT_PACKAGE_SET
    runinfo           xml      SraRunInfo

structure
    mmdb              asn.1    Ncbi-mime-asn1 strucseq
    native            text     MMDB Report
    native            xml      RecordSet

taxonomy
    native            text     Taxonomy List
    native            xml      TaxaSet
'''

# Databases that additionally receive every "(sequences)" format.
SEQUENCE_DBS = ('nuccore', 'nucest', 'nucgss', 'nucleotide')

COLUMN_SEPARATOR = re.compile(r'\s{2,}')

WHEN_TPL = '''            <when value="{format}">
                <param name="output_format" type="select" label="Output Format">
                    {format_options}
                </param>
            </when>'''

FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>'''

CHANGE_FORMAT_TPL = '''
    <xml name="efetch_formats">
        <change_format>
            {formats}
        </change_format>
    </xml>
'''

CHANGE_FORMAT_WHEN_TPL = '''<when input="output_format" value="{key}" format="{value}"/>'''


def parse_format_table(table_text):
    """Parse ``table_text`` into ``{section: [(format, modifier, description)]}``.

    ``modifier`` is ``None`` for two-column lines. Blank lines are skipped
    and exact duplicate entries within a section are dropped.

    Raises:
        ValueError: if a format line precedes the first section header or a
            line has more than three columns.
    """
    sections = {}
    current = None
    for raw_line in table_text.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        columns = COLUMN_SEPARATOR.split(stripped)
        if len(columns) == 1:
            current = columns[0]
            sections[current] = []
            continue
        if current is None or len(columns) > 3:
            raise ValueError("Malformed format table line: %r" % raw_line)
        if len(columns) == 2:
            entry = (columns[0], None, columns[1])
        else:
            entry = tuple(columns)
        if entry not in sections[current]:
            sections[current].append(entry)
    return sections


def build_db(db_names, table_text):
    """Return ``{database: [format entries]}`` for every known database.

    Each database gets its own section (if any) followed by the "(all)"
    formats; the databases in ``SEQUENCE_DBS`` also get the "(sequences)"
    formats.
    """
    sections = parse_format_table(table_text)
    all_formats = sections.pop('(all)')
    sequence_formats = sections.pop('(sequences)')

    db = dict((name, []) for name in db_names)
    for name, entries in sections.items():
        db[name] = list(entries)
    for name in db:
        db[name] = db[name] + all_formats
    for name in SEQUENCE_DBS:
        db[name] = db[name] + sequence_formats
    return db


def _sort_key(entry):
    """Order entries by format, modifier, description; no modifier sorts first."""
    parent_format, format_modifier, description = entry
    # '' stands in for None so the tuples stay comparable on Python 3 while
    # keeping the Python 2 ordering (None before any string).
    return (parent_format, format_modifier or '', description)


def render_db_macro(db):
    """Render the ``db`` conditional macro.

    Returns:
        ``(xml_text, format_names)`` where ``format_names`` maps each option
        value (``'<format>-<modifier>'``, e.g. ``'native-None'`` when there
        is no modifier) to its modifier.
    """
    format_names = {}
    chunks = ['''    <xml name="db">
        <conditional name="db">
            <expand macro="dbselect" />''']
    for key in sorted(db):
        format_options = []
        for parent_format, format_modifier, description in sorted(db[key], key=_sort_key):
            name_human = description
            if format_modifier:
                name_human += ' (%s)' % format_modifier
            format_string = '%s-%s' % (parent_format, format_modifier)
            format_options.append(FORMAT_OPTION_TPL.format(
                name_type=format_string,
                name_type_human=name_human,
            ))
            format_names[format_string] = format_modifier
        chunks.append(WHEN_TPL.format(
            format=key,
            format_options='\n                    '.join(format_options),
        ))
    chunks.append('''        </conditional>
    </xml>''')
    return '\n'.join(chunks), format_names


def render_change_format_macro(format_names):
    """Render the ``efetch_formats`` macro from ``format_names``.

    A missing modifier maps to the ``text`` datatype and ``asn.1`` to
    ``asn1``; every other modifier is used as the datatype unchanged. Rules
    are emitted in sorted order so the output is reproducible.
    """
    whens = []
    for key in sorted(format_names):
        value = format_names[key]
        if value is None:
            value = 'text'
        elif value == 'asn.1':
            value = 'asn1'
        whens.append(CHANGE_FORMAT_WHEN_TPL.format(key=key, value=value))
    return CHANGE_FORMAT_TPL.format(formats='\n            '.join(whens))


def main():
    """Print both macros to stdout."""
    db = build_db(DB_NAMES, FORMAT_TABLE)
    db_macro, format_names = render_db_macro(db)
    print(db_macro)
    print(render_change_format_macro(format_names))


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# File: ecitmatch.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI ECitMatch.

Citations come either from a tab-separated ``--file`` or from the repeated
per-field options. The pipe-separated response from
``eutils.Client.citmatch`` is printed to stdout with pipes turned into tabs.
"""
import argparse

import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ECitMatch', epilog='')
    parser.add_argument('--file', type=argparse.FileType('r'), help='Tabular file containing citations to search')

    parser.add_argument('--key', nargs='*', help='Citation Key')
    parser.add_argument('--journal_title', nargs='*', help='Journal Title')
    parser.add_argument('--year', nargs='*', help='Year')
    parser.add_argument('--volume', nargs='*', help='Volume')
    parser.add_argument('--first_page', nargs='*', help='First Page')
    parser.add_argument('--author_name', nargs='*', help='Author name')

    # Emails
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    args = parser.parse_args()

    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)

    # NOTE(review): citations are keyed 'journal' here; Bio.Entrez.ecitmatch
    # appears to read 'journal_title' when given dicts - confirm the key name.
    citations = []
    if args.file is None:
        columns = (args.key, args.journal_title, args.year, args.volume,
                   args.first_page, args.author_name)
        # An omitted option is None, which zip() cannot iterate; fail with a
        # usage message instead of a TypeError traceback.
        if any(column is None for column in columns):
            parser.error('Provide either --file or all of --key, --journal_title, '
                         '--year, --volume, --first_page and --author_name')
        for key, journal, year, volume, first_page, author_name in zip(*columns):
            citations.append({
                'key': key,
                'journal': journal,
                'year': year,
                'volume': volume,
                'first_page': first_page,
                'author_name': author_name,
            })
    else:
        for line in args.file:
            line = line.strip()
            # Skip blank lines and '#' comment lines.
            if not line or line.startswith('#'):
                continue
            tmp = line.split('\t')
            try:
                citations.append({
                    'journal': tmp[0],
                    'year': tmp[1],
                    'volume': tmp[2],
                    'first_page': tmp[3],
                    'author_name': tmp[4],
                    'key': tmp[5],
                })
            except IndexError:
                # A row with fewer than six columns: report it and carry on.
                print("Could not parse line: %s" % line)

    payload = {
        'db': 'pubmed',
        'bdata': citations
    }

    results = c.citmatch(**payload)
    # We get data back as pipe separated, so just replace those with tabs
    print(results.replace('|', '\t'))
#!/usr/bin/env python
# File: efetch.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI EFetch.

Builds the request from either a stored history or an explicit ID list and
hands it to ``eutils.Client.fetch``.
"""
import argparse
import eutils


def main():
    """Parse the command line and run the fetch."""
    parser = argparse.ArgumentParser(description='EFetch', epilog='')
    parser.add_argument('db', help='Database to use')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")

    # Where the record IDs come from
    parser.add_argument('--id_list', help='list of ids')
    parser.add_argument('--id', help='Comma separated individual IDs')
    parser.add_argument('--history_file', help='Fetch results from previous query')

    # What NCBI should send back
    parser.add_argument('--retmode', help='Retmode')
    parser.add_argument('--rettype', help='Rettype')
    options = parser.parse_args()

    client = eutils.Client(
        history_file=options.history_file,
        user_email=options.user_email,
        admin_email=options.admin_email,
    )
    # Always called, even with a history file: parse_ids also validates that
    # some ID source was supplied.
    ids = client.parse_ids(options.id_list, options.id, options.history_file)

    if options.history_file is None:
        request = {'id': ','.join(ids)}
    else:
        request = dict(client.get_history())

    # Forward only the output options the caller actually set.
    for name, value in (('retmode', options.retmode), ('rettype', options.rettype)):
        if value is not None:
            request[name] = value

    client.fetch(options.db, ftype=options.retmode, **request)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# File: egquery.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI EGQuery.

Sends the query term through ``eutils.Client.gquery`` and prints the raw
response to stdout.
"""
import argparse
import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='EGQuery', epilog='')
    parser.add_argument('term', help='Query')
    #
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    args = parser.parse_args()

    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)

    payload = {
        'term': args.term,
    }
    results = c.gquery(**payload)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(results)
#!/usr/bin/env python
# File: einfo.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI EInfo.

Without ``--db`` the request carries no parameters; with ``--db`` it asks
for that database using the version 2.0 response format. The raw response
from ``eutils.Client.info`` is printed to stdout.
"""
import argparse
import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='EInfo', epilog='')
    parser.add_argument('--db', help='Database to use')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    args = parser.parse_args()

    c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
    payload = {}
    if args.db is not None:
        payload['db'] = args.db
        payload['version'] = '2.0'
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(c.info(**payload))
#!/usr/bin/env python
# File: elink.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI ELink.

Looks up links from ``dbfrom`` to ``db`` for IDs taken from a stored history
or an explicit ID list, and prints the raw response to stdout. For the
``neighbor_history`` command the history extracted from the response is also
written, as JSON, to ``--history_out``.
"""
import argparse
import json

import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ELink', epilog='')
    parser.add_argument('db', help='Database to use, sometimes "none" (e.g. *check)')
    parser.add_argument('dbfrom', help='Database containing input UIDs')
    parser.add_argument('cmd', choices=['neighbor', 'neighbor_score',
                                        'neighbor_history', 'acheck', 'ncheck', 'lcheck',
                                        'llinks', 'llinkslib', 'prlinks'],
                        help='ELink command mode')
    # Only used in case of neighbor_history. The default '-' makes argparse
    # hand back stdout, so the history then shares the stream with the results.
    parser.add_argument('--history_out', type=argparse.FileType('w'),
                        help='Output history file', default='-')

    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    # ID Sources
    parser.add_argument('--id_list', help='list of ids')
    parser.add_argument('--id', help='Comma separated individual IDs')
    parser.add_argument('--history_file', help='Fetch results from previous query')

    # TODO: dates, linkname, term, holding
    # neighbor or neighbor_history and dbfrom is pubmed
    # parser.add_argument('--datetype', help='Date type')
    # parser.add_argument('--reldate', help='In past N days')
    # parser.add_argument('--mindate', help='Minimum date')
    # parser.add_argument('--maxdate', help='maximum date')

    # Output
    args = parser.parse_args()

    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)

    payload = {
        'dbfrom': args.dbfrom,
        'cmd': args.cmd,
    }
    if args.history_file is not None:
        payload.update(c.get_history())
    else:
        payload['id'] = ','.join(merged_ids)

    # DB can be 'none' in a few cases.
    if args.db != "none":
        payload['db'] = args.db

    results = c.link(**payload)

    if args.cmd == "neighbor_history":
        history = c.extract_history(results)
        args.history_out.write(json.dumps(history, indent=4))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(results)
#!/usr/bin/env python
# File: epost.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI EPost.

Posts a list of IDs and prints the JSON-formatted result of
``eutils.Client.post`` to stdout. With ``--history_file`` the stored
history is sent along with the IDs; without it the IDs are sent with an
empty ``WebEnv``.
"""
import argparse
import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='EPost', epilog='')
    parser.add_argument('db', help='Database to use')
    parser.add_argument('--id_list', help='list of ids')
    parser.add_argument('--id', help='Comma separated individual IDs')
    parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")

    args = parser.parse_args()

    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)

    payload = {}
    if args.history_file is not None:
        payload.update(c.get_history())
        # IDs given together with a history used to be dropped silently;
        # send them so they are posted into the existing WebEnv.
        if merged_ids:
            payload['id'] = ','.join(merged_ids)
    else:
        payload['id'] = ','.join(merged_ids)
        payload['WebEnv'] = ''

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(c.post(args.db, **payload))
#!/usr/bin/env python
# File: esearch.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI ESearch.

Runs a term query, optionally restricted to a stored history and to a date
range, and prints the raw response to stdout. When ``--history_out`` is
given the search is run with ``usehistory=y`` and the history extracted from
the response is written to that file as JSON.
"""
import json
import argparse
import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ESearch', epilog='')
    parser.add_argument('db', help='Database to use')
    parser.add_argument('term', help='Query')
    parser.add_argument('--history_file', help='Filter existing history')
    parser.add_argument('--datetype', help='Date type')
    parser.add_argument('--reldate', help='In past N days')
    parser.add_argument('--mindate', help='Minimum date')
    parser.add_argument('--maxdate', help='maximum date')
    # History
    parser.add_argument('--history_out', type=argparse.FileType('w'),
                        help='Output history file')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    args = parser.parse_args()

    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)

    payload = {
        'db': args.db,
        'term': args.term,
        'retstart': 0,
        # NOTE: hard-coded, so at most 20 IDs are requested in the response.
        'retmax': 20,
    }
    if args.history_file is not None:
        payload.update(c.get_history())
    if args.history_out is not None:
        payload['usehistory'] = 'y'

    # Forward only the date filters that were actually supplied.
    for attr in ('datetype', 'reldate', 'mindate', 'maxdate'):
        if getattr(args, attr, None) is not None:
            payload[attr] = getattr(args, attr)

    results = c.search(**payload)

    if args.history_out is not None:
        history = c.extract_history(results)
        args.history_out.write(json.dumps(history, indent=4))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(results)
#!/usr/bin/env python
# File: esummary.py (added in changeset 0:92bd8a680b9d)
"""Command-line wrapper around NCBI ESummary.

Requests document summaries for IDs taken from a stored history or an
explicit ID list and prints the raw response of ``eutils.Client.summary``
to stdout.
"""
import argparse
import eutils


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ESummary', epilog='')
    parser.add_argument('db', help='Database to use')
    parser.add_argument('--id_list', help='list of ids')
    parser.add_argument('--id', help='Comma separated individual IDs')
    parser.add_argument('--history_file', help='Filter existing history')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")
    args = parser.parse_args()

    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)

    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)

    payload = {
        'db': args.db,
    }

    if args.history_file is not None:
        payload.update(c.get_history())
    else:
        payload['id'] = ','.join(merged_ids)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(c.summary(**payload))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/esummary.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,57 @@ +<?xml version="1.0"?> +<tool id="ncbi_eutils_esummary" name="NCBI ESummary" version="@WRAPPER_VERSION@"> + <description>fetch summary of history/ids</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <version_command>python esummary.py --version</version_command> + <command detect_errors="aggressive" interpreter="python"><![CDATA[esummary.py +$db_select + +@LIST_OR_HIST@ + + +@EMAIL_ARGUMENTS@ +> $default]]></command> + <inputs> + <expand macro="dbselect"/> + <expand macro="list_or_hist"/> + </inputs> + <outputs> + <data format="xml" name="default" label="Summary of NCBI Search"/> + </outputs> + <tests> + <test> + <param name="qss" value="id_list"/> + <param name="id_list" value="10239"/> + <param name="db_select" value="taxonomy"/> + <output name="default" file="esummary.tax.xml" ftype="xml"/> + </test> + </tests> + <help><![CDATA[ +NCBI Entrez ESummary +==================== + +Responds to a list of UIDs from a given database with the corresponding +document summaries. + +Example Queries +--------------- + +Search against protein: + ++----------------------+--------------------------------------+ +| Parameter | Value | ++======================+======================================+ +| NCBI Database to Use | Protein | ++----------------------+--------------------------------------+ +| ID List | 28800982 28628843 | ++----------------------+--------------------------------------+ + +@REFERENCES@ + +@DISCLAIMER@ + ]]></help> + <expand macro="citations"/> +</tool>
# File: eutils.py (added in changeset 0:92bd8a680b9d)
"""Shared Bio.Entrez client used by the e*.py command-line wrappers."""
import os
import json
import StringIO
from Bio import Entrez
Entrez.tool = "GalaxyEutils_1_0"
# Number of records requested per EFetch call.
BATCH_SIZE = 200


class Client(object):
    """Wrapper around ``Bio.Entrez`` that sets the contact email and can
    carry a history (``QueryKey`` / ``WebEnv``) loaded from a JSON file."""

    def __init__(self, history_file=None, user_email=None, admin_email=None):
        """Configure ``Entrez.email`` and optionally load a history file.

        The email is ``admin;user`` when both are given, otherwise whichever
        one is given, otherwise the ``NCBI_EUTILS_CONTACT`` environment
        variable.

        Args:
            history_file: path to a JSON file with ``QueryKey`` and
                ``WebEnv`` entries, or ``None``.
            user_email: email of the user running the tool, or ``None``.
            admin_email: administrator email(s), or ``None``.

        Raises:
            Exception: if no email can be determined.
        """
        self.using_history = False

        if user_email is not None and admin_email is not None:
            Entrez.email = ';'.join((admin_email, user_email))
        elif user_email is not None:
            Entrez.email = user_email
        elif admin_email is not None:
            Entrez.email = admin_email
        else:
            Entrez.email = os.environ.get('NCBI_EUTILS_CONTACT', None)

        if Entrez.email is None:
            raise Exception("Cannot continue without an email; please set "
                            "administrator email in NCBI_EUTILS_CONTACT")

        if history_file is not None:
            with open(history_file, 'r') as handle:
                data = json.load(handle)
                self.query_key = data['QueryKey']
                self.webenv = data['WebEnv']
                self.using_history = True

    def get_history(self):
        """Return the ``query_key`` / ``WebEnv`` request parameters, or an
        empty dict when no history file was loaded."""
        if not self.using_history:
            return {}
        else:
            return {
                'query_key': self.query_key,
                'WebEnv': self.webenv,
            }

    def post(self, database, **payload):
        """Run EPost and return the parsed response as indented JSON text."""
        return json.dumps(Entrez.read(Entrez.epost(database, **payload)), indent=4)

    def fetch(self, db, ftype=None, **payload):
        """Download records in batches into the ``downloads`` directory.

        The number of records is taken from an ESummary of the given IDs
        (``payload['id']``) or of the loaded history. One file named
        ``EFetch Results Chunk <offset>.<ftype>`` is written per batch of
        ``BATCH_SIZE`` records.

        Args:
            db: database to fetch from.
            ftype: used as the file extension of the written chunks.
            **payload: further parameters passed to ``Entrez.efetch``.
        """
        # The directory may be left over from an earlier run; a bare
        # makedirs() would raise OSError in that case.
        if not os.path.isdir("downloads"):
            os.makedirs("downloads")

        if 'id' in payload:
            summary = self.id_summary(db, payload['id'])
        else:
            summary = self.history_summary(db)

        count = len(summary)
        payload['retmax'] = BATCH_SIZE

        # One request per batch, paging through the results with retstart.
        for i in range(0, count, BATCH_SIZE):
            payload['retstart'] = i
            file_path = os.path.join('downloads', 'EFetch Results Chunk %s.%s' % (i, ftype))
            response = Entrez.efetch(db, **payload)
            try:
                data = response.read()
            finally:
                # Release the connection even when reading fails.
                response.close()
            with open(file_path, 'w') as handle:
                handle.write(data)

    def id_summary(self, db, id_list):
        """Return the parsed ESummary for the given IDs in ``db``."""
        payload = {
            'db': db,
            'id': id_list,
        }
        return Entrez.read(Entrez.esummary(**payload))

    def history_summary(self, db):
        """Return the parsed ESummary for the loaded history.

        Raises:
            Exception: if no history file was loaded.
        """
        if not self.using_history:
            raise Exception("History must be available for this method")

        payload = {
            'db': db,
            'query_key': self.query_key,
            'WebEnv': self.webenv,
        }
        return Entrez.read(Entrez.esummary(**payload))

    def summary(self, **payload):
        """Run ESummary and return the raw response body."""
        return Entrez.esummary(**payload).read()

    def link(self, **payload):
        """Run ELink and return the raw response body."""
        return Entrez.elink(**payload).read()

    def extract_history(self, xml_data):
        """Return the ``QueryKey`` / ``WebEnv`` entries found in a response.

        Args:
            xml_data: raw response text, parsed with ``Entrez.read``.

        Returns:
            dict holding whichever of the two keys the parsed data contains.
        """
        parsed_data = Entrez.read(StringIO.StringIO(xml_data))
        history = {}
        for key in ('QueryKey', 'WebEnv'):
            if key in parsed_data:
                history[key] = parsed_data[key]

        return history

    def search(self, **payload):
        """Run ESearch and return the raw response body."""
        return Entrez.esearch(**payload).read()

    def info(self, **kwargs):
        """Run EInfo and return the raw response body."""
        return Entrez.einfo(**kwargs).read()

    def gquery(self, **kwargs):
        """Run EGQuery and return the raw response body."""
        return Entrez.egquery(**kwargs).read()

    def citmatch(self, **kwargs):
        """Run ECitMatch and return the raw response body."""
        return Entrez.ecitmatch(**kwargs).read()

    @classmethod
    def parse_ids(cls, id_list, id, history_file):
        """Parse IDs passed on --cli or in a file passed to the cli

        Args:
            id_list: path to a file with one ID per line, or ``None``.
            id: IDs separated by commas, newlines or the ``__cn__`` token,
                or ``None``.
            history_file: history file path; only used to decide whether an
                empty ID list is acceptable.

        Returns:
            list of non-empty, whitespace-stripped IDs: those from ``id``
            first, then those from ``id_list``.

        Raises:
            Exception: if there are no IDs and no history file.
        """
        merged_ids = []
        if id is not None:
            for pid in id.replace('__cn__', ',').replace('\n', ',').split(','):
                pid = pid.strip()
                if pid:
                    merged_ids.append(pid)

        if id_list is not None:
            with open(id_list, 'r') as handle:
                for line in handle:
                    line = line.strip()
                    # Blank lines would otherwise become empty IDs.
                    if line:
                        merged_ids.append(line)

        # Exception handled here for uniformity
        if len(merged_ids) == 0 and history_file is None:
            raise Exception("Must provide history file or IDs")

        return merged_ids
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,847 @@ +<?xml version="1.0"?> +<macros> + <token name="@WRAPPER_VERSION@">1.1</token> + <token name="@EMAIL_ARGUMENTS@"> +--user_email "$__user_email__" +#set admin_emails = ';'.join(str($__admin_users__).split(',')) +--admin_email "$admin_emails" + </token> + <!-- TODO: citation --> + <token name="@REFERENCES@"><![CDATA[ + ]]></token> + <token name="@DISCLAIMER@"><![CDATA[ +Usage Guidelines and Requirements +================================= + +Frequency, Timing, and Registration of E-utility URL Requests +------------------------------------------------------------- + +In order not to overload the E-utility servers, NCBI recommends that users +limit large jobs to either weekends or between 9:00 PM and 5:00 AM Eastern time +during weekdays. Failure to comply with this policy may result in an IP address +being blocked from accessing NCBI. + +Minimizing the Number of Requests +--------------------------------- + +If a task requires searching for and/or downloading a large number of +records, it is much more efficient to use the Entrez History to upload +and/or retrieve these records in batches rather than using separate +requests for each record. Please refer to Application 3 in Chapter 3 +for an example. Many thousands of IDs can be uploaded using a single +EPost request, and several hundred records can be downloaded using one +EFetch request. + + +Disclaimer and Copyright Issues +------------------------------- + +In accordance with requirements of NCBI's E-Utilities, we must provide +the following disclaimer: + +Please note that abstracts in PubMed may incorporate material that may +be protected by U.S. and foreign copyright laws. All persons +reproducing, redistributing, or making commercial use of this +information are expected to adhere to the terms and conditions asserted +by the copyright holder. 
Transmission or reproduction of protected +items beyond that allowed by fair use (PDF) as defined in the copyright +laws requires the written permission of the copyright owners. NLM +provides no legal advice concerning distribution of copyrighted +materials. Please consult your legal counsel. If you wish to do a large +data mining project on PubMed data, you can enter into a licensing +agreement and lease the data for free from NLM. For more information on +this please see `http://www.nlm.nih.gov/databases/leased.html <http://www.nlm.nih.gov/databases/leased.html>`__ + +The `full disclaimer <http://www.ncbi.nlm.nih.gov/About/disclaimer.html>`__ is available on +their website + +Liability +~~~~~~~~~ + +For documents and software available from this server, the +U.S. Government does not warrant or assume any legal liability or +responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed. + +Endorsement +~~~~~~~~~~~ + +NCBI does not endorse or recommend any commercial +products, processes, or services. The views and opinions of authors +expressed on NCBI's Web sites do not necessarily state or reflect those +of the U.S. Government, and they may not be used for advertising or +product endorsement purposes. + +External Links +~~~~~~~~~~~~~~ + +Some NCBI Web pages may provide links to other Internet +sites for the convenience of users. NCBI is not responsible for the +availability or content of these external sites, nor does NCBI endorse, +warrant, or guarantee the products, services, or information described +or offered at these other Internet sites. Users cannot assume that the +external sites will abide by the same Privacy Policy to which NCBI +adheres. It is the responsibility of the user to examine the copyright +and licensing restrictions of linked pages and to secure all necessary +permissions. 
+ ]]></token> + <xml name="dbselect" + token_name="db_select" + token_label="NCBI Database to Use" + > + <param name="@NAME@" type="select" label="@LABEL@"> + <option value="annotinfo">Annotation Information</option> + <option value="assembly">Assembly</option> + <option value="bioproject">BioProject</option> + <option value="biosample">BioSample</option> + <option value="biosystems">Biosystems</option> + <option value="blastdbinfo">Blast Database Information</option> + <option value="books">Books</option> + <option value="cdd">Conserved Domains</option> + <option value="clinvar">Clinical Variants</option> + <option value="clone">CLone</option> + <option value="dbvar">dbVar</option> + <option value="gap">dbGaP</option> + <option value="gapplus">gapplus</option> + <option value="gds">GEO Datasets</option> + <option value="gencoll">Gencoll</option> + <option value="gene">Gene</option> + <option value="genome">Genome</option> + <option value="geoprofiles">GEO Profiles</option> + <option value="grasp">grasp</option> + <option value="gtr">Genetic Testing Registry</option> + <option value="homologene">HomoloGene</option> + <option value="medgen">MedGen</option> + <option value="mesh">MeSH</option> + <option value="ncbisearch">NCBI Web Site</option> + <option value="nlmcatalog">NLM Catalog</option> + <option value="nuccore">Nuccore</option> + <option value="nucest">EST</option> + <option value="nucgss">GSS</option> + <option value="nucleotide">Nucleotide</option> + <option value="omim">OMIM</option> + <option value="orgtrack">Orgtrack</option> + <option value="pcassay">PubChem BioAssay</option> + <option value="pccompound">PubChem Compound</option> + <option value="pcsubstance">PubChem Substance</option> + <option value="pmc">PubMed Central</option> + <option value="popset">PopSet</option> + <option value="probe">Probe</option> + <option value="protein">Protein</option> + <option value="proteinclusters">Protein Clusters</option> + <option value="pubmed">PubMed</option> + 
<option value="pubmedhealth">PubMed Health</option> + <option value="seqannot">seqannot</option> + <option value="snp">SNP</option> + <option value="sra">SRA</option> + <option value="structure">Structure</option> + <option value="taxonomy">Taxonomy</option> + <option value="unigene">UniGene</option> + </param> + </xml> + <xml name="db"> + <conditional name="db"> + <expand macro="dbselect" /> + <when value="annotinfo"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="assembly"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="bioproject"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-None">BioProject Report</option> + <option value="native-xml">RecordSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when 
value="biosample"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-None">BioSample Report</option> + <option value="native-xml">BioSampleSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="biosystems"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-xml">Sys-set (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="blastdbinfo"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="books"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique 
Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="cdd"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="clinvar"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="clone"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="dbvar"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option 
value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gap"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gapplus"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gds"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-xml">RecordSet (xml)</option> + <option value="summary-text">Summary (text)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gencoll"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option 
value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gene"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="gene_table-xml">Gene Table (xml)</option> + <option value="native-asn.1">Entrezgene (asn.1)</option> + <option value="native-text">Gene Report (text)</option> + <option value="native-xml">Entrezgene-Set (xml)</option> + <option value="tabular-tabular">Tabular Report (tabular)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="genome"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="geoprofiles"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="grasp"> + <param name="output_format" type="select" label="Output 
Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="gtr"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="homologene"> + <param name="output_format" type="select" label="Output Format"> + <option value="alignmentscores-text">Alignment Scores (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="homologene-text">Homologene Report (text)</option> + <option value="native-asn.1">HG-Entry (asn.1)</option> + <option value="native-text">Homologene List (text)</option> + <option value="native-xml">Entrez-Homologene-Set (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="medgen"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option 
value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="mesh"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-text">Full Record (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-text">MeSH Report (text)</option> + <option value="native-xml">RecordSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="ncbisearch"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="nlmcatalog"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-text">Full Record (text)</option> + <option value="native-xml">NLMCatalogRecordSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + 
</param> + </when> + <when value="nuccore"> + <param name="output_format" type="select" label="Output Format"> + <option value="acc-text">Accession Number (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="est-xml">EST Report (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="fasta-xml">TinySeq (xml)</option> + <option value="fasta_cds_aa-fasta">CDS Products (fasta)</option> + <option value="fasta_cds_na-fasta">Coding Regions (fasta)</option> + <option value="ft-text">Feature Table (text)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="gb-text">GenBank Flatfile (text)</option> + <option value="gb-xml">GBSet (xml)</option> + <option value="gbc-xml">INSDSet (xml)</option> + <option value="gbwithparts-text">GenBank with Contig Sequences (text)</option> + <option value="gene_fasta-fasta">FASTA of Gene (fasta)</option> + <option value="gp-text">GenPept Flatfile (text)</option> + <option value="gp-xml">GBSet (xml)</option> + <option value="gpc-xml">INSDSet (xml)</option> + <option value="gss-text">GSS Report (text)</option> + <option value="ipg-text">Identical Protein Report (text)</option> + <option value="ipg-xml">IPGReportSet (xml)</option> + <option value="native-text">Seq-entry (text)</option> + <option value="native-xml">Bioseq-set (xml)</option> + <option value="seqid-asn.1">Seq-id (asn.1)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="nucest"> + <param name="output_format" type="select" label="Output Format"> + <option value="acc-text">Accession Number (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option 
value="est-xml">EST Report (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="fasta-xml">TinySeq (xml)</option> + <option value="fasta_cds_aa-fasta">CDS Products (fasta)</option> + <option value="fasta_cds_na-fasta">Coding Regions (fasta)</option> + <option value="ft-text">Feature Table (text)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="gb-text">GenBank Flatfile (text)</option> + <option value="gb-xml">GBSet (xml)</option> + <option value="gbc-xml">INSDSet (xml)</option> + <option value="gbwithparts-text">GenBank with Contig Sequences (text)</option> + <option value="gene_fasta-fasta">FASTA of Gene (fasta)</option> + <option value="gp-text">GenPept Flatfile (text)</option> + <option value="gp-xml">GBSet (xml)</option> + <option value="gpc-xml">INSDSet (xml)</option> + <option value="gss-text">GSS Report (text)</option> + <option value="ipg-text">Identical Protein Report (text)</option> + <option value="ipg-xml">IPGReportSet (xml)</option> + <option value="native-text">Seq-entry (text)</option> + <option value="native-xml">Bioseq-set (xml)</option> + <option value="seqid-asn.1">Seq-id (asn.1)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="nucgss"> + <param name="output_format" type="select" label="Output Format"> + <option value="acc-text">Accession Number (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="est-xml">EST Report (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="fasta-xml">TinySeq (xml)</option> + <option value="fasta_cds_aa-fasta">CDS Products (fasta)</option> + <option value="fasta_cds_na-fasta">Coding Regions (fasta)</option> + <option 
value="ft-text">Feature Table (text)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="gb-text">GenBank Flatfile (text)</option> + <option value="gb-xml">GBSet (xml)</option> + <option value="gbc-xml">INSDSet (xml)</option> + <option value="gbwithparts-text">GenBank with Contig Sequences (text)</option> + <option value="gene_fasta-fasta">FASTA of Gene (fasta)</option> + <option value="gp-text">GenPept Flatfile (text)</option> + <option value="gp-xml">GBSet (xml)</option> + <option value="gpc-xml">INSDSet (xml)</option> + <option value="gss-text">GSS Report (text)</option> + <option value="ipg-text">Identical Protein Report (text)</option> + <option value="ipg-xml">IPGReportSet (xml)</option> + <option value="native-text">Seq-entry (text)</option> + <option value="native-xml">Bioseq-set (xml)</option> + <option value="seqid-asn.1">Seq-id (asn.1)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="nucleotide"> + <param name="output_format" type="select" label="Output Format"> + <option value="acc-text">Accession Number (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="est-xml">EST Report (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="fasta-xml">TinySeq (xml)</option> + <option value="fasta_cds_aa-fasta">CDS Products (fasta)</option> + <option value="fasta_cds_na-fasta">Coding Regions (fasta)</option> + <option value="ft-text">Feature Table (text)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="gb-text">GenBank Flatfile (text)</option> + <option value="gb-xml">GBSet (xml)</option> + <option value="gbc-xml">INSDSet 
(xml)</option> + <option value="gbwithparts-text">GenBank with Contig Sequences (text)</option> + <option value="gene_fasta-fasta">FASTA of Gene (fasta)</option> + <option value="gp-text">GenPept Flatfile (text)</option> + <option value="gp-xml">GBSet (xml)</option> + <option value="gpc-xml">INSDSet (xml)</option> + <option value="gss-text">GSS Report (text)</option> + <option value="ipg-text">Identical Protein Report (text)</option> + <option value="ipg-xml">IPGReportSet (xml)</option> + <option value="native-text">Seq-entry (text)</option> + <option value="native-xml">Bioseq-set (xml)</option> + <option value="seqid-asn.1">Seq-id (asn.1)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="omim"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="orgtrack"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pcassay"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary 
(xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pccompound"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pcsubstance"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pmc"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="medline-text">MEDLINE (text)</option> + <option value="native-xml">pmc-articleset (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="popset"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document 
Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="probe"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="protein"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="proteinclusters"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pubmed"> + <param name="output_format" type="select" label="Output Format"> + <option value="abstract-xml">Abstract (xml)</option> + <option 
value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="medline-text">MEDLINE (text)</option> + <option value="native-asn.1">Pubmed-entry (asn.1)</option> + <option value="native-xml">PubmedArticleSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="pubmedhealth"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="seqannot"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="snp"> + <param name="output_format" type="select" label="Output Format"> + <option value="chr-text">Chromosome Report (text)</option> + <option value="docset-text">Summary (text)</option> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="fasta-fasta">FASTA (fasta)</option> + <option value="flt-text">Flat File (text)</option> + <option 
value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-asn.1">Rs (asn.1)</option> + <option value="native-xml">ExchangeSet (xml)</option> + <option value="rsr-tabular">RS Cluster Report (tabular)</option> + <option value="ssexemplar-text">SS Exemplar List (text)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="sra"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-xml">EXPERIMENT_PACKAGE_SET (xml)</option> + <option value="runinfo-xml">SraRunInfo (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="structure"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="mmdb-asn.1">Ncbi-mime-asn1 strucseq (asn.1)</option> + <option value="native-text">MMDB Report (text)</option> + <option value="native-xml">RecordSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="taxonomy"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + 
<option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="native-text">Taxonomy List (text)</option> + <option value="native-xml">TaxaSet (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + <when value="unigene"> + <param name="output_format" type="select" label="Output Format"> + <option value="docsum-json">Document Summary (json)</option> + <option value="docsum-xml">Document Summary (xml)</option> + <option value="full-text">Full Document (text)</option> + <option value="full-xml">Full Document (xml)</option> + <option value="uilist-text">Unique Identifier List (text)</option> + <option value="uilist-xml">Unique Identifier List (xml)</option> + </param> + </when> + </conditional> + </xml> + + <xml name="efetch_formats"> + <change_format> + <when input="output_format" value="fasta_cds_na-fasta" format="fasta"/> + <when input="output_format" value="ipg-xml" format="xml"/> + <when input="output_format" value="gbc-xml" format="xml"/> + <when input="output_format" value="medline-text" format="text"/> + <when input="output_format" value="native-None" format="text"/> + <when input="output_format" value="mmdb-asn.1" format="asn1"/> + <when input="output_format" value="seqid-asn.1" format="asn1"/> + <when input="output_format" value="acc-text" format="text"/> + <when input="output_format" value="summary-text" format="text"/> + <when input="output_format" value="gene_fasta-fasta" format="fasta"/> + <when input="output_format" value="native-text" format="text"/> + <when input="output_format" value="gbwithparts-text" format="text"/> + <when input="output_format" value="gpc-xml" format="xml"/> + <when input="output_format" value="fasta_cds_aa-fasta" format="fasta"/> + <when input="output_format" value="gp-text" format="text"/> + <when input="output_format" value="gss-text" 
format="text"/> + <when input="output_format" value="ipg-text" format="text"/> + <when input="output_format" value="uilist-xml" format="xml"/> + <when input="output_format" value="docsum-xml" format="xml"/> + <when input="output_format" value="rsr-tabular" format="tabular"/> + <when input="output_format" value="uilist-text" format="text"/> + <when input="output_format" value="gb-text" format="text"/> + <when input="output_format" value="chr-text" format="text"/> + <when input="output_format" value="alignmentscores-text" format="text"/> + <when input="output_format" value="native-asn.1" format="asn1"/> + <when input="output_format" value="gp-xml" format="xml"/> + <when input="output_format" value="tabular-tabular" format="tabular"/> + <when input="output_format" value="ssexemplar-text" format="text"/> + <when input="output_format" value="docsum-json" format="json"/> + <when input="output_format" value="fasta-xml" format="xml"/> + <when input="output_format" value="runinfo-xml" format="xml"/> + <when input="output_format" value="flt-text" format="text"/> + <when input="output_format" value="fasta-fasta" format="fasta"/> + <when input="output_format" value="full-text" format="text"/> + <when input="output_format" value="gb-xml" format="xml"/> + <when input="output_format" value="abstract-xml" format="xml"/> + <when input="output_format" value="full-xml" format="xml"/> + <when input="output_format" value="ft-text" format="text"/> + <when input="output_format" value="homologene-text" format="text"/> + <when input="output_format" value="est-xml" format="xml"/> + <when input="output_format" value="gene_table-xml" format="xml"/> + <when input="output_format" value="docset-text" format="text"/> + <when input="output_format" value="native-xml" format="xml"/> + </change_format> + </xml> + <token name="@LIST_OR_HIST@"> +#if $query_source.qss == "history": + --history_file $query_source.history_file +#else if $query_source.qss == "id_file": + --id_list $query_source.id_file 
+#else if $query_source.qss == "id_list": + --id $query_source.id_list +#end if + </token> + <xml name="list_or_hist"> + <conditional name="query_source"> + <param name="qss" type="select" label="Select source for IDs"> + <option value="history">NCBI WebEnv History</option> + <option value="id_file">File containing IDs (one per line)</option> + <option value="id_list">Direct Entry</option> + </param> + <when value="history"> + <param label="History File" name="history_file" type="data" format="json"/> + </when> + <when value="id_file"> + <param label="ID List" name="id_file" type="data" format="text,tabular"/> + </when> + <when value="id_list"> + <param label="ID List" name="id_list" type="text" area="true" help="Newline/Comma separated list of IDs"/> + </when> + </conditional> + </xml> + <xml name="history_out"> + <data format="json" name="history" label="NCBI Entrez WebEnv History"> + <yield/> + </data> + </xml> + <xml name="citations"> + <citations> + <citation type="bibtex">@Book{ncbiEutils, + author = {Eric Sayers}, + title = {Entrez Programming Utilities Help}, + year = {2010}, + publisher = {National Center for Biotechnology Information, Bethesda, Maryland}, + note = {http://www.ncbi.nlm.nih.gov/books/NBK25500/} + }</citation> + </citations> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.66">biopython</requirement> + </requirements> + </xml> + <xml name="linkname"> + <param name="linkname" type="select" label="To NCBI Database"> + <!-- TODO: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html --> + </param> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ecitmatch.results.tsv Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,2 @@ + 1991 88 3248 mann bj citation_1 2014248 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ecitmatch.tsv Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,2 @@ +#journal year volume first page author key +proc natl acad sci u s a 1991 88 3248 mann bj citation_1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/egquery.1.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE Result PUBLIC "-//NLM//DTD eSearchResult, January 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/egquery.dtd"> +<Result> + + <Term>bacteriophage</Term> + + <eGQueryResult>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.pubmed.2014-01-pnas.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd"> +<eSearchResult><Count>524</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList> +<Id>24620368</Id> +<Id>24613929</Id> +<Id>24596955</Id> +<Id>24596954</Id> +<Id>24571024</Id> +<Id>24555201</Id> +<Id>24555200</Id> +<Id>24550301</Id> +<Id>24520173</Id> +<Id>24520172</Id> +<Id>24497494</Id> +<Id>24497493</Id> +<Id>24488973</Id> +<Id>24488972</Id> +<Id>24488971</Id> +<Id>24481254</Id> +<Id>24481253</Id> +<Id>24481252</Id> +<Id>24477693</Id> +<Id>24477692</Id> +</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/01/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/02/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <OP>RANGE</OP> <OP>AND</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 2014/01/01[PDAT] : 2014/02/01[PDAT]</QueryTranslation></eSearchResult> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.pubmed.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd"> +<eSearchResult><Count>2651</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList> +<Id>16578858</Id> +<Id>11186225</Id> +<Id>11121081</Id> +<Id>11121080</Id> +<Id>11121079</Id> +<Id>11121078</Id> +<Id>11121077</Id> +<Id>11121076</Id> +<Id>11121075</Id> +<Id>11121074</Id> +<Id>11121073</Id> +<Id>11121072</Id> +<Id>11121071</Id> +<Id>11121070</Id> +<Id>11121069</Id> +<Id>11121068</Id> +<Id>11121067</Id> +<Id>11121066</Id> +<Id>11121065</Id> +<Id>11121064</Id> +</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>97[vi]</Term> <Field>vi</Field> <Count>77218</Count> <Explode>N</Explode> </TermSet> <OP>AND</OP> <OP>GROUP</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 97[vi]</QueryTranslation></eSearchResult> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esummary.tax.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD esummary v1 20041029//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20041029/esummary-v1.dtd"> +<eSummaryResult> +<DocSum> + <Id>10239</Id> + <Item Name="Status" Type="String">active</Item> + <Item Name="Rank" Type="String">superkingdom</Item> + <Item Name="Division" Type="String">viruses</Item> + <Item Name="ScientificName" Type="String">Viruses</Item> + <Item Name="CommonName" Type="String"></Item> + <Item Name="TaxId" Type="Integer">10239</Item> + <Item Name="AkaTaxId" Type="Integer">0</Item> + <Item Name="Genus" Type="String"></Item> + <Item Name="Species" Type="String"></Item> + <Item Name="Subsp" Type="String"></Item> + <Item Name="ModificationDate" Type="Date">2010/11/23 00:00</Item> +</DocSum> + +</eSummaryResult> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/example.history.json Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,4 @@ +{ + "QueryKey": "1", + "WebEnv": "NCID_1_9485527_130.14.22.215_9001_1430928295_33285243_0MetA0_S_MegaStore_F_1" +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pm-tax-neighbor.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD elink 20101123//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20101123/elink.dtd"> +<eLinkResult> + + <LinkSet> + <DbFrom>taxonomy</DbFrom> + <IdList> + <Id>510899</Id> + </IdList> + + <LinkSetDb> + <DbTo>pubmed</DbTo> + <LinkName>taxonomy_pubmed_entrez</LinkName> + + <Link> + <Id>22241621</Id> + </Link> + + </LinkSetDb> + + + </LinkSet> +</eLinkResult> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pubmed.metadata.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD einfo 20130322//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20130322/einfo.dtd"> +<eInfoResult> + <DbInfo> + <DbName>pubmed</DbName> + <MenuName>PubMed</MenuName> + <Description>PubMed bibliographic record</Description>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/viruses.tax.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,29 @@ +<?xml version="1.0"?> +<!DOCTYPE TaxaSet PUBLIC "-//NLM//DTD Taxon, 14th January 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/taxon.dtd"> +<TaxaSet><Taxon> + <TaxId>10239</TaxId> + <ScientificName>Viruses</ScientificName> + <OtherNames> + <BlastName>viruses</BlastName> + <Synonym>Vira</Synonym> + <Synonym>Viridae</Synonym> + </OtherNames> + <ParentTaxId>1</ParentTaxId> + <Rank>superkingdom</Rank> + <Division>Viruses</Division> + <GeneticCode> + <GCId>1</GCId> + <GCName>Standard</GCName> + </GeneticCode> + <MitoGeneticCode> + <MGCId>0</MGCId> + <MGCName>Unspecified</MGCName> + </MitoGeneticCode> + <Lineage/> + <CreateDate>1995/02/27 09:24:00</CreateDate> + <UpdateDate>2010/11/23 11:40:11</UpdateDate> + <PubDate>1993/04/20 01:00:00</PubDate> +</Taxon> + +</TaxaSet> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jul 07 02:41:02 2016 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<tool_dependency> + <set_environment version="1.0"> + <environment_variable action="set_to" name="NCBI_EUTILS_CONTACT">/please set the administrator's contact email in the corresponding env.sh file/</environment_variable> + </set_environment> + <package name="biopython" version="1.66"> + <repository changeset_revision="8433ee4531ff" name="package_biopython_1_66" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>