# HG changeset patch
# User iuc
# Date 1467873561 14400
# Node ID 68cd8d564e0a11befd78984c144582fe4117bc25
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
diff -r 000000000000 -r 68cd8d564e0a README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,38 @@
+Galaxy NCBI Entrez Tools
+========================
+
+This repo requires a readme as administrators should be very aware of some
+restrictions NCBI places on the use of the Entrez service.
+
+NCBI requests that you please limit large jobs to either weekends or
+between 9:00 PM and 5:00 AM Eastern time during weekdays. This is not a
+request that the Galaxy tool can easily service, so we've included it in
+the disclaimer on every tool quite prominently.
+
+Failure to comply with NCBI's policies may result in a block until
+you/the user contacts NCBI and registers the tool ID and their email.
+
+Note that these are *IP* level blocks so the Galaxy tools use a
+concatenation of the administrator's emails, and the user email, in
+hopes that NCBI will contact all relevant parties should their system be
+abused.
+
+Additionally, since these are IP level blocks, the Galaxy tool author
+(@erasche) recommends using the following ``jobs_conf.xml`` snippet in
+order to place a system-wide restriction of 1 concurrent Entrez job
+amongst all users.
+
+.. code:: xml
+
+
+
+ 1
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a __efetch_build_options.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/__efetch_build_options.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,267 @@
+#!/usr/bin/env python
+# Daniel Blankenberg
+# Creates the options for tool interface
+import re
+
+# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
+db_list = '''
+annotinfo
+assembly
+bioproject
+biosample
+biosystems
+blastdbinfo
+books
+cdd
+clinvar
+clone
+dbvar
+gap
+gapplus
+gds
+gencoll
+gene
+genome
+geoprofiles
+grasp
+gtr
+homologene
+medgen
+mesh
+ncbisearch
+nlmcatalog
+nuccore
+nucest
+nucgss
+nucleotide
+omim
+orgtrack
+pcassay
+pccompound
+pcsubstance
+pmc
+popset
+probe
+protein
+proteinclusters
+pubmed
+pubmedhealth
+seqannot
+snp
+sra
+structure
+taxonomy
+unigene'''.replace( "", "").replace( "", "").split("\n")
+
+
+help = ''' (all)
+ docsum xml Document Summary
+ docsum json Document Summary
+ full text Full Document
+ uilist xml Unique Identifier List
+ uilist text Unique Identifier List
+ full xml Full Document
+
+ bioproject
+ native BioProject Report
+ native xml RecordSet
+
+ biosample
+ native BioSample Report
+ native xml BioSampleSet
+
+ biosystems
+ native xml Sys-set
+
+ gds
+ native xml RecordSet
+ summary text Summary
+
+ gene
+ gene_table xml Gene Table
+ native text Gene Report
+ native asn.1 Entrezgene
+ native xml Entrezgene-Set
+ tabular tabular Tabular Report
+
+ homologene
+ alignmentscores text Alignment Scores
+ fasta fasta FASTA
+ homologene text Homologene Report
+ native text Homologene List
+ native asn.1 HG-Entry
+ native xml Entrez-Homologene-Set
+
+ mesh
+ full text Full Record
+ native text MeSH Report
+ native xml RecordSet
+
+ nlmcatalog
+ native text Full Record
+ native xml NLMCatalogRecordSet
+
+ pmc
+ medline text MEDLINE
+ native xml pmc-articleset
+
+ pubmed
+ abstract xml Abstract
+ medline text MEDLINE
+ native asn.1 Pubmed-entry
+ native xml PubmedArticleSet
+
+ (sequences)
+ acc text Accession Number
+ est xml EST Report
+ fasta fasta FASTA
+ fasta xml TinySeq
+ fasta_cds_aa fasta CDS Products
+ fasta_cds_na fasta Coding Regions
+ ft text Feature Table
+ gb text GenBank Flatfile
+ gb xml GBSet
+ gbc xml INSDSet
+ gbwithparts text GenBank with Contig Sequences
+ gene_fasta fasta FASTA of Gene
+ gp text GenPept Flatfile
+ gp xml GBSet
+ gpc xml INSDSet
+ gss text GSS Report
+ ipg text Identical Protein Report
+ ipg xml IPGReportSet
+ native text Seq-entry
+ native xml Bioseq-set
+ seqid asn.1 Seq-id
+
+ snp
+ chr text Chromosome Report
+ docset text Summary
+ fasta fasta FASTA
+ flt text Flat File
+ native asn.1 Rs
+ native xml ExchangeSet
+ rsr tabular RS Cluster Report
+ ssexemplar text SS Exemplar List
+
+ sra
+ native xml EXPERIMENT_PACKAGE_SET
+ runinfo xml SraRunInfo
+
+ structure
+ mmdb asn.1 Ncbi-mime-asn1 strucseq
+ native text MMDB Report
+ native xml RecordSet
+
+ taxonomy
+ native text Taxonomy List
+ native xml TaxaSet'''.split("\n")
+
+# Build ``db``: a dict mapping each database/section name to a list of (format, modifier, description) tuples.
+db = {}
+for db_name in db_list:
+ db[db_name] = []
+# ``section`` holds the most recent header line seen while walking ``help``
+section = None
+for line in help:
+ line = re.split('\s{2,}', line.strip())
+ # re.split() always returns at least one item, so this branch never fires
+ if len(line) == 0:
+ continue
+ # Section headers have one item; a blank line yields [''] and lands here too
+ elif len(line) == 1:
+ section = line[0]
+ db[section] = []
+ # Format lines have two items (format, description) or three (format, modifier, description)
+ elif len(line) == 2:
+ parent_format = line[0]
+ description = line[1]
+ # NOTE(review): db[section] holds tuples, so this string membership test is always true
+ if parent_format not in db[section]:
+ db[section].append((parent_format, None, description))
+ elif len(line) == 3:
+ parent_format = line[0]
+ format_modifier = line[1]
+ description = line[2]
+ # Same always-true membership test as in the two-item branch
+ if parent_format not in db[section]:
+ db[section].append((parent_format, format_modifier, description))
+
+# Take the two pseudo-sections, and the '' entry that blank lines produce, out of db
+all_formats = db['(all)']
+del db['(all)']
+sequences_formats = db['(sequences)']
+del db['(sequences)']
+del db['']
+# Append the '(all)' formats to every remaining entry
+for key in db:
+ db[key] += all_formats
+# Append the '(sequences)' formats to the four nucleotide databases
+for key in ('nuccore', 'nucest', 'nucgss', 'nucleotide'):
+ db[key] += sequences_formats
+
+MACRO_TPL = '''
+
+'''
+
+WHEN_TPL = '''
+
+ {format_options}
+
+ '''
+
+FORMAT_OPTION_TPL = ''''''
+# Print one WHEN_TPL block per database (sorted by name) and record every format string in format_names.
+format_names = {}
+# Leading text, printed before the per-database blocks
+print '''
+
+ '''
+for key in sorted(db):
+ format_options = []
+ # Walk this database's (format, modifier, description) tuples in sorted order
+ for (parent_format, format_modifier, description) in sorted(db[key]):
+ name_human = description
+ if format_modifier:
+ name_human += ' (%s)' % format_modifier
+ format_string = '%s-%s' % (parent_format, format_modifier)
+ # Fill FORMAT_OPTION_TPL with the format string and its human-readable label
+ format_options.append(FORMAT_OPTION_TPL.format(
+ name_type=format_string,
+ name_type_human=name_human,
+ ))
+ # format_names maps each format string to its modifier
+ format_names[format_string] = format_modifier
+ # Print the filled-in WHEN_TPL for this database
+ print WHEN_TPL.format(
+ format=key,
+ format_options='\n '.join(format_options)
+ )
+# Trailing text, printed after the per-database blocks
+print '''
+ '''
+
+CHANGE_FORMAT_TPL = '''
+
+
+ {formats}
+
+
+'''
+
+CHANGE_FORMAT_WHEN_TPL = ''''''
+# Format options
+
+# Build one CHANGE_FORMAT_WHEN_TPL entry per recorded format string and print them inside CHANGE_FORMAT_TPL.
+whens = []
+for (k, v) in format_names.items():
+ if v is None:
+ v = 'text'
+ elif v == 'asn.1':
+ v = 'asn1'
+ # v is now 'text' for a missing modifier, 'asn1' for 'asn.1', otherwise unchanged
+ whens.append(CHANGE_FORMAT_WHEN_TPL.format(
+ key=k, value=v
+ ))
+# Join the entries and print the completed template
+print CHANGE_FORMAT_TPL.format(formats='\n '.join(whens))
diff -r 000000000000 -r 68cd8d564e0a ecitmatch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ecitmatch.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='ECitMatch', epilog='')
+ parser.add_argument('--file', type=argparse.FileType('r'), help='Tabular file containing citations to search')
+
+ parser.add_argument('--key', nargs='*', help='Citation Key')
+ parser.add_argument('--journal_title', nargs='*', help='Journal Title')
+ parser.add_argument('--year', nargs='*', help='Year')
+ parser.add_argument('--volume', nargs='*', help='Volume')
+ parser.add_argument('--first_page', nargs='*', help='First Page')
+ parser.add_argument('--author_name', nargs='*', help='Author name')
+
+ # Emails
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ args = parser.parse_args()
+
+ c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+
+ citations = []
+ if args.file is None:
+ for key, journal, year, volume, first_page, author_name in \
+ zip(args.key, args.journal_title, args.year, args.volume, args.first_page, args.author_name):
+ citations.append({
+ 'key': key,
+ 'journal': journal,
+ 'year': year,
+ 'volume': volume,
+ 'first_page': first_page,
+ 'author_name': author_name,
+ })
+ else:
+ for line in args.file:
+ line = line.strip()
+ if not line.startswith('#'):
+ tmp = line.split('\t')
+ try:
+ citations.append({
+ 'journal': tmp[0],
+ 'year': tmp[1],
+ 'volume': tmp[2],
+ 'first_page': tmp[3],
+ 'author_name': tmp[4],
+ 'key': tmp[5],
+ })
+ except KeyError:
+ print "Could not parse line: %s" % line
+
+ payload = {
+ 'db': 'pubmed',
+ 'bdata': citations
+ }
+
+ results = c.citmatch(**payload)
+ # We get data back as pipe separated, so just replace those with tabs
+ print results.replace('|', '\t')
diff -r 000000000000 -r 68cd8d564e0a ecitmatch.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ecitmatch.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,105 @@
+
+
+ search NCBI for citations in PubMed
+
+ macros.xml
+
+
+ python ecitmatch.py --version
+ $default]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a efetch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/efetch.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='EFetch', epilog='')
+ parser.add_argument('db', help='Database to use')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+
+ # ID source
+ parser.add_argument('--id_list', help='list of ids')
+ parser.add_argument('--id', help='Comma separated individual IDs')
+ parser.add_argument('--history_file', help='Fetch results from previous query')
+
+ # Output
+ parser.add_argument('--retmode', help='Retmode')
+ parser.add_argument('--rettype', help='Rettype')
+ args = parser.parse_args()
+
+ c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+ payload = {}
+ if args.history_file is not None:
+ payload.update(c.get_history())
+ else:
+ payload['id'] = ','.join(merged_ids)
+
+ for attr in ('retmode', 'rettype'):
+ if getattr(args, attr, None) is not None:
+ payload[attr] = getattr(args, attr)
+
+ c.fetch(args.db, ftype=args.retmode, **payload)
diff -r 000000000000 -r 68cd8d564e0a egquery.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/egquery.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='EGQuery', epilog='')
+ parser.add_argument('term', help='Query')
+ #
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ args = parser.parse_args()
+
+ c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+
+ payload = {
+ 'term': args.term,
+ }
+ results = c.gquery(**payload)
+ print results
diff -r 000000000000 -r 68cd8d564e0a einfo.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/einfo.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+# Command-line entry point: print the raw EInfo reply, optionally restricted to one database.
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='EInfo', epilog='')
+ parser.add_argument('--db', help='Database to use')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ args = parser.parse_args()
+
+ c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email)
+ payload = {}
+ if args.db is not None:
+ payload['db'] = args.db
+ payload['version'] = '2.0'  # NOTE(review): nesting is not recoverable from this copy; confirm whether this is set only when --db is given
+ print c.info(**payload)
diff -r 000000000000 -r 68cd8d564e0a elink.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/elink.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import argparse
+import json
+
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='EFetch', epilog='')
+ parser.add_argument('db', help='Database to use, sometimes "none" (e.g. *check)')
+ parser.add_argument('dbfrom', help='Database containing input UIDs')
+ parser.add_argument('cmd', choices=['neighbor', 'neighbor_score',
+ 'neighbor_history', 'acheck', 'ncheck', 'lcheck',
+ 'llinks', 'llinkslib', 'prlinks'],
+ help='ELink command mode')
+ # Only used in case of neighbor_history
+ parser.add_argument('--history_out', type=argparse.FileType('w'),
+ help='Output history file', default='-')
+
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ # ID Sources
+ parser.add_argument('--id_list', help='list of ids')
+ parser.add_argument('--id', help='Comma separated individual IDs')
+ parser.add_argument('--history_file', help='Fetch results from previous query')
+
+ # TODO: dates, linkname, term, holding
+ # neighbor or neighbor_history and dbfrom is pubmed
+ # parser.add_argument('--datetype', help='Date type')
+ # parser.add_argument('--reldate', help='In past N days')
+ # parser.add_argument('--mindate', help='Minimum date')
+ # parser.add_argument('--maxdate', help='maximum date')
+
+ # Output
+ args = parser.parse_args()
+
+ c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+ payload = {
+ 'dbfrom': args.dbfrom,
+ 'cmd': args.cmd,
+ }
+ if args.history_file is not None:
+ payload.update(c.get_history())
+ else:
+ payload['id'] = ','.join(merged_ids)
+
+ # DB can be 'none' in a few cases.
+ if args.db != "none":
+ payload['db'] = args.db
+
+ results = c.link(**payload)
+
+ if args.cmd == "neighbor_history":
+ history = c.extract_history(results)
+ args.history_out.write(json.dumps(history, indent=4))
+
+ print results
diff -r 000000000000 -r 68cd8d564e0a epost.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/epost.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+# Command-line entry point: run EPost for the given IDs or stored history and print the JSON-formatted result.
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='EPost', epilog='')
+ parser.add_argument('db', help='Database to use')
+ parser.add_argument('--id_list', help='list of ids')
+ parser.add_argument('--id', help='Comma separated individual IDs')
+ parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+
+ args = parser.parse_args()
+ # parse_ids raises when neither IDs nor a history file were supplied
+ c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+ # Either reuse the stored history parameters or send the explicit ID list
+ payload = {}
+ if args.history_file is not None:
+ payload.update(c.get_history())
+ else:
+ payload['id'] = ','.join(merged_ids)
+ payload['WebEnv'] = ''
+ # NOTE(review): nesting is not recoverable from this copy; confirm WebEnv is blanked only in the else-branch
+ print c.post(args.db, **payload)
diff -r 000000000000 -r 68cd8d564e0a esearch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/esearch.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+import json
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='ESearch', epilog='')
+ parser.add_argument('db', help='Database to use')
+ parser.add_argument('term', help='Query')
+ parser.add_argument('--history_file', help='Filter existing history')
+ parser.add_argument('--datetype', help='Date type')
+ parser.add_argument('--reldate', help='In past N days')
+ parser.add_argument('--mindate', help='Minimum date')
+ parser.add_argument('--maxdate', help='maximum date')
+ # History
+ parser.add_argument('--history_out', type=argparse.FileType('w'),
+ help='Output history file')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ args = parser.parse_args()
+
+ c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+
+ payload = {
+ 'db': args.db,
+ 'term': args.term,
+ 'retstart': 0,
+ 'retmax': 20,
+ # hmmm @ retmax
+ }
+ if args.history_file is not None:
+ payload.update(c.get_history())
+ if args.history_out is not None:
+ payload['usehistory'] = 'y'
+
+ for attr in ('datetype', 'reldate', 'mindate', 'maxdate'):
+ if getattr(args, attr, None) is not None:
+ payload[attr] = getattr(args, attr)
+
+ results = c.search(**payload)
+
+ if args.history_out is not None:
+ history = c.extract_history(results)
+ args.history_out.write(json.dumps(history, indent=4))
+
+ print results
diff -r 000000000000 -r 68cd8d564e0a esummary.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/esummary.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+import argparse
+import eutils
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='ESummary', epilog='')
+ parser.add_argument('db', help='Database to use')
+ parser.add_argument('--id_list', help='list of ids')
+ parser.add_argument('--id', help='Comma separated individual IDs')
+ parser.add_argument('--history_file', help='Filter existing history')
+ parser.add_argument('--user_email', help="User email")
+ parser.add_argument('--admin_email', help="Admin email")
+ args = parser.parse_args()
+
+ c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
+
+ merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
+
+ payload = {
+ 'db': args.db,
+ }
+
+ if args.history_file is not None:
+ payload.update(c.get_history())
+ else:
+ payload['id'] = ','.join(merged_ids)
+
+ print c.summary(**payload)
diff -r 000000000000 -r 68cd8d564e0a eutils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eutils.py Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,127 @@
+import os
+import json
+import StringIO
+from Bio import Entrez
+Entrez.tool = "GalaxyEutils_1_0"
+BATCH_SIZE = 200
+
+
+class Client(object):
+
+ def __init__(self, history_file=None, user_email=None, admin_email=None):
+ self.using_history = False
+
+ if user_email is not None and admin_email is not None:
+ Entrez.email = ';'.join((admin_email, user_email))
+ elif user_email is not None:
+ Entrez.email = user_email
+ elif admin_email is not None:
+ Entrez.email = admin_email
+ else:
+ Entrez.email = os.environ.get('NCBI_EUTILS_CONTACT', None)
+
+ if Entrez.email is None:
+ raise Exception("Cannot continue without an email; please set "
+ "administrator email in NCBI_EUTILS_CONTACT")
+
+ if history_file is not None:
+ with open(history_file, 'r') as handle:
+ data = json.loads(handle.read())
+ self.query_key = data['QueryKey']
+ self.webenv = data['WebEnv']
+ self.using_history = True
+
+ def get_history(self):
+ if not self.using_history:
+ return {}
+ else:
+ return {
+ 'query_key': self.query_key,
+ 'WebEnv': self.webenv,
+ }
+
+ def post(self, database, **payload):
+ return json.dumps(Entrez.read(Entrez.epost(database, **payload)), indent=4)
+
+ def fetch(self, db, ftype=None, **payload):
+ os.makedirs("downloads")
+
+ if 'id' in payload:
+ summary = self.id_summary(db, payload['id'])
+ else:
+ summary = self.history_summary(db)
+
+ count = len(summary)
+ payload['retmax'] = BATCH_SIZE
+
+ # This may be bad. I'm not sure yet. I think it will be ... but UGH.
+ for i in range(0, count, BATCH_SIZE):
+ payload['retstart'] = i
+ file_path = os.path.join('downloads', 'EFetch Results Chunk %s.%s' % (i, ftype))
+ with open(file_path, 'w') as handle:
+ handle.write(Entrez.efetch(db, **payload).read())
+
+ def id_summary(self, db, id_list):
+ payload = {
+ 'db': db,
+ 'id': id_list,
+ }
+ return Entrez.read(Entrez.esummary(**payload))
+
+ def history_summary(self, db):
+ if not self.using_history:
+ raise Exception("History must be available for this method")
+
+ payload = {
+ 'db': db,
+ 'query_key': self.query_key,
+ 'WebEnv': self.webenv,
+ }
+ return Entrez.read(Entrez.esummary(**payload))
+
+ def summary(self, **payload):
+ return Entrez.esummary(**payload).read()
+
+ def link(self, **payload):
+ return Entrez.elink(**payload).read()
+
+ def extract_history(self, xml_data):
+ parsed_data = Entrez.read(StringIO.StringIO(xml_data))
+ history = {}
+ for key in ('QueryKey', 'WebEnv'):
+ if key in parsed_data:
+ history[key] = parsed_data[key]
+
+ return history
+
+ def search(self, **payload):
+ return Entrez.esearch(**payload).read()
+
+ def info(self, **kwargs):
+ return Entrez.einfo(**kwargs).read()
+
+ def gquery(self, **kwargs):
+ return Entrez.egquery(**kwargs).read()
+
+ def citmatch(self, **kwargs):
+ return Entrez.ecitmatch(**kwargs).read()
+
+ @classmethod
+ def parse_ids(cls, id_list, id, history_file):
+ """Parse IDs passed on --cli or in a file passed to the cli
+ """
+ merged_ids = []
+ if id is not None:
+ for pid in id.replace('__cn__', ',').replace('\n', ',').split(','):
+ if pid is not None and len(pid) > 0:
+ merged_ids.append(pid)
+
+ if id_list is not None:
+ with open(id_list, 'r') as handle:
+ merged_ids += [x.strip() for x in handle.readlines()]
+
+ # Exception hanlded here for uniformity
+ if len(merged_ids) == 0 and history_file is None:
+ raise Exception("Must provide history file or IDs")
+
+ return merged_ids
diff -r 000000000000 -r 68cd8d564e0a eutils.pyc
Binary file eutils.pyc has changed
diff -r 000000000000 -r 68cd8d564e0a macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,847 @@
+
+
+ 1.1
+
+--user_email "$__user_email__"
+#set admin_emails = ';'.join(str($__admin_users__).split(','))
+--admin_email "$admin_emails"
+
+
+
+ `__
+
+The `full disclaimer `__ is available on
+their website
+
+Liability
+~~~~~~~~~
+
+For documents and software available from this server, the
+U.S. Government does not warrant or assume any legal liability or
+responsibility for the accuracy, completeness, or usefulness of any
+information, apparatus, product, or process disclosed.
+
+Endorsement
+~~~~~~~~~~~
+
+NCBI does not endorse or recommend any commercial
+products, processes, or services. The views and opinions of authors
+expressed on NCBI's Web sites do not necessarily state or reflect those
+of the U.S. Government, and they may not be used for advertising or
+product endorsement purposes.
+
+External Links
+~~~~~~~~~~~~~~
+
+Some NCBI Web pages may provide links to other Internet
+sites for the convenience of users. NCBI is not responsible for the
+availability or content of these external sites, nor does NCBI endorse,
+warrant, or guarantee the products, services, or information described
+or offered at these other Internet sites. Users cannot assume that the
+external sites will abide by the same Privacy Policy to which NCBI
+adheres. It is the responsibility of the user to examine the copyright
+and licensing restrictions of linked pages and to secure all necessary
+permissions.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#if $query_source.qss == "history":
+ --history_file $query_source.history_file
+#else if $query_source.qss == "id_file":
+ --id_list $query_source.id_file
+#else if $query_source.qss == "id_list":
+ --id $query_source.id_list
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @Book{ncbiEutils,
+ author = {Eric Sayers},
+ title = {Entrez Programming Utilities Help},
+ year = {2010},
+ publisher = {National Center for Biotechnology Information, Bethesda, Maryland},
+ note = {http://www.ncbi.nlm.nih.gov/books/NBK25500/}
+ }
+
+
+
+
+ python
+ biopython
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a test-data/ecitmatch.results.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecitmatch.results.tsv Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,2 @@
+ 1991 88 3248 mann bj citation_1 2014248
+
diff -r 000000000000 -r 68cd8d564e0a test-data/ecitmatch.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecitmatch.tsv Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,2 @@
+#journal year volume first page author key
+proc natl acad sci u s a 1991 88 3248 mann bj citation_1
diff -r 000000000000 -r 68cd8d564e0a test-data/egquery.1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/egquery.1.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,7 @@
+
+
+
+
+ bacteriophage
+
+
diff -r 000000000000 -r 68cd8d564e0a test-data/esearch.pubmed.2014-01-pnas.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esearch.pubmed.2014-01-pnas.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,25 @@
+
+
+524200
+24620368
+24613929
+24596955
+24596954
+24571024
+24555201
+24555200
+24550301
+24520173
+24520172
+24497494
+24497493
+24488973
+24488972
+24488971
+24481254
+24481253
+24481252
+24477693
+24477692
+ PNAS[ta] "Proc Natl Acad Sci U S A"[Journal] "Proc Natl Acad Sci U S A"[Journal] Journal 124812 N 2014/01/01[PDAT] PDAT 0 N 2014/02/01[PDAT] PDAT 0 N RANGE AND "Proc Natl Acad Sci U S A"[Journal] AND 2014/01/01[PDAT] : 2014/02/01[PDAT]
+
diff -r 000000000000 -r 68cd8d564e0a test-data/esearch.pubmed.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esearch.pubmed.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,25 @@
+
+
+2651200
+16578858
+11186225
+11121081
+11121080
+11121079
+11121078
+11121077
+11121076
+11121075
+11121074
+11121073
+11121072
+11121071
+11121070
+11121069
+11121068
+11121067
+11121066
+11121065
+11121064
+ PNAS[ta] "Proc Natl Acad Sci U S A"[Journal] "Proc Natl Acad Sci U S A"[Journal] Journal 124812 N 97[vi] vi 77218 N AND GROUP "Proc Natl Acad Sci U S A"[Journal] AND 97[vi]
+
diff -r 000000000000 -r 68cd8d564e0a test-data/esummary.tax.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/esummary.tax.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,20 @@
+
+
+
+
+ 10239
+ - active
+ - superkingdom
+ - viruses
+ - Viruses
+
+ - 10239
+ - 0
+
+
+
+ - 2010/11/23 00:00
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a test-data/example.history.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/example.history.json Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,4 @@
+{
+ "QueryKey": "1",
+ "WebEnv": "NCID_1_9485527_130.14.22.215_9001_1430928295_33285243_0MetA0_S_MegaStore_F_1"
+}
diff -r 000000000000 -r 68cd8d564e0a test-data/pm-tax-neighbor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pm-tax-neighbor.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,24 @@
+
+
+
+
+
+ taxonomy
+
+ 510899
+
+
+
+ pubmed
+ taxonomy_pubmed_entrez
+
+
+ 22241621
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a test-data/pubmed.metadata.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pubmed.metadata.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,7 @@
+
+
+
+
+ pubmed
+ PubMed
+ PubMed bibliographic record
diff -r 000000000000 -r 68cd8d564e0a test-data/viruses.tax.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/viruses.tax.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,29 @@
+
+
+
+ 10239
+ Viruses
+
+ viruses
+ Vira
+ Viridae
+
+ 1
+ superkingdom
+ Viruses
+
+ 1
+ Standard
+
+
+ 0
+ Unspecified
+
+
+ 1995/02/27 09:24:00
+ 2010/11/23 11:40:11
+ 1993/04/20 01:00:00
+
+
+
+
diff -r 000000000000 -r 68cd8d564e0a tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Jul 07 02:39:21 2016 -0400
@@ -0,0 +1,9 @@
+
+
+
+ /please set the administrator's contact email in the corresponding env.sh file/
+
+
+
+
+