# HG changeset patch # User iuc # Date 1600854559 0 # Node ID b00212deaea7c34c5bc8aafed3b9e8790b6ead33 # Parent 1dff3adb0a9771329666084a6311d05790272f70 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit dae34e5e182b4cceb808d7353080f14aa9a78ca9" diff -r 1dff3adb0a97 -r b00212deaea7 __efetch_build_options.py --- a/__efetch_build_options.py Wed Mar 11 04:03:55 2020 -0400 +++ b/__efetch_build_options.py Wed Sep 23 09:49:19 2020 +0000 @@ -53,7 +53,7 @@ sra structure taxonomy -unigene'''.replace( "", "").replace( "", "").split("\n") +unigene'''.replace("", "").replace("", "").split("\n") help = ''' (all) diff -r 1dff3adb0a97 -r b00212deaea7 ecitmatch.py --- a/ecitmatch.py Wed Mar 11 04:03:55 2020 -0400 +++ b/ecitmatch.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import argparse @@ -17,6 +16,8 @@ parser.add_argument('--first_page', nargs='*', help='First Page') parser.add_argument('--author_name', nargs='*', help='Author name') + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + # Emails parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") diff -r 1dff3adb0a97 -r b00212deaea7 efetch.py --- a/efetch.py Wed Mar 11 04:03:55 2020 -0400 +++ b/efetch.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,36 +1,111 @@ #!/usr/bin/env python + import argparse +import glob +import json +import logging +import os + import eutils +logging.basicConfig(level=logging.INFO) + + +def handleEfetchException(e, db, payload): + logging.error('No results returned. This could either be due to no records matching the supplied IDs for the query database or it could be an error due to invalid parameters. The reported exception was "%s".\n\nPayload used for the efetch query to database "%s"\n\n%s', e, db, json.dumps(payload, indent=4)) + + # Create a file in the downloads folder so that the user can access run information + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, r'downloads') + if not os.path.exists(final_directory): + os.makedirs(final_directory) + + print('The following files were downloaded:') + print(os.listdir(final_directory)) + + file_path = os.path.join('downloads', 'no_results.txt') + with open(file_path, 'w') as handle: + handle.write('No results') + + +def localFetch(db, gformat, newname, **payload): + problem = None + try: + c.fetch(db, **payload) + + for chunk, file in enumerate(glob.glob('downloads/EFetch *')): + os.rename(file, '%s%s.%s' % (newname, chunk + 1, gformat)) + + except Exception as e: + problem = e + handleEfetchException(e, db, payload) + else: + print('The following files were downloaded:') + print(os.listdir('downloads')) + + return problem + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='EFetch', epilog='') parser.add_argument('db', help='Database to use') parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + # ID source + parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink') + parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink') parser.add_argument('--id_list', help='list of ids') parser.add_argument('--id', help='Comma separated individual IDs') - parser.add_argument('--history_file', help='Fetch results from previous query') + parser.add_argument('--history_file', help='Fetch results from previous query (JSON)') + parser.add_argument('--history_xml', help='Fetch results from previous query (XML)') # Output parser.add_argument('--retmode', help='Retmode') parser.add_argument('--rettype', help='Rettype') + parser.add_argument('--galaxy_format', help='Galaxy format') args = parser.parse_args() c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email) - merged_ids = c.parse_ids(args.id_list, args.id, args.history_file) payload = {} - if args.history_file is not None: - payload.update(c.get_history()) - else: - payload['id'] = ','.join(merged_ids) - for attr in ('retmode', 'rettype'): if getattr(args, attr, None) is not None: payload[attr] = getattr(args, attr) - c.fetch(args.db, ftype=args.retmode, **payload) + if args.history_file is not None or args.history_xml is not None: + if args.history_file is not None: + input_histories = c.get_histories() + else: + input_histories = c.extract_histories_from_xml_file(args.history_xml) + + problem = None + for hist in input_histories: + qkey = hist['query_key'] + tmp_payload = payload + tmp_payload.update(hist) + newname = 'downloads/EFetch-%s-%s-querykey%s-chunk' % (args.rettype, args.retmode, qkey) + problem = localFetch(args.db, args.galaxy_format, newname, **tmp_payload) + + if os.path.exists('downloads'): + os.rename('downloads', 'downloads-qkey%s' % (qkey)) + + if not os.path.exists('downloads'): + os.makedirs('downloads') + + for relpath in glob.glob('downloads-qkey*/*'): + file = os.path.basename(relpath) + os.rename(relpath, 'downloads/%s' % (file)) + + if problem is not None: + raise(problem) + + else: + merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json) + payload['id'] = ','.join(merged_ids) + newname = 'downloads/EFetch-%s-%s-chunk' % (args.rettype, args.retmode) + localFetch(args.db, args.galaxy_format, newname, **payload) diff -r 1dff3adb0a97 -r b00212deaea7 egquery.py --- a/egquery.py Wed Mar 11 04:03:55 2020 -0400 +++ b/egquery.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import argparse @@ -9,9 +8,12 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='EGQuery', epilog='') parser.add_argument('term', help='Query') - # + parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") + + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + args = parser.parse_args() c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email) diff -r 1dff3adb0a97 -r b00212deaea7 einfo.py --- a/einfo.py Wed Mar 11 04:03:55 2020 -0400 +++ b/einfo.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import argparse @@ -11,6 +10,7 @@ parser.add_argument('--db', help='Database to use') parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') args = parser.parse_args() c = eutils.Client(user_email=args.user_email, admin_email=args.admin_email) diff -r 1dff3adb0a97 -r b00212deaea7 elink.py --- a/elink.py Wed Mar 11 04:03:55 2020 -0400 +++ b/elink.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,12 +1,16 @@ #!/usr/bin/env python -from __future__ import print_function import argparse import json +import logging +import os import eutils +logging.basicConfig(level=logging.INFO) + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='EFetch', epilog='') parser.add_argument('db', help='Database to use, sometimes "none" (e.g. *check)') @@ -15,16 +19,23 @@ 'neighbor_history', 'acheck', 'ncheck', 'lcheck', 'llinks', 'llinkslib', 'prlinks'], help='ELink command mode') - # Only used in case of neighbor_history - parser.add_argument('--history_out', type=argparse.FileType('w'), - help='Output history file', default='-') + + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") + # ID Sources + parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink') + parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink') parser.add_argument('--id_list', help='list of ids') parser.add_argument('--id', help='Comma separated individual IDs') parser.add_argument('--history_file', help='Fetch results from previous query') + parser.add_argument('--history_xml', help='Fetch results from previous query') + + # Optional + parser.add_argument('--linkname', help='Restrict results to a specific link source') + parser.add_argument('--retmode', choices=['xml', 'json', 'uilist'], help='Output format') # TODO: dates, linkname, term, holding # neighbor or neighbor_history and dbfrom is pubmed @@ -37,25 +48,91 @@ args = parser.parse_args() c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email) - merged_ids = c.parse_ids(args.id_list, args.id, args.history_file) payload = { 'dbfrom': args.dbfrom, 'cmd': args.cmd, } - if args.history_file is not None: - payload.update(c.get_history()) - else: - payload['id'] = ','.join(merged_ids) # DB can be 'none' in a few cases. if args.db != "none": payload['db'] = args.db - results = c.link(**payload) + if args.linkname is not None: + payload['linkname'] = args.linkname + + results = [] + qkeys = [] + if args.history_file is not None or args.history_xml is not None: + payload['retmode'] = args.retmode + if args.history_file is not None: + input_histories = c.get_histories() + else: + input_histories = c.extract_histories_from_xml_file(args.history_xml) + for hist in input_histories: + qkeys += [hist['query_key']] + tmp_payload = payload + tmp_payload.update(hist) + results += [c.link(**tmp_payload)] + else: + # There is no uilist retmode + if args.retmode == "uilist": + payload['retmode'] = 'xml' + else: + payload['retmode'] = args.retmode + merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json) + payload['id'] = ','.join(merged_ids) + qkeys += [1] + results += [c.link(**payload)] + + # There could be multiple sets of results if a history was supplied + if args.history_file is not None or args.history_xml is not None: + # Multiple result sets can be returned + # Create a directory for the output files + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, r'downloads') + if not os.path.exists(final_directory): + os.makedirs(final_directory) - if args.cmd == "neighbor_history": - history = c.extract_history(results) - args.history_out.write(json.dumps(history, indent=4)) - - print(results) + logging.info("Writing files:") + # When rettype is uilist, convert to text format (which elink does not do) + count = 0 + if args.retmode == 'uilist': + for result in results: + qkey = qkeys[count] + count += 1 + ids = c.xmlstring2UIlist(result) + file_path = os.path.join('downloads', '%s-querykey%s.tabular' % (args.db, qkey)) + logging.info('%s.tabular' % (args.db)) + with open(file_path, 'w') as handle: + for id in ids: + handle.write(id) + handle.write(os.linesep) + elif args.retmode == 'json': + for result in results: + qkey = qkeys[count] + count += 1 + file_path = os.path.join('downloads', '%s-querykey%s.json' % (args.db, qkey)) + logging.info('%s-link%s.json' % (args.db, count)) + with open(file_path, 'w') as handle: + json_data = c.jsonstring2jsondata(result) + handle.write(json.dumps(json_data, indent=4)) + else: + for result in results: + qkey = qkeys[count] + count += 1 + file_path = os.path.join('downloads', '%s-querykey%s.xml' % (args.db, qkey)) + logging.info('%s-link%s.xml' % (args.db, count)) + with open(file_path, 'w') as handle: + handle.write(result) + else: + # When rettype is uilist, convert to text format (which elink does not do) + if args.retmode == 'uilist': + ids = c.xmlstring2UIlist(results[0]) + for id in ids: + print(id) + elif args.retmode == 'json': + json_data = c.jsonstring2jsondata(results[0]) + print(json.dumps(json_data, indent=4)) + else: + print(results[0]) diff -r 1dff3adb0a97 -r b00212deaea7 epost.py --- a/epost.py Wed Mar 11 04:03:55 2020 -0400 +++ b/epost.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import argparse @@ -9,22 +8,37 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='EPost', epilog='') parser.add_argument('db', help='Database to use') + parser.add_argument('--user_email', help="User email") + parser.add_argument('--admin_email', help="Admin email") + + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + + # ID source parser.add_argument('--id_list', help='list of ids') parser.add_argument('--id', help='Comma separated individual IDs') - parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv') - parser.add_argument('--user_email', help="User email") - parser.add_argument('--admin_email', help="Admin email") + parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink') + parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink') + + # Target history + parser.add_argument('--history_xml', help='Post to new QueryKey in an existing WebEnv (XML)') + parser.add_argument('--history_file', help='Post to new QueryKey in an existing WebEnv (JSON)') + parser.add_argument('--webenv', help='Post to new WebEnv (History ID)') args = parser.parse_args() c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email) - merged_ids = c.parse_ids(args.id_list, args.id, args.history_file) payload = {} if args.history_file is not None: - payload.update(c.get_history()) - else: - payload['id'] = ','.join(merged_ids) - payload['WebEnv'] = '' + hist = c.get_history() + payload['WebEnv'] = hist['WebEnv'] + elif args.history_xml is not None: + hist = c.extract_history_from_xml_file(args.history_xml) + payload['WebEnv'] = hist['WebEnv'] + elif args.webenv is not None: + payload['WebEnv'] = args.webenv + + merged_ids = c.parse_ids(args.id_list, args.id, None, args.id_xml, args.id_json) + payload['id'] = ','.join(merged_ids) print(c.post(args.db, **payload)) diff -r 1dff3adb0a97 -r b00212deaea7 esearch.py --- a/esearch.py Wed Mar 11 04:03:55 2020 -0400 +++ b/esearch.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,12 +1,16 @@ #!/usr/bin/env python -from __future__ import print_function import argparse import json +import logging + import eutils +logging.basicConfig(level=logging.INFO) + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='ESearch', epilog='') parser.add_argument('db', help='Database to use') @@ -17,34 +21,54 @@ parser.add_argument('--mindate', help='Minimum date') parser.add_argument('--maxdate', help='maximum date') # History - parser.add_argument('--history_out', type=argparse.FileType('w'), - help='Output history file') + parser.add_argument('--history_out', action="store_true", help='Output history file') parser.add_argument('--user_email', help="User email") parser.add_argument('--admin_email', help="Admin email") + + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + + # Output + parser.add_argument('--retmode', help='Retmode') + parser.add_argument('--rettype', help='Rettype') + parser.add_argument('--retstart', type=int, default=0, help='Retstart - Starting rec number (0)') + parser.add_argument('--retmax', type=int, default=20, help='Retmax - max number of recs returned (20, max 100000)') + args = parser.parse_args() c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email) + max_retmax = 100000 + min_retmax = 1 + max = max(min(args.retmax, max_retmax), min_retmax) + payload = { 'db': args.db, 'term': args.term, - 'retstart': 0, - 'retmax': 20, - # hmmm @ retmax } if args.history_file is not None: payload.update(c.get_history()) - if args.history_out is not None: + + # if args.history_out is not None: + if args.history_out: payload['usehistory'] = 'y' - for attr in ('datetype', 'reldate', 'mindate', 'maxdate'): + payload['retmode'] = args.retmode + + for attr in ('datetype', 'reldate', 'mindate', 'maxdate', 'rettype', 'retmax', 'retstart'): if getattr(args, attr, None) is not None: payload[attr] = getattr(args, attr) + logging.info("Payload used for query:" + json.dumps(payload, indent=4)) + results = c.search(**payload) - if args.history_out is not None: - history = c.extract_history(results) - args.history_out.write(json.dumps(history, indent=4)) - - print(results) + # We're going to infer that rettype being uilist means convert to text format (which esearch does not do) + if args.retmode == 'text': + ids = c.xmlstring2UIlist(results) + for id in ids: + print(id) + elif args.retmode == 'json': + json_data = c.jsonstring2jsondata(results) + print(json.dumps(json_data, indent=4)) + else: + print(results) diff -r 1dff3adb0a97 -r b00212deaea7 esummary.py --- a/esummary.py Wed Mar 11 04:03:55 2020 -0400 +++ b/esummary.py Wed Sep 23 09:49:19 2020 +0000 @@ -1,32 +1,106 @@ #!/usr/bin/env python -from __future__ import print_function import argparse +import json +import logging +import os import eutils +logging.basicConfig(level=logging.INFO) + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='ESummary', epilog='') parser.add_argument('db', help='Database to use') + parser.add_argument('--user_email', help="User email") + parser.add_argument('--admin_email', help="Admin email") + + parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)') + + # ID source + parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink') + parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink') parser.add_argument('--id_list', help='list of ids') parser.add_argument('--id', help='Comma separated individual IDs') - parser.add_argument('--history_file', help='Filter existing history') - parser.add_argument('--user_email', help="User email") - parser.add_argument('--admin_email', help="Admin email") + parser.add_argument('--history_file', help='Fetch results from previous query') + parser.add_argument('--history_xml', help='Fetch results from previous query') + + # Output + parser.add_argument('--retmode', help='Retmode') + parser.add_argument('--retstart', type=int, default=0, help='Retstart - Starting rec number (0)') + parser.add_argument('--retmax', type=int, default=20, help='Retmax - max number of recs returned (20, max 100000') + args = parser.parse_args() c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email) - merged_ids = c.parse_ids(args.id_list, args.id, args.history_file) - payload = { 'db': args.db, } - if args.history_file is not None: - payload.update(c.get_history()) + for attr in ('retmode', 'retmax', 'retstart'): + if getattr(args, attr, None) is not None: + payload[attr] = getattr(args, attr) + + results = [] + qkeys = [] + if args.history_file is not None or args.history_xml is not None: + payload['retmode'] = args.retmode + if args.history_file is not None: + input_histories = c.get_histories() + else: + input_histories = c.extract_histories_from_xml_file(args.history_xml) + + for hist in input_histories: + qkeys += [hist['query_key']] + tmp_payload = payload + tmp_payload.update(hist) + results += [c.summary(**tmp_payload)] else: + # There is no uilist retmode + if args.retmode == "uilist": + payload['retmode'] = 'xml' + else: + payload['retmode'] = args.retmode + merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json) payload['id'] = ','.join(merged_ids) + qkeys += [1] + results += [c.summary(**payload)] + + # There could be multiple sets of results if a history was supplied + if args.history_file is not None or args.history_xml is not None: + # Multiple result sets can be returned + # Create a directory for the output files + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, r'downloads') + if not os.path.exists(final_directory): + os.makedirs(final_directory) - print(c.summary(**payload)) + logging.info("Writing files:") + count = 0 + if args.retmode == 'json': + for result in results: + qkey = qkeys[count] + count += 1 + file_path = os.path.join('downloads', '%s-querykey%s.json' % (args.db, qkey)) + logging.info('%s-link%s.json' % (args.db, count)) + with open(file_path, 'w') as handle: + json_data = c.jsonstring2jsondata(result) + handle.write(json.dumps(json_data, indent=4)) + else: + for result in results: + qkey = qkeys[count] + count += 1 + file_path = os.path.join('downloads', '%s-querykey%s.xml' % (args.db, qkey)) + logging.info('%s-link%s.xml' % (args.db, count)) + with open(file_path, 'w') as handle: + handle.write(result) + else: + # When rettype is uilist, convert to text format (which elink does not do) + if args.retmode == 'json': + json_data = c.jsonstring2jsondata(results[0]) + print(json.dumps(json_data, indent=4)) + else: + print(results[0]) diff -r 1dff3adb0a97 -r b00212deaea7 eutils.py --- a/eutils.py Wed Mar 11 04:03:55 2020 -0400 +++ b/eutils.py Wed Sep 23 09:49:19 2020 +0000 @@ -12,6 +12,7 @@ def __init__(self, history_file=None, user_email=None, admin_email=None): self.using_history = False + self.using_parsedids = False if user_email is not None and admin_email is not None: Entrez.email = ';'.join((admin_email, user_email)) @@ -29,18 +30,69 @@ if history_file is not None: with open(history_file, 'r') as handle: data = json.loads(handle.read()) - self.query_key = data['QueryKey'] - self.webenv = data['WebEnv'] - self.using_history = True + # esearch + if 'QueryKey' in data: + self.query_key = data['QueryKey'] + self.webenv = data['WebEnv'] + self.query_keys = [] + self.query_keys += [data['QueryKey']] + self.using_history = True + elif 'query_key' in data: + self.query_key = data['query_key'] + self.webenv = data['WebEnv'] + self.query_keys = [] + self.query_keys += [data['query_key']] + self.using_history = True + elif 'esearchresult' in data: + self.query_key = data['esearchresult']['querykey'] + self.webenv = data['esearchresult']['webenv'] + self.query_keys = [] + self.query_keys += [data['esearchresult']['querykey']] + self.using_history = True + # elink + elif 'linksets' in data: + # elink for cmd=neighbor_history + if 'linksetdbhistories' in data['linksets'][0]: + self.webenv = data['linksets'][0]['webenv'] + self.query_key = data['linksets'][0]['linksetdbhistories'][0]['querykey'] + self.using_history = True + # elink for cmd=neighbor|neighbor_score + elif 'linksetdbs' in data['linksets'][0]: + self.using_parsedids = True + # elink for neighbor + if isinstance(data['linksets'][0]['linksetdbs'][0]['links'][0], str): + self.idstr = ','.join(data['linksets'][0]['linksetdbs'][0]['links']) + # elink for neighbor_score + else: + self.idstr = ','.join(map(lambda x: x['id'], data['linksets'][0]['linksetdbs'][0]['links'])) + if 'linksetdbhistories' in data['linksets'][0]: + self.webenv = data['linksets'][0]['webenv'] + self.query_keys = [] + for query in data['linksets'][0]['linksetdbhistories']: + if 'querykey' in query: + self.query_keys += [query['querykey']] + else: + print("No match") + print(data) def get_history(self): - if not self.using_history: - return {} - else: + if self.using_history: return { 'query_key': self.query_key, 'WebEnv': self.webenv, } + elif self.using_parsedids: + return { + 'id': self.idstr, + } + else: + return {} + + def get_histories(self): + histories = [] + for key in self.query_keys: + histories += [{'WebEnv': self.webenv, 'query_key': key}] + return histories def post(self, database, **payload): return json.dumps(Entrez.read(Entrez.epost(database, **payload)), indent=4) @@ -50,8 +102,10 @@ if 'id' in payload: summary = self.id_summary(db, payload['id']) + elif 'WebEnv' not in payload or 'query_key' not in payload: + summary = self.history_summary(db) else: - summary = self.history_summary(db) + summary = payload count = len(summary) payload['retmax'] = BATCH_SIZE @@ -87,15 +141,90 @@ def link(self, **payload): return Entrez.elink(**payload).read() - def extract_history(self, xml_data): - parsed_data = Entrez.read(StringIO.StringIO(xml_data)) + def extract_history_from_xml_file(self, xml_file): history = {} - for key in ('QueryKey', 'WebEnv'): - if key in parsed_data: - history[key] = parsed_data[key] + with open(xml_file, 'r') as handle: + xml_str = handle.read() + history = self.extract_history_from_xml(xml_str) + return history + + def extract_history_from_xml(self, xml_str): + try: + parsed_data = Entrez.read(StringIO(xml_str)) + history = {} + gotit = 0 + + # New code doesn't work for esearch input to elink - Parsing esearch output (reading an xml history) does not work as an elink input payload, which needs 'QueryKey'. Notably, if parsing elink output as input to elink, conversion of xml 'QueryKey' to 'query_key' is needed for some reason. Also Notably, efetch returned results using the 'QueryKey' key + # For esearch xml history results + if 'QueryKey' in parsed_data: + history['query_key'] = parsed_data['QueryKey'] + gotit += 1 + if 'WebEnv' in parsed_data: + history['WebEnv'] = parsed_data['WebEnv'] + gotit += 1 + # For elink xml history results + if gotit < 2: + if 'LinkSetDbHistory' in parsed_data[0]: + if 'QueryKey' in parsed_data[0]['LinkSetDbHistory'][0]: + history['query_key'] = parsed_data[0]['LinkSetDbHistory'][0]['QueryKey'] + gotit += 1 + if 'WebEnv' in parsed_data[0]: + history['WebEnv'] = parsed_data[0]['WebEnv'] + gotit += 1 + if gotit < 2: + raise Exception("Could not find WebEnv in xml response") + except Exception as e: + print("Error parsing...") + print(xml_str) + raise(e) return history + def extract_histories_from_xml_file(self, xml_file): + histories = [] + with open(xml_file, 'r') as handle: + xml_str = handle.read() + histories = self.extract_histories_from_xml(xml_str) + return histories + + def extract_histories_from_xml(self, xml_str): + try: + parsed_data = Entrez.read(StringIO(xml_str)) + histories = [] + gotit = 0 + + # New code doesn't work for esearch input to elink - Parsing esearch output (reading an xml history) does not work as an elink input payload, which needs 'QueryKey'. Notably, if parsing elink output as input to elink, conversion of xml 'QueryKey' to 'query_key' is needed for some reason. Also Notably, efetch returned results using the 'QueryKey' key + # For esearch xml history results + if 'QueryKey' in parsed_data: + tmp_hist = {} + tmp_hist['query_key'] = parsed_data['QueryKey'] + gotit += 1 + if 'WebEnv' in parsed_data: + tmp_hist['WebEnv'] = parsed_data['WebEnv'] + gotit += 1 + if gotit == 2: + histories += [tmp_hist] + # For elink xml history results + else: + gotenv = 0 + if 'LinkSetDbHistory' in parsed_data[0]: + for query in parsed_data[0]['LinkSetDbHistory']: + tmp_hist = {} + if 'WebEnv' in parsed_data[0]: + tmp_hist['WebEnv'] = parsed_data[0]['WebEnv'] + if 'QueryKey' in query: + tmp_hist['query_key'] = query['QueryKey'] + histories += [tmp_hist] + gotit += 1 + if gotit == 0 and gotenv == 0: + raise Exception("Could not find WebEnv in xml response") + except Exception as e: + print("Error parsing...") + print(xml_str) + raise(e) + + return histories + def search(self, **payload): return Entrez.esearch(**payload).read() @@ -109,7 +238,90 @@ return Entrez.ecitmatch(**kwargs).read() @classmethod - def parse_ids(cls, id_list, id, history_file): + def jsonstring2jsondata(cls, json_str): + json_handle = StringIO(json_str) + json_data = json.loads(json_handle.read()) + return json_data + + @classmethod + def jsonfile2UIlist(cls, json_file): + merged_ids = [] + with open(json_file, 'r') as handle: + json_data = json.loads(handle.read()) + for id in cls.jsondata2UIlist(json_data): + merged_ids += [id] + return merged_ids + + @classmethod + def jsondata2UIlist(cls, json_data): + merged_ids = [] + + # Always prioritize the result links as opposed to the search links + # elink - retrieves linked IDs for cmd=neighbor|neighbor_score only + if 'linksets' in json_data: + for lnk in json_data['linksets'][0]['linksetdbs']: + if 'links' in lnk: + for id in lnk['links']: + # elink for neighbor + if isinstance(id, str): + merged_ids.append(id) + # elink for neighbor_score + else: + merged_ids.append(id['id']) + # esearch + elif 'esearchresult' in json_data: + for id in json_data['esearchresult']['idlist']: + merged_ids += [id] + + return merged_ids + + @classmethod + def xmlfile2UIlist(cls, xml_file): + merged_ids = [] + with open(xml_file, 'r') as handle: + xml_data = Entrez.read(handle) + for id in cls.xmldata2UIlist(xml_data): + merged_ids += [id] + return merged_ids + + @classmethod + def xmlstring2UIlist(cls, xml_str): + merged_ids = [] + xml_data = Entrez.read(StringIO(xml_str)) + for id in cls.xmldata2UIlist(xml_data): + merged_ids += [id] + return merged_ids + + @classmethod + def xmldata2UIlist(cls, xml_data): + merged_ids = [] + + try: + # Always prioritize the result links as opposed to the search links + # elink - retrieves linked IDs for cmd=neighbor|neighbor_score only + if 'LinkSetDb' in xml_data[0]: + for lnk in xml_data[0]['LinkSetDb'][0]['Link']: + # elink for neighbor + if isinstance(lnk, str): + merged_ids.append(lnk) + # elink for neighbor_score + else: + merged_ids.append(lnk['Id']) + # esearch + elif 'IdList' in xml_data: + for id in xml_data['IdList']: + merged_ids += [id] + # If it was not elink output, we will end up here + except Exception: + # esearch + if 'IdList' in xml_data: + for id in xml_data['IdList']: + merged_ids += [id] + + return merged_ids + + @classmethod + def parse_ids(cls, id_list, id, history_file, xml_file, json_file): """Parse IDs passed on --cli or in a file passed to the cli """ merged_ids = [] @@ -122,8 +334,21 @@ with open(id_list, 'r') as handle: merged_ids += [x.strip() for x in handle.readlines()] - # Exception hanlded here for uniformity - if len(merged_ids) == 0 and history_file is None: - raise Exception("Must provide history file or IDs") + if xml_file is not None: + tmp_ids = cls.xmlfile2UIlist(xml_file) + for id in tmp_ids: + merged_ids += [id] + + if json_file is not None: + tmp_ids = cls.jsonfile2UIlist(json_file) + for id in tmp_ids: + merged_ids += [id] return merged_ids + + @classmethod + def getVersion(cls): + """Return the biopython version + """ + import Bio + return Bio.__version__ diff -r 1dff3adb0a97 -r b00212deaea7 generate_macros_xml.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_macros_xml.pl Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,841 @@ +#!/usr/bin/env perl + +#Usage: perl generate_macros_xml.pl > macros.xml + +#Note, this script uses einfo.py to get database info. It also uses manually compiled data stored at the bottom of this script that is based on: https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly +#The data in the table on that page was manipulated to replace nulls with 'none', remove duplicates, and add missing formats based on correspondence with MLN. + +## +## use einfo to retrieve all the valid databases +## + +print STDERR "Retrieving database list\n"; + +my $dbxml = `python einfo.py --user_email "planemo@galaxyproject.org" --admin_email "planemo@galaxyproject.org;test@bx.psu.edu"`; + +my(@dblist); +my $dbs = {}; +my $dbfroms = {}; +my $dbnames = {}; +foreach(split(/\n/,$dbxml)) + { + if(/(.+)<\/DbName>/) + { + my $db = $1; + push(@dblist,$db); + $dbs->{$db} = 0; + $dbfroms->{$db} = 0; + $dbnames->{$db} = $_; + } + } + +## +## Use einfo to retrieve all the valid links for each database (Note: some databases are not linked) +## + +my $h = {}; +foreach my $db (sort {$dbnames->{$a} cmp $dbnames->{$b}} @dblist) + { + sleep(2); + + print STDERR "Retrieving info for $db\n"; + + my $response = `python einfo.py --db $db --user_email "planemo\@galaxyproject.org" --admin_email "planemo\@galaxyproject.org;test\@bx.psu.edu"`; + + my $dolinks = 0; + my $link = ""; + my $name = ""; + + foreach(split(/\n/,$response)) + { + if(//) + { + $dolinks = 1; + #Save whether there exist links from this database + $dbfroms->{$db} = 1; + } + elsif(!$dolinks) + { + if(/(.+)<\/MenuName>/) + {$dbnames->{$db} = "$1 ($db)"} + } + elsif($dolinks) + { + if(/(.+)<\/Name>/) + {$link=$1} + elsif(/(.*)<\/Menu>/) + {$name=$1} + elsif(/(.+)<\/DbTo>/) + { + $dbto=$1; + push(@{$h->{$db}->{$dbto}},[$link,$name]); + $link=""; + $name=""; + } + } + } + } + +my @sorted_dblist = sort {$dbnames->{$a} cmp $dbnames->{$b}} @dblist; + +## +## Generate XML to govern the valid databases to use with efetch +## + +my $efetch_dbhash = {}; #->{efetch-compatible-db}->{rettype-retmode-galaxy_format} = format_name (galaxy_format) +while() + { + chomp; + my($db,$galaxy_format,$retmode,$rettype,$format_name) = split(/\t/,$_); + $efetch_dbhash->{$db}->{"$rettype-$retmode-$galaxy_format"} = + "$format_name ($galaxy_format)"; + } + +#EFetch database select list + +print << 'EOXML'; + + +EOXML + +foreach my $db (grep {exists($dbs->{$_})} + sort {$dbnames->{$a} cmp $dbnames->{$b}} + keys(%$efetch_dbhash)) + { + my $selected = ''; + if($db eq 'pubmed') + {$selected = ' selected="True"'} + print << " EOXML"; + + EOXML + } + +print << 'EOXML'; + + +EOXML + +#EFetch output formats + +print << 'EOXML'; + + + +EOXML + +foreach my $db (grep {exists($dbs->{$_})} + sort {$dbnames->{$a} cmp $dbnames->{$b}} + keys(%$efetch_dbhash)) + { + print << " EOXML"; + + + EOXML + + foreach my $eutils_format (sort {$efetch_dbhash->{$db}->{$a} cmp + $efetch_dbhash->{$db}->{$b}} + keys(%{$efetch_dbhash->{$db}})) + { + print << " EOXML"; + + EOXML + } + + print << " EOXML"; + + + EOXML + } + +print << 'EOXML'; + + +EOXML + +## +## Create a select list for the databases linked *from* +## + +print << 'EOXML'; + + +EOXML + +foreach my $from (@sorted_dblist) + { + print << " EOXML"; + + EOXML + } + +print << 'EOXML'; + + +EOXML + +## +## Create a select list for the databases linked *to* +## + +print << 'EOXML'; + + +EOXML + +foreach my $from (grep {$dbfroms->{$_}} @sorted_dblist) + { + print << " EOXML"; + + EOXML + } + +print << 'EOXML'; + + +EOXML + +## +## Create empty entries for commands that take no *to* database or link +## + +print << 'EOXML'; + + + + + + + + + + + + + + + +EOXML + +foreach(grep {$dbfroms->{$_}} @sorted_dblist) + { + print << " EOXML"; + + + + EOXML + } + +print << 'EOXML'; + + +EOXML + +## +## This is the master macro for the command selection +## + +print << 'EOXML'; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +EOXML + +## +## Create selections for valid links for command types neighbor, neighbor_history, and neighbor_score +## + +print << 'EOXML'; + + + +EOXML + +foreach my $from (grep {$dbfroms->{$_}} @sorted_dblist) + { + print STDERR ("Creating Links From: $from\n"); + + print << " EOXML"; + + + + EOXML + + my @dbtos = (grep {exists($h->{$from}) && exists($h->{$from}->{$_})} + @sorted_dblist); + foreach(@dbtos) + { + print << " EOXML"; + + EOXML + } + if(scalar(@dbtos) == 0) + { + #Provide an option for a self-link: from->from + print << " EOXML"; + + EOXML + } + + print << ' EOXML'; + + EOXML + + if(exists($h->{$from})) + { + #There do exist links to invalid(/outdated/non-existant) databases that + #would result in an error if they are selected, so we use the original + #@dblist instead of the keys present in the sub hash of $h->{$from}, and + #then check for existence in the sub-hash + foreach my $to (grep {exists($h->{$from}->{$_})} @sorted_dblist) + { + print STDERR ("\tTo: $to Links: ", + join(',',map {$_->[0]} @{$h->{$from}->{$to}}), + "\n"); + + print << " EOXML"; + + + + EOXML + + foreach(sort {"$a->[1] ($a->[0])" cmp "$b->[1] ($b->[0])"} + @{$h->{$from}->{$to}}) + { + print << " EOXML"; + + EOXML + } + + print << " EOXML"; + + + EOXML + + } + } + else + { + ## + ## Add-on selections for self-links for command types neighbor, + ## neighbor_history, and neighbor_score + ## Note, I'm not sure this would yield a valid result from elink + ## + + #This shows $from, but this is the 'when' for db_to conditional + print << " EOXML"; + + + + + + EOXML + } + + print << ' EOXML'; + + + EOXML + } + +## +## Add-on selections for self-links for command types neighbor, +## neighbor_history, and neighbor_score +## Note, I'm not sure this would yield a valid result from elink +## + +foreach my $from (grep {!exists($h->{$_})} @sorted_dblist) + { + print << "EOXML"; + + + + + + + + + + + + +EOXML + } + +## +## This is the corresponding code for using the selections to add the respective command line options +## + +print << 'EOXML'; + + +EOXML + +print << 'EOXML'; + + + +EOXML + +sub startXML + { + print << ' EOXML'; + + + 18.01 + 1.70 + +--user_email "$__user_email__" +#set admin_emails = ';'.join(str($__admin_users__).split(',')) +--admin_email "$admin_emails" + + + + `__ + +The `full disclaimer `__ is available on +their website + +Liability +~~~~~~~~~ + +For documents and software available from this server, the +U.S. Government does not warrant or assume any legal liability or +responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed. + +Endorsement +~~~~~~~~~~~ + +NCBI does not endorse or recommend any commercial +products, processes, or services. The views and opinions of authors +expressed on NCBI's Web sites do not necessarily state or reflect those +of the U.S. Government, and they may not be used for advertising or +product endorsement purposes. + +External Links +~~~~~~~~~~~~~~ + +Some NCBI Web pages may provide links to other Internet +sites for the convenience of users. NCBI is not responsible for the +availability or content of these external sites, nor does NCBI endorse, +warrant, or guarantee the products, services, or information described +or offered at these other Internet sites. Users cannot assume that the +external sites will abide by the same Privacy Policy to which NCBI +adheres. It is the responsibility of the user to examine the copyright +and licensing restrictions of linked pages and to secure all necessary +permissions. + ]]> + +#if $query_source.qss == "history_json": + --history_file $query_source.history_file +#else if $query_source.qss == "history_xml": + --history_xml $query_source.history_xml +#else if $query_source.qss == "id_file": + --id_list $query_source.id_file +#else if $query_source.qss == "id_list": + --id $query_source.id_list +#else if $query_source.qss == "id_xml": + --id_xml $query_source.id_xml +#else if $query_source.qss == "id_json": + --id_json $query_source.id_json +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @Book{ncbiEutils, + author = {Eric Sayers}, + title = {Entrez Programming Utilities Help}, + year = {2010}, + publisher = {National Center for Biotechnology Information, Bethesda, Maryland}, + note = {https://www.ncbi.nlm.nih.gov/books/NBK25500/} + } + + + + + biopython + + + + + + EOXML + } + +sub endXML + { + print << ' EOXML'; + + EOXML + } + +BEGIN {startXML()} +END {endXML()} + + +## +## Output formats for efetch mapped to galaxy formats +## + +#Based on: +#https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/?report=objectonly + +#Note: While json works for esearch and elink, the only database that supports +#json (according to an NLM support ticket I have about this) is snp + +#The output_format param value for these will be "rettype-retmode-format" + +#db galaxy retmode rettype format_name +__DATA__ +bioproject tabular text uilist List of UIDs +bioproject xml xml docsum Document summary +bioproject xml xml uilist List of UIDs +bioproject xml xml xml Full record +biosample tabular text uilist List of UIDs +biosample txt text full Full record +biosample xml xml docsum Document summary +biosample xml xml full Full record +biosample xml xml uilist List of UIDs +biosystems tabular text uilist List of UIDs +biosystems xml xml docsum Document summary +biosystems xml xml uilist List of UIDs +biosystems xml xml xml Full record +clinvar tabular text uilist List of UIDs +clinvar xml xml clinvarset ClinVar Set +clinvar xml xml docsum Document summary +clinvar xml xml uilist List of UIDs +clinvar xml none none Full +gds tabular text uilist List of UIDs +gds txt text summary Summary +gds xml xml docsum Document summary +gds xml xml uilist List of UIDs +gds xml none none Full +gene txt text gene_table Gene table +gene tabular text uilist List of UIDs +gene txt asn.1 none text ASN.1 +gene xml xml docsum Document summary +gene xml xml none Full +gene xml xml uilist List of UIDs +gtr tabular text uilist List of UIDs +gtr xml xml docsum Document summary +gtr xml xml gtracc GTR Test Report +gtr xml xml uilist List of UIDs +gtr xml none none Full +homologene fasta text fasta FASTA +homologene tabular text alignmentscores Alignment scores +homologene tabular text uilist List of UIDs +homologene txt asn.1 none text ASN.1 +homologene txt text homologene HomoloGene +homologene xml xml docsum Document summary +homologene xml xml none Full +homologene xml xml uilist List of UIDs +mesh tabular text uilist List of UIDs +mesh txt text full Full record +mesh xml xml docsum Document summary +mesh xml xml uilist List of UIDs +nlmcatalog tabular text uilist List of UIDs +nlmcatalog txt text none Full record +nlmcatalog xml xml docsum Document summary +nlmcatalog xml xml none Full +nlmcatalog xml xml uilist List of UIDs +nuccore binary asn.1 none binary ASN.1 +nuccore fasta text fasta FASTA +nuccore fasta text fasta_cds_aa CDS protein FASTA +nuccore fasta text fasta_cds_na CDS nucleotide FASTA +nuccore genbank text gb GenBank flat file +nuccore genbank text gbwithparts GenBank flat file with full sequence (contigs) +nuccore tabular text acc Accession number(s) +nuccore txt text ft Feature table +nuccore tabular text seqid SeqID string +nuccore tabular text uilist List of UIDs +nuccore txt text none text ASN.1 +nuccore xml xml docsum Document summary +nuccore xml xml fasta TinySeq +nuccore xml xml gb GBSeq +nuccore xml xml gbc INSDSeq +nuccore xml xml native Full record +nuccore xml xml uilist List of UIDs +nucest binary asn.1 none binary ASN.1 +nucest fasta text fasta FASTA +nucest genbank text gb GenBank flat file +nucest tabular text acc Accession number(s) +nucest tabular text seqid SeqID string +nucest tabular text uilist List of UIDs +nucest txt text est EST report +nucest txt text none text ASN.1 +nucest xml xml docsum Document summary +nucest xml xml fasta TinySeq +nucest xml xml gb GBSeq +nucest xml xml gbc INSDSeq +nucest xml xml native Full record +nucest xml xml uilist List of UIDs +nucgss binary asn.1 none binary ASN.1 +nucgss fasta text fasta FASTA +nucgss genbank text gb GenBank flat file +nucgss tabular text acc Accession number(s) +nucgss tabular text seqid SeqID string +nucgss tabular text uilist List of UIDs +nucgss txt text gss GSS report +nucgss txt text none text ASN.1 +nucgss xml xml docsum Document summary +nucgss xml xml fasta TinySeq +nucgss xml xml gb GBSeq +nucgss xml xml gbc INSDSeq +nucgss xml xml native Full record +nucgss xml xml uilist List of UIDs +pmc tabular text uilist List of UIDs +pmc txt text medline MEDLINE +pmc xml xml docsum Document summary +pmc xml xml none FULL +pmc xml xml uilist List of UIDs +popset binary asn.1 none binary ASN.1 +popset fasta text fasta FASTA +popset genbank text gb GenBank flat file +popset tabular text acc Accession number(s) +popset tabular text seqid SeqID string +popset tabular text uilist List of UIDs +popset txt text none text ASN.1 +popset xml xml docsum Document summary +popset xml xml fasta TinySeq +popset xml xml gb GBSeq +popset xml xml gbc INSDSeq +popset xml xml native Full record +popset xml xml uilist List of UIDs +protein binary asn.1 none binary ASN.1 +protein fasta text fasta FASTA +protein tabular text acc Accession number(s) +protein txt text ft Feature table +protein tabular text seqid SeqID string +protein tabular text uilist List of UIDs +protein txt text gp GenPept flat file +protein txt text none text ASN.1 +protein xml xml docsum Document summary +protein xml xml fasta TinySeq +protein xml xml gp GBSeq +protein xml xml gpc INSDSeq +protein xml xml ipg Identical Protein +protein xml xml native Full record +protein xml xml uilist List of UIDs +pubmed tabular text uilist List of UIDs +pubmed txt asn.1 none text ASN.1 +pubmed txt text abstract Abstract +pubmed txt text medline MEDLINE +pubmed xml xml docsum Document summary +pubmed xml xml none Full +pubmed xml xml uilist List of UIDs +sequences fasta text fasta FASTA +sequences tabular text acc Accession number(s) +sequences tabular text seqid SeqID string +sequences tabular text uilist List of UIDs +sequences txt text none text ASN.1 +sequences xml xml docsum Document summary +sequences xml xml uilist List of UIDs +sequences xml none none Full +snp fasta text fasta FASTA +snp json json docsum Document summary +snp json json uilist List of UIDs +snp tabular text ssexemplar SS Exemplar list +snp tabular text uilist List of UIDs +snp txt asn.1 none text ASN.1 +snp txt text chr Chromosome report +snp txt text docset Summary +snp txt text flt Flat file +snp txt text rsr RS Cluster report +snp xml xml docsum Document summary +snp xml xml none XML +snp xml xml uilist List of UIDs +sra tabular text uilist List of UIDs +sra xml xml docsum Document summary +sra xml xml full Full +taxonomy tabular text uilist List of UIDs +taxonomy xml xml none Full +taxonomy xml xml docsum Document summary +taxonomy xml xml uilist List of UIDs diff -r 1dff3adb0a97 -r b00212deaea7 macros.xml --- a/macros.xml Wed Mar 11 04:03:55 2020 -0400 +++ b/macros.xml Wed Sep 23 09:49:19 2020 +0000 @@ -1,7 +1,7 @@ 18.01 - 1.3 + 1.70 --user_email "$__user_email__" #set admin_emails = ';'.join(str($__admin_users__).split(',')) @@ -86,743 +86,51 @@ and licensing restrictions of linked pages and to secure all necessary permissions. ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if $query_source.qss == "history": +#if $query_source.qss == "history_json": --history_file $query_source.history_file +#else if $query_source.qss == "history_xml": + --history_xml $query_source.history_xml #else if $query_source.qss == "id_file": --id_list $query_source.id_file #else if $query_source.qss == "id_list": --id $query_source.id_list +#else if $query_source.qss == "id_xml": + --id_xml $query_source.id_xml +#else if $query_source.qss == "id_json": + --id_json $query_source.id_json #end if - + - - - - + + + + + + + - - + + + + + - + + + + + + + - + - - - - - @Book{ncbiEutils, @@ -836,12 +144,4400 @@ + python biopython - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/efetchin.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/efetchin.tabular Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,2 @@ +1899688395 +1896832511 diff -r 1dff3adb0a97 -r b00212deaea7 test-data/einfo.dblist.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/einfo.dblist.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,52 @@ + + + + + + pubmed + protein + nuccore + ipg + nucleotide + structure + sparcle + genome + annotinfo + assembly + bioproject + biosample + blastdbinfo + books + cdd + clinvar + gap + gapplus + grasp + dbvar + gene + gds + geoprofiles + homologene + medgen + mesh + ncbisearch + nlmcatalog + omim + orgtrack + pmc + popset + proteinclusters + pcassay + biosystems + pccompound + pcsubstance + seqannot + snp + sra + taxonomy + biocollections + gtr + + + + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.elinkin_1link.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.elinkin_1link.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,23 @@ +{ + "header": { + "type": "elink", + "version": "0.3" + }, + "linksets": [ + { + "dbfrom": "gene", + "ids": [ + "118502329" + ], + "linksetdbs": [ + { + "dbto": "nuccore", + "linkname": "gene_nuccore_refseqrna", + "links": [ + "1899688395" + ] + } + ] + } + ] +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.elinkin_1link_hist.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.elinkin_1link_hist.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,22 @@ +{ + "header": { + "type": "elink", + "version": "0.3" + }, + "linksets": [ + { + "dbfrom": "nuccore", + "ids": [ + "1899688395" + ], + "linksetdbhistories": [ + { + "dbto": "gene", + "linkname": "nuccore_gene", + "querykey": "1" + } + ], + "webenv": "MCID_5f60d98126049170ce66fe2e" + } + ] +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.elinkin_1link_id.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.elinkin_1link_id.tabular Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,1 @@ +118502329 diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.elinkin_allalllinks_id.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.elinkin_allalllinks_id.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,18 @@ + + + + + nuccore + + 1899688395 + 1896832511 + + + gene + nuccore_gene + 1 + + MCID_5f60e00e98743c5c3572195e + + + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.elinkin_alllinks_id.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.elinkin_alllinks_id.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,44 @@ + + + + + + gene + + 118502329 + + + nuccore + gene_nuccore + + + 1899688395 + + + 1896832511 + + + + + + nuccore + gene_nuccore_pos + + + 1896832511 + + + + + + nuccore + gene_nuccore_refseqrna + + + 1899688395 + + + + + + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearch_in_xmlid.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearch_in_xmlid.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,6 @@ + + +110 +118502329 + 118502329[UID] UID -1 N GROUP 118502329[UID] + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearch_in_xmlid_1link.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearch_in_xmlid_1link.tabular Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,2 @@ +1899688395 +1896832511 diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearchin_id.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearchin_id.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,25 @@ +{ + "header": { + "type": "esearch", + "version": "0.3" + }, + "esearchresult": { + "count": "1", + "retmax": "1", + "retstart": "0", + "idlist": [ + "118502329" + ], + "translationset": [], + "translationstack": [ + { + "term": "118502329[UID]", + "field": "UID", + "count": "-1", + "explode": "N" + }, + "GROUP" + ], + "querytranslation": "118502329[UID]" + } +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearchin_id.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearchin_id.tabular Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,1 @@ +118502329 diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearchin_id_1link.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearchin_id_1link.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,21 @@ + + + + + + gene + + 118502329 + + + nuccore + gene_nuccore_refseqrna + + + 1899688395 + + + + + + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/elink.esearchin_id_alllinks.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/elink.esearchin_id_alllinks.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,38 @@ +{ + "header": { + "type": "elink", + "version": "0.3" + }, + "linksets": [ + { + "dbfrom": "gene", + "ids": [ + "118502329" + ], + "linksetdbs": [ + { + "dbto": "nuccore", + "linkname": "gene_nuccore", + "links": [ + "1899688395", + "1896832511" + ] + }, + { + "dbto": "nuccore", + "linkname": "gene_nuccore_pos", + "links": [ + "1896832511" + ] + }, + { + "dbto": "nuccore", + "linkname": "gene_nuccore_refseqrna", + "links": [ + "1899688395" + ] + } + ] + } + ] +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/esearch.gene.hist.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.gene.hist.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,27 @@ +{ + "header": { + "type": "esearch", + "version": "0.3" + }, + "esearchresult": { + "count": "1", + "retmax": "1", + "retstart": "0", + "querykey": "1", + "webenv": "MCID_5f5fd696d2dc7951442b7849", + "idlist": [ + "118502329" + ], + "translationset": [], + "translationstack": [ + { + "term": "118502329[UID]", + "field": "UID", + "count": "-1", + "explode": "N" + }, + "GROUP" + ], + "querytranslation": "118502329[UID]" + } +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/esearch.gene.hist.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.gene.hist.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,6 @@ + + +1101MCID_5f5fd690407bc55fc76bad4e +118502329 + 118502329[UID] UID -1 N GROUP 118502329[UID] + diff -r 1dff3adb0a97 -r b00212deaea7 test-data/esearch.gene.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.gene.json Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,25 @@ +{ + "header": { + "type": "esearch", + "version": "0.3" + }, + "esearchresult": { + "count": "1", + "retmax": "1", + "retstart": "0", + "idlist": [ + "118502329" + ], + "translationset": [], + "translationstack": [ + { + "term": "118502329[UID]", + "field": "UID", + "count": "-1", + "explode": "N" + }, + "GROUP" + ], + "querytranslation": "118502329[UID]" + } +} diff -r 1dff3adb0a97 -r b00212deaea7 test-data/esearch.gene.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.gene.tabular Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,22 @@ +106632260 +100008587 +106632264 +106631781 +109910382 +109910381 +109910380 +109910379 +109864282 +109864281 +109864280 +109864279 +109864274 +109864273 +109864272 +109864271 +106631777 +100861532 +100169758 +100169768 +100169767 +100169766 diff -r 1dff3adb0a97 -r b00212deaea7 test-data/esearch.gene.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/esearch.gene.xml Wed Sep 23 09:49:19 2020 +0000 @@ -0,0 +1,6 @@ + + +110 +118502329 + 118502329[UID] UID -1 N GROUP 118502329[UID] +