Mercurial > repos > iuc > ncbi_eutils_efetch
diff esummary.py @ 3:c09fcbe4b16a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit dae34e5e182b4cceb808d7353080f14aa9a78ca9"
author | iuc |
---|---|
date | Wed, 23 Sep 2020 09:48:53 +0000 |
parents | 0fc65a60436f |
children |
line wrap: on
line diff
```diff
--- a/esummary.py Wed Mar 11 04:03:36 2020 -0400
+++ b/esummary.py Wed Sep 23 09:48:53 2020 +0000
@@ -1,32 +1,106 @@
 #!/usr/bin/env python
-from __future__ import print_function
 
 import argparse
+import json
+import logging
+import os
 
 import eutils
 
 
+logging.basicConfig(level=logging.INFO)
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='ESummary', epilog='')
     parser.add_argument('db', help='Database to use')
+    parser.add_argument('--user_email', help="User email")
+    parser.add_argument('--admin_email', help="Admin email")
+
+    parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')
+
+    # ID source
+    parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink')
+    parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink')
     parser.add_argument('--id_list', help='list of ids')
     parser.add_argument('--id', help='Comma separated individual IDs')
-    parser.add_argument('--history_file', help='Filter existing history')
-    parser.add_argument('--user_email', help="User email")
-    parser.add_argument('--admin_email', help="Admin email")
+    parser.add_argument('--history_file', help='Fetch results from previous query')
+    parser.add_argument('--history_xml', help='Fetch results from previous query')
+
+    # Output
+    parser.add_argument('--retmode', help='Retmode')
+    parser.add_argument('--retstart', type=int, default=0, help='Retstart - Starting rec number (0)')
+    parser.add_argument('--retmax', type=int, default=20, help='Retmax - max number of recs returned (20, max 100000')
+
     args = parser.parse_args()
 
     c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
 
-    merged_ids = c.parse_ids(args.id_list, args.id, args.history_file)
-
     payload = {
         'db': args.db,
     }
 
-    if args.history_file is not None:
-        payload.update(c.get_history())
+    for attr in ('retmode', 'retmax', 'retstart'):
+        if getattr(args, attr, None) is not None:
+            payload[attr] = getattr(args, attr)
+
+    results = []
+    qkeys = []
+    if args.history_file is not None or args.history_xml is not None:
+        payload['retmode'] = args.retmode
+        if args.history_file is not None:
+            input_histories = c.get_histories()
+        else:
+            input_histories = c.extract_histories_from_xml_file(args.history_xml)
+
+        for hist in input_histories:
+            qkeys += [hist['query_key']]
+            tmp_payload = payload
+            tmp_payload.update(hist)
+            results += [c.summary(**tmp_payload)]
     else:
+        # There is no uilist retmode
+        if args.retmode == "uilist":
+            payload['retmode'] = 'xml'
+        else:
+            payload['retmode'] = args.retmode
+        merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json)
         payload['id'] = ','.join(merged_ids)
+        qkeys += [1]
+        results += [c.summary(**payload)]
+
+    # There could be multiple sets of results if a history was supplied
+    if args.history_file is not None or args.history_xml is not None:
+        # Multiple result sets can be returned
+        # Create a directory for the output files
+        current_directory = os.getcwd()
+        final_directory = os.path.join(current_directory, r'downloads')
+        if not os.path.exists(final_directory):
+            os.makedirs(final_directory)
 
-    print(c.summary(**payload))
+        logging.info("Writing files:")
+        count = 0
+        if args.retmode == 'json':
+            for result in results:
+                qkey = qkeys[count]
+                count += 1
+                file_path = os.path.join('downloads', '%s-querykey%s.json' % (args.db, qkey))
+                logging.info('%s-link%s.json' % (args.db, count))
+                with open(file_path, 'w') as handle:
+                    json_data = c.jsonstring2jsondata(result)
+                    handle.write(json.dumps(json_data, indent=4))
+        else:
+            for result in results:
+                qkey = qkeys[count]
+                count += 1
+                file_path = os.path.join('downloads', '%s-querykey%s.xml' % (args.db, qkey))
+                logging.info('%s-link%s.xml' % (args.db, count))
+                with open(file_path, 'w') as handle:
+                    handle.write(result)
+    else:
+        # When rettype is uilist, convert to text format (which elink does not do)
+        if args.retmode == 'json':
+            json_data = c.jsonstring2jsondata(results[0])
+            print(json.dumps(json_data, indent=4))
+        else:
+            print(results[0])
```