comparison efetch.py @ 3:c09fcbe4b16a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit dae34e5e182b4cceb808d7353080f14aa9a78ca9"
author iuc
date Wed, 23 Sep 2020 09:48:53 +0000
parents 0fc65a60436f
children 6c4b39080ed7
comparison
equal deleted inserted replaced
2:0977ec0f3ba8 3:c09fcbe4b16a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
2 import argparse 3 import argparse
4 import glob
5 import json
6 import logging
7 import os
8
3 9
4 import eutils 10 import eutils
11
12
13 logging.basicConfig(level=logging.INFO)
14
15
def handleEfetchException(e, db, payload):
    """Report a failed efetch query and leave a marker file for the user.

    Logs the exception together with the database name and the payload that
    was sent, makes sure a ``downloads`` folder exists in the current working
    directory, prints its current contents, and writes ``no_results.txt``
    into it.

    Args:
        e: The exception raised by the efetch call.
        db: Name of the queried database.
        payload: Mapping of the parameters used for the query.
    """
    # default=str keeps this handler from raising TypeError on a value that
    # JSON cannot encode, which would hide the exception being reported.
    logging.error(
        'No results returned. This could either be due to no records matching '
        'the supplied IDs for the query database or it could be an error due '
        'to invalid parameters. The reported exception was "%s".\n\n'
        'Payload used for the efetch query to database "%s"\n\n%s',
        e, db, json.dumps(payload, indent=4, default=str))

    # Create a file in the downloads folder so that the user can access run information
    final_directory = os.path.join(os.getcwd(), 'downloads')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(final_directory, exist_ok=True)

    # The listing is printed before the marker file is written.
    print('The following files were downloaded:')
    print(os.listdir(final_directory))

    # Write through the directory path computed above so the folder that was
    # created and the file location cannot drift apart.
    file_path = os.path.join(final_directory, 'no_results.txt')
    with open(file_path, 'w') as handle:
        handle.write('No results')
31
32
def localFetch(db, gformat, newname, **payload):
    """Run one efetch query and rename the files it leaves in ``downloads``.

    Uses the module-level client ``c`` created in the ``__main__`` section.
    Every file matching ``downloads/EFetch *`` is renamed to
    ``<newname><chunk>.<gformat>``, with chunk numbers starting at 1.

    Args:
        db: Name of the database to query.
        gformat: Extension given to the renamed files.
        newname: Path prefix for the renamed files.
        **payload: Keyword arguments passed straight to ``c.fetch``.

    Returns:
        The exception raised while fetching or renaming, or ``None`` when
        everything succeeded. The exception is reported through
        ``handleEfetchException`` and not re-raised here.
    """
    problem = None
    try:
        c.fetch(db, **payload)

        # glob.glob() promises no particular order; sorting (lexically) makes
        # the chunk numbering reproducible between runs.
        downloaded = sorted(glob.glob('downloads/EFetch *'))
        for chunk, path in enumerate(downloaded, start=1):
            os.rename(path, '%s%s.%s' % (newname, chunk, gformat))

    except Exception as e:
        problem = e
        handleEfetchException(e, db, payload)
    else:
        print('The following files were downloaded:')
        print(os.listdir('downloads'))

    return problem
5 49
6 50
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, build the efetch payload,
    # then fetch either once per stored history or once for a list of IDs.
    parser = argparse.ArgumentParser(description='EFetch', epilog='')
    parser.add_argument('db', help='Database to use')
    parser.add_argument('--user_email', help="User email")
    parser.add_argument('--admin_email', help="Admin email")

    parser.add_argument('--version', action='version', version=eutils.Client.getVersion(), help='Version (reports Biopython version)')

    # ID source
    parser.add_argument('--id_json', help='list of ids in a json file as returned by esearch or elink')
    parser.add_argument('--id_xml', help='list of ids in an xml file as returned by esearch or elink')
    parser.add_argument('--id_list', help='list of ids')
    parser.add_argument('--id', help='Comma separated individual IDs')
    parser.add_argument('--history_file', help='Fetch results from previous query (JSON)')
    parser.add_argument('--history_xml', help='Fetch results from previous query (XML)')

    # Output
    parser.add_argument('--retmode', help='Retmode')
    parser.add_argument('--rettype', help='Rettype')
    parser.add_argument('--galaxy_format', help='Galaxy format')
    args = parser.parse_args()

    # Kept at module level on purpose: localFetch() reads this client as the
    # global name ``c``.
    c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)

    # Output options shared by every query issued below.
    payload = {}
    for attr in ('retmode', 'rettype'):
        if getattr(args, attr, None) is not None:
            payload[attr] = getattr(args, attr)

    if args.history_file is not None or args.history_xml is not None:
        if args.history_file is not None:
            input_histories = c.get_histories()
        else:
            input_histories = c.extract_histories_from_xml_file(args.history_xml)

        problem = None
        for hist in input_histories:
            qkey = hist['query_key']
            # Work on a copy so the keys of one history never leak into the
            # shared payload used for the following histories.
            tmp_payload = dict(payload)
            tmp_payload.update(hist)
            newname = 'downloads/EFetch-%s-%s-querykey%s-chunk' % (args.rettype, args.retmode, qkey)
            # NOTE(review): ``problem`` is overwritten on every pass, so only
            # a failure of the LAST history is raised below. Confirm whether
            # earlier failures should also fail the run.
            problem = localFetch(args.db, args.galaxy_format, newname, **tmp_payload)

            # Move this history's results aside before the next fetch runs.
            if os.path.exists('downloads'):
                os.rename('downloads', 'downloads-qkey%s' % (qkey))

        if not os.path.exists('downloads'):
            os.makedirs('downloads')

        # Gather the per-history folders back into a single downloads folder.
        for relpath in glob.glob('downloads-qkey*/*'):
            filename = os.path.basename(relpath)
            os.rename(relpath, 'downloads/%s' % (filename))

        if problem is not None:
            raise problem

    else:
        merged_ids = c.parse_ids(args.id_list, args.id, args.history_file, args.id_xml, args.id_json)
        payload['id'] = ','.join(merged_ids)
        newname = 'downloads/EFetch-%s-%s-chunk' % (args.rettype, args.retmode)
        # The return value is ignored here: a failed fetch has already been
        # logged and marked by localFetch() and is not re-raised.
        localFetch(args.db, args.galaxy_format, newname, **payload)