Mercurial > repos > galaxyp > uniprotxml_downloader
diff uniprotxml_downloader.py @ 5:265c35540faa draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author | galaxyp |
---|---|
date | Fri, 04 Nov 2022 15:08:37 +0000 |
parents | 12692567c7f9 |
children | a371252a2cf6 |
line wrap: on
line diff
```diff
--- a/uniprotxml_downloader.py Tue Jun 01 11:54:47 2021 +0000
+++ b/uniprotxml_downloader.py Fri Nov 04 15:08:37 2022 +0000
@@ -52,6 +52,7 @@
     parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
     parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
     parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
+    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
     parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
     parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
     (options, args) = parser.parse_args()
@@ -66,7 +67,7 @@
                     taxid = fields[options.column].strip()
                     if taxid:
                         taxids.add(taxid)
-    taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
+    taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
     taxon_query = ' OR '.join(taxon_queries)
     if options.output:
         dest_path = options.output
@@ -74,20 +75,26 @@
         dest_path = "uniprot_%s.xml" % '_'.join(taxids)
     reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
     try:
-        url = 'https://www.uniprot.org/uniprot/'
+        url = 'https://rest.uniprot.org/uniprotkb/stream'
         query = "%s%s" % (taxon_query, reviewed)
-        params = {'query': query, 'force': 'yes', 'format': options.format}
+        params = {'query': query, 'format': options.format}
         if options.debug:
             print("%s ? %s" % (url, params), file=sys.stderr)
         data = parse.urlencode(params)
-        print(f"Retrieving: {url+data}")
+        print(f"Retrieving: {url}?{data}")
         adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
+
         http = requests.Session()
         http.mount("https://", adapter)
-        response = http.post(url, data=params)
+        response = http.get(url, params=params)
         http.close()
+
+        if response.status_code != 200:
+            exit(f"Request failed with status code {response.status_code}:\n{response.text}")
+
         with open(dest_path, 'w') as fh:
             fh.write(response.text)
+
         if options.format == 'xml':
             with open(dest_path, 'r') as contents:
                 while True:
```