comparison uniprotxml_downloader.py @ 5:265c35540faa draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author | galaxyp
date | Fri, 04 Nov 2022 15:08:37 +0000
parents | 12692567c7f9
children | a371252a2cf6
comparison
4:12692567c7f9 (parent) | 5:265c35540faa (this revision)
---|---
50 parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs') | 50 parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs') |
51 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs') | 51 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs') |
52 parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download') | 52 parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download') |
53 parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries') | 53 parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries') |
54 parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format') | 54 parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format') |
| 55 parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field') |
55 parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml') | 56 parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml') |
56 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr') | 57 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr') |
57 (options, args) = parser.parse_args() | 58 (options, args) = parser.parse_args() |
58 taxids = set(options.taxon) | 59 taxids = set(options.taxon) |
59 if options.input: | 60 if options.input: |
64 fields = line.rstrip('\r\n').split('\t') | 65 fields = line.rstrip('\r\n').split('\t') |
65 if len(fields) > abs(options.column): | 66 if len(fields) > abs(options.column): |
66 taxid = fields[options.column].strip() | 67 taxid = fields[options.column].strip() |
67 if taxid: | 68 if taxid: |
68 taxids.add(taxid) | 69 taxids.add(taxid) |
69 taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids] | 70 taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids] |
70 taxon_query = ' OR '.join(taxon_queries) | 71 taxon_query = ' OR '.join(taxon_queries) |
71 if options.output: | 72 if options.output: |
72 dest_path = options.output | 73 dest_path = options.output |
73 else: | 74 else: |
74 dest_path = "uniprot_%s.xml" % '_'.join(taxids) | 75 dest_path = "uniprot_%s.xml" % '_'.join(taxids) |
75 reviewed = " reviewed:%s" % options.reviewed if options.reviewed else '' | 76 reviewed = " reviewed:%s" % options.reviewed if options.reviewed else '' |
76 try: | 77 try: |
77 url = 'https://www.uniprot.org/uniprot/' | 78 url = 'https://rest.uniprot.org/uniprotkb/stream' |
78 query = "%s%s" % (taxon_query, reviewed) | 79 query = "%s%s" % (taxon_query, reviewed) |
79 params = {'query': query, 'force': 'yes', 'format': options.format} | 80 params = {'query': query, 'format': options.format} |
80 if options.debug: | 81 if options.debug: |
81 print("%s ? %s" % (url, params), file=sys.stderr) | 82 print("%s ? %s" % (url, params), file=sys.stderr) |
82 data = parse.urlencode(params) | 83 data = parse.urlencode(params) |
83 print(f"Retrieving: {url+data}") | 84 print(f"Retrieving: {url}?{data}") |
84 adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) | 85 adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) |
| 86 |
85 http = requests.Session() | 87 http = requests.Session() |
86 http.mount("https://", adapter) | 88 http.mount("https://", adapter) |
87 response = http.post(url, data=params) | 89 response = http.get(url, params=params) |
88 http.close() | 90 http.close() |
| 91 |
| 92 if response.status_code != 200: |
| 93 exit(f"Request failed with status code {response.status_code}:\n{response.text}") |
| 94 |
89 with open(dest_path, 'w') as fh: | 95 with open(dest_path, 'w') as fh: |
90 fh.write(response.text) | 96 fh.write(response.text) |
| 97 |
91 if options.format == 'xml': | 98 if options.format == 'xml': |
92 with open(dest_path, 'r') as contents: | 99 with open(dest_path, 'r') as contents: |
93 while True: | 100 while True: |
94 line = contents.readline() | 101 line = contents.readline() |
95 if options.debug: | 102 if options.debug: |
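
For context, the sketch below illustrates the request flow that this revision switches to: building the `field:"taxid"` clauses, appending the optional `reviewed:` filter, and issuing a GET against the UniProtKB stream endpoint instead of a POST to the retired www.uniprot.org/uniprot/ URL. This is a minimal, standalone approximation, not the repository's code: the helper name `fetch_uniprot`, the 60-second timeout, and the example taxon 9606 are illustrative assumptions.

# Minimal sketch of the updated request flow (assumed behaviour; helper name,
# timeout, and example values are illustrative, not from the repository).
import requests

def fetch_uniprot(taxids, field="taxonomy_name", fmt="xml", reviewed=None):
    # One clause per taxon, e.g. taxonomy_id:"9606", joined with OR,
    # mirroring the taxon_queries list in the diff above.
    query = " OR ".join(f'{field}:"{t}"' for t in taxids)
    if reviewed:
        query += f" reviewed:{reviewed}"
    params = {"query": query, "format": fmt}
    # The new code uses GET with query parameters against the REST endpoint.
    response = requests.get("https://rest.uniprot.org/uniprotkb/stream",
                            params=params, timeout=60)
    if response.status_code != 200:
        raise RuntimeError(
            f"Request failed with status code {response.status_code}:\n{response.text}")
    return response.text

if __name__ == "__main__":
    # Example: reviewed entries for taxon ID 9606 in FASTA format
    # (taxon chosen purely for illustration).
    print(fetch_uniprot(["9606"], field="taxonomy_id", fmt="fasta",
                        reviewed="true")[:200])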