comparison uniprotxml_downloader.py @ 5:265c35540faa draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
author galaxyp
date Fri, 04 Nov 2022 15:08:37 +0000
parents 12692567c7f9
children a371252a2cf6
comparison
equal deleted inserted replaced
4:12692567c7f9 5:265c35540faa
50 parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs') 50 parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs')
51 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs') 51 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs')
52 parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download') 52 parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
53 parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries') 53 parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
54 parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format') 54 parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
55 parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id'], default='taxonomy_name', help='query field')
55 parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml') 56 parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
56 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr') 57 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
57 (options, args) = parser.parse_args() 58 (options, args) = parser.parse_args()
58 taxids = set(options.taxon) 59 taxids = set(options.taxon)
59 if options.input: 60 if options.input:
64 fields = line.rstrip('\r\n').split('\t') 65 fields = line.rstrip('\r\n').split('\t')
65 if len(fields) > abs(options.column): 66 if len(fields) > abs(options.column):
66 taxid = fields[options.column].strip() 67 taxid = fields[options.column].strip()
67 if taxid: 68 if taxid:
68 taxids.add(taxid) 69 taxids.add(taxid)
69 taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids] 70 taxon_queries = [f'{options.field}:"{taxid}"' for taxid in taxids]
70 taxon_query = ' OR '.join(taxon_queries) 71 taxon_query = ' OR '.join(taxon_queries)
71 if options.output: 72 if options.output:
72 dest_path = options.output 73 dest_path = options.output
73 else: 74 else:
74 dest_path = "uniprot_%s.xml" % '_'.join(taxids) 75 dest_path = "uniprot_%s.xml" % '_'.join(taxids)
75 reviewed = " reviewed:%s" % options.reviewed if options.reviewed else '' 76 reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
76 try: 77 try:
77 url = 'https://www.uniprot.org/uniprot/' 78 url = 'https://rest.uniprot.org/uniprotkb/stream'
78 query = "%s%s" % (taxon_query, reviewed) 79 query = "%s%s" % (taxon_query, reviewed)
79 params = {'query': query, 'force': 'yes', 'format': options.format} 80 params = {'query': query, 'format': options.format}
80 if options.debug: 81 if options.debug:
81 print("%s ? %s" % (url, params), file=sys.stderr) 82 print("%s ? %s" % (url, params), file=sys.stderr)
82 data = parse.urlencode(params) 83 data = parse.urlencode(params)
83 print(f"Retrieving: {url+data}") 84 print(f"Retrieving: {url}?{data}")
84 adapter = TimeoutHTTPAdapter(max_retries=retry_strategy) 85 adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
86
85 http = requests.Session() 87 http = requests.Session()
86 http.mount("https://", adapter) 88 http.mount("https://", adapter)
87 response = http.post(url, data=params) 89 response = http.get(url, params=params)
88 http.close() 90 http.close()
91
92 if response.status_code != 200:
93 exit(f"Request failed with status code {response.status_code}:\n{response.text}")
94
89 with open(dest_path, 'w') as fh: 95 with open(dest_path, 'w') as fh:
90 fh.write(response.text) 96 fh.write(response.text)
97
91 if options.format == 'xml': 98 if options.format == 'xml':
92 with open(dest_path, 'r') as contents: 99 with open(dest_path, 'r') as contents:
93 while True: 100 while True:
94 line = contents.readline() 101 line = contents.readline()
95 if options.debug: 102 if options.debug: