annotate uniprotxml_downloader.py @ 7:4ddc8da62671 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
author galaxyp
date Wed, 11 Dec 2024 13:34:54 +0000
parents a371252a2cf6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
2 """
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
3 #
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
4 #------------------------------------------------------------------------------
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
5 # University of Minnesota
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
6 # Copyright 2016, Regents of the University of Minnesota
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
7 #------------------------------------------------------------------------------
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
8 # Author:
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
9 #
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
10 # James E Johnson
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
11 #
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
12 #------------------------------------------------------------------------------
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
13 """
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
14 import optparse
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
15 import re
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
16 import sys
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
17 from urllib import parse
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
18
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
19 import requests
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
20 from requests.adapters import HTTPAdapter, Retry
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
21
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
22
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
23 def __main__():
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
24 # Parse Command Line
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
25 parser = optparse.OptionParser()
6
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
26 parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of search search_ids')
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
27 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains search search_ids')
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
28 parser.add_option('-s', '--search-id', dest='search_id', action='append', default=[], help='ID to search in Uniprot')
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
29 parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
30 parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta', 'tsv'], default='xml', help='output format')
6
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
31 parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id', 'accession'], default='taxonomy_name', help='query field')
3
1a5690a5eedc planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 6aac77a68426533c8c18c9f6aabd2df56a82de24
galaxyp
parents: 2
diff changeset
32 parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
33 parser.add_option('--output_columns', dest='output_columns', help='Columns to include in output (tsv)')
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
34 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
35 (options, args) = parser.parse_args()
6
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
36 search_ids = set(options.search_id)
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
37 if options.input:
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
38 with open(options.input, 'r') as inputFile:
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
39 for linenum, line in enumerate(inputFile):
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
40 if line.startswith('#'):
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
41 continue
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
42 fields = line.rstrip('\r\n').split('\t')
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
43 if len(fields) > abs(options.column):
6
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
44 search_id = fields[options.column].strip()
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
45 if search_id:
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
46 search_ids.add(search_id)
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
47 search_queries = [f'{options.field}:"{search_id}"' for search_id in search_ids]
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
48 search_query = ' OR '.join(search_queries)
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
49 if options.output:
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
50 dest_path = options.output
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
51 else:
6
a371252a2cf6 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
galaxyp
parents: 5
diff changeset
52 dest_path = "uniprot_%s.xml" % '_'.join(search_ids)
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
53 reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
54 try:
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
55 re_next_link = re.compile(r'<(.+)>; rel="next"')
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
56 retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504])
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
57 session = requests.Session()
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
58 session.mount("https://", HTTPAdapter(max_retries=retries))
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
59
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
60 def get_next_link(headers):
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
61 if "Link" in headers:
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
62 match = re_next_link.match(headers["Link"])
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
63 if match:
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
64 return match.group(1)
5
265c35540faa planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
galaxyp
parents: 4
diff changeset
65
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
66 def get_batch(batch_url):
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
67 while batch_url:
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
68 response = session.get(batch_url)
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
69 response.raise_for_status()
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
70 total = response.headers["x-total-results"]
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
71 release = response.headers["x-uniprot-release"]
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
72 yield response, total, release
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
73 batch_url = get_next_link(response.headers)
5
265c35540faa planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
galaxyp
parents: 4
diff changeset
74
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
75 params = {'size': 500, 'format': options.format, 'query': search_query + reviewed}
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
76 if options.output_columns:
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
77 params['fields'] = options.output_columns
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
78 url = f'https://rest.uniprot.org/uniprotkb/search?{parse.urlencode(params)}'
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
79 print(f"Downloading from:{url}")
5
265c35540faa planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
galaxyp
parents: 4
diff changeset
80
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
81 with open(dest_path, 'w') as fh:
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
82 for batch, total, release in get_batch(url):
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
83 fh.write(batch.text)
5
265c35540faa planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit ba0d41c2dc0dbc0a0d3d200f51e67c6598c7e4e9
galaxyp
parents: 4
diff changeset
84
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
85 if options.format == 'xml':
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
86 with open(dest_path, 'r') as contents:
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
87 while True:
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
88 line = contents.readline()
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
89 if options.debug:
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
90 print(line, file=sys.stderr)
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
91 if line is None:
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
92 break
2
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
93 if line.startswith('<?'):
e1abc9a35c64 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 15c2d28359584bcee25cdb456cff50892fff7347
galaxyp
parents: 0
diff changeset
94 continue
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
95 # pattern match <root or <ns:root for any ns string
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
96 pattern = r'^<(\w*:)?uniprot'
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
97 if re.match(pattern, line):
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
98 break
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
99 else:
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
100 print("failed: Not a uniprot xml file", file=sys.stderr)
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
101 exit(1)
7
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
102 print(f"Search IDs:{search_ids}")
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
103 print(f"UniProt-Release:{release}")
4ddc8da62671 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents: 6
diff changeset
104 print(f"Entries:{total}")
4
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
105 except Exception as e:
12692567c7f9 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 3
diff changeset
106 exit("%s" % e)
0
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
107
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
108
0bd2688166a5 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
galaxyp
parents:
diff changeset
# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()