Mercurial > repos > galaxyp > uniprotxml_downloader
comparison uniprotxml_downloader.py @ 0:0bd2688166a5 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit fa07533e9216dc40133a98e3129be9b87a963e80-dirty
author | galaxyp |
---|---|
date | Tue, 08 Mar 2016 12:03:49 -0500 |
parents | |
children | e1abc9a35c64 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0bd2688166a5 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 # | |
4 #------------------------------------------------------------------------------ | |
5 # University of Minnesota | |
6 # Copyright 2016, Regents of the University of Minnesota | |
7 #------------------------------------------------------------------------------ | |
8 # Author: | |
9 # | |
10 # James E Johnson | |
11 # | |
12 #------------------------------------------------------------------------------ | |
13 """ | |
14 import sys | |
15 import re | |
16 import optparse | |
17 import urllib | |
18 | |
19 | |
# Matches the document element of a UniProt XML file: "<uniprot" or
# "<ns:uniprot" for any namespace prefix "ns".
_UNIPROT_ROOT = re.compile(r'^<(\w*:)?uniprot')

# NOTE(review): this is the query endpoint the script has always used; confirm
# that it is still served by UniProt before relying on it.
_UNIPROT_URL = 'http://www.uniprot.org/uniprot/?query=%s&force=yes&format=xml'


def _parse_headers(msg):
    """Return the HTTP response headers in ``msg`` as a dict.

    ``msg`` is the message object returned by ``urlretrieve``; its string
    form is one ``Name: value`` pair per line.  Header names are lower-cased
    because HTTP header names are case-insensitive.  Lines without a colon
    and folded continuation lines are ignored instead of raising.
    """
    headers = {}
    for line in str(msg).strip().splitlines():
        name, sep, value = line.partition(':')
        if sep and not line[:1].isspace():
            headers[name.strip().lower()] = value.strip()
    return headers


def _is_uniprot_xml(path, debug=False):
    """Return True if the first element of the file at ``path`` is <uniprot>.

    Blank lines and lines starting with ``<?`` (the XML declaration and
    processing instructions) are skipped; the first remaining line must open
    the ``uniprot`` root element.  An empty file is not a UniProt XML file.
    When ``debug`` is true every line that is read is echoed to stderr.
    """
    with open(path, 'r') as contents:
        for line in contents:
            if debug:
                sys.stderr.write("%s\n" % line)
            stripped = line.strip()
            if not stripped or stripped.startswith('<?'):
                continue
            return _UNIPROT_ROOT.match(stripped) is not None
    return False


def __main__():
    """Download the UniProt XML for one or more NCBI taxon IDs.

    Command line options:
      -t/--taxon     NCBI taxon ID; may be repeated (default: 9606).
      -r/--reviewed  value for the UniProt "reviewed:" query filter.
      -o/--output    destination path (default: uniprot_<taxids>.xml).
      -v/--verbose   print taxon IDs, release, entry count and URL to stdout.
      -d/--debug     print the URL and the inspected lines to stderr.

    Exits with status 1 when the download fails, the response is empty, or
    the downloaded file does not start with a <uniprot> root element.
    """
    # Imported here so the script runs on both Python 2 and Python 3.
    try:
        from urllib import quote, urlretrieve
    except ImportError:
        from urllib.parse import quote
        from urllib.request import urlretrieve

    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='value for the UniProt "reviewed:" query filter (e.g. yes)')
    parser.add_option('-o', '--output', dest='output', help='file path for th downloaed uniprot xml')
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Print UniProt Info')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    options, _ = parser.parse_args()

    taxids = options.taxon if options.taxon else ['9606']
    taxon_query = ' OR '.join('taxonomy:"%s"' % taxid for taxid in taxids)
    dest_path = options.output or "uniprot_%s.xml" % '_'.join(taxids)
    reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
    query = taxon_query + reviewed

    # ``url`` is the human-readable form used in messages.  The request itself
    # uses a percent-encoded query so that spaces, quotes, '&' or '#' in the
    # user-supplied values cannot break or alter the request.
    url = _UNIPROT_URL % query
    request_url = _UNIPROT_URL % quote(query, safe=':')
    if options.debug:
        sys.stderr.write("%s\n" % url)

    try:
        _, msg = urlretrieve(request_url, dest_path)
        headers = _parse_headers(msg)
        # Header values are strings, so compare against '0', not 0.
        if headers.get('content-length') == '0':
            sys.stderr.write("%s\n" % url)
            sys.stderr.write("%s\n" % msg)
            sys.exit(1)
        if not _is_uniprot_xml(dest_path, options.debug):
            sys.stderr.write("failed: Not a uniprot xml file\n")
            sys.exit(1)

        if options.verbose:
            sys.stdout.write("NCBI Taxon ID:%s\n" % taxids)
            if 'x-uniprot-release' in headers:
                sys.stdout.write("UniProt-Release:%s\n" % headers['x-uniprot-release'])
            if 'x-total-results' in headers:
                sys.stdout.write("Entries:%s\n" % headers['x-total-results'])
            sys.stdout.write("%s\n" % url)
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the caller
        # through the exit status.  SystemExit raised above is not caught
        # here because it does not derive from Exception.
        sys.stderr.write("failed: %s\n" % e)
        sys.exit(1)


if __name__ == "__main__":
    __main__()