#!/usr/bin/env python
"""Fetch a page from the UCSC hgTables CGI and save it to a file.

Usage: ucsc.py input_params output_file

``input_params`` is a text file holding one ``key=value`` pair per line.
Two keys are treated specially:

* ``display``   -- the value is printed to stdout and not sent upstream.
* ``proxy_url`` -- the value replaces the default base URL.

Every other pair is URL-encoded and appended to the base URL.  The
response body is streamed to ``output_file`` in chunks of CHUNK bytes.

NOTE: the network code uses ``urllib.urlopen`` / ``urllib.urlencode`` and
therefore targets Python 2.
"""
import os
import sys
import urllib

assert sys.version_info[:2] >= (2, 4)

CHUNK = 2 ** 20  # 1Mb
MAXSIZE = CHUNK * 100

# Base URL used unless the parameter file supplies a ``proxy_url`` entry.
DEFAULT_URL = "http://genome.ucsc.edu/hgTables?"


def parse_params(lines, default_url=DEFAULT_URL):
    """Parse ``key=value`` lines into a base URL, query pairs and messages.

    Each line is stripped of surrounding whitespace; blank lines are
    skipped.  A line is split on its FIRST ``=`` only, so values that
    themselves contain ``=`` (for example a ``proxy_url`` carrying a query
    string) are kept intact.  A line without ``=`` yields an empty value.

    :param lines: iterable of text lines (an open file works).
    :param default_url: base URL returned when no ``proxy_url`` line occurs.
    :returns: tuple ``(url, params, messages)`` where ``url`` is the base
        URL (the last ``proxy_url`` value wins), ``params`` is a list of
        ``(key, value)`` tuples in input order, and ``messages`` is the
        list of ``display`` values in input order.
    """
    url = default_url
    # This must stay a list (not a dict) to allow multiple selections for
    # the same widget name (checkboxes).
    params = []
    messages = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        # Pad with '' so a line lacking '=' still unpacks into two fields.
        key, value = (line.split('=', 1) + [''])[:2]
        if key == 'display':
            messages.append(value)
        elif key == 'proxy_url':
            # Per the original comment: the URL is referred from proxy.py
            # and initialized by the tool xml.
            url = value
        else:
            params.append((key, value))
    return url, params, messages


def main(argv=None):
    """Run the script: read the parameter file, fetch the URL, save the body.

    :param argv: argument vector laid out like ``sys.argv`` (program name,
        params file, output file); defaults to ``sys.argv``.

    Prints a usage line and calls ``sys.exit()`` when the argument count
    is not exactly three.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 3:
        print('Usage ucsc.py input_params output_file')
        sys.exit()

    inp_file = argv[1]
    out_file = argv[2]

    # try/finally (not ``with``) keeps this valid on the asserted 2.4 minimum.
    inp = open(inp_file)
    try:
        base_url, params, messages = parse_params(inp)
    finally:
        inp.close()

    for message in messages:
        print(message)

    url = base_url + urllib.urlencode(params)

    page = urllib.urlopen(url)
    try:
        fp = open(out_file, 'wt')
        try:
            size = 0
            while True:
                data = page.read(CHUNK)
                if not data:
                    break
                # The limit is tested before the current chunk is counted,
                # so the output may exceed MAXSIZE by up to one chunk
                # before the marker line is written.
                if size > MAXSIZE:
                    fp.write('----- maximum datasize exceeded ---\n')
                    break
                size += len(data)
                fp.write(data)
        finally:
            fp.close()
    finally:
        page.close()


if __name__ == '__main__':
    main()