0
|
1 import sys
|
|
2 import re
|
|
3 import requests
|
|
4
|
|
# Pattern for the start of a valid input line. Whitespace-separated columns:
#   1. variant ID      - letters, digits, '_' or ':'
#   2. chromosome      - chr1..chr22, chrX or chrY
#   3. position        - digits
#   4. strand          - '+' or '-'
#   5. reference bases - a run of A/T/G/C, or '-' alone
#   6. alternate bases - a run of A/T/G/C, or '-' alone
# Matching is case-insensitive. The pattern has no end anchor, so anything
# after the sixth column is not validated.
# A raw string is used so that '\s' is not treated as a (invalid) string escape.
chromosome_re = re.compile(r'[0-9a-zA-Z_:]+\s+(chr[1-9]|chr1[0-9]|chr2[0-2]|chr[XY])\s+[0-9]+\s+[+-]\s+([ATGC]+|-)\s+([ATGC]+|-)', re.IGNORECASE)
|
|
6
|
|
def is_correct_input_line(line):
    """Return True if *line* starts with a well-formed variant record.

    The check is a prefix match against the module-level ``chromosome_re``
    pattern, so the line must begin with the record itself (no leading
    whitespace); text after the matched columns is not examined.

    Args:
        line: One raw line of the input file.

    Returns:
        bool: True when the pattern matches at the start of *line*,
        False otherwise.
    """
    return chromosome_re.match(line) is not None
|
|
12
|
|
def query(line, timeout=60):
    """Request the annotation of one variant from the service at ``query_url``.

    Args:
        line: Whitespace-separated variant fields. They are joined with
            underscores and sent as the ``mutation`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): the value is appended without URL-escaping, so the '+'
    # strand symbol is sent literally - confirm the service expects that
    # before switching to ``params=``.
    url = query_url + '?mutation=' + '_'.join(line.split())
    response = requests.get(url, timeout=timeout)
    return response.json()
|
|
18
|
|
# Endpoint of the annotation REST service used by query().
query_url = 'http://staging.cravat.us/rest/service/query'

# Fixed leading columns of the output table. Annotation keys with one of
# these names are not repeated among the annotation columns.
first_headers = ['ID',
                 'Chromosome',
                 'Position',
                 'Strand',
                 'Reference base(s)',
                 'Alternate base(s)',
                 'Sample']

# Command line: <input file> <output file>
input_filename = sys.argv[1]
output_filename = sys.argv[2]

# Annotation column names (sorted, without the names in first_headers).
# They are taken from the first annotation received, at which point the
# header row is written.
headers = []
header_not_loaded = True

# 'with' guarantees both files are closed even if a query or write fails.
with open(input_filename) as f, open(output_filename, 'w') as wf:
    for line in f:
        if not is_correct_input_line(line):
            # Strip only the line terminator; slicing off the last character
            # would eat real data when the final line has no newline.
            # The parenthesised single-argument form works on Python 2 and 3.
            print('Wrong format line:' + line.rstrip('\r\n'))
            continue

        # split() already discards the trailing newline and any extra blanks.
        toks = line.split()
        sample_id = toks[6] if len(toks) >= 7 else 'Unknown'

        # NOTE(review): every token after the ID is sent to the service,
        # including the sample column when present - confirm this is intended.
        annot = query(' '.join(toks[1:]))

        if header_not_loaded:
            # sorted() works on both Python 2 lists and Python 3 dict views.
            headers = [key for key in sorted(annot)
                       if key not in first_headers]
            wf.write('\t'.join(first_headers + headers) + '\n')
            header_not_loaded = False

        row = toks[:6] + [sample_id]
        # '%s' leaves strings unchanged and converts non-string JSON values
        # (numbers, null) instead of failing on string concatenation.
        row.extend('%s' % (annot[header],) for header in headers)
        wf.write('\t'.join(row) + '\n')
|