Mercurial > repos > insilicosolutions > cravat
diff cravat/cravat.py @ 0:9e29dd2972ab draft
Uploaded
author | insilicosolutions |
---|---|
date | Wed, 13 May 2015 15:24:18 -0400 |
parents | |
children | c13857bac2c4 |
line wrap: on
line diff
"""Annotate variant lines through the CRAVAT REST query service.

Usage: cravat.py INPUT_FILE OUTPUT_FILE

(Recovered from the diff that added b/cravat/cravat.py on
Wed May 13 15:24:18 2015 -0400.)

Every well-formed input line is sent to the service at ``query_url`` and
one tab-separated row is written to OUTPUT_FILE: the fixed columns listed
in ``first_headers`` followed by the annotation fields returned by the
service.  Lines that do not match ``chromosome_re`` are reported on
standard output and skipped.
"""
import re
import sys

import requests

# Whitespace-separated columns, matched case-insensitively from the start
# of the line: ID, chromosome (chr1-chr22, chrX, chrY), position, strand
# (+ or -), reference base(s), alternate base(s).  Each allele is either a
# run of A/T/G/C or a single '-'.  The pattern is not anchored at the end,
# so extra trailing columns (e.g. a sample) are accepted.
chromosome_re = re.compile(
    r'[0-9a-zA-Z_:]+\s+'
    r'(chr[1-9]|chr1[0-9]|chr2[0-2]|chr[XY])\s+'
    r'[0-9]+\s+[+-]\s+'
    r'([ATGC]+|-)\s+([ATGC]+|-)',
    re.IGNORECASE)

query_url = 'http://staging.cravat.us/rest/service/query'

# Fixed leading columns of the output table; annotation keys with the same
# names are not repeated among the extra columns.
first_headers = ['ID',
                 'Chromosome',
                 'Position',
                 'Strand',
                 'Reference base(s)',
                 'Alternate base(s)',
                 'Sample']


def is_correct_input_line(line):
    """Return True if *line* starts with the columns ``chromosome_re`` expects."""
    return chromosome_re.match(line) is not None


def query(line):
    """Query the service for one variant and return the decoded JSON reply.

    *line* is split on whitespace and its fields are joined with '_' to
    form the ``mutation`` query parameter.
    """
    # NOTE(review): the fields (including the strand '+') are appended to
    # the URL without percent-encoding; confirm the service expects this
    # before switching to ``params=``.
    url = query_url + '?mutation=' + '_'.join(line.split())
    r = requests.get(url)
    return r.json()


def _as_text(value):
    """Format a decoded JSON value for a tab-separated cell.

    '%s' formatting leaves text unchanged (also unicode on Python 2) and
    stringifies numbers/None instead of failing on concatenation.
    """
    return '%s' % (value,)


def main():
    """Read variants from argv[1] and write the annotated table to argv[2]."""
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]

    # Annotation columns, fixed by the first successful reply.
    extra_headers = None

    # 'with' closes both files even if a request or JSON decode fails.
    with open(input_filename) as f, open(output_filename, 'w') as wf:
        for line in f:
            if not is_correct_input_line(line):
                print('Wrong format line:' + line.rstrip('\r\n'))
                continue

            # split() already discards the line terminator, so a final
            # line without a trailing newline keeps its last character.
            toks = line.split()
            sample_id = toks[6] if len(toks) >= 7 else 'Unknown'

            # Everything after the ID is forwarded to the service.
            annot = query(' '.join(toks[1:]))

            if extra_headers is None:
                extra_headers = [h for h in sorted(annot)
                                 if h not in first_headers]
                wf.write('\t'.join(first_headers + extra_headers) + '\n')

            row = toks[:6] + [sample_id]
            # A reply lacking one of the first reply's keys raises
            # KeyError, as before.
            row.extend(_as_text(annot[h]) for h in extra_headers)
            wf.write('\t'.join(row) + '\n')


if __name__ == '__main__':
    main()