Mercurial > repos > insilicosolutions > cravat
view cravat/cravat.py @ 0:9e29dd2972ab draft
Uploaded
author | insilicosolutions |
---|---|
date | Wed, 13 May 2015 15:24:18 -0400 |
parents | |
children | c13857bac2c4 |
line wrap: on
line source
"""Annotate variants through the CRAVAT REST query service.

Usage: cravat.py INPUT_FILE OUTPUT_FILE

Each input line holds whitespace-separated columns: ID, chromosome,
position, strand, reference base(s), alternate base(s) and, optionally,
a sample ID.  Every well-formed line is sent to the service at
``query_url`` and written to OUTPUT_FILE as one tab-separated row, with
the annotation values returned by the service appended.  Lines that do
not match the expected format are reported on stdout and skipped.
"""

import re
import sys

query_url = 'http://staging.cravat.us/rest/service/query'

# Columns copied from the input line; they always lead every output row.
first_headers = ['ID', 'Chromosome', 'Position', 'Strand',
                 'Reference base(s)', 'Alternate base(s)', 'Sample']

# ID, chromosome (chr1-chr22, chrX, chrY), position, strand, ref, alt.
# Only the start of the line is matched, so trailing columns such as a
# sample ID are allowed.
chromosome_re = re.compile(
    r'[0-9a-zA-Z_:]+\s+(chr[1-9]|chr1[0-9]|chr2[0-2]|chr[XY])\s+[0-9]+\s+[+-]\s+([ATGC]+|-)\s+([ATGC]+|-)',
    re.IGNORECASE)


def is_correct_input_line(line):
    """Return True if *line* starts with six well-formed variant columns."""
    return chromosome_re.match(line) is not None


def query(line):
    """Look up one variant and return the decoded JSON response.

    *line* is the whitespace-separated variant description without the
    leading ID column; its fields are joined with underscores to form the
    ``mutation`` query parameter.  The rest of this script treats the
    returned value as a dict mapping annotation names to values.
    """
    # Imported here rather than at module level so that the validation and
    # formatting helpers stay importable without the third-party package.
    import requests

    url = query_url + '?mutation=' + '_'.join(line.split())
    r = requests.get(url)
    return r.json()


def _format_value(value):
    """Render one annotation value as text; a missing/null value is empty."""
    return '' if value is None else str(value)


def annotate(lines, out, lookup=query):
    """Annotate every valid line of *lines* and write the rows to *out*.

    lines  -- iterable of input lines (for example an open text file).
    out    -- object with a ``write`` method receiving the tab-separated
              output.
    lookup -- callable taking the variant description and returning the
              annotation dict; defaults to ``query``.

    The header row is written just before the first data row.  Its extra
    columns are the sorted keys of the first annotation that are not
    already in ``first_headers``; later rows reuse that column set.
    """
    extra_headers = None
    for line in lines:
        if not is_correct_input_line(line):
            print('Wrong format line:' + line.rstrip('\r\n'))
            continue

        # split() already discards the line terminator, so a final line
        # without a trailing newline keeps its last character.
        toks = line.split()
        sample_id = toks[6] if len(toks) >= 7 else 'Unknown'

        # NOTE(review): everything after the ID column, including an
        # optional sample ID, is forwarded to the service unchanged from
        # the original behaviour -- confirm the service expects that.
        annot = lookup(' '.join(toks[1:]))

        if extra_headers is None:
            extra_headers = sorted(
                key for key in annot if key not in first_headers)
            out.write('\t'.join(first_headers + extra_headers) + '\n')

        # .get() keeps the row aligned when a later response lacks a key
        # that the first response had.
        values = [_format_value(annot.get(key)) for key in extra_headers]
        out.write('\t'.join(toks[:6] + [sample_id] + values) + '\n')


def main(argv=None):
    """Command-line entry point: ``cravat.py INPUT_FILE OUTPUT_FILE``."""
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit('Usage: %s INPUT_FILE OUTPUT_FILE' % sys.argv[0])

    # Context managers close both files even if a lookup fails part-way.
    with open(args[0]) as f, open(args[1], 'w') as wf:
        annotate(f, wf)


if __name__ == '__main__':
    main()