Mercurial > repos > insilicosolutions > cravat
view cravat/cravat.py @ 1:c13857bac2c4 draft default tip
Updated for new CRAVAT server.
author | insilicosolutions |
---|---|
date | Tue, 08 Mar 2016 16:07:46 -0500 |
parents | 9e29dd2972ab |
children |
line wrap: on
line source
"""Annotate variants through the CRAVAT REST query service.

Usage: cravat.py INPUT_FILE OUTPUT_FILE

Every input line holds whitespace-separated fields: ID, chromosome,
position, strand, reference base(s), alternate base(s) and, optionally, a
sample ID.  Each well-formed line is sent to the service and written to
OUTPUT_FILE as one tab-separated row, followed by the annotation values
returned for it.  Malformed lines are reported on standard output and
skipped.

The code deliberately sticks to syntax that is valid on both Python 2.7
and Python 3.
"""
import re
import sys

# Endpoint queried once per well-formed input line.
query_url = 'http://www.cravat.us/rest/service/query'

# Seconds to wait on the service before a request is abandoned; without a
# timeout a stalled connection would hang the whole run.
query_timeout = 60

# Fixed leading output columns.  Annotation keys carrying one of these
# names are not repeated among the trailing annotation columns.
first_headers = ['ID', 'Chromosome', 'Position', 'Strand',
                 'Reference base(s)', 'Alternate base(s)', 'Sample']

# Shape of an acceptable input line.  The pattern is only anchored at the
# start of the line, so extra trailing columns (e.g. the sample ID) are
# allowed.
chromosome_re = re.compile(
    r'[0-9a-zA-Z_:]+\s+'                          # variant ID
    r'(chr[1-9]|chr1[0-9]|chr2[0-2]|chr[XY])\s+'  # chromosome: chr1-22, X, Y
    r'[0-9]+\s+'                                  # position
    r'[+-]\s+'                                    # strand
    r'([ATGC]+|-)\s+'                             # reference base(s) or '-'
    r'([ATGC]+|-)',                               # alternate base(s) or '-'
    re.IGNORECASE)


def is_correct_input_line(line):
    """Return True when `line` starts with a well-formed variant record."""
    return chromosome_re.match(line) is not None


def query(line):
    """Fetch the annotation for one variant from the service at `query_url`.

    `line` is split on whitespace, its fields are joined with underscores
    and the result is sent as the ``mutation`` query parameter.

    Returns the decoded JSON body of the response.

    Raises requests.exceptions.RequestException on connection failures,
    timeouts and HTTP error statuses, and ValueError (or a subclass) when
    the body is not valid JSON.
    """
    # Imported lazily so that the validation helpers in this module stay
    # usable when the HTTP client library is not installed.
    import requests

    params = {'mutation': '_'.join(line.split())}
    r = requests.get(query_url, params=params, timeout=query_timeout)
    # Fail on the HTTP status itself rather than on whatever an error page
    # happens to contain.
    r.raise_for_status()
    return r.json()


def _format_value(value):
    """Render one annotation value as the text of a tab-separated cell."""
    if value is None:
        return ''
    # The one-element tuple keeps '%' from unpacking list/tuple values.
    return '%s' % (value,)


def main(input_filename, output_filename, annotate=query):
    """Annotate every valid line of `input_filename` into `output_filename`.

    `annotate` is called with the space-joined fields that follow the ID
    and must return a dict of annotation values; it defaults to `query`.

    The header row is written when the first valid line has been
    annotated: `first_headers` followed by the sorted keys of that first
    annotation (minus any key already named in `first_headers`).  Those
    same keys are looked up for every later row; a missing key or a None
    value yields an empty cell.
    """
    extra_headers = None  # decided by the first successful annotation
    with open(input_filename) as f, open(output_filename, 'w') as wf:
        for line in f:
            # Strip only the line terminator: slicing off the last
            # character would eat real data on a final line that has no
            # trailing newline.
            record = line.rstrip('\r\n')
            if not is_correct_input_line(record):
                print('Wrong format line:' + record)
                continue
            toks = record.split()
            sample_id = toks[6] if len(toks) >= 7 else 'Unknown'
            # NOTE(review): toks[1:] also carries the sample column (and
            # anything after it) when present, so those fields end up in
            # the 'mutation' parameter -- confirm the service expects that.
            annot = annotate(' '.join(toks[1:]))
            if extra_headers is None:
                extra_headers = [name for name in sorted(annot)
                                 if name not in first_headers]
                wf.write('\t'.join(first_headers + extra_headers) + '\n')
            row = toks[:6] + [sample_id]
            row.extend(_format_value(annot.get(name))
                       for name in extra_headers)
            wf.write('\t'.join(row) + '\n')


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('Usage: %s INPUT_FILE OUTPUT_FILE' % sys.argv[0])
    main(sys.argv[1], sys.argv[2])