view cravat/cravat.py @ 1:c13857bac2c4 draft default tip

Updated for new CRAVAT server.
author insilicosolutions
date Tue, 08 Mar 2016 16:07:46 -0500
parents 9e29dd2972ab
children
line wrap: on
line source

import sys
import re
import requests

# Pattern for one whitespace-separated input record:
#   <id> <chromosome> <position> <strand> <reference base(s)> <alternate base(s)>
# - id: letters, digits, '_' and ':'
# - chromosome: chr1..chr22, chrX or chrY
# - strand: '+' or '-'
# - reference / alternate: a run of A/T/G/C, or a single '-'
# Matching is case-insensitive. Written as a raw string so that '\s' reaches
# the regex engine as-is instead of being an invalid string escape.
chromosome_re = re.compile(
    r'[0-9a-zA-Z_:]+\s+(chr[1-9]|chr1[0-9]|chr2[0-2]|chr[XY])\s+[0-9]+\s+[+-]\s+([ATGC]+|-)\s+([ATGC]+|-)',
    re.IGNORECASE)


def is_correct_input_line(line):
    """Return True if *line* starts with a well-formed variant record.

    The check uses ``chromosome_re.match``, which anchors only at the start
    of the string: the six leading fields must be valid, while anything
    following the alternate-base field (for example a sample column or the
    line terminator) is not validated.

    line: one line of the input file, with or without its trailing newline.
    Returns a bool.
    """
    return chromosome_re.match(line) is not None

def query(line, timeout=60):
    """Fetch the annotation of one variant from the CRAVAT REST service.

    The whitespace-separated fields of *line* are joined with underscores and
    sent as the ``mutation`` query parameter of a GET request to the
    module-level ``query_url``.

    line: whitespace-separated variant fields (the caller passes everything
        after the ID column).
    timeout: seconds to wait for the server before giving up. Without a
        timeout ``requests`` waits indefinitely, so one stalled request would
        hang the whole run. Pass ``None`` to wait forever.

    Returns the decoded JSON body of the response.
    Raises a ``requests`` exception on connection failure or timeout, and
    ``ValueError`` (or a subclass) if the response body is not valid JSON.
    """
    params = {'mutation': '_'.join(line.split())}
    response = requests.get(query_url, params=params, timeout=timeout)
    return response.json()

# Endpoint that query() sends its GET requests to.
query_url = 'http://www.cravat.us/rest/service/query'

# Fixed leading columns of the output table. They are written before the
# annotation columns, and annotation keys with the same name are skipped.
first_headers = [
    'ID',
    'Chromosome',
    'Position',
    'Strand',
    'Reference base(s)',
    'Alternate base(s)',
    'Sample',
]

# Script body: read variant records from the input file, query the CRAVAT
# service once per valid record, and write a tab-separated table (one header
# row followed by one row per record) to the output file.
#
# Command line: <script> <input file> <output file>
input_filename = sys.argv[1]
output_filename = sys.argv[2]

# Annotation column names, taken (sorted) from the first server response.
headers = []
# Subset of `headers` that is actually written, i.e. not already covered by
# the fixed leading columns in first_headers.
extra_headers = []
header_not_loaded = True

# 'with' guarantees both files are closed even if a query or a write raises
# part-way through the input.
with open(input_filename) as f, open(output_filename, 'w') as wf:
    for line in f:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would eat a real character (e.g. the alternate
        # base) on a final line that has no trailing newline.
        record = line.rstrip('\r\n')
        if not is_correct_input_line(record):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('Wrong format line:' + record)
            continue

        toks = record.split()
        # The seventh column, when present, is the sample identifier.
        sample_id = toks[6] if len(toks) >= 7 else 'Unknown'

        # NOTE(review): toks[1:] also forwards the sample column (when
        # present) to the server as part of the mutation string -- confirm
        # the service expects or ignores it.
        annot = query(' '.join(toks[1:]))

        if header_not_loaded:
            # The column set is fixed by the first response; sorting makes
            # the column order deterministic.
            headers = sorted(annot.keys())
            extra_headers = [h for h in headers if h not in first_headers]
            wf.write('\t'.join(first_headers + extra_headers) + '\n')
            header_not_loaded = False

        # NOTE(review): assumes every annotation value is a string and that
        # every response carries the keys seen in the first one -- TODO confirm.
        row = toks[:6] + [sample_id] + [annot[h] for h in extra_headers]
        wf.write('\t'.join(row) + '\n')