annotate vep_rest/vep_rest.py @ 1:3645d1bcc7bb draft default tip

Uploaded
author saket-choudhary
date Sat, 18 Oct 2014 04:03:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
1 #!/usr/bin/env python
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
2 """
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
3 Script to interact with Ensembl Variant Effect Predictor (VEP)
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
4 webservice
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
5
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
6
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
7 The MIT License (MIT)
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
8
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
9 Copyright (c) 2014 Saket Choudhary<saketkc@gmail.com, skchoudh@usc.edu>
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
10
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
11 Permission is hereby granted, free of charge, to any person obtaining a copy
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
12 of this software and associated documentation files (the "Software"), to deal
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
13 in the Software without restriction, including without limitation the rights
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
14 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
15 copies of the Software, and to permit persons to whom the Software is
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
16 furnished to do so, subject to the following conditions:
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
17
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
18 The above copyright notice and this permission notice shall be included in
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
19 all copies or substantial portions of the Software.
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
20
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
22 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
23 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
24 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
25 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
26 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
27 THE SOFTWARE.
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
28
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
29 """
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
30 import argparse
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
31 import requests
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
32 import sys
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
33 import time
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
34 import vcf
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
35
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
# Template for the Ensembl GRCh37 VEP REST "region" endpoint.  The four
# placeholders are filled, in order, with: chromosome, start position,
# end position and alternate allele.  HTTPS is used so requests are not sent
# in clear text and do not depend on an HTTP -> HTTPS redirect.
URL = 'https://grch37.rest.ensembl.org/vep/human/region/{}:{}-{}/{}?content-type=application/json&protein=1'
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
37
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
class VEPRestClient:
    """Annotate the variants of a VCF file through the Ensembl VEP REST API.

    On construction the VCF file is read and one ``(key, url)`` pair is
    queued in ``self.pending_urls`` for every alternate allele of every
    record.  ``submit()`` then queries each URL and writes, per Ensembl
    protein (``ENSP...``), the amino-acid substitutions that were reported.
    """

    # Seconds to wait before retrying when the server sends a ``Retry-After``
    # header whose value is not a plain number (e.g. an HTTP date).
    DEFAULT_RETRY_DELAY = 1.0

    def __init__(self, input_file, output_file):
        """Queue one VEP request per alternate allele found in ``input_file``.

        Args:
            input_file: Path of the VCF file to read.
            output_file: Path of the file that ``submit()`` will write.
        """
        self.pending_urls = []
        self.output_file = output_file
        # Iterate inside the ``with`` block: the reader is lazy, and the
        # handle must stay open until the last record has been consumed.
        with open(input_file, 'r') as handle:
            for record in vcf.Reader(handle):
                # The endpoint takes a single allele, so multi-allelic
                # records are split instead of concatenated ("A","T" used to
                # be sent as the bogus allele "AT").
                for allele in record.ALT:
                    if allele is None:
                        # No alternate allele recorded: nothing to annotate.
                        continue
                    allele = str(allele)
                    url = URL.format(record.CHROM, record.POS, record.POS, allele)
                    key = "{}:{}-{}-{}".format(record.CHROM, record.POS, record.POS, allele)
                    self.pending_urls.append((key, url))

    @staticmethod
    def _retry_delay(headers):
        """Return the ``Retry-After`` delay in seconds, or None if absent.

        HTTP headers are strings, so the value is converted before it can be
        handed to ``time.sleep``.  A value that is not a number falls back to
        ``DEFAULT_RETRY_DELAY``.
        """
        raw = headers.get('Retry-After')
        if not raw:
            return None
        try:
            delay = float(raw)
        except (TypeError, ValueError):
            return VEPRestClient.DEFAULT_RETRY_DELAY
        return max(delay, 0.0)

    @staticmethod
    def _substitution(variant):
        """Build a substitution such as ``A12T`` from one transcript consequence.

        Returns None when the consequence has no protein position, when its
        ``amino_acids`` field is not of the form ``original/substituted``, or
        when the resulting substitution contains ``X``.
        """
        protein_start = variant.get('protein_start')
        amino_acids = variant.get('amino_acids')
        if not protein_start or not amino_acids:
            return None
        parts = amino_acids.split("/")
        if len(parts) != 2:
            return None
        substitution = parts[0] + str(protein_start) + parts[1]
        if "X" in substitution:
            return None
        return substitution

    def _get_with_retry(self, url):
        """GET ``url``; if the server asks to back off, wait and retry once."""
        reply = requests.get(url)
        delay = self._retry_delay(reply.headers)
        if delay is not None:
            time.sleep(delay)
            reply = requests.get(url)
        return reply

    def submit(self):
        """Query VEP for every queued variant and write the protein variants.

        The output file receives one line per protein that has at least one
        substitution: the protein id, a space, then the comma-separated
        substitutions.  Variants whose response cannot be decoded are
        reported on standard output and skipped.
        """
        protein_variants = {}
        for vcf_key, url in self.pending_urls:
            reply = self._get_with_retry(url)
            try:
                # A successful answer is a JSON list holding one result; error
                # payloads have a different shape and fail the ``[0]`` lookup.
                response = reply.json()[0]
            except (ValueError, KeyError, IndexError, TypeError) as error:
                print("{}: {}".format(vcf_key, error))
                continue
            if not response:
                continue
            # Some results carry no transcript consequences at all.
            for variant in response.get('transcript_consequences', ()):
                protein_id = variant.get('protein_id')
                if not protein_id or not protein_id.startswith('ENSP'):
                    continue
                substitutions = protein_variants.setdefault(protein_id, [])
                substitution = self._substitution(variant)
                if substitution is not None:
                    substitutions.append(substitution)

        lines = [
            "{} {}\n".format(protein_id, ",".join(substitutions))
            for protein_id, substitutions in protein_variants.items()
            if substitutions
        ]
        # Text mode: the payload is a str, which cannot be written to a
        # binary-mode file on Python 3.
        with open(self.output_file, 'w') as handle:
            handle.write("".join(lines))
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
109
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
110
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
111
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
112
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: both file locations are mandatory options.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--input_file", "Input file location"),
        ("--output_file", "Output file location"),
    ):
        cli.add_argument(flag, type=str, required=True, help=description)
    options = cli.parse_args(sys.argv[1:])
    # Build the client from the parsed paths and run the annotation.
    VEPRestClient(options.input_file, options.output_file).submit()
3645d1bcc7bb Uploaded
saket-choudhary
parents:
diff changeset
120