1
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Script to interact with the Ensembl Variant Effect Predictor (VEP)
|
|
4 webservice
|
|
5
|
|
6
|
|
7 The MIT License (MIT)
|
|
8
|
|
9 Copyright (c) 2014 Saket Choudhary<saketkc@gmail.com, skchoudh@usc.edu>
|
|
10
|
|
11 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12 of this software and associated documentation files (the "Software"), to deal
|
|
13 in the Software without restriction, including without limitation the rights
|
|
14 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15 copies of the Software, and to permit persons to whom the Software is
|
|
16 furnished to do so, subject to the following conditions:
|
|
17
|
|
18 The above copyright notice and this permission notice shall be included in
|
|
19 all copies or substantial portions of the Software.
|
|
20
|
|
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
27 THE SOFTWARE.
|
|
28
|
|
29 """
|
|
30 import argparse
|
|
31 import requests
|
|
32 import sys
|
|
33 import time
|
|
34 import vcf
|
|
35
|
|
# Template for the Ensembl GRCh37 VEP REST "region" endpoint. The four
# positional placeholders are filled, in order, with: chromosome, start
# position, end position and the alternate-allele string. The query string
# asks for a JSON response and sets protein=1 (presumably so that responses
# carry the `protein_id` field this client reads -- confirm against the
# Ensembl REST documentation).
URL = 'http://grch37.rest.ensembl.org/vep/human/region/{}:{}-{}/{}?content-type=application/json&protein=1'
|
|
37
|
|
class VEPRestClient(object):
    """Look up every variant of a VCF file in the VEP REST service.

    The constructor turns each VCF record into a request URL (built from the
    module-level ``URL`` template); ``submit`` performs the requests, gathers
    amino-acid substitutions per protein id and writes them to the output
    file, one protein per line.
    """

    # Seconds to wait when a ``Retry-After`` header is present but its value
    # cannot be used as a number of seconds.
    DEFAULT_RETRY_DELAY = 1.0

    def __init__(self, input_file, output_file):
        """Read ``input_file`` and queue one request per VCF record.

        Args:
            input_file: Path of the VCF file to read.
            output_file: Path that ``submit`` writes its results to.

        After construction ``self.pending_urls`` holds ``(key, url)`` tuples,
        where ``key`` is ``"CHROM:POS-POS-ALT"`` and ``url`` is the request
        URL for that record.
        """
        self.pending_urls = []
        self.output_file = output_file
        # The context manager guarantees the VCF handle is closed once all
        # records have been consumed (the reader pulls records lazily).
        with open(input_file, 'r') as handle:
            for record in vcf.Reader(handle):
                # NOTE(review): several ALT alleles are concatenated without
                # a separator -- confirm this is what the endpoint expects.
                alleles = "".join(str(x) for x in record.ALT)
                url = URL.format(record.CHROM, record.POS, record.POS, alleles)
                key = "{}:{}-{}-{}".format(
                    record.CHROM, record.POS, record.POS, alleles)
                self.pending_urls.append((key, url))

    def submit(self):
        """Request every pending URL and write the collected substitutions.

        For each response, every entry of ``transcript_consequences`` whose
        ``protein_id`` starts with ``ENSP`` contributes a substitution of the
        form ``<original><protein_start><substituted>``. Substitutions that
        contain ``"X"`` are dropped. Proteins without any substitution are not
        written. Each output line is ``"<protein_id> <sub1>,<sub2>,...\\n"``.

        Responses that cannot be decoded are reported on stdout and skipped.
        """
        protein_variants = {}
        for _key, url in self.pending_urls:
            response = self._fetch(url)
            if not response:
                continue
            # Not every response has transcript consequences; treat a missing
            # key as "nothing to collect" instead of aborting the whole run.
            for variant in response.get('transcript_consequences', []):
                self._record_substitution(variant, protein_variants)

        lines = [
            "{} {}\n".format(protein_id, ",".join(substitutions))
            for protein_id, substitutions in protein_variants.items()
            if substitutions
        ]
        # Text mode: the payload is a str, which a binary-mode handle rejects
        # on Python 3.
        with open(self.output_file, 'w') as f:
            f.write("".join(lines))

    @classmethod
    def _retry_delay(cls, header_value):
        """Convert a ``Retry-After`` header value to seconds for time.sleep."""
        try:
            delay = float(header_value)
        except (TypeError, ValueError):
            return cls.DEFAULT_RETRY_DELAY
        # Negative, NaN and infinite values are all unusable for time.sleep.
        if not 0.0 <= delay < float('inf'):
            return cls.DEFAULT_RETRY_DELAY
        return delay

    @classmethod
    def _fetch(cls, url):
        """Return the first element of the JSON response, or None on failure."""
        request = requests.get(url)
        retry_after = request.headers.get('Retry-After')
        if retry_after:
            # The header value is a string; time.sleep needs a number.
            time.sleep(cls._retry_delay(retry_after))
            request = requests.get(url)
        try:
            return request.json()[0]
        except Exception as e:
            # TODO Better error handling
            # Best effort: report the undecodable response and move on.
            print(e)
            return None

    @staticmethod
    def _record_substitution(variant, protein_variants):
        """Add the substitution described by ``variant`` to ``protein_variants``."""
        protein_id = variant.get('protein_id')
        if not protein_id or not protein_id.startswith('ENSP'):
            return
        substitutions = protein_variants.setdefault(protein_id, [])
        protein_start = variant.get('protein_start')
        if not protein_start:
            return
        try:
            amino_acids = variant['amino_acids'].split("/")
        except (KeyError, AttributeError):
            # No usable amino-acid annotation on this consequence.
            return
        if len(amino_acids) != 2:
            # Only "<original>/<substituted>" pairs describe a substitution.
            return
        substitution = amino_acids[0] + str(protein_start) + amino_acids[1]
        if "X" not in substitution:
            substitutions.append(substitution)
|
|
109
|
|
110
|
|
111
|
|
112
|
|
if __name__ == "__main__":
    # Command-line entry point: both file locations are mandatory options.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--input_file", "Input file location"),
        ("--output_file", "Output file location"),
    ):
        cli.add_argument(flag, type=str, required=True, help=description)
    options = cli.parse_args(sys.argv[1:])
    # Queue one request per VCF record, then query VEP and write the results.
    client = VEPRestClient(options.input_file, options.output_file)
    client.submit()
|
|
120
|