0
|
1 #!/usr/bin/python
|
|
2
|
|
3 import sys
|
|
4 import optparse
|
|
5 import csv
|
|
6 import re
|
|
7
|
|
def main():
    """Parse the command line and annotate a snpEff file with candidate types.

    Reads the candidate list given by ``-c``/``--candidate_list``, then hands
    it, the snpEff file given by ``-s``/``--snpeff_file`` and the output path
    given by ``-o``/``--output`` to ``mark_snpeff_file``.

    All three options are required. If any is missing the parser prints a
    usage error and exits with status 2, rather than letting ``None`` reach
    ``open()`` and fail with a traceback.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-s', '--snpeff_file', dest='snpeff_file', action='store',
        type='string', default=None, help="Path to the snpEff file")
    parser.add_option(
        '-c', '--candidate_list', dest='candidate_list', action='store',
        type='string', default=None,
        help="Two column tabular list of candidate Gene ID, Type")
    parser.add_option(
        '-o', '--output', dest='output', action='store',
        type='string', default=None, help="Output file name")
    options, _args = parser.parse_args()

    # Every option defaults to None, so an absent flag is detectable here.
    required = (
        ('--snpeff_file', options.snpeff_file),
        ('--candidate_list', options.candidate_list),
        ('--output', options.output),
    )
    missing = [flag for flag, value in required if not value]
    if missing:
        # parser.error() writes the usage text to stderr and exits (status 2).
        parser.error("missing required option(s): " + ", ".join(missing))

    candidates = parse_candidate_list(candidate_list=options.candidate_list)
    mark_snpeff_file(
        snpeff_file=options.snpeff_file,
        output=options.output,
        candidates=candidates,
    )
|
|
20
|
|
def skip_and_write_headers(writer=None, reader=None, i_file=None):
    """Copy the leading ``#`` header rows from ``reader`` to ``writer``.

    A header row is one whose first field starts with ``#``. The header ends
    at the first row that is blank or does not start with ``#``, or at end of
    input.

    The function makes two passes: it counts the header rows, rewinds
    ``i_file`` to the start, then re-reads exactly that many rows and writes
    them out. On return ``reader`` is positioned at the first non-header row.

    Args:
        writer: csv writer that receives the header rows.
        reader: csv reader to consume; it is expected to read from ``i_file``,
            since rewinding ``i_file`` is what resets the reader's position.
        i_file: seekable file object underlying ``reader``.
    """
    # First pass: count header rows. Iterating (instead of calling
    # reader.next()) works on Python 2 and 3 and simply stops at end of input
    # when the file holds nothing but headers.
    header_count = 0
    for row in reader:
        if not row or not row[0].startswith('#'):
            break
        header_count += 1

    # The first pass consumed one row past the header (if any), so rewind and
    # replay only the header rows.
    i_file.seek(0)
    for _ in range(header_count):
        writer.writerow(next(reader))
|
|
32
|
|
def parse_candidate_list(candidate_list=""):
    """Read a tab-delimited candidate file into a ``{gene_id: gene_type}`` dict.

    The first column of each row is used as the gene ID and the second as its
    type. Blank rows are skipped. If a gene ID appears more than once, the
    last occurrence wins.

    Args:
        candidate_list: path to the tab-delimited file.

    Returns:
        dict mapping first-column values to second-column values.

    Raises:
        IndexError: if a non-blank row has fewer than two columns.
    """
    # 'rU' requests universal newlines on Python 2; on Python 3 that is
    # already the text-mode default and the 'U' flag is rejected by 3.11+.
    mode = 'r' if sys.version_info[0] >= 3 else 'rU'

    candidates = {}
    # The with-block guarantees the file is closed even if a row is malformed.
    with open(candidate_list, mode) as i_file:
        for row in csv.reader(i_file, delimiter='\t'):
            if not row:
                continue
            candidates[row[0]] = row[1]
    return candidates
|
|
46
|
|
def mark_snpeff_file(snpeff_file="", output="", candidates=None):
    """Write a copy of a snpEff file with a candidate-type column appended.

    Leading ``#`` header rows are copied unchanged by
    ``skip_and_write_headers``. Every following non-blank row gets one extra
    trailing field: ``candidates[gene_id]`` when the value in column index 9
    is a key of ``candidates``, otherwise an empty string. Rows with fewer
    than ten columns also get the empty string; blank rows are written
    through unchanged.

    Args:
        snpeff_file: path to the tab-delimited input file.
        output: path of the tab-delimited file to write.
        candidates: mapping of gene ID to the value to append; ``None`` is
            treated as an empty mapping.
    """
    if candidates is None:
        candidates = {}

    gene_id_column = 9

    # csv wants a binary output file on Python 2 but a text file opened with
    # newline='' on Python 3; 'rU' is likewise Python 2 only (the 'U' flag is
    # rejected by Python 3.11+, where universal newlines are the default).
    if sys.version_info[0] >= 3:
        read_mode, write_mode, write_kwargs = 'r', 'w', {'newline': ''}
    else:
        read_mode, write_mode, write_kwargs = 'rU', 'wb', {}

    # Nested with-blocks close both files even if a row raises part-way.
    with open(snpeff_file, read_mode) as i_file:
        with open(output, write_mode, **write_kwargs) as o_file:
            reader = csv.reader(i_file, delimiter='\t')
            writer = csv.writer(o_file, delimiter='\t')

            skip_and_write_headers(writer=writer, reader=reader, i_file=i_file)

            for row in reader:
                if row:
                    if len(row) > gene_id_column:
                        gene_id = row[gene_id_column]
                    else:
                        gene_id = None
                    row.append(candidates.get(gene_id, ''))
                writer.writerow(row)
|
|
68
|
|
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
|