annotate EMS_VariantDensityMapping.py @ 14:ddfef7773c2d draft default tip

Uploaded
author gregory-minevich
date Fri, 09 May 2014 17:46:56 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
1 #!/usr/bin/python
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
2
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
3 import re
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
4 import sys
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
5 import optparse
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
6 import csv
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
7 from rpy import *
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
8
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
def main():
    """Command-line entry point: parse options, read the VCF, draw the plots.

    Options:
      -s / --snp_vcf      VCF of SNPs (required).
      -c / --hist_color   Color for the 1Mb histograms (default "darkgray").
      -y / --ylim         Upper limit of the Y axis (int, default 100).
      -z / --standardize  'true' / 'false' string flag: standardize the X axis
                          (default 'false').
      -e / --ems          'true' / 'false' string flag: whether EMS variants
                          should be filtered for (default 'false').
      -o / --output       Output file name of the plot
                          (default 'EMS_Variant_Density_Plot.pdf').
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', '--snp_vcf', dest='snp_vcf', action='store', type='string', default=None, help="VCF of SNPs")
    parser.add_option('-c', '--hist_color', dest='hist_color', action='store', type='string', default="darkgray", help="Color for 1Mb histograms")
    parser.add_option('-y', '--ylim', dest='ylim', action='store', type='int', default=100, help="Upper limit of Y axis")
    parser.add_option('-z', '--standardize', dest='standardize', default='false', help="Standardize X-axis")
    parser.add_option('-e', '--ems', dest='ems', default='false', help="Whether EMS variants should be filtered for")
    parser.add_option('-o', '--output', dest='plot_output', action='store', type='string', default='EMS_Variant_Density_Plot.pdf', help="Output file name of plot")
    (options, args) = parser.parse_args()

    # The VCF path has no usable default; without this check the script
    # would die later inside open(None) with an unhelpful TypeError.
    if options.snp_vcf is None:
        parser.error("a SNP VCF file is required (-s / --snp_vcf)")

    i, ii, iii, iv, v, x = parse_snp_vcf(snp_vcf=options.snp_vcf, ems=options.ems)

    create_histograms(
        plot_output=options.plot_output,
        hist_color=options.hist_color,
        ylim=options.ylim,
        ems=options.ems,
        standardize=options.standardize,
        i=i, ii=ii, iii=iii, iv=iv, v=v, x=x,
    )
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
23
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
def create_histograms(plot_output=None, hist_color=None, ylim=None, ems=None, standardize=None, i=None, ii=None, iii=None, iv=None, v=None, x=None):
    """Write one histogram page per chromosome into a single PDF.

    Parameters:
      plot_output  -- file name handed to R's pdf() device (8 x 8).
      hist_color, ylim, ems, standardize -- forwarded unchanged to plot_data.
      i, ii, iii, iv, v, x -- position lists for chromosomes I, II, III, IV,
                              V and X; a chromosome whose list is empty (or
                              None) gets no page.

    Any exception raised while opening the device, plotting, or closing the
    device is reported on stdout and swallowed, so the caller never sees it.
    """
    # Per-chromosome value forwarded to plot_data as `breaks`
    # (presumably the chromosome length in Mb -- confirm against the genome).
    breaks = {'I': 16, 'II': 16, 'III': 14, 'IV': 18, 'V': 21, 'X': 18}

    per_chromosome = (
        ("I", i),
        ("II", ii),
        ("III", iii),
        ("IV", iv),
        ("V", v),
        ("X", x),
    )

    try:
        r.pdf(plot_output, 8, 8)
        try:
            for chrom_name, chrom_positions in per_chromosome:
                if chrom_positions:
                    plot_data(position_list=chrom_positions, chr=chrom_name, breaks=breaks[chrom_name], hist_color=hist_color, ylim=ylim, ems=ems, standardize=standardize)
        finally:
            # Close the device even when a plot call fails; otherwise the
            # PDF device stays open and the output file is left unfinalized.
            r.dev_off()
    except Exception as inst:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(inst)
        print("There was an error creating the plot pdf... Please try again")
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
45
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
def parse_snp_vcf(snp_vcf=None, ems=None):
    """Collect SNP positions from a tab-separated VCF, grouped by chromosome.

    Parameters:
      snp_vcf -- path of the VCF file to read.
      ems     -- string flag. 'true' keeps only rows whose reference allele
                 is G or C and whose alternate allele is A or T; 'false'
                 keeps every row. Any other value keeps nothing.

    Returns a 6-tuple of lists, in the order I, II, III, IV, V, X. Each list
    holds the position column (as the strings read from the file) of the
    kept rows for that chromosome. Rows on other chromosomes are ignored.

    Chromosome names are upper-cased and stripped of "CHROMOSOME_" and "chr"
    (case-insensitive) before comparison.
    """
    positions_by_chromosome = {
        "I": [],
        "II": [],
        "III": [],
        "IV": [],
        "V": [],
        "X": [],
    }

    # The flag is loop-invariant, so decide the mode once.
    ems_only = (ems == 'true')
    collect = ems_only or (ems == 'false')

    # 'rU' (universal newlines) is kept from the original Python 2 script.
    # The with-block guarantees the handle is closed, which the original
    # never did.
    with open(snp_vcf, 'rU') as i_file:
        reader = csv.reader(i_file, delimiter='\t', quoting=csv.QUOTE_NONE)
        skip_headers(reader=reader, i_file=i_file)

        for row in reader:
            # Blank or truncated lines lack the CHROM/POS/REF/ALT columns;
            # skip them instead of crashing with IndexError.
            if len(row) < 5:
                continue

            chromosome = row[0].upper()
            chromosome = re.sub("CHROMOSOME_", "", chromosome, flags=re.IGNORECASE)
            chromosome = re.sub("chr", "", chromosome, flags=re.IGNORECASE)

            position = row[1]
            ref_allele = row[3]
            alt_allele = row[4]

            if not collect:
                continue
            if ems_only and not (ref_allele in ("G", "C") and alt_allele in ("A", "T")):
                continue

            bucket = positions_by_chromosome.get(chromosome)
            if bucket is not None:
                bucket.append(position)

    return (
        positions_by_chromosome["I"],
        positions_by_chromosome["II"],
        positions_by_chromosome["III"],
        positions_by_chromosome["IV"],
        positions_by_chromosome["V"],
        positions_by_chromosome["X"],
    )
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
97
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
def skip_headers(reader=None, i_file=None):
    """Position `reader` on the first row after the leading '#' header rows.

    Parameters:
      reader -- csv reader built on top of `i_file`.
      i_file -- the seekable file object the reader consumes.

    The leading rows whose first field starts with '#' are counted, the file
    is rewound to its start, and exactly that many rows are consumed again.
    The rewind is needed because counting also consumes the first data row.

    An empty file, or a file holding only header rows, leaves the reader
    exhausted instead of raising StopIteration. An empty row ends the header
    run instead of raising IndexError.
    """
    # count headers
    comment = 0
    for row in reader:
        if not (row and row[0].startswith('#')):
            break
        comment += 1

    # skip headers
    i_file.seek(0)
    for _ in range(comment):
        # next(reader) works on Python 2.6+ and 3; reader.next() is 2-only.
        next(reader)
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
108
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
def plot_data(position_list=None, chr=None, breaks=None, hist_color=None, ylim=None, ems=None, standardize=None):
    """Draw the histogram page for one chromosome on the current R device.

    Parameters:
      position_list -- positions (anything float() accepts); they are divided
                       by 1000000 before plotting and labelled as Mb.
      chr           -- chromosome label used in the plot titles.
      breaks        -- upper bound of the histogram break sequence.
      hist_color    -- fill colour of the 1-wide-bin histogram.
      ylim          -- upper limit of the Y axis (formatted with %d).
      ems           -- accepted but not used here.
      standardize   -- 'true' fixes the X axis at 0..21; 'false' ends it at
                       `breaks`. Any other value draws nothing.

    Two histograms are overlaid (bin width 1 in `hist_color`, then bin width
    .5 in opaque red via add=TRUE), followed by unlabelled major and minor
    tick marks on the X axis.

    NOTE(review): hist_color and chr are spliced unescaped into the R source
    text that is evaluated.
    """
    # R vector literal of the positions, scaled down by 1000000.
    scaled = [str(float(pos) / 1000000) for pos in position_list]
    positions = "c(" + ",".join(scaled) + ")"

    break_limit = "as.integer( ' " + str(breaks) + " ')"

    # The two modes differ only in the upper bound of the X axis.
    if standardize == 'true':
        x_limit = "21"
    elif standardize == 'false':
        x_limit = break_limit
    else:
        return

    x_range = "xlim=c(0," + x_limit + ")"
    y_range = "ylim=c(0, " + ("%d" % ylim) + " )"
    axis_labels_coarse = "', ylab = 'Frequency Of SNPs', xlab = 'Location (Mb)')"
    axis_labels_fine = "', ylab = 'Number Of SNPs', xlab = 'Location (Mb)')"

    coarse_hist = (
        "hist(" + positions + ", " + x_range + ", " + y_range
        + ",col='" + hist_color
        + "', breaks = seq(0, " + break_limit + ", by=1), main = 'LG " + chr
        + axis_labels_coarse
    )
    fine_hist = (
        "hist(" + positions + ", " + x_range + ", add=TRUE, " + y_range
        + ", col=rgb(1, 0, 0, 1), breaks = seq(0, " + break_limit
        + ", by=.5), main = 'Chr " + chr
        + axis_labels_fine
    )
    major_ticks = "axis(1, at=seq(0, " + x_limit + ", by=1), labels=FALSE, tcl=-0.5)"
    minor_ticks = "axis(1, at=seq(0, " + x_limit + ", by=0.5), labels=FALSE, tcl=-0.25)"

    for command in (coarse_hist, fine_hist, major_ticks, minor_ticks):
        r(command)
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
123
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
124
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
125
ddfef7773c2d Uploaded
gregory-minevich
parents:
diff changeset
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()