comparison pattern_plots.r @ 0:c33d93683a09 draft

Uploaded
author davidvanzessen
date Thu, 13 Oct 2016 10:52:24 -0400
parents
children faae21ba5c63
comparison
equal deleted inserted replaced
-1:000000000000 0:c33d93683a09
# Setup for the pattern plots: load plotting/reshaping packages, read the
# command-line arguments, load the comma-separated summary table and label
# its columns.
library(ggplot2)
library(reshape2)
library(scales)

# Positional arguments (after the script name):
#   1    input file
#   2-4  output path prefixes for plots 1-3; ".png" and ".txt" are appended
args <- commandArgs(trailingOnly = TRUE)

# Fail early: a missing argument would otherwise become NA and the script
# would go on to write files such as "NA.png".
if (length(args) < 4) {
  stop(
    "Expected 4 arguments: <input file> <plot1 path> <plot2 path> <plot3 path>",
    call. = FALSE
  )
}

input.file <- args[1] # the data that gets turned into the "SHM overview" table in the html report "data_sum.txt"

plot1.path <- args[2]
plot1.png <- paste0(plot1.path, ".png")
plot1.txt <- paste0(plot1.path, ".txt")

plot2.path <- args[3]
plot2.png <- paste0(plot2.path, ".png")
plot2.txt <- paste0(plot2.path, ".txt")

plot3.path <- args[4]
plot3.png <- paste0(plot3.path, ".png")
plot3.txt <- paste0(plot3.path, ".txt")

# No header line; the first field of every line becomes the row name.
# fill = TRUE pads short lines with NA instead of raising an error.
dat <- read.table(
  input.file,
  header = FALSE,
  sep = ",",
  quote = "",
  stringsAsFactors = FALSE,
  fill = TRUE,
  row.names = 1
)

# Column labels: one x/y/z triple per class, followed by a triple for "un"
# and a triple for "all" (33 names in total).
classes <- c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM")
xyz <- c("x", "y", "z")
new.names <- c(
  paste(rep(classes, each = 3), xyz, sep = "."),
  paste("un", xyz, sep = "."),
  paste("all", xyz, sep = ".")
)

names(dat) <- new.names
30
# ---- Plot 1: RGYW.WRCY and TW.WA per class ----
# Input rows 13-14 and 15-16 are summed column-wise (NA ignored) into two new
# rows of `dat`, labelled "RGYW.WRCY" and "TW.WA".
dat["RGYW.WRCY", ] <- colSums(dat[c(13, 14), ], na.rm = TRUE)
dat["TW.WA", ] <- colSums(dat[c(15, 16), ], na.rm = TRUE)

data1 <- dat[c("RGYW.WRCY", "TW.WA"), ]

# Keep only the ".z" column of every class. The dot is escaped and the pattern
# anchored so that it matches a literal ".z" suffix, not "any character
# followed by z".
z.cols <- names(data1)[grepl("\\.z$", names(data1))]
data1 <- data1[, z.cols]

# Strip everything from the first dot onwards, leaving the bare class name.
names(data1) <- sub("\\..*", "", names(data1))

# Long format: one row per (class, motif) pair.
data1 <- melt(t(data1))
names(data1) <- c("Class", "Type", "value")
data1 <- data1[order(data1$Type), ]

write.table(
  data1,
  plot1.txt,
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)

# Side-by-side bars per class, one bar per motif.
p <- ggplot(data1, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge", colour = "black") +
  ylab("% of mutations") +
  guides(fill = guide_legend(title = NULL)) +
  theme(panel.background = element_rect(fill = "white", colour = "black")) +
  scale_fill_manual(values = c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))

png(filename = plot1.png, width = 480, height = 300)
print(p)
dev.off()
53
# ---- Plot 2: A/T vs G/C transversions vs G/C transitions per class ----
# Works on input rows 5-8; a "sum" row holds their column totals (NA ignored).
data2 <- dat[5:8, ]
data2["sum", ] <- colSums(data2, na.rm = TRUE)

# Keep only the ".x" column of every class and strip that suffix. The dot is
# escaped and anchored: the previous pattern ".x" removed any character
# followed by "x", anywhere in the name.
x.cols <- names(data2)[grepl("\\.x$", names(data2))]
data2 <- data2[, x.cols]
names(data2) <- sub("\\.x$", "", names(data2))

# Percentage of `part` in `total`, rounded to one decimal. NaN and Inf
# (division by zero) become 0; a plain NA is left untouched.
percent_or_zero <- function(part, total) {
  pct <- round(part / total * 100, 1)
  pct[is.nan(pct) | is.infinite(pct)] <- 0
  pct
}

column.totals <- unlist(data2["sum", ])

# Rows are appended in this order on purpose: it fixes the order of the
# "Type" levels after melt(), and therefore the sort order and legend order.
data2["A/T", ] <- percent_or_zero(colSums(data2[3:4, ]), column.totals)
data2["G/C transversions", ] <- percent_or_zero(unlist(data2[2, ]), column.totals)
data2["G/C transitions", ] <- percent_or_zero(unlist(data2[1, ]), column.totals)

# Long format: one row per (class, category) pair.
data2 <- melt(t(data2[c("A/T", "G/C transversions", "G/C transitions"), ]))
names(data2) <- c("Class", "Type", "value")
data2 <- data2[order(data2$Type), ]

write.table(
  data2,
  plot2.txt,
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)

# Stacked bars normalised to 100% per class (position = "fill").
p <- ggplot(data2, aes(x = Class, y = value, fill = Type)) +
  geom_bar(position = "fill", stat = "identity", colour = "black") +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("% of mutations") +
  theme(panel.background = element_rect(fill = "white", colour = "black")) +
  scale_fill_manual(values = c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))

png(filename = plot2.png, width = 480, height = 300)
print(p)
dev.off()
87
# ---- Plot 3: mutation categories as a percentage of nucleotides ----
# Uses input rows 5, 6 and 8 as numerators and rows 17-20 as denominators.
# After subsetting they are rows 1-3 and 4-7 of data3 respectively.
data3 <- dat[c(5, 6, 8, 17:20), ]

# Keep only the ".x" column of every class and strip that suffix. The dot is
# escaped and anchored: the previous pattern ".x" removed any character
# followed by "x", anywhere in the name.
x.cols <- names(data3)[grepl("\\.x$", names(data3))]
data3 <- data3[, x.cols]
names(data3) <- sub("\\.x$", "", names(data3))

# Missing counts are treated as zero.
data3[is.na(data3)] <- 0

# Percentage of `part` in `total`, rounded to one decimal. NaN and Inf
# (division by zero) become 0.
percent_or_zero <- function(part, total) {
  pct <- round(part / total * 100, 1)
  pct[is.nan(pct) | is.infinite(pct)] <- 0
  pct
}

# Denominators: rows 5 + 7 for the G/C categories, rows 4 + 6 for A/T.
gc.total <- unlist(data3[5, ]) + unlist(data3[7, ])
at.total <- unlist(data3[4, ]) + unlist(data3[6, ])

# Rows are appended in this order on purpose: it fixes the order of the
# "Type" levels after melt(), and therefore the sort order and legend order.
data3["G/C transitions", ] <- percent_or_zero(unlist(data3[1, ]), gc.total)
data3["G/C transversions", ] <- percent_or_zero(unlist(data3[2, ]), gc.total)
# NOTE(review): plot 2 sums input rows 7 and 8 for "A/T", whereas only input
# row 8 is used here - confirm that this difference is intended.
data3["A/T", ] <- percent_or_zero(unlist(data3[3, ]), at.total)

# Long format: one row per (class, category) pair.
data3 <- melt(t(data3[c("G/C transitions", "G/C transversions", "A/T"), ]))
names(data3) <- c("Class", "Type", "value")
data3 <- data3[order(data3$Type), ]

write.table(
  data3,
  plot3.txt,
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)

# Side-by-side bars per class, one bar per category.
p <- ggplot(data3, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge", colour = "black") +
  ylab("% of nucleotides") +
  guides(fill = guide_legend(title = NULL)) +
  theme(panel.background = element_rect(fill = "white", colour = "black")) +
  scale_fill_manual(values = c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))

png(filename = plot3.png, width = 480, height = 300)
print(p)
dev.off()
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154