library(ggplot2)
library(reshape2)
library(scales)

# Command-line interface (positional arguments after the script name):
#   1. input file   - the data that gets turned into the "SHM overview" table
#                     in the html report ("data_sum.txt")
#   2. plot 1 path  - output prefix; ".png" and ".txt" are appended to it
#   3. plot 2 path  - output prefix; ".png" and ".txt" are appended to it
#   4. plot 3 path  - output prefix; ".png" and ".txt" are appended to it
#   5. clean output - path the reordered, tab-separated table is written to
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usable message: a missing argument would otherwise turn
# into an NA path ("NA.png", read.table(NA), ...) and fail much later.
if (length(args) < 5) {
  stop(
    "expected 5 arguments: <input file> <plot1 prefix> <plot2 prefix> ",
    "<plot3 prefix> <clean output>; got ", length(args),
    call. = FALSE
  )
}

input.file <- args[1]

plot1.path <- args[2]
plot1.png <- paste0(plot1.path, ".png")
plot1.txt <- paste0(plot1.path, ".txt")

plot2.path <- args[3]
plot2.png <- paste0(plot2.path, ".png")
plot2.txt <- paste0(plot2.path, ".txt")

plot3.path <- args[4]
plot3.png <- paste0(plot3.path, ".png")
plot3.txt <- paste0(plot3.path, ".txt")

clean.output <- args[5]

# Load the summary table: comma separated, no header line, no quoting, and the
# first field of every row is used as the row name. Short rows are padded.
dat <- read.table(
  input.file,
  header = FALSE,
  sep = ",",
  quote = "",
  stringsAsFactors = FALSE,
  fill = TRUE,
  row.names = 1
)

classes <- c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
xyz <- c("x", "y", "z")

# Every group contributes three columns: <group>.x, <group>.y, <group>.z
class.cols <- paste(rep(classes, each = 3), xyz, sep = ".")
un.cols <- paste("un", xyz, sep = ".")
all.cols <- paste("all", xyz, sep = ".")

# Column order as it appears in the input file: classes, "un", "all"
new.names <- c(class.cols, un.cols, all.cols)
names(dat) <- new.names

# The exported copy lists the "all" columns before the "un" columns
clean.dat <- dat[, c(class.cols, all.cols, un.cols)]

write.table(
  clean.dat,
  clean.output,
  quote = FALSE,
  sep = "\t",
  na = "",
  row.names = TRUE,
  col.names = NA
)

# ---- Plot 1: percentage of mutations in the RGYW/WRCY and TW/WA motifs ----

# Combine row pairs 13+14 and 15+16 into two new rows appended to `dat`.
# NOTE(review): the rows are addressed by position; presumably 13/14 are the
# RGYW and WRCY rows and 15/16 the TW and WA rows - confirm against the input.
dat["RGYW.WRCY", ] <- colSums(dat[c(13, 14), ], na.rm = TRUE)
dat["TW.WA", ] <- colSums(dat[c(15, 16), ], na.rm = TRUE)

data1 <- dat[c("RGYW.WRCY", "TW.WA"), ]

# Keep only the "<group>.z" columns. The dot is escaped and the pattern is
# anchored so that only a literal ".z" suffix matches (a bare "." is a regex
# wildcard and would match any character followed by "z").
data1 <- data1[, names(data1)[grepl("\\.z$", names(data1))]]
# Strip the suffix so the columns are named after the group only
names(data1) <- gsub("\\..*", "", names(data1))

# Long format: one row per (group, motif) pair
data1 <- melt(t(data1))
names(data1) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0
chk <- is.na(data1$value)
if (any(chk)) {
  data1[chk, "value"] <- 0
}

data1 <- data1[order(data1$Type), ]

write.table(data1, plot1.txt, quote = FALSE, sep = "\t", na = "", row.names = FALSE, col.names = TRUE)

p <- ggplot(data1, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge", colour = "black") +
  ylab("% of mutations") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle("Percentage of mutations in AID and pol eta motives")
p <- p +
  theme(
    panel.background = element_rect(fill = "white", colour = "black"),
    text = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_fill_manual(values = c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
#p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))

png(filename = plot1.png, width = 510, height = 300)
print(p)
dev.off()

# ---- Plot 2: relative mutation patterns ----

# Rows are picked by position; they must include "Number of Mutations (%)" and
# "Transitions at G C (%)", which are addressed by name below.
data2 <- dat[c(1, 5:8), ]

# Keep the "<group>.x" columns and strip the suffix. The patterns are escaped
# and anchored: an unescaped ".x" would remove any "<char>x" pair anywhere in
# a column name, not just the trailing ".x".
data2 <- data2[, names(data2)[grepl("\\.x$", names(data2))]]
names(data2) <- sub("\\.x$", "", names(data2))

# A/T share is read from the ".z" columns of the "Targeting of A T (%)" row
data2["A/T", ] <- dat["Targeting of A T (%)", names(dat)[grepl("\\.z$", names(dat))]]

data2["G/C transitions", ] <- round(
  data2["Transitions at G C (%)", ] / data2["Number of Mutations (%)", ] * 100,
  1
)

# The ".y" columns of the "Transitions at G C (%)" row are used as the minuend
# for the transversions. NOTE(review): going by the row name this is presumably
# the total number of mutations at G/C - confirm against the input layout.
data2["mutation.at.gc", ] <- dat["Transitions at G C (%)", names(dat)[grepl("\\.y$", names(dat))]]
data2["G/C transversions", ] <- round(
  (data2["mutation.at.gc", ] - data2["Transitions at G C (%)", ]) /
    data2["Number of Mutations (%)", ] * 100,
  1
)

# Division by zero yields NaN/Inf; those are reported as 0
for (gc.row in c("G/C transversions", "G/C transitions")) {
  gc.values <- unlist(data2[gc.row, ])
  data2[gc.row, is.nan(gc.values) | is.infinite(gc.values)] <- 0
}

# Long format: one row per (group, mutation type) pair
data2 <- melt(t(data2[c("A/T", "G/C transitions", "G/C transversions"), ]))
names(data2) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0
chk <- is.na(data2$value)
if (any(chk)) {
  data2[chk, "value"] <- 0
}

data2 <- data2[order(data2$Type), ]

write.table(data2, plot2.txt, quote = FALSE, sep = "\t", na = "", row.names = FALSE, col.names = TRUE)

# position = "fill" rescales every bar to 100 %, hence the percent axis labels
p <- ggplot(data2, aes(x = Class, y = value, fill = Type)) +
  geom_bar(position = "fill", stat = "identity", colour = "black") +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("% of mutations") +
  ggtitle("Relative mutation patterns")
p <- p +
  theme(
    panel.background = element_rect(fill = "white", colour = "black"),
    text = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_fill_manual(values = c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))

png(filename = plot2.png, width = 480, height = 300)
print(p)
dev.off()

# ---- Plot 3: absolute mutation patterns ----

# Rows are picked by position; they must include the "Transitions at G C (%)",
# "Targeting of G C (%)", "Targeting of A T (%)", "A", "C", "G" and "T" rows,
# all of which are addressed by name below.
data3 <- dat[c(5, 6, 8, 17:20), ]

# Keep the "<group>.x" columns and strip the suffix. The patterns are escaped
# and anchored: an unescaped ".x" would remove any "<char>x" pair anywhere in
# a column name, not just the trailing ".x".
data3 <- data3[, names(data3)[grepl("\\.x$", names(data3))]]
names(data3) <- sub("\\.x$", "", names(data3))

# Denominators: sum of the "C" and "G" rows, and of the "A" and "T" rows
gc.total <- data3["C", ] + data3["G", ]
at.total <- data3["A", ] + data3["T", ]

data3["G/C transitions", ] <- round(data3["Transitions at G C (%)", ] / gc.total * 100, 1)
data3["G/C transversions", ] <- round(
  (data3["Targeting of G C (%)", ] - data3["Transitions at G C (%)", ]) / gc.total * 100,
  1
)
data3["A/T", ] <- round(data3["Targeting of A T (%)", ] / at.total * 100, 1)

# The three computed rows, in the order they were appended. Selecting them by
# name instead of by position (8:10) keeps this correct if the row selection
# above ever changes.
pattern.rows <- c("G/C transitions", "G/C transversions", "A/T")

# Division by zero yields NaN/Inf; those are reported as 0
for (pattern.row in pattern.rows) {
  pattern.values <- unlist(data3[pattern.row, ])
  data3[pattern.row, is.nan(pattern.values) | is.infinite(pattern.values)] <- 0
}

# Long format: one row per (group, mutation type) pair
data3 <- melt(t(data3[pattern.rows, ]))
names(data3) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0
chk <- is.na(data3$value)
if (any(chk)) {
  data3[chk, "value"] <- 0
}

data3 <- data3[order(data3$Type), ]

write.table(data3, plot3.txt, quote = FALSE, sep = "\t", na = "", row.names = FALSE, col.names = TRUE)

p <- ggplot(data3, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge", colour = "black") +
  ylab("% of nucleotides") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle("Absolute mutation patterns")
p <- p +
  theme(
    panel.background = element_rect(fill = "white", colour = "black"),
    text = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_fill_manual(values = c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))

png(filename = plot3.png, width = 480, height = 300)
print(p)
dev.off()
|