comparison pattern_plots.r @ 81:b6f9a640e098 draft

Uploaded
author davidvanzessen
date Fri, 19 Feb 2021 15:10:54 +0000
parents
children
comparison
equal deleted inserted replaced
80:a4617f1d1d89 81:b6f9a640e098
1 library(ggplot2)
2 library(reshape2)
3 library(scales)
4
5 args <- commandArgs(trailingOnly = TRUE)
6
7 input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt"
8
9 plot1.path = args[2]
10 plot1.png = paste(plot1.path, ".png", sep="")
11 plot1.txt = paste(plot1.path, ".txt", sep="")
12 plot1.pdf = paste(plot1.path, ".pdf", sep="")
13
14 plot2.path = args[3]
15 plot2.png = paste(plot2.path, ".png", sep="")
16 plot2.txt = paste(plot2.path, ".txt", sep="")
17 plot2.pdf = paste(plot2.path, ".pdf", sep="")
18
19 plot3.path = args[4]
20 plot3.png = paste(plot3.path, ".png", sep="")
21 plot3.txt = paste(plot3.path, ".txt", sep="")
22 plot3.pdf = paste(plot3.path, ".pdf", sep="")
23
24 clean.output = args[5]
25
26 dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
27
28 classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
29 xyz = c("x", "y", "z")
30 new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
31
32 names(dat) = new.names
33
34 clean.dat = dat
35 clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))]
36
37 write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA)
38
39 dat["RGYW.WRCY",] = colSums(dat[c(14,15),], na.rm=T)
40 dat["TW.WA",] = colSums(dat[c(16,17),], na.rm=T)
41
42 data1 = dat[c("RGYW.WRCY", "TW.WA"),]
43
44 data1 = data1[,names(data1)[grepl(".z", names(data1))]]
45 names(data1) = gsub("\\..*", "", names(data1))
46
47 data1 = melt(t(data1))
48
49 names(data1) = c("Class", "Type", "value")
50
51 chk = is.na(data1$value)
52 if(any(chk)){
53 data1[chk, "value"] = 0
54 }
55
56 data1 = data1[order(data1$Type),]
57
58 write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
59
60 p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) + ggtitle("Percentage of mutations in AID and pol eta motives")
61 p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
62 #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))
63 png(filename=plot1.png, width=510, height=300)
64 print(p)
65 dev.off()
66
67 ggsave(plot1.pdf, p)
68
69 data2 = dat[c(1, 5:8),]
70
71 data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
72 names(data2) = gsub(".x", "", names(data2))
73
74 data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
75
76 data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1)
77
78 data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]]
79 data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1)
80
81 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
82 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
83 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
84 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
85
86 data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
87
88 names(data2) = c("Class", "Type", "value")
89
90 chk = is.na(data2$value)
91 if(any(chk)){
92 data2[chk, "value"] = 0
93 }
94
95 data2 = data2[order(data2$Type),]
96
97 write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
98
99 p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") + ggtitle("Relative mutation patterns")
100 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
101 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
102 png(filename=plot2.png, width=480, height=300)
103 print(p)
104 dev.off()
105
106 ggsave(plot2.pdf, p)
107
108 data3 = dat[c(5, 6, 8, 18:21),]
109 data3 = data3[,names(data3)[grepl("\\.x", names(data3))]]
110 names(data3) = gsub(".x", "", names(data3))
111
112 data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1)
113
114 data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1)
115
116 data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1)
117
118 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
119 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
120
121 data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0
122 data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0
123
124 data3["A/T",is.nan(unlist(data3["A/T",]))] = 0
125 data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0
126
127 data3 = melt(t(data3[8:10,]))
128 names(data3) = c("Class", "Type", "value")
129
130 chk = is.na(data3$value)
131 if(any(chk)){
132 data3[chk, "value"] = 0
133 }
134
135 data3 = data3[order(data3$Type),]
136
137 write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
138
139 p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) + ggtitle("Absolute mutation patterns")
140 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
141 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
142 png(filename=plot3.png, width=480, height=300)
143 print(p)
144 dev.off()
145
146 ggsave(plot3.pdf, p)
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178