annotate pattern_plots.r @ 82:a103134ee6e0 draft

Uploaded
author davidvanzessen
date Thu, 25 Feb 2021 10:32:32 +0000
parents b6f9a640e098
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
81
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
1 library(ggplot2)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
2 library(reshape2)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
3 library(scales)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
4
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Positional command-line arguments (everything after the script name):
#   1  input.file    the data that gets turned into the "SHM overview" table
#                    in the html report ("data_sum.txt")
#   2  plot1.path    base path for the plot 1 outputs
#   3  plot2.path    base path for the plot 2 outputs
#   4  plot3.path    base path for the plot 3 outputs
#   5  clean.output  path of the tab-separated copy of the input table
args <- commandArgs(trailingOnly = TRUE)

input.file <- args[1]
clean.output <- args[5]

# Every plot base path is expanded into a .png, a .txt and a .pdf file name.
plot1.path <- args[2]
plot1.png <- paste0(plot1.path, ".png")
plot1.txt <- paste0(plot1.path, ".txt")
plot1.pdf <- paste0(plot1.path, ".pdf")

plot2.path <- args[3]
plot2.png <- paste0(plot2.path, ".png")
plot2.txt <- paste0(plot2.path, ".txt")
plot2.pdf <- paste0(plot2.path, ".pdf")

plot3.path <- args[4]
plot3.png <- paste0(plot3.path, ".png")
plot3.txt <- paste0(plot3.path, ".txt")
plot3.pdf <- paste0(plot3.path, ".pdf")
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
25
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Load the comma-separated summary table. The file has no header row, the
# first field of every line becomes the row name, quoting is disabled and
# short lines are padded (fill = TRUE).
dat <- read.table(input.file, header = FALSE, sep = ",", quote = "",
                  stringsAsFactors = FALSE, fill = TRUE, row.names = 1)

classes <- c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4",
             "IGM", "IGE")
xyz <- c("x", "y", "z")

# Three columns (.x, .y, .z) per class, followed by the "un" and "all" triples.
class.cols <- paste(rep(classes, each = 3), xyz, sep = ".")
un.cols <- paste("un", xyz, sep = ".")
all.cols <- paste("all", xyz, sep = ".")
new.names <- c(class.cols, un.cols, all.cols)

# Assigning more names than there are columns fails with an opaque
# "'names' attribute must be the same length" error; report it clearly.
# Surplus input columns are still tolerated (they end up with NA names).
if (ncol(dat) < length(new.names)) {
  stop("expected at least ", length(new.names), " data columns in '",
       input.file, "' but found ", ncol(dat), call. = FALSE)
}
names(dat) <- new.names

# The exported copy lists the "all" columns before the "un" columns.
clean.dat <- dat[, c(class.cols, all.cols, un.cols)]

write.table(clean.dat, clean.output, quote = FALSE, sep = "\t", na = "",
            row.names = TRUE, col.names = NA)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
38
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# ---- Plot 1: percentage of mutations per motif group and class ----

# Two summary rows are appended to dat, each the column-wise sum of two
# existing rows that are addressed by position.
# NOTE(review): rows 14-17 are presumably the RGYW, WRCY, TW and WA motif
# rows of the input -- confirm against the file layout.
dat["RGYW.WRCY", ] <- colSums(dat[c(14, 15), ], na.rm = TRUE)
dat["TW.WA", ] <- colSums(dat[c(16, 17), ], na.rm = TRUE)

data1 <- dat[c("RGYW.WRCY", "TW.WA"), ]

# Keep the ".z" column of every class. The dot is escaped and the pattern
# anchored so only a literal ".z" suffix matches (an unescaped "." would
# match any character).
data1 <- data1[, grepl("\\.z$", names(data1)), drop = FALSE]
# Strip everything from the first dot on, leaving the bare class name.
names(data1) <- sub("\\..*", "", names(data1))

# Long format: one row per (class, motif group) pair.
data1 <- melt(t(data1))
names(data1) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0.
data1$value[is.na(data1$value)] <- 0

data1 <- data1[order(data1$Type), ]

write.table(data1, plot1.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

# NOTE(review): "motives" in the title is probably meant to be "motifs"; the
# string is left untouched because it is user-visible output.
p <- ggplot(data1, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge",
           colour = "black") +
  ylab("% of mutations") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle("Percentage of mutations in AID and pol eta motives") +
  theme(panel.background = element_rect(fill = "white", colour = "black"),
        text = element_text(size = 15, colour = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))

# Raster version at a fixed pixel size, then a PDF of the same plot.
png(filename = plot1.png, width = 510, height = 300)
print(p)
dev.off()

ggsave(plot1.pdf, p)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
68
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# ---- Plot 2: relative mutation patterns per class ----

# Source rows are picked by position; the rows used by name below
# ("Number of Mutations (%)", "Transitions at G C (%)") must be among them.
data2 <- dat[c(1, 5:8), ]

# Keep the ".x" column of every class and drop that suffix from the names.
# The dot is escaped and anchored: the previous pattern ".x" removed ANY
# character followed by "x", anywhere in a name.
data2 <- data2[, grepl("\\.x$", names(data2)), drop = FALSE]
names(data2) <- sub("\\.x$", "", names(data2))

# The ".z" / ".y" values of dat are copied in by column position.
z.cols <- grepl("\\.z$", names(dat))
y.cols <- grepl("\\.y$", names(dat))

data2["A/T", ] <- dat["Targeting of A T (%)", z.cols]

data2["G/C transitions", ] <- round(
  data2["Transitions at G C (%)", ] / data2["Number of Mutations (%)", ] * 100,
  1
)

data2["mutation.at.gc", ] <- dat["Transitions at G C (%)", y.cols]
data2["G/C transversions", ] <- round(
  (data2["mutation.at.gc", ] - data2["Transitions at G C (%)", ]) /
    data2["Number of Mutations (%)", ] * 100,
  1
)

# Divisions by zero leave NaN or Inf behind; those cells become 0.
for (ratio.row in c("G/C transversions", "G/C transitions")) {
  ratio.values <- unlist(data2[ratio.row, ])
  data2[ratio.row, is.nan(ratio.values) | is.infinite(ratio.values)] <- 0
}

# Long format: one row per (class, mutation type) pair.
data2 <- melt(t(data2[c("A/T", "G/C transitions", "G/C transversions"), ]))
names(data2) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0.
data2$value[is.na(data2$value)] <- 0

data2 <- data2[order(data2$Type), ]

write.table(data2, plot2.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

p <- ggplot(data2, aes(x = Class, y = value, fill = Type)) +
  geom_bar(position = "fill", stat = "identity", colour = "black") +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("% of mutations") +
  ggtitle("Relative mutation patterns") +
  theme(panel.background = element_rect(fill = "white", colour = "black"),
        text = element_text(size = 15, colour = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("A/T" = "blue4",
                               "G/C transversions" = "gray74",
                               "G/C transitions" = "white"))

# Raster version at a fixed pixel size, then a PDF of the same plot.
png(filename = plot2.png, width = 480, height = 300)
print(p)
dev.off()

ggsave(plot2.pdf, p)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
107
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# ---- Plot 3: absolute mutation patterns per class ----

# Source rows are picked by position; the rows used by name below
# ("Transitions at G C (%)", "Targeting of G C (%)", "Targeting of A T (%)",
# "A", "C", "G", "T") must be among them.
data3 <- dat[c(5, 6, 8, 18:21), ]

# Keep the ".x" column of every class and drop that suffix from the names.
# The dot is escaped and anchored: the previous pattern ".x" removed ANY
# character followed by "x", anywhere in a name.
data3 <- data3[, grepl("\\.x$", names(data3)), drop = FALSE]
names(data3) <- sub("\\.x$", "", names(data3))

gc.total <- data3["C", ] + data3["G", ]
at.total <- data3["A", ] + data3["T", ]

data3["G/C transitions", ] <- round(
  data3["Transitions at G C (%)", ] / gc.total * 100,
  1
)
data3["G/C transversions", ] <- round(
  (data3["Targeting of G C (%)", ] - data3["Transitions at G C (%)", ]) /
    gc.total * 100,
  1
)
data3["A/T", ] <- round(data3["Targeting of A T (%)", ] / at.total * 100, 1)

# The computed rows, in creation order. That order also fixes the order of
# the Type levels after melting, so it must not be changed.
plot.rows <- c("G/C transitions", "G/C transversions", "A/T")

# Divisions by zero leave NaN or Inf behind; those cells become 0.
for (ratio.row in plot.rows) {
  ratio.values <- unlist(data3[ratio.row, ])
  data3[ratio.row, is.nan(ratio.values) | is.infinite(ratio.values)] <- 0
}

# Long format: one row per (class, mutation type) pair. The rows are picked
# by name instead of by position (formerly 8:10).
data3 <- melt(t(data3[plot.rows, ]))
names(data3) <- c("Class", "Type", "value")

# Missing values are written and plotted as 0.
data3$value[is.na(data3$value)] <- 0

data3 <- data3[order(data3$Type), ]

write.table(data3, plot3.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

p <- ggplot(data3, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge",
           colour = "black") +
  ylab("% of nucleotides") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle("Absolute mutation patterns") +
  theme(panel.background = element_rect(fill = "white", colour = "black"),
        text = element_text(size = 15, colour = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("A/T" = "blue4",
                               "G/C transversions" = "gray74",
                               "G/C transitions" = "white"))

# Raster version at a fixed pixel size, then a PDF of the same plot.
png(filename = plot3.png, width = 480, height = 300)
print(p)
dev.off()

ggsave(plot3.pdf, p)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
147
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
148
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
149
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
150
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
151
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
152
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
153
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
154
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
155
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
156
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
157
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
158
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
159
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
160
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
161
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
162
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
163
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
164
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
165
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
166
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
167
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
168
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
169
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
170
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
171
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
172
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
173
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
174
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
175
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
176
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
177
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
178