Mercurial > repos > davidvanzessen > mutation_analysis
comparison pattern_plots.r @ 0:8a5a2abbb870 draft default tip
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Mon, 29 Aug 2016 05:36:10 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:8a5a2abbb870 | 
|---|---|
| 1 library(ggplot2) | |
| 2 library(reshape2) | |
| 3 library(scales) | |
| 4 | |
| 5 args <- commandArgs(trailingOnly = TRUE) # positional: input file, then one output path prefix per plot | |
| 6 if (length(args) < 4) stop("expected 4 arguments: input.file plot1.path plot2.path plot3.path", call. = FALSE) # fail early rather than building output paths from NA | |
| 7 input.file = args[1] # the data that gets turned into the "SHM overview" table in the html report ("data_sum.txt") | |
| 8 | |
| 9 plot1.path = args[2] # output prefix for plot 1; ".png" and ".txt" are appended below | |
| 10 plot1.png = paste0(plot1.path, ".png") | |
| 11 plot1.txt = paste0(plot1.path, ".txt") | |
| 12 | |
| 13 plot2.path = args[3] # output prefix for plot 2 | |
| 14 plot2.png = paste0(plot2.path, ".png") | |
| 15 plot2.txt = paste0(plot2.path, ".txt") | |
| 16 | |
| 17 plot3.path = args[4] # output prefix for plot 3 | |
| 18 plot3.png = paste0(plot3.path, ".png") | |
| 19 plot3.txt = paste0(plot3.path, ".txt") | |
| 20 | |
| 21 dat = read.table(input.file, header=FALSE, sep=",", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=1) # headerless CSV; the first column becomes the row names | |
| 22 | |
| 23 # Name the columns CLASS.x, CLASS.y, CLASS.z for every class, followed by the "un" and "all" groups. | |
| 24 # NOTE(review): what x, y and z stand for is not visible in this script -- confirm against whatever writes the input file. | |
| 25 classes = c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm") | |
| 26 xyz = c("x", "y", "z") | |
| 27 new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) | |
| 28 | |
| 29 names(dat) = new.names | |
| 30 | |
| 31 dat["RGYW.WRCY",] = colSums(dat[c(13,14),]) # column-wise sum of input rows 13 and 14, stored under a new row name -- NOTE(review): presumably the RGYW and WRCY rows, confirm | |
| 32 dat["TW.WA",] = colSums(dat[c(15,16),]) # same for input rows 15 and 16 -- NOTE(review): presumably the TW and WA rows, confirm | |
| 33 | |
| 34 data1 = dat[c("RGYW.WRCY", "TW.WA"),] | |
| 35 # keep only the ".z" columns; the dot is escaped and anchored so it is matched literally (an unescaped "." matches any character) | |
| 36 data1 = data1[,names(data1)[grepl("\\.z$", names(data1))]] | |
| 37 names(data1) = gsub("\\..*", "", names(data1)) # drop everything from the first dot on, leaving the bare class name | |
| 38 | |
| 39 data1 = melt(t(data1)) # long format: one row per class / motif-row combination | |
| 40 | |
| 41 names(data1) = c("Class", "Type", "value") | |
| 42 | |
| 43 write.table(data1, plot1.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE) # tab-separated copy of the plotted data | |
| 44 | |
| 45 p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) | |
| 46 png(filename=plot1.png) | |
| 47 print(p) | |
| 48 dev.off() | |
| 49 | |
| 50 data2 = dat[5:8,] # input rows 5-8; below, row 1 is used as G/C transitions, row 2 as G/C transversions and rows 3-4 as A/T | |
| 51 | |
| 52 data2["sum",] = colSums(data2) # per-column total of the four rows, used as the denominator | |
| 53 | |
| 54 data2 = data2[,names(data2)[grepl("\\.x$", names(data2))]] # keep only the ".x" columns | |
| 55 names(data2) = gsub("\\.x$", "", names(data2)) # strip the literal ".x" suffix (an unescaped "." would match any character) | |
| 56 | |
| 57 data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1) | |
| 58 data2["A/T",is.nan(unlist(data2["A/T",]))] = 0 # 0/0 when a class has no counts | |
| 59 data2["A/T",is.infinite(unlist(data2["A/T",]))] = 0 # same guard as the two G/C rows below | |
| 60 data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1) | |
| 61 data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1) | |
| 62 | |
| 63 | |
| 64 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0 | |
| 65 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0 | |
| 66 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0 | |
| 67 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0 | |
| 68 | |
| 69 data2 = melt(t(data2[c("A/T", "G/C transversions", "G/C transitions"),])) # only the three percentage rows, in the order they were added | |
| 70 | |
| 71 names(data2) = c("Class", "Type", "value") | |
| 72 | |
| 73 write.table(data2, plot2.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE) # tab-separated copy of the plotted data | |
| 74 | |
| 75 p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") | |
| 76 png(filename=plot2.png) | |
| 77 print(p) | |
| 78 dev.off() | |
| 79 | |
| 80 data3 = dat[c(5, 6, 8, 17:20),] # rows 1-3 are the numerators (input rows 5, 6, 8); rows 4-7 (input rows 17-20) supply the denominators | |
| 81 data3 = data3[,names(data3)[grepl("\\.x$", names(data3))]] # keep only the ".x" columns | |
| 82 names(data3) = gsub("\\.x$", "", names(data3)) # strip the literal ".x" suffix (an unescaped "." would match any character) | |
| 83 # NOTE(review): rows 5 and 7 are presumably the G/C nucleotide counts and rows 4 and 6 the A/T counts -- confirm against the input layout. | |
| 84 data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1) | |
| 85 | |
| 86 data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1) | |
| 87 # NOTE(review): the A/T numerator is input row 8 only, whereas plot 2 sums input rows 7 and 8 -- confirm this difference is intended. | |
| 88 data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1) | |
| 89 # division by a zero denominator yields NaN or Inf; both are reported as 0 | |
| 90 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0 | |
| 91 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0 | |
| 92 | |
| 93 data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0 | |
| 94 data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0 | |
| 95 | |
| 96 data3["A/T",is.nan(unlist(data3["A/T",]))] = 0 | |
| 97 data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0 | |
| 98 | |
| 99 data3 = melt(t(data3[c("G/C transitions", "G/C transversions", "A/T"),])) # only the three percentage rows, in the order they were added | |
| 100 names(data3) = c("Class", "Type", "value") | |
| 101 | |
| 102 write.table(data3, plot3.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE) # tab-separated copy of the plotted data | |
| 103 | |
| 104 p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) | |
| 105 png(filename=plot3.png) | |
| 106 print(p) | |
| 107 dev.off() | |
| 108 | |
| 109 | |
| 110 | |
| 111 | |
| 112 | |
| 113 | |
| 114 | |
| 115 | |
| 116 | |
| 117 | |
| 118 | |
| 119 | |
| 120 | |
| 121 | |
| 122 | |
| 123 | |
| 124 | |
| 125 | |
| 126 | |
| 127 | |
| 128 | |
| 129 | |
| 130 | |
| 131 | |
| 132 | |
| 133 | |
| 134 | |
| 135 | |
| 136 | |
| 137 | |
| 138 | |
| 139 | 
