diff shm_csr.r @ 23:81453585dfc3 draft

Uploaded
author davidvanzessen
date Thu, 01 Dec 2016 09:32:06 -0500
parents 372ccdcf0b2d
children 9955b23db68c
line wrap: on
line diff
--- a/shm_csr.r	Mon Nov 28 10:27:22 2016 -0500
+++ b/shm_csr.r	Thu Dec 01 09:32:06 2016 -0500
@@ -15,7 +15,7 @@
 if(length(dat$Sequence.ID) == 0){
   setwd(outputdir)
   result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
-  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
+  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)")
   write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
   transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
   row.names(transitionTable) = c("A", "C", "G", "T")
@@ -296,9 +296,9 @@
 		transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
 
 		transition2[is.na(transition2$value),]$value = 0
-
+		
 		if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad
-			print("Plotting stacked transition")
+			print("Plotting heatmap and transition")
 			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
 			p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
@@ -306,16 +306,13 @@
 			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
 			print(p)
 			dev.off()
-
-			print("Plotting heatmap transition")
-
 			png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
 			p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
 			print(p)
 			dev.off()
 		} else {
-			print("No data to plot")
+			#print("No data to plot")
 		}
 	}
 
@@ -338,22 +335,23 @@
 	func = funcs[[i]]
 	fname = fnames[[i]]
 	
+	print(paste("Creating table for", fname))
+	
 	rows = 9
 	if(fname == "sum"){
 		rows = 11
 	}
 	matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
 	for(i in 1:length(genes)){
-		print(paste("Creating table for", fname, genes[i]))
 		matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
 	}
 	matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
 
 	result = data.frame(matrx)
 	if(fname == "sum"){
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
 	} else {
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
 	}
 	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
 }
@@ -368,7 +366,7 @@
 new.table[,1] = as.character(new.table[,1])
 new.table[2,1] = "Median of Number of Mutations (%)"
 
-#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
+#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
 
 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
 
@@ -467,7 +465,9 @@
 
 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
 
-frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
+frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
+
+frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match")
 
 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)