Mercurial > repos > davidvanzessen > shm_csr
diff shm_csr.r @ 23:81453585dfc3 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 01 Dec 2016 09:32:06 -0500 |
parents | 372ccdcf0b2d |
children | 9955b23db68c |
line wrap: on
line diff
--- a/shm_csr.r Mon Nov 28 10:27:22 2016 -0500 +++ b/shm_csr.r Thu Dec 01 09:32:06 2016 -0500 @@ -15,7 +15,7 @@ if(length(dat$Sequence.ID) == 0){ setwd(outputdir) result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) - row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") + row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)") write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) row.names(transitionTable) = c("A", "C", "G", "T") @@ -296,9 +296,9 @@ transition2 = merge(transition2, base.order, by.x="variable", by.y="base") transition2[is.na(transition2$value),]$value = 0 - + if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad - print("Plotting stacked transition") + print("Plotting heatmap and transition") png(filename=paste("transitions_stacked_", name, ".png", sep="")) p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) @@ -306,16 +306,13 @@ #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) print(p) dev.off() - - print("Plotting heatmap transition") - png(filename=paste("transitions_heatmap_", name, ".png", sep="")) p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) print(p) dev.off() } else { - print("No data to plot") + #print("No data to plot") } } @@ -338,22 +335,23 @@ func = funcs[[i]] fname = fnames[[i]] + print(paste("Creating table for", fname)) + rows = 9 if(fname == "sum"){ rows = 11 } matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) for(i in 1:length(genes)){ - print(paste("Creating table for", fname, genes[i])) matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) } matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") result = data.frame(matrx) if(fname == "sum"){ - row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") + row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") } else { - row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") + row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") } write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) } @@ -368,7 +366,7 @@ new.table[,1] = as.character(new.table[,1]) new.table[2,1] = "Median of Number of Mutations (%)" -#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] +#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) @@ -467,7 +465,9 @@ frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) -frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") +frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) + +frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match") frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)