Mercurial > repos > davidvanzessen > shm_csr
comparison shm_csr.r @ 23:81453585dfc3 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 01 Dec 2016 09:32:06 -0500 |
parents | 372ccdcf0b2d |
children | 9955b23db68c |
comparison
equal
deleted
inserted
replaced
22:0bea8c187a90 | 23:81453585dfc3 |
---|---|
13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) | 13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) |
14 | 14 |
15 if(length(dat$Sequence.ID) == 0){ | 15 if(length(dat$Sequence.ID) == 0){ |
16 setwd(outputdir) | 16 setwd(outputdir) |
17 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) | 17 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) |
18 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") | 18 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)") |
19 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 19 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
20 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) | 20 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) |
21 row.names(transitionTable) = c("A", "C", "G", "T") | 21 row.names(transitionTable) = c("A", "C", "G", "T") |
22 transitionTable["A","A"] = NA | 22 transitionTable["A","A"] = NA |
23 transitionTable["C","C"] = NA | 23 transitionTable["C","C"] = NA |
294 transition2 = merge(transition2, base.order, by.x="id", by.y="base") | 294 transition2 = merge(transition2, base.order, by.x="id", by.y="base") |
295 | 295 |
296 transition2 = merge(transition2, base.order, by.x="variable", by.y="base") | 296 transition2 = merge(transition2, base.order, by.x="variable", by.y="base") |
297 | 297 |
298 transition2[is.na(transition2$value),]$value = 0 | 298 transition2[is.na(transition2$value),]$value = 0 |
299 | 299 |
300 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad | 300 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad |
301 print("Plotting stacked transition") | 301 print("Plotting heatmap and transition") |
302 png(filename=paste("transitions_stacked_", name, ".png", sep="")) | 302 png(filename=paste("transitions_stacked_", name, ".png", sep="")) |
303 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar | 303 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar |
304 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) | 304 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) |
305 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) | 305 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) |
306 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) | 306 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) |
307 print(p) | 307 print(p) |
308 dev.off() | 308 dev.off() |
309 | |
310 print("Plotting heatmap transition") | |
311 | |
312 png(filename=paste("transitions_heatmap_", name, ".png", sep="")) | 309 png(filename=paste("transitions_heatmap_", name, ".png", sep="")) |
313 p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap | 310 p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap |
314 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) | 311 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) |
315 print(p) | 312 print(p) |
316 dev.off() | 313 dev.off() |
317 } else { | 314 } else { |
318 print("No data to plot") | 315 #print("No data to plot") |
319 } | 316 } |
320 } | 317 } |
321 | 318 |
322 #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) | 319 #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) |
323 write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 320 write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
336 | 333 |
337 for(i in 1:length(funcs)){ | 334 for(i in 1:length(funcs)){ |
338 func = funcs[[i]] | 335 func = funcs[[i]] |
339 fname = fnames[[i]] | 336 fname = fnames[[i]] |
340 | 337 |
338 print(paste("Creating table for", fname)) | |
339 | |
341 rows = 9 | 340 rows = 9 |
342 if(fname == "sum"){ | 341 if(fname == "sum"){ |
343 rows = 11 | 342 rows = 11 |
344 } | 343 } |
345 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) | 344 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) |
346 for(i in 1:length(genes)){ | 345 for(i in 1:length(genes)){ |
347 print(paste("Creating table for", fname, genes[i])) | |
348 matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) | 346 matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) |
349 } | 347 } |
350 matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") | 348 matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") |
351 | 349 |
352 result = data.frame(matrx) | 350 result = data.frame(matrx) |
353 if(fname == "sum"){ | 351 if(fname == "sum"){ |
354 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") | 352 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") |
355 } else { | 353 } else { |
356 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") | 354 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") |
357 } | 355 } |
358 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) | 356 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) |
359 } | 357 } |
360 | 358 |
361 print("Adding median number of mutations to sum table") | 359 print("Adding median number of mutations to sum table") |
366 new.table[2,] = median.table[1,] | 364 new.table[2,] = median.table[1,] |
367 new.table[3:12,] = sum.table[2:11,] | 365 new.table[3:12,] = sum.table[2:11,] |
368 new.table[,1] = as.character(new.table[,1]) | 366 new.table[,1] = as.character(new.table[,1]) |
369 new.table[2,1] = "Median of Number of Mutations (%)" | 367 new.table[2,1] = "Median of Number of Mutations (%)" |
370 | 368 |
371 #sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] | 369 #sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] |
372 | 370 |
373 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) | 371 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) |
374 | 372 |
375 print("Plotting IGA piechart") | 373 print("Plotting IGA piechart") |
376 | 374 |
465 | 463 |
466 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) | 464 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) |
467 | 465 |
468 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) | 466 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) |
469 | 467 |
470 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") | 468 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) |
469 | |
470 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match") | |
471 | 471 |
472 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) | 472 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) |
473 | 473 |
474 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) | 474 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) |
475 | 475 |