comparison shm_csr.r @ 23:81453585dfc3 draft

Uploaded
author davidvanzessen
date Thu, 01 Dec 2016 09:32:06 -0500
parents 372ccdcf0b2d
children 9955b23db68c
comparison
equal deleted inserted replaced
22:0bea8c187a90 23:81453585dfc3
13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) 13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
14 14
15 if(length(dat$Sequence.ID) == 0){ 15 if(length(dat$Sequence.ID) == 0){
16 setwd(outputdir) 16 setwd(outputdir)
17 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) 17 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
18 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") 18 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)")
19 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) 19 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
20 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) 20 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
21 row.names(transitionTable) = c("A", "C", "G", "T") 21 row.names(transitionTable) = c("A", "C", "G", "T")
22 transitionTable["A","A"] = NA 22 transitionTable["A","A"] = NA
23 transitionTable["C","C"] = NA 23 transitionTable["C","C"] = NA
294 transition2 = merge(transition2, base.order, by.x="id", by.y="base") 294 transition2 = merge(transition2, base.order, by.x="id", by.y="base")
295 295
296 transition2 = merge(transition2, base.order, by.x="variable", by.y="base") 296 transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
297 297
298 transition2[is.na(transition2$value),]$value = 0 298 transition2[is.na(transition2$value),]$value = 0
299 299
300 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad 300 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad
301 print("Plotting stacked transition") 301 print("Plotting heatmap and transition")
302 png(filename=paste("transitions_stacked_", name, ".png", sep="")) 302 png(filename=paste("transitions_stacked_", name, ".png", sep=""))
303 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar 303 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
304 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) 304 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
305 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) 305 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
306 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) 306 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
307 print(p) 307 print(p)
308 dev.off() 308 dev.off()
309
310 print("Plotting heatmap transition")
311
312 png(filename=paste("transitions_heatmap_", name, ".png", sep="")) 309 png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
313 p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap 310 p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
314 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) 311 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
315 print(p) 312 print(p)
316 dev.off() 313 dev.off()
317 } else { 314 } else {
318 print("No data to plot") 315 #print("No data to plot")
319 } 316 }
320 } 317 }
321 318
322 #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) 319 #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
323 write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) 320 write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
336 333
337 for(i in 1:length(funcs)){ 334 for(i in 1:length(funcs)){
338 func = funcs[[i]] 335 func = funcs[[i]]
339 fname = fnames[[i]] 336 fname = fnames[[i]]
340 337
338 print(paste("Creating table for", fname))
339
341 rows = 9 340 rows = 9
342 if(fname == "sum"){ 341 if(fname == "sum"){
343 rows = 11 342 rows = 11
344 } 343 }
345 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) 344 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
346 for(i in 1:length(genes)){ 345 for(i in 1:length(genes)){
347 print(paste("Creating table for", fname, genes[i]))
348 matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) 346 matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
349 } 347 }
350 matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") 348 matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
351 349
352 result = data.frame(matrx) 350 result = data.frame(matrx)
353 if(fname == "sum"){ 351 if(fname == "sum"){
354 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") 352 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
355 } else { 353 } else {
356 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") 354 row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
357 } 355 }
358 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) 356 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
359 } 357 }
360 358
361 print("Adding median number of mutations to sum table") 359 print("Adding median number of mutations to sum table")
366 new.table[2,] = median.table[1,] 364 new.table[2,] = median.table[1,]
367 new.table[3:12,] = sum.table[2:11,] 365 new.table[3:12,] = sum.table[2:11,]
368 new.table[,1] = as.character(new.table[,1]) 366 new.table[,1] = as.character(new.table[,1])
369 new.table[2,1] = "Median of Number of Mutations (%)" 367 new.table[2,1] = "Median of Number of Mutations (%)"
370 368
371 #sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] 369 #sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
372 370
373 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) 371 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
374 372
375 print("Plotting IGA piechart") 373 print("Plotting IGA piechart")
376 374
465 463
466 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) 464 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)
467 465
468 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) 466 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
469 467
470 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") 468 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
469
470 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match")
471 471
472 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) 472 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
473 473
474 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) 474 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)
475 475