comparison shm_csr.r @ 5:012a738edf5a draft

Uploaded
author davidvanzessen
date Tue, 01 Nov 2016 10:15:37 -0400
parents 477e95b098fd
children ad9be244b104
comparison
equal deleted inserted replaced
4:477e95b098fd 5:012a738edf5a
278 transition2 = merge(transition2, base.order, by.x="id", by.y="base") 278 transition2 = merge(transition2, base.order, by.x="id", by.y="base")
279 279
280 transition2 = merge(transition2, base.order, by.x="variable", by.y="base") 280 transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
281 281
282 transition2[is.na(transition2$value),]$value = 0 282 transition2[is.na(transition2$value),]$value = 0
283
284 print(transition2)
285 283
286 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad 284 if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad
287 print("Plotting stacked transition") 285 print("Plotting stacked transition")
288 png(filename=paste("transitions_stacked_", name, ".png", sep="")) 286 png(filename=paste("transitions_stacked_", name, ".png", sep=""))
289 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar 287 p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
290 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) 288 p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
291 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) 289 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
292 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) 290 #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
293 print(p) 291 print(p)
294 dev.off() 292 dev.off()
295 293
296 print("Plotting heatmap transition") 294 print("Plotting heatmap transition")
372 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 370 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
373 371
374 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) 372 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
375 pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4")) 373 pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4"))
376 pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) 374 pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
377 pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) 375 pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
378 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")")) 376 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
379 write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) 377 write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
380 378
381 png(filename="IGA.png") 379 png(filename="IGA.png")
382 print(pc) 380 print(pc)
393 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 391 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
394 392
395 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) 393 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
396 pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) 394 pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred"))
397 pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) 395 pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
398 pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) 396 pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
399 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")")) 397 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
400 write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) 398 write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
401 399
402 png(filename="IGG.png") 400 png(filename="IGG.png")
403 print(pc) 401 print(pc)
413 411
414 dat.clss = rbind(dat, dat.clss) 412 dat.clss = rbind(dat, dat.clss)
415 413
416 p = ggplot(dat.clss, aes(best_match, percentage_mutations)) 414 p = ggplot(dat.clss, aes(best_match, percentage_mutations))
417 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) 415 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
418 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) 416 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
419 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) 417 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4"))
420 p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) 418 p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4"))
421 419
422 png(filename="scatter.png") 420 png(filename="scatter.png")
423 print(p) 421 print(p)
440 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") 438 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
441 439
442 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) 440 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
443 441
444 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) 442 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
445 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) 443 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
446 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "black", "all" = "blue4")) 444 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "black", "all" = "blue4"))
447 445
448 png(filename="frequency_ranges.png") 446 png(filename="frequency_ranges.png")
449 print(p) 447 print(p)
450 dev.off() 448 dev.off()