comparison report_clonality/RScript.r @ 32:7c33029fd63d draft

Uploaded
author davidvanzessen
date Tue, 28 Mar 2017 05:40:04 -0400
parents b50965edac24
children f37e072affc0
comparison
equal deleted inserted replaced
31:3a76faa53c59 32:7c33029fd63d
300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) 301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
302 302
303 png("VPlot.png",width = 1280, height = 720) 303 png("VPlot.png",width = 1280, height = 720)
304 pV 304 pV
305 dev.off(); 305 dev.off()
306
307 ggsave("VPlot.pdf", pV, width=13, height=7)
306 308
307 if(useD){ 309 if(useD){
308 pD = ggplot(PRODFD) 310 pD = ggplot(PRODFD)
309 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 311 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
310 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) 312 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors)
311 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 313 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
312 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) 314 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
313 315
314 png("DPlot.png",width = 800, height = 600) 316 png("DPlot.png",width = 800, height = 600)
315 print(pD) 317 print(pD)
316 dev.off(); 318 dev.off()
319
320 ggsave("DPlot.pdf", pD, width=10, height=7)
317 } 321 }
318 322
319 pJ = ggplot(PRODFJ) 323 pJ = ggplot(PRODFJ)
320 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 324 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
321 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) 325 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors)
322 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 326 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
323 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) 327 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
324 328
325 png("JPlot.png",width = 800, height = 600) 329 png("JPlot.png",width = 800, height = 600)
326 pJ 330 pJ
327 dev.off(); 331 dev.off()
332
333 ggsave("JPlot.pdf", pJ)
328 334
329 # ---------------------- Now the frequency plots of the V, D and J families ---------------------- 335 # ---------------------- Now the frequency plots of the V, D and J families ----------------------
330 336
331 print("Report Clonality - V, D and J family plots") 337 print("Report Clonality - V, D and J family plots")
332 338
342 ylab("Percentage of sequences") + 348 ylab("Percentage of sequences") +
343 scale_fill_manual(values=sample.colors) + 349 scale_fill_manual(values=sample.colors) +
344 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 350 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
345 png("VFPlot.png") 351 png("VFPlot.png")
346 VPlot 352 VPlot
347 dev.off(); 353 dev.off()
354 ggsave("VFPlot.pdf", VPlot)
355
348 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) 356 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
349 357
350 if(useD){ 358 if(useD){
351 DGenes = PRODF[,c("Sample", "Top.D.Gene")] 359 DGenes = PRODF[,c("Sample", "Top.D.Gene")]
352 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) 360 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene)
360 ylab("Percentage of sequences") + 368 ylab("Percentage of sequences") +
361 scale_fill_manual(values=sample.colors) + 369 scale_fill_manual(values=sample.colors) +
362 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 370 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
363 png("DFPlot.png") 371 png("DFPlot.png")
364 print(DPlot) 372 print(DPlot)
365 dev.off(); 373 dev.off()
374
375 ggsave("DFPlot.pdf", DPlot)
366 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) 376 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
367 } 377 }
368 378
369 # ---------------------- Plotting the cdr3 length ---------------------- 379 # ---------------------- Plotting the cdr3 length ----------------------
370 380
382 scale_fill_manual(values=sample.colors) + 392 scale_fill_manual(values=sample.colors) +
383 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 393 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
384 png("CDR3LengthPlot.png",width = 1280, height = 720) 394 png("CDR3LengthPlot.png",width = 1280, height = 720)
385 CDR3LengthPlot 395 CDR3LengthPlot
386 dev.off() 396 dev.off()
397
398 ggsave("CDR3LengthPlot.pdf", CDR3LengthPlot, width=12, height=7)
399
387 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) 400 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T)
388 401
389 # ---------------------- Plot the heatmaps ---------------------- 402 # ---------------------- Plot the heatmaps ----------------------
390 403
391 #get the reverse order for the V and D genes 404 #get the reverse order for the V and D genes
411 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 424 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
412 425
413 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) 426 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
414 print(img) 427 print(img)
415 dev.off() 428 dev.off()
429
430 ggsave(paste("HeatmapVD_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=13, width=8)
431
416 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) 432 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA)
417 } 433 }
418 434
419 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) 435 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")])
420 436
461 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 477 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
462 478
463 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) 479 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
464 print(img) 480 print(img)
465 dev.off() 481 dev.off()
482
483 ggsave(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=11, width=4)
484
466 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) 485 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA)
467 } 486 }
468 487
469 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) 488 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")])
470 489
510 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 529 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
511 530
512 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) 531 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
513 print(img) 532 print(img)
514 dev.off() 533 dev.off()
534
535 ggsave(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, width=4, height=7)
536
515 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) 537 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA)
516 } 538 }
517 539
518 540
519 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) 541 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")])
901 923
902 png("DReadingFrame.png") 924 png("DReadingFrame.png")
903 D.REGION.reading.frame 925 D.REGION.reading.frame
904 dev.off() 926 dev.off()
905 927
906 928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame)
907 929
908 930
909 # ---------------------- AA composition in CDR3 ---------------------- 931 # ---------------------- AA composition in CDR3 ----------------------
910 932
911 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] 933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
935 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) 957 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
936 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) 958 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
937 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) 959 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2)
938 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) 960 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2)
939 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) 961 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors)
940 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 962 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
941 963
942 png("AAComposition.png",width = 1280, height = 720) 964 png("AAComposition.png",width = 1280, height = 720)
943 AAfreqplot 965 AAfreqplot
944 dev.off() 966 dev.off()
967
968 ggsave("AAComposition.pdf", AAfreqplot, width=12, height=7)
969
945 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) 970 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
946 971
947 # ---------------------- AA median CDR3 length ---------------------- 972 # ---------------------- AA median CDR3 length ----------------------
948 973
949 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) 974 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")])