Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 32:7c33029fd63d draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 28 Mar 2017 05:40:04 -0400 |
parents | b50965edac24 |
children | f37e072affc0 |
comparison
equal
deleted
inserted
replaced
31:3a76faa53c59 | 32:7c33029fd63d |
---|---|
300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
302 | 302 |
303 png("VPlot.png",width = 1280, height = 720) | 303 png("VPlot.png",width = 1280, height = 720) |
304 pV | 304 pV |
305 dev.off(); | 305 dev.off() |
306 | |
307 ggsave("VPlot.pdf", pV, width=13, height=7) | |
306 | 308 |
307 if(useD){ | 309 if(useD){ |
308 pD = ggplot(PRODFD) | 310 pD = ggplot(PRODFD) |
309 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 311 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
310 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) | 312 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) |
311 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 313 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
312 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 314 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
313 | 315 |
314 png("DPlot.png",width = 800, height = 600) | 316 png("DPlot.png",width = 800, height = 600) |
315 print(pD) | 317 print(pD) |
316 dev.off(); | 318 dev.off() |
319 | |
320 ggsave("DPlot.pdf", pD, width=10, height=7) | |
317 } | 321 } |
318 | 322 |
319 pJ = ggplot(PRODFJ) | 323 pJ = ggplot(PRODFJ) |
320 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 324 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
321 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) | 325 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) |
322 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 326 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
323 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 327 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
324 | 328 |
325 png("JPlot.png",width = 800, height = 600) | 329 png("JPlot.png",width = 800, height = 600) |
326 pJ | 330 pJ |
327 dev.off(); | 331 dev.off() |
332 | |
333 ggsave("JPlot.pdf", pJ) | |
328 | 334 |
329 # ---------------------- Now the frequency plots of the V, D and J families ---------------------- | 335 # ---------------------- Now the frequency plots of the V, D and J families ---------------------- |
330 | 336 |
331 print("Report Clonality - V, D and J family plots") | 337 print("Report Clonality - V, D and J family plots") |
332 | 338 |
342 ylab("Percentage of sequences") + | 348 ylab("Percentage of sequences") + |
343 scale_fill_manual(values=sample.colors) + | 349 scale_fill_manual(values=sample.colors) + |
344 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 350 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
345 png("VFPlot.png") | 351 png("VFPlot.png") |
346 VPlot | 352 VPlot |
347 dev.off(); | 353 dev.off() |
354 ggsave("VFPlot.pdf", VPlot) | |
355 | |
348 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 356 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
349 | 357 |
350 if(useD){ | 358 if(useD){ |
351 DGenes = PRODF[,c("Sample", "Top.D.Gene")] | 359 DGenes = PRODF[,c("Sample", "Top.D.Gene")] |
352 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) | 360 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) |
360 ylab("Percentage of sequences") + | 368 ylab("Percentage of sequences") + |
361 scale_fill_manual(values=sample.colors) + | 369 scale_fill_manual(values=sample.colors) + |
362 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 370 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
363 png("DFPlot.png") | 371 png("DFPlot.png") |
364 print(DPlot) | 372 print(DPlot) |
365 dev.off(); | 373 dev.off() |
374 | |
375 ggsave("DFPlot.pdf", DPlot) | |
366 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 376 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
367 } | 377 } |
368 | 378 |
369 # ---------------------- Plotting the cdr3 length ---------------------- | 379 # ---------------------- Plotting the cdr3 length ---------------------- |
370 | 380 |
382 scale_fill_manual(values=sample.colors) + | 392 scale_fill_manual(values=sample.colors) + |
383 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 393 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
384 png("CDR3LengthPlot.png",width = 1280, height = 720) | 394 png("CDR3LengthPlot.png",width = 1280, height = 720) |
385 CDR3LengthPlot | 395 CDR3LengthPlot |
386 dev.off() | 396 dev.off() |
397 | |
398 ggsave("CDR3LengthPlot.pdf", CDR3LengthPlot, width=12, height=7) | |
399 | |
387 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) | 400 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) |
388 | 401 |
389 # ---------------------- Plot the heatmaps ---------------------- | 402 # ---------------------- Plot the heatmaps ---------------------- |
390 | 403 |
391 #get the reverse order for the V and D genes | 404 #get the reverse order for the V and D genes |
411 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 424 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
412 | 425 |
413 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 426 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
414 print(img) | 427 print(img) |
415 dev.off() | 428 dev.off() |
429 | |
430 ggsave(paste("HeatmapVD_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=13, width=8) | |
431 | |
416 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 432 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
417 } | 433 } |
418 | 434 |
419 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 435 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) |
420 | 436 |
461 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 477 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
462 | 478 |
463 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 479 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
464 print(img) | 480 print(img) |
465 dev.off() | 481 dev.off() |
482 | |
483 ggsave(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=11, width=4) | |
484 | |
466 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 485 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
467 } | 486 } |
468 | 487 |
469 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 488 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) |
470 | 489 |
510 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 529 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
511 | 530 |
512 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 531 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
513 print(img) | 532 print(img) |
514 dev.off() | 533 dev.off() |
534 | |
535 ggsave(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, width=4, height=7) | |
536 | |
515 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 537 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
516 } | 538 } |
517 | 539 |
518 | 540 |
519 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 541 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
901 | 923 |
902 png("DReadingFrame.png") | 924 png("DReadingFrame.png") |
903 D.REGION.reading.frame | 925 D.REGION.reading.frame |
904 dev.off() | 926 dev.off() |
905 | 927 |
906 | 928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) |
907 | 929 |
908 | 930 |
909 # ---------------------- AA composition in CDR3 ---------------------- | 931 # ---------------------- AA composition in CDR3 ---------------------- |
910 | 932 |
911 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
935 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) | 957 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) |
936 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) | 958 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) |
937 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) | 959 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) |
938 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) | 960 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) |
939 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) | 961 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) |
940 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 962 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
941 | 963 |
942 png("AAComposition.png",width = 1280, height = 720) | 964 png("AAComposition.png",width = 1280, height = 720) |
943 AAfreqplot | 965 AAfreqplot |
944 dev.off() | 966 dev.off() |
967 | |
968 ggsave("AAComposition.pdf", AAfreqplot, width=12, height=7) | |
969 | |
945 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 970 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
946 | 971 |
947 # ---------------------- AA median CDR3 length ---------------------- | 972 # ---------------------- AA median CDR3 length ---------------------- |
948 | 973 |
949 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) | 974 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) |