Mercurial > repos > shians > shrnaseq
comparison hairpinTool.R @ 7:91e411fcdecc
Version 1.0.8
- Added differential representation counts table
author | shian_su <registertonysu@gmail.com> |
---|---|
date | Wed, 23 Apr 2014 14:05:26 +1000 |
parents | 3d04308a99f9 |
children | 548802b3492f |
comparison
equal
deleted
inserted
replaced
6:3d04308a99f9 | 7:91e411fcdecc |
---|---|
40 # Bar Plot of Counts Per Hairpin | 40 # Bar Plot of Counts Per Hairpin |
41 # MDS Plot | 41 # MDS Plot |
42 # Smear Plot | 42 # Smear Plot |
43 # Barcode Plots (If Genewise testing was selected) | 43 # Barcode Plots (If Genewise testing was selected) |
44 # Top Expression Table | 44 # Top Expression Table |
45 # Feature Counts Table | |
45 # HTML file linking to the ouputs | 46 # HTML file linking to the ouputs |
46 # | 47 # |
47 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 | 48 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 |
48 | 49 |
49 # Record starting time | 50 # Record starting time |
61 } | 62 } |
62 | 63 |
63 ################################################################################ | 64 ################################################################################ |
64 ### Function declarations | 65 ### Function declarations |
65 ################################################################################ | 66 ################################################################################ |
67 | |
68 # Function to load libaries without messages | |
69 silentLibrary <- function(...) { | |
70 list <- c(...) | |
71 for (package in list){ | |
72 suppressPackageStartupMessages(library(package, character.only=TRUE)) | |
73 } | |
74 } | |
66 | 75 |
67 # Function to sanitise contrast equations so there are no whitespaces | 76 # Function to sanitise contrast equations so there are no whitespaces |
68 # surrounding the arithmetic operators, leading or trailing whitespace | 77 # surrounding the arithmetic operators, leading or trailing whitespace |
69 sanitiseEquation <- function(equation) { | 78 sanitiseEquation <- function(equation) { |
70 equation <- gsub(" *[+] *", "+", equation) | 79 equation <- gsub(" *[+] *", "+", equation) |
94 } | 103 } |
95 | 104 |
96 # Function has string input and generates both a pdf and png output strings | 105 # Function has string input and generates both a pdf and png output strings |
97 imgOut <- function(filename) { | 106 imgOut <- function(filename) { |
98 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), | 107 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), |
99 envir = .GlobalEnv) | 108 envir=.GlobalEnv) |
100 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), | 109 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), |
101 envir = .GlobalEnv) | 110 envir=.GlobalEnv) |
102 } | 111 } |
103 | 112 |
104 # Create cat function default path set, default seperator empty and appending | 113 # Create cat function default path set, default seperator empty and appending |
105 # true by default (Ripped straight from the cat function with altered argument | 114 # true by default (Ripped straight from the cat function with altered argument |
106 # defaults) | 115 # defaults) |
107 cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, | 116 cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL, |
108 append = TRUE) { | 117 append=TRUE) { |
109 if (is.character(file)) | 118 if (is.character(file)) |
110 if (file == "") | 119 if (file == "") |
111 file <- stdout() | 120 file <- stdout() |
112 else if (substring(file, 1L, 1L) == "|") { | 121 else if (substring(file, 1L, 1L) == "|") { |
113 file <- pipe(substring(file, 2L), "w") | 122 file <- pipe(substring(file, 2L), "w") |
307 # link address | 316 # link address |
308 linkData <- data.frame(Label=character(), Link=character(), | 317 linkData <- data.frame(Label=character(), Link=character(), |
309 stringsAsFactors=FALSE) | 318 stringsAsFactors=FALSE) |
310 imageData <- data.frame(Label=character(), Link=character(), | 319 imageData <- data.frame(Label=character(), Link=character(), |
311 stringsAsFactors=FALSE) | 320 stringsAsFactors=FALSE) |
321 | |
322 # Initialise vectors for storage of up/down/neutral regulated counts | |
323 upCount <- numeric() | |
324 downCount <- numeric() | |
325 flatCount <- numeric() | |
326 | |
312 ################################################################################ | 327 ################################################################################ |
313 ### Data Processing | 328 ### Data Processing |
314 ################################################################################ | 329 ################################################################################ |
315 | 330 |
316 # Transform gene selection from string into index values for mroast | 331 # Transform gene selection from string into index values for mroast |
456 testData <- exactTest(data, pair=pairData) | 471 testData <- exactTest(data, pair=pairData) |
457 | 472 |
458 top <- topTags(testData, n=Inf) | 473 top <- topTags(testData, n=Inf) |
459 topIDs <- top$table[(top$table$FDR < fdrThresh) & | 474 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
460 (abs(top$table$logFC) > lfcThresh), 1] | 475 (abs(top$table$logFC) > lfcThresh), 1] |
476 | |
461 write.table(top, file=topOut, row.names=FALSE, sep="\t") | 477 write.table(top, file=topOut, row.names=FALSE, sep="\t") |
478 | |
462 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], | 479 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], |
463 ") (.tsv)") | 480 ") (.tsv)") |
464 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") | 481 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") |
465 linkData <- rbind(linkData, c(linkName, linkAddr)) | 482 linkData <- rbind(linkData, c(linkName, linkAddr)) |
466 | 483 |
484 upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) | |
485 downCount[1] <- sum(top$table$FDR < fdrThresh & | |
486 top$table$logFC < -lfcThresh) | |
487 flatCount[1] <- sum(top$table$FDR > fdrThresh | | |
488 abs(top$table$logFC) < lfcThresh) | |
489 | |
490 | |
491 | |
467 # Select hairpins with FDR < 0.05 to highlight on plot | 492 # Select hairpins with FDR < 0.05 to highlight on plot |
468 png(smearPng, width=600, height=600) | 493 png(smearPng, width=600, height=600) |
469 plotTitle <- gsub(".", " ", | 494 plotTitle <- gsub(".", " ", |
470 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | 495 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), |
471 fixed = TRUE) | 496 fixed=TRUE) |
472 plotSmear(testData, de.tags=topIDs, | 497 plotSmear(testData, de.tags=topIDs, |
473 pch=20, cex=1.0, main=plotTitle) | 498 pch=20, cex=1.0, main=plotTitle) |
474 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) | 499 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
475 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") | 500 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") |
476 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") | 501 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") |
477 imageData <- rbind(imageData, c(imgName, imgAddr)) | 502 imageData <- rbind(imageData, c(imgName, imgAddr)) |
478 invisible(dev.off()) | 503 invisible(dev.off()) |
479 | 504 |
480 pdf(smearPdf) | 505 pdf(smearPdf) |
481 plotTitle <- gsub(".", " ", | 506 plotTitle <- gsub(".", " ", |
482 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | 507 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), |
483 fixed = TRUE) | 508 fixed=TRUE) |
484 plotSmear(testData, de.tags=topIDs, | 509 plotSmear(testData, de.tags=topIDs, |
485 pch=20, cex=1.0, main=plotTitle) | 510 pch=20, cex=1.0, main=plotTitle) |
486 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) | 511 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
487 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") | 512 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") |
488 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") | 513 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") |
489 linkData <- rbind(linkData, c(imgName, imgAddr)) | 514 linkData <- rbind(linkData, c(imgName, imgAddr)) |
490 invisible(dev.off()) | 515 invisible(dev.off()) |
516 | |
491 } else if (workMode=="glm") { | 517 } else if (workMode=="glm") { |
492 # Generating design information | 518 # Generating design information |
493 factors <- factor(data$sample$group) | 519 factors <- factor(data$sample$group) |
494 design <- model.matrix(~0+factors) | 520 design <- model.matrix(~0+factors) |
495 | 521 |
496 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) | 522 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) |
497 | 523 |
498 # Split up contrasts seperated by comma into a vector | 524 # Split up contrasts seperated by comma into a vector |
499 contrastData <- unlist(strsplit(contrastData, split=",")) | 525 contrastData <- unlist(strsplit(contrastData, split=",")) |
526 | |
500 for (i in 1:length(contrastData)) { | 527 for (i in 1:length(contrastData)) { |
501 # Generate contrasts information | 528 # Generate contrasts information |
502 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) | 529 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) |
503 | 530 |
504 # Fit negative bionomial GLM | 531 # Fit negative bionomial GLM |
505 fit = glmFit(data, design) | 532 fit <- glmFit(data, design) |
506 # Carry out Likelihood ratio test | 533 # Carry out Likelihood ratio test |
507 testData = glmLRT(fit, contrast=contrasts) | 534 testData <- glmLRT(fit, contrast=contrasts) |
508 | 535 |
509 # Select hairpins with FDR < 0.05 to highlight on plot | 536 # Select hairpins with FDR < 0.05 to highlight on plot |
510 top <- topTags(testData, n=Inf) | 537 top <- topTags(testData, n=Inf) |
511 topIDs <- top$table[(top$table$FDR < fdrThresh) & | 538 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
512 (abs(top$table$logFC) > lfcThresh), 1] | 539 (abs(top$table$logFC) > lfcThresh), 1] |
513 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") | 540 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") |
514 | 541 |
515 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)") | 542 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)") |
516 linkAddr <- paste0("toptag(", contrastData[i], ").tsv") | 543 linkAddr <- paste0("toptag(", contrastData[i], ").tsv") |
517 linkData <- rbind(linkData, c(linkName, linkAddr)) | 544 linkData <- rbind(linkData, c(linkName, linkAddr)) |
545 | |
546 # Collect counts for differential representation | |
547 upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) | |
548 downCount[i] <- sum(top$table$FDR < fdrThresh & | |
549 top$table$logFC < -lfcThresh) | |
550 flatCount[i] <- sum(top$table$FDR > fdrThresh | | |
551 abs(top$table$logFC) < lfcThresh) | |
518 | 552 |
519 # Make a plot of logFC versus logCPM | 553 # Make a plot of logFC versus logCPM |
520 png(smearPng[i], height=600, width=600) | 554 png(smearPng[i], height=600, width=600) |
521 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], | 555 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], |
522 fixed=TRUE)) | 556 fixed=TRUE)) |
549 } | 583 } |
550 } | 584 } |
551 | 585 |
552 if (wantRoast) { | 586 if (wantRoast) { |
553 # Input preparaton for roast | 587 # Input preparaton for roast |
554 nrot = 9999 | 588 nrot <- 9999 |
555 set.seed(602214129) | 589 set.seed(602214129) |
556 roastData <- mroast(data, index=geneList, design=design, | 590 roastData <- mroast(data, index=geneList, design=design, |
557 contrast=contrasts, nrot=nrot) | 591 contrast=contrasts, nrot=nrot) |
558 roastData <- cbind(GeneID=rownames(roastData), roastData) | 592 roastData <- cbind(GeneID=rownames(roastData), roastData) |
559 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") | 593 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") |
600 dev.off() | 634 dev.off() |
601 } | 635 } |
602 } | 636 } |
603 } | 637 } |
604 | 638 |
639 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) | |
640 if (workMode == "glm") { | |
641 row.names(sigDiff) <- contrastData | |
642 } else if (workMode == "classic") { | |
643 row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1]) | |
644 } | |
645 | |
605 ID <- rownames(data$counts) | 646 ID <- rownames(data$counts) |
606 outputCounts <- cbind(ID, data$counts) | 647 outputCounts <- cbind(ID, data$counts) |
607 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t") | 648 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t") |
608 linkName <- "Counts table (.tsv)" | 649 linkName <- "Counts table (.tsv)" |
609 linkAddr <- "counts.tsv" | 650 linkAddr <- "counts.tsv" |
650 | 691 |
651 cata("The estimated common biological coefficient of variation (BCV) is: ", | 692 cata("The estimated common biological coefficient of variation (BCV) is: ", |
652 commonBCV, "<br />\n") | 693 commonBCV, "<br />\n") |
653 | 694 |
654 cata("<h4>Output:</h4>\n") | 695 cata("<h4>Output:</h4>\n") |
655 cata("All images displayed have PDF copy at the bottom of the page, these can ") | 696 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") |
656 cata("exported in a pdf viewer to high resolution image format. <br />\n") | |
657 for (i in 1:nrow(imageData)) { | 697 for (i in 1:nrow(imageData)) { |
658 if (grepl("barcode", imageData$Link[i])) { | 698 if (grepl("barcode", imageData$Link[i])) { |
659 if (packageVersion("limma")<"3.19.19") { | 699 if (packageVersion("limma")<"3.19.19") { |
660 HtmlImage(imageData$Link[i], imageData$Label[i], | 700 HtmlImage(imageData$Link[i], imageData$Label[i], |
661 height=length(selectedGenes)*150) | 701 height=length(selectedGenes)*150) |
667 HtmlImage(imageData$Link[i], imageData$Label[i]) | 707 HtmlImage(imageData$Link[i], imageData$Label[i]) |
668 } | 708 } |
669 } | 709 } |
670 cata("<br />\n") | 710 cata("<br />\n") |
671 | 711 |
712 cata("<h4>Differential Representation Counts:</h4>\n") | |
713 | |
714 cata("<table border=\"1\" cellpadding=\"4\">\n") | |
715 cata("<tr>\n") | |
716 TableItem() | |
717 for (i in colnames(sigDiff)) { | |
718 TableHeadItem(i) | |
719 } | |
720 cata("</tr>\n") | |
721 for (i in 1:nrow(sigDiff)) { | |
722 cata("<tr>\n") | |
723 TableHeadItem(unmake.names(row.names(sigDiff)[i])) | |
724 for (j in 1:ncol(sigDiff)) { | |
725 TableItem(as.character(sigDiff[i, j])) | |
726 } | |
727 cata("</tr>\n") | |
728 } | |
729 cata("</table>") | |
730 | |
672 cata("<h4>Plots:</h4>\n") | 731 cata("<h4>Plots:</h4>\n") |
673 for (i in 1:nrow(linkData)) { | 732 for (i in 1:nrow(linkData)) { |
674 if (!grepl(".tsv", linkData$Link[i])) { | 733 if (!grepl(".tsv", linkData$Link[i])) { |
675 HtmlLink(linkData$Link[i], linkData$Label[i]) | 734 HtmlLink(linkData$Link[i], linkData$Label[i]) |
676 } | 735 } |
681 if (grepl(".tsv", linkData$Link[i])) { | 740 if (grepl(".tsv", linkData$Link[i])) { |
682 HtmlLink(linkData$Link[i], linkData$Label[i]) | 741 HtmlLink(linkData$Link[i], linkData$Label[i]) |
683 } | 742 } |
684 } | 743 } |
685 | 744 |
686 cata("<p>alt-click any of the links to download the file, or click the name ") | 745 cata("<p>Alt-click links to download file.</p>\n") |
687 cata("of this task in the galaxy history panel and click on the floppy ") | 746 cata("<p>Click floppy disc icon associated history item to download ") |
688 cata("disk icon to download all files in a zip archive.</p>\n") | 747 cata("all files.</p>\n") |
689 cata("<p>.tsv files are tab seperated files that can be viewed using Excel ") | 748 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") |
690 cata("or other spreadsheet programs</p>\n") | |
691 | 749 |
692 cata("<h4>Additional Information:</h4>\n") | 750 cata("<h4>Additional Information:</h4>\n") |
693 | 751 |
694 if (inputType == "fastq") { | 752 if (inputType == "fastq") { |
695 ListItem("Data was gathered from fastq raw read file(s).") | 753 ListItem("Data was gathered from fastq raw read file(s).") |
696 } else if (inputType == "counts") { | 754 } else if (inputType == "counts") { |
697 ListItem("Data was gathered from a table of counts.") | 755 ListItem("Data was gathered from a table of counts.") |
698 } | 756 } |
699 | 757 |
700 if (cpmReq!=0 && sampleReq!=0) { | 758 if (cpmReq!=0 && sampleReq!=0) { |
701 tempStr <- paste("Hairpins that do not have more than", cpmReq, | 759 tempStr <- paste("Hairpins with less than", cpmReq, |
702 "CPM in at least", sampleReq, "samples are considered", | 760 "CPM in at least", sampleReq, "samples are insignificant", |
703 "insignificant and filtered out.") | 761 "and filtered out.") |
704 ListItem(tempStr) | 762 ListItem(tempStr) |
705 filterProp <- round(filteredCount/preFilterCount*100, digits=2) | 763 filterProp <- round(filteredCount/preFilterCount*100, digits=2) |
706 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, | 764 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, |
707 "%) hairpins were filtered out for low count-per-million.") | 765 "%) hairpins were filtered out for low count-per-million.") |
708 ListItem(tempStr) | 766 ListItem(tempStr) |
711 if (workMode == "classic") { | 769 if (workMode == "classic") { |
712 ListItem("An exact test was performed on each hairpin.") | 770 ListItem("An exact test was performed on each hairpin.") |
713 } else if (workMode == "glm") { | 771 } else if (workMode == "glm") { |
714 ListItem("A generalised linear model was fitted to each hairpin.") | 772 ListItem("A generalised linear model was fitted to each hairpin.") |
715 } | 773 } |
716 | |
717 | |
718 | 774 |
719 cit <- character() | 775 cit <- character() |
720 link <-character() | 776 link <-character() |
721 link[1] <- paste0("<a href=\"", | 777 link[1] <- paste0("<a href=\"", |
722 "http://www.bioconductor.org/packages/release/bioc/", | 778 "http://www.bioconductor.org/packages/release/bioc/", |