comparison hairpinTool.R @ 7:91e411fcdecc

Version 1.0.8 - Added differential representation counts table
author shian_su <registertonysu@gmail.com>
date Wed, 23 Apr 2014 14:05:26 +1000
parents 3d04308a99f9
children 548802b3492f
comparison
equal deleted inserted replaced
6:3d04308a99f9 7:91e411fcdecc
40 # Bar Plot of Counts Per Hairpin 40 # Bar Plot of Counts Per Hairpin
41 # MDS Plot 41 # MDS Plot
42 # Smear Plot 42 # Smear Plot
43 # Barcode Plots (If Genewise testing was selected) 43 # Barcode Plots (If Genewise testing was selected)
44 # Top Expression Table 44 # Top Expression Table
45 # Feature Counts Table
45 # HTML file linking to the ouputs 46 # HTML file linking to the ouputs
46 # 47 #
47 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 48 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
48 49
49 # Record starting time 50 # Record starting time
61 } 62 }
62 63
63 ################################################################################ 64 ################################################################################
64 ### Function declarations 65 ### Function declarations
65 ################################################################################ 66 ################################################################################
67
68 # Function to load libaries without messages
69 silentLibrary <- function(...) {
70 list <- c(...)
71 for (package in list){
72 suppressPackageStartupMessages(library(package, character.only=TRUE))
73 }
74 }
66 75
67 # Function to sanitise contrast equations so there are no whitespaces 76 # Function to sanitise contrast equations so there are no whitespaces
68 # surrounding the arithmetic operators, leading or trailing whitespace 77 # surrounding the arithmetic operators, leading or trailing whitespace
69 sanitiseEquation <- function(equation) { 78 sanitiseEquation <- function(equation) {
70 equation <- gsub(" *[+] *", "+", equation) 79 equation <- gsub(" *[+] *", "+", equation)
94 } 103 }
95 104
96 # Function has string input and generates both a pdf and png output strings 105 # Function has string input and generates both a pdf and png output strings
97 imgOut <- function(filename) { 106 imgOut <- function(filename) {
98 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), 107 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")),
99 envir = .GlobalEnv) 108 envir=.GlobalEnv)
100 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), 109 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")),
101 envir = .GlobalEnv) 110 envir=.GlobalEnv)
102 } 111 }
103 112
104 # Create cat function default path set, default seperator empty and appending 113 # Create cat function default path set, default seperator empty and appending
105 # true by default (Ripped straight from the cat function with altered argument 114 # true by default (Ripped straight from the cat function with altered argument
106 # defaults) 115 # defaults)
107 cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, 116 cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL,
108 append = TRUE) { 117 append=TRUE) {
109 if (is.character(file)) 118 if (is.character(file))
110 if (file == "") 119 if (file == "")
111 file <- stdout() 120 file <- stdout()
112 else if (substring(file, 1L, 1L) == "|") { 121 else if (substring(file, 1L, 1L) == "|") {
113 file <- pipe(substring(file, 2L), "w") 122 file <- pipe(substring(file, 2L), "w")
307 # link address 316 # link address
308 linkData <- data.frame(Label=character(), Link=character(), 317 linkData <- data.frame(Label=character(), Link=character(),
309 stringsAsFactors=FALSE) 318 stringsAsFactors=FALSE)
310 imageData <- data.frame(Label=character(), Link=character(), 319 imageData <- data.frame(Label=character(), Link=character(),
311 stringsAsFactors=FALSE) 320 stringsAsFactors=FALSE)
321
322 # Initialise vectors for storage of up/down/neutral regulated counts
323 upCount <- numeric()
324 downCount <- numeric()
325 flatCount <- numeric()
326
312 ################################################################################ 327 ################################################################################
313 ### Data Processing 328 ### Data Processing
314 ################################################################################ 329 ################################################################################
315 330
316 # Transform gene selection from string into index values for mroast 331 # Transform gene selection from string into index values for mroast
456 testData <- exactTest(data, pair=pairData) 471 testData <- exactTest(data, pair=pairData)
457 472
458 top <- topTags(testData, n=Inf) 473 top <- topTags(testData, n=Inf)
459 topIDs <- top$table[(top$table$FDR < fdrThresh) & 474 topIDs <- top$table[(top$table$FDR < fdrThresh) &
460 (abs(top$table$logFC) > lfcThresh), 1] 475 (abs(top$table$logFC) > lfcThresh), 1]
476
461 write.table(top, file=topOut, row.names=FALSE, sep="\t") 477 write.table(top, file=topOut, row.names=FALSE, sep="\t")
478
462 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], 479 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1],
463 ") (.tsv)") 480 ") (.tsv)")
464 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") 481 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv")
465 linkData <- rbind(linkData, c(linkName, linkAddr)) 482 linkData <- rbind(linkData, c(linkName, linkAddr))
466 483
484 upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh)
485 downCount[1] <- sum(top$table$FDR < fdrThresh &
486 top$table$logFC < -lfcThresh)
487 flatCount[1] <- sum(top$table$FDR > fdrThresh |
488 abs(top$table$logFC) < lfcThresh)
489
490
491
467 # Select hairpins with FDR < 0.05 to highlight on plot 492 # Select hairpins with FDR < 0.05 to highlight on plot
468 png(smearPng, width=600, height=600) 493 png(smearPng, width=600, height=600)
469 plotTitle <- gsub(".", " ", 494 plotTitle <- gsub(".", " ",
470 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), 495 paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
471 fixed = TRUE) 496 fixed=TRUE)
472 plotSmear(testData, de.tags=topIDs, 497 plotSmear(testData, de.tags=topIDs,
473 pch=20, cex=1.0, main=plotTitle) 498 pch=20, cex=1.0, main=plotTitle)
474 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) 499 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
475 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") 500 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")")
476 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") 501 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png")
477 imageData <- rbind(imageData, c(imgName, imgAddr)) 502 imageData <- rbind(imageData, c(imgName, imgAddr))
478 invisible(dev.off()) 503 invisible(dev.off())
479 504
480 pdf(smearPdf) 505 pdf(smearPdf)
481 plotTitle <- gsub(".", " ", 506 plotTitle <- gsub(".", " ",
482 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), 507 paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
483 fixed = TRUE) 508 fixed=TRUE)
484 plotSmear(testData, de.tags=topIDs, 509 plotSmear(testData, de.tags=topIDs,
485 pch=20, cex=1.0, main=plotTitle) 510 pch=20, cex=1.0, main=plotTitle)
486 abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) 511 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
487 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") 512 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)")
488 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") 513 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf")
489 linkData <- rbind(linkData, c(imgName, imgAddr)) 514 linkData <- rbind(linkData, c(imgName, imgAddr))
490 invisible(dev.off()) 515 invisible(dev.off())
516
491 } else if (workMode=="glm") { 517 } else if (workMode=="glm") {
492 # Generating design information 518 # Generating design information
493 factors <- factor(data$sample$group) 519 factors <- factor(data$sample$group)
494 design <- model.matrix(~0+factors) 520 design <- model.matrix(~0+factors)
495 521
496 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) 522 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE)
497 523
498 # Split up contrasts seperated by comma into a vector 524 # Split up contrasts seperated by comma into a vector
499 contrastData <- unlist(strsplit(contrastData, split=",")) 525 contrastData <- unlist(strsplit(contrastData, split=","))
526
500 for (i in 1:length(contrastData)) { 527 for (i in 1:length(contrastData)) {
501 # Generate contrasts information 528 # Generate contrasts information
502 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) 529 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design)
503 530
504 # Fit negative bionomial GLM 531 # Fit negative bionomial GLM
505 fit = glmFit(data, design) 532 fit <- glmFit(data, design)
506 # Carry out Likelihood ratio test 533 # Carry out Likelihood ratio test
507 testData = glmLRT(fit, contrast=contrasts) 534 testData <- glmLRT(fit, contrast=contrasts)
508 535
509 # Select hairpins with FDR < 0.05 to highlight on plot 536 # Select hairpins with FDR < 0.05 to highlight on plot
510 top <- topTags(testData, n=Inf) 537 top <- topTags(testData, n=Inf)
511 topIDs <- top$table[(top$table$FDR < fdrThresh) & 538 topIDs <- top$table[(top$table$FDR < fdrThresh) &
512 (abs(top$table$logFC) > lfcThresh), 1] 539 (abs(top$table$logFC) > lfcThresh), 1]
513 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") 540 write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
514 541
515 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)") 542 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)")
516 linkAddr <- paste0("toptag(", contrastData[i], ").tsv") 543 linkAddr <- paste0("toptag(", contrastData[i], ").tsv")
517 linkData <- rbind(linkData, c(linkName, linkAddr)) 544 linkData <- rbind(linkData, c(linkName, linkAddr))
545
546 # Collect counts for differential representation
547 upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh)
548 downCount[i] <- sum(top$table$FDR < fdrThresh &
549 top$table$logFC < -lfcThresh)
550 flatCount[i] <- sum(top$table$FDR > fdrThresh |
551 abs(top$table$logFC) < lfcThresh)
518 552
519 # Make a plot of logFC versus logCPM 553 # Make a plot of logFC versus logCPM
520 png(smearPng[i], height=600, width=600) 554 png(smearPng[i], height=600, width=600)
521 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], 555 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i],
522 fixed=TRUE)) 556 fixed=TRUE))
549 } 583 }
550 } 584 }
551 585
552 if (wantRoast) { 586 if (wantRoast) {
553 # Input preparaton for roast 587 # Input preparaton for roast
554 nrot = 9999 588 nrot <- 9999
555 set.seed(602214129) 589 set.seed(602214129)
556 roastData <- mroast(data, index=geneList, design=design, 590 roastData <- mroast(data, index=geneList, design=design,
557 contrast=contrasts, nrot=nrot) 591 contrast=contrasts, nrot=nrot)
558 roastData <- cbind(GeneID=rownames(roastData), roastData) 592 roastData <- cbind(GeneID=rownames(roastData), roastData)
559 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") 593 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t")
600 dev.off() 634 dev.off()
601 } 635 }
602 } 636 }
603 } 637 }
604 638
639 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
640 if (workMode == "glm") {
641 row.names(sigDiff) <- contrastData
642 } else if (workMode == "classic") {
643 row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1])
644 }
645
605 ID <- rownames(data$counts) 646 ID <- rownames(data$counts)
606 outputCounts <- cbind(ID, data$counts) 647 outputCounts <- cbind(ID, data$counts)
607 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t") 648 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t")
608 linkName <- "Counts table (.tsv)" 649 linkName <- "Counts table (.tsv)"
609 linkAddr <- "counts.tsv" 650 linkAddr <- "counts.tsv"
650 691
651 cata("The estimated common biological coefficient of variation (BCV) is: ", 692 cata("The estimated common biological coefficient of variation (BCV) is: ",
652 commonBCV, "<br />\n") 693 commonBCV, "<br />\n")
653 694
654 cata("<h4>Output:</h4>\n") 695 cata("<h4>Output:</h4>\n")
655 cata("All images displayed have PDF copy at the bottom of the page, these can ") 696 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n")
656 cata("exported in a pdf viewer to high resolution image format. <br />\n")
657 for (i in 1:nrow(imageData)) { 697 for (i in 1:nrow(imageData)) {
658 if (grepl("barcode", imageData$Link[i])) { 698 if (grepl("barcode", imageData$Link[i])) {
659 if (packageVersion("limma")<"3.19.19") { 699 if (packageVersion("limma")<"3.19.19") {
660 HtmlImage(imageData$Link[i], imageData$Label[i], 700 HtmlImage(imageData$Link[i], imageData$Label[i],
661 height=length(selectedGenes)*150) 701 height=length(selectedGenes)*150)
667 HtmlImage(imageData$Link[i], imageData$Label[i]) 707 HtmlImage(imageData$Link[i], imageData$Label[i])
668 } 708 }
669 } 709 }
670 cata("<br />\n") 710 cata("<br />\n")
671 711
712 cata("<h4>Differential Representation Counts:</h4>\n")
713
714 cata("<table border=\"1\" cellpadding=\"4\">\n")
715 cata("<tr>\n")
716 TableItem()
717 for (i in colnames(sigDiff)) {
718 TableHeadItem(i)
719 }
720 cata("</tr>\n")
721 for (i in 1:nrow(sigDiff)) {
722 cata("<tr>\n")
723 TableHeadItem(unmake.names(row.names(sigDiff)[i]))
724 for (j in 1:ncol(sigDiff)) {
725 TableItem(as.character(sigDiff[i, j]))
726 }
727 cata("</tr>\n")
728 }
729 cata("</table>")
730
672 cata("<h4>Plots:</h4>\n") 731 cata("<h4>Plots:</h4>\n")
673 for (i in 1:nrow(linkData)) { 732 for (i in 1:nrow(linkData)) {
674 if (!grepl(".tsv", linkData$Link[i])) { 733 if (!grepl(".tsv", linkData$Link[i])) {
675 HtmlLink(linkData$Link[i], linkData$Label[i]) 734 HtmlLink(linkData$Link[i], linkData$Label[i])
676 } 735 }
681 if (grepl(".tsv", linkData$Link[i])) { 740 if (grepl(".tsv", linkData$Link[i])) {
682 HtmlLink(linkData$Link[i], linkData$Label[i]) 741 HtmlLink(linkData$Link[i], linkData$Label[i])
683 } 742 }
684 } 743 }
685 744
686 cata("<p>alt-click any of the links to download the file, or click the name ") 745 cata("<p>Alt-click links to download file.</p>\n")
687 cata("of this task in the galaxy history panel and click on the floppy ") 746 cata("<p>Click floppy disc icon associated history item to download ")
688 cata("disk icon to download all files in a zip archive.</p>\n") 747 cata("all files.</p>\n")
689 cata("<p>.tsv files are tab seperated files that can be viewed using Excel ") 748 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
690 cata("or other spreadsheet programs</p>\n")
691 749
692 cata("<h4>Additional Information:</h4>\n") 750 cata("<h4>Additional Information:</h4>\n")
693 751
694 if (inputType == "fastq") { 752 if (inputType == "fastq") {
695 ListItem("Data was gathered from fastq raw read file(s).") 753 ListItem("Data was gathered from fastq raw read file(s).")
696 } else if (inputType == "counts") { 754 } else if (inputType == "counts") {
697 ListItem("Data was gathered from a table of counts.") 755 ListItem("Data was gathered from a table of counts.")
698 } 756 }
699 757
700 if (cpmReq!=0 && sampleReq!=0) { 758 if (cpmReq!=0 && sampleReq!=0) {
701 tempStr <- paste("Hairpins that do not have more than", cpmReq, 759 tempStr <- paste("Hairpins with less than", cpmReq,
702 "CPM in at least", sampleReq, "samples are considered", 760 "CPM in at least", sampleReq, "samples are insignificant",
703 "insignificant and filtered out.") 761 "and filtered out.")
704 ListItem(tempStr) 762 ListItem(tempStr)
705 filterProp <- round(filteredCount/preFilterCount*100, digits=2) 763 filterProp <- round(filteredCount/preFilterCount*100, digits=2)
706 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, 764 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
707 "%) hairpins were filtered out for low count-per-million.") 765 "%) hairpins were filtered out for low count-per-million.")
708 ListItem(tempStr) 766 ListItem(tempStr)
711 if (workMode == "classic") { 769 if (workMode == "classic") {
712 ListItem("An exact test was performed on each hairpin.") 770 ListItem("An exact test was performed on each hairpin.")
713 } else if (workMode == "glm") { 771 } else if (workMode == "glm") {
714 ListItem("A generalised linear model was fitted to each hairpin.") 772 ListItem("A generalised linear model was fitted to each hairpin.")
715 } 773 }
716
717
718 774
719 cit <- character() 775 cit <- character()
720 link <-character() 776 link <-character()
721 link[1] <- paste0("<a href=\"", 777 link[1] <- paste0("<a href=\"",
722 "http://www.bioconductor.org/packages/release/bioc/", 778 "http://www.bioconductor.org/packages/release/bioc/",