changeset 1:76d01fe0ec36 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 58c05c0ce9334f8b9c800283cfd1f40573546edd
author iuc
date Wed, 05 Jul 2017 04:39:42 -0400
parents bdebdea5f6a7
children a330ddf43861
files limma_voom.R limma_voom.xml test-data/factorinfo.txt test-data/limma-voom_Mut-WT.tsv test-data/limma-voom_Mut-WTmultifact.tsv test-data/limma-voom_WT-Mut.tsv test-data/limma-voom_normcounts.tsv
diffstat 7 files changed, 312 insertions(+), 179 deletions(-) [+]
line wrap: on
line diff
--- a/limma_voom.R	Mon Jun 12 07:41:02 2017 -0400
+++ b/limma_voom.R	Wed Jul 05 04:39:42 2017 -0400
@@ -3,7 +3,7 @@
 # expression analysis
 #
 # ARGS: 1.countPath       -Path to RData input containing counts
-#       2.annoPath        -Path to RData input containing gene annotations
+#       2.annoPath        -Path to input containing gene annotations
 #       3.htmlPath        -Path to html file linking to other outputs
 #       4.outPath         -Path to folder to write all output to
 #       5.rdaOpt          -String specifying if RData should be saved
@@ -15,15 +15,18 @@
 #       11.pAdjOpt        -String specifying the p-value adjustment method
 #       12.pValReq        -Float specifying the p-value requirement
 #       13.lfcReq         -Float specifying the log-fold-change requirement
-#       14.factorData     -String containing factor names and values
+#       14.normCounts     -String specifying if normalised counts should be output
+#       15.factPath       -Path to factor information file
+#       16.factorData     -Strings containing factor names and values if manually input 
 #
 # OUT:  Voom Plot
 #       BCV Plot
 #       MA Plot
-#       Top Expression Table
+#       Expression Table
 #       HTML file linking to the ouputs
 #
 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
+# Modified by: Maria Doyle - Jun 2017
 
 # Record starting time
 timeStart <- as.character(Sys.time())
@@ -148,13 +151,31 @@
 pAdjOpt <- as.character(argv[11])
 pValReq <- as.numeric(argv[12])
 lfcReq <- as.numeric(argv[13])
-factorData <- list()
-for (i in 14:length(argv)) {
-  newFact <- unlist(strsplit(as.character(argv[i]), split="::"))
-  factorData <- rbind(factorData, newFact)
-} # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,...
+normCounts <- as.character(argv[14])
+factPath <- as.character(argv[15])
+# Process factors
+if (as.character(argv[16])=="None") {
+    factorData <- read.table(factPath, header=TRUE, sep="\t")
+    factors <- factorData[,-1]
+}  else { 
+    factorData <- list()
+    for (i in 16:length(argv)) {
+        newFact <- unlist(strsplit(as.character(argv[i]), split="::"))
+        factorData <- rbind(factorData, newFact)
+    } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
 
-# Process arguments
+    # Set the row names to be the name of the factor and delete first row
+    row.names(factorData) <- factorData[, 1]
+    factorData <- factorData[, -1]
+    factorData <- sapply(factorData, sanitiseGroups)
+    factorData <- sapply(factorData, strsplit, split=",")
+    factorData <- sapply(factorData, make.names)
+    # Transform factor data into data frame of R factor objects
+    factors <- data.frame(factorData)
+
+}
+
+# Process other arguments
 if (weightOpt=="yes") {
   wantWeight <- TRUE
 } else {
@@ -173,15 +194,12 @@
   haveAnno <- TRUE
 }
 
-# Set the row names to be the name of the factor and delete first row
-row.names(factorData) <- factorData[, 1]
-factorData <- factorData[, -1]
-factorData <- sapply(factorData, sanitiseGroups)
-factorData <- sapply(factorData, strsplit, split=",")
-factorData <- sapply(factorData, make.names)
+if (normCounts=="yes") {
+  wantNorm <- TRUE
+} else {
+  wantNorm <- FALSE
+}
 
-# Transform factor data into data frame of R factor objects
-factors <- data.frame(factorData)
 
 #Create output directory
 dir.create(outPath, showWarnings=FALSE)
@@ -205,6 +223,7 @@
   maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
   topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv"))
 }                         # Save output paths for each contrast as vectors
+normOut <- makeOut("limma-voom_normcounts.tsv")
 rdaOut <- makeOut("RData.rda")
 sessionOut <- makeOut("session_info.txt")
 
@@ -221,12 +240,12 @@
 flatCount <- numeric()
                         
 # Read in counts and geneanno data
-counts <- read.table(countPath, header=TRUE, sep="\t")
+counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
 row.names(counts) <- counts[, 1]
 counts <- counts[ , -1]
 countsRows <- nrow(counts)
 if (haveAnno) {
-  geneanno <- read.table(annoPath, header=TRUE, sep="\t")
+  geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
 }
 
 ################################################################################
@@ -247,7 +266,7 @@
 preFilterCount <- nrow(data$counts)
 sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq
 data$counts <- data$counts[sel, ]
-data$genes <- data$genes[sel, ]
+data$genes <- data$genes[sel, ,drop = FALSE]
 postFilterCount <- nrow(data$counts)
 filteredCount <- preFilterCount-postFilterCount
 
@@ -255,6 +274,7 @@
 samplenames <- colnames(data$counts)
 sampleanno <- data.frame("sampleID"=samplenames, factors)
 
+
 # Generating the DGEList object "data"
 data$samples <- sampleanno
 data$samples$lib.size <- colSums(data$counts)
@@ -263,14 +283,17 @@
 data <- new("DGEList", data)
 
 factorList <- sapply(names(factors), pasteListName)
-formula <- "~0"
+
+formula <- "~0" 
 for (i in 1:length(factorList)) {
-  formula <- paste(formula, factorList[i], sep="+")
+    formula <- paste(formula,factorList[i], sep="+")
 }
+
 formula <- formula(formula)
 design <- model.matrix(formula)
+
 for (i in 1:length(factorList)) {
-  colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
+    colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
 }
 
 # Calculating normalising factor, estimating dispersion
@@ -330,6 +353,13 @@
   
 }
 
+ # Save normalised counts (log2cpm)
+if (wantNorm) {   
+    norm_counts <- data.frame(vData$genes, vData$E)
+    write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t")
+    linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv"))
+}
+
 # Fit linear model and estimate dispersion with eBayes
 voomFit <- contrasts.fit(voomFit, contrasts)
 voomFit <- eBayes(voomFit)
@@ -368,12 +398,11 @@
   top <- topTable(voomFit, coef=i, number=Inf, sort.by="P")
   write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
   
-  linkName <- paste0("limma-voom_", contrastData[i], 
-                     ".tsv")
+  linkName <- paste0("limma-voom_", contrastData[i], ".tsv")
   linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv")
   linkData <- rbind(linkData, c(linkName, linkAddr))
   
-  # Plot MA (log ratios vs mean average) using limma package on weighted data
+  # Plot MA (log ratios vs mean average) using limma package on weighted 
   pdf(maOutPdf[i])
   limma::plotMA(voomFit, status=status, coef=i,
                 main=paste("MA Plot:", unmake.names(contrastData[i])), 
@@ -534,12 +563,14 @@
 
 cata("<h4>Summary of experimental data:</h4>\n")
 
-cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP*</p>\n")
+cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
 
 cata("<table border=\"1\" cellpadding=\"3\">\n")
 cata("<tr>\n")
-TableItem()
-for (i in names(factors)) {
+TableHeadItem("SampleID")
+TableHeadItem(names(factors)[1]," (Primary Factor)")
+
+for (i in names(factors)[2:length(names(factors))]) {
   TableHeadItem(i)
 }
 cata("</tr>\n")
@@ -547,7 +578,7 @@
 for (i in 1:nrow(factors)) {
   cata("<tr>\n")
   TableHeadItem(row.names(factors)[i])
-  for (j in ncol(factors)) {
+  for (j in 1:ncol(factors)) {
     TableItem(as.character(unmake.names(factors[i, j])))
   }
   cata("</tr>\n")
--- a/limma_voom.xml	Mon Jun 12 07:41:02 2017 -0400
+++ b/limma_voom.xml	Wed Jul 05 04:39:42 2017 -0400
@@ -1,8 +1,8 @@
-<tool id="limma_voom" name="limma-voom" version="1.1.1">
+<tool id="limma_voom" name="limma-voom" version="1.2.0">
     <description>
         Differential expression with optional sample weights
     </description>
-  
+
     <requirements>
         <requirement type="package" version="3.16.5">bioconductor-edger</requirement>
         <requirement type="package" version="3.30.13">bioconductor-limma</requirement>
@@ -10,66 +10,71 @@
         <requirement type="package" version="0.4.1">r-scales</requirement>
     </requirements>
 
-    <version_command>
-    <![CDATA[
+    <version_command><![CDATA[
         echo $(R --version | grep version | grep -v GNU)", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
-    ]]>
-    </version_command>
-  
-    <command detect_errors="exit_code">
-    <![CDATA[
-        Rscript '$__tool_directory__/limma_voom.R'
-            '$counts'
-            
-            #if $anno.annoOpt=='yes':
-              '$geneanno'
-            #else:
-              None
-            #end if
-            
-            '$outReport'
-            '$outReport.files_path'
-            $rdaOption
-            $normalisationOption
-            $weightOption
-            '$contrast'
+    ]]></version_command>
+
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '$__tool_directory__/limma_voom.R'
+'$counts'
+
+#if $anno.annoOpt=='yes':
+  '$geneanno'
+#else:
+  None
+#end if
+
+'$outReport'
+'$outReport.files_path'
+$rdaOption
+$normalisationOption
+$weightOption
+'$contrast'
+
+#if $filterCPM.filterLowCPM=='yes':
+  '$filterCPM.cpmReq'
+  '$filterCPM.sampleReq'
+#else:
+  0
+  0
+#end if
 
-            #if $filterCPM.filterLowCPM=='yes':
-              '$filterCPM.cpmReq'
-              '$filterCPM.sampleReq'
-            #else:
-              0
-              0
-            #end if
-            
-            #if $testOpt.wantOpt=='yes':
-              '$testOpt.pAdjust'
-              '$testOpt.pVal'
-              '$testOpt.lfc'
-            #else:
-              "BH"
-              0.05
-              0
-            #end if
-            
-            '$factName::$factLevel'
+#if $testOpt.wantOpt=='yes':
+  '$testOpt.pAdjust'
+  '$testOpt.pVal'
+  '$testOpt.lfc'
+#else:
+  "BH"
+  0.05
+  0
+#end if
+
+$normCounts
 
-            &&
-            mkdir ./output_dir
+#if $fact.ffile=='yes':
+    '$finfo'
+    'None'
+#else:
+    'None'
+    '$fact.pfactName::$fact.pfactLevel'
+    #for $sfact in $fact.sfactors:
+        '$sfact.sfactName::$sfact.sfactLevel'
+    #end for
+#end if
 
-            &&
-            mv '$outReport.files_path'/*.tsv output_dir/
-            
-    ]]>             
-    </command>
-   
+&&
+mkdir ./output_dir
+
+&&
+mv '$outReport.files_path'/*.tsv output_dir/
+    ]]></command>
+
     <inputs>
         <param name="counts" type="data" format="tabular" label="Counts Data"/>
-        
+
         <conditional name="anno">
-            <param name="annoOpt" type="select"
-                    label="Use Gene Annotations?" 
-                    help="If an annotation file is provided, annotations will be added to the table of differential expression results to provide descriptions for each gene.">
+            <param name="annoOpt" type="select" label="Use Gene Annotations?"
+                help="If an annotation file is provided, annotations will be added to the table of differential expression results to provide descriptions for each gene.">
                 <option value="no">No</option>
                 <option value="yes">Yes</option>
             </param>
@@ -79,22 +84,45 @@
             <when value="no" />
         </conditional>
 
-      <!--*Code commented until solution for multiple factors is found*
-      <repeat name="factors" title="Factors" min="1" max="5" default="1">
-        <param name="factName" type="text" label="Factor Name (No spaces)"
-               help="Eg. Genotype"/>
-          <param name="factLevel" type="text" size="100"
-                 label="Factor Levels (No spaces)"
-                 help="Eg. WT,WT,Mut,Mut,WT"/>
-      </repeat>
-      -->
-      
-        <param name="factName" type="text" label="Factor Name" help="Eg. Genotype."/>
-        <param name="factLevel" type="text" label="Factor Values"
-                help="Eg. WT,WT,WT,Mut,Mut,Mut
-                NOTE: Please ensure that the same levels are typed identically with cases matching."/>     
-        <param name="contrast" type="text" label="Contrasts of interest" help="Eg. Mut-WT,KD-Control"/>
-      
+        <conditional name="fact">
+            <param name="ffile" type="select" label="Input Factor Information from file?"
+                help="You can choose to input the factor information from a file or manually enter below.">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="yes">
+                <param name="finfo" type="data" format="tabular" label="Factor Information"/>
+            </when>
+            <when value="no" >
+                <param name="pfactName" type="text" label="Primary Factor Name"
+                    help="Eg. Genotype NOTE: Please only use letters, numbers or underscores.">
+                    <validator type="empty_field" />
+                    <validator type="regex" message="Please only use letters, numbers or underscores">^[\w]+$</validator>
+                </param>
+                <param name="pfactLevel" type="text" label="Primary Factor Levels"
+                    help="Eg. WT,WT,WT,Mut,Mut,Mut NOTE: Please only use letters, numbers or underscores and ensure that the same levels are typed identically with cases matching.">
+                    <validator type="empty_field" />
+                    <validator type="regex" message="Please only use letters, numbers or underscores, and separate levels by commas">^[\w,]+$</validator>
+                </param>
+                <repeat name="sfactors" title="Secondary Factor" >
+                    <param name="sfactName" type="text" label="Secondary Factor Name" help="Eg. Batch">
+                        <validator type="empty_field" />
+                        <validator type="regex" message="Please only use letters, numbers or underscores">^[\w]+$</validator>
+                    </param>
+                    <param name="sfactLevel" type="text" label="Secondary Factor Levels"
+                        help="Eg. b1,b2,b3,b1,b2,b3 NOTE: Please only use letters, numbers or underscores and ensure that the same levels are typed identically with cases matching.">
+                        <validator type="empty_field" />
+                        <validator type="regex" message="Please only use letters, numbers or underscores">^[\w,]+$</validator>
+                    </param>
+                </repeat>
+            </when>
+        </conditional>
+
+        <param name="contrast" type="text" label="Contrasts of interest" help="Eg. Mut-WT,KD-Control">
+            <validator type="empty_field" />
+            <validator type="regex" message="Please only use letters, numbers or underscores">^[\w,-]+$</validator>
+        </param>
+
         <conditional name="filterCPM">
             <param name="filterLowCPM" type="select" label="Filter Low CPM?"
                 help="Treat genes with very low expression as unexpressed and filter out to speed up computation.">
@@ -103,17 +131,16 @@
             </param>
             <when value="yes">
                 <param name="cpmReq" type="float" value="0.5" min="0" label="Minimum CPM"/>
-                       
                 <param name="sampleReq" type="integer" value="1" min="0" label="Minimum Samples"
                     help="Filter out all the genes that do not meet the minimum CPM in at least this many samples."/>
-            </when>          
-            <when value="no"/>         
+            </when>
+            <when value="no"/>
         </conditional>
 
         <param name="weightOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Apply sample weights?"
-            help="Apply weights if outliers are present."> 
+            help="Apply weights if outliers are present.">
         </param>
-      
+
         <param name="normalisationOption" type="select" label="Normalisation Method">
             <option value="TMM">TMM</option>
             <option value="RLE">RLE</option>
@@ -121,39 +148,44 @@
             <option value="none">None (Don't normalise)</option>
         </param>
 
-        <param name="rdaOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" 
+        <param name="normCounts" type="boolean" truevalue="yes" falsevalue="no" checked="false"
+            label="Output normalised counts table?"
+            help="Output a file containing the normalised counts, these are in log2 counts per million (logCPM).">
+        </param>
+
+        <param name="rdaOption" type="boolean" truevalue="yes" falsevalue="no" checked="false"
             label="Output RData?"
-            help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report.">      
+            help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report.">
         </param>
-                    
+
         <conditional name="testOpt">
             <param name="wantOpt" type="select" label="Use Advanced Testing Options?"
                 help="Enable choices for p-value adjustment method, p-value threshold and log2-fold-change threshold.">
                 <option value="no" selected="True">No</option>
                 <option value="yes">Yes</option>
-            </param>         
+            </param>
             <when value="yes">
                 <param name="pAdjust" type="select" label="P-Value Adjustment Method.">
                     <option value="BH">Benjamini and Hochberg (1995)</option>
                     <option value="BY">Benjamini and Yekutieli (2001)</option>
                     <option value="holm">Holm (1979)</option>
                     <option value="none">None</option>
-                </param>             
+                </param>
                 <param name="pVal" type="float" value="0.05" min="0" max="1"
                     label="Adjusted Threshold"
                     help="Genes below this threshold are considered significant and highlighted in the MA plot. If either BH(1995) or BY(2001) were selected then this value is a false-discovery-rate control. If Holm(1979) was selected then this is an adjusted p-value for family-wise error rate."/>
                 <param name="lfc" type="float" value="0" min="0"
                     label="Minimum log2-fold-change Required"
                     help="Genes above this threshold and below the p-value threshold are considered significant and highlighted in the MA plot."/>
-            </when> 
-            <when value="no"/>       
+            </when>
+            <when value="no"/>
         </conditional>
 
     </inputs>
-  
+
     <outputs>
-        <data format="html" name="outReport" label="${tool.name} on ${on_string}: Report" />
-        <collection name="voom_results" type="list" label="${tool.name} on ${on_string}: DE genes">
+        <data name="outReport" format="html" label="${tool.name} on ${on_string}: Report" />
+        <collection name="outTables" type="list" label="${tool.name} on ${on_string}: Tables">
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" />
         </collection>
     </outputs>
@@ -161,38 +193,38 @@
     <tests>
         <test>
             <param name="counts" value="matrix.txt" />
-            <param name="factName" value="Genotype" />
-            <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+            <param name="pfactName" value="Genotype" />
+            <param name="pfactLevel" value="WT,WT,WT,Mut,Mut,Mut" />
             <param name="contrast" value="Mut-WT,WT-Mut" />
             <param name="normalisationOption" value="TMM" />
-            <output_collection name="voom_results" count="2">
+            <output_collection name="outTables" count="2">
                 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
                 <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" />
-            </output_collection>    
+            </output_collection>
             <output name="outReport" >
                 <assert_contents>
                     <has_text text="Limma-voom Analysis Output" />
                     <not_has_text text="RData" />
                 </assert_contents>
-            </output>          
+            </output>
         </test>
         <test>
             <param name="annoOpt" value="yes" />
             <param name="geneanno" value="anno.txt" />
             <param name="counts" value="matrix.txt" />
-            <param name="factName" value="Genotype" />
-            <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+            <param name="pfactName" value="Genotype" />
+            <param name="pfactLevel" value="WT,WT,WT,Mut,Mut,Mut" />
             <param name="contrast" value="Mut-WT" />
             <param name="normalisationOption" value="TMM" />
-            <output_collection name="voom_results" >
+            <output_collection name="outTables" >
                 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTanno.tsv" />
-            </output_collection>  
+            </output_collection>
         </test>
         <test>
             <param name="rdaOption" value="yes" />
-            <param name="counts" value="matrix.txt" />            
-            <param name="factName" value="Genotype" />
-            <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+            <param name="counts" value="matrix.txt" />
+            <param name="pfactName" value="Genotype" />
+            <param name="pfactLevel" value="WT,WT,WT,Mut,Mut,Mut" />
             <param name="contrast" value="Mut-WT" />
             <param name="normalisationOption" value="TMM" />
             <output name="outReport" >
@@ -201,16 +233,51 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="counts" value="matrix.txt" />
+            <param name="pfactName" value="Genotype"/>
+            <param name="pfactLevel" value="WT,WT,WT,Mut,Mut,Mut"/>
+            <repeat name="sfactors">
+                <param name="sfactName" value="Batch"/>
+                <param name="sfactLevel" value="b1,b2,b3,b1,b2,b3"/>
+            </repeat>
+            <param name="contrast" value="Mut-WT" />
+            <param name="normalisationOption" value="TMM" />
+            <output_collection name="outTables" >
+                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTmultifact.tsv" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="ffile" value="yes" />
+            <param name="finfo" value="factorinfo.txt" />
+            <param name="counts" value="matrix.txt" />
+            <param name="contrast" value="Mut-WT" />
+            <param name="normalisationOption" value="TMM" />
+            <output_collection name="outTables" >
+                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTmultifact.tsv" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="normCounts" value="yes" />
+            <param name="counts" value="matrix.txt" />
+            <param name="pfactName" value="Genotype" />
+            <param name="pfactLevel" value="WT,WT,WT,Mut,Mut,Mut" />
+            <param name="contrast" value="Mut-WT" />
+            <param name="normalisationOption" value="TMM" />
+            <output_collection name="outTables" count="2">
+                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
+                <element name="limma-voom_normcounts" ftype="tabular" file="limma-voom_normcounts.tsv" />
+            </output_collection>
+        </test>
     </tests>
-  
-    <help>
-<![CDATA[
+
+    <help><![CDATA[
 .. class:: infomark
 
 **What it does**
 
 Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool
-produces plots and tables useful in the analysis of differential gene 
+produces plots and tables useful in the analysis of differential gene
 expression.
 
 -----
@@ -228,18 +295,18 @@
     ========== ======= ======= ======= ======== ======== ========
     **GeneID** **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3**
     ---------- ------- ------- ------- -------- -------- --------
-    11287      1699    1528    1601    1463     1441     1495                
-    11298      1905    1744    1834    1345     1291     1346                
-    11302      6       8       7       5        6        5                   
-    11303      2099    1974    2100    1574     1519     1654                
-    11304      356     312     337     361      397      346                 
-    11305      2528    2438    2493    1762     1942     2027                
+    11287      1699    1528    1601    1463     1441     1495
+    11298      1905    1744    1834    1345     1291     1346
+    11302      6       8       7       5        6        5
+    11303      2099    1974    2100    1574     1519     1654
+    11304      356     312     337     361      397      346
+    11305      2528    2438    2493    1762     1942     2027
     ========== ======= ======= ======= ======== ======== ========
 
 **Gene Annotations:**
 Optional input for gene annotations, this can contain more
 information about the genes than just an ID number. The annotations will
-be avaiable in the differential expression results table.
+be available in the differential expression results table and the optional normalised counts table.
 
 Example:
 
@@ -254,24 +321,36 @@
     1305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
     ==========  ==========  ===================================================
 
-**Factor Name:**
-The name of the factor being investigated. This tool currently assumes
-that only one factor is of interest.
+**Factor Information:**
+Enter Factor Names and Levels in the tool form or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the Primary Factor levels (e.g. Genotype) with optional additional columns for any Secondary Factors (e.g. Batch).
+
+Example:
 
-**Factor Levels:**
-The levels of the factor of interest, this must be entered in the same
-order as the samples to which the levels correspond as listed in the
-columns of the counts matrix. 
+    ========== ============ =========
+    **Sample** **Genotype** **Batch**
+    ---------- ------------ ---------
+    WT1        WT           b1
+    WT2        WT           b2
+    WT3        WT           b3
+    Mut1       Mut          b1
+    Mut2       Mut          b2
+    Mut3       Mut          b3
+    ========== ============ =========
 
-The values should be seperated by commas, and spaces must not be used.
+**Primary Factor Name:** The name of the primary factor being investigated e.g. Genotype. One primary factor must be entered and spaces must not be used.
+
+**Primary Factor Levels:** The levels of the primary factor of interest, these must be entered in the same order as the samples to which the levels correspond, as listed in the columns of the counts matrix. Spaces must not be used and if entered in the tool form the values should be separated by commas.
+
+**Secondary Factor Name:** Optionally, one or more secondary factors can be included. These are variables that might influence your experiment e.g. Batch, Gender. Spaces must not be used.
+
+**Secondary Factor Levels:** The levels of the secondary factor of interest, these must be entered in the same order as the samples to which the levels correspond, as listed in the columns of the counts matrix. Spaces must not be used and if entered in the tool form the values should be separated by commas.
+
 
 **Contrasts of Interest:**
-The contrasts you wish to make between levels. 
-
-A common contrast would be a simple difference between two levels: "Mut-WT" 
+The contrasts you wish to make between levels.
+A common contrast would be a simple difference between two levels: "Mut-WT"
 represents the difference between the mutant and wild type genotypes.
-
-The values should be seperated by commas and spaces must not be used.
+Multiple contrasts should be separated by commas and spaces must not be used.
 
 **Filter Low CPM:**
 Option to ignore the genes that do not show significant levels of
@@ -291,7 +370,6 @@
 then this will cause that gene to fail the criteria. When in doubt simply do not
 filter.
 
-
 **Normalisation Method:**
 Option for using different methods to rescale the raw library
 size. For more information, see calcNormFactor section in the edgeR_ user's
@@ -309,21 +387,25 @@
 there are options to change this to custom values.
 
     * **P-Value Adjustment Method:**
-      Change the multiple testing control method, the options are BH(1995) and 
+      Change the multiple testing control method, the options are BH(1995) and
       BY(2001) which are both false discovery rate controls. There is also
       Holm(1979) which is a method for family-wise error rate control.
-    
+
     * **Adjusted Threshold:**
       Set the threshold for the resulting value of the multiple testing control
       method. Only observations whose statistic falls below this value is
       considered significant, thus highlighted in the MA plot.
-      
+
     * **Minimum log2-fold-change Required:**
       In addition to meeting the requirement for the adjusted statistic for
       multiple testing, the observation must have an absolute log2-fold-change
-      greater than this threshold to be considered significant, thus highlighted 
+      greater than this threshold to be considered significant, thus highlighted
       in the MA plot.
 
+**Outputs**
+
+This tool outputs a table of differentially expressed genes for each contrast of interest and a HTML report with plots and additional information. Optionally you can choose to output the normalised counts table and the RData file.
+
 -----
 
 **Citations:**
@@ -332,24 +414,24 @@
 
 limma
 
-Please cite the paper below for the limma software itself.  Please also try
+Please cite the paper below for the limma software itself. Please also try
 to cite the appropriate methodology articles that describe the statistical
 methods implemented in limma, depending on which limma functions you are
-using.  The methodology articles are listed in Section 2.1 of the limma 
+using.  The methodology articles are listed in Section 2.1 of the limma
 User's Guide.
 
-    * Smyth GK (2005). Limma: linear models for microarray data. In: 
-      'Bioinformatics and Computational Biology Solutions using R and 
-      Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, 
+    * Smyth GK (2005). Limma: linear models for microarray data. In:
+      'Bioinformatics and Computational Biology Solutions using R and
+      Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry,
       W. Huber (eds), Springer, New York, pages 397-420.
-        
+
     * Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:
       precision weights unlock linear model analysis tools for
       RNA-seq read counts. Genome Biology 15, R29.
 
     * Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME, Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? Modelling sample and observational level variability improves power in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.
 
-    * Ritchie, M. E., Diyagama, D., Neilson, J., van Laar, R., Dobrovic, 
+    * Ritchie, M. E., Diyagama, D., Neilson, J., van Laar, R., Dobrovic,
       A., Holloway, A., and Smyth, G. K. (2006). Empirical array quality weights
       for microarray data. BMC Bioinformatics 7, Article 261.
 
@@ -358,30 +440,29 @@
 edgeR
 
 Please cite the first paper for the software itself and the other papers for
-the various original statistical methods implemented in edgeR.  See 
+the various original statistical methods implemented in edgeR.  See
 Section 1.2 in the User's Guide for more detail.
 
-    * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor 
-      package for differential expression analysis of digital gene expression 
+    * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
+      package for differential expression analysis of digital gene expression
       data. Bioinformatics 26, 139-140
-      
-    * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing 
+
+    * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing
       differences in tag abundance. Bioinformatics 23, 2881-2887
-      
-    * Robinson MD and Smyth GK (2008). Small-sample estimation of negative 
+
+    * Robinson MD and Smyth GK (2008). Small-sample estimation of negative
       binomial dispersion, with applications to SAGE data.
       Biostatistics, 9, 321-332
-      
-    * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis 
-      of multifactor RNA-Seq experiments with respect to biological variation. 
+
+    * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis
+      of multifactor RNA-Seq experiments with respect to biological variation.
       Nucleic Acids Research 40, 4288-4297
-      
+
 Please report problems or suggestions to: su.s@wehi.edu.au
 
 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
-]]>
-    </help>
+    ]]></help>
     <citations>
         <citation type="doi">10.1093/nar/gkv412</citation>
     </citations>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/factorinfo.txt	Wed Jul 05 04:39:42 2017 -0400
@@ -0,0 +1,7 @@
+Sample	Genotype	Batch
+WT1	WT	b1
+WT2	WT	b2
+WT3	WT	b3
+Mut1	Mut	b1
+Mut2	Mut	b2
+Mut3	Mut	b3
--- a/test-data/limma-voom_Mut-WT.tsv	Mon Jun 12 07:41:02 2017 -0400
+++ b/test-data/limma-voom_Mut-WT.tsv	Wed Jul 05 04:39:42 2017 -0400
@@ -1,4 +1,4 @@
-"ID"	"logFC"	"AveExpr"	"t"	"P.Value"	"adj.P.Val"	"B"
+"GeneID"	"logFC"	"AveExpr"	"t"	"P.Value"	"adj.P.Val"	"B"
 "11304"	0.457332061341026	15.5254133001226	6.50459574633681	9.98720685006039e-07	5.99232411003624e-06	14.0741948485896
 "11287"	0.190749727701785	17.6546448244617	5.09535410066402	3.26518807654125e-05	9.79556422962375e-05	5.46773893802392
 "11298"	-0.138014418336201	17.6747285193431	-3.33168485842331	0.00278753263633162	0.00557506527266324	-1.84301342041449
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_Mut-WTmultifact.tsv	Wed Jul 05 04:39:42 2017 -0400
@@ -0,0 +1,7 @@
+"GeneID"	"logFC"	"AveExpr"	"t"	"P.Value"	"adj.P.Val"	"B"
+"11287"	0.190521683937291	17.6546448244617	14.4538942795625	5.93483727834149e-09	3.56090236700489e-08	96.5864491178815
+"11298"	-0.139877413200757	17.6747285193431	-7.71901694642401	5.4113978311412e-06	1.62341934934236e-05	22.3495333961486
+"11304"	0.459011254726061	15.5254133001226	5.64083198409048	0.000108849060923731	0.000217698121847462	8.76657226635859
+"11303"	-0.0641599901594212	17.886791401216	-2.99008181539252	0.0112725655419959	0.0169088483129939	-2.70089035288952
+"11305"	-0.0651479753016204	18.1585474109909	-2.28935282063873	0.0409794711446019	0.0491753653735222	-4.27133526604488
+"11302"	-0.035881713464434	9.78883119065989	-0.439436789626921	0.668154169055758	0.668154169055758	-5.75838315483926
--- a/test-data/limma-voom_WT-Mut.tsv	Mon Jun 12 07:41:02 2017 -0400
+++ b/test-data/limma-voom_WT-Mut.tsv	Wed Jul 05 04:39:42 2017 -0400
@@ -1,4 +1,4 @@
-"ID"	"logFC"	"AveExpr"	"t"	"P.Value"	"adj.P.Val"	"B"
+"GeneID"	"logFC"	"AveExpr"	"t"	"P.Value"	"adj.P.Val"	"B"
 "11304"	-0.457332061341026	15.5254133001226	-6.50459574633681	9.98720685006039e-07	5.99232411003624e-06	14.0741948485896
 "11287"	-0.190749727701785	17.6546448244617	-5.09535410066402	3.26518807654125e-05	9.79556422962375e-05	5.46773893802392
 "11298"	0.138014418336201	17.6747285193431	3.33168485842331	0.00278753263633162	0.00557506527266324	-1.84301342041449
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_normcounts.tsv	Wed Jul 05 04:39:42 2017 -0400
@@ -0,0 +1,7 @@
+"GeneID"	"WT1"	"WT2"	"WT3"	"Mut1"	"Mut2"	"Mut3"
+"11287"	17.6076545522668	17.5079905058358	17.5639197719462	17.7719335903598	17.7105244453357	17.7658460810258
+"11298"	17.7727137985373	17.6986875511432	17.7598828784201	17.6506532355915	17.5520012519588	17.6144324004081
+"11302"	9.57719962386619	10.0175525101992	9.82560228478005	9.71615817858834	9.91760904198188	9.67886550454371
+"11303"	17.9125899785601	17.8773613214092	17.955228759658	17.8774046020864	17.7865502833631	17.911613462219
+"11304"	15.354518172169	15.2178020484983	15.3174553811097	15.7545783969022	15.8519803740125	15.6561454280436
+"11305"	18.1808259688524	18.1818679259847	18.2026681768366	18.0401341021246	18.1408682071359	18.204920085011