changeset 5:bde663b872d9 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 275a72ec0424e4e5d658d1bc8227077ea46f0fdc
author yhoogstrate
date Mon, 14 Dec 2015 11:01:38 -0500
parents 5d38abf7e4b6
children a6e388381821
files README.rst edgeR_Differential_Gene_Expression.xml test-data/Differential_Gene_Expression/C1 test-data/Differential_Gene_Expression/C2 test-data/Differential_Gene_Expression/C3 test-data/Differential_Gene_Expression/C4 test-data/Differential_Gene_Expression/E1 test-data/Differential_Gene_Expression/E2 test-data/Differential_Gene_Expression/E3 test-data/Differential_Gene_Expression/E4 test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt
diffstat 13 files changed, 487 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Wed Dec 09 10:43:03 2015 -0500
+++ b/README.rst	Mon Dec 14 11:01:38 2015 -0500
@@ -1,6 +1,13 @@
 EdgeR wrapper for Galaxy
 ========================
 
+This is a wrapper for the RNA-Seq differential gene expression analysis tool EdgeR.
+This wrapper contains 2 flavours of tests: a classical 2-group analysis and a more
+sophisticated multi-factor analysis.
+
+Input data can be generated using so-called count tools. The wrapper has been written
+to be compatible with at least featureCounts (by yhoogstrate) and HTSeq-count (by iuc).
+
 http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
 
 Implementation of EdgeR supporting quite advanced experimental
@@ -42,17 +49,4 @@
 
 **This wrapper**:
 
-    Copyright (C) 2013-2015  Youri Hoogstrate
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+GPL (>=2)
--- a/edgeR_Differential_Gene_Expression.xml	Wed Dec 09 10:43:03 2015 -0500
+++ b/edgeR_Differential_Gene_Expression.xml	Mon Dec 14 11:01:38 2015 -0500
@@ -36,18 +36,58 @@
     
     <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
     
-    <command>
+    <command><![CDATA[
+        #if $analysis_type.analysis_select == "multi_factor"
+            #set $expression_matrix = $analysis_type.expression_matrix
+            #set $design_matrix = $analysis_type.design_matrix
+            #set $contrast = $analysis_type.contrast
+        #else
+            ## 2-group test: design and expression matrices do not exist yet - build them in the job directory from the per-sample counts files
+            #set $expression_matrix = "expression_matrix.txt"
+            #set $design_matrix = "design_matrix.txt"
+            #set $contrast = str($analysis_type.factorLevel_condition)+"-"+str($analysis_type.factorLevel_control)
+
+            ## -- Create expression matrix: gene ids come from column 1 of the FIRST control file (index 0, so a single control file also works); column 2 of every counts file becomes one sample column
+            cut -f 1 "$analysis_type.countsFile_control[0]" > gene_ids.column.txt &&
+            #for $file in $analysis_type.countsFile_control:
+                cut -f 2 "${file}" > "${file}.expression_column.txt"    &&
+            #end for
+            #for $file in $analysis_type.countsFile_condition:
+                cut -f 2 "${file}" > "${file}.expression_column.txt"    &&
+            #end for
+            
+            paste
+                gene_ids.column.txt
+            #for $file in $analysis_type.countsFile_control:
+                "${file}.expression_column.txt"
+            #end for
+            #for $file in $analysis_type.countsFile_condition:
+                "${file}.expression_column.txt"
+            #end for
+                > "${expression_matrix}"                                &&
+            
+            ## -- Create design matrix: header is written with a truncating redirect, then one appended row per dataset (dataset name, factor level); control rows first, then condition rows
+            echo "sample-name	Condition" > "${design_matrix}"         &&
+            #for $file in $analysis_type.countsFile_control:
+                echo "${file.name}	${analysis_type.factorLevel_control}" >> "${design_matrix}"      &&
+            #end for
+            #for $file in $analysis_type.countsFile_condition:
+                echo "${file.name}	${analysis_type.factorLevel_condition}" >> "${design_matrix}"    &&
+            #end for
+        #end if
+        
         R --vanilla --slave -f $R_script '--args
             $expression_matrix
             $design_matrix
             $contrast
             
+            $analysis_report_genes
             $fdr
             
             $output_count_edgeR 
             $output_cpm
             
-            /dev/null                                                    <!-- Calculation of FPKM/RPKM should come here -->
+            /dev/null                                                   ### Calculation of FPKM/RPKM should come here
             
             #if $output_raw_counts:
                 $output_raw_counts
@@ -117,6 +157,7 @@
             
             $output_format_images
             '
+    ]]>
     </command>
     
     <configfiles>
@@ -134,28 +175,29 @@
 design_matrix_file                  <- args[2]
 contrast                            <- args[3]
 
-fdr                                 <- args[4]
+truncate_table_by_fdr               <- args[4]
+fdr                                 <- as.double(args[5])
 
-output_count_edgeR                  <- args[5]
-output_cpm                          <- args[6]
+output_count_edgeR                  <- args[6]
+output_cpm                          <- args[7]
 
-output_xpkm                         <- args[7]        ##FPKM file - to be implemented
+output_xpkm                         <- args[8]        ##FPKM file - to be implemented
 
-output_raw_counts                   <- args[8]
+output_raw_counts                   <- args[9]
 
-output_MDSplot_logFC                <- args[9]
-output_MDSplot_logFC_coordinates    <- args[10]
+output_MDSplot_logFC                <- args[10]
+output_MDSplot_logFC_coordinates    <- args[11]
 
-output_MDSplot_bcv                  <- args[11]
-output_MDSplot_bcv_coordinates      <- args[12]
+output_MDSplot_bcv                  <- args[12]
+output_MDSplot_bcv_coordinates      <- args[13]
 
-output_BCVplot                      <- args[13]
-output_MAplot                       <- args[14]
-output_PValue_distribution_plot     <- args[15]
-output_hierarchical_clustering_plot <- args[16]
-output_heatmap_plot                 <- args[17]
-output_RData_obj                    <- args[18]
-output_format_images                <- args[19]
+output_BCVplot                      <- args[14]
+output_MAplot                       <- args[15]
+output_PValue_distribution_plot     <- args[16]
+output_hierarchical_clustering_plot <- args[17]
+output_heatmap_plot                 <- args[18]
+output_RData_obj                    <- args[19]
+output_format_images                <- args[20]
 
 
 ## Obtain read-counts
@@ -166,15 +208,22 @@
 
 for(i in 1:ncol(design_matrix)) {
   old <- design_matrix[,i]
-  design_matrix[,i] <- make.names(design_matrix[,i])
-  if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
-    print("Renaming of factors:")
-    print(old)
-    print("To:")
-    print(design_matrix[,i])
+  
+  if(any(grepl("^[0-9]+$", old, perl=TRUE) == FALSE)){
+    # At least one value in this column is not purely digits: convert the values to syntactically valid R names
+    design_matrix[,i] <- make.names(design_matrix[,i])
+    
+    # Log the before/after values, but only when make.names actually changed something
+    if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
+      print("Renaming of factors:")
+      print(old)
+      print("To:")
+      print(design_matrix[,i])
+    }
+  } else {
+    # Only numerical factors: these are blocking / pairing factors, so the column is converted with as.numeric instead of being renamed
+    design_matrix[,i] <- as.numeric(design_matrix[,i])
+  }
-  ## The following line seems to malfunction the script:
-  ##design_matrix[,i] <- as.factor(design_matrix[,i])
 }
 
 ## 1) In the expression matrix, you only want to have the samples described in the design matrix
@@ -348,7 +397,13 @@
 
   lrt <- glmLRT(fit, contrast=cont[,1])
   write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout())
-  write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+  
+  if(truncate_table_by_fdr =="all") {
+    write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+  }
+  else {
+    write.table(file=output_count_edgeR,subset(topTags(lrt,n=nrow(read_counts))\$table, FDR < fdr),sep="\t",row.names=TRUE,col.names=NA)
+  }
   write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
 
   ## todo EXPORT FPKM
@@ -458,12 +513,44 @@
     </configfiles>
     
     <inputs>
-        <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />
-        <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />
+        <conditional name="analysis_type">
+            <param name="analysis_select" type="select" label="Analysis type">
+                <option value="2_factor" selected="true">2-Group test</option>
+                <option value="multi_factor">Multigroup test and/or complex designs with e.g. blocking</option>
+            </param>
+            <when value="2_factor">
+                <param name="factorLevel_control" type="text" value="Control"
+                       label="Specify a factor level" help="Only letters, numbers and underscores will be retained in this field">
+                    <sanitizer>
+                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                    </sanitizer>
+                </param>
+                <param name="countsFile_control" type="data" format="tabular,csv" multiple="true" label="Counts file(s)"/>
+                
+                <param name="factorLevel_condition" type="text" value="Condition"
+                       label="Specify a factor level" help="Only letters, numbers and underscores will be retained in this field">
+                    <sanitizer>
+                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                    </sanitizer>
+                </param>
+                <param name="countsFile_condition" type="data" format="tabular,csv" multiple="true" label="Counts file(s)"/>
+            </when>
+            <when value="multi_factor">
+                <param name="expression_matrix" type="data" format="tabular,csv" label="Expression (read count) matrix" />
+                <param name="design_matrix" type="data" format="tabular,csv" label="Design matrix"
+                       help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />
+                
+                <param name="contrast" type="text" label="Contrast (biological question)"
+                       help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />
+            </when>
+        </conditional>
         
-        <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />
+        <param name="analysis_report_genes" type="select" label="Report differentially expressed genes">
+            <option value="all" selected="true">All genes</option>
+            <option value="significant">Only significant (defined by FDR cutoff)</option>
+        </param>
         
-        <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />
+        <param name="fdr" type="float" min="0" max="1" value="0.01" label="False Discovery Rate (FDR) cutoff" help="Used to highlight significant genes in figures and, when only significant genes are reported, to filter the table of differentially expressed genes" />
         
         <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
             <option value="make_output_raw_counts">Raw counts table</option>
@@ -482,19 +569,23 @@
         <param name="output_format_images" type="select" label="Output format of images" display="radio">
             <option value="png">Portable network graphics (.png)</option>
             <option value="pdf">Portable document format (.pdf)</option>
-            <option value="svg">Scalable vector graphics (.svg)</option>
+            <option value="svg" selected="true">Scalable vector graphics (.svg)</option>
         </param>
     </inputs>
     
     <outputs>
-        <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" />
-        <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />
+        <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${on_string}: differentially expressed genes" >
+            <actions>
+                <action name="column_names" type="metadata" default="original_gene_position,genes,logFC,logCPM,LR,PValue,FDR" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_cpm" label="edgeR DGE on ${on_string}: CPM" />
         
-        <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts">
+        <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${on_string}: raw counts">
             <filter>outputs and ("make_output_raw_counts" in outputs)</filter>
         </data>
         
-        <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (logFC method)">
+        <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${on_string}: MDS-plot (logFC method)">
             <filter>outputs and ("make_output_MDSplot_logFC" in outputs)</filter>
             
             <change_format>
@@ -504,11 +595,11 @@
             </change_format>
         </data>
         
-        <data format="tabular" name="output_MDSplot_logFC_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot coordinates table (logFC method)">
+        <data format="tabular" name="output_MDSplot_logFC_coordinates" label="edgeR DGE on ${on_string}: MDS-plot coordinates table (logFC method)">
             <filter>outputs and ("make_output_MDSplot_logFC_coordinates" in outputs)</filter>
         </data>
         
-        <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)">
+        <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${on_string}: MDS-plot (bcv method)">
             <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter>
             
             <change_format>
@@ -518,11 +609,11 @@
             </change_format>
         </data>
         
-        <data format="tabular" name="output_MDSplot_bcv_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot coordinates table (BCV method)">
+        <data format="tabular" name="output_MDSplot_bcv_coordinates" label="edgeR DGE on ${on_string}: MDS-plot coordinates table (BCV method)">
             <filter>outputs and ("make_output_MDSplot_bcv_coordinates" in outputs)</filter>
         </data>
         
-        <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
+        <data format="png" name="output_BCVplot" label="edgeR DGE on ${on_string}: BCV-plot">
             <filter>outputs and ("make_output_BCVplot" in outputs)</filter>
             
             <change_format>
@@ -532,7 +623,7 @@
             </change_format>
         </data>
         
-        <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
+        <data format="png" name="output_MAplot" label="edgeR DGE on ${on_string}: MA-plot">
             <filter>outputs and ("make_output_MAplot" in outputs)</filter>
             
             <change_format>
@@ -542,7 +633,7 @@
             </change_format>
         </data>
         
-        <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution">
+        <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${on_string}: P-Value distribution">
             <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter>
             
             <change_format>
@@ -552,7 +643,7 @@
             </change_format>
         </data>
         
-        <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical custering">
+        <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${on_string}: Hierarchical custering">
             <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter>
             
             <change_format>
@@ -562,7 +653,7 @@
             </change_format>
         </data>
         
-        <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap">
+        <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${on_string}: Heatmap">
             <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter>
             
             <change_format>
@@ -572,28 +663,83 @@
             </change_format>
         </data>
         
-        <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object">
+        <data format="RData" name="output_RData_obj" label="edgeR DGE on ${on_string}: R data object">
             <filter>outputs and ("make_output_RData_obj" in outputs)</filter>
         </data>
         
-        <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" >
+        <data format="txt" name="output_R" label="edgeR DGE on ${on_string}: R output (debug)" >
             <filter>outputs and ("make_output_R_stdout" in outputs)</filter>
         </data>
     </outputs>
     
     <tests>
         <test>
+            <param name="analysis_select" value="multi_factor" />
+            
+            <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
+            <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
+            
+            <param name="contrast" value="E-C"/>
+        
+            <param name="analysis_report_genes" value="all"/>
+            <param name="fdr" value="0.01" />
+            
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
+        </test>
+        <test>
+            <param name="analysis_select" value="multi_factor" />
+            
             <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
             <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
             
             <param name="contrast" value="E-C"/>
         
+            <param name="analysis_report_genes" value="significant"/>
             <param name="fdr" value="0.05" />
             
-            <param name="output_format_images" value="png" />
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt" />
+        </test>
+        <test>
+            <param name="analysis_select" value="2_factor" />
+            
+            <param name="factorLevel_control" value="C" />
+            <param name="countsFile_control" value="Differential_Gene_Expression/C1,Differential_Gene_Expression/C2,Differential_Gene_Expression/C3,Differential_Gene_Expression/C4" ftype="tabular" />
+            
+            <param name="factorLevel_condition" value="E" />
+            <param name="countsFile_condition" value="Differential_Gene_Expression/E1,Differential_Gene_Expression/E2,Differential_Gene_Expression/E3,Differential_Gene_Expression/E4" ftype="tabular" />
+        
+            <param name="analysis_report_genes" value="all"/>
+            <param name="fdr" value="0.01" />
             
             <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
         </test>
+        <test>
+            <param name="analysis_select" value="2_factor" />
+            
+            <param name="factorLevel_control" value="C" />
+            <param name="countsFile_control" value="Differential_Gene_Expression/C1,Differential_Gene_Expression/C2,Differential_Gene_Expression/C3,Differential_Gene_Expression/C4" ftype="tabular" />
+            
+            <param name="factorLevel_condition" value="E" />
+            <param name="countsFile_condition" value="Differential_Gene_Expression/E1,Differential_Gene_Expression/E2,Differential_Gene_Expression/E3,Differential_Gene_Expression/E4" ftype="tabular" />
+        
+            <param name="analysis_report_genes" value="significant"/>
+            <param name="fdr" value="0.05" />
+            
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt" />
+        </test>
+        <test>
+            <param name="analysis_select" value="multi_factor" />
+            
+            <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
+            <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt" />
+            
+            <param name="contrast" value="E-C"/>
+        
+            <param name="analysis_report_genes" value="all"/>
+            <param name="fdr" value="0.01" />
+            
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt" />
+        </test>
     </tests>
     
     <help>
@@ -661,24 +807,6 @@
 - African-European
 - 0.5*(Control+Placebo) / Treated
 
-Installation
-------------
-
-This tool requires no specific configuration. The following dependencies will installed automatically:
-
-- R
-- limma
-- edgeR
-
-License
--------
-- R
-    - GPL 2 &amp; GPL 3
-- limma
-    - GPL (&gt;=2)
-- edgeR
-    - GPL (&gt;=2)
-
 @CONTACT@
     </help>
     
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C1	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	C1
+COMMD10	966
+USP26	1
+DDX17	8544
+DDX11	329
+PTPN20B	0
+SLC35D3	1
+GLOD4	1614
+GIMAP7	0
+TXLNB	15
+MYO18A	1775
+ATG4B	936
+IFI44L	347
+KHSRP	2557
+KCNAB3	20
+RET	331
+IQCG	125
+C20orf118	9
+GPIHBP1	0
+RASSF3	658
+FUT8	4834
+LYSMD3	1333
+LMOD3	12
+HIPK1	24218
+HSPA8	44244
+TAS2R39	0
+NR2C2AP	606
+INADL	4315
+TMEM31	5
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C2	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	C2
+COMMD10	1067
+USP26	0
+DDX17	13335
+DDX11	970
+PTPN20B	0
+SLC35D3	0
+GLOD4	2596
+GIMAP7	1
+TXLNB	29
+MYO18A	4666
+ATG4B	2602
+IFI44L	678
+KHSRP	5001
+KCNAB3	42
+RET	695
+IQCG	193
+C20orf118	20
+GPIHBP1	0
+RASSF3	1060
+FUT8	6459
+LYSMD3	1679
+LMOD3	31
+HIPK1	35223
+HSPA8	58864
+TAS2R39	0
+NR2C2AP	1162
+INADL	6418
+TMEM31	10
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C3	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	C3
+COMMD10	438
+USP26	1
+DDX17	4579
+DDX11	221
+PTPN20B	0
+SLC35D3	0
+GLOD4	965
+GIMAP7	0
+TXLNB	9
+MYO18A	1193
+ATG4B	638
+IFI44L	307
+KHSRP	1593
+KCNAB3	10
+RET	361
+IQCG	84
+C20orf118	3
+GPIHBP1	0
+RASSF3	405
+FUT8	2599
+LYSMD3	666
+LMOD3	7
+HIPK1	14147
+HSPA8	26628
+TAS2R39	0
+NR2C2AP	403
+INADL	2421
+TMEM31	3
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C4	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	C4
+COMMD10	1231
+USP26	0
+DDX17	16358
+DDX11	867
+PTPN20B	0
+SLC35D3	2
+GLOD4	2912
+GIMAP7	0
+TXLNB	25
+MYO18A	4741
+ATG4B	2394
+IFI44L	784
+KHSRP	5513
+KCNAB3	34
+RET	669
+IQCG	229
+C20orf118	14
+GPIHBP1	0
+RASSF3	1277
+FUT8	7977
+LYSMD3	2029
+LMOD3	48
+HIPK1	47991
+HSPA8	76924
+TAS2R39	0
+NR2C2AP	1223
+INADL	8507
+TMEM31	14
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E1	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	E1
+COMMD10	964
+USP26	0
+DDX17	6995
+DDX11	916
+PTPN20B	0
+SLC35D3	1
+GLOD4	1807
+GIMAP7	1
+TXLNB	14
+MYO18A	1669
+ATG4B	1605
+IFI44L	268
+KHSRP	3162
+KCNAB3	28
+RET	2077
+IQCG	118
+C20orf118	6
+GPIHBP1	0
+RASSF3	507
+FUT8	4291
+LYSMD3	868
+LMOD3	19
+HIPK1	19201
+HSPA8	72195
+TAS2R39	0
+NR2C2AP	1293
+INADL	3443
+TMEM31	6
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E2	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	E2
+COMMD10	812
+USP26	0
+DDX17	8079
+DDX11	632
+PTPN20B	0
+SLC35D3	0
+GLOD4	1448
+GIMAP7	0
+TXLNB	15
+MYO18A	1457
+ATG4B	953
+IFI44L	302
+KHSRP	2624
+KCNAB3	34
+RET	1431
+IQCG	116
+C20orf118	13
+GPIHBP1	0
+RASSF3	575
+FUT8	4187
+LYSMD3	1141
+LMOD3	26
+HIPK1	28435
+HSPA8	61132
+TAS2R39	0
+NR2C2AP	761
+INADL	4415
+TMEM31	5
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E3	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	E3
+COMMD10	528
+USP26	0
+DDX17	5994
+DDX11	706
+PTPN20B	0
+SLC35D3	2
+GLOD4	1039
+GIMAP7	0
+TXLNB	6
+MYO18A	1497
+ATG4B	1185
+IFI44L	191
+KHSRP	2434
+KCNAB3	22
+RET	1490
+IQCG	79
+C20orf118	10
+GPIHBP1	0
+RASSF3	401
+FUT8	2974
+LYSMD3	749
+LMOD3	9
+HIPK1	20715
+HSPA8	42728
+TAS2R39	0
+NR2C2AP	726
+INADL	3094
+TMEM31	6
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E4	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid	E4
+COMMD10	860
+USP26	0
+DDX17	6596
+DDX11	518
+PTPN20B	0
+SLC35D3	1
+GLOD4	1564
+GIMAP7	0
+TXLNB	17
+MYO18A	1121
+ATG4B	911
+IFI44L	269
+KHSRP	2509
+KCNAB3	10
+RET	1327
+IQCG	107
+C20orf118	9
+GPIHBP1	0
+RASSF3	568
+FUT8	4154
+LYSMD3	1076
+LMOD3	20
+HIPK1	22614
+HSPA8	67106
+TAS2R39	0
+NR2C2AP	902
+INADL	3441
+TMEM31	3
+GC	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,9 @@
+sample-name	Condition	Batch
+C1	C	1
+C2	C	1
+C3	C	2
+C4	C	2
+E1	E	1
+E2	E	1
+E3	E	2
+E4	E	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+""	"genes"	"logFC"	"logCPM"	"LR"	"PValue"	"FDR"
+"15"	"RET"	1.95351498277649	13.2940435307943	70.7884298827703	3.9766237329402e-17	1.15322088255266e-15
+"24"	"HSPA8"	0.607097086193171	18.9380827005326	18.3388522248215	1.84897696413517e-05	0.000268101659799599
+"12"	"IFI44L"	-0.663271215486821	11.7020333673755	10.9083408215917	0.00095732231526494	0.00925411571422775
+"10"	"MYO18A"	-0.607030110998538	14.1586814058554	8.85972925087567	0.0029153081194653	0.0208760330354099
+"4"	"DDX11"	0.724160415576466	12.4597575302041	8.47565074268945	0.00359931604058791	0.0208760330354099
+"26"	"NR2C2AP"	0.538450796715875	12.9331552590697	6.96854404518889	0.00829549660040106	0.0400949002352718
+"19"	"RASSF3"	-0.323616221214522	12.4964626371138	3.7019217136962	0.05434983717189	0.225163611140687
+"3"	"DDX17"	-0.20201533346913	16.1804174471336	2.4240349611365	0.119486805704499	0.43313967067881
+"2"	"USP26"	-2.19888949506255	4.27316802151059	2.23930233943077	0.1345416719394	0.433523165138066
+"27"	"INADL"	-0.171504044976009	15.2186072712825	1.49672630947915	0.221175760548511	0.641409705590681
+"21"	"LYSMD3"	-0.194806526906794	13.32535006408	1.33073961519612	0.248673682102714	0.655594252816247
+"16"	"IQCG"	-0.208260226810614	10.1622224199572	0.967867000668835	0.3252127194599	0.785930738694759
+"6"	"SLC35D3"	0.789817686242913	4.62274850327991	0.638997348362095	0.424074105189856	0.922823270583707
+"14"	"KCNAB3"	0.224776816841962	7.85434240586327	0.477050454902194	0.489761385678098	0.922823270583707
+"20"	"FUT8"	-0.100266057923686	15.3006920075591	0.448528599960923	0.503034528215111	0.922823270583707
+"1"	"COMMD10"	0.129868119349192	12.8850203875481	0.43582621570193	0.509143873425494	0.922823270583707
+"9"	"TXLNB"	-0.183590716507963	7.2825391341052	0.269612199658876	0.603592030691037	0.990834288521076
+"28"	"TMEM31"	-0.231195333650393	6.16897757110336	0.252957033365721	0.615000592875151	0.990834288521076
+"17"	"C20orf118"	0.163495741844413	6.73904801103973	0.171566008065263	0.678723780532467	1
+"7"	"GLOD4"	-0.0679141042839315	13.8710260882794	0.154827011727691	0.693964522467115	1
+"22"	"LMOD3"	0.0791372844747401	7.60390982671528	0.0540610465986004	0.816141676483626	1
+"8"	"GIMAP7"	0.195152071961945	4.26623062002702	0.0194047958154413	0.889212222514442	1
+"23"	"HIPK1"	-0.00677640818725696	17.7957744498389	0.00305975315578166	0.955887483884316	1
+"11"	"ATG4B"	0.00473988666702576	13.5252482941211	0.000418258796557325	0.983683299966982	1
+"13"	"KHSRP"	-0.00184247000571017	14.7206397592923	0.000136964665301775	0.990662418229392	1
+"5"	"PTPN20B"	0	4.09631395702755	0	1	1
+"18"	"GPIHBP1"	0	4.09631395702755	0	1	1
+"25"	"TAS2R39"	0	4.09631395702755	0	1	1
+"29"	"GC"	0	4.09631395702755	0	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt	Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,7 @@
+""	"genes"	"logFC"	"logCPM"	"LR"	"PValue"	"FDR"
+"15"	"RET"	1.94897640107286	13.2940435307943	77.6545995415986	1.22730171935022e-18	3.55917498611563e-17
+"24"	"HSPA8"	0.607138087178614	18.9380827005326	16.8408380186893	4.06490891119454e-05	0.000589411792123208
+"12"	"IFI44L"	-0.665544707287881	11.7020333673755	13.7144720195324	0.000212808308075529	0.00205714697806344
+"10"	"MYO18A"	-0.608389235629078	14.1586814058554	10.0030349277278	0.00156282461006963	0.0113304784230048
+"4"	"DDX11"	0.719283453206409	12.4597575302041	9.1203698809081	0.00252778847312638	0.014661173144133
+"26"	"NR2C2AP"	0.538719097450497	12.9331552590697	7.88314604309164	0.00498976028708414	0.0241171747209067