changeset 9:e27b4f7811c2 draft

Updated DESeq version 1.12
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 08:09:28 -0400
parents 2b3bb3348076
children 2fe512c7bfdf
files deseq-hts_1.0/README deseq-hts_1.0/bin/deseq_config.sh deseq-hts_1.0/bin/deseq_config.sh.sample deseq-hts_1.0/bin/start_interpreter.sh deseq-hts_1.0/galaxy/deseq.xml deseq-hts_1.0/setup_deseq-hts.sh deseq-hts_1.0/src/deseq-hts.sh deseq-hts_1.0/src/deseq_config.m deseq-hts_1.0/src/difftest_deseq.R
diffstat 9 files changed, 99 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/deseq-hts_1.0/README	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/README	Tue Oct 08 08:09:28 2013 -0400
@@ -1,48 +1,52 @@
----------------------------------------------------
-DESeq-hts: A Galaxy wrapper for DESeq version 1.6.1
----------------------------------------------------
+----------------------------------------------------
+DESeq: A Galaxy wrapper for DESeq version 1.12.1
+----------------------------------------------------
 
 Description:
+------------
     DESeq can be used as a web service embedded in a Galaxy instance. 
     We call it DESeq-hts. 
 
-Requirements: 
+Requirements:
+-------------
     MATLAB/OCTAVE and Python :- Preprocessing of sequencing reads and GFF files
-    R, Bio-conductor package :- Required for DESEQ 
+    R, Bioconductor package :- Required for DESeq 
     SCIPY, NUMPY :- for python  
-    SAMTOOLS :- Read processing 
+    SAMTOOLS :- Sequencing read processing 
 
 Contents:
-    [src]
+---------
+    ./src
     All relevant scripts for DESeq-hts are located in the subdirectory
-    src. src/deseq.sh is the main script to start DESeq-hts. The 
-    preprocessing of BAM and GFF file start before the R DESEQ script. 
+    src. src/deseq-hts.sh is the main script to start DESeq-hts. The 
+    preprocessing of the BAM and GFF files starts before the R DESeq script. 
 	Please follow the shell script to understand the details. 
     
-    [galaxy]
+    ./galaxy
     The Galaxy tool configuration file can be found in the galaxy folder. 
     Please make the necessary edits to the .xml file and the remaining .sh 
     files, and perform a few tests. 
 
-    [setup_deseq-hts.sh]
+    ./setup_deseq-hts.sh
     Setup script for DESeq-hts.
 
-    [mex]
+    ./mex
     matlab executable files.
 
-    [bin]
+    ./bin
     Contains deseq_config.sh file which is used for the configuration of 
     DESeq-hts. According to your platform, the default file will be changed.
 
-    [test_data]
-    This subsirectory contains all data for running a functional test in
+    ./test_data
+    This sub-directory contains all data for running a functional test in
     Galaxy framework. You may need to move these test files into the test-data
     directory.
 
-    [tools]
+    ./tools
     A python based GFF parsing program. Also contains small utils programs.
 
 Getting started:
+----------------
     Check for all requirements first, then
 
     a) Run ./setup_deseq-hts.sh and setup paths and configuration options for DESeq-hts.
@@ -57,6 +61,7 @@
     c) Edit the Galaxy tool configuration file to adjust the path if necessary. 
 
 Licenses:
+---------
     If **DESeq** is used to obtain results for scientific publications it should be cited as [1].
 
     This wrapper program (DESeq-hts) is free software; you can redistribute it and/or modify it 
@@ -65,11 +70,21 @@
 
     Written (W) 2009-2012 Jonas Behr, Regina Bohnert, Andre Kahles, Gunnar Raetsch, Vipin T. Sreedharan
     Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany and 
-                  2012 cBio Memorial Sloan Kettering Cancer Center, New York City, USA.
+                  2013 cBio Memorial Sloan Kettering Cancer Center, New York City, USA.
 
 References:
+-----------
     [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`. 
 
+Contributions:
+--------------
+    15 Aug. 2013 
+    Philippe Moncuquet
+    Bioinformatics Analyst, Bioinformatics Core, CSIRO Mathematics, Informatics and Statistics
+    
+    Extended the DESeq result.
+
 Contact:
-    vipin@cbio.mskcc.org 
+--------
+    support [at] oqtans.org 
 
--- a/deseq-hts_1.0/bin/deseq_config.sh	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/bin/deseq_config.sh	Tue Oct 08 08:09:28 2013 -0400
@@ -1,6 +1,5 @@
 #!/bin/bash
-# Copyright (C) 2010-2012 Max Planck Society
-export DESEQ_VERSION=1.6.0
+export DESEQ_VERSION=1.12.1
 export DESEQ_PATH=
 export DESEQ_SRC_PATH=$DESEQ_PATH/src
 export DESEQ_BIN_PATH=$DESEQ_PATH/bin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/deseq_config.sh.sample	Tue Oct 08 08:09:28 2013 -0400
@@ -0,0 +1,17 @@
+#!/bin/bash
+export DESEQ_VERSION=1.10.1
+export DESEQ_PATH=/home/galaxy/software/deseq_hts/
+export DESEQ_SRC_PATH=$DESEQ_PATH/src
+export DESEQ_BIN_PATH=$DESEQ_PATH/bin
+export INTERPRETER=octave
+export MATLAB_BIN_PATH=
+export MATLAB_MEX_PATH=
+export MATLAB_INCLUDE_DIR=
+export OCTAVE_BIN_PATH=/home/galaxy/software/octave/source/octave-3.6.3/octave
+export OCTAVE_MKOCT=/home/galaxy/software/bin/mkoctfile
+export SAMTOOLS_DIR=/home/galaxy/software/samtools-0.1.17/
+export PYTHON_PATH=/usr/bin/python
+export SCIPY_PATH=/home/galaxy/software/lib/python2.6/site-packages/
+export R_PATH=/home/galaxy/software/bin/R
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/oqtansTools/oqtans_dep/octave-3.6.2_64/lib/octave/3.6.2/
+export ENVIRONMENT=galaxy
--- a/deseq-hts_1.0/bin/start_interpreter.sh	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/bin/start_interpreter.sh	Tue Oct 08 08:09:28 2013 -0400
@@ -5,11 +5,11 @@
 
 . `dirname $0`/deseq_config.sh
 
-export MATLAB_RETURN_FILE=`tempfile`
+export MATLAB_RETURN_FILE=`mktemp`
 
 if [ "$INTERPRETER" == 'octave' ];
 then
-	echo exit | ${OCTAVE_BIN_PATH} --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f $MATLAB_RETURN_FILE; exit -1) ;
+	echo exit | ${OCTAVE_BIN_PATH} --no-window-system --silent --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f $MATLAB_RETURN_FILE; exit -1) ;
 fi
 
 if [ "$INTERPRETER" == 'matlab' ];
--- a/deseq-hts_1.0/galaxy/deseq.xml	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/galaxy/deseq.xml	Tue Oct 08 08:09:28 2013 -0400
@@ -1,7 +1,10 @@
-<tool id="deseq-hts" name="DESeq" version="1.6.1">
-  <description>Determines differentially expressed transcripts from read alignments</description>
-  <command> 
-deseq-hts/src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat 
+<tool id="deseq-hts" name="DESeq" version="1.12.1">
+  <description> Determines differentially expressed transcripts from read alignments</description>
+  <requirements>
+	<requirement type="package" version="0.1">oqtans</requirement>
+  </requirements>
+  <command interpreter="bash"> 
+./../src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat 
 #for $i in $replicate_groups
 #for $j in $i.replicates
 $j.bam_alignment:#slurp
@@ -19,8 +22,8 @@
   </inputs>
 
   <outputs>
-    <data format="txt" name="deseq_out" label="DESeq result"/>
-    <data format="txt" name="Log_File" label="DESeq log file"/>
+    <data format="txt" name="deseq_out" label="${tool.name} on ${on_string}: Differential Expression"/>
+    <data format="txt" name="Log_File" label="${tool.name} on ${on_string}: log"/>
   </outputs>
 
   <tests>
@@ -41,19 +44,15 @@
 
 **What it does** 
 
-`DESeq` is a tool for differential expression testing of RNA-Seq data.
-
+DESeq_ is a tool for differential expression testing of RNA-Seq data.
 
-**Inputs**
-
-`DESeq` requires three input files to run:
+.. _DESeq: http://bioconductor.org/packages/release/bioc/html/DESeq.html
 
-1. Annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified.
-2. The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments in a compressed format. They can be generated using the `SAM-to-BAM` tool in the NGS: SAM Tools section. (The script will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is  therefor not recommended.)
+`DESeq` requires:
 
-**Output**
+Genome annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified.
 
-`DESeq` generates a text file containing the gene name and the p-value.
+The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments. The program will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefore not recommended!
 
 ------
 
@@ -100,7 +99,7 @@
 
 ------
 
-DESeq-hts Wrapper Version 0.3 (Feb 2012)
+DESeq-hts Wrapper Version 0.5 (Aug 2013)
 
 </help>
 </tool>
--- a/deseq-hts_1.0/setup_deseq-hts.sh	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/setup_deseq-hts.sh	Tue Oct 08 08:09:28 2013 -0400
@@ -1,7 +1,8 @@
 #!/bin/bash
 set -e 
 
-. ./bin/deseq_config.sh
+DIR=`dirname $0`
+. ${DIR}/./bin/deseq_config.sh
 
 echo ==========================================
 echo  DESeq-hts setup script \(DESeq version $DESEQ_VERSION\) 
@@ -15,7 +16,6 @@
 fi
 echo '=>' Setting DESeq-hts base directory to \"$DESEQ_PATH\"
 echo
-
 echo SAMTools directory \(currently set to \"$SAMTOOLS_DIR\", system version used if left empty\)
 read SAMTOOLS_DIR
 if [ "$SAMTOOLS_DIR" == "" ];
--- a/deseq-hts_1.0/src/deseq-hts.sh	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/src/deseq-hts.sh	Tue Oct 08 08:09:28 2013 -0400
@@ -5,7 +5,7 @@
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# Copyright (C) 2009-2012 Max Planck Society
+# Copyright (C) 2009-2013 Max Planck Society & Memorial Sloan-Kettering Cancer Center 
 #
 
 set -e 
@@ -16,7 +16,7 @@
 . ${DIR}/../bin/deseq_config.sh
 
 echo
-echo ${PROG}: FML http://galaxy.fml.mpg.de Galaxy wrapper for the DESeq version $DESEQ_VERSION.
+echo ${PROG}: Oqtans http://galaxy.cbio.mskcc.org Galaxy wrapper for the DESeq version $DESEQ_VERSION.
 echo
 echo DESeq performs differential expression testing from RNA-Seq measurements.
 echo 
@@ -35,14 +35,12 @@
 echo %%%%%%%%%%%%%%%%%%%%%%%
 echo
 echo load the genome annotation in GFF3 format and create an annotation object
-echo
 export PYTHONPATH=$PYTHONPATH:${SCIPY_PATH}
 ${PYTHON_PATH} ${DIR}/../tools/ParseGFF.py ${ANNO_INPUT} ${GENES_FN}
-${DIR}/../bin/genes_cell2struct ${GENES_FN} 2>&1
+${DIR}/../bin/genes_cell2struct ${GENES_FN} 
 echo 
 echo genome annotation stored in $GENES_FN
 
-echo
 echo %%%%%%%%%%%%%%%%%%%%
 echo % 2. Read counting %
 echo %%%%%%%%%%%%%%%%%%%%
@@ -68,22 +66,18 @@
 tmpfile=`mktemp --tmpdir=/tmp`
 
 echo "${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile $@"
-${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile "$@" 2>&1
+${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile "$@" 
 
-echo
 echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
 echo % 3. Differential testing %
 echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
 echo
-
 echo testing genes for differential expression using given alignments
 
 echo "cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $#"
-cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $# 2> /dev/null
+cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $# 
 
 rm $tmpfile ${tmpfile}_COUNTS.tab ${tmpfile}_CONDITIONS.tab
-echo
 echo %%%%%%%%
 echo % Done %
 echo %%%%%%%%
-echo
--- a/deseq-hts_1.0/src/deseq_config.m	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/src/deseq_config.m	Tue Oct 08 08:09:28 2013 -0400
@@ -30,6 +30,7 @@
 
 % switch off a few expected warnings
 addpath(sprintf('%s/tools', DESEQ_PATH));
+engine='';
 lserve=license;
 if ~isequal(lserve, 'GNU General Public License'),
     engine='matlab';
--- a/deseq-hts_1.0/src/difftest_deseq.R	Wed Jun 27 15:38:39 2012 -0400
+++ b/deseq-hts_1.0/src/difftest_deseq.R	Tue Oct 08 08:09:28 2013 -0400
@@ -1,4 +1,5 @@
-library( DESeq )
+### load DESeq package
+suppressMessages(require("DESeq"))
 
 ### get arguments 1: INFILE, 2: OUTFILE 3:SIZE
 args <- commandArgs()
@@ -31,26 +32,39 @@
 {
     cds <- estimateDispersions( cds )
 } else {
-    writeLines("\nYou did not enter any replicates! - The results may be less valuable without replicates!\n")
+    writeLines("\n***You did not enter any replicates! - The results may be less valuable without replicates!***\n")
     cds <- estimateDispersions( cds, method='blind', sharingMode='fit-only')
 }
 experiments <- levels(conds)
 
-res<-c()
+res_1<-c()
+res_2<-c()
+res_3<-c()
+res_4<-c()
+res_5<-c()
+res_6<-c()
+res_7<-c()
+res_8<-c()
 table_col_names<-c()
+
 for (i in 1:(length(experiments)-1))
 {
    for( j in (i+1):(length(experiments)))
    {
        print(c(i,j))
        tempres <- nbinomTest(cds,experiments[i],experiments[j])
-       res = cbind(res,tempres[,7])
-       #res = cbind(res,tempres[,8])
-       table_col_names = cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep='')) 
+       res_1 = cbind(res_1,tempres[,1])
+       res_2 = cbind(res_2,tempres[,2])
+       res_3 = cbind(res_3,tempres[,3])
+       res_4 = cbind(res_4,tempres[,4])
+       res_5 = cbind(res_5,tempres[,5])
+       res_6 = cbind(res_6,tempres[,6])
+       res_7 = cbind(res_7,tempres[,7])
+       res_8 = cbind(res_8,tempres[,8])
+       table_col_names = cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep='', 'test')) 
    }
 }
 
-DiffTable<-res
-rownames(DiffTable)<-rownames(countsTable)
-colnames(DiffTable)<-table_col_names
+DiffTable<-cbind(res_1,res_2,res_3,res_4,res_5,res_6,res_7,res_8)
+colnames(DiffTable)<-c('feature ID', 'base  mean', 'base mean A', 'base mean B', 'fold change', 'log2 fold change','p value', 'adjusted p value')
 write.table(DiffTable, file = OUTFILE, quote = FALSE, sep ="\t", eol ="\n", na = "1.000", dec = ".", row.names = TRUE,col.names =TRUE)