changeset 2:3c5f10f7dd40 draft

Updated to tool version 2.0.1 (use data table to locate freqfiles).
author pjbriggs
date Fri, 27 Nov 2015 11:06:28 -0500
parents 571cb77ab9e7
children f19e18ab01b1
files README.rst test-data/weeder2.loc tool-data/weeder2.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test weeder2_wrapper.sh weeder2_wrapper.xml
diffstat 7 files changed, 105 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Tue Dec 09 11:27:19 2014 -0500
+++ b/README.rst	Fri Nov 27 11:06:28 2015 -0500
@@ -30,7 +30,12 @@
 
     <tool file="weeder2/weeder2_wrapper.xml" />
 
-You will also need to install ``weeder2`` from:
+You also need to make a copy of the ``weeder2.loc`` file (a sample version is
+provided here) which lists the species for which frequency files are available.
+This file should be placed in the ``tool-data`` directory of your Galaxy
+installation.
+
+Additionally you will need to install ``weeder2`` from:
 
 - http://159.149.160.51/modtools/downloads/weeder2.html
 
@@ -41,20 +46,41 @@
 
 Also the directory holding the Weeder2 executables should be on your ``PATH``.
 
+Functional tests
+================
+
 If you want to run the functional tests, copy the sample test files under
 sample test files under Galaxy's ``test-data/`` directory. Then:
 
-    ./run_tests.sh -id trimmomatic
+    ./run_tests.sh -id weeder2
 
 You will need to have set the environment variables above.
 
+Reference Data
+==============
+
+Weeder2 requires reference data in the form of frequency files for each
+species of interest. A set of reference files is provided as part of the
+Weeder2 installation.
+
+Additional frequency files can be generated for novel species using the
+``w2frequency_maker`` utility available via:
+
+- http://159.149.160.51/weederaddons/weeder2freq.html
+
+This page also explains what input data should be used.
+
+The location of the additional frequency files can then be specified by
+adding them to the ``weeder2.loc`` file (see above).
+
 History
 =======
 
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
-2.0.0.0    - Initial version
+2.0.1      - Explicitly specify frequency files in ``weeder2.loc``.
+2.0.0      - Initial version
 ========== ======================================================================
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/weeder2.loc	Fri Nov 27 11:06:28 2015 -0500
@@ -0,0 +1,5 @@
+HS	Homo sapiens (HS)	.
+MM	Mus musculus (MM)	.
+DM	Drosophila melanogaster (DM)	.
+SC	Saccharomyces cerevisiae (SC)	.
+AT	Arabidopsis thaliana (AT)	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/weeder2.loc.sample	Fri Nov 27 11:06:28 2015 -0500
@@ -0,0 +1,35 @@
+# This is a sample file distributed with Galaxy that is used by the
+# weeder2 tool to find the frequency files.
+#
+# The weeder2.loc file has this format (white space characters are TAB
+# characters):
+#
+#<species_code>	<Description>	<PathToFreqFilesDir>
+#
+# <PathToFreqFilesDir> should be a period ('.') to indicate the
+# frequency files that come as standard with weeder2; otherwise
+# it should be the full path to the directory holding a custom
+# set of .freq files.
+#
+# For example:
+#
+#AF	Aspergillus fumigatus (AF)	/home/galaxy/weeder/FreqFiles
+#
+# where '/home/galaxy/weeder/FreqFiles' would contain:
+#
+#-rw-rw-r-- 1 pjb pjb 20109718 Oct 23 13:16 AF_ds.10.freq
+#-rw-rw-r-- 1 pjb pjb    47080 Oct 23 13:15 AF_ds.6.freq
+#-rw-rw-r-- 1 pjb pjb  1006374 Oct 23 13:15 AF_ds.8.freq
+#-rw-rw-r-- 1 pjb pjb 19930169 Oct 23 13:16 AF_ss.10.freq
+#-rw-rw-r-- 1 pjb pjb    45262 Oct 23 13:16 AF_ss.6.freq
+#-rw-rw-r-- 1 pjb pjb   984583 Oct 23 13:16 AF_ss.8.freq
+#
+# Entries are already provided for the standard frequency files that
+# are distributed with Weeder2.
+# This file should be placed in Galaxy's tool-data directory when the
+# weeder2 tool is installed.
+HS	Homo sapiens (HS)	.
+MM	Mus musculus (MM)	.
+DM	Drosophila melanogaster (DM)	.
+SC	Saccharomyces cerevisiae (SC)	.
+AT	Arabidopsis thaliana (AT)	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 27 11:06:28 2015 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <table name="weeder2" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/weeder2.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Nov 27 11:06:28 2015 -0500
@@ -0,0 +1,6 @@
+<tables>
+    <table name="weeder2" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/weeder2.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- a/weeder2_wrapper.sh	Tue Dec 09 11:27:19 2014 -0500
+++ b/weeder2_wrapper.sh	Fri Nov 27 11:06:28 2015 -0500
@@ -2,31 +2,40 @@
 #
 # Wrapper script to run weeder2 as a Galaxy tool
 #
-# Usage: weeder_wrapper.sh FASTA_IN SPECIES_CODE MOTIFS_OUT MATRIX_OUT [ ARGS... ]
+# Usage: weeder2_wrapper.sh FASTA_IN SPECIES_CODE FREQFILES_DIR MOTIFS_OUT MATRIX_OUT [ ARGS... ]
 #
 # ARGS: one or more arguments to supply directly to weeder2
 #
 # Process command line
 FASTA_IN=$1
 SPECIES_CODE=$2
-MOTIFS_OUT=$3
-MATRIX_OUT=$4
+FREQFILES_DIR=$3
+MOTIFS_OUT=$4
+MATRIX_OUT=$5
 #
 # Other arguments
 ARGS=""
-while [ ! -z "$5" ] ; do
-    ARGS="$ARGS $5"
+while [ ! -z "$6" ] ; do
+    ARGS="$ARGS $6"
     shift
 done
 #
 # Link to input file
 ln -s $FASTA_IN
 #
+# Locate the FreqFiles directory
+if [ $FREQFILES_DIR == "." ] ; then
+    # Use the files in the Weeder2 distribution
+    freqfiles_dir=$WEEDER_FREQFILES_DIR
+else
+    # Alternative location
+    freqfiles_dir=$FREQFILES_DIR
+fi
+#
 # Link to the FreqFiles directory as weeder2 executable
 # expects it to be the same directory
-freqfiles_dir=$WEEDER_FREQFILES_DIR
 if [ -d $freqfiles_dir ] ; then
-    echo "Linking to FreqFiles directory"
+    echo "Linking to FreqFiles directory: $freqfiles_dir"
     ln -s $freqfiles_dir FreqFiles
 else
     echo "ERROR FreqFiles directory not found" >&2
--- a/weeder2_wrapper.xml	Tue Dec 09 11:27:19 2014 -0500
+++ b/weeder2_wrapper.xml	Fri Nov 27 11:06:28 2015 -0500
@@ -1,7 +1,10 @@
-<tool id="motiffinding_weeder2" name="Weeder2" version="2.0.0">
+<tool id="motiffinding_weeder2" name="Weeder2" version="2.0.1">
   <description>Motif discovery in sequences from coregulated genes of a single species</description>
+  <requirements>
+    <requirement type="package" version="2.0">weeder</requirement>
+  </requirements>
   <command interpreter="bash">weeder2_wrapper.sh
-  $sequence_file $species_code
+  $sequence_file $species_code ${species_code.fields.path}
   $output_motifs_file $output_matrix_file
   $strands
   #if $chipseq.use_chipseq
@@ -13,21 +16,12 @@
      -sim $advanced_options.sim_threshold
      -em $advanced_options.em_cycles
   #end if
-</command>
-  <requirements>
-    <requirement type="package" version="2.0">weeder</requirement>
-  </requirements>
+  </command>
   <inputs>
     <param name="sequence_file" type="data" format="fasta" label="Input sequence" />
     <param name="species_code" type="select" label="Species to use for background comparison">
-      <!-- Hard code options for now
-	   See weeder's "organisms.txt" for full list
-      -->
-      <option value="HS">Homo sapiens (HS)</option>
-      <option value="MM">Mus musculus (MM)</option>
-      <option value="DM">Drosophila melanogaster (DM)</option>
-      <option value="SC">Saccharomyces cerevisiae (SC)</option>
-      <option value="AT">Arabidopsis thaliana (AT)</option>
+      <options from_data_table="weeder2">
+      </options>
     </param>
     <param name="strands" label="Use both strands of sequence" type="boolean"
 	   truevalue="" falsevalue="-ss" checked="True"