Mercurial > repos > pjbriggs > weeder2
changeset 2:3c5f10f7dd40 draft
Updated to tool version 2.0.1 (use data table to locate freqfiles).
author | pjbriggs |
---|---|
date | Fri, 27 Nov 2015 11:06:28 -0500 |
parents | 571cb77ab9e7 |
children | f19e18ab01b1 |
files | README.rst test-data/weeder2.loc tool-data/weeder2.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test weeder2_wrapper.sh weeder2_wrapper.xml |
diffstat | 7 files changed, 105 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Tue Dec 09 11:27:19 2014 -0500 +++ b/README.rst Fri Nov 27 11:06:28 2015 -0500 @@ -30,7 +30,12 @@ <tool file="weeder2/weeder2_wrapper.xml" /> -You will also need to install ``weeder2`` from: +You also need to make a copy of the ``weeder2.loc`` file (a sample version is +provided here) which lists the species for which frequency files are available. +This file should be placed in the ``tool-data`` directory of your Galaxy +installation. + +Additionally you will need to install ``weeder2`` from: - http://159.149.160.51/modtools/downloads/weeder2.html @@ -41,20 +46,41 @@ Also the directory holding the Weeder2 executables should be on your ``PATH``. +Functional tests +================ + If you want to run the functional tests, copy the sample test files under Galaxy's ``test-data/`` directory. Then: - ./run_tests.sh -id trimmomatic + ./run_tests.sh -id weeder2 You will need to have set the environment variables above. +Reference Data +============== + +Weeder2 requires reference data in the form of frequency files for each +species of interest. A set of reference files is provided as part of the +Weeder2 installation. + +Additional frequency files can be generated for novel species using the +``w2frequency_maker`` utility available via: + +- http://159.149.160.51/weederaddons/weeder2freq.html + +This page also explains what input data should be used. + +The location of the additional frequency files can then be specified by +adding them to the ``weeder2.loc`` file (see above). + History ======= ========== ====================================================================== Version Changes ---------- ---------------------------------------------------------------------- -2.0.0.0 - Initial version +2.0.1 - Explicitly specify frequency files in ``weeder2.loc``. +2.0.0 - Initial version ========== ======================================================================
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/weeder2.loc Fri Nov 27 11:06:28 2015 -0500 @@ -0,0 +1,5 @@ +HS Homo sapiens (HS) . +MM Mus musculus (MM) . +DM Drosophila melanogaster (DM) . +SC Saccharomyces cerevisiae (SC) . +AT Arabidopsis thaliana (AT) .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/weeder2.loc.sample Fri Nov 27 11:06:28 2015 -0500 @@ -0,0 +1,35 @@ +# This is a sample file distributed with Galaxy that is used by the +# weeder2 tool to find the frequency files. +# +# The weeder2.loc file has this format (white space characters are TAB +# characters): +# +#<species_code> <Description> <PathToFreqFilesDir> +# +# <PathToFreqFilesDir> should be a period ('.') to indicate the +# frequency files that come as standard with weeder2; otherwise +# it should be the full path to the directory holding a custom +# set of .freq files. +# +# For example: +# +#AF Aspergillus fumigatus (AF) /home/galaxy/weeder/FreqFiles +# +# where '/home/galaxy/weeder/FreqFiles' would contain: +# +#-rw-rw-r-- 1 pjb pjb 20109718 Oct 23 13:16 AF_ds.10.freq +#-rw-rw-r-- 1 pjb pjb 47080 Oct 23 13:15 AF_ds.6.freq +#-rw-rw-r-- 1 pjb pjb 1006374 Oct 23 13:15 AF_ds.8.freq +#-rw-rw-r-- 1 pjb pjb 19930169 Oct 23 13:16 AF_ss.10.freq +#-rw-rw-r-- 1 pjb pjb 45262 Oct 23 13:16 AF_ss.6.freq +#-rw-rw-r-- 1 pjb pjb 984583 Oct 23 13:16 AF_ss.8.freq +# +# Entries are already provided for the standard frequency files that +# are distributed with Weeder2. +# This file should be placed in galaxy's tool-data directory when the +# weeder2 tool is installed. +HS Homo sapiens (HS) . +MM Mus musculus (MM) . +DM Drosophila melanogaster (DM) . +SC Saccharomyces cerevisiae (SC) . +AT Arabidopsis thaliana (AT) .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Nov 27 11:06:28 2015 -0500 @@ -0,0 +1,6 @@ +<tables> + <table name="weeder2" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/weeder2.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Nov 27 11:06:28 2015 -0500 @@ -0,0 +1,6 @@ +<tables> + <table name="weeder2" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/weeder2.loc" /> + </table> +</tables> \ No newline at end of file
--- a/weeder2_wrapper.sh Tue Dec 09 11:27:19 2014 -0500 +++ b/weeder2_wrapper.sh Fri Nov 27 11:06:28 2015 -0500 @@ -2,31 +2,40 @@ # # Wrapper script to run weeder2 as a Galaxy tool # -# Usage: weeder_wrapper.sh FASTA_IN SPECIES_CODE MOTIFS_OUT MATRIX_OUT [ ARGS... ] +# Usage: weeder_wrapper.sh FASTA_IN SPECIES_CODE FREQFILES_DIR MOTIFS_OUT MATRIX_OUT [ ARGS... ] # # ARGS: one or more arguments to supply directly to weeder2 # # Process command line FASTA_IN=$1 SPECIES_CODE=$2 -MOTIFS_OUT=$3 -MATRIX_OUT=$4 +FREQFILES_DIR=$3 +MOTIFS_OUT=$4 +MATRIX_OUT=$5 # # Other arguments ARGS="" -while [ ! -z "$5" ] ; do - ARGS="$ARGS $5" +while [ ! -z "$6" ] ; do + ARGS="$ARGS $6" shift done # # Link to input file ln -s $FASTA_IN # +# Locate the FreqFiles directory +if [ $FREQFILES_DIR == "." ] ; then + # Use the files in the Weeder2 distribution + freqfiles_dir=$WEEDER_FREQFILES_DIR +else + # Alternative location + freqfiles_dir=$FREQFILES_DIR +fi +# # Link to the FreqFiles directory as weeder2 executable # expects it to be the same directory -freqfiles_dir=$WEEDER_FREQFILES_DIR if [ -d $freqfiles_dir ] ; then - echo "Linking to FreqFiles directory" + echo "Linking to FreqFiles directory: $freqfiles_dir" ln -s $freqfiles_dir FreqFiles else echo "ERROR FreqFiles directory not found" >&2
--- a/weeder2_wrapper.xml Tue Dec 09 11:27:19 2014 -0500 +++ b/weeder2_wrapper.xml Fri Nov 27 11:06:28 2015 -0500 @@ -1,7 +1,10 @@ -<tool id="motiffinding_weeder2" name="Weeder2" version="2.0.0"> +<tool id="motiffinding_weeder2" name="Weeder2" version="2.0.1"> <description>Motif discovery in sequences from coregulated genes of a single species</description> + <requirements> + <requirement type="package" version="2.0">weeder</requirement> + </requirements> <command interpreter="bash">weeder2_wrapper.sh - $sequence_file $species_code + $sequence_file $species_code ${species_code.fields.path} $output_motifs_file $output_matrix_file $strands #if $chipseq.use_chipseq @@ -13,21 +16,12 @@ -sim $advanced_options.sim_threshold -em $advanced_options.em_cycles #end if -</command> - <requirements> - <requirement type="package" version="2.0">weeder</requirement> - </requirements> + </command> <inputs> <param name="sequence_file" type="data" format="fasta" label="Input sequence" /> <param name="species_code" type="select" label="Species to use for background comparison"> - <!-- Hard code options for now - See weeder's "organisms.txt" for full list - --> - <option value="HS">Homo sapiens (HS)</option> - <option value="MM">Mus musculus (MM)</option> - <option value="DM">Drosophila melanogaster (DM)</option> - <option value="SC">Saccharomyces cerevisiae (SC)</option> - <option value="AT">Arabidopsis thaliana (AT)</option> + <options from_data_table="weeder2"> + </options> </param> <param name="strands" label="Use both strands of sequence" type="boolean" truevalue="" falsevalue="-ss" checked="True"