Mercurial > repos > pjbriggs > trimmomatic
changeset 3:f8a9a5eaca8a draft
Updated to version 0.32.3: add support for FASTQ pairs (dataset collections)
author | pjbriggs |
---|---|
date | Wed, 23 Sep 2015 08:59:23 -0400 |
parents | a60283899c6d |
children | 14d05f2d511d |
files | README.rst install_tool_deps.sh package_trimmomatic.sh run_planemo_tests.sh trimmomatic.xml |
diffstat | 5 files changed, 204 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Wed Apr 22 09:36:27 2015 -0400 +++ b/README.rst Wed Sep 23 08:59:23 2015 -0400 @@ -58,6 +58,9 @@ ========== ====================================================================== Version Changes ---------- ---------------------------------------------------------------------- +0.32.3 - Add support for FASTQ R1/R2 pairs using dataset collections (input + can be dataset collection, in which case tool also outputs dataset + collections) and improve order and naming of output files. 0.32.2 - Use ``GALAXY_SLOTS`` to set the appropriate number of threads to use at runtime (default is 6). 0.32.1 - Remove ``trimmomatic_adapters.loc.sample`` and hard-code adapter files
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/install_tool_deps.sh Wed Sep 23 08:59:23 2015 -0400 @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Install dependencies for Trimmomatic for testing from the command line +# +# Installation directory +TOP_DIR=$1 +if [ -z "$TOP_DIR" ] ; then + echo Usage: $(basename $0) DIR + exit +fi +if [ -z "$(echo $TOP_DIR | grep ^/)" ] ; then + TOP_DIR=$(pwd)/$TOP_DIR +fi +if [ ! -d "$TOP_DIR" ] ; then + mkdir -p $TOP_DIR +fi +cd $TOP_DIR +# Trimmomatic 0.32 +INSTALL_DIR=$TOP_DIR/trimmomatic/0.32 +mkdir -p $INSTALL_DIR +wd=$(mktemp -d) +pushd $wd +wget -q http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.32.zip +unzip -qq Trimmomatic-0.32.zip +mv Trimmomatic-0.32/trimmomatic-0.32.jar $INSTALL_DIR/ +mv Trimmomatic-0.32/adapters/ $INSTALL_DIR/ +popd +rm -rf $wd/* +rmdir $wd +# Make setup file +cat > trimmomatic/0.32/env.sh <<EOF +#!/bin/sh +# Source this to setup trimmomatic/0.32 +echo Setting up Trimmomatic 0.32 +export TRIMMOMATIC_DIR=$INSTALL_DIR +export TRIMMOMATIC_ADAPTERS_DIR=$INSTALL_DIR/adapters +# +EOF +## +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/package_trimmomatic.sh Wed Sep 23 08:59:23 2015 -0400 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Package Trimmomatic tool files into tgz file for upload to +# Galaxy toolshed +# +TGZ=trimmomatic.tgz +if [ -f $TGZ ] ; then + echo $TGZ: already exists, please remove >&2 + exit 1 +fi +tar cvzf $TGZ \ + README.rst \ + trimmomatic.xml \ + trimmomatic.sh \ + tool_dependencies.xml \ + test-data +if [ -f $TGZ ] ; then + echo Created $TGZ +else + echo Failed to created $TGZ >&2 + exit 1 +fi +## +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_planemo_tests.sh Wed Sep 23 08:59:23 2015 -0400 @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Install dependencies and set up environment for +# trimmomatic tool, then run tests using planemo +# +# Note that any arguments supplied to the script are +# passed directly to the "planemo test..." invocation +# +# e.g. --install_galaxy (to get planemo to create a +# Galaxy instance to run tests) +# +# --galaxy_root DIR (to run tests using existing +# Galaxy instance) +# +# List of dependencies +TOOL_DEPENDENCIES="trimmomatic/0.32" +# Where to find them +TOOL_DEPENDENCIES_DIR=$(pwd)/test.tool_dependencies.trimmomatic +if [ ! -d $TOOL_DEPENDENCIES_DIR ] ; then + echo WARNING $TOOL_DEPENDENCIES_DIR not found >&2 + echo Creating tool dependencies dir + mkdir -p $TOOL_DEPENDENCIES_DIR + echo Installing tool dependencies + $(dirname $0)/install_tool_deps.sh $TOOL_DEPENDENCIES_DIR +fi +# Load dependencies +for dep in $TOOL_DEPENDENCIES ; do + env_file=$TOOL_DEPENDENCIES_DIR/$dep/env.sh + if [ -e $env_file ] ; then + . $env_file + else + echo ERROR no env.sh file found for $dep >&2 + exit 1 + fi +done +# Run the planemo tests +planemo test $@ $(dirname $0)/trimmomatic.xml +## +#
--- a/trimmomatic.xml Wed Apr 22 09:36:27 2015 -0400 +++ b/trimmomatic.xml Wed Sep 23 08:59:23 2015 -0400 @@ -1,12 +1,31 @@ -<tool id="trimmomatic" name="Trimmomatic" version="0.32.2"> +<tool id="trimmomatic" name="Trimmomatic" version="0.32.3"> <description>flexible read trimming tool for Illumina NGS data</description> - <command interpreter="bash">trimmomatic.sh + <requirements> + <requirement type="package" version="0.32">trimmomatic</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command interpreter="bash"><![CDATA[ + trimmomatic.sh -mx8G -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar #if $paired_end.is_paired_end - PE -threads \${GALAXY_SLOTS:-6} -phred33 $fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired + PE -threads \${GALAXY_SLOTS:-6} -phred33 + #set $paired_input_type = $paired_end.paired_input_type_conditional.paired_input_type + #if $paired_input_type == "pair_of_files" + "${paired_end.paired_input_type_conditional.fastq_r1_in}" + "${paired_end.paired_input_type_conditional.fastq_r2_in}" + "${fastq_out_r1_paired}" "${fastq_out_r1_unpaired}" + "${fastq_out_r2_paired}" "${fastq_out_r2_unpaired}" + #else + "${paired_end.paired_input_type_conditional.fastq_pair.forward}" + "${paired_end.paired_input_type_conditional.fastq_pair.reverse}" + "${fastq_out_paired.forward}" "${fastq_out_unpaired.forward}" + "${fastq_out_paired.reverse}" "${fastq_out_unpaired.reverse}" + #end if #else - SE -threads \${GALAXY_SLOTS:-6} -phred33 $fastq_in $fastq_out + SE -threads \${GALAXY_SLOTS:-6} -phred33 "$fastq_in" "$fastq_out" #end if ## ILLUMINACLIP option #if $illuminaclip.do_illuminaclip @@ -35,10 +54,7 @@ HEADCROP:$op.operation.headcrop #end if #end for - </command> - <requirements> - <requirement type="package" version="0.32">trimmomatic</requirement> - </requirements> + ]]></command> <inputs> <conditional name="paired_end"> <param name="is_paired_end" type="boolean" 
label="Paired end data?" truevalue="yes" falsevalue="no" checked="on" /> @@ -46,10 +62,23 @@ <param name="fastq_in" type="data" format="fastqsanger" label="Input FASTQ file" /> </when> <when value="yes"> - <param name="fastq_r1_in" type="data" format="fastqsanger" - label="Input FASTQ file (R1/first of pair)" /> - <param name="fastq_r2_in" type="data" format="fastqsanger" - label="Input FASTQ file (R2/second of pair)" /> + <conditional name="paired_input_type_conditional"> + <param name="paired_input_type" type="select" label="Input Type"> + <option value="pair_of_files" selected="true">Pair of datasets</option> + <option value="collection">Dataset collection pair</option> + </param> + <when value="pair_of_files"> + <param name="fastq_r1_in" type="data" format="fastqsanger" + label="Input FASTQ file (R1/first of pair)" /> + <param name="fastq_r2_in" type="data" format="fastqsanger" + label="Input FASTQ file (R2/second of pair)" /> + </when> + <when value="collection"> + <param name="fastq_pair" format="fastqsanger" type="data_collection" + collection_type="paired" + label="Select FASTQ dataset collection with R1/R2 pair" /> + </when> + </conditional> </when> </conditional> <conditional name="illuminaclip"> @@ -101,21 +130,37 @@ </repeat> </inputs> <outputs> - <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${on_string} (R1 paired)"> + <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r1_in.name} (R1 paired)"> <filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter> </data> - <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${on_string} (R1 unpaired)"> + <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r2_in.name} (R2 paired)"> 
<filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter> </data> - <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${on_string} (R2 paired)"> + <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r1_in.name} (R1 unpaired)"> <filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter> </data> - <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${on_string} (R2 unpaired)"> + <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r2_in.name} (R2 unpaired)"> <filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter> </data> - <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${on_string}"> + <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${paired_end.fastq_in.name}"> <filter>not paired_end['is_paired_end']</filter> </data> + <collection name="fastq_out_paired" type="paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.name}: paired"> + <data name="forward" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.forward.name} (R1 paired)" /> + <data name="reverse" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.reverse.name} (R2 paired)" /> + <filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "collection"</filter> + </collection> + <collection name="fastq_out_unpaired" type="paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.name}: unpaired"> 
+ <data name="forward" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.forward.name} (R1 unpaired)" /> + <data name="reverse" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.reverse.name} (R2 unpaired)" /> + <filter>paired_end['is_paired_end']</filter> + <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "collection"</filter> + </collection> </outputs> <tests> <test> @@ -159,8 +204,28 @@ --> <output name="fastq_out" file="trimmomatic_se_out2.fastq" /> </test> + <test> + <!-- Paired-end with dataset collection --> + <param name="is_paired_end" value="yes" /> + <param name="paired_input_type" value="collection" /> + <param name="fastq_pair"> + <collection type="paired"> + <element name="forward" value="Illumina_SG_R1.fastq" ftype="fastqsanger" /> + <element name="reverse" value="Illumina_SG_R2.fastq" ftype="fastqsanger"/> + </collection> + </param> + <param name="operations_0|operation|name" value="SLIDINGWINDOW" /> + <output_collection name="fastq_out_paired" type="paired"> + <element name="forward" file="trimmomatic_pe_r1_paired_out1.fastq" /> + <element name="reverse" file="trimmomatic_pe_r2_paired_out1.fastq" /> + </output_collection> + <output_collection name="fastq_out_unpaired" type="paired"> + <element name="forward" file="trimmomatic_pe_r1_unpaired_out1.fastq" /> + <element name="reverse" file="trimmomatic_pe_r2_unpaired_out1.fastq" /> + </output_collection> + </test> </tests> - <help> + <help><![CDATA[ .. class:: infomark **What it does** @@ -191,6 +256,14 @@ .. class:: infomark +**Inputs** + +For single-end data this Trimmomatic tool accepts a single FASTQ file; for +paired-end data it will accept either two FASTQ files (R1 and R2), or a +dataset collection containing the R1/R2 FASTQ pair. + +.. 
class:: infomark + **Outputs** For paired-end data a particular strength of Trimmomatic is that it retains the @@ -201,6 +274,12 @@ * Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where one of the pair failed the filtering steps. +.. class:: warningmark + +If the input consists of a dataset collection with the R1/R2 FASTQ pair then +the outputs will also include two dataset collections: one for the 'paired' +outputs and one for the 'unpaired' (as described above) + Retaining the same order and number of reads in the filtered output fastq files is essential for many downstream analysis tools. @@ -228,7 +307,7 @@ Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you use it. - </help> + ]]></help> <citations> <!-- See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set