Mercurial > repos > pjbriggs > trimmomatic
diff trimmomatic.xml @ 0:3358c3d30143 draft
Uploaded initial version.
author | pjbriggs |
---|---|
date | Mon, 01 Dec 2014 10:40:07 -0500 |
parents | |
children | 2bd7cdbb6228 |
line wrap: on
line diff
<!-- Galaxy tool wrapper for Trimmomatic. Reconstructed from the flattened diff view of
     trimmomatic.xml (initial upload, Mon Dec 01 10:40:07 2014 -0500); diff markers removed. -->
<tool id="trimmomatic" name="Trimmomatic" version="0.32.1">
  <description>flexible read trimming tool for Illumina NGS data</description>
  <!-- The command text is a Cheetah template. "\$" emits a literal dollar sign so the
       variable is expanded by the shell at run time rather than by the template.
       Only Cheetah comments (two hash signs) are used inside the template itself. -->
  <command interpreter="bash">trimmomatic.sh
  -mx8G
  -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar
  ## Thread count follows GALAXY_SLOTS when it is set and falls back to 6 otherwise
  ## Inputs that live inside the paired_end conditional are referenced by their full path
  #if $paired_end.is_paired_end
    PE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired
  #else
    SE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_in $fastq_out
  #end if
  ## ILLUMINACLIP option (always emitted before the other operations)
  #if $illuminaclip.do_illuminaclip
    ILLUMINACLIP:\$TRIMMOMATIC_ADAPTERS_DIR/$illuminaclip.adapter_fasta:$illuminaclip.seed_mismatches:$illuminaclip.palindrome_clip_threshold:$illuminaclip.simple_clip_threshold
  #end if
  ## Other operations, emitted in the order the user added them
  #for $op in $operations
    ## SLIDINGWINDOW
    #if str( $op.operation.name ) == "SLIDINGWINDOW"
      SLIDINGWINDOW:$op.operation.window_size:$op.operation.required_quality
    #end if
    ## MINLEN
    #if str( $op.operation.name ) == "MINLEN"
      MINLEN:$op.operation.minlen
    #end if
    ## LEADING
    #if str( $op.operation.name ) == "LEADING"
      LEADING:$op.operation.leading
    #end if
    ## TRAILING
    #if str( $op.operation.name ) == "TRAILING"
      TRAILING:$op.operation.trailing
    #end if
    ## CROP
    #if str( $op.operation.name ) == "CROP"
      CROP:$op.operation.crop
    #end if
    ## HEADCROP
    #if str( $op.operation.name ) == "HEADCROP"
      HEADCROP:$op.operation.headcrop
    #end if
  #end for
  </command>
  <requirements>
    <requirement type="package" version="0.32">trimmomatic</requirement>
  </requirements>
  <inputs>
    <!-- Single-end versus paired-end input selection; also drives the output filters -->
    <conditional name="paired_end">
      <param name="is_paired_end" type="boolean" label="Paired end data?"
             truevalue="yes" falsevalue="no" checked="on" />
      <when value="no">
        <param name="fastq_in" type="data" format="fastqsanger" label="Input FASTQ file" />
      </when>
      <when value="yes">
        <param name="fastq_r1_in" type="data" format="fastqsanger"
               label="Input FASTQ file (R1/first of pair)" />
        <param name="fastq_r2_in" type="data" format="fastqsanger"
               label="Input FASTQ file (R2/second of pair)" />
      </when>
    </conditional>
    <!-- Optional adapter clipping step; its settings are only shown when enabled -->
    <conditional name="illuminaclip">
      <param name="do_illuminaclip" type="boolean" label="Perform initial ILLUMINACLIP step?" help="Cut adapter and other illumina-specific sequences from the read" truevalue="yes" falsevalue="no" checked="off" />
      <when value="yes">
        <!-- Option values are file names resolved under TRIMMOMATIC_ADAPTERS_DIR by the command -->
        <param name="adapter_fasta" type="select" label="Adapter sequences to use">
          <option value="TruSeq2-SE.fa">TruSeq2 (single-ended, for Illumina GAII)</option>
          <option value="TruSeq3-SE.fa">TruSeq3 (single-ended, for MiSeq and HiSeq)</option>
          <option value="TruSeq2-PE.fa">TruSeq2 (paired-ended, for Illumina GAII)</option>
          <option value="TruSeq3-PE.fa">TruSeq3 (paired-ended, for MiSeq and HiSeq)</option>
          <option value="TruSeq3-PE-2.fa">TruSeq3 (additional seqs) (paired-ended, for MiSeq and HiSeq)</option>
          <option value="NexteraPE-PE.fa">Nextera (paired-ended)</option>
        </param>
        <param name="seed_mismatches" type="integer" label="Maximum mismatch count which will still allow a full match to be performed" value="2" />
        <param name="palindrome_clip_threshold" type="integer" label="How accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment" value="30" />
        <param name="simple_clip_threshold" type="integer" label="How accurate the match between any adapter etc. sequence must be against a read" value="10" />
      </when>
    </conditional>
    <!-- One or more trimming operations; each selected name maps to one branch of the
         command template loop -->
    <repeat name="operations" title="Trimmomatic Operation" min="1">
      <conditional name="operation">
        <param name="name" type="select" label="Select Trimmomatic operation to perform">
          <option selected="true" value="SLIDINGWINDOW">Sliding window trimming (SLIDINGWINDOW)</option>
          <option value="MINLEN">Drop reads below a specified length (MINLEN)</option>
          <option value="LEADING">Cut bases off the start of a read, if below a threshold quality (LEADING)</option>
          <option value="TRAILING">Cut bases off the end of a read, if below a threshold quality (TRAILING)</option>
          <option value="CROP">Cut the read to a specified length (CROP)</option>
          <option value="HEADCROP">Cut the specified number of bases from the start of the read (HEADCROP)</option>
        </param>
        <when value="SLIDINGWINDOW">
          <param name="window_size" type="integer" label="Number of bases to average across" value="4" />
          <param name="required_quality" type="integer" label="Average quality required" value="20" />
        </when>
        <when value="MINLEN">
          <param name="minlen" type="integer" label="Minimum length of reads to be kept" value="20" />
        </when>
        <when value="LEADING">
          <param name="leading" type="integer" label="Minimum quality required to keep a base" value="3" help="Bases at the start of the read with quality below the threshold will be removed" />
        </when>
        <when value="TRAILING">
          <param name="trailing" type="integer" label="Minimum quality required to keep a base" value="3" help="Bases at the end of the read with quality below the threshold will be removed" />
        </when>
        <!-- CROP and HEADCROP ship with an empty default, so a value has to be entered -->
        <when value="CROP">
          <param name="crop" type="integer" label="Number of bases to keep from the start of the read" value="" />
        </when>
        <when value="HEADCROP">
          <param name="headcrop" type="integer" label="Number of bases to remove from the start of the read" value="" />
        </when>
      </conditional>
    </repeat>
  </inputs>
  <!-- Four outputs are filtered on the paired-end flag and one on its negation -->
  <outputs>
    <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${on_string} (R1 paired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${on_string} (R1 unpaired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${on_string} (R2 paired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${on_string} (R2 unpaired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${on_string}">
      <filter>not paired_end['is_paired_end']</filter>
    </data>
  </outputs>
  <tests>
    <test>
      <!-- Single-end example -->
      <param name="is_paired_end" value="no" />
      <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
      <!--
      **NB** outputs have to be specified in order that they appear in the
      tool (which is the order they will be written to the history) - the
      test framework seems to use the order and ignores the "name" attribute
      -->
      <output name="fastq_out" file="trimmomatic_se_out1.fastq" />
    </test>
    <test>
      <!-- Paired-end example -->
      <param name="is_paired_end" value="yes" />
      <param name="fastq_r1_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="fastq_r2_in" value="Illumina_SG_R2.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
      <!--
      **NB** outputs have to be specified in order that they appear in the
      tool (which is the order they will be written to the history) - the
      test framework seems to use the order and ignores the "name" attribute
      -->
      <output name="fastq_out_r1_paired" file="trimmomatic_pe_r1_paired_out1.fastq" />
      <output name="fastq_out_r1_unpaired" file="trimmomatic_pe_r1_unpaired_out1.fastq" />
      <output name="fastq_out_r2_paired" file="trimmomatic_pe_r2_paired_out1.fastq" />
      <output name="fastq_out_r2_unpaired" file="trimmomatic_pe_r2_unpaired_out1.fastq" />
    </test>
    <test>
      <!-- Single-end example (cropping) -->
      <param name="is_paired_end" value="no" />
      <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="CROP" />
      <param name="operations_0|operation|crop" value="10" />
      <!--
      **NB** outputs have to be specified in order that they appear in the
      tool (which is the order they will be written to the history) - the
      test framework seems to use the order and ignores the "name" attribute
      -->
      <output name="fastq_out" file="trimmomatic_se_out2.fastq" />
    </test>
  </tests>
  <help>
.. class:: infomark

**What it does**

Trimmomatic performs a variety of useful trimming tasks for Illumina paired-end and
single ended data.

This tool allows the following trimming steps to be performed:

  * **ILLUMINACLIP:** Cut adapter and other Illumina-specific sequences from the read
  * **SLIDINGWINDOW:** Perform a sliding window trimming, cutting once the average
    quality within the window falls below a threshold
  * **MINLEN:** Drop the read if it is below a specified length
  * **LEADING:** Cut bases off the start of a read, if below a threshold quality
  * **TRAILING:** Cut bases off the end of a read, if below a threshold quality
  * **CROP:** Cut the read to a specified length
  * **HEADCROP:** Cut the specified number of bases from the start of the read

If ILLUMINACLIP is requested then it is always performed first; subsequent options
can be mixed and matched and will be performed in the order that they have been
specified.

.. class:: warningmark

Note that trimming operation order is important.

-------------

.. class:: infomark

**Outputs**

For paired-end data a particular strength of Trimmomatic is that it retains the
pairing of reads (from R1 and R2) in the filtered output files:

  * Two FASTQ files (R1-paired and R2-paired) contain one read from each pair where
    both have survived filtering.
  * Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where
    one of the pair failed the filtering steps.

Retaining the same order and number of reads in the filtered output fastq files is
essential for many downstream analysis tools.

For single-end data the output is a single FASTQ file containing just the filtered
reads.

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core Facility at the
University of Manchester. It runs the Trimmomatic program which has been developed
within Bjorn Usadel's group at RWTH Aachen university.

Trimmomatic website (including documentation):

  * http://www.usadellab.org/cms/index.php?page=trimmomatic

The reference for Trimmomatic is:

  * Lohse M, Bolger AM, Nagel A, Fernie AR, Lunn JE, Stitt M, Usadel B. RobiNA: a
    user-friendly, integrated software solution for RNA-Seq-based transcriptomics.
    Nucleic Acids Res. 2012 Jul;40(Web Server issue):W622-7

Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you
use it.
  </help>
</tool>