diff tools/fastq/fastq_trimmer.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastq/fastq_trimmer.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,120 @@
+<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
+  <description>by column</description>
+  <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>
+  <inputs>
+    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
+    <conditional name="offset_type">
+      <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
+        <option value="offsets_absolute" selected="true">Absolute Values</option>
+        <option value="offsets_percent">Percentage of Read Length</option>
+      </param>
+      <when value="offsets_absolute">
+        <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
+          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
+          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
+        </param>
+        <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
+          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
+          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
+        </param>
+      </when>
+      <when value="offsets_percent">
+        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
+          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
+        </param>
+        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
+          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
+        </param>
+      </when>
+    </conditional>
+  <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
+  </inputs>
+  <outputs>
+    <data name="output_file" format="input" />
+  </outputs>
+  <tests>
+    <test>
+      <!-- Do nothing trim -->
+      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+      <param name="base_offset_type" value="offsets_absolute"/>
+      <param name="left_column_offset" value="0"/>
+      <param name="right_column_offset" value="0"/>
+      <param name="keep_zero_length" value="keep_zero_length" />
+      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
+    </test>
+    <!-- Trim to empty File -->
+    <test>
+      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+      <param name="base_offset_type" value="offsets_absolute"/>
+      <param name="left_column_offset" value="30"/>
+      <param name="right_column_offset" value="64"/>
+      <param name="keep_zero_length" value="exclude_zero_length" />
+      <output name="output_file" file="empty_file.dat" />
+    </test>
+    <test>
+      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+      <param name="base_offset_type" value="offsets_percent"/>
+      <param name="left_column_offset" value="50"/>
+      <param name="right_column_offset" value="50"/>
+      <param name="keep_zero_length" value="exclude_zero_length" />
+      <output name="output_file" file="empty_file.dat" />
+    </test>
+    <!-- Trim to 4 inner-most bases -->
+    <test>
+      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+      <param name="base_offset_type" value="offsets_absolute"/>
+      <param name="left_column_offset" value="45"/>
+      <param name="right_column_offset" value="45"/>
+      <param name="keep_zero_length" value="exclude_zero_length" />
+      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
+    </test>
+    <test>
+      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+      <param name="base_offset_type" value="offsets_percent"/>
+      <param name="left_column_offset" value="47.87"/>
+      <param name="right_column_offset" value="47.87"/>
+      <param name="keep_zero_length" value="exclude_zero_length" />
+      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
+    </test>
+  </tests>
+  <help>
+This tool allows you to trim the ends of reads.
+
+You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. 
+
+For example, if you have a read of length 36::
+  
+  @Some FASTQ Sanger Read
+  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
+  +
+  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@
+  
+And you set absolute offsets of 2 and 9::
+  
+  @Some FASTQ Sanger Read
+  ATATGTNCTCACTGATAAGTGGATA
+  +
+  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4
+  
+Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36)::
+  
+  @Some FASTQ Sanger Read
+  ATATGTNCTCACTGATAAGTGGATATN
+  +
+  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%
+  
+-----
+
+.. class:: warningmark
+
+Trimming a color space read will cause any adapter base to be lost.
+
+------
+
+**Citation**
+
+If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
+
+
+  </help>
+</tool>