<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for fastq_trimmer.py.
  Lets the user trim the ends of FASTQ reads by a left (5') and right (3')
  offset, given either as absolute base counts or as a percentage of the
  read length.
-->
<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
  <description>by column</description>
  <requirements>
    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
  </requirements>
  <!--
    Positional arguments passed to the script, in order:
      input file, output file, left offset, right offset, offset mode,
      FASTQ variant, zero-length policy.
    The FASTQ variant is the input extension with its leading "fastq" sliced
    off, e.g. "fastqsanger" yields "sanger" and "fastqcssanger" yields "cssanger".
  -->
  <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>
  <inputs>
    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
    <!-- The offset parameters change type (integer vs. float) with the selected mode. -->
    <conditional name="offset_type">
      <!-- The line break in the help text is escaped: a literal "<" is not allowed inside an attribute value. -->
      <param name="base_offset_type"
             type="select"
             label="Define Base Offsets as"
             help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
        <option value="offsets_absolute" selected="true">Absolute Values</option>
        <option value="offsets_percent">Percentage of Read Length</option>
      </param>
      <!-- Absolute mode: whole-number offsets, zero or greater. -->
      <when value="offsets_absolute">
        <param name="left_column_offset"
               label="Offset from 5' end"
               value="0"
               type="integer"
               help="Values start at 0, increasing from the left">
          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
        </param>
        <param name="right_column_offset"
               label="Offset from 3' end"
               value="0"
               type="integer"
               help="Values start at 0, increasing from the right">
          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
        </param>
      </when>
      <!-- Percentage mode: fractional offsets restricted to the range 0 to 100. -->
      <when value="offsets_percent">
        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
        </param>
        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
        </param>
      </when>
    </conditional>
    <!-- Boolean parameters take their default from "checked"; unchecked passes the falsevalue to the script. -->
    <param name="keep_zero_length"
           label="Keep reads with zero length"
           type="boolean"
           truevalue="keep_zero_length"
           falsevalue="exclude_zero_length"
           checked="false"/>
  </inputs>
  <outputs>
    <!-- The output dataset takes the same datatype as the FASTQ input. -->
    <data name="output_file" format_source="input_file"/>
  </outputs>
  <tests>
    <test>
      <!-- Do nothing trim -->
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="0"/>
      <param name="right_column_offset" value="0"/>
      <param name="keep_zero_length" value="keep_zero_length"/>
      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger"/>
    </test>
    <test>
      <!-- Trim to empty file, absolute offsets -->
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="30"/>
      <param name="right_column_offset" value="64"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="empty_file.dat"/>
    </test>
    <test>
      <!-- Trim to empty file, percentage offsets -->
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_percent"/>
      <param name="left_column_offset" value="50"/>
      <param name="right_column_offset" value="50"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="empty_file.dat"/>
    </test>
    <test>
      <!-- Trim to 4 inner-most bases, absolute offsets -->
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="45"/>
      <param name="right_column_offset" value="45"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="fastq_trimmer_out1.fastqsanger"/>
    </test>
    <test>
      <!-- Trim to 4 inner-most bases, percentage offsets (same expected output as the absolute case) -->
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_percent"/>
      <param name="left_column_offset" value="47.87"/>
      <param name="right_column_offset" value="47.87"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="fastq_trimmer_out1.fastqsanger"/>
    </test>
  </tests>
  <!--
    The help body is reStructuredText. It is kept at column 0 because leading
    whitespace is significant there, and wrapped in CDATA because the example
    quality strings and the citation link contain literal "<" characters.
  -->
  <help><![CDATA[
This tool allows you to trim the ends of reads.

You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer.

For example, if you have a read of length 36::

  @Some FASTQ Sanger Read
  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
  +
  =@@.@;B-%?8>CBA@>7@7BBCA4-48%<;;%<B@

And you set absolute offsets of 2 and 9::

  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATA
  +
  @.@;B-%?8>CBA@>7@7BBCA4-4

Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36)::

  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATATN
  +
  @.@;B-%?8>CBA@>7@7BBCA4-48%

-----

.. class:: warningmark

Trimming a color space read will cause any adapter base to be lost.

------

**Citation**

If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_

  ]]></help>
</tool>