comparison tools/filters/trimmer.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="trimmer" name="Trim" version="0.0.1">
2 <description>leading or trailing characters</description>
<!-- Command line: runs trimmer.py with the python interpreter.
     The -f, -c, -s, -e and -i options receive the input1, col, start, end and
     ignore parameters declared in the inputs section. $fastq expands to either
     an empty string or -q, depending on the selected option of the fastq
     parameter. Standard output is redirected to the out_file1 dataset.
     NOTE(review): the meaning of the bare -a flag is defined in trimmer.py,
     which is not visible here. -->
3 <command interpreter="python">
4 trimmer.py -a -f $input1 -c $col -s $start -e $end -i $ignore $fastq > $out_file1
5 </command>
6 <inputs>
<!-- Parameters substituted into the command line:
       input1 : dataset to trim (tabular or txt).
       col    : column to trim; 0 means the entire line is processed.
       start  : trim from the beginning up to this position; 1 leaves the beginning untouched.
       end    : remove everything from this position to the end; 0 leaves the end untouched.
       fastq  : select whose value is either empty (No) or -q (Yes).
       ignore : multi-select rendered as checkboxes. Each option value is the
                decimal ASCII code of the character shown as its label
                (for example 62 for the greater-than sign, 35 for the hash sign). -->
7 <param format="tabular,txt" name="input1" type="data" label="this dataset"/>
8 <param name="col" type="integer" value="0" label="Trim this column only" help="0 = process entire line" />
9 <param name="start" type="integer" size="10" value="1" label="Trim from the beginning to this position" help="1 = do not trim the beginning"/>
10 <param name="end" type="integer" size="10" value="0" label="Remove everything from this position to the end" help="0 = do not trim the end"/>
11 <param name="fastq" type="select" label="Is input dataset in fastq format?" help="If set to YES, the tool will not trim evenly numbered lines (0, 2, 4, etc...)">
12 <option selected="true" value="">No</option>
13 <option value="-q">Yes</option>
14 </param>
15 <param name="ignore" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are not trimmed">
16 <option value="62">&gt;</option>
17 <option value="64">@</option>
18 <option value="43">+</option>
19 <option value="60">&lt;</option>
20 <option value="42">*</option>
21 <option value="45">-</option>
22 <option value="61">=</option>
23 <option value="124">|</option>
24 <option value="63">?</option>
25 <option value="36">$</option>
26 <option value="46">.</option>
27 <option value="58">:</option>
28 <option value="38">&amp;</option>
29 <option value="37">%</option>
30 <option value="94">^</option>
31 <option value="35">&#35;</option>
32 </param>
33 </inputs>
34 <outputs>
<!-- Single output dataset, out_file1, which receives the redirected standard
     output of the command.
     NOTE(review): format="input" and metadata_source="input1" presumably make
     the output inherit the datatype and metadata of input1; confirm against
     the Galaxy tool XML documentation. -->
35 <data name="out_file1" format="input" metadata_source="input1"/>
36 </outputs>
37 <tests>
<!-- Two test cases, both run on trimmer_tab_delimited.dat with start=1 and
     ignore=62 (lines beginning with the greater-than sign):
       1. col=0 (entire line), end=13, expected trimmer_a_f_c0_s1_e13_i62.dat
       2. col=2, end=2, expected trimmer_a_f_c2_s1_e2_i62.dat
     NOTE(review): fastq is given as "No", which matches the option label; the
     option value itself is an empty string. Confirm the test framework
     resolves select parameters by label. -->
38 <test>
39 <param name="input1" value="trimmer_tab_delimited.dat"/>
40 <param name="col" value="0"/>
41 <param name="start" value="1"/>
42 <param name="end" value="13"/>
43 <param name="ignore" value="62"/>
44 <param name="fastq" value="No"/>
45 <output name="out_file1" file="trimmer_a_f_c0_s1_e13_i62.dat"/>
46 </test>
47 <test>
48 <param name="input1" value="trimmer_tab_delimited.dat"/>
49 <param name="col" value="2"/>
50 <param name="start" value="1"/>
51 <param name="end" value="2"/>
52 <param name="ignore" value="62"/>
53 <param name="fastq" value="No"/>
54 <output name="out_file1" file="trimmer_a_f_c2_s1_e2_i62.dat"/>
55 </test>
56
57 </tests>
58
59 <help>
60
61
62 **What it does**
63
64 Trims a specified number of characters from a dataset or from one of its fields (if the dataset is tab-delimited).
65
66 -----
67
68 **Example 1**
69
70 Trimming this dataset::
71
72 1234567890
73 abcdefghijk
74
75 by setting **Trim from the beginning to this position** to *2* and **Remove everything from this position to the end** to *6* will produce::
76
77 23456
78 bcdef
79
80 -----
81
82 **Example 2**
83
84 Trimming column 2 of this dataset::
85
86 abcde 12345 fghij 67890
87 fghij 67890 abcde 12345
88
89 by setting **Trim this column only** to *2*, **Trim from the beginning to this position** to *2*, and **Remove everything from this position to the end** to *4* will produce::
90
91 abcde 234 fghij 67890
92 fghij 789 abcde 12345
93
94 -----
95
96 **Trimming FASTQ datasets**
97
98 This tool can be used to trim sequences and quality strings in fastq datasets. This is done by selecting *Yes* from the **Is input dataset in fastq format?** dropdown. If set to *Yes*, the tool will skip all even-numbered lines (see warning below). For example, trimming the last 5 bases of this dataset::
99
100 @081017-and-081020:1:1:1715:1759
101 GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
102 +
103 II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&amp;&amp;B
104
105 can be done by setting **Remove everything from this position to the end** to 31::
106
107 @081017-and-081020:1:1:1715:1759
108 GGACTCAGATAGTAATCCACGCTCCTTTAAA
109 +
110 II#IIIIIII$5+.(9IIIIIII$%*$G$A3
111
112 **Note** that headers are skipped.
113
114 .. class:: warningmark
115
116 **WARNING:** This tool will only work on properly formatted fastq datasets where (1) each read and each quality string occupies exactly one line and (2) the '@' (read header) and '+' (quality header) lines are even-numbered (counting from 0), as in the above example.
117
118
119 </help>
120 </tool>