<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for the fastq-mcf command-line program.

  The command template below assembles option flags from the inputs, passes
  the adapter FASTA, the reads and (when supplied) the mates as positional
  arguments, and redirects standard output to the "log" dataset. Unless the
  "noclip" option is ticked, clipped reads (and mates) are written through
  the -o option to the "reads_out" / "mates_out" datasets.
-->
<tool id="fastq_mcf" name="FastqMcf" version="1.0">
    <description>sequence quality filtering and clipping</description>
    <requirements>
        <requirement type="binary">fastq-mcf</requirement>
    </requirements>
    <!-- NOTE(review): current Galaxy documentation names this element
         version_command; confirm that version_string is honoured by the
         target Galaxy release. -->
    <version_string>fastq-mcf -V</version_string>
    <!--
      Cheetah command template.
      * Optional numeric parameters are emitted only when their string form
        is non-empty, so unset fields fall back to the program defaults.
      * sampcnt is passed with the -C flag, the flag its label names; it was
        previously emitted as a second -s, colliding with the scale option.
      * phred is forwarded as -P when set; it was previously declared in
        the inputs but never placed on the command line.
      * Option flags precede the positional arguments:
        adapters, reads, then mates when provided.
    -->
    <command>fastq-mcf
        #if $trimming.choice == 'disable':
            -0
        #elif $trimming.choice == 'user_set':
            #if len($trimming.scale.__str__) > 0
                -s $trimming.scale
            #end if
            #if len($trimming.minpct.__str__) > 0
                -t $trimming.minpct
            #end if
            #if len($trimming.nmin.__str__) > 0
                -m $trimming.nmin
            #end if
            #if len($trimming.pctdiff.__str__) > 0
                -p $trimming.pctdiff
            #end if
            #if len($trimming.nmax.__str__) > 0
                -L $trimming.nmax
            #end if
            #if len($trimming.nkeep.__str__) > 0
                -l $trimming.nkeep
            #end if
            #if len($trimming.skewpct.__str__) > 0
                -k $trimming.skewpct
            #end if
            #if len($trimming.qthr.__str__) > 0
                -q $trimming.qthr
            #end if
            #if len($trimming.qwin.__str__) > 0
                -w $trimming.qwin
            #end if
            #if len($trimming.pctns.__str__) > 0
                -x $trimming.pctns
            #end if
            #if len($trimming.sampcnt.__str__) > 0
                -C $trimming.sampcnt
            #end if
            $trimming.ilv3
            $trimming.rmns
        #end if
        #if len($phred.__str__) > 0
            -P $phred
        #end if
        #if $noclip == True :
            $noclip
        #else :
            -o $reads_out
            #if $mates.__str__ != 'None' :
                -o $mates_out
            #end if
        #end if
        $adpaters
        $reads
        #if $mates.__str__ != 'None' :
            $mates
        #end if
        > $log
    </command>
    <inputs>
        <param name="adpaters" type="data" format="fasta" label="A fasta formatted adapter list" />
        <param name="reads" type="data" format="fastqsanger,fastqillumina" label="Reads: single or Left-hand of Paired End Reads" />
        <param name="mates" type="data" format="fastqsanger,fastqillumina" optional="true" label="Right-hand mates for Paired End Reads" />
        <!--
            -s N.N  Log scale for clip pct to threshold (2.5)
            -t N    % occurance threshold before clipping (0.25)
            -m N    Minimum clip length, overrides scaled auto (1)
            -p N    Maximum adapter difference percentage (20)
            -l N    Minimum remaining sequence length (15)
            -L N    Maximum sequence length (none)
            -k N    sKew percentage causing trimming (2)
            -q N    quality threshold causing trimming (10)
            -f      force output, even if not much will be done
            -0      Set all trimming parameters to zero
            -U|u    Force disable/enable illumina PF filtering
            -P N    phred-scale (64)
            -x N    'N' (Bad read) percentage causing trimming (10)
            -R      Don't remove N's from the fronts/ends of reads
            -n      Don't clip, just output what would be done
            -C N    Number of reads to use for subsampling (200000)
            -d      Output lots of random debugging stuff
        -->
        <!-- NOTE(review): the defaults quoted in the labels for -q (7) and
             -C (100000) differ from the usage text above (10 and 200000),
             and the -w option does not appear in that usage text at all.
             Confirm labels against the installed fastq-mcf version. -->

        <!-- Trimming options: program defaults, user-supplied values, or
             all trimming parameters zeroed with the -0 flag. -->
        <conditional name="trimming">
            <param name="choice" type="select" label="Trimming Options">
                <option value="defaults">Use Defaults</option>
                <option value="user_set">Set Values</option>
                <option value="disable">Set all trimming parameters to zero</option>
            </param>
            <when value="defaults"/>
            <when value="disable"/>
            <when value="user_set">
                <param name="sampcnt" type="integer" optional="true" label="-C Number of reads to use for subsampling (100000)"/>
                <param name="scale" type="float" optional="true" label="-s N.N Log scale for clip pct to threshold (2.5)"/>
                <param name="minpct" type="float" optional="true" label="-t % occurance threshold before clipping (0.25)"/>
                <param name="nmin" type="integer" optional="true" label="-m Minimum clip length, overrides scaled auto (1)"/>
                <param name="pctdiff" type="integer" optional="true" label="-p Maximum adapter difference percentage (20)"/>

                <param name="nmax" type="integer" optional="true" label="-L Maximum sequence length (none)"/>
                <param name="nkeep" type="integer" optional="true" label="-l Minimum remaining sequence length (15)"/>
                <param name="skewpct" type="float" optional="true" label="-k sKew percentage causing trimming (2)"/>
                <!-- The less-than sign must be written as an entity inside
                     an attribute value for the document to be well-formed. -->
                <param name="qthr" type="integer" optional="true" label="-q quality threshold causing trimming (7)"
                       help="remove end of-read with quality &lt; threshold"/>
                <param name="qwin" type="integer" optional="true" label="-w mean quality threshold causing trimming (1)"
                       help="remove end of read with mean quality &lt; threshold"/>
                <param name="pctns" type="float" optional="true" label="-x 'N' (Bad read) percentage causing trimming (10)"/>
                <param name="rmns" type="boolean" truevalue="-R" falsevalue="" checked="false" label="-R Don't remove N's from the fronts/ends of reads"/>
                <param name="ilv3" type="select" label="illumina PF filtering">
                    <option value=" ">Default</option>
                    <option value="-U">Disable illumina PF filtering</option>
                    <option value="-u">Enable illumina PF filtering</option>
                </param>
            </when>
        </conditional>

        <param name="phred" type="integer" optional="true" label="-P phred-scale (64)" help="Default is to determine automatically"/>

        <param name="noclip" type="boolean" truevalue="-n" falsevalue="" checked="false" label="-n Don't clip, just output what would be done"/>

    </inputs>
    <outputs>
        <!-- The log is always produced; the read outputs are filtered out
             when noclip is set, and mates_out additionally requires that a
             mates dataset was supplied. -->
        <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
        <data name="reads_out" format_source="reads" label="${tool.name} on ${on_string}: reads">
            <filter>noclip == False</filter>
        </data>
        <data name="mates_out" format_source="mates" label="${tool.name} on ${on_string}: mates">
            <filter>(noclip == False and mates != None)</filter>
        </data>
    </outputs>
    <tests>
    </tests>
    <help>
**What it does**

fastq-mcf_ attempts to:

* Detect and remove sequencing adapters and primers
* Detect limited skewing at the ends of reads and clip
* Detect poor quality at the ends of reads and clip
* Detect N's, and remove from ends
* Remove reads with CASAVA 'Y' flag (purity filtering)
* Discard sequences that are too short after all of the above
* Keep multiple mate-reads in sync while doing all of the above

.. _fastq-mcf: http://code.google.com/p/ea-utils/wiki/FastqMcf

-----

**Input**

Fasta file of adapter sequences, for example::

    > Genomic_DNA_oligonucleotide_sequences_Adapters_F
    GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
    > Genomic_DNA_oligonucleotide_sequences_Adapters_R
    ACACTCTTTCCCTACACGACGCTCTTCCGATCT
    > Genomic_DNA_Sequencing_Primer
    ACACTCTTTCCCTACACGACGCTCTTCCGATCT

Reads or Left-hand mates, for example::

    @1539:931/1
    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
    +1539:931/1
    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

Right-hand mates, for example::

    @1539:931/2
    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
    +1539:931/2
    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

-----

**Output**

A log file

A trimmed fastq of the reads

A trimmed fastq of the mates

    </help>
</tool>