comparison bbmerge.xml @ 0:fc029a9b4d07 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit aca07f4e7d683d1b7d06abb63e05d4ff1b28771f
author iuc
date Mon, 06 Feb 2023 18:06:47 +0000
parents
children 82ced0e47b9d
comparison
equal deleted inserted replaced
-1:000000000000 0:fc029a9b4d07
1 <tool id="bbtools_bbmerge" name="BBTools: BBMerge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Merge overlapping mates of a read pair</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 #import os
10 #import re
11 ## Stage the inputs as symlinks whose names end in .fastq or .fastq.gz.
12 #if str($input_type_cond.input_type) in ['single', 'pair']:
13 #set read1 = $input_type_cond.read1
14 ## bbmerge uses the file extension to determine the input format.
15 #set ext = '.fastq'
16 #if $read1.ext.endswith('.gz'):
17 #set ext = $ext + '.gz'
18 #end if
19 #set read1_file = 'forward' + $ext
20 ln -s '${read1}' '${read1_file}' &&
21 #if str($input_type_cond.input_type) == 'pair':
22 #set read2 = $input_type_cond.read2
23 #set read2_file = 'reverse' + $ext
24 ln -s '${read2}' '${read2_file}' &&
25 #end if
26 #else:
27 #set read1 = $input_type_cond.reads_collection['forward']
28 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
29 ## bbmerge uses the file extension to determine the input format; ext holds only the suffix, the identifier is prepended below.
30 #set ext = '.fastq'
31 #if $read1.ext.endswith('.gz'):
32 #set ext = $ext + '.gz'
33 #end if
34 #set read1_file = $read1_identifier + $ext
35 ln -s '${read1}' '${read1_file}' &&
36 #set read2 = $input_type_cond.reads_collection['reverse']
37 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
38 #set read2_file = $read2_identifier + $ext
39 ln -s '${read2}' '${read2_file}' &&
40 #end if
41
42 bbmerge.sh
43 #### Input parameters: a single dataset is read as interleaved pairs, otherwise two mate files are passed
44 #if str($input_type_cond.input_type) == 'single':
45 in='${read1_file}'
46 interleaved=t
47 #else:
48 in1='${read1_file}' in2='${read2_file}'
49 interleaved=f
50 #end if
51
52 #### Output options: these file names are collected by the from_work_dir attributes in the outputs section
53 out=merged.fastq
54 outu=unmerged.fastq
55 ihist=ihist.tabular
56 touppercase=t
57
58 #### Quality and trimming parameters
59 qtrim='$qt_options.qtrim'
60 trimq='$qt_options.trimq'
61 minlength='$qt_options.minlength_after_trim'
62 usequality='$qt_options.usequality' ## NOTE: usequality is passed a second time from merge_options further down.
63
64 #### Merging parameters
65 usejni=f ## JNI stays disabled; bbmerge documents usejni as "Do overlapping in C code, which is faster".
66 ecco='$merge_options.ecco'
67 trimnonoverlapping='$merge_options.trimnonoverlapping'
68 mininsert='$merge_options.mininsert'
69 minoverlap='$merge_options.minoverlap'
70 minq='$merge_options.minq'
71 maxq='$merge_options.maxq'
72 entropy='$merge_options.entropy'
73 efilter='$merge_options.efilter'
74 pfilter='$merge_options.pfilter'
75 kfilter='$merge_options.kfilter'
76 usequality='$merge_options.usequality'
77 ## Adapter sequences live inside the "adapters" conditional, so they are addressed through it.
78 #if $merge_options.adapters.selector == "with_adapters":
79 adapter1='$merge_options.adapters.adapter1'
80 adapter2='$merge_options.adapters.adapter2'
81 #end if
82 ## Only the parameters of the selected overlap evaluation mode are emitted.
83 #if $merge_options.merge_mode.selector == 'Ratio mode':
84 maxratio='$merge_options.merge_mode.maxratio'
85 ratiomargin='$merge_options.merge_mode.ratiomargin'
86 ratiooffset='$merge_options.merge_mode.ratiooffset'
87 maxmismatches='$merge_options.merge_mode.maxmismatches'
88 ratiominoverlapreduction=0
89 minsecondratio='$merge_options.merge_mode.minsecondratio'
90 #else:
91 margin='$merge_options.merge_mode.margin'
92 mismatches='$merge_options.merge_mode.mismatches'
93 requireratiomatch='$merge_options.merge_mode.requireratiomatch'
94 #end if
95 ## The selected strictness value is emitted as a flag of its own, e.g. strict=t.
96 $merge_options.strictness=t
97 ]]></command>
98 <inputs>
99 <expand macro="input_type_cond"/>
100
101 <section name="qt_options" title="Quality and trimming options">
102 <param name="qtrim" type="select" label="Select option for quality trimming ends before merging">
103 <option value="f" selected="true">No trimming</option>
104 <option value="l">Trim left</option>
105 <option value="r">Trim right</option>
106 <option value="lr">Trim both</option>
107 </param>
108 <param argument="trimq" type="integer" value="6" label="Trim regions with average quality below this value"/>
109 <param argument="minlength_after_trim" type="integer" value="60" label="Don't trim reads to be shorter than this value"/>
110 <param argument="usequality" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Use quality scores when determining which read kmers to use as seeds?"/>
111 </section>
112
113 <section name="merge_options" title="Merging parameters">
114 <param name="strictness" type="select" label="Select the strictness of the merging criteria" help="Stricter settings lower the false-positive rate and the merging rate; looser settings raise both.">
115 <option value="xstrict">max strict</option>
116 <option value="ustrict">ultra strict</option>
117 <option value="vstrict">very strict</option>
118 <option value="strict">strict</option>
119 <option value="default" selected="true">default</option>
120 <option value="loose">loose</option>
121 <option value="vloose">very loose</option>
122 <option value="uloose">ultra loose</option>
123 <option value="xloose">max loose</option>
124 <option value="fast">fastest possible, less accurate</option>
125 </param>
126 <param argument="ecco" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Error-correct the overlapping part, but don't merge." help="If selected, the tool will find the overlaps as if merging reads and use this overlap information to correct sequencing errors on both strands. However, the strands from the mates will not be merged but provided as two separate reads after error correction."/>
127 <param argument="trimnonoverlapping" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Trim all non-overlapping portions, leaving only consensus sequence. By default, only sequence to the right of the overlap (adapter sequence) is trimmed."/>
128 <param argument="mininsert" type="integer" value="35" label="Minimum insert size to merge reads"/>
129 <param argument="minoverlap" type="integer" value="12" label="Minimum number of overlapping bases to allow merging"/>
130
131 <param argument="minq" type="integer" value="9" label="Ignore bases with quality below this"/>
132 <param argument="maxq" type="integer" value="41" label="Cap output quality scores at this"/>
133
134 <param argument="entropy" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Increase the minimum overlap requirement for low-complexity reads"/>
135 <param argument="efilter" type="integer" value="6" label="Ban overlaps with over this many times the expected number of errors." help="Lower is more strict, -1 disables."/>
136 <param argument="pfilter" type="float" value="0.00004" label="Probability filter to disallow improbable overlaps." help="Higher is stricter. 0 will disable the filter; 1 will allow only perfect overlaps."/>
137 <param argument="kfilter" type="integer" value="41" label="Ban overlaps that create kmers with count below this value" help="Requires good coverage, 0 disables."/>
138 <param argument="usequality" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Take quality factors into account" help="If disabled, quality values are completely ignored, both for overlap detection and filtering. May be useful for data with inaccurate quality values."/>
139
140 <conditional name="adapters">
141 <param name="selector" type="select" label="Provide adapter sequences to improve accuracy?">
142 <option value="wout_adapters" selected="true">No</option>
143 <option value="with_adapters">Yes, use these adapter sequences</option>
144 </param>
145 <when value="wout_adapters"/>
146 <when value="with_adapters">
147 <param argument="adapter1" type="text" value="" label="Left adapter sequence"/>
148 <param argument="adapter2" type="text" value="" label="Right adapter sequence"/>
149 </when>
150 </conditional>
151
152 <conditional name="merge_mode">
153 <param name="selector" type="select" label="Evaluate overlaps via..." help="In the ratio mode, overlaps are decided based on the ratio of matching to mismatching bases. Flat mode scores overlaps based on the total number of mismatching bases only.">
154 <option value="Ratio mode" selected="true">Ratio mode</option>
155 <option value="Flat mode">Flat mode</option>
156 </param>
157 <when value="Ratio mode">
158 <param argument="maxratio" type="float" value="0.09" label="Max error rate; higher increases merge rate."/>
159 <param argument="ratiomargin" type="float" value="5.5" label="Lower increases merge rate; min is 1."/>
160 <param argument="ratiooffset" type="float" value="0.55" label="Lower increases merge rate; min is 0."/>
161 <param argument="maxmismatches" type="integer" value="20" label="Maximum mismatches allowed in overlapping region."/>
162 <param argument="minsecondratio" type="float" value="0.1" label="Cutoff for second-best overlap ratio."/>
163 </when>
164 <when value="Flat mode">
165 <param argument="margin" type="integer" value="2" label="The best overlap must have at least 'margin' fewer mismatches than the second best."/>
166 <param argument="mismatches" type="integer" value="3" label="Do not allow more than this many mismatches."/>
167 <param argument="requireratiomatch" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Require the answer from flat mode and ratio mode to agree, reducing false positives if both are enabled."/>
168 </when>
169 </conditional>
170 </section>
171 </inputs>
172 <outputs> <!-- Each dataset is picked up from the working directory via its from_work_dir file name. -->
173 <data name="output_merged_reads" format="fastq" label="${tool.name} on ${on_string} (merged reads)" from_work_dir="merged.fastq"/>
174 <data name="output_unmerged_reads" format="fastq" label="${tool.name} on ${on_string} (unmerged reads)" from_work_dir="unmerged.fastq"/>
175 <data name="output_insertlen_hist" format="tabular" label="${tool.name} on ${on_string} (insert size histogram)" from_work_dir="ihist.tabular"/>
176 </outputs>
177 <tests>
178 <!-- input_type "single": one interleaved FASTQ file holding both mates -->
179 <test expect_num_outputs="3">
180 <param name="input_type" value="single"/>
181 <param name="read1" value="bbmerge/input_interleaved.fastq"/>
182 <output name="output_merged_reads" ftype="fastq" value="bbmerge/merged.fastq"/>
183 <output name="output_unmerged_reads" ftype="fastq" value="bbmerge/unmerged.fastq"/>
184 <output name="output_insertlen_hist" ftype="tabular" value="bbmerge/insert_length_hist.tabular"/>
185 </test>
186 <!-- input_type "pair": forward and reverse mates supplied as two separate datasets -->
187 <test expect_num_outputs="3">
188 <param name="input_type" value="pair"/>
189 <param name="read1" value="bbmerge/input_R1.fastq"/>
190 <param name="read2" value="bbmerge/input_R2.fastq"/>
191 <output name="output_merged_reads" ftype="fastq" value="bbmerge/merged.fastq"/>
192 <output name="output_unmerged_reads" ftype="fastq" value="bbmerge/unmerged.fastq"/>
193 <output name="output_insertlen_hist" ftype="tabular" value="bbmerge/insert_length_hist.tabular"/>
194 </test>
195 <!-- input_type "paired": the same mates supplied as a paired dataset collection -->
196 <test expect_num_outputs="3">
197 <param name="input_type" value="paired"/>
198 <param name="reads_collection">
199 <collection type="paired">
200 <element name="forward" value="bbmerge/input_R1.fastq"/>
201 <element name="reverse" value="bbmerge/input_R2.fastq"/>
202 </collection>
203 </param>
204 <output name="output_merged_reads" ftype="fastq" value="bbmerge/merged.fastq"/>
205 <output name="output_unmerged_reads" ftype="fastq" value="bbmerge/unmerged.fastq"/>
206 <output name="output_insertlen_hist" ftype="tabular" value="bbmerge/insert_length_hist.tabular"/>
207 </test>
208 </tests>
209 <help>
210 **What it does**
211
212 BBMerge merges two overlapping paired reads into a single read. A 2x100nt read pair, for instance, can be merged into a single read of length 150nt if the last 50nt of the first read mate and the last 50nt of the second read mate overlap. The accuracy of the base calling can also improve as a result of such a reconciliation between the read pairs. BBMerge is also capable of error-correcting the overlapping portion of reads without merging them, as well as merging nonoverlapping reads, if enough coverage is available.
213
214 -----
215
216 **A Martian PE sequencing result is expected to be processed as follows:**
217
218 input_R1.fastq::
219
220 @read_header_1/1
221 AAAAATTTTTAAAAACCCCCGGGGG
222 +
223 FFFFFFFFFFFFFFFEFFFFFF,FF
224 @read_header_2/1
225 AAAATTTTAAAACCCCCGGGGG
226 +
227 FFFFFFFFFFFFFFFEFFFFFF
228
229
230 input_R2.fastq::
231
232 @read_header_1/2
233 TTAATTAATTCCCCCGGGGG
234 +
235 FFFFFFFFFFFFFFFFFFFF
236 @read_header_2/2
237 TTTAAATTTAAACCCCCGGGGG
238 +
239 FFFFFFFFFFFFFFFFFFFFEF
240
241
242 output.fastq::
243
244 @read_header_1
245 AAAAATTTTTAAAAACCCCCGGGGGAATTAATTAA
246 +
247 FFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFF
248 @read_header_2
249 AAAATTTTAAAACCCCCGGGGGTTTAAATTTAAA
250 +
251 FFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFF
252 </help>
253 <expand macro="citations"/>
254 </tool>
255