0
|
1 <tool id="bamFilter" name="bamFilter" version="0.0.1">
|
|
2 <description>Filter BAM files on a variety of attributes</description>
|
|
3 <requirements>
|
|
4 <requirement type="package" version="2.3.0_2d7685d2ae">bamtools</requirement>
|
|
5 </requirements>
|
|
<command><![CDATA[
    ## Cheetah template for the job command line. Wrapped in CDATA so that
    ## the shell operators (&&, >) do not have to be XML-escaped.

    ## Copy the generated JSON filter script into the second output dataset.
    ## Chained with && so a failed copy aborts the job instead of being ignored.
    cat "$script_file" > "$out_file2" &&

    ## Link every input BAM and its index into the working directory under
    ## predictable names (presumably so bamtools finds each .bai next to its BAM).
    #for $bam_count, $input_bam in enumerate( $input_bams ):
        ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
        ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
    #end for

    bamtools
    filter
    -script "$script_file"

    ## One -in argument per linked input BAM.
    #for $bam_count, $input_bam in enumerate( $input_bams ):
        -in "localbam_${bam_count}.bam"
    #end for
    -out "$out_file1"
]]></command>
|
|
25
|
|
<inputs>
    <!-- One or more BAM datasets; each repeat instance becomes one "-in" argument of bamtools filter. -->
    <repeat name="input_bams" title="BAM dataset(s) to filter" min="1">
        <param name="input_bam" type="data" format="bam" label="BAM dataset" />
    </repeat>
    <!-- Each Condition becomes one numbered entry of the JSON script; each Filter inside it becomes one property:value pair. -->
    <repeat name="conditions" title="Condition" min="1">
        <repeat name="filters" title="Filter" min="1">
            <conditional name="bam_property">
                <param name="bam_property_selector" type="select" label="Select BAM property to filter on">
                    <option value="alignmentFlag"/>
                    <option value="cigar"/>
                    <option value="insertSize"/>
                    <option value="isDuplicate"/>
                    <option value="isFailedQC"/>
                    <option value="isFirstMate"/>
                    <option value="isMapped"/>
                    <option value="isMateMapped"/>
                    <option value="isMateReverseStrand"/>
                    <option value="isPaired"/>
                    <option value="isPrimaryAlignment"/>
                    <option value="isProperPair"/>
                    <option value="isReverseStrand"/>
                    <option value="isSecondMate"/>
                    <option selected="True" value="mapQuality"/>
                    <option value="matePosition"/>
                    <option value="mateReference"/>
                    <option value="name"/>
                    <option value="position"/>
                    <option value="queryBases"/>
                    <option value="reference"/>
                    <option value="tag"/>
                </param>
                <!-- would be fantastic to have AND and OR constructs in when statements -->
                <!-- Every branch exposes the same parameter name (bam_property_value) so the script template can read it uniformly. -->
                <when value="alignmentFlag">
                    <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/>
                </when>
                <when value="cigar">
                    <param name="bam_property_value" type="text" size="10" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/>
                </when>
                <when value="insertSize">
                    <param name="bam_property_value" type="text" size="10" value="&gt;=250" label="Filter on insert size" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select pairs with insert size of at least 250 nt use &quot;&gt;=250&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="isDuplicate">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads marked as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" />
                </when>
                <when value="isFailedQC">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/>
                </when>
                <when value="isFirstMate">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/>
                </when>
                <when value="isMapped">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select mapped reads" help="Checked = Mapped, Empty = NOT mapped"/>
                </when>
                <when value="isMateMapped">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped, Empty = Mate is NOT mapped"/>
                </when>
                <when value="isMateReverseStrand">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/>
                </when>
                <when value="isPaired">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/>
                </when>
                <when value="isPrimaryAlignment">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/>
                </when>
                <when value="isProperPair">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/>
                </when>
                <when value="isReverseStrand">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/>
                </when>
                <when value="isSecondMate">
                    <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/>
                </when>
                <when value="mapQuality">
                    <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use &quot;&gt;=30&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="matePosition">
                    <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use &quot;&gt;1000000&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="mateReference">
                    <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chr22 use &quot;chr22&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="name">
                    <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression.">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="position">
                    <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use &quot;&gt;5000&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="queryBases">
                    <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="reference">
                    <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="tag">
                    <param name="bam_property_value" type="text" value="NM:&gt;1" label="Filter on a particular tag" help="You can use &gt;, &lt;, =, and ! (not). Tag name and its value must be separated by &quot;:&quot;. E.g., to obtain reads with more than one mismatch use &quot;NM:&gt;1&quot;">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value=":!="/></valid>
                        </sanitizer>
                    </param>
                </when>
            </conditional>
        </repeat>
    </repeat>
    <!-- Optional free-text rule combining the numbered conditions; the script template only emits it when more than one condition is defined. -->
    <conditional name="rule_configuration">
        <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." />
        <when value="true">
            <param name="rules" type="text" size="20" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works." >
                <sanitizer invalid_char="">
                    <valid initial="string.printable"/>
                </sanitizer>
            </param>
        </when>
    </conditional>
</inputs>
|
|
175
|
|
<configfiles>
    <configfile name="script_file">
        ##Sets up a json configfile for bamtools filter
        ##If there is more than one condition prints brackets and "filters:"
        #if len( $conditions ) > 1
            {
            "filters":
            [
        #end if
        ##Conditions are numbered from 1 so the ids match the numbers used in the rule text
        #for $i, $c in enumerate( $conditions, start=1 )
            { "id": "$i",
            #for $j, $s in enumerate( $c.filters, start=1 )
                ##The if below takes care of the comma at the end of the last filter within a condition
                #if $j != len( $c.filters)
                    "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}",
                #else
                    "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}"
                #end if
            #end for
            ##The if below takes care of the comma at the end of the last condition
            #if $i != len( $conditions )
                },
            #else
                }
            #end if
        #end for
        #if len( $conditions ) > 1
            ##rules_selector is a boolean param declared with truevalue="true" (lowercase),
            ##so the comparison must use "true"; comparing against "True" never matched
            ##and the rule was silently dropped from the script.
            #if str( $rule_configuration.rules_selector ) == "true"
                ],
                "rule" : "${rule_configuration.rules}"
            #else
                ]
            #end if
            }
        #end if
    </configfile>
</configfiles>
|
|
213
|
|
<outputs>
    <!-- out_file2: text dataset that receives the copy of the generated filter script. -->
    <data name="out_file2" format="txt" />
    <!-- out_file1: BAM dataset passed to "bamtools filter -out". -->
    <data name="out_file1" format="bam" />
</outputs>
|
|
<tests>
    <!-- One input BAM, one condition, one filter: mapQuality with the expression ">20". -->
    <test>
        <param name="input_bam" value="bamtools-input1.bam" ftype="bam"/>
        <param name="bam_property_selector" value="mapQuality"/>
        <param name="bam_property_value" value=">20"/>
        <output name="out_file1" ftype="bam" file="bamtools-test1.bam"/>
    </test>
</tests>
|
|
226 <help>
|
|
227 **What it does**
|
|
228
|
|
229 BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
|
|
230
|
|
231 -----
|
|
232
|
|
233 **How it works**
|
|
234
|
|
235 The tool's logic relies on three concepts: (1) input BAM(s), (2) conditions (groups of filters), and (3) filters.
|
|
236
|
|
237 *Input BAM(s)*
|
|
238
|
|
239 The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter**
|
|
240
|
|
241 *Conditions and Filters*
|
|
242
|
|
243 Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first, this is what gives ultimate power to this tool. So try to look at the examples we are supplying below.
|
|
244
|
|
245 -----
|
|
246
|
|
247 **Example 1. Using a single filter**
|
|
248
|
|
249 When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters).
|
|
250 For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below:
|
|
251
|
|
252 .. image:: images/simple-filter.png
|
|
253
|
|
254 -----
|
|
255
|
|
256 **Example 2. Using multiple filters**
|
|
257
|
|
258 Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only.
|
|
259 To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button):
|
|
260
|
|
261 .. image:: images/multiple-filters.png
|
|
262
|
|
263 In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering.
|
|
264 In other words only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto forward strand will be returned in this example.
|
|
265
|
|
266 -----
|
|
267
|
|
268 **Example 3. Complex filtering with multiple conditions**
|
|
269
|
|
270 Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*)
|
|
271 at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*.
|
|
272 The following screenshot explains how this can be done:
|
|
273
|
|
274 .. image:: images/complex-filters.png
|
|
275
|
|
276 -----
|
|
277
|
|
278 **Example 4. Even more complex filtering with Rules**
|
|
279
|
|
280 In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows you to combine them in a variety of ways to enable even more powerful filtering.
|
|
281 For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such
|
|
282 filtering will look like this::
|
|
283
|
|
284 !(1) & (2 | 3)
|
|
285
|
|
286 Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy:
|
|
287
|
|
288 .. image:: images/rule.png
|
|
289
|
|
290 There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule.
|
|
291 Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means::
|
|
292
|
|
293 NOT condition 1 AND (condition 2 OR condition 3)
|
|
294
|
|
295 -----
|
|
296
|
|
297 **JSON script file**
|
|
298
|
|
299 This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools.
|
|
300 For instance, Example 3 looks like this in the JSON form::
|
|
301
|
|
302 {
|
|
303 "filters":
|
|
304 [
|
|
305 { "id": "1",
|
|
306 "tag":"NM:=0",
|
|
307 "isReverseStrand":"false"
|
|
308 },
|
|
309 { "id": "2",
|
|
310 "tag":"NM:>0",
|
|
311 "isReverseStrand":"true"
|
|
312 }
|
|
313 ]
|
|
314 }
|
|
315
|
|
316
|
|
317
|
|
318 -----
|
|
319
|
|
320 **More information**
|
|
321
|
|
322 .. class:: infomark
|
|
323
|
|
324 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
|
|
325
|
|
326
|
|
327 </help>
|
|
328 </tool>
|