comparison bamtools-filter.xml @ 0:62e370d81cd5 draft default tip

Uploaded
author anton
date Tue, 26 Aug 2014 13:14:33 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:62e370d81cd5
1 <tool id="bamFilter" name="bamFilter" version="0.0.1">
2 <description>Filter BAM files on a variety of attributes</description>
3 <requirements> <!-- external packages Galaxy must provide before the command runs -->
4 <requirement version="2.3.0_2d7685d2ae" type="package">bamtools</requirement>
5 </requirements>
6 <command>
7 ## Copy the generated JSON filter script to the second output; chained with "and" so a failed copy stops the job
8 cat "$script_file" > "$out_file2" &amp;&amp;
9 ## Symlink every input BAM and its index as localbam_N.bam / localbam_N.bam.bai (relative paths, i.e. the current directory)
10 #for $bam_count, $input_bam in enumerate( $input_bams ):
11 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
12 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
13 #end for
14 ## Run bamtools filter with the JSON script produced by the script_file configfile
15 bamtools
16 filter
17 -script "$script_file"
18 ## One -in argument per symlinked input BAM
19 #for $bam_count, $input_bam in enumerate( $input_bams ):
20 -in "localbam_${bam_count}.bam"
21 #end for
22 -out "$out_file1"
23
24 </command>
25
26 <inputs>
27 <repeat name="input_bams" title="BAM dataset(s) to filter" min="1"> <!-- one or more BAM datasets; the command passes each one to bamtools as its own -in argument -->
28 <param name="input_bam" type="data" format="bam" label="BAM dataset" />
29 </repeat>
30 <repeat name="conditions" title="Condition" min="1"> <!-- each Condition becomes one JSON object with its own "id" in the generated filter script -->
31 <repeat name="filters" title="Filter" min="1"> <!-- each Filter contributes one "property":"value" pair to its Condition -->
32 <conditional name="bam_property">
33 <param name="bam_property_selector" type="select" label="Select BAM property to filter on">
34 <option value="alignmentFlag"/>
35 <option value="cigar"/>
36 <option value="insertSize"/>
37 <option value="isDuplicate"/>
38 <option value="isFailedQC"/>
39 <option value="isFirstMate"/>
40 <option value="isMapped"/>
41 <option value="isMateMapped"/>
42 <option value="isMateReverseStrand"/>
43 <option value="isPaired"/>
44 <option value="isPrimaryAlignment"/>
45 <option value="isProperPair"/>
46 <option value="isReverseStrand"/>
47 <option value="isSecondMate"/>
48 <option selected="True" value="mapQuality"/>
49 <option value="matePosition"/>
50 <option value="mateReference"/>
51 <option value="name"/>
52 <option value="position"/>
53 <option value="queryBases"/>
54 <option value="reference"/>
55 <option value="tag"/>
56 </param>
57 <!-- would be fantastic to have AND and OR constructs in when statements -->
58 <when value="alignmentFlag">
59 <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/>
60 </when>
61 <when value="cigar">
62 <param name="bam_property_value" type="text" size="10" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/>
63 </when>
64 <when value="insertSize">
65 <param name="bam_property_value" type="text" size="10" value=">=250" label="Filter on insert size" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select pairs with insert size of at least 250 nt use &quot;>=250&quot;">
66 <sanitizer invalid_char="">
67 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
68 </sanitizer>
69 </param>
70 </when>
71 <when value="isDuplicate">
72 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads marked as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" />
73 </when>
74 <when value="isFailedQC">
75 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/>
76 </when>
77 <when value="isFirstMate">
78 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/>
79 </when>
80 <when value="isMapped">
81 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select mapped reads" help="Checked = Mapped, Empty = NOT mapped"/>
82 </when>
83 <when value="isMateMapped">
84 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped, Empty = Mate is NOT mapped"/>
85 </when>
86 <when value="isMateReverseStrand">
87 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/>
88 </when>
89 <when value="isPaired">
90 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/>
91 </when>
92 <when value="isPrimaryAlignment">
93 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/>
94 </when>
95 <when value="isProperPair">
96 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/>
97 </when>
98 <when value="isReverseStrand">
99 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/>
100 </when>
101 <when value="isSecondMate">
102 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/>
103 </when>
104 <when value="mapQuality">
105 <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use &quot;>=30&quot;">
106 <sanitizer invalid_char="">
107 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
108 </sanitizer>
109 </param>
110 </when>
111 <when value="matePosition">
112 <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use &quot;&gt;1000000&quot;">
113 <sanitizer invalid_char="">
114 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
115 </sanitizer>
116 </param>
117 </when>
118 <when value="mateReference">
119 <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chr22 use &quot;chr22&quot;">
120 <sanitizer invalid_char="">
121 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
122 </sanitizer>
123 </param>
124 </when>
125 <when value="name">
126 <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression.">
127 <sanitizer invalid_char="">
128 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
129 </sanitizer>
130 </param>
131 </when>
132 <when value="position">
133 <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use &quot;&gt;5000&quot;">
134 <sanitizer invalid_char="">
135 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
136 </sanitizer>
137 </param>
138 </when>
139 <when value="queryBases">
140 <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression">
141 <sanitizer invalid_char="">
142 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
143 </sanitizer>
144 </param>
145 </when>
146 <when value="reference">
147 <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression">
148 <sanitizer invalid_char="">
149 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
150 </sanitizer>
151 </param>
152 </when>
153 <when value="tag">
154 <param name="bam_property_value" type="text" value="NM:&gt;1" label="Filter on a particular tag" help="You can use &gt;, &lt;, =, and ! (not).
155 Tag name and its value must be separated by &quot;:&quot;. E.g., to obtain reads with more than one mismatch use &quot;NM:&gt;1&quot;">
156 <sanitizer invalid_char="">
157 <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value=":!="/></valid>
158 </sanitizer>
159 </param>
160 </when>
161 </conditional>
162 </repeat>
163 </repeat>
164 <conditional name="rule_configuration"> <!-- optional rule expression that combines conditions by their numbers -->
165 <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." />
166 <when value="true">
167 <param name="rules" type="text" size="20" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works." >
168 <sanitizer invalid_char="">
169 <valid initial="string.printable"/>
170 </sanitizer>
171 </param>
172 </when>
173 </conditional>
174 </inputs>
175
176 <configfiles>
177 <configfile name="script_file">
178 ##Sets up a json configfile for bamtools filter
179 ##If there is more than one condition prints brackets and "filters:"; the "rule" entry is added only when rules_selector renders as its truevalue "true"
180 #if len( $conditions ) > 1
181 {
182 "filters":
183 [
184 #end if
185 #for $i, $c in enumerate( $conditions, start=1 )
186 { "id": "$i",
187 #for $j, $s in enumerate( $c.filters, start=1 )
188 ##The if below takes care of the comma at the end of last filter within a condition
189 #if $j != len( $c.filters)
190 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}",
191 #else
192 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}"
193 #end if
194 #end for
195 ##The if below takes care of the comma after every condition except the last one
196 #if $i != len( $conditions )
197 },
198 #else
199 }
200 #end if
201 #end for
202 #if len( $conditions ) > 1
203 #if str( $rule_configuration.rules_selector ) == "true":
204 ],
205 "rule" : "${rule_configuration.rules}"
206 #else
207 ]
208 #end if
209 }
210 #end if
211 </configfile>
212 </configfiles>
213
214 <outputs>
215 <data name="out_file2" format="txt" /> <!-- copy of the JSON filter script, written by the cat step of the command -->
216 <data name="out_file1" format="bam" /> <!-- filtered alignments, written through the -out argument of bamtools filter -->
217 </outputs>
218 <tests>
219 <test> <!-- single input BAM filtered on mapQuality with the expression >20 -->
220 <param name="input_bam" value="bamtools-input1.bam" ftype="bam"/>
221 <param name="bam_property_selector" value="mapQuality"/>
222 <param name="bam_property_value" value="&gt;20"/>
223 <output name="out_file1" ftype="bam" file="bamtools-test1.bam"/>
224 </test>
225 </tests>
226 <help>
227 **What it does**
228
229 BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
230
231 -----
232
233 **How it works**
234
235 The tool's logic relies on three concepts: (1) input BAM(s), (2) conditions (groups of filters), and (3) filters.
236
237 *Input BAM(s)*
238
239 The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter**
240
241 *Conditions and Filters*
242
243 Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first, this is what gives ultimate power to this tool. So try to look at the examples we are supplying below.
244
245 -----
246
247 **Example 1. Using a single filter**
248
249 When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters).
250 For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below:
251
252 .. image:: images/simple-filter.png
253
254 -----
255
256 **Example 2. Using multiple filters**
257
258 Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only.
259 To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button):
260
261 .. image:: images/multiple-filters.png
262
263 In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering.
264 In other words only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto forward strand will be returned in this example.
265
266 -----
267
268 **Example 3. Complex filtering with multiple conditions**
269
270 Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*)
271 at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*.
272 The following screenshot explains how this can be done:
273
274 .. image:: images/complex-filters.png
275
276 -----
277
278 **Example 4. Even more complex filtering with Rules**
279
280 In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows you to combine them in a variety of ways to enable even more powerful filtering.
281 For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such
282 filtering will look like this::
283
284 !(1) &amp; (2 | 3)
285
286 Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy:
287
288 .. image:: images/rule.png
289
290 There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule.
291 Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means::
292
293 NOT condition 1 AND (condition 2 OR condition 3)
294
295 -----
296
297 **JSON script file**
298
299 This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools.
300 For instance, Example 3 looks like this in the JSON form::
301
302 {
303 "filters":
304 [
305 { "id": "1",
306 "tag":"NM:=0",
307 "isReverseStrand":"false"
308 },
309 { "id": "2",
310 "tag":"NM:>0",
311 "isReverseStrand":"true"
312 }
313 ]
314 }
315
316
317
318 -----
319
320 **More information**
321
322 .. class:: infomark
323
324 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
325
326
327 </help>
328 </tool>