<tool id="sambamba_filter" name="Filter BAM or SAM" version="1.0">
    <!--
        Galaxy wrapper around "sambamba view".
        Filters alignments by an optional filter expression (flags, fields,
        tags) and an optional region, and always writes BAM output.
    -->
    <requirements>
        <requirement type="package" version="0.3.3">sambamba</requirement>
    </requirements>
    <description>on flags, fields, and tags using Sambamba</description>
    <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command>
    <command><![CDATA[
        ## Link the dataset and its index into the working directory under
        ## matching names (input.bam / input.bai); presumably this is what lets
        ## sambamba locate the index for region queries.
        ln -s -f '${input}' input.bam &&
        ln -s -f '${input.metadata.bam_index}' input.bai &&
        sambamba view
        ## The filter expression is single-quoted; the sanitizer on the "query"
        ## parameter rewrites embedded single quotes so the quoting cannot be
        ## broken out of.
        #if str($query).strip()
            --filter='${query}'
        #end if
        ## The region is left unquoted, as before, so an empty value adds no
        ## argument at all.
        -f bam -o '${outfile}' input.bam $region
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
        <param name="query" type="text" size="80">
            <label>Filter expression</label>
            <help>See below for query syntax.</help>
            <!--
                Filter expressions legitimately use characters that the default
                text sanitizer mangles (~ \ [ ] < > and quotes). Allow every
                printable character and only rewrite the single quote so that
                the value stays safe inside a single-quoted shell argument.
            -->
            <sanitizer invalid_char="">
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
                <mapping initial="none">
                    <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
                </mapping>
            </sanitizer>
        </param>

        <param name="region" type="text" size="40" label="Region in format chr:beg-end">
            <help>
                Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000' (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000' (region between 1,000,000 and 2,000,000bp including the end points). The coordinates are 1-based.
            </help>
        </param>
    </inputs>
    <outputs>
        <data name="outfile" format="bam"/>
    </outputs>
    <stdio>
        <!-- Any non-zero exit status from the command is treated as fatal. -->
        <exit_code range="1:" level="fatal" description="Error occurred" />
    </stdio>
    <tests>
        <test>
            <param name="input" value="ex1_header.sam" ftype="sam" />
            <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" />
            <param name="region" value="" />
            <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" />
        </test>
        <test>
            <!--
                NOTE(review): this tool always writes BAM, yet the expected
                output below is declared as SAM. Confirm the test data and
                switch to a BAM reference file if needed.
            -->
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" />
            <param name="region" value="AL096846:1000-5000" />
            <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" />
        </test>
    </tests>
    <help><![CDATA[
Sambamba Filter Overview
========================

This tool uses the sambamba_ ``view`` command to filter BAM/SAM on flags, fields, tags, and region. The input is a SAM or BAM file; the output is always BAM.


Filter Syntax
=============

Complete documentation of filter syntax is available at https://github.com/lomereiter/sambamba/wiki/%5Bsambamba-view%5D-Filter-expression-syntax.

A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.

A *basic condition* is a condition on a single record field, tag, or flag.

You can use the ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=`` comparison operators for both integers and strings. As the examples below show, ``=~`` matches a string against a regular expression written between slashes.

Strings are delimited by single quotes; if you need a single quote inside a string, escape it with a backslash (``\'``).

Examples of filter expressions
------------------------------

::

    mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
    read_name == 'abc\'def'

Basic conditions for flags
--------------------------

The following flag names are recognized:

* paired
* proper_pair
* unmapped
* mate_is_unmapped
* reverse_strand
* mate_is_reverse_strand
* first_of_pair
* second_of_pair
* secondary_alignment
* failed_quality_control
* duplicate

Example
~~~~~~~

::

    not (unmapped or mate_is_unmapped) and first_of_pair

Basic conditions for fields
---------------------------

Conditions for integer and string fields are supported.

List of integer fields:

* ref_id
* position
* mapping_quality
* sequence_length
* mate_ref_id
* mate_position
* template_length

List of string fields:

* read_name
* sequence
* cigar

Example
~~~~~~~

::

    ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80

Basic conditions for tags
-------------------------

Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.

In order to filter on the presence of a particular tag, you can use the special ``null`` value.

Example
~~~~~~~

::

    [RG] != null and [AM] == 37

-----

.. _sambamba: http://github.com/lomereiter/sambamba
.. _filter-syntax: https://github.com/lomereiter/sambamba/wiki/%5Bsambamba-view%5D-Filter-expression-syntax
    ]]></help>
</tool>