annotate sambamba_filter.xml @ 3:a03e95059256

Uploaded
author lparsons
date Wed, 24 Jul 2013 13:33:06 -0400
parents
children 123168c85390
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
a03e95059256 Uploaded
lparsons
parents:
diff changeset
1 <tool id="sambamba_filter" name="Filter BAM or SAM" version="1.0">
a03e95059256 Uploaded
lparsons
parents:
diff changeset
2 <requirements>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
3 <requirement type="package" version="0.3.3">sambamba</requirement>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
4 </requirements>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
5 <description>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
6 on flags, fields, and tags using Sambamba
a03e95059256 Uploaded
lparsons
parents:
diff changeset
7 </description>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
8 <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
9 <command>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
10 #if $query != None:
a03e95059256 Uploaded
lparsons
parents:
diff changeset
11 #set $query = $query.replace('__sq__', '\'')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
12 #set $query = $query.replace('__ob__', '[')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
13 #set $query = $query.replace('__cb__', ']')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
14 #set $query = $query.replace('__dq__', '"')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
15 #set $query = $query.replace('__oc__', '{')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
16 #set $query = $query.replace('__cc__', '}')
a03e95059256 Uploaded
lparsons
parents:
diff changeset
17 #set $query = $query.replace('__gt__', chr(62))
a03e95059256 Uploaded
lparsons
parents:
diff changeset
18 #set $query = $query.replace('__lt__', chr(60))
a03e95059256 Uploaded
lparsons
parents:
diff changeset
19 #end if
a03e95059256 Uploaded
lparsons
parents:
diff changeset
20 #set $input1 = 'input.bam'
a03e95059256 Uploaded
lparsons
parents:
diff changeset
21 ln -s $input $input1 &amp;&amp;
a03e95059256 Uploaded
lparsons
parents:
diff changeset
22 ln -s $input.metadata.bam_index input.bai &amp;&amp;
a03e95059256 Uploaded
lparsons
parents:
diff changeset
23 sambamba view
a03e95059256 Uploaded
lparsons
parents:
diff changeset
24 #if $query != "":
a03e95059256 Uploaded
lparsons
parents:
diff changeset
25 --filter="$query"
a03e95059256 Uploaded
lparsons
parents:
diff changeset
26 #end if
a03e95059256 Uploaded
lparsons
parents:
diff changeset
27 -f bam -o $outfile $input1 $region
a03e95059256 Uploaded
lparsons
parents:
diff changeset
28 </command>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
29 <inputs>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
30 <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
31 <param name="query" type="text" size="80">
a03e95059256 Uploaded
lparsons
parents:
diff changeset
32 <label>Filter expression</label>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
33 <help>See below for query syntax.</help>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
34 </param>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
35
a03e95059256 Uploaded
lparsons
parents:
diff changeset
36 <param name="region" type="text" size="40" label="Region in format chr:beg-end">
a03e95059256 Uploaded
lparsons
parents:
diff changeset
37 <help>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
38 Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000' (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000' (region between 1,000,000 and 2,000,000bp including the end points). The coordinates are 1-based.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
39 </help>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
40 </param>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
41 </inputs>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
42 <outputs>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
43 <data name="outfile" format="bam"></data>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
44 </outputs>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
45 <stdio>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
46 <exit_code range="1:" level="fatal" description="Error occurred" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
47 </stdio>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
48 <tests>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
49 <test>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
50 <param name="input" value="ex1_header.sam" ftype="sam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
51 <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
52 <param name="format" value="bam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
53 <param name="region" value="" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
54 <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
55 </test>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
56 <test>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
57 <param name="input" value="c1215_fixmate.bam" ftype="bam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
58 <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
59 <param name="format" value="sam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
60 <param name="region" value="AL096846:1000-5000" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
61 <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" />
a03e95059256 Uploaded
lparsons
parents:
diff changeset
62 </test>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
63 </tests>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
64 <help>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
65 Sambamba Filter Overview
a03e95059256 Uploaded
lparsons
parents:
diff changeset
66 ========================
a03e95059256 Uploaded
lparsons
parents:
diff changeset
67
a03e95059256 Uploaded
lparsons
parents:
diff changeset
68 This tool uses the sambamba_ ``view`` command to filter BAM/SAM files on flags, fields, tags, and region. The input is a SAM or BAM file.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
69
a03e95059256 Uploaded
lparsons
parents:
diff changeset
70
a03e95059256 Uploaded
lparsons
parents:
diff changeset
71 Filter Syntax
a03e95059256 Uploaded
lparsons
parents:
diff changeset
72 =============
a03e95059256 Uploaded
lparsons
parents:
diff changeset
73
a03e95059256 Uploaded
lparsons
parents:
diff changeset
74 Complete documentation of filter syntax is available at https://github.com/lomereiter/sambamba/wiki/%5Bsambamba-view%5D-Filter-expression-syntax.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
75
a03e95059256 Uploaded
lparsons
parents:
diff changeset
76 A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
77
a03e95059256 Uploaded
lparsons
parents:
diff changeset
78 A *basic condition* is a condition on a single record field, tag, or flag.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
79
a03e95059256 Uploaded
lparsons
parents:
diff changeset
80 You can use the ``==``, ``!=``, ``&gt;``, ``&lt;``, ``&gt;=``, ``&lt;=`` comparison operators for both integers and strings.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
81
a03e95059256 Uploaded
lparsons
parents:
diff changeset
82 Strings are delimited by single quotes; if you need a single quote inside a string, escape it with a backslash (``\``).
a03e95059256 Uploaded
lparsons
parents:
diff changeset
83
a03e95059256 Uploaded
lparsons
parents:
diff changeset
84 Examples of filter expressions
a03e95059256 Uploaded
lparsons
parents:
diff changeset
85 ------------------------------
a03e95059256 Uploaded
lparsons
parents:
diff changeset
86
a03e95059256 Uploaded
lparsons
parents:
diff changeset
87 ::
a03e95059256 Uploaded
lparsons
parents:
diff changeset
88
a03e95059256 Uploaded
lparsons
parents:
diff changeset
89 mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
a03e95059256 Uploaded
lparsons
parents:
diff changeset
90 read_name == 'abc\'def'
a03e95059256 Uploaded
lparsons
parents:
diff changeset
91
a03e95059256 Uploaded
lparsons
parents:
diff changeset
92 Basic conditions for flags
a03e95059256 Uploaded
lparsons
parents:
diff changeset
93 --------------------------
a03e95059256 Uploaded
lparsons
parents:
diff changeset
94
a03e95059256 Uploaded
lparsons
parents:
diff changeset
95 The following flag names are recognized:
a03e95059256 Uploaded
lparsons
parents:
diff changeset
96 * paired
a03e95059256 Uploaded
lparsons
parents:
diff changeset
97 * proper_pair
a03e95059256 Uploaded
lparsons
parents:
diff changeset
98 * unmapped
a03e95059256 Uploaded
lparsons
parents:
diff changeset
99 * mate_is_unmapped
a03e95059256 Uploaded
lparsons
parents:
diff changeset
100 * reverse_strand
a03e95059256 Uploaded
lparsons
parents:
diff changeset
101 * mate_is_reverse_strand
a03e95059256 Uploaded
lparsons
parents:
diff changeset
102 * first_of_pair
a03e95059256 Uploaded
lparsons
parents:
diff changeset
103 * second_of_pair
a03e95059256 Uploaded
lparsons
parents:
diff changeset
104 * secondary_alignment
a03e95059256 Uploaded
lparsons
parents:
diff changeset
105 * failed_quality_control
a03e95059256 Uploaded
lparsons
parents:
diff changeset
106 * duplicate
a03e95059256 Uploaded
lparsons
parents:
diff changeset
107
a03e95059256 Uploaded
lparsons
parents:
diff changeset
108 Example
a03e95059256 Uploaded
lparsons
parents:
diff changeset
109 ~~~~~~~
a03e95059256 Uploaded
lparsons
parents:
diff changeset
110
a03e95059256 Uploaded
lparsons
parents:
diff changeset
111 ::
a03e95059256 Uploaded
lparsons
parents:
diff changeset
112
a03e95059256 Uploaded
lparsons
parents:
diff changeset
113 not (unmapped or mate_is_unmapped) and first_of_pair
a03e95059256 Uploaded
lparsons
parents:
diff changeset
114
a03e95059256 Uploaded
lparsons
parents:
diff changeset
115 Basic conditions for fields
a03e95059256 Uploaded
lparsons
parents:
diff changeset
116 ---------------------------
a03e95059256 Uploaded
lparsons
parents:
diff changeset
117
a03e95059256 Uploaded
lparsons
parents:
diff changeset
118 Conditions for integer and string fields are supported.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
119
a03e95059256 Uploaded
lparsons
parents:
diff changeset
120 List of integer fields:
a03e95059256 Uploaded
lparsons
parents:
diff changeset
121 * ref_id
a03e95059256 Uploaded
lparsons
parents:
diff changeset
122 * position
a03e95059256 Uploaded
lparsons
parents:
diff changeset
123 * mapping_quality
a03e95059256 Uploaded
lparsons
parents:
diff changeset
124 * sequence_length
a03e95059256 Uploaded
lparsons
parents:
diff changeset
125 * mate_ref_id
a03e95059256 Uploaded
lparsons
parents:
diff changeset
126 * mate_position
a03e95059256 Uploaded
lparsons
parents:
diff changeset
127 * template_length
a03e95059256 Uploaded
lparsons
parents:
diff changeset
128
a03e95059256 Uploaded
lparsons
parents:
diff changeset
129
a03e95059256 Uploaded
lparsons
parents:
diff changeset
130 List of string fields:
a03e95059256 Uploaded
lparsons
parents:
diff changeset
131 * read_name
a03e95059256 Uploaded
lparsons
parents:
diff changeset
132 * sequence
a03e95059256 Uploaded
lparsons
parents:
diff changeset
133 * cigar
a03e95059256 Uploaded
lparsons
parents:
diff changeset
134
a03e95059256 Uploaded
lparsons
parents:
diff changeset
135
a03e95059256 Uploaded
lparsons
parents:
diff changeset
136 Example
a03e95059256 Uploaded
lparsons
parents:
diff changeset
137 ~~~~~~~
a03e95059256 Uploaded
lparsons
parents:
diff changeset
138
a03e95059256 Uploaded
lparsons
parents:
diff changeset
139 ::
a03e95059256 Uploaded
lparsons
parents:
diff changeset
140
a03e95059256 Uploaded
lparsons
parents:
diff changeset
141 ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80
a03e95059256 Uploaded
lparsons
parents:
diff changeset
142
a03e95059256 Uploaded
lparsons
parents:
diff changeset
143 Basic conditions for tags
a03e95059256 Uploaded
lparsons
parents:
diff changeset
144 -------------------------
a03e95059256 Uploaded
lparsons
parents:
diff changeset
145
a03e95059256 Uploaded
lparsons
parents:
diff changeset
146 Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
147
a03e95059256 Uploaded
lparsons
parents:
diff changeset
148 In order to do filtering based on the presence of a particular tag, you can use the special ``null`` value.
a03e95059256 Uploaded
lparsons
parents:
diff changeset
149
a03e95059256 Uploaded
lparsons
parents:
diff changeset
150 Example
a03e95059256 Uploaded
lparsons
parents:
diff changeset
151 ~~~~~~~
a03e95059256 Uploaded
lparsons
parents:
diff changeset
152
a03e95059256 Uploaded
lparsons
parents:
diff changeset
153 ::
a03e95059256 Uploaded
lparsons
parents:
diff changeset
154
a03e95059256 Uploaded
lparsons
parents:
diff changeset
155 [RG] != null and [AM] == 37
a03e95059256 Uploaded
lparsons
parents:
diff changeset
156
a03e95059256 Uploaded
lparsons
parents:
diff changeset
157 -----
a03e95059256 Uploaded
lparsons
parents:
diff changeset
158
a03e95059256 Uploaded
lparsons
parents:
diff changeset
159 .. _sambamba: http://github.com/lomereiter/sambamba
a03e95059256 Uploaded
lparsons
parents:
diff changeset
160 .. _filter-syntax: https://github.com/lomereiter/sambamba/wiki/%5Bsambamba-view%5D-Filter-expression-syntax
a03e95059256 Uploaded
lparsons
parents:
diff changeset
161
a03e95059256 Uploaded
lparsons
parents:
diff changeset
162 </help>
a03e95059256 Uploaded
lparsons
parents:
diff changeset
163 </tool>