Mercurial > repos > artbio > sambamba
comparison sambamba_filter.xml @ 0:e3cbb848d8f7 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 1ff1d6786536e134d019c6d6d12ee9885f44b601"
author | artbio |
---|---|
date | Thu, 21 May 2020 09:51:19 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e3cbb848d8f7 |
---|---|
1 <tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.4"> | |
2 <description> | |
3 on flags, fields, and tags using Sambamba | |
4 </description> | |
5 <requirements> <!-- packages that must be available at run time; sambamba is pinned to 0.7.1 --> | |
6 <requirement type="package" version="0.7.1">sambamba</requirement> | |
7 </requirements> | |
8 <stdio> <!-- any exit code of 1 or above is reported as a fatal error --> | |
9 <exit_code range="1:" level="fatal" description="Error occurred" /> | |
10 </stdio> | |
11 <!-- <version_command>sambamba 2>&1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> --> | |
12 <!-- Links the input BAM and its index into the working directory, then runs "sambamba view" either to filter (optional expression, optional region) or to down-sample (seed and fraction). The filter option is emitted only when an expression was given; the format, output, input and region arguments are always emitted. Single quotes inside the expression are shell-escaped because the query sanitizer lets them through. --> <command detect_errors="exit_code"><![CDATA[ | |
13 ln -s '$input' input.bam && | |
14 ln -s '$input.metadata.bam_index' input.bai && | |
15 #if $sambamba_options.selector == 'filter' | |
16 sambamba view -h -t \${GALAXY_SLOTS:-4} | |
17 #if str($sambamba_options.query).strip() != '' | |
18 --filter='${str($sambamba_options.query).replace("'", "'\\''")}' | |
19 #end if | |
20 -f '$format' -o '$outfile' input.bam $sambamba_options.region | |
21 #else | |
22 sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format' | |
23 --subsampling-seed='$sambamba_options.seed' | |
24 -s '$sambamba_options.fraction' -o '$outfile' input.bam | |
25 #end if | |
26 ]]></command> | |
27 <inputs> <!-- user-facing parameters: input dataset, output format, and a filter/down-sample switch --> | |
28 <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/> | |
29 <param name="format" type="select" label="format of the tool output"> | |
30 <option value="bam">BAM</option> | |
31 <option value="sam">SAM</option> | |
32 </param> | |
33 <conditional name="sambamba_options"> <!-- "selector" decides which of the two "when" parameter sets is shown --> | |
34 <param name="selector" type="select" label="Filter or Down-sample alignments"> | |
35 <option value="sample">Down-sample bam or sam alignments</option> | |
36 <option value="filter" selected="true">Filter bam or sam alignments</option> | |
37 </param> | |
38 <when value="filter"> | |
39 <param name="query" type="text" size="80"> | |
40 <sanitizer invalid_char="X"> <!-- letters, digits, punctuation (quotes included) and spaces pass through unchanged; anything else becomes "X" --> | |
41 <valid initial="string.ascii_letters,string.digits,string.punctuation"> | |
42 <add value=" " /> | |
43 </valid> | |
44 </sanitizer> | |
45 <label>Filter expression</label> | |
46 <help>See below for query syntax.</help> | |
47 </param> | |
48 <param name="region" type="text" size="40" label="Region in format chr:beg-end"> | |
49 <help> | |
50 Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000' | |
51 (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000' | |
52 (region between 1,000,000 and 2,000,000bp including the end points). | |
53 The coordinates are 1-based. | |
54 </help> | |
55 </param> | |
56 </when> | |
57 <when value="sample"> | |
58 <param name="seed" type="integer" value="123" size="10"> | |
59 <label>Seed value for randomisation</label> | |
60 <help> | |
61 Be careful to select a different seed value if you | |
62 re-subsample an output of this tool | |
63 </help> | |
64 </param> | |
65 <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling"> | |
66 <help> | |
67 Use a real number between 0 and 1 to indicate the relative size of | |
68 the fraction you wish to retrieve | |
69 </help> | |
70 </param> | |
71 </when> | |
72 </conditional> | |
73 </inputs> | |
74 <outputs> <!-- single output dataset, referenced as $outfile in the command template --> | |
75 <data name="outfile" format="bam"> <!-- declared as BAM unless the rule below applies --> | |
76 <change_format> | |
77 <when input="format" value="sam" format="sam" /> <!-- datatype becomes SAM when the "format" input is set to "sam" --> | |
78 </change_format> | |
79 </data> | |
80 </outputs> | |
81 <tests> <!-- two filter cases and two down-sample cases, each with one BAM and one SAM output --> | |
82 <test> <!-- filter mode: tag and read-name expression, no region, BAM output --> | |
83 <param name="input" value="ex1_header.sam" ftype="sam" /> | |
84 <param name="selector" value="filter" /> | |
85 <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" /> | |
86 <param name="format" value="bam" /> | |
87 <param name="region" value="" /> | |
88 <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" /> | |
89 </test> | |
90 <test> <!-- filter mode: MD-tag regex plus flag, restricted to a region, SAM output; up to 2 differing lines tolerated --> | |
91 <param name="input" value="c1215_fixmate.bam" ftype="bam" /> | |
92 <param name="selector" value="filter" /> | |
93 <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" /> | |
94 <param name="format" value="sam" /> | |
95 <param name="region" value="AL096846:1000-5000" /> | |
96 <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" lines_diff="2"/> | |
97 </test> | |
98 <test> <!-- sample mode: seed 123, fraction 0.1, BAM output --> | |
99 <param name="input" value="ex1_header.sam" ftype="sam" /> | |
100 <param name="selector" value="sample" /> | |
101 <param name="seed" value="123" /> | |
102 <param name="fraction" value="0.1" /> | |
103 <param name="format" value="bam" /> | |
104 <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" /> | |
105 </test> | |
106 <test> <!-- sample mode: seed 123, fraction 0.1, SAM output; up to 2 differing lines tolerated --> | |
107 <param name="input" value="c1215_fixmate.bam" ftype="bam" /> | |
108 <param name="selector" value="sample" /> | |
109 <param name="seed" value="123" /> | |
110 <param name="fraction" value="0.1" /> | |
111 <param name="format" value="sam" /> | |
112 <output name="outfile" file="c1215_fixmate_sampled.sam" ftype="sam" lines_diff="2"/> | |
113 </test> | |
114 </tests> | |
115 <help> | |
116 Sambamba Filter Overview | |
117 ======================== | |
118 | |
119 This tool uses the sambamba_ ``view`` command to filter BAM/SAM on flags, fields, tags, and region. The input is a SAM or BAM file. | |
120 | |
121 | |
122 Filter Syntax | |
123 ============= | |
124 | |
125 A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed. | |
126 | |
127 A *basic condition* is a condition on a single record field, tag, or flag. | |
128 | |
129 You can use the ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=`` comparison operators for both integers and strings. | |
130 | |
131 Strings are delimited by single quotes; if you need a single quote inside a string, escape it with a backslash (``\``). | |
132 | |
133 Examples of filter expressions | |
134 ------------------------------ | |
135 | |
136 :: | |
137 | |
138 mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7) | |
139 read_name == 'abc\'def' | |
140 | |
141 Basic conditions for flags | |
142 -------------------------- | |
143 | |
144 The following flag names are recognized: | |
145 * paired | |
146 * proper_pair | |
147 * unmapped | |
148 * mate_is_unmapped | |
149 * reverse_strand | |
150 * mate_is_reverse_strand | |
151 * first_of_pair | |
152 * second_of_pair | |
153 * secondary_alignment | |
154 * failed_quality_control | |
155 * duplicate | |
156 | |
157 Example | |
158 ~~~~~~~ | |
159 | |
160 :: | |
161 | |
162 not (unmapped or mate_is_unmapped) and first_of_pair | |
163 | |
164 Basic conditions for fields | |
165 --------------------------- | |
166 | |
167 Conditions for integer and string fields are supported. | |
168 | |
169 List of integer fields: | |
170 * ref_id | |
171 * position | |
172 * mapping_quality | |
173 * sequence_length | |
174 * mate_ref_id | |
175 * mate_position | |
176 * template_length | |
177 | |
178 | |
179 List of string fields: | |
180 * read_name | |
181 * sequence | |
182 * cigar | |
183 | |
184 | |
185 Example | |
186 ~~~~~~~ | |
187 | |
188 :: | |
189 | |
190 ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80 | |
191 | |
192 Basic conditions for tags | |
193 ------------------------- | |
194 | |
195 Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type. | |
196 | |
197 In order to do filtering based on the presence of a particular tag, you can use special ``null`` value. | |
198 | |
199 Example | |
200 ~~~~~~~ | |
201 | |
202 :: | |
203 | |
204 [RG] != null and [AM] == 37 | |
205 | |
206 Down-sampling | |
207 ============= | |
208 | |
209 The tool is using the following sambamba command line for sampling: | |
210 | |
211 :: | |
212 | |
213 sambamba view -h -t <number of Galaxy threads configured in job_conf.xml> -f <bam or sam> | |
214 --subsampling-seed=<an integer> | |
215 -s <a real number between 0 and 1> -o <bam or sam output> input_file | |
216 | |
217 Warnings | |
218 -------- | |
219 | |
220 The tool does not down-sample at a user given **number of lines**, because sambamba does not | |
221 expose this functionality. For performance reasons, we decided not to add it to this wrapper. | |
222 | |
223 If you down-sample a dataset that has been *already down-sampled* with this tool, it is | |
224 important that you choose **another seed** for randomisation. Otherwise, the new subsampling | |
225 has been reported not to conform to the indicated fraction. | |
226 | |
227 | |
228 .. _sambamba: http://github.com/lomereiter/sambamba | |
229 | |
230 </help> | |
231 <citations> <!-- publication to cite when using this tool, referenced by DOI --> | |
232 <citation type="doi">10.1093/bioinformatics/btv098</citation> | |
233 </citations> | |
234 </tool> |