Mercurial > repos > artbio > sambamba
comparison sambamba.xml @ 1:6195f15d4541 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 516e8d55d6d45e6f2266805b78eb25a711621321"
author | artbio |
---|---|
date | Mon, 25 May 2020 17:10:17 -0400 |
parents | |
children | 7ad3484aa5db |
comparison
equal
deleted
inserted
replaced
0:e3cbb848d8f7 | 1:6195f15d4541 |
---|---|
1 <tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.6"> | |
2 <description> | |
3 on flags, fields, and tags using Sambamba | |
4 </description> | |
5 <requirements> | |
6 <requirement type="package" version="0.7.1">sambamba</requirement> | |
7 </requirements> | |
8 <stdio> | |
9 <exit_code range="1:" level="fatal" description="Error occurred" /> | |
10 </stdio> | |
11 <!-- <version_command>sambamba 2>&1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> --> <!-- In the command below, dataset paths are single-quoted, and single quotes inside the user's filter expression are shell-escaped so the query cannot break out of its quoting. The region is left unquoted so that an empty value expands to no argument. --> | |
12 <command detect_errors="exit_code"><![CDATA[ | |
13 ln -s '$input' input.bam && | |
14 ln -s '$input.metadata.bam_index' input.bai && | |
15 #if $sambamba_options.selector == 'filter' | |
16 sambamba view -h -t \${GALAXY_SLOTS:-4} | |
17 #if $sambamba_options.query != '': | |
18 --filter='${str($sambamba_options.query).replace("'", "'\"'\"'")}' | |
19 #end if | |
20 -f '$format' -o '$outfile' input.bam $sambamba_options.region | |
21 #else | |
22 sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format' | |
23 --subsampling-seed='$sambamba_options.seed' | |
24 -s '$sambamba_options.fraction' -o '$outfile' input.bam | |
25 #end if | |
26 ]]></command> | |
27 <inputs> | |
28 <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/> | |
29 <param name="format" type="select" label="format of the tool output"> | |
30 <option value="bam">BAM</option> | |
31 <option value="sam">SAM</option> | |
32 </param> | |
33 <conditional name="sambamba_options"> | |
34 <param name="selector" type="select" label="Filter or Down-sample alignments"> | |
35 <option value="sample">Down-sample bam or sam alignments</option> | |
36 <option value="filter" selected="true">Filter bam or sam alignments</option> | |
37 </param> | |
38 <when value="filter"> | |
39 <param name="query" type="text" size="80"> | |
40 <sanitizer invalid_char="X"> | |
41 <valid initial="string.ascii_letters,string.digits, string.punctuation"> | |
42 <add value=" " /> | |
43 </valid> | |
44 </sanitizer> | |
45 <label>Filter expression</label> | |
46 <help>See below for query syntax.</help> | |
47 </param> | |
48 <param name="region" type="text" size="40" label="Region in format chr:beg-end"> | |
49 <help> | |
50 Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000' | |
51 (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000' | |
52 (region between 1,000,000 and 2,000,000bp including the end points). | |
53 The coordinates are 1-based. | |
54 </help> | |
55 </param> | |
56 </when> | |
57 <when value="sample"> | |
58 <param name="seed" type="integer" value="123" size="10"> | |
59 <label>Seed value for randomisation</label> | |
60 <help> | |
61 Be careful to select a different seed value if you | |
62 re-subsample a subsample output of this tool | |
63 </help> | |
64 </param> | |
65 <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling"> | |
66 <help> | |
67 Use a real number between 0 and 1 to indicate the relative size of | |
68 the fraction you wish to retrieve | |
69 </help> | |
70 </param> | |
71 </when> | |
72 </conditional> | |
73 </inputs> | |
74 <outputs> | |
75 <data name="outfile" format="bam"> | |
76 <change_format> | |
77 <when input="format" value="sam" format="sam" /> | |
78 </change_format> | |
79 </data> | |
80 </outputs> | |
81 <tests> | |
82 <test> | |
83 <param name="input" value="ex1_header.sam" ftype="sam" /> | |
84 <param name="selector" value="filter" /> | |
85 <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" /> | |
86 <param name="format" value="bam" /> | |
87 <param name="region" value="" /> | |
88 <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" /> | |
89 </test> | |
90 <test> | |
91 <param name="input" value="c1215_fixmate.bam" ftype="bam" /> | |
92 <param name="selector" value="filter" /> | |
93 <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" /> | |
94 <param name="format" value="sam" /> | |
95 <param name="region" value="AL096846:1000-5000" /> | |
96 <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" lines_diff="2"/> | |
97 </test> | |
98 <test> | |
99 <param name="input" value="c1215_fixmate.bam" ftype="bam" /> | |
100 <param name="selector" value="filter" /> | |
101 <param name="query" value='' /> | |
102 <param name="format" value="sam" /> | |
103 <param name="region" value="AL096846:1000-5000" /> | |
104 <output name="outfile" file="c1215_fixmate_region-filtered.sam" ftype="sam" lines_diff="2"/> | |
105 </test> | |
106 <test> | |
107 <param name="input" value="ex1_header.sam" ftype="sam" /> | |
108 <param name="selector" value="sample" /> | |
109 <param name="seed" value="123" /> | |
110 <param name="fraction" value="0.1" /> | |
111 <param name="format" value="bam" /> | |
112 <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" /> | |
113 </test> | |
114 <test> | |
115 <param name="input" value="c1215_fixmate.bam" ftype="bam" /> | |
116 <param name="selector" value="sample" /> | |
117 <param name="seed" value="123" /> | |
118 <param name="fraction" value="0.1" /> | |
119 <param name="format" value="sam" /> | |
120 <output name="outfile" file="c1215_fixmate_sampled.sam" ftype="sam" lines_diff="2"/> | |
121 </test> | |
122 </tests> | |
123 <help> | |
124 Sambamba Filter Overview | |
125 ======================== | |
126 | |
127 This tool uses the sambamba_ ``view`` command to filter BAM/SAM on flags, fields, tags, and region. Input is a SAM or BAM file. | |
128 | |
129 | |
130 Filter Syntax | |
131 ============= | |
132 | |
133 A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed. | |
134 | |
135 A *basic condition* is a condition on a single record field, tag, or flag. | |
136 | |
137 You can use the ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=`` comparison operators for both integers and strings. | |
138 | |
139 Strings are delimited by single quotes, if you need a single quote inside a string, escape it with ``\\``. | |
140 | |
141 Examples of filter expressions | |
142 ------------------------------ | |
143 | |
144 :: | |
145 | |
146 mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7) | |
147 read_name == 'abc\'def' | |
148 | |
149 Basic conditions for flags | |
150 -------------------------- | |
151 | |
152 The following flag names are recognized: | |
153 * paired | |
154 * proper_pair | |
155 * unmapped | |
156 * mate_is_unmapped | |
157 * reverse_strand | |
158 * mate_is_reverse_strand | |
159 * first_of_pair | |
160 * second_of_pair | |
161 * secondary_alignment | |
162 * failed_quality_control | |
163 * duplicate | |
164 | |
165 Example | |
166 ~~~~~~~ | |
167 | |
168 :: | |
169 | |
170 not (unmapped or mate_is_unmapped) and first_of_pair | |
171 | |
172 Basic conditions for fields | |
173 --------------------------- | |
174 | |
175 Conditions for integer and string fields are supported. | |
176 | |
177 List of integer fields: | |
178 * ref_id | |
179 * position | |
180 * mapping_quality | |
181 * sequence_length | |
182 * mate_ref_id | |
183 * mate_position | |
184 * template_length | |
185 | |
186 | |
187 List of string fields: | |
188 * read_name | |
189 * sequence | |
190 * cigar | |
191 | |
192 | |
193 Example | |
194 ~~~~~~~ | |
195 | |
196 :: | |
197 | |
198 ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80 | |
199 | |
200 Basic conditions for tags | |
201 ------------------------- | |
202 | |
203 Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type. | |
204 | |
205 In order to filter based on the presence of a particular tag, you can use the special ``null`` value. | |
206 | |
207 Example | |
208 ~~~~~~~ | |
209 | |
210 :: | |
211 | |
212 [RG] != null and [AM] == 37 | |
213 | |
214 Down-sampling | |
215 ============= | |
216 | |
217 The tool is using the following sambamba command line for sampling: | |
218 | |
219 :: | |
220 | |
221 sambamba view -h -t <number of Galaxy threads configured in job_conf.xml> -f <bam or sam> | |
222 --subsampling-seed=<an integer> | |
223 -s <a real number between 0 and 1> -o <bam or sam output> input_file | |
224 | |
225 Warnings | |
226 -------- | |
227 | |
228 The tool does not down-sample at a user given **number of lines**, because sambamba does not | |
229 expose this functionality. For performance reasons, we decided not to add it in this wrapper. | |
230 | |
231 If you down-sample a dataset that has been *already down-sampled* with this tool, it is | |
232 important that you choose **another seed** for randomisation. Otherwise, the new subsampling | |
233 was reported not to conform to the indicated fraction. | |
234 | |
235 | |
236 .. _sambamba: http://github.com/lomereiter/sambamba | |
237 | |
238 </help> | |
239 <citations> | |
240 <citation type="doi">10.1093/bioinformatics/btv098</citation> | |
241 </citations> | |
242 </tool> |