comparison diff.xml @ 0:c28c4c132459 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bamutil commit 9f971aa7d32d756aa4763ab29d1770178a5ece6b"
author iuc
date Fri, 02 Apr 2021 20:20:54 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c28c4c132459
<tool id="bamutil_diff" name="BamUtil diff" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around "bam diff": takes two SAM/BAM datasets, builds one
        "bam diff" command line from the selected options and collects either a
        text diff or a set of SAM/BAM files, depending on the "output_as" choice.
        The @...@ tokens and the "requirements", "edam" and "citations" macros
        are expected to come from macros.xml.
    -->
    <description>two coordinate sorted SAM/BAM files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="edam"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Field flags are only emitted for the 'all' and 'select' choices; the
        ## 'default' choice adds nothing, so bam diff falls back to its own defaults.
        bam diff
            --in1 '$in1'
            --in2 '$in2'
            #if $fields.choice == 'all':
                --all
            #elif $fields.choice == 'select':
                ## Each boolean expands to its flag when checked and to an empty string otherwise.
                $fields.flag
                $fields.mapQual
                $fields.mate
                $fields.isize
                $fields.seq
                $fields.baseQual
                $fields.noCigar
                $fields.noPos
                #if $fields.samtags.tagchoice == 'everyTag':
                    --everyTag
                #elif $fields.samtags.tagchoice == 'specify':
                    --tags '$fields.samtags.tags'
                #end if
            #end if
            --posDiff $posDiff
            ## NOTE(review): -1 presumably lifts the limit on buffered records; confirm against the bamUtil documentation.
            --recPoolSize -1
            $onlyDiffs
            --params
            --noPhoneHome
            ## The selected value doubles as the output file name that the outputs section picks up via from_work_dir.
            --out '$output_as'
    ]]></command>
    <inputs>
        <param argument="--in1" type="data" format="sam,bam" label="Input BAM 1"/>
        <param argument="--in2" type="data" format="sam,bam" label="Input BAM 2"/>
        <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/>
        <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/>
        <!-- Which record fields are compared: the tool default, everything, or a hand-picked set. -->
        <conditional name="fields">
            <param name="choice" type="select" label="BAM fields to diff">
                <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option>
                <option value="all">Diff all the SAM/BAM fields</option>
                <option value="select">Select SAM/BAM fields to diff</option>
            </param>
            <when value="default"/>
            <when value="all"/>
            <when value="select">
                <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/>
                <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/>
                <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/>
                <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/>
                <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/>
                <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/>
                <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the cigars."/>
                <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/>
                <conditional name="samtags">
                    <param name="tagchoice" type="select" label="Tags to diff">
                        <option value="none">Do not diff tags</option>
                        <option value="everyTag">Diff every tag</option>
                        <option value="specify">Specify tags to diff</option>
                    </param>
                    <when value="none"/>
                    <when value="everyTag"/>
                    <when value="specify">
                        <!-- The validator only admits comma-separated Tag:Type pairs, so the value cannot carry quotes into the single-quoted command argument. -->
                        <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type...">
                            <validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- Option values are file names: they are passed to the out option and matched by the output filters below. -->
        <param name="output_as" type="select" label="Output format">
            <option value="diff.txt">ASCII text diff file</option>
            <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option>
            <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option>
        </param>
    </inputs>
    <outputs>
        <!--
            Only the outputs whose filter matches the chosen "output_as" value are created:
            three datasets for BAM or SAM, one for text.
            NOTE(review): the wildcards in the only-in-file patterns presumably stand for a
            suffix that bam diff derives from the input names; confirm against the tool's behaviour.
        -->
        <data name="diff_bam" format="bam" from_work_dir="diff.bam" label="${tool.name} on ${on_string}: diff.bam">
            <filter>output_as == 'diff.bam'</filter>
        </data>
        <data name="diff_only1_bam" format="bam" from_work_dir="diff_only1_*.bam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
            <filter>output_as == 'diff.bam'</filter>
        </data>
        <data name="diff_only2_bam" format="bam" from_work_dir="diff_only2_*.bam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
            <filter>output_as == 'diff.bam'</filter>
        </data>
        <data name="diff_sam" format="sam" from_work_dir="diff.sam" label="${tool.name} on ${on_string}: diff.sam">
            <filter>output_as == 'diff.sam'</filter>
        </data>
        <data name="diff_only1_sam" format="sam" from_work_dir="diff_only1_*.sam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
            <filter>output_as == 'diff.sam'</filter>
        </data>
        <data name="diff_only2_sam" format="sam" from_work_dir="diff_only2_*.sam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
            <filter>output_as == 'diff.sam'</filter>
        </data>
        <data name="diff_txt" format="txt" from_work_dir="diff.txt" label="${tool.name} on ${on_string}: diff.txt">
            <filter>output_as == 'diff.txt'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test-1: default field set, text output; in1.sam vs in2.sam must report a read whose cigar differs. -->
        <test expect_num_outputs="1">
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in2.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="default"/>
            </conditional>
            <param name="output_as" value="diff.txt"/>
            <output name="diff_txt">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687"/>
                    <has_text_matching expression="&lt;\t1a3\t74M74N1M"/>
                    <has_text_matching expression="&gt;\ta3\t74M66N1M"/>
                </assert_contents>
            </output>
        </test>

        <!-- Test-2: hand-picked fields plus specified tags, SAM output; checks all three result files. -->
        <test expect_num_outputs="3">
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in2.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="select"/>
                <param name="flag" value="true"/>
                <param name="seq" value="true"/>
                <conditional name="samtags">
                    <param name="tagchoice" value="specify"/>
                    <param name="tags" value="AS:i,MD:Z"/>
                </conditional>
            </conditional>
            <param name="output_as" value="diff.sam"/>
            <output name="diff_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228"/>
                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541"/>
                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846"/>
                </assert_contents>
            </output>
            <output name="diff_only1_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846"/>
                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541"/>
                    <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG"/>
                    <has_text text="AS:i:0"/>
                    <has_text text="MD:Z:75"/>
                </assert_contents>
            </output>
            <output name="diff_only2_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541"/>
                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846"/>
                    <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA"/>
                    <has_text text="AS:i:0"/>
                    <has_text text="MD:Z:75"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test-3: in1.sam vs in3.sam with the default field set; the read reported in Test-1 must not appear. -->
        <test expect_num_outputs="1">
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in3.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="default"/>
            </conditional>
            <param name="output_as" value="diff.txt"/>
            <output name="diff_txt">
                <assert_contents>
                    <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**bamUtil diff**

The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools.
The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different.
diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work.
By default, just the chromosome/position and cigar are compared for each record.
Note: The headers are not compared.

Options are available to compare::

    - all fields
    - flags
    - mapping quality
    - mate chromosome/position
    - insert size
    - sequence
    - base quality
    - specified tags
    - all tags
    - turn off position comparison
    - turn off cigar comparison

**Inputs**
Two BAM or SAM alignment files

**Outputs**
Choice of 2 Output Formats:

::

    **Diff Format**
    There are 2 types of differences.
    ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff
    ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different
    Each difference output consists of 2 or 3 lines. If the record only appears in one of the files, the diff is 2 lines, if it appears in both files, the diff is 3 lines.
    The first line of the difference output is just the read name.
    The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'.
    The 2nd line is the flag followed by the diff'd fields from one of the records.
    The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record.
    The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified::

    - '<' or '>'
    - flag
    - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified
    - cigar - if --noCigar is not specified
    - mapping quality - if --mapQual or --all is specified
    - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified
    - insert size - if --isize or --all is specified
    - sequence - if --seq or --all is specified
    - base quality - if --baseQual or --all is specified
    - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified


    **BAM Format**
    In SAM/BAM format there will be 3 output files::

    1. the specified name with record diffs
    2. specified name with _only1_<in1>.sam/bam with records only in the in1 file
    3. specified name with _only2_<in2>.sam/bam with records only in the in2 file

    Records that are identical in the two files are not written in any of these output files.
    When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags::

    - ZF - Flag
    - ZP - Chromosome:1-based Position
    - ZC - Cigar
    - ZM - Mapping Quality
    - ZN - Chromosome:1-based Mate Position
    - ZI - Insert Size
    - ZS - Sequence
    - ZQ - Base Quality
    - ZT - Tags

    If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags.




https://genome.sph.umich.edu/wiki/BamUtil:_diff

    ]]></help>
    <expand macro="citations"/>
</tool>