comparison bamutil_diff.xml @ 0:2cafa8420c04 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/bamutil/ commit c1945909ca200610f128577b68a82d9228905f3d-dirty"
author jjohnson
date Fri, 26 Mar 2021 13:16:53 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2cafa8420c04
1 <tool id="bamutil_diff" name="BamUtil diff" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
2 <description>two coordinate sorted SAM/BAM files</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <!-- Builds the bam diff command line: field switches are emitted only for the "all" and "select" choices, tag options only inside "select". --><command detect_errors="exit_code"><![CDATA[
8 bam diff
9 --in1 '$in1'
10 --in2 '$in2'
11 #if str($fields.choice) == 'all'
12 --all
13 #elif str($fields.choice) == 'select'
14 $fields.flag
15 $fields.mapQual
16 $fields.mate
17 $fields.isize
18 $fields.seq
19 $fields.baseQual
20 $fields.noCigar
21 $fields.noPos
22 #if str($fields.samtags.tagchoice) == 'everyTag'
23 --everyTag
24 #elif str($fields.samtags.tagchoice) == 'specify'
25 --tags '$fields.samtags.tags'
26 #end if
27 #end if
28 --posDiff $posDiff
29 --recPoolSize -1
30 $onlyDiffs
31 --params
32 --noPhoneHome
33 --out $output_as
34 ]]></command>
35 <inputs> <!-- Two alignment inputs, the matching window, the fields/tags to compare, and the output format. -->
36 <param argument="--in1" type="data" format="sam,bam" label="Input SAM/BAM 1"/>
37 <param argument="--in2" type="data" format="sam,bam" label="Input SAM/BAM 2"/>
38 <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/>
39 <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/>
40 <conditional name="fields"> <!-- "default" and "all" take no extra parameters; "select" exposes one switch per field. -->
41 <param name="choice" type="select" label="BAM fields to diff">
42 <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option>
43 <option value="all">Diff all the SAM/BAM fields</option>
44 <option value="select">Select SAM/BAM fields to diff</option>
45 </param>
46 <when value="default"/>
47 <when value="all"/>
48 <when value="select">
49 <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/>
50 <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/>
51 <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/>
52 <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/>
53 <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/>
54 <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/>
55 <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the cigars."/>
56 <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/>
57 <conditional name="samtags"> <!-- Tag comparison is only configurable when fields are selected individually. -->
58 <param name="tagchoice" type="select" label="Tags to diff">
59 <option value="none">Do not diff tags</option>
60 <option value="everyTag">Diff every tag</option>
61 <option value="specify">Specify tags to diff</option>
62 </param>
63 <when value="none"/>
64 <when value="everyTag"/>
65 <when value="specify">
66 <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type..."> <!-- The validator accepts a comma-separated list of two-character tags, each followed by ":" and one type letter from A, i, f, Z, H, B. -->
67 <validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator>
68 </param>
69 </when>
70 </conditional>
71 </when>
72 </conditional>
73 <param name="output_as" type="select" label="Output format">
74 <option value="diff.txt">ASCII text diff file</option>
75 <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option>
76 <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option>
77 </param>
78 </inputs>
79 <outputs> <!-- Every dataset is filtered on output_as, so only the outputs of the selected format are created. -->
80 <data format="bam" name="diff_bam" label="${tool.name} on ${on_string}: diff.bam" from_work_dir="diff.bam">
81 <filter>output_as == 'diff.bam'</filter>
82 </data>
83 <data format="bam" name="diff_only1_bam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}" from_work_dir="diff_only1_*.bam"> <!-- NOTE(review): from_work_dir is documented as a relative path; confirm that the "*" wildcard used by the four only-in-file outputs is resolved by the target Galaxy version. -->
84 <filter>output_as == 'diff.bam'</filter>
85 </data>
86 <data format="bam" name="diff_only2_bam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}" from_work_dir="diff_only2_*.bam">
87 <filter>output_as == 'diff.bam'</filter>
88 </data>
89 <data format="sam" name="diff_sam" label="${tool.name} on ${on_string}: diff.sam" from_work_dir="diff.sam">
90 <filter>output_as == 'diff.sam'</filter>
91 </data>
92 <data format="sam" name="diff_only1_sam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}" from_work_dir="diff_only1_*.sam">
93 <filter>output_as == 'diff.sam'</filter>
94 </data>
95 <data format="sam" name="diff_only2_sam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}" from_work_dir="diff_only2_*.sam">
96 <filter>output_as == 'diff.sam'</filter>
97 </data>
98 <data format="txt" name="diff_txt" label="${tool.name} on ${on_string}: diff.txt" from_work_dir="diff.txt">
99 <filter>output_as == 'diff.txt'</filter>
100 </data>
101 </outputs>
102 <tests>
103 <!-- Test-1: in1.sam vs in2.sam, default fields, text diff output; only diff_txt passes its filter. -->
104 <test expect_num_outputs="1">
105 <param name="in1" ftype="sam" value="in1.sam"/>
106 <param name="in2" ftype="sam" value="in2.sam"/>
107 <param name="posDiff" value="100000"/>
108 <param name="onlyDiffs" value="true"/>
109 <conditional name="fields">
110 <param name="choice" value="default"/>
111 </conditional>
112 <param name="output_as" value="diff.txt"/>
113 <!-- A single declaration carries both the expected-file comparison and the content assertions. -->
114 <output name="diff_txt" file="diff.txt">
115 <assert_contents>
116 <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
117 <has_text_matching expression="&lt;\t1a3\t74M74N1M" />
118 <has_text_matching expression="&gt;\ta3\t74M66N1M" />
119 </assert_contents>
120 </output>
121 </test>
122
123 <!-- Test-2: in1.sam vs in2.sam, selected fields (flag, seq) plus tags AS:i and MD:Z, SAM output; the three SAM datasets pass their filters. -->
124 <test expect_num_outputs="3">
125 <param name="in1" ftype="sam" value="in1.sam"/>
126 <param name="in2" ftype="sam" value="in2.sam"/>
127 <param name="posDiff" value="100000"/>
128 <param name="onlyDiffs" value="true"/>
129 <conditional name="fields">
130 <param name="choice" value="select"/>
131 <param name="flag" value="true"/>
132 <param name="seq" value="true"/>
133 <conditional name="samtags">
134 <param name="tagchoice" value="specify"/>
135 <param name="tags" value="AS:i,MD:Z"/>
136 </conditional>
137 </conditional>
138 <param name="output_as" value="diff.sam"/>
139 <output name="diff_sam">
140 <assert_contents>
141 <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228" />
142 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
143 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
144 </assert_contents>
145 </output>
146 <output name="diff_only1_sam">
147 <assert_contents>
148 <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
149 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
150 <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG" />
151 <has_text text="AS:i:0" />
152 <has_text text="MD:Z:75" />
153 </assert_contents>
154 </output>
155 <output name="diff_only2_sam">
156 <assert_contents>
157 <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
158 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
159 <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA" />
160 <has_text text="AS:i:0" />
161 <has_text text="MD:Z:75" />
162 </assert_contents>
163 </output>
164 </test>
165 <!-- Test-3: in1.sam vs in3.sam, default fields, text diff output; the read asserted present in Test-1 must be absent here. -->
166 <test expect_num_outputs="1">
167 <param name="in1" ftype="sam" value="in1.sam"/>
168 <param name="in2" ftype="sam" value="in3.sam"/>
169 <param name="posDiff" value="100000"/>
170 <param name="onlyDiffs" value="true"/>
171 <conditional name="fields">
172 <param name="choice" value="default"/>
173 </conditional>
174 <param name="output_as" value="diff.txt"/>
175 <output name="diff_txt">
176 <assert_contents>
177 <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
178 </assert_contents>
179 </output>
180 </test>
181 </tests>
182 <help><![CDATA[
183 **bamUtil diff**
184
185 The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools.
186 The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different.
187 diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work.
188 By default, just the chromosome/position and cigar are compared for each record.
189 Note: The headers are not compared.
190
191 Options are available to compare::
192
193 - all fields
194 - flags
195 - mapping quality
196 - mate chromosome/position
197 - insert size
198 - sequence
199 - base quality
200 - specified tags
201 - all tags
202 - turn off position comparison
203 - turn off cigar comparison
204
205 **Inputs**
206 Two BAM or SAM alignment files
207
208 **Outputs**
209 Choice of 2 Output Formats:
210
211 ::
212
213 **Diff Format**
214 There are 2 types of differences.
215 ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff
216 ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different
217 Each difference output consists of 2 or 3 lines. If the record only appears in one of the files, the diff is 2 lines, if it appears in both files, the diff is 3 lines.
218 The first line of the difference output is just the read name.
219 The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'.
220 The 2nd line is the flag followed by the diff'd fields from one of the records.
221 The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record.
222 The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified::
223
224 - '<' or '>'
225 - flag
226 - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified
227 - cigar - if --noCigar is not specified
228 - mapping quality - if --mapQual or --all is specified
229 - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified
230 - insert size - if --isize or --all is specified
231 - sequence - if --seq or --all is specified
232 - base quality - if --baseQual or --all is specified
233 - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified
234
235
236 **SAM/BAM Format**
237 In SAM/BAM format there will be 3 output files::
238
239 1. the specified name with record diffs
240 2. specified name with _only1_<in1>.sam/bam with records only in the in1 file
241 3. specified name with _only2_<in2>.sam/bam with records only in the in2 file
242
243 Records that are identical in the two files are not written in any of these output files.
244 When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags::
245
246 - ZF - Flag
247 - ZP - Chromosome:1-based Position
248 - ZC - Cigar
249 - ZM - Mapping Quality
250 - ZN - Chromosome:1-based Mate Position
251 - ZI - Insert Size
252 - ZS - Sequence
253 - ZQ - Base Quality
254 - ZT - Tags
255
256 If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags.
257
258
259
260
261 https://genome.sph.umich.edu/wiki/BamUtil:_diff
262
263 ]]></help>
264 <expand macro="citations" />
265 </tool>