Mercurial > repos > jjohnson > bamutil_diff
comparison bamutil_diff.xml @ 0:2cafa8420c04 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/bamutil/ commit c1945909ca200610f128577b68a82d9228905f3d-dirty"
author | jjohnson |
---|---|
date | Fri, 26 Mar 2021 13:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2cafa8420c04 |
---|---|
1 <tool id="bamutil_diff" name="BamUtil diff" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> | |
2 <description>two coordinate sorted SAM/BAM files</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 bam diff | |
9 --in1 '$in1' | |
10 --in2 '$in2' | |
11 #if $fields.choice == 'all': | |
12 --all | |
13 #elif $fields.choice == 'select': | |
14 $fields.flag | |
15 $fields.mapQual | |
16 $fields.mate | |
17 $fields.isize | |
18 $fields.seq | |
19 $fields.baseQual | |
20 $fields.noCigar | |
21 $fields.noPos | |
22 #if $fields.samtags.tagchoice == 'everyTag': | |
23 --everyTag | |
24 #elif $fields.samtags.tagchoice == 'specify': | |
25 --tags '$fields.samtags.tags' | |
26 #end if | |
27 #end if | |
28 --posDiff $posDiff | |
29 --recPoolSize -1 | |
30 $onlyDiffs | |
31 --params | |
32 --noPhoneHome | |
33 --out $output_as | |
34 ]]></command> | |
35 <inputs> | |
36 <param argument="--in1" type="data" format="sam,bam" label="Input BAM 1"/> | |
37 <param argument="--in2" type="data" format="sam,bam" label="Input BAM 2"/> | |
38 <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/> | |
39 <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/> | |
40 <conditional name="fields"> | |
41 <param name="choice" type="select" label="BAM fields to diff"> | |
42 <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option> | |
43 <option value="all">Diff all the SAM/BAM fields</option> | |
44 <option value="select">Select SAM/BAM fields to diff</option> | |
45 </param> | |
46 <when value="default"/> | |
47 <when value="all"/> | |
48 <when value="select"> | |
49 <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/> | |
50 <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/> | |
51 <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/> | |
52 <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/> | |
53 <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/> | |
54 <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/> | |
55 <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the cigars."/> | |
56 <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/> | |
57 <conditional name="samtags"> | |
58 <param name="tagchoice" type="select" label="Tags to diff"> | |
59 <option value="none">Do not diff tags</option> | |
60 <option value="everyTag">Diff every tag</option> | |
61 <option value="specify">Specify tags to diff</option> | |
62 </param> | |
63 <when value="none"/> | |
64 <when value="everyTag"/> | |
65 <when value="specify"> | |
66 <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type..."> | |
67 <validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator> | |
68 </param> | |
69 </when> | |
70 </conditional> | |
71 </when> | |
72 </conditional> | |
73 <param name="output_as" type="select" label="Output format"> | |
74 <option value="diff.txt">ASCII text diff file</option> | |
75 <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option> | |
76 <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option> | |
77 </param> | |
78 </inputs> | |
79 <outputs> | |
80 <data name="diff_bam" format="bam" from_work_dir="diff.bam" label="${tool.name} on ${on_string}: diff.bam"> | |
81 <filter>output_as == 'diff.bam'</filter> | |
82 </data> | |
83 <data name="diff_only1_bam" format="bam" from_work_dir="diff_only1_*.bam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}"> | |
84 <filter>output_as == 'diff.bam'</filter> | |
85 </data> | |
86 <data name="diff_only2_bam" format="bam" from_work_dir="diff_only2_*.bam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}"> | |
87 <filter>output_as == 'diff.bam'</filter> | |
88 </data> | |
89 <data name="diff_sam" format="sam" from_work_dir="diff.sam" label="${tool.name} on ${on_string}: diff.sam"> | |
90 <filter>output_as == 'diff.sam'</filter> | |
91 </data> | |
92 <data name="diff_only1_sam" format="sam" from_work_dir="diff_only1_*.sam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}"> | |
93 <filter>output_as == 'diff.sam'</filter> | |
94 </data> | |
95 <data name="diff_only2_sam" format="sam" from_work_dir="diff_only2_*.sam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}"> | |
96 <filter>output_as == 'diff.sam'</filter> | |
97 </data> | |
98 <data name="diff_txt" format="txt" from_work_dir="diff.txt" label="${tool.name} on ${on_string}: diff.txt"> | |
99 <filter>output_as == 'diff.txt'</filter> | |
100 </data> | |
101 </outputs> | |
102 <tests> | |
103 <!-- Test-1 --> | |
104 <test> | |
105 <param name="in1" ftype="sam" value="in1.sam"/> | |
106 <param name="in2" ftype="sam" value="in2.sam"/> | |
107 <param name="posDiff" value="100000"/> | |
108 <param name="onlyDiffs" value="true"/> | |
109 <conditional name="fields"> | |
110 <param name="choice" value="default"/> | |
111 </conditional> | |
112 <param name="output_as" value="diff.txt"/> | |
113 <output name="diff_txt" file="diff.txt"/> | |
114 <output name="diff_txt"> <!-- rows taken from the first input start with '<', rows from the second input with '>'; both markers are entity-escaped below so the attribute values stay well-formed --> | |
115 <assert_contents> | |
116 <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" /> | |
117 <has_text_matching expression="&lt;\t1a3\t74M74N1M" /> | |
118 <has_text_matching expression="&gt;\ta3\t74M66N1M" /> | |
119 </assert_contents> | |
120 </output> | |
121 </test> | |
122 | |
123 <!-- Test-2 --> | |
124 <test> | |
125 <param name="in1" ftype="sam" value="in1.sam"/> | |
126 <param name="in2" ftype="sam" value="in2.sam"/> | |
127 <param name="posDiff" value="100000"/> | |
128 <param name="onlyDiffs" value="true"/> | |
129 <conditional name="fields"> | |
130 <param name="choice" value="select"/> | |
131 <param name="flag" value="true"/> | |
132 <param name="seq" value="true"/> | |
133 <conditional name="samtags"> | |
134 <param name="tagchoice" value="specify"/> | |
135 <param name="tags" value="AS:i,MD:Z"/> | |
136 </conditional> | |
137 </conditional> | |
138 <param name="output_as" value="diff.sam"/> | |
139 <output name="diff_sam"> | |
140 <assert_contents> | |
141 <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228" /> | |
142 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
143 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
144 </assert_contents> | |
145 </output> | |
146 <output name="diff_only1_sam"> | |
147 <assert_contents> | |
148 <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
149 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
150 <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG" /> | |
151 <has_text text="AS:i:0" /> | |
152 <has_text text="MD:Z:75" /> | |
153 </assert_contents> | |
154 </output> | |
155 <output name="diff_only2_sam"> | |
156 <assert_contents> | |
157 <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
158 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
159 <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA" /> | |
160 <has_text text="AS:i:0" /> | |
161 <has_text text="MD:Z:75" /> | |
162 </assert_contents> | |
163 </output> | |
164 </test> | |
165 <!-- Test-3 --> | |
166 <test> | |
167 <param name="in1" ftype="sam" value="in1.sam"/> | |
168 <param name="in2" ftype="sam" value="in3.sam"/> | |
169 <param name="posDiff" value="100000"/> | |
170 <param name="onlyDiffs" value="true"/> | |
171 <conditional name="fields"> | |
172 <param name="choice" value="default"/> | |
173 </conditional> | |
174 <param name="output_as" value="diff.txt"/> | |
175 <output name="diff_txt"> | |
176 <assert_contents> | |
177 <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" /> | |
178 </assert_contents> | |
179 </output> | |
180 </test> | |
181 </tests> | |
182 <help><![CDATA[ | |
183 **bamUtil diff** | |
184 | |
185 The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools. | |
186 The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different. | |
187 diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work. | |
188 By default, just the chromosome/position and cigar are compared for each record. | |
189 Note: The headers are not compared. | |
190 | |
191 Options are available to compare:: | |
192 | |
193 - all fields | |
194 - flags | |
195 - mapping quality | |
196 - mate chromosome/position | |
197 - insert size | |
198 - sequence | |
199 - base quality | |
200 - specified tags | |
201 - all tags | |
202 - turn off position comparison | |
203 - turn off cigar comparison | |
204 | |
205 **Inputs** | |
206 Two BAM or SAM alignment files | |
207 | |
208 **Outputs** | |
209 Choice of 2 Output Formats: | |
210 | |
211 :: | |
212 | |
213 **Diff Format** | |
214 There are 2 types of differences. | |
215 ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff | |
216 ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different | |
217 Each difference output consists of 2 or 3 lines: 2 lines if the record appears in only one of the files, 3 lines if it appears in both files. | |
218 The first line of the difference output is just the read name. | |
219 The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'. | |
220 The 2nd line is the flag followed by the diff'd fields from one of the records. | |
221 The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record. | |
222 The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified:: | |
223 | |
224 - '<' or '>' | |
225 - flag | |
226 - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified | |
227 - cigar - if --noCigar is not specified | |
228 - mapping quality - if --mapQual or --all is specified | |
229 - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified | |
230 - insert size - if --isize or --all is specified | |
231 - sequence - if --seq or --all is specified | |
232 - base quality - if --baseQual or --all is specified | |
233 - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified | |
234 | |
235 | |
236 **BAM Format** | |
237 In SAM/BAM format there will be 3 output files:: | |
238 | |
239 1. the specified name with record diffs | |
240 2. specified name with _only1_<in1>.sam/bam with records only in the in1 file | |
241 3. specified name with _only2_<in2>.sam/bam with records only in the in2 file | |
242 | |
243 Records that are identical in the two files are not written in any of these output files. | |
244 When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags:: | |
245 | |
246 - ZF - Flag | |
247 - ZP - Chromosome:1-based Position | |
248 - ZC - Cigar | |
249 - ZM - Mapping Quality | |
250 - ZN - Chromosome:1-based Mate Position | |
251 - ZI - Insert Size | |
252 - ZS - Sequence | |
253 - ZQ - Base Quality | |
254 - ZT - Tags | |
255 | |
256 If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags. | |
257 | |
258 | |
259 | |
260 | |
261 https://genome.sph.umich.edu/wiki/BamUtil:_diff | |
262 | |
263 ]]></help> | |
264 <expand macro="citations" /> | |
265 </tool> |