diff bamutil_diff.xml @ 0:2cafa8420c04 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/bamutil/ commit c1945909ca200610f128577b68a82d9228905f3d-dirty"
author jjohnson
date Fri, 26 Mar 2021 13:16:53 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamutil_diff.xml	Fri Mar 26 13:16:53 2021 +0000
@@ -0,0 +1,265 @@
+<tool id="bamutil_diff" name="BamUtil diff" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>two coordinate sorted SAM/BAM files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+    bam diff 
+    --in1 '$in1'
+    --in2 '$in2'
+    #if $fields.choice == 'all':
+        --all
+    #elif $fields.choice == 'select':
+        $fields.flag
+        $fields.mapQual
+        $fields.mate
+        $fields.isize
+        $fields.seq
+        $fields.baseQual
+        $fields.noCigar
+        $fields.noPos
+        #if $fields.samtags.tagchoice == 'everyTag':
+            --everyTag
+        #elif $fields.samtags.tagchoice == 'specify':
+            --tags '$fields.samtags.tags'
+        #end if
+    #end if
+    --posDiff $posDiff
+    --recPoolSize -1
+    $onlyDiffs
+    --params
+    --noPhoneHome
+    --out $output_as
+    ]]></command>
+    <inputs>
+        <param argument="--in1" type="data" format="sam,bam" label="Input BAM 1"/>
+        <param argument="--in2" type="data" format="sam,bam" label="Input BAM 2"/>
+        <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/>
+        <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/>
+        <conditional name="fields">
+            <param name="choice" type="select" label="BAM fields to diff">
+                <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option>
+                <option value="all">Diff all the SAM/BAM fields</option>
+                <option value="select">Select SAM/BAM fields to diff</option>
+            </param>
+            <when value="default"/>
+            <when value="all"/>
+            <when value="select">
+                <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/>
+                <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/>
+                <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/>
+                <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/>
+                <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/>
+                <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/>
+                <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the the cigars."/>
+                <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/>
+                <conditional name="samtags">
+                    <param name="tagchoice" type="select" label="Tags to diff">
+                        <option value="none">Do not diff tags</option>
+                        <option value="everyTag">Diff every tag</option>
+                        <option value="specify">Specify tags to diff</option>
+                    </param>
+                    <when value="none"/>
+                    <when value="everyTag"/>
+                    <when value="specify">
+                        <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type...">
+                            <validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <param name="output_as" type="select" label="Output format">
+            <option value="diff.txt">ASCII text diff file</option>
+            <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option>
+            <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="diff_bam" format="bam" from_work_dir="diff.bam" label="${tool.name} on ${on_string}: diff.bam">
+            <filter>output_as == 'diff.bam'</filter>
+        </data>
+        <data name="diff_only1_bam" format="bam" from_work_dir="diff_only1_*.bam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
+            <filter>output_as == 'diff.bam'</filter>
+        </data>
+        <data name="diff_only2_bam" format="bam" from_work_dir="diff_only2_*.bam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
+            <filter>output_as == 'diff.bam'</filter>
+        </data>
+        <data name="diff_sam" format="sam" from_work_dir="diff.sam" label="${tool.name} on ${on_string}: diff.sam">
+            <filter>output_as == 'diff.sam'</filter>
+        </data>
+        <data name="diff_only1_sam" format="sam" from_work_dir="diff_only1_*.sam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
+            <filter>output_as == 'diff.sam'</filter>
+        </data>
+        <data name="diff_only2_sam" format="sam" from_work_dir="diff_only2_*.sam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
+            <filter>output_as == 'diff.sam'</filter>
+        </data>
+        <data name="diff_txt" format="txt" from_work_dir="diff.txt" label="${tool.name} on ${on_string}: diff.txt">
+            <filter>output_as == 'diff.txt'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test-1 --> 
+        <test>
+            <param name="in1" ftype="sam" value="in1.sam"/>
+            <param name="in2" ftype="sam" value="in2.sam"/>
+            <param name="posDiff" value="100000"/>
+            <param name="onlyDiffs" value="true"/>
+            <conditional name="fields">
+                <param name="choice" value="default"/>
+            </conditional>
+            <param name="output_as" value="diff.txt"/>
+            <output name="diff_txt" file="diff.txt"/>
+            <output name="diff_txt">
+                <assert_contents>
+                    <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
+                    <has_text_matching expression="&lt;\t1a3\t74M74N1M" />
+                    <has_text_matching expression="&gt;\ta3\t74M66N1M" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test-2 --> 
+        <test>
+            <param name="in1" ftype="sam" value="in1.sam"/>
+            <param name="in2" ftype="sam" value="in2.sam"/>
+            <param name="posDiff" value="100000"/>
+            <param name="onlyDiffs" value="true"/>
+            <conditional name="fields">
+                <param name="choice" value="select"/>
+                <param name="flag" value="true"/>
+                <param name="seq" value="true"/>
+                <conditional name="samtags">
+                    <param name="tagchoice" value="specify"/>
+                    <param name="tags" value="AS:i,MD:Z"/>
+                </conditional>
+            </conditional>
+            <param name="output_as" value="diff.sam"/>
+            <output name="diff_sam">
+                <assert_contents>
+                    <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228" />
+                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
+                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
+                </assert_contents>
+            </output>
+            <output name="diff_only1_sam">
+                <assert_contents>
+                    <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
+                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
+                    <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG" />
+                    <has_text text="AS:i:0" />
+                    <has_text text="MD:Z:75" />
+                </assert_contents>
+            </output>
+            <output name="diff_only2_sam">
+                <assert_contents>
+                    <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
+                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
+                    <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA" />
+                    <has_text text="AS:i:0" />
+                    <has_text text="MD:Z:75" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test-3 --> 
+        <test>
+            <param name="in1" ftype="sam" value="in1.sam"/>
+            <param name="in2" ftype="sam" value="in3.sam"/>
+            <param name="posDiff" value="100000"/>
+            <param name="onlyDiffs" value="true"/>
+            <conditional name="fields">
+                <param name="choice" value="default"/>
+            </conditional>
+            <param name="output_as" value="diff.txt"/>
+            <output name="diff_txt">
+                <assert_contents>
+                    <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**bamUtil diff**
+
+The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools.
+The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different.
+diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work.
+By default, just the chromosome/position and cigar are compared for each record.
+Note: The headers are not compared.
+
+Options are available to compare::
+
+ - all fields
+ - flags
+ - mapping quality
+ - mate chromosome/position
+ - insert size
+ - sequence
+ - base quality
+ - specified tags
+ - all tags
+ - turn off position comparison
+ - turn off cigar comparison
+
+**Inputs**
+  Two BAM or SAM alignment files
+
+**Outputs**
+  Choice of 2 Output Formats:
+
+  ::
+
+    **Diff Format**
+    There are 2 types of differences.
+    ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff
+    ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different
+    Each difference output consists of 2 or 3 lines. If the record only appears in one of the files, the diff is 2 lines, if it appears in both files, the diff is 3 lines.
+    The first line of the difference output is just the read name.
+    The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'.
+    The 2nd line is the flag followed by the diff'd fields from one of the records.
+    The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record.
+    The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified::
+
+      - '<' or '>'
+      - flag
+      - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified
+      - cigar - if --noCigar is not specified
+      - mapping quality - if --mapq or --all is specified
+      - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified
+      - insert size - if --isize or --all is specified
+      - sequence - if --seq or --all is specified
+      - base quality - if --baseQual or --all is specified
+      - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified
+
+
+    **BAM Format**
+    In SAM/BAM format there will be 3 output files::
+
+      1. the specified name with record diffs
+      2. specified name with _only_<in1>.sam/bam with records only in the in1 file
+      3. specified name with _only_<in2>.sam/bam with records only in the in2 file
+
+    Records that are identical in the two files are not written in any of these output files.
+    When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags::
+
+      - ZF - Flag
+      - ZP - Chromosome:1-based Position
+      - ZC - Cigar
+      - ZM - Mapping Quality
+      - ZN - Chromosome:1-based Mate Position
+      - ZI - Insert Size
+      - ZS - Sequence
+      - ZQ - Base Quality
+      - ZT - Tags
+
+    If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags.
+
+
+
+
+https://genome.sph.umich.edu/wiki/BamUtil:_diff
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>