Mercurial > repos > jjohnson > fgbio_sort_bam

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fgbio_sort_bam.xml	Sun Feb 21 23:41:16 2021 +0000
@@ -0,0 +1,42 @@
+<tool id="fgbio_sort_bam" name="fgbio SortBam" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <!-- Galaxy wrapper around "fgbio SortBam". The version tokens and the requirements, sam_sort_order and citations macros are imported from macros.xml. -->
+    <description>Sorts a SAM or BAM file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>fgbio --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        fgbio SortBam
+        --input '$input'
+        ## sort_order is an optional select: emit the flag only when a value was chosen
+        #if $sort_order
+            --sort-order=$sort_order
+        #end if
+        --output '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="bam,sam" label="SAM/BAM to sort"/>
+        <!-- Expands to the optional sort-order select, referenced in the command as $sort_order. -->
+        <expand macro="sam_sort_order" />
+    </inputs>
+    <outputs>
+        <!-- NOTE(review): the output is declared as unsorted.bam for every sort order; confirm whether Coordinate output should be declared as bam instead. -->
+        <data name="output" format="unsorted.bam" />
+    </outputs>
+    <help><![CDATA[
+**fgbio SortBam**
+
+Sorts a SAM or BAM file. Several sort orders are available:
+
+    - Coordinate: sorts reads by their reference sequence and left-most aligned coordinate
+    - Queryname: sorts the reads by their query (i.e. read) name
+    - Random: sorts the reads into a random order. The output is deterministic for any given input.
+    - RandomQuery: sorts the reads into a random order but keeps reads with the same queryname together. The ordering is deterministic for any given input.
+
+If no sort order is selected, the option is omitted and fgbio applies its own default sort order.
+
+http://fulcrumgenomics.github.io/fgbio/tools/latest/SortBam.html
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Feb 21 23:41:16 2021 +0000
@@ -0,0 +1,66 @@
+<macros>
+    <!-- Shared tokens and macros for the fgbio Galaxy tool wrappers. -->
+    <!-- Version of the fgbio package, and the suffix appended to it in the tool version attribute. -->
+    <token name="@TOOL_VERSION@">1.3.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <!-- Requires the fgbio package at the tool version; a caller may add further requirements through the yield. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- One read structure: zero or more fixed-length segments, then a final segment whose length is a positive integer or "+". -->
+    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token>
+    <!-- One or more read structures, each separated from the next by a single whitespace character. -->
+    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token>
+    <!-- Validator for a parameter holding a whitespace-separated list of read structures. -->
+    <xml name="read_structures_validator">
+            <validator type="regex" message="Enter one or more read structures separated by single spaces, e.g. 5M5S+T +T +B">^@READ_STRUCTURES_PATTERN@$</validator>
+    </xml>
+    <!-- Validator for a SAM tag: two characters, a letter followed by a letter or a digit (as in the SAM specification). -->
+    <xml name="sam_tag_validator">
+            <validator type="regex" message="A SAM tag must be exactly two characters: a letter followed by a letter or digit">^[A-Za-z][A-Za-z0-9]$</validator>
+    </xml>
+    <!-- Optional select offering the four sort orders; tools must cope with no value being selected. -->
+    <xml name="sam_sort_order">
+        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="Coordinate">Coordinate</option>
+            <option value="Queryname">Queryname</option>
+            <option value="Random">Random</option>
+            <option value="RandomQuery">RandomQuery</option>
+        </param>
+    </xml>
+
+    <!-- Help text describing the read-structure syntax, for inclusion in tool help sections. -->
+    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
+**Read Structures**
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+    T identifies a template read
+    B identifies a sample barcode read
+    M identifies a unique molecular index read
+    S identifies a set of bases that should be skipped or ignored
+
+The last <number><operator> pair may be specified using a + sign instead of a number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example, to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternatively, if you know your reads are of fixed length, you could specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]></token>
+    <!-- Citations container; the expanding tool supplies individual citation elements through the yield. -->
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>