Mercurial > repos > jjohnson > fgbio_sort_bam
changeset 0:286e35cf3f1c draft
"planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
author | jjohnson |
---|---|
date | Sun, 21 Feb 2021 23:41:16 +0000 |
parents | |
children | cb58d1961fd3 |
files | fgbio_sort_bam.xml macros.xml |
diffstat | 2 files changed, 90 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fgbio_sort_bam.xml Sun Feb 21 23:41:16 2021 +0000
@@ -0,0 +1,34 @@
+<tool id="fgbio_sort_bam" name="fgbio SortBam" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>Sorts a SAM or BAM file</description>
+    <macros>
+        <import>macros.xml</import> <!-- supplies the version tokens and the requirements, sam_sort_order and citations macros used below -->
+    </macros>
+    <expand macro="requirements" />
+    <version_command>fgbio --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        fgbio SortBam
+            --input '$input'
+            #if $sort_order# --sort-order='$sort_order' #end if#
+            --output '$output'
+    ]]></command> <!-- the sort-order select is optional, so the flag is only emitted when an order was chosen; otherwise fgbio's own default applies -->
+    <inputs>
+        <param name="input" type="data" format="bam,sam" label="SAM/BAM to sort"/>
+        <expand macro="sam_sort_order" />
+    </inputs>
+    <outputs>
+        <data name="output" format="unsorted.bam" /> <!-- unsorted.bam: the result is not guaranteed to be coordinate-sorted -->
+    </outputs>
+    <help><![CDATA[
+**fgbio SortBam**
+
+Sorts a SAM or BAM file. Several sort orders are available:
+
+ - Coordinate: sorts reads by their reference sequence and left-most aligned coordinate
+ - Queryname: sorts the reads by their query (i.e. read) name
+ - Random: sorts the reads into a random order. The output is deterministic for any given input.
+ - RandomQuery: sorts the reads into a random order but keeps reads with the same queryname together. The ordering is deterministic for any given input.
+
+http://fulcrumgenomics.github.io/fgbio/tools/latest/SortBam.html
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Feb 21 23:41:16 2021 +0000
@@ -0,0 +1,56 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.3.0</token> <!-- fgbio release; also pins the package requirement below -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended to the tool version as +galaxyN -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token> <!-- one read structure; only the last segment may use + as its length -->
+    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token> <!-- one or more read structures, each separated by a single whitespace character -->
+    <xml name="read_structures_validator">
+        <validator type="regex" message="Enter one or more read structures separated by single spaces, e.g. 5M5S+T +T +B">^@READ_STRUCTURES_PATTERN@$</validator>
+    </xml>
+    <xml name="sam_tag_validator">
+        <validator type="regex" message="A SAM tag is exactly two characters: a letter followed by a letter or digit">^[A-Za-z][A-Za-z0-9]$</validator>
+    </xml>
+    <xml name="sam_sort_order">
+        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="Coordinate">Coordinate</option>
+            <option value="Queryname">Queryname</option>
+            <option value="Random">Random</option>
+            <option value="RandomQuery">RandomQuery</option>
+        </param>
+    </xml>
+
+    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
+**Read Structures**
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+    T identifies a template read
+    B identifies a sample barcode read
+    M identifies a unique molecular index read
+    S identifies a set of bases that should be skipped or ignored
+
+The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternatively, if you know your reads are of fixed length you could specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]></token>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>