Mercurial > repos > iuc > umi_tools_group

<tool id="umi_tools_group" name="UMI-tools group" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Extract UMI from fastq files</description>
    <expand macro="bio_tools"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="1.21">samtools</requirement>
    </expand>
    <command detect_errors="exit_code"><![CDATA[
        @LINK_SAM_BAM_INPUT@

        umi_tools group
            #if $group_output:
                --group-out '$group_out'
            #end if
            --output-bam
            @GROUPDEDUP_OPTIONS@
            @BARCODE_OPTIONS@
            @UMI_GROUPING_OPTIONS@
            @SAMBAM_OPTIONS@
            @FULLSC_OPTIONS@
            -I '$input_file' -S grouped.bam
            @ADVANCED_OPTIONS@
            @LOG@
            ## TODO using samtools sort is a workaround, for the following error that appears when Galaxy
            ## compares the generated file with the one in test-data
            ## `Converting history BAM to SAM failed: 'samtools returned with error 1: stdout=None, stderr=[main_samview] fail to read the header from "/tmp/tmpd8o61jykdedup_out6.bam".\n'. Will compare BAM files`
            ## may be dropped in the future
            --no-sort-output
            && samtools sort --no-PG grouped.bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$output' -O BAM
    ]]></command>
    <inputs>
        <param name="input" type="data" format="sam,bam" label="Reads to group in SAM or BAM format" />
        <param name="group_output" argument="--group-out" type="boolean" truevalue="--group-out" falsevalue="" label="Output a flatfile describing the read groups" />
        <expand macro="groupdedup_options_macro"/>
        <expand macro="barcode_options_macro"/>
        <expand macro="umi_grouping_options_macro"/>
        <expand macro="sambam_options_macro"/>
        <expand macro="fullsc_options_macro"/>
        <expand macro="advanced_options_macro"/>
        <expand macro="log_input_macro"/>
    </inputs>
    <outputs>
        <data format="bam" name="output" />
        <data format="tabular" name="group_out">
            <filter>group_output</filter>
        </data>
        <expand macro="log_output_macro"/>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="input" value="group_in2.sam" ftype="sam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="read_id" />
            </conditional>
            <section name="sambam">
                <param name="paired" value="true" />
            </section>
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <output name="output" file="group_out2.bam" ftype="bam" />
        </test>
        <test expect_num_outputs="2">
            <param name="input" value="group_in3.bam" ftype="bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="read_id" />
            </conditional>
            <param name="group_output" value="true" />
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <output name="group_out" file="group_out3.tab" />
            <output name="output" file="group_out3.bam" ftype="bam" />
        </test>
        <test expect_num_outputs="2">
            <param name="input" value="group_in4.bam" ftype="bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="tag" />
                <param name="umi_tag" value="BX" />
            </conditional>
            <param name="group_output" value="true" />
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <output name="group_out" file="group_out4.tab" />
            <output name="output" file="group_out4.bam" ftype="bam" />
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="group_in5.bam" ftype="bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="read_id" />
            </conditional>
            <section name="umi">
                <param name="method" value="cluster" />
            </section>
            <output name="output" file="group_out5.bam" ftype="bam"/>
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="group_in6.bam" ftype="bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="read_id" />
            </conditional>
            <section name="umi">
                <param name="method" value="directional" />
            </section>
            <output name="output" file="group_out6.bam" ftype="bam"/>
        </test>
    </tests>
    <help><![CDATA[
umi_tools group - Group reads based on their UMI
================================================

Purpose
-------

The purpose of this command is to identify groups of reads based on
their genomic coordinate and UMI.

The group command can be used to create two types of outfile: a tagged
BAM or a flatfile describing the read groups

To generate the tagged-BAM file, use the option --output-bam and
provide a filename with the -S option. Alternatively, if you do not
provide a filename, the bam file will be outputted to the stdout. If
you have provided the --log/-L option to send the logging output
elsewhere, you can pipe the output from the group command directly to
e.g samtools sort like so:

``umi_tools group -I inf.bam --group-out=grouped.tsv --output-bam --log=group.log --paired | samtools sort - -o grouped_sorted.bam``

The tagged-BAM file will have two tagged per read:

 - UG = Unique_id.
    0-indexed unique id number for each group of reads with the same genomic position and UMI or UMIs inferred to be from the same true UMI + errors

 - BX = Final UMI.
     The inferred true UMI for the group

To generate the flatfile describing the read groups, include the
--group-out=<filename> option. The columns of the read groups file are
below. The first five columns relate to the read. The final 3 columns
relate to the group.

  - read_id
      read identifier

  - contig
      alignment contig

  - position
      Alignment position. Note that this position is not the start position of the read in the BAM file but the start of the read taking into account the read strand and cigar

  - umi
      The read UMI

  - umi_count
      The number of times this UMI is observed for reads at the same position

  - final_umi
      The inferred true UMI for the group

  - final_umi_count
      The total number of reads within the group

  - unique_id
      The unique id for the group

@BARCODE_HELP@

@UMI_GROUPING_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
author	iuc
date	Sat, 28 Sep 2024 16:41:21 +0000
parents	cf25b50eff0a
children	428735be9764