view pbgcpp.xml @ 1:a6c40438946b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pbgcpp commit b9fe99d8335825c6c23f569034354323b794127a
author iuc
date Wed, 08 Mar 2023 12:00:29 +0000
parents a6d93d0d5328
children
line wrap: on
line source

<tool id="pbgcpp" name="pbgcpp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Compute genomic consensus and call variants using PacBio reads mapped to a reference.</description>
    <!-- Shared definitions are imported from macros.xml. The @TOOL_VERSION@,
         @VERSION_SUFFIX@ and @PROFILE@ tokens and the macros expanded in this
         file are presumably defined there (not visible here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Resolve the reference FASTA. A built-in genome is used in place via
        ## the path column of its data table entry; a history dataset is
        ## symlinked to a fixed name in the job directory.
        #if $reference_source.reference_source_selector == 'cached':
            #set ref_path = $reference_source.ref_file.fields.path
        #else:
            #set ref_path = 'reference.fa'
            ln -f -s '$reference_source.ref_file' '$ref_path' &&
        #end if

        ## Link the alignments and their BAM index under the fixed names used below.
        ln -s '$input' 'input.bam' &&
        ln -s '$input.metadata.bam_index' 'input.bam.bai' &&

        ## Build the comma-separated output list: one output.EXT entry per
        ## selected format (the select values are the file extensions).
        #set output_line = ','.join('output.%s' % ext for ext in $output_selector)

        ## Run gcpp with the number of threads granted by Galaxy (4 if unset).
        gcpp --num-threads \${GALAXY_SLOTS:-4} --reference '$ref_path' --output $output_line 'input.bam'
    ]]></command>
    <inputs>
        <!-- Reference selection, adapted from the tools-iuc minimap2 wrapper:
             either a built-in genome from the all_fasta data table or a FASTA
             dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?">
                <option value="cached">Use a built-in genome index</option>
                <option value="history">Use a genome from history and build index</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No reference genomes are available" />
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <!-- Aligned reads; the command section links this dataset and its BAM index into the job directory. -->
        <param type="data" name="input" format="bam" label="bam" help="The input BAM alignment file" />
        <!-- Output Options -->
        <!-- optional="false": a multiple select is optional by default, but the
             command section needs at least one selected format to build the
             list passed to gcpp. -->
        <param name="output_selector" type="select" multiple="true" optional="false" display="checkboxes" label="Output formats">
            <!-- Use the format's extension as the value, so we can use it directly in the output_line constructor. -->
            <option value="fa" selected="true">Computed consensus (fasta)</option>
            <option value="vcf">Variants (vcf)</option>
            <option value="gff">Variants (gff)</option>
        </param>
    </inputs>
    <outputs>
        <!-- One dataset per selectable format. Each dataset is only created
             when its extension is ticked in output_selector, and it is
             collected from the matching file in the job working directory. -->
        <data name="fa"
              format="fasta"
              label="${tool.name} on ${on_string} (consensus)"
              from_work_dir="output.fa">
            <filter>output_selector and 'fa' in output_selector</filter>
        </data>
        <data name="gff"
              format="gff"
              label="${tool.name} on ${on_string} (gff)"
              from_work_dir="output.gff">
            <filter>output_selector and 'gff' in output_selector</filter>
        </data>
        <data name="vcf"
              format="vcf"
              label="${tool.name} on ${on_string} (vcf)"
              from_work_dir="output.vcf">
            <filter>output_selector and 'vcf' in output_selector</filter>
        </data>
    </outputs>
    <tests>
        <!-- Every output is filtered by output_selector, so each test states
             how many datasets it expects. Parameters that belong to the
             reference_source conditional are nested under it. -->
        <!-- test1: basic test (output from pbmm2 1.10.0) -->
        <test expect_num_outputs="1">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="bnd-ref.fasta"/>
            </conditional>
            <param name="input" value="pbmm2_3.bam"/>
            <param name="output_selector" value="fa"/>
            <output name="fa" ftype="fasta" file="pbgcpp_test1_out.fa"/>
        </test>
        <!-- test2: output selector -->
        <test expect_num_outputs="3">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="bnd-ref.fasta"/>
            </conditional>
            <param name="input" value="pbmm2.bam"/>
            <param name="output_selector" value="fa,gff,vcf"/>
            <output name="fa" ftype="fasta" file="pbgcpp_test2_out.fa"/>
            <output name="gff" ftype="gff">
                <assert_contents>
                    <has_text text="gff-version 3" />
                </assert_contents>
            </output>
            <output name="vcf" ftype="vcf">
                <assert_contents>
                    <has_text text="fileformat=VCFv4.2" />
                </assert_contents>
            </output>
        </test>
        <!-- test3: cached genome (presumably needs a "bnd-ref" entry in the
             all_fasta test data table; confirm against the test data config) -->
        <test expect_num_outputs="1">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="cached"/>
                <param name="ref_file" value="bnd-ref"/>
            </conditional>
            <param name="input" value="pbmm2_3.bam"/>
            <param name="output_selector" value="fa"/>
            <output name="fa" ftype="fasta" file="pbgcpp_test3_out.fa"/>
        </test>
    </tests>
        <help><![CDATA[
**What it does**

Compute genomic consensus and call variants relative to the reference.

This tool requires a PacBio BAM file.

You can create one by mapping PacBio reads to the reference genome with
the `pbmm2 <root?tool_id=pbmm2>`__ tool. When doing this, you have to
provide the CLR reads to pbmm2 in unaligned BAM format, not FASTQ or FASTA.
This is because the pbgcpp algorithm uses additional information stored in
the unaligned BAM format that PacBio uses.

**NOTE**: The pbgcpp tool used to be called GenomicConsensus.  It works for PacBio Sequel data and RS data with the P6-C4 chemistry.

--------------

pbgcpp is Pacific Biosciences’ tool to generate accurate reference
contigs. It takes an alignment in the form of a BAM file and polishes
the references with the provided subreads from the alignment. It uses
the Arrow algorithm in multi-molecule consensus setting and can reach up
to QV60 at coverage 100. pbgcpp is the successor of the venerable
GenomicConsensus suite which has reached EOL.

See the `Pacific Biosciences GitHub
page <https://github.com/PacificBiosciences/pbbioconda>`__ for more
information.

**Input**: Aligned subreads in PacBio BAM format (.bam). Compatible with PacBio Sequel data and RS data with the P6-C4 chemistry.

**Output**: Depending on the selected output formats, polished contigs in .fasta format and/or variant calls in .vcf and .gff format.

**Why am I getting “Missing valid chemistry from input file, is this a
proper PBBAM input file?”**

pbgcpp expects metadata in the BAM file that most aligners (like
minimap2) don’t include by default. Align the PacBio reads with
pbmm2 instead.

        ]]></help>
    <expand macro="creator"/>
</tool>