view vsnp_add_zero_coverage.xml @ 7:6dc6dd4666e3 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
author iuc
date Tue, 16 Nov 2021 20:10:48 +0000
parents 9ddeef840a07
children 40b97055bb99
line wrap: on
line source

<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <expand macro="biopython_requirement"/>
        <expand macro="openpyxl_requirement"/>
        <expand macro="pandas_requirement"/>
        <requirement type="package" version="0.16.0.1">pysam</requirement>
    </requirements>
    <!-- Cheetah template: links both inputs into the working directory, then runs vsnp_add_zero_coverage.py. -->
    <command detect_errors="exit_code"><![CDATA[
#import re

## The identifier for both of the following files is likely the same
## string, so we append a file extension to allow for both links.
## Every character that is not whitespace, a word character or a hyphen
## is replaced with an underscore before the name is used.  The patterns
## are raw strings so the backslash sequences reach the re module intact.
#set bam_identifier = re.sub(r'[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
ln -s '${bam_input}' '${bam_identifier}' &&
#set vcf_identifier = re.sub(r'[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
ln -s '${vcf_input}' '${vcf_identifier}' &&

python '$__tool_directory__/vsnp_add_zero_coverage.py'
--bam_input '$bam_identifier'
--vcf_input '$vcf_identifier'
## A cached reference is passed as the path field of the selected data
## table entry; otherwise the selected history dataset is passed.
#if str($reference_cond.reference_source) == 'cached'
    --reference '$reference_cond.reference.fields.path'
#else
    --reference '$reference_cond.reference'
#end if
--output_metrics '$output_metrics'
--output_vcf '$output_vcf'
]]></command>
    <inputs>
        <!-- The BAM file and the VCF file that are processed together. -->
        <param name="bam_input"
               type="data"
               format="bam"
               label="BAM file"/>
        <param name="vcf_input"
               type="data"
               format="vcf"
               label="VCF file"/>
        <!-- Reference genome, chosen from the fasta_indexes data table or from the history. -->
        <conditional name="reference_cond">
            <!-- Defined in macros.xml; presumably supplies the reference_source selector that the when blocks below switch on (confirm against the macro). -->
            <expand macro="param_reference_source"/>
            <when value="cached">
                <param name="reference"
                       type="select"
                       label="Using reference genome">
                    <options from_data_table="fasta_indexes">
                        <!-- Restrict the options using the dbkey metadata of the selected BAM file (matched against column 1). -->
                        <filter type="data_meta"
                                ref="bam_input"
                                key="dbkey"
                                column="1"/>
                        <validator type="no_options"
                                   message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference"
                       type="data"
                       format="fasta,fasta.gz"
                       label="Using reference genome"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- VCF dataset whose path is handed to the script as the output_vcf argument. -->
        <data name="output_vcf"
              format="vcf"
              label="${tool.name} on ${on_string} (filtered VCF)"/>
        <!-- Tabular dataset whose path is handed to the script as the output_metrics argument. -->
        <data name="output_metrics"
              format="tabular"
              label="${tool.name} on ${on_string} (metrics)"/>
    </outputs>
    <tests>
        <!-- Reference genome supplied as a FASTA dataset from the history. -->
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <!-- Conditional parameters are nested so that each name is resolved inside reference_cond rather than by its bare name. -->
            <conditional name="reference_cond">
                <param name="reference_source" value="history"/>
                <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            </conditional>
            <output name="output_vcf" ftype="vcf">
                <assert_contents>
                    <has_size value="259726"/>
                </assert_contents>
            </output>
            <output name="output_metrics" ftype="tabular">
                <assert_contents>
                    <has_size value="109"/>
                </assert_contents>
            </output>
        </test>
        <!-- Reference genome taken from the cached data table. No reference value is set here, so the test relies on the default option of the select. -->
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <conditional name="reference_cond">
                <param name="reference_source" value="cached"/>
            </conditional>
            <output name="output_vcf" ftype="vcf">
                <assert_contents>
                    <has_size value="259726"/>
                </assert_contents>
            </output>
            <output name="output_metrics" ftype="tabular">
                <assert_contents>
                    <has_size value="109"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

Accepts a single BAM file and its associated VCF file (or associated collections of each) and produces, for each BAM/VCF
combination, a VCF file in which positions with no coverage are represented as "N".  These outputs are restricted to SNPs
and to those regions along the reference with no coverage.

A metrics file is also produced for each combination; it reports the number of good SNPs, the average coverage and the
genome coverage percentage.

**Required Options**

 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
    </help>
    <expand macro="citations"/>
</tool>