view StatSTR/statSTR.xml @ 0:ecf8c4f9f8ba draft

"planemo upload for repository https://github.com/Kulivox/TRToolsGalaxyWrapper commit 08480e3145858e41b14bf1061f87243e8cc2d04e"
author michal_klinka
date Sat, 23 Apr 2022 16:34:03 +0000
parents
children 8e8cf43f6836
line wrap: on
line source

<?xml version="1.0" ?>
<tool id="statSTR" name="StatSTR" version="@tool_version@">

    <!-- One-line summary of what the tool does. -->
    <description>StatSTR takes in a TR genotyping VCF file and outputs per-locus statistics.</description>

    <!-- Shared definitions are imported from macros.xml; the "requirements"
         macro expanded below is defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />

    <!-- Command used to report the version of the wrapped program. -->
    <version_command><![CDATA[statSTR --version]]></version_command>

    <command detect_errors="exit_code"><![CDATA[statSTR

## Builds the statSTR command line from the tool form.
## Dataset paths and free-text values are single-quoted so that whitespace or
## shell metacharacters in them cannot split or alter the command line.

## ---- Input/output ----
## vcf definition
#if $input_output.vcf:
   --vcf '$input_output.vcf'
#end if
## end vcf definition
## out definition
#if $input_output.out:
   --out '$input_output.out'
#end if
## end out definition
## vcftype definition
#if $input_output.vcftype:
   --vcftype $input_output.vcftype
#end if
## end vcftype definition
## precision definition
## Compare against the empty string instead of testing truthiness, otherwise
## an explicit precision of 0 would be silently dropped.
#if str($input_output.precision) != '':
   --precision $input_output.precision
#end if
## end precision definition

## ---- Filtering group ----
## samples definition
#if $filtering_group.samples:
   --samples '$filtering_group.samples'
#end if
## end samples definition
## sample_prefixes definition
#if $filtering_group.sample_prefixes:
   --sample-prefixes '$filtering_group.sample_prefixes'
#end if
## end sample_prefixes definition
## region definition
#if $filtering_group.region:
   --region '$filtering_group.region'
#end if
## end region definition

## ---- Stats group (plain switches, emitted only when checked) ----
## thresh definition
#if $stats_group.thresh:
   --thresh
#end if
## end thresh definition
## afreq definition
#if $stats_group.afreq:
   --afreq
#end if
## end afreq definition
## acount definition
#if $stats_group.acount:
   --acount
#end if
## end acount definition
## hwep definition
#if $stats_group.hwep:
   --hwep
#end if
## end hwep definition
## het definition
#if $stats_group.het:
   --het
#end if
## end het definition
## entropy definition
#if $stats_group.entropy:
   --entropy
#end if
## end entropy definition
## mean definition
#if $stats_group.mean:
   --mean
#end if
## end mean definition
## mode definition
#if $stats_group.mode:
   --mode
#end if
## end mode definition
## var definition
#if $stats_group.var:
   --var
#end if
## end var definition
## numcalled definition
#if $stats_group.numcalled:
   --numcalled
#end if
## end numcalled definition
## use_length definition
#if $stats_group.use_length:
   --use-length
#end if
## end use_length definition

## ---- Plotting group ----
## plot_afreq definition
#if $plotting_group.plot_afreq:
   --plot-afreq
#end if
## end plot_afreq definition

## ---- Version ----
## version definition
## NOTE(review): per the tool help this switch prints the version and exits,
## so no statistics are produced when it is checked - confirm this is intended.
#if $version9ac8.version:
   --version
#end if
## end version definition

## Both output streams are captured into the two declared output datasets.
1>'$stdout' 2>'$stderr'
]]></command>


    <inputs>
        <!-- Input file, output prefix and general formatting options. -->
        <section name="input_output" title="Input/output" expanded="false">
            <param type="data" format="vcf" argument="--vcf" optional="false"
                   label="vcf"
                   help="Input STR VCF file"/>
            <param type="text" argument="--out" optional="false"
                   label="out"
                   help="Output file prefix. Use stdout to print file to standard output. In addition, if not stdout then timing diagnostics are print to stdout."/>
            <!-- The selectable genotyper names come from the "vcfTypes" macro. -->
            <param type="select" argument="--vcftype" optional="true"
                   label="vcftype"
                   help="Genotyper that was used to create this VCF input">
                <expand macro="vcfTypes"/>
            </param>
            <param type="integer" argument="--precision" optional="true"
                   label="precision"
                   help="How much precision to use when printing decimals"/>
        </section>

        <!-- Options restricting which samples or region are processed. -->
        <section name="filtering_group" title="Filtering group" expanded="false">
            <!-- NOTE(review): the help text describes a file, but this is a free-text
                 field; confirm how users are expected to supply it. -->
            <param type="text" argument="--samples" optional="true"
                   label="samples"
                   help="File containing list of samples to include. Or a comma-separated list of files to compute stats separate for each group of samples"/>
            <param type="text" argument="--sample-prefixes" optional="true"
                   label="sample_prefixes"
                   help="Prefixes to name output for each samples group. By default uses 1,2,3 etc."/>
            <param type="text" argument="--region" optional="true"
                   label="region"
                   help="Restrict to the region chrom:start-end. Requires file to bgzipped and tabix indexed."/>
        </section>

        <!-- Checkboxes selecting which per-locus statistics are written. -->
        <section name="stats_group" title="Stats group" expanded="false">
            <param type="boolean" argument="--thresh" optional="true"
                   label="thresh"
                   help="Output threshold field (max allele size, used for GangSTR strinfo)."/>
            <param type="boolean" argument="--afreq" optional="true"
                   label="afreq"
                   help="Output allele frequencies"/>
            <param type="boolean" argument="--acount" optional="true"
                   label="acount"
                   help="Output allele counts"/>
            <param type="boolean" argument="--hwep" optional="true"
                   label="hwep"
                   help="Output HWE p-values per loci."/>
            <param type="boolean" argument="--het" optional="true"
                   label="het"
                   help="Output the heterozygosity of each locus."/>
            <param type="boolean" argument="--entropy" optional="true"
                   label="entropy"
                   help="Output the entropy of each locus."/>
            <param type="boolean" argument="--mean" optional="true"
                   label="mean"
                   help="Output mean of the allele frequencies."/>
            <param type="boolean" argument="--mode" optional="true"
                   label="mode"
                   help="Output mode of the allele frequencies."/>
            <param type="boolean" argument="--var" optional="true"
                   label="var"
                   help="Output variance of the allele frequencies."/>
            <param type="boolean" argument="--numcalled" optional="true"
                   label="numcalled"
                   help="Output number of samples called."/>
            <param type="boolean" argument="--use-length" optional="true"
                   label="use_length"
                   help="Calculate per-locus stats (het, HWE) collapsing alleles by length. This is implicitly true for genotypers which only emit length based genotypes."/>
        </section>

        <!-- Optional plot output. -->
        <section name="plotting_group" title="Plotting group" expanded="false">
            <param type="boolean" argument="--plot-afreq" optional="true"
                   label="plot_afreq"
                   help="Output allele frequency plot. Will only do for a maximum of 10 TRs."/>
        </section>

        <!-- Version switch of the wrapped program. -->
        <section name="version9ac8" title="Version" expanded="false">
            <param type="boolean" argument="--version" optional="true"
                   label="version"
                   help="show program's version number and exit"/>
        </section>
    </inputs>


    <outputs>
        <!-- Dataset that the command redirects standard output into. Files whose
             names match the pattern below are additionally collected as tabular
             datasets, named after the part of the file name captured as "designation". -->
        <data format="txt" name="stdout" label="Outputs and console output">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tab"
                               format="tabular"
                               visible="true"/>
        </data>

        <!-- Dataset that the command redirects standard error into. -->
        <data format="txt" name="stderr" label="stderr output"/>
    </outputs>


    <tests>
        <!-- Runs the tool on a HipSTR call set with every statistic enabled.
             The section names used here must be the ones declared under <inputs>
             ("input_output" and "stats_group"); parameters placed in sections
             that do not exist in the tool cannot be bound to any input. -->
        <test>
            <section name="input_output">
                <!-- NOTE(review): the test file is gzip-compressed while the vcf input
                     declares format="vcf"; confirm the datatype is accepted as is. -->
                <param name="vcf" value="NA12878_chr21_hipstr.sorted.vcf.gz"/>
                <param name="out" value="test_result"/>
                <param name="vcftype" value="hipstr"/>
                <param name="precision" value="5"/>
            </section>
            <section name="stats_group">
                <param name="thresh" value="true"/>
                <param name="afreq" value="true"/>
                <param name="acount" value="true"/>
                <param name="hwep" value="true"/>
                <param name="het" value="true"/>
                <param name="entropy" value="true"/>
                <param name="mean" value="true"/>
                <param name="mode" value="true"/>
                <param name="var" value="true"/>
                <param name="numcalled" value="true"/>
            </section>

            <!-- Console output must report the processed record count, and the
                 discovered table must match the stored reference file. -->
            <output name="stdout">
                <assert_contents>
                    <has_text text="Finished 9550 records"/>
                </assert_contents>
                <discovered_dataset designation="test_result" ftype="tabular" file="test_result.tab"/>
            </output>

            <output name="stderr" file="stderr.txt"/>

        </test>
    </tests>


    <help><![CDATA[
StatSTR takes in a TR genotyping VCF file and outputs per-locus statistics.

Command-line usage of the wrapped program::

    usage: statSTR [-h] --vcf VCF --out OUT [--vcftype VCFTYPE] [--precision PRECISION] [--samples SAMPLES]
                   [--sample-prefixes SAMPLE_PREFIXES] [--region REGION] [--thresh] [--afreq] [--acount] [--hwep] [--het]
                   [--entropy] [--mean]
                   [--mode] [--var] [--numcalled] [--use-length] [--plot-afreq] [--version]

    options:
      -h, --help            show this help message and exit

    Input/output:
      --vcf VCF             Input STR VCF file
      --out OUT             Output file prefix. Use stdout to print file to standard output. In addition, if not
                            stdout then timing diagnostics are printed to stdout.
      --vcftype VCFTYPE     Genotyper that was used to create this VCF input
      --precision PRECISION
                            How much precision to use when printing decimals

    Filtering group:
      --samples SAMPLES     File containing list of samples to include. Or a comma-separated list of files to
                            compute stats separate for each group of samples
      --sample-prefixes SAMPLE_PREFIXES
                            Prefixes to name output for each samples group. By default uses 1,2,3 etc.
      --region REGION       Restrict to the region chrom:start-end. Requires file to be bgzipped and tabix indexed.

    Stats group:
      --thresh              Output threshold field (max allele size, used for GangSTR strinfo).
      --afreq               Output allele frequencies
      --acount              Output allele counts
      --hwep                Output HWE p-values per loci.
      --het                 Output the heterozygosity of each locus.
      --entropy             Output the entropy of each locus.
      --mean                Output mean of the allele frequencies.
      --mode                Output mode of the allele frequencies.
      --var                 Output variance of the allele frequencies.
      --numcalled           Output number of samples called.
      --use-length          Calculate per-locus stats (het, HWE) collapsing alleles by length. This is implicitly
                            true for genotypers which only emit length based genotypes.

    Plotting group:
      --plot-afreq          Output allele frequency plot. Will only do for a maximum of 10 TRs.

    Version:
      --version             show program's version number and exit
]]></help>


    <citations>
        <!-- BibTeX record for the TRTools GitHub repository. The citation key is a
             single token without spaces, and the author names are joined with
             "and" as BibTeX requires for multiple authors. -->
        <citation type="bibtex">@misc{trtools,
            author = {Nima Mousavi and Jonathan Margoliash and Neha Pusarla and Shubham Saini and Richard Yanicky and Melissa Gymrek},
            year = {2020},
            title = {TRTools},
            publisher = {GitHub},
            journal = {GitHub repository},
            url = {https://github.com/gymreklab/trtools},
            }
        </citation>
    </citations>
</tool>