diff StatSTR/statSTR.xml @ 0:ecf8c4f9f8ba draft

"planemo upload for repository https://github.com/Kulivox/TRToolsGalaxyWrapper commit 08480e3145858e41b14bf1061f87243e8cc2d04e"
author michal_klinka
date Sat, 23 Apr 2022 16:34:03 +0000
parents
children 8e8cf43f6836
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/StatSTR/statSTR.xml	Sat Apr 23 16:34:03 2022 +0000
@@ -0,0 +1,270 @@
+<?xml version="1.0" ?>
+<tool id="statSTR" name="StatSTR" version="@tool_version@">
+
+    <description>StatSTR takes in a TR genotyping VCF file and outputs per-locus statistics.</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>statSTR --version</version_command>
+
+    <command detect_errors="exit_code"><![CDATA[statSTR
+
+## vcf definition
+#if $input_output.vcf:
+   --vcf $input_output.vcf
+#end if
+## end vcf definition
+## out definition
+#if $input_output.out:
+   --out $input_output.out
+#end if
+## end out definition
+## vcftype definition
+#if $input_output.vcftype:
+   --vcftype $input_output.vcftype
+#end if
+## end vcftype definition
+## precision definition
+#if $input_output.precision:
+   --precision $input_output.precision
+#end if
+## end precision definition
+## samples definition
+#if $filtering_group.samples:
+   --samples $filtering_group.samples
+#end if
+## end samples definition
+## sample_prefixes definition
+#if $filtering_group.sample_prefixes:
+   --sample-prefixes $filtering_group.sample_prefixes
+#end if
+## end sample_prefixes definition
+## region definition
+#if $filtering_group.region:
+   --region $filtering_group.region
+#end if
+## end region definition
+## thresh definition
+#if $stats_group.thresh:
+   --thresh
+#end if
+## end thresh definition
+## afreq definition
+#if $stats_group.afreq:
+   --afreq
+#end if
+## end afreq definition
+## acount definition
+#if $stats_group.acount:
+   --acount
+#end if
+## end acount definition
+## hwep definition
+#if $stats_group.hwep:
+   --hwep
+#end if
+## end hwep definition
+## het definition
+#if $stats_group.het:
+   --het
+#end if
+## end het definition
+## entropy definition
+#if $stats_group.entropy:
+   --entropy
+#end if
+## end entropy definition
+## mean definition
+#if $stats_group.mean:
+   --mean
+#end if
+## end mean definition
+## mode definition
+#if $stats_group.mode:
+   --mode
+#end if
+## end mode definition
+## var definition
+#if $stats_group.var:
+   --var
+#end if
+## end var definition
+## numcalled definition
+#if $stats_group.numcalled:
+   --numcalled
+#end if
+## end numcalled definition
+## use_length definition
+#if $stats_group.use_length:
+   --use-length
+#end if
+## end use_length definition
+## plot_afreq definition
+#if $plotting_group.plot_afreq:
+   --plot-afreq
+#end if
+## end plot_afreq definition
+## version definition
+#if $version9ac8.version:
+   --version
+#end if
+## end version definition
+1>$stdout 2>$stderr
+]]></command>
+
+
+    <inputs>
+        <section name="input_output" title="Input/output" expanded="false">
+            <param argument="--vcf" type="data" format="vcf" optional="false" label="vcf" help="Input STR VCF file"/>
+            <param argument="--out" type="text" optional="false" label="out"
+                   help="Output file prefix. Use stdout to print file to standard output. In addition, if not stdout then timing diagnostics are print to stdout."/>
+            <param argument="--vcftype" type="select" optional="true" label="vcftype"
+                   help="Genotyper that was used to create this VCF input">
+                <expand macro="vcfTypes"/>
+            </param>
+            <param argument="--precision" type="integer" optional="true" label="precision"
+                   help="How much precision to use when printing decimals"/>
+        </section>
+        <section name="filtering_group" title="Filtering group" expanded="false">
+            <param argument="--samples" type="text" optional="true" label="samples"
+                   help="File containing list of samples to include. Or a comma-separated list of files to compute stats separate for each group of samples"/>
+            <param argument="--sample-prefixes" type="text" optional="true" label="sample_prefixes"
+                   help="Prefixes to name output for each samples group. By default uses 1,2,3 etc."/>
+            <param argument="--region" type="text" optional="true" label="region"
+                   help="Restrict to the region chrom:start-end. Requires file to bgzipped and tabix indexed."/>
+        </section>
+        <section name="stats_group" title="Stats group" expanded="false">
+            <param argument="--thresh" type="boolean" optional="true" label="thresh"
+                   help="Output threshold field (max allele size, used for GangSTR strinfo)."/>
+            <param argument="--afreq" type="boolean" optional="true" label="afreq" help="Output allele frequencies"/>
+            <param argument="--acount" type="boolean" optional="true" label="acount" help="Output allele counts"/>
+            <param argument="--hwep" type="boolean" optional="true" label="hwep" help="Output HWE p-values per loci."/>
+            <param argument="--het" type="boolean" optional="true" label="het"
+                   help="Output the heterozygosity of each locus."/>
+            <param argument="--entropy" type="boolean" optional="true" label="entropy"
+                   help="Output the entropy of each locus."/>
+            <param argument="--mean" type="boolean" optional="true" label="mean"
+                   help="Output mean of the allele frequencies."/>
+            <param argument="--mode" type="boolean" optional="true" label="mode"
+                   help="Output mode of the allele frequencies."/>
+            <param argument="--var" type="boolean" optional="true" label="var"
+                   help="Output variance of the allele frequencies."/>
+            <param argument="--numcalled" type="boolean" optional="true" label="numcalled"
+                   help="Output number of samples called."/>
+            <param argument="--use-length" type="boolean" optional="true" label="use_length"
+                   help="Calculate per-locus stats (het, HWE) collapsing alleles by length. This is implicitly true for genotypers which only emit length based genotypes."/>
+        </section>
+        <section name="plotting_group" title="Plotting group" expanded="false">
+            <param argument="--plot-afreq" type="boolean" optional="true" label="plot_afreq"
+                   help="Output allele frequency plot. Will only do for a maximum of 10 TRs."/>
+        </section>
+        <section name="version9ac8" title="Version" expanded="false">
+            <param argument="--version" type="boolean" optional="true" label="version"
+                   help="show program's version number and exit"/>
+        </section>
+    </inputs>
+
+
+    <outputs>
+        <data name="stdout" label="Outputs and console output" format="txt">
+            <discover_datasets format="tabular" pattern="(?P&lt;designation&gt;.+)\.tab" visible="true"/>
+        </data>
+        <data name="stderr" label="stderr output" format="txt"/>
+    </outputs>
+
+
+    <tests>
+        <test>
+            <section name="main_params">
+                <param name="vcf" value="NA12878_chr21_hipstr.sorted.vcf.gz"/>
+                <param name="out" value="test_result"/>
+                <param name="vcftype" value="hipstr"/>
+            </section>
+            <section name="opt_basic">
+                <param name="precision" value="5"/>
+            </section>
+            <section name="statistic_opt">
+                <param name="thresh" value="true"/>
+                <param name="afreq" value="true"/>
+                <param name="acount" value="true"/>
+                <param name="hwep" value="true"/>
+                <param name="het" value="true"/>
+                <param name="entropy" value="true"/>
+                <param name="mean" value="true"/>
+                <param name="mode" value="true"/>
+                <param name="var" value="true"/>
+                <param name="numcalled" value="true"/>
+            </section>
+
+            <output name="stdout">
+                <assert_contents>
+                    <has_text text="Finished 9550 records"/>
+                </assert_contents>
+                <discovered_dataset designation="test_result" ftype="tabular" file="test_result.tab"/>
+            </output>
+
+            <output name="stderr" file="stderr.txt"/>
+
+        </test>
+    </tests>
+
+
+    <help>usage: main.py [-h] --vcf VCF --out OUT [--vcftype VCFTYPE] [--precision PRECISION] [--samples SAMPLES]
+        [--sample-prefixes SAMPLE_PREFIXES] [--region REGION] [--thresh] [--afreq] [--acount] [--hwep] [--het]
+        [--entropy] [--mean]
+        [--mode] [--var] [--numcalled] [--use-length] [--plot-afreq] [--version]
+
+        options:
+        -h, --help show this help message and exit
+
+        Input/output:
+        --vcf VCF Input STR VCF file
+        --out OUT Output file prefix. Use stdout to print file to standard output. In addition, if not stdout then
+        timing diagnostics are print to stdout.
+        --vcftype VCFTYPE ##!!## Name trh could not be loaded
+        --precision PRECISION
+        How much precision to use when printing decimals
+
+        Filtering group:
+        --samples SAMPLES File containing list of samples to include. Or a comma-separated list of files to compute
+        stats separate for each group of samples
+        --sample-prefixes SAMPLE_PREFIXES
+        Prefixes to name output for each samples group. By default uses 1,2,3 etc.
+        --region REGION Restrict to the region chrom:start-end. Requires file to bgzipped and tabix indexed.
+
+        Stats group:
+        --thresh Output threshold field (max allele size, used for GangSTR strinfo).
+        --afreq Output allele frequencies
+        --acount Output allele counts
+        --hwep Output HWE p-values per loci.
+        --het Output the heterozygosity of each locus.
+        --entropy Output the entropy of each locus.
+        --mean Output mean of the allele frequencies.
+        --mode Output mode of the allele frequencies.
+        --var Output variance of the allele frequencies.
+        --numcalled Output number of samples called.
+        --use-length Calculate per-locus stats (het, HWE) collapsing alleles by length. This is implicitly true for
+        genotypers which only emit length based genotypes.
+
+        Plotting group:
+        --plot-afreq Output allele frequency plot. Will only do for a maximum of 10 TRs.
+
+        Version:
+        --version show program's version number and exit
+    </help>
+
+
+    <citations>
+        <citation type="bibtex">@misc{TRTools: a toolkit for genome-wide analysis of tandem repeats,
+            author = {Nima Mousavi, Jonathan Margoliash, Neha Pusarla, Shubham Saini, Richard Yanicky, Melissa Gymrek},
+            year = {2020},
+            title = {TRTools},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/gymreklab/trtools},
+            }
+        </citation>
+    </citations>
+</tool>