view stacks_denovomap.xml @ 6:89eb29319bf4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 2f4c9bfc48d63075ae18a1687e8d01ffea509084
author iuc
date Wed, 11 May 2022 06:50:13 +0000
parents 04db7fd6b238
children
line wrap: on
line source

<tool id="stacks2_denovomap" name="Stacks2: de novo map" profile="@PROFILE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>the Stacks pipeline without a reference genome (denovo_map.pl)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
@FASTQ_INPUT_FUNCTIONS@
## working directories: one for the staged reads, one handed to the pipeline via -o
mkdir stacks_inputs stacks_outputs &&

## if a log dataset exists, expose it inside the output directory as denovo_map.log
## (presumably so that the log written by the pipeline ends up in that dataset; confirm against the out_log macro)
#if $output_log
    ln -s '$output_log' stacks_outputs/denovo_map.log &&
#end if

## the helper is defined outside this file; its first return value is emitted
## verbatim as shell code, its second return value is not used in this command
#set ($link_command, $resolved_input_type) = $fastq_input_nonbatch( $input_type.fqinputs, $input_type.input_type_select, ".%d" )
$link_command

denovo_map.pl
--samples stacks_inputs
## the population map is optional, only pass it when a dataset was selected
#if str($popmap) != 'None'
    --popmap '$popmap'
#end if
-o stacks_outputs
## thread count taken from the GALAXY_SLOTS environment variable, 1 if it is unset
-T \${GALAXY_SLOTS:-1}

## assembly options
-M $assembly_options.M
-n $assembly_options.n
## SNP model options
--var-alpha $model_options.var_alpha
--gt-alpha $model_options.gt_alpha
## paired-end options
#if $input_type.input_type_select == "paired"
    --paired
#end if
$pe_options.rm_pcr_duplicates
## population filtering options
--min-samples-per-pop $popfilter_options.min_samples_per_pop
--min-populations $popfilter_options.min_populations

## the catalog.calls output is a gzip-ed vcf extract it
## to make it usable in Galaxy (with the downside that we
## need to gzip it again for downstream calls like populations)
&& gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
    ]]></command>

    <inputs>
        <!-- FASTQ input selector (single datasets or a list:paired collection), defined in macros.xml -->
        <expand macro="fastq_input" multiple="true" listtype="list:paired"/>
        <!-- optional: the command only adds the popmap option when a dataset is selected -->
        <param argument="--popmap" type="data" optional="true" format="tabular,txt" label="Population map"/>
        <!-- mismatch thresholds passed as -M and -n -->
        <section name="assembly_options" title="Assembly options" expanded="true">
            <param name="M" argument="-M" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual" help="used in ustacks"/>
            <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between loci when building the catalog" help="used in cstacks; suggested: set to -M"/>
        </section>
        <!-- provides the var_alpha and gt_alpha values referenced by the command -->
        <section name="model_options" title="SNP model options" expanded="true">
            <expand macro="variant_calling_options_vg" varalpha_default="0.01"/>
        </section>
        <!-- the boolean expands to the literal flag when checked and to nothing otherwise -->
        <section name="pe_options" title="Paired-end options" expanded="true">
            <param argument="--rm-pcr-duplicates" name="rm_pcr_duplicates" type="boolean" checked="false" truevalue="--rm-pcr-duplicates" falsevalue="" label="remove all but one set of read pairs of the same sample that have the same insert length" help=""/>
        </section>
        <!-- filters labelled as applying to the populations step of the pipeline -->
        <section name="popfilter_options" title="Population filtering options" expanded="true">
            <param argument="--min-samples-per-pop" name="min_samples_per_pop" type="integer" value="0" label="minimum percentage of individuals in a population required to process a locus for that population" help="(for populations; default: 0)"/>
            <param argument="--min-populations" name="min_populations" type="integer" value="1" label="minimum number of populations a locus must be present in to process a locus" help="(for populations; default: 1)"/>
        </section>
        <expand macro="in_log"/>
    </inputs>
    <outputs>
        <!-- pipeline also writes tsv2bam.log, gstacks.log, populations.log .. could be a collection -->
        <expand macro="out_log"/>
        <!-- one output macro per program run by the pipeline; all macros are defined in macros.xml
             (tooladd presumably tags the output labels with the originating program; confirm there) -->
        <expand macro="ustacks_outputs_macro" tooladd="(ustacks)"/>
        <expand macro="cstacks_outputs_macro" tooladd="(cstacks)"/>
        <expand macro="sstacks_outputs_macro" tooladd="(sstacks)"/>
        <expand macro="tsv2bam_outputs_macro" tooladd="(tsv2bam)"/>
        <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/>
        <expand macro="populations_output_light" tooladd="(populations)"/>
    </outputs>

    <tests>
        <!-- Paired input, also using the reverse reads.
             The results are tested for equality. The tools corresponding to the programs
             used in the pipeline also have a test (it should be the first one) that tests for
             equality; thereby it is "ensured" that the pipeline with defaults does the same
             as the components with defaults. -->
        <test expect_num_outputs="11">
            <!-- input: two samples, each supplied as a forward/reverse pair -->
            <param name="input_type|input_type_select" value="paired"/>
            <param name="input_type|fqinputs">
                <collection type="list:paired">
                    <element name="PopA_01">
                        <collection type="paired">
                            <element name="forward" ftype="fastqsanger" value="demultiplexed/PopA_01.1.fq"/>
                            <element name="reverse" ftype="fastqsanger" value="demultiplexed/PopA_01.2.fq"/>
                        </collection>
                    </element>
                    <element name="PopA_02">
                        <collection type="paired">
                            <element name="forward" ftype="fastqsanger" value="demultiplexed/PopA_02.1.fq"/>
                            <element name="reverse" ftype="fastqsanger" value="demultiplexed/PopA_02.2.fq"/>
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>

            <!-- the log must report that the pipeline ran to completion -->
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="denovo_map.pl is done."/>
                </assert_contents>
            </output>

            <!-- per-sample tables: tags, snps and alleles for each of the two samples -->
            <output_collection name="tabs" type="list" count="6">
                <element name="PopA_01.tags">
                    <assert_contents>
                        <has_text text="# ustacks version"/>
                    </assert_contents>
                </element>
                <element name="PopA_01.snps">
                    <assert_contents>
                        <has_text text="# ustacks version"/>
                    </assert_contents>
                </element>
                <expand macro="test_element_stacks_completed" element_name="PopA_01.alleles"/>
                <element name="PopA_02.tags">
                    <assert_contents>
                        <has_text text="# ustacks version"/>
                    </assert_contents>
                </element>
                <element name="PopA_02.snps">
                    <assert_contents>
                        <has_text text="# ustacks version"/>
                    </assert_contents>
                </element>
                <expand macro="test_element_stacks_completed" element_name="PopA_02.alleles"/>
            </output_collection>

            <!-- catalog tables -->
            <output_collection name="catalog" type="list" count="3">
                <element name="catalog.alleles">
                    <assert_contents>
                        <has_text text="# cstacks version "/>
                    </assert_contents>
                </element>
                <element name="catalog.snps">
                    <assert_contents>
                        <has_text text="# cstacks version"/>
                    </assert_contents>
                </element>
                <element name="catalog.tags">
                    <assert_contents>
                        <has_text text="# cstacks version"/>
                    </assert_contents>
                </element>
            </output_collection>

            <!-- per-sample matches against the catalog -->
            <output_collection name="matches" type="list" count="2">
                <element name="PopA_01.matches">
                    <assert_contents>
                        <has_text text="# sstacks version"/>
                    </assert_contents>
                </element>
                <element name="PopA_02.matches">
                    <assert_contents>
                        <has_text text="# sstacks version"/>
                    </assert_contents>
                </element>
            </output_collection>

            <!-- per-sample files of the bams collection, only checked by approximate size -->
            <output_collection name="bams" type="list" count="2">
                <element name="PopA_01.matches">
                    <assert_contents>
                        <has_size value="1153" delta="200"/>
                    </assert_contents>
                </element>
                <element name="PopA_02.matches">
                    <assert_contents>
                        <has_size value="1169" delta="200"/>
                    </assert_contents>
                </element>
            </output_collection>

            <!-- the extracted VCF is checked by header, the compressed FASTA by approximate size -->
            <output_collection name="gstacks_out" type="list" count="2">
                <element name="catalog.calls.vcf">
                    <assert_contents>
                        <has_text text="##fileformat=VCFv4.2"/>
                    </assert_contents>
                </element>
                <element name="catalog.fa.gz">
                    <assert_contents>
                        <has_size value="334" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>

            <!-- populations outputs: header checks, plus two comparisons against stored files -->
            <output name="out_haplotypes">
                <assert_contents>
                    <has_text_matching expression="\#\s+Catalog\s+Locus\s+ID\s+Cnt"/>
                </assert_contents>
            </output>
            <output name="out_hapstats">
                <assert_contents>
                    <has_text_matching expression="\#\s+Locus\s+ID\s+Chr\s+BP"/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt" value="populations/populations.log.distribs"/>
            <output name="out_sumstats_sum" ftype="tabular" value="populations/populations.sumstats_summary.tsv"/>
            <output name="out_sumstats">
                <assert_contents>
                    <has_text_matching expression="\#\s+Locus\s+ID\s+Chr\s+BP"/>
                </assert_contents>
            </output>
        </test>
        <!-- SE input as multi selection, defaults testing against the output of the pipeline components -->
        <test expect_num_outputs="11">
            <!-- input: the forward reads of both samples, selected as individual datasets -->
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs" ftype="fastqsanger" value="demultiplexed/PopA_01.1.fq,demultiplexed/PopA_02.1.fq"/>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>

            <!-- the log must report that the pipeline ran to completion -->
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="denovo_map.pl is done."/>
                </assert_contents>
            </output>

            <!-- collections are only checked for their number of elements -->
            <output_collection name="tabs" count="6"/>
            <output_collection name="catalog" type="list" count="3"/>
            <output_collection name="matches" type="list" count="2"/>
            <output_collection name="bams" type="list" count="2"/>
            <output_collection name="gstacks_out" type="list" count="2"/>

            <!-- populations outputs are only checked for containing a "#" character -->
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
        </test>
        <!-- SE input as list, non-defaults, testing only correct size of the collections -->
        <test expect_num_outputs="11">
            <!-- The input is a flat list of forward reads, so the single-end branch of the
                 conditional has to be selected: the paired branch expects a list:paired
                 collection and would make the command add the paired flag. -->
            <param name="input_type|input_type_select" value="single"/>
            <param name="input_type|fqinputs">
                <collection type="list">
                    <element name="PopA_01" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger"/>
                    <element name="PopA_02" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <!-- every option is set to a non-default value ... -->
            <param name="assembly_options|M" value="3"/>
            <param name="assembly_options|n" value="3"/>
            <param name="model_options|var_alpha" value="0.1"/>
            <param name="model_options|gt_alpha" value="0.1"/>
            <param name="pe_options|rm_pcr_duplicates" value="--rm-pcr-duplicates"/>
            <param name="popfilter_options|min_samples_per_pop" value="1"/>
            <param name="popfilter_options|min_populations" value="0"/>
            <!-- ... and each of them must show up in the generated command line -->
            <assert_command>
                <has_text text="-M 3"/>
                <has_text text="-n 3"/>
                <has_text text="--var-alpha 0.1"/>
                <has_text text="--gt-alpha 0.1"/>
                <has_text text="--rm-pcr-duplicates"/>
                <has_text text="--min-samples-per-pop 1"/>
                <has_text text="--min-populations 0"/>
            </assert_command>
            <output ftype="txt" name="output_log"><assert_contents><has_text text="denovo_map.pl is done."/></assert_contents></output>
            <!-- collections are only checked for their number of elements -->
            <output_collection name="tabs" count="6"/>
            <output_collection name="catalog" type="list" count="3"/>
            <output_collection name="matches" type="list" count="2"/>
            <output_collection name="bams" type="list" count="2"/>
            <output_collection name="gstacks_out" type="list" count="2"/>
            <!-- populations outputs are only checked for containing a "#" character -->
            <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text="#"/></assert_contents></output>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering. In addition to the ustacks, cstacks and sstacks results, this tool also returns the outputs of tsv2bam, gstacks and populations.

--------

**Input files**

FASTQ, FASTA

- Population map::

    indv_01    1
    indv_02    1
    indv_03    1
    indv_04    2
    indv_05    2
    indv_06    2


**Output files**


- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_


- XXX.matches.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.


- other files:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation"/>
</tool>