Mercurial > repos > iuc > stacks2_refmap

<tool id="stacks2_refmap" name="Stacks2: reference map" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
    <description>the Stacks pipeline with a reference genome (ref_map.pl)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
@FASTQ_INPUT_FUNCTIONS@
mkdir bam_inputs stacks_outputs&&
@BAM_INPUT@

ref_map.pl
--samples bam_inputs
#if str($popmap) != 'None':
    --popmap '$popmap'
#end if
$paired_select
-o stacks_outputs
-T \${GALAXY_SLOTS:-1}
--var-alpha $model_options.var_alpha
--gt-alpha $model_options.gt_alpha

@EXTRACT_VCF@

#if $output_log
    && mv stacks_outputs/ref_map.log $output_log
#end if
    ]]></command>

    <inputs>
        <expand macro="bam_input_macro"/>
        <param argument="--popmap" type="data" optional="true" format="tabular,txt" label="Population map" />
        <param name="paired_select" type="select" label="Paired end options" help="select single/paired for single end data or to select advanced paired end options, --unpaired: treat reverse reads as if they were forward reads; --ignore-pe-reads: ignore paired-end reads even if present in the input">
            <option value="" selected="true">single/paired</option>
            <option value="--unpaired" selected="true">ignore read pairing (--unpaired)</option>
            <option value="--ignore-pe-reads" selected="true">ignore paired-end reads (--ignore-pe-reads)</option>
        </param>
        <section name="model_options" title="Variant calling options (for gstacks)" expanded="true">
            <expand macro="variant_calling_options_vg" varalpha_default="0.01"/>
        </section>
        <expand macro="in_log"/>
    </inputs>
    <outputs>
        <expand macro="out_log"/> <!-- pipeline also writes other logs? .. could be a collection -->
        <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/>
        <expand macro="populations_output_light" tooladd="(populations)"/>
    </outputs>

    <tests>
        <!-- bams as list -->
        <test>
            <param name="input_bam" ftype="bam" value="refmap/PopA_01.bam,refmap/PopA_02.bam"/>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
            <param name="add_log" value="yes" />
            <output ftype="txt" name="output_log"><assert_contents><has_text text="ref_map.pl is done." /></assert_contents></output>
            <output_collection name="gstacks_out" type="list" count="2">
                <element name="catalog.calls.vcf" file="refmap/catalog.calls.vcf" ftype="vcf" lines_diff="2"/>
                <element name="catalog.fa.gz" file="refmap/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
            </output_collection>
            <output ftype="tabular" name="out_haplotypes" value="refmap/populations.haplotypes.tsv"/>
            <output ftype="tabular" name="out_hapstats" value="refmap/populations.hapstats.tsv"/>
            <output ftype="txt" name="out_populations_log_distribs" value="refmap/populations.log.distribs"/>
            <output ftype="tabular" name="out_sumstats_sum" value="refmap/populations.sumstats_summary.tsv"/>
            <output ftype="tabular" name="out_sumstats" value="refmap/populations.sumstats.tsv"/>
            <output ftype="tabular" name="out_sql" value="refmap/populations.markers.tsv"/>
        </test>
        <test>
            <param name="input_bam" ftype="bam" value="refmap/PopA_01.bam,refmap/PopA_02.bam"/>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
            <param name="paired_select" value="--unpaired"/>
            <param name="model_options|var_alpha" value="0.1" />
            <param name="model_options|gt_alpha" value="0.1" />
            <assert_command>
                <has_text text="--unpaired" />
                <has_text text="--var-alpha 0.1" />
                <has_text text="--gt-alpha 0.1" />
            </assert_command>
            <param name="add_log" value="yes" />
            <output ftype="txt" name="output_log"><assert_contents><has_text text="ref_map.pl is done." /></assert_contents></output>
            <output_collection name="gstacks_out" type="list" count="2"/>
            <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text="#"/></assert_contents></output>
            <output ftype="tabular" name="out_sql"><assert_contents><has_text text="#"/></assert_contents></output>

        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program expects data that have been aligned to a reference genome, and can accept data directly from Bowtie, or from any aligner that can produce SAM format. To avoid datasets names problems, we recommand the use of the *Map with BWA for STACKS tool*. This program will execute each of the Stacks components: first, running pstacks on each of the samples specified, building loci (based on the reference alignment) and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci specified as 'parents' or 'samples' on the command line, again using alignment to match loci in the catalog. Finally, sstacks will be executed to match each sample against the catalog. The ref_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.

--------

**Input files**

- SAM, BAM

- Population map::

    indv_01    1
    indv_02    1
    indv_03    1
    indv_04    2
    indv_05    2
    indv_06    2

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_


- XXX.matches.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.


- other files:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>
author	iuc
date	Mon, 01 Jul 2019 11:03:22 -0400
parents
children	319b10947ec1