Mercurial > repos > iuc > stacks2_refmap

<tool id="stacks2_refmap" name="Stacks2: reference map" profile="@PROFILE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>the Stacks pipeline with a reference genome (ref_map.pl)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
## Command template for the ref_map.pl pipeline run.
## Macro token from macros.xml; presumably defines helper functions used by the input staging below (confirm there).
@FASTQ_INPUT_FUNCTIONS@
## Working directories: bam_inputs is handed to ref_map.pl as the samples directory,
## stacks_outputs as its output directory.
mkdir bam_inputs stacks_outputs &&

## When the log output is present, link the Galaxy dataset to ref_map.log inside the output directory
## (presumably so the log written by ref_map.pl ends up in that dataset; confirm against the out_log macro).
#if $output_log
    ln -s '$output_log' stacks_outputs/ref_map.log &&
#end if

## Macro token from macros.xml; presumably stages the selected BAM datasets in bam_inputs (confirm there).
@BAM_INPUT@

## Pipeline invocation: the popmap flag is only emitted when a population map was supplied,
## paired_select expands to its option value (empty or a single extra flag),
## and the thread count falls back to 1 when GALAXY_SLOTS is unset.
ref_map.pl
--samples bam_inputs
#if str($popmap) != 'None':
    --popmap '$popmap'
#end if
$paired_select
-o stacks_outputs
-T \${GALAXY_SLOTS:-1}
--var-alpha $model_options.var_alpha
--gt-alpha $model_options.gt_alpha

## Macro token from macros.xml (post-processing step; see its definition for details).
@EXTRACT_VCF@
    ]]></command>

    <inputs>
        <!-- Input alignments; the parameter is defined by a shared macro in macros.xml
             (the tests below address it as input_bam). -->
        <expand macro="bam_input_macro"/>
        <!-- Optional population map; the command only adds the popmap flag when a dataset is chosen. -->
        <param argument="--popmap" type="data" optional="true" format="tabular,txt" label="Population map"/>
        <!-- The chosen option value is inserted verbatim into the ref_map.pl command line.
             This is a single-choice select, so exactly one option may be marked as selected:
             the empty value (no extra flag) is the default. -->
        <param name="paired_select" type="select" label="Paired end options" help="select single/paired for single end data or to select advanced paired end options, --unpaired: treat reverse reads as if they were forward reads; --ignore-pe-reads: ignore paired-end reads even if present in the input">
            <option value="" selected="true">single/paired</option>
            <option value="--unpaired">ignore read pairing (--unpaired)</option>
            <option value="--ignore-pe-reads">ignore paired-end reads (--ignore-pe-reads)</option>
        </param>
        <!-- The command reads var_alpha and gt_alpha from this section; the parameters themselves
             come from the shared macro, with the var-alpha default overridden to 0.01 here. -->
        <section name="model_options" title="Variant calling options (for gstacks)" expanded="true">
            <expand macro="variant_calling_options_vg" varalpha_default="0.01"/>
        </section>
        <!-- Shared macro; presumably provides the add_log switch used by the tests (confirm in macros.xml). -->
        <expand macro="in_log"/>
    </inputs>
    <outputs>
        <!-- All outputs are defined by shared macros from macros.xml; the order below is the order
             in which they are declared for this tool. -->
        <!-- Log output (addressed as output_log by the command and the tests). -->
        <expand macro="out_log"/> <!-- pipeline also writes other logs? .. could be a collection -->
        <!-- Outputs of the gstacks step (the tests check a list collection named gstacks_out). -->
        <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/>
        <!-- Outputs of the populations step (haplotypes, hapstats, log distribs and sumstats in the tests). -->
        <expand macro="populations_output_light" tooladd="(populations)"/>
    </outputs>

    <tests>
        <!-- Test 1: BAM files supplied as a list, default options; results compared with reference files -->
        <test expect_num_outputs="7">
            <param name="input_bam" value="refmap/PopA_01.bam,refmap/PopA_02.bam" ftype="bam"/>
            <param name="popmap" value="denovo_map/popmap_cstacks.tsv" ftype="tabular"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="ref_map.pl is done."/>
                </assert_contents>
            </output>
            <output_collection name="gstacks_out" type="list" count="2">
                <element name="catalog.calls.vcf" file="refmap/catalog.calls.vcf" ftype="vcf" lines_diff="4"/>
                <element name="catalog.fa.gz" file="refmap/catalog.fa.gz" ftype="fasta.gz" compare="diff"/>
            </output_collection>
            <output name="out_haplotypes" value="refmap/populations.haplotypes.tsv" ftype="tabular"/>
            <output name="out_hapstats" value="refmap/populations.hapstats.tsv" ftype="tabular"/>
            <output name="out_populations_log_distribs" value="refmap/populations.log.distribs" ftype="txt"/>
            <output name="out_sumstats_sum" value="refmap/populations.sumstats_summary.tsv" ftype="tabular"/>
            <output name="out_sumstats" value="refmap/populations.sumstats.tsv" ftype="tabular"/>
        </test>
        <!-- Test 2: non-default pairing and alpha options; checks the generated command line and
             only that each output contains a header marker -->
        <test expect_num_outputs="7">
            <param name="input_bam" value="refmap/PopA_01.bam,refmap/PopA_02.bam" ftype="bam"/>
            <param name="popmap" value="denovo_map/popmap_cstacks.tsv" ftype="tabular"/>
            <param name="paired_select" value="--unpaired"/>
            <param name="model_options|var_alpha" value="0.1"/>
            <param name="model_options|gt_alpha" value="0.1"/>
            <param name="add_log" value="yes"/>
            <assert_command>
                <has_text text="--unpaired"/>
                <has_text text="--var-alpha 0.1"/>
                <has_text text="--gt-alpha 0.1"/>
            </assert_command>
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="ref_map.pl is done."/>
                </assert_contents>
            </output>
            <output_collection name="gstacks_out" type="list" count="2"/>
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text="#"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program expects data that have been aligned to a reference genome, and can accept data directly from Bowtie, or from any aligner that can produce SAM format. To avoid problems with dataset names, we recommend using the *Map with BWA for STACKS* tool. This program will execute each of the Stacks components: first, running pstacks on each of the samples specified, building loci (based on the reference alignment) and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci specified as 'parents' or 'samples' on the command line, again using alignment to match loci in the catalog. Finally, sstacks will be executed to match each sample against the catalog. The ref_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.

--------

**Input files**

- SAM, BAM

- Population map::

    indv_01    1
    indv_02    1
    indv_03    1
    indv_04    2
    indv_05    2
    indv_06    2

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_


- XXX.matches.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.


- other files:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation"/>
</tool>
author	iuc
date	Thu, 14 Apr 2022 09:23:35 +0000
parents	4f65e343ecdf
children