<!-- Mercurial > repos > iuc > stacks2_populations -->

<tool id="stacks2_populations" name="Stacks2: populations" profile="@PROFILE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>Calculate population-level summary statistics</description>
    <!-- Shared definitions are imported from macros.xml; the macros expanded throughout this file
         (and presumably the @...@ tokens used in the tool tag) are defined there - confirm in macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Macro expansions named "requirements" and "version_cmd" (contents live in macros.xml). -->
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
#import re

## Working directories: inputs are staged in stacks_inputs, populations writes into stacks_outputs (-O)
mkdir stacks_inputs stacks_outputs &&

## When a log dataset is present, link it to the location of populations.log in the
## output directory so that the log written there lands in the Galaxy dataset
#if $output_log
    ln -s '$output_log' stacks_outputs/populations.log &&
#end if

## Stage the inputs: catalog files are linked by their element identifier,
## an uncompressed catalog.calls.vcf is gzipped to stacks_inputs/catalog.calls
#if str($options_usage.input_type) == 'stacks'
    #for $input_file in $options_usage.input_aln
        #set $filename = str($input_file.element_identifier)
        #if $filename == "catalog.calls" or $filename == "catalog.fa.gz":
            ln -s '${input_file}' 'stacks_inputs/${filename}' &&
        #elif $filename == "catalog.calls.vcf":
            gzip -c '${input_file}' > stacks_inputs/catalog.calls &&
        #end if
    #end for
#else if str($options_usage.input_type) == 'vcf'
    ln -s '$options_usage.input_vcf' 'stacks_inputs/input.vcf' &&
#end if

populations

-t \${GALAXY_SLOTS:-1}

## -V for a single VCF file, -P for a directory with the Stacks catalog
#if str($options_usage.input_type) == 'vcf'
    -V stacks_inputs/input.vcf
#else
    -P stacks_inputs
#end if
-O stacks_outputs

#if str($popmap) != 'None':
    -M '$popmap'
#end if
#if str($batch_size) != '':
    --batch-size '$batch_size'
#end if

## Data filtering

--min-populations $options_filtering.minpop
--min-samples-per-pop $options_filtering.minperc
--min-samples-overall $options_filtering.min_samples_overall
$options_filtering.filter_haplotype_wise
--min-maf $options_filtering.min_maf
--min-mac $options_filtering.min_mac
#if str($options_filtering.max_obs_het)
    --max-obs-het $options_filtering.max_obs_het
#end if
## deprecated https://groups.google.com/forum/#!msg/stacks-users/jdC7gw_MuK4/1v0FG3u7AwAJ
## #if str($options_filtering.lnl)
##     --lnl-lim $options_filtering.lnl
## #end if
$options_filtering.filter_single_snp
#if $options_filtering.blacklist
    -B '$options_filtering.blacklist'
#end if
#if $options_filtering.whitelist
    -W '$options_filtering.whitelist'
#end if

## Merging and Phasing:
#if str($merge_phase.enzyme) != '':
    -e $merge_phase.enzyme
#end if
$merge_phase.merge_sites
--merge-prune-lim $merge_phase.merge_prune_lim

## Locus stats:
$locus_stats.hwe

## Fstats
## (the p-value cutoff is only passed together with the Fst correction)
#if str($fstats_conditional.fstats) != 'no':
    --fstats
    $fstats_conditional.fst_correction
    #if $fstats_conditional.fst_correction
        --p-value-cutoff $fstats_conditional.p_value_cutoff
    #end if
#end if

## Kernel-smoothing algorithm:
$kernel_smoothing.options_kernel.kernel
#if str($kernel_smoothing.options_kernel.kernel)!='':
    --sigma $kernel_smoothing.options_kernel.sigma
#end if
## either the global bootstrap switch or the individual per-statistic switches
#if $kernel_smoothing.bootstrap_resampling_mode.bootstrap
    --bootstrap
#else
    $kernel_smoothing.bootstrap_resampling_mode.bootstrap_pifis
    $kernel_smoothing.bootstrap_resampling_mode.bootstrap_fst
    $kernel_smoothing.bootstrap_resampling_mode.bootstrap_div
    $kernel_smoothing.bootstrap_resampling_mode.bootstrap_phist
#end if

## bootstrap repetitions and whitelist are only passed if some kind of bootstrapping is enabled
#if $kernel_smoothing.bootstrap_resampling_mode.bootstrap or $kernel_smoothing.bootstrap_resampling_mode.bootstrap_pifis or $kernel_smoothing.bootstrap_resampling_mode.bootstrap_fst or $kernel_smoothing.bootstrap_resampling_mode.bootstrap_div or $kernel_smoothing.bootstrap_resampling_mode.bootstrap_phist:
    #if str($kernel_smoothing.bootstrap_reps)
        --bootstrap-reps $kernel_smoothing.bootstrap_reps
    #end if
    #if $kernel_smoothing.bootstrap_wl
        --bootstrap-wl '$kernel_smoothing.bootstrap_wl'
    #end if
#end if

## File output options:

$populations_output.ordered_export
$populations_output.fasta_loci
$populations_output.fasta_samples
$populations_output.vcf
$populations_output.genepop
$populations_output.structure
$populations_output.radpainter
$populations_output.plink
$populations_output.hzar
$populations_output.phylip
$populations_output.phylip_var
##$populations_output.phylip_var_all
$populations_output.treemix
$populations_output.no_hap_exports
$populations_output.fasta_samples_raw
$populations_output.gtf

#if $genetic_map_options.map_type:
    --map-type $genetic_map_options.map_type
#end if
#if $genetic_map_options.map_format
    --map-format $genetic_map_options.map_format
#end if

## Additional options:

$advanced_options.log_fst_comp

## populations outputs log info to stdout for vcf input
## for gstacks input it creates stacks_output/population.log
## in the latter case we take the log file
## also for vcf input the output files are named input.p....
## instead of populations...

## the redirect target is single-quoted like every other dataset path in this command
#if str($options_usage.input_type) == 'vcf'
    #if $output_log
        > '$output_log'
    #end if
    && find stacks_outputs -type f -iname "input.p*" | while read file; do mv "\$file" "\$(echo \$file | sed 's/\/input.p/\/populations/;')"; done
#end if

## move the genotype output (note that in 2.52 there is an inconsistent
## naming for onemap output and also extensions are inconsistent -> therefore wildcard)
#if $genetic_map_options.map_type and $genetic_map_options.map_format
    && mv stacks_outputs/populations.${genetic_map_options.map_type}.* stacks_outputs/populations.sql.tsv
#end if
    ]]></command>

    <inputs>
        <!-- Input source selector: a collection of Stacks output files or a single VCF file. -->
        <conditional name="options_usage">
            <param name="input_type" type="select" label="Input type" help="select input file type">
                <option value="stacks">Stacks output</option>
                <option value="vcf">VCF file</option>
            </param>
            <!-- Stacks output: the input collection parameter comes from a shared macro -->
            <when value="stacks">
                <expand macro="input_aln_macro"/>
            </when>
            <!-- VCF: one dataset, handed to populations via -V -->
            <when value="vcf">
                <param name="input_vcf" argument="-V" type="data" format="vcf" label="VCF file"/>
            </when>
        </conditional>
        <!-- Optional population map; -M is only added to the command line when a dataset is selected. -->
        <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" argument="-M" help="Format is 'SAMPLE1 \t POP1 \n SAMPLE2 ...'." optional="true"/>
        <!-- Optional batch size; when left empty the flag is omitted from the command line. -->
        <param argument="--batch-size" type="integer" value="" optional="true" label="Number of loci to process in a batch" help="Default: 10,000 in de novo mode; in reference mode, one chromosome per batch. Increase to speed up the analysis (uses more memory), decrease to save memory."/>

        <!-- Data filtering: thresholds on loci and nucleotide sites, plus optional marker white-/blacklists -->
        <section name="options_filtering" title="Data filtering options" expanded="true">
            <!-- locus-level thresholds -->
            <param name="minperc" type="float" argument="--min-samples-per-pop" min="0" max="1" value="0.0" label="Minimum percentage of individuals in a population required to process a locus for that population"/>
            <param name="minpop" type="integer" argument="--min-populations" value="1" label="Minimum number of populations a locus must be present in to process a locus"/>
            <param name="min_samples_overall" type="float" argument="--min-samples-overall" min="0" max="1" value="0.0" label="minimum percentage of individuals across populations required to process a locus"/>
            <param name="filter_haplotype_wise" type="boolean" argument="--filter-haplotype-wise" checked="false" truevalue="--filter-haplotype-wise" falsevalue="" label="apply the above filters haplotype wise" help="unshared SNPs will be pruned to reduce haplotype-wise missing data."/>
            <!-- site-level thresholds -->
            <param type="float" argument="--min-maf" min="0" max="0.5" value="0" label="Minimum minor allele frequency" help="specify a minimum minor allele frequency required to process a nucleotide site at a locus"/>
            <param type="integer" argument="--min-mac" min="0" value="0" label="Minimum minor allele count" help="specify a minimum minor allele count required to process a nucleotide site at a locus"/>
            <param type="float" argument="--max-obs-het" optional="true" min="0" max="1" value="" label="Maximum observed heterozygosity required to process a nucleotide site at a locus."/>
            <!-- deprecated https://groups.google.com/forum/#!msg/stacks-users/jdC7gw_MuK4/1v0FG3u7AwAJ
            <param type="float" value="" optional="true" argument="\-\-lnl_lim" label="Filter loci with log likelihood values below this threshold"/>-->
            <!-- the option value is the literal command line flag (empty: analyse all SNPs) -->
            <param name="filter_single_snp" type="select" label="Analyse all SNPs per locus">
                <option value="">yes</option>
                <option value="--write-single-snp">No: Analyse only the first SNP (--write-single-snp)</option>
                <option value="--write-random-snp">No: Analyse only one random SNP (--write-random-snp)</option>
            </param>
            <!-- optional marker lists -->
            <param name="whitelist" type="data" argument="-W" format="txt,tabular" optional="true" label="Specify a file containing markers to include in the export"/>
            <param name="blacklist" type="data" argument="-B" format="txt,tabular" optional="true" label="Specify a file containing markers to exclude from the export"/>
        </section>

        <!-- Merging and phasing: restriction enzyme, cut-site based merging of loci and the prune limit -->
        <section name="merge_phase" title="Merging and Phasing" expanded="true">
            <!-- the list of enzymes is provided by a shared macro -->
            <param name="enzyme" type="select" argument="-e" label="Provide the restriction enzyme used" help="required if generating genomic output">
                <expand macro="enzymes"/>
            </param>
            <param type="boolean" argument="--merge-sites" checked="false" truevalue="--merge-sites" falsevalue="" label="Merge loci that were produced from the same restriction enzyme cutsite" help="(requires reference-aligned data)"/>
            <param type="float" argument="--merge-prune-lim" min="0.0" max="1.0" value="1.0" label="Fraction of samples possessing both loci to prune remaining samples from analysis" help="for merging adjacent loci"/>
        </section>

        <!-- Locus statistics: optional Hardy-Weinberg test flag -->
        <section name="locus_stats" title="Locus Stats" expanded="true">
            <param type="boolean" argument="--hwe" checked="false" truevalue="--hwe" falsevalue="" label="Calculate divergence from Hardy-Weinberg equilibrium for each locus"/>
        </section>

        <!-- F statistics: the correction and cutoff parameters are only offered once F statistics are enabled -->
        <conditional name="fstats_conditional">
            <param type="select" argument="--fstats" label="Enable SNP and haplotype-based F statistics">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param type="boolean" argument="--fst-correction" checked="false" truevalue="--fst-correction" falsevalue="" label="apply a p-value correction to Fst values based on a Fisher's exact test" help=""/>
                <param type="float" argument="--p-value-cutoff" value="0.05" label="P-value cutoff" help="required p-value to keep an Fst measurement (0.05 by default)."/>
            </when>
        </conditional>

        <!-- Output format switches: every boolean below contributes its own flag to the command line when checked -->
        <section name="populations_output" title="Output options" expanded="true">
            <param type="boolean" argument="--ordered-export" checked="false" truevalue="--ordered-export" falsevalue="" label="If data is reference aligned, exports will be ordered; only a single representative of each overlapping site."/>
            <param type="boolean" argument="--fasta-loci" checked="false" truevalue="--fasta-loci" falsevalue="" label="Output locus consensus sequences in FASTA format"/>
            <param type="boolean" argument="--fasta-samples" checked="false" truevalue="--fasta-samples" falsevalue="" label="Output the sequences of the two haplotypes of each (diploid) sample, for each locus, in FASTA format"/>
            <param type="boolean" argument="--vcf" checked="false" truevalue="--vcf" falsevalue="" label="Output results in Variant Call Format (VCF)"/>
            <param type="boolean" argument="--genepop" checked="false" truevalue="--genepop" falsevalue="" label="Output results in GenePop Format"/>
            <param type="boolean" argument="--structure" checked="false" truevalue="--structure" falsevalue="" label="Output results in Structure Format"/>
            <param type="boolean" argument="--radpainter" checked="false" truevalue="--radpainter" falsevalue="" label="Output results in fineRADstructure/RADpainter format"/>
            <param type="boolean" argument="--plink" checked="false" truevalue="--plink" falsevalue="" label="Output genotypes in PLINK format"/>
            <param type="boolean" argument="--hzar" checked="false" truevalue="--hzar" falsevalue="" label="Output genotypes in Hybrid Zone Analysis using R (HZAR) format."/>
            <param type="boolean" argument="--phylip" checked="false" truevalue="--phylip" falsevalue="" label="Output nucleotides that are fixed-within, and variant among populations in Phylip format for phylogenetic tree construction"/>
            <param type="boolean" argument="--phylip-var" checked="false" truevalue="--phylip-var" falsevalue="" label="Include variable sites in the phylip output encoded using IUPAC notation."/>
            <!--<param argument="\-\-phylip_var_all" truevalue="\-\-phylip-var-all" falsevalue="" type="boolean" checked="false" label="Include all sequence as well as variable sites in the phylip output encoded using IUPAC notation."/>-->
            <param type="boolean" argument="--treemix" checked="false" truevalue="--treemix" falsevalue="" label="Output SNPs in a format useable for the TreeMix program (Pickrell and Pritchard)."/>
            <param type="boolean" argument="--no-hap-exports" checked="false" truevalue="--no-hap-exports" falsevalue="" label="Omit haplotype outputs"/>
            <param type="boolean" argument="--fasta-samples-raw" checked="false" truevalue="--fasta-samples-raw" falsevalue="" label="Output all haplotypes observed in each sample, for each locus, in FASTA format"/>
            <param type="boolean" argument="--gtf" checked="false" truevalue="--gtf" falsevalue="" label="output locus positions in a GTF annotation file"/>
        </section>

        <!-- Kernel-smoothing algorithm: smoothing mode (with its sigma) and bootstrap resampling settings -->
        <section name="kernel_smoothing" title="Kernel smoothing" expanded="false">
            <!-- every enabled smoothing mode offers the same sigma parameter; the option value is the literal flag -->
            <conditional name="options_kernel">
                <param name="kernel" type="select" label="Enable kernel-smoothed calculations" help="The options -k and --smooth-fstats implicitly enable --fstats. Note that, in order to set non-default values for --fst-correction or --p-value-cutoff the option --fstats still needs to be enabled in the tool form.">
                    <option value="">No</option>
                    <option value="-k">For Pi, Fis, Fst, Fst', and Phi_st (-k/--smooth)</option>
                    <option value="--smooth-fstats">For Fst, Fst', and Phi_st (--smooth-fstats)</option>
                    <option value="--smooth-popstats">For Pi and Fis (--smooth-popstats)</option>
                </param>
                <when value=""/>
                <when value="-k">
                    <param argument="--sigma" type="float" value="150000" label="Standard deviation of the kernel smoothing weight distribution" help="distance over which to average values (default 150Kb)"/>
                </when>
                <when value="--smooth-fstats">
                    <param argument="--sigma" type="float" value="150000" label="Standard deviation of the kernel smoothing weight distribution" help="distance over which to average values (default 150Kb)"/>
                </when>
                <when value="--smooth-popstats">
                    <param argument="--sigma" type="float" value="150000" label="Standard deviation of the kernel smoothing weight distribution" help="distance over which to average values (default 150Kb)"/>
                </when>
            </conditional>
            <!-- either bootstrap everything (single switch) or choose the statistics individually -->
            <conditional name="bootstrap_resampling_mode">
                <param argument="--bootstrap" type="select" label="Enable bootstrap resampling for all smoothed statistics">
                    <option value="">No</option>
                    <option value="--bootstrap">Yes</option>
                </param>
                <when value="--bootstrap">
                </when>
                <when value="">
                    <param argument="--bootstrap-pifis" type="boolean" checked="false" truevalue="--bootstrap-pifis" falsevalue="" label="Enable bootstrap resampling for smoothed SNP-based Pi and Fis calculations"/>
                    <param argument="--bootstrap-fst" type="boolean" checked="false" truevalue="--bootstrap-fst" falsevalue="" label="Enable bootstrap resampling for smoothed Fst calculations based on pairwise population comparison of SNPs"/>
                    <param argument="--bootstrap-div" type="boolean" checked="false" truevalue="--bootstrap-div" falsevalue="" label="Enable bootstrap resampling for smoothed haplotype diversity and gene diversity calculations based on haplotypes"/>
                    <param argument="--bootstrap-phist" type="boolean" checked="false" truevalue="--bootstrap-phist" falsevalue="" label="Enable bootstrap resampling for smoothed Phi_st calculations based on haplotypes."/>
                </when>
            </conditional>
            <!-- these two are only passed on the command line when some kind of bootstrapping is enabled -->
            <param argument="--bootstrap-reps" type="integer" value="100" optional="true" label="Number of bootstrap resamplings to calculate"/>
            <param argument="--bootstrap-wl" format="txt,tabular" type="data" optional="true" label="Only bootstrap loci contained in this whitelist"/>
        </section>

        <!-- Advanced options: logging of the Fst/Phi_st calculation components -->
        <section name="advanced_options" title="Advanced options" expanded="false">
            <param argument="--log-fst-comp" type="boolean" checked="false" truevalue="--log-fst-comp" falsevalue="" label="Log components of Fst/Phi_st calculations to a dataset"/>
        </section>

        <!-- Genetic map output: both selects are optional; the genotype output is only moved into place
             by the command when map type and map format are both selected -->
        <section name="genetic_map_options" title="Genetic map output options (population map must specify a genetic cross)" expanded="false" help="works only if a population map specifying only two groupings: 'parent' and 'progeny'">
            <param type="select" argument="--map-type" optional="true" label="genetic map type to write" help="needs to be selected to trigger the additional Genotyping markers output">
                <option value="CP">CP</option>
                <option value="DH">DH</option>
                <option value="F2">F2</option>
                <option value="BC1">BC1</option>
            </param>
            <param type="select" argument="--map-format" optional="true" label="mapping program format to write" help="needs to be selected to trigger the additional Genotyping markers output">
                <option value="joinmap">joinmap</option>
                <option value="onemap">onemap</option>
                <option value="rqtl">rqtl</option>
            </param>
        </section>
        <!-- Shared macro; presumably defines the "add_log" parameter set by the tests below - confirm in macros.xml -->
        <expand macro="in_log"/>
    </inputs>
    <!-- All outputs come from shared macros: the log output (presumably the "output_log" dataset referenced
         by the command and the tests - confirm in macros.xml) and the full set of populations result files. -->
    <outputs>
        <expand macro="out_log"/>
        <expand macro="populations_output_full"/>
    </outputs>
    <tests>
        <!-- test w gstacks output as input and default options (with an explicit batch size) -->
        <test expect_num_outputs="6">
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <!-- declared once only: the duplicate batch_size entry was redundant -->
            <param name="batch_size" value="9999"/>
            <param name="add_log" value="yes"/>
            <!-- all default outputs are compared against the stored reference files -->
            <output ftype="txt" name="output_log" value="populations/populations.log" lines_diff="4"/>
            <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/>
            <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/>
            <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs"/>
            <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/>
            <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/>
        </test>
        <!-- gstacks output as input, no population map, default options; only the log content is checked for real -->
        <test expect_num_outputs="6">
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" value="gstacks/catalog.calls.vcf" ftype="vcf"/>
                    <element name="catalog.fa.gz" value="gstacks/catalog.fa.gz" ftype="fasta.gz"/>
                </collection>
            </param>
            <param name="add_log" value="yes"/>
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="Populations is done."/>
                </assert_contents>
            </output>
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
        </test>
        <!-- test w gstacks output as input with F statistics and all optional output formats enabled;
             most format outputs are compared by size only (compare="sim_size") -->
        <test expect_num_outputs="27">
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
               </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <param name="advanced_options|log_fst_comp" value="true"/>
            <param name="fstats_conditional|fstats" value="yes"/>
            <param name="populations_output|fasta_loci" value="true"/>
            <param name="populations_output|fasta_samples" value="true"/>
            <param name="populations_output|fasta_samples_raw" value="true"/>
            <param name="populations_output|phylip" value="true"/>
            <param name="populations_output|phylip_var" value="true"/>
            <param name="populations_output|genepop" value="true"/>
            <param name="populations_output|vcf" value="true"/>
            <param name="populations_output|hzar" value="true"/>
            <param name="populations_output|plink" value="true"/>
            <param name="populations_output|structure" value="true"/>
            <param name="populations_output|radpainter" value="true"/>
            <param name="populations_output|treemix" value="true"/>
            <param name="populations_output|gtf" value="true"/>
            <assert_command>
                <has_text text="--log-fst-comp"/>
            </assert_command>
            <output ftype="txt" name="output_log"><assert_contents><has_text text="Populations is done."/></assert_contents></output>
            <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/>
            <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/>
            <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs" lines_diff="2"/>
            <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/>
            <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/>

            <output ftype="tabular" name="out_phistats" value="populations/populations.phistats.tsv"/>
            <output ftype="tabular" name="out_phistats_sum" value="populations/populations.phistats_summary.tsv"/>
            <output ftype="tabular" name="out_fststats_sum" value="populations/populations.fst_summary.tsv"/>
            <output ftype="tabular" name="out_fasta_strict" value="populations/populations.loci.fa" compare="sim_size" delta="50"/><!-- there seems to be reordering of the lines; differences are expected only in the date contained in the header -->
            <output ftype="tabular" name="out_fasta" value="populations/populations.samples.fa" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_fasta_raw" value="populations/populations.samples-raw.fa" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_phylip_all_pop_fix" value="populations/populations.fixed.phylip" lines_diff="2"/><!-- same as above -->
            <output ftype="tabular" name="out_phylip_all_loci_fix" value="populations/populations.fixed.phylip.log" lines_diff="2"/>
            <output ftype="tabular" name="out_phylip_all_pop_var" value="populations/populations.var.phylip" compare="sim_size" delta="50"/>
            <output ftype="tabular" name="out_phylip_all_loci_var" value="populations/populations.var.phylip.log" compare="sim_size" delta="50"/><!-- there seems to be reordering of the lines; differences are expected only in the date contained in the header -->
            <output ftype="tabular" name="out_genepop_snps" value="populations/populations.snps.genepop" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_genepop_haps" value="populations/populations.haps.genepop" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="vcf" name="out_vcf_haplotypes_snps" value="populations/populations.snps.vcf" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="vcf" name="out_vcf_haplotypes_haps" value="populations/populations.haps.vcf" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_plink_markers" value="populations/populations.plink.map" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_plink_genotypes" value="populations/populations.plink.ped" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_hzar" value="populations/populations.hzar.csv" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_structure" value="populations/populations.structure" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_radpainter" value="populations/populations.haps.radpainter" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="tabular" name="out_treemix" value="populations/populations.treemix" compare="sim_size" delta="50"/><!-- same as above -->
            <output ftype="gtf" name="out_gtf" value="populations/populations.gtf" lines_diff="2"/><!-- same as above -->
        </test>
        <!-- VCF file as input with default options; this only verifies that the run finishes (log text) -->
        <test expect_num_outputs="6">
            <param name="options_usage|input_type" value="vcf"/>
            <param name="options_usage|input_vcf" value="gstacks/catalog.calls.vcf" ftype="vcf"/>
            <param name="popmap" value="denovo_map/popmap_cstacks.tsv" ftype="tabular"/>
            <param name="add_log" value="yes"/>
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="Populations is done."/>
                </assert_contents>
            </output>
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
        </test>
        <!-- test w gstacks output as input and non default filtering (including haplotype-wise filtering) -->
        <test expect_num_outputs="6">
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <section name="options_filtering">
                <param name="minperc" value="0.1"/>
                <param name="minpop" value="2"/>
                <param name="min_samples_overall" value="0.01"/>
                <!-- boolean parameters are set with true/false (as in the other tests), not with their truevalue -->
                <param name="filter_haplotype_wise" value="true"/>
                <param name="min_maf" value="0.01"/>
                <param name="min_mac" value="1"/>
                <param name="max_obs_het" value="0.8"/>
                <param name="blacklist" value="populations/blacklist.tsv" ftype="tabular"/>
            </section>
            <!-- every non-default filter must show up on the generated command line -->
            <assert_command>
                <has_text text="--min-samples-per-pop 0.1"/>
                <has_text text="--min-populations 2"/>
                <has_text text="--min-samples-overall 0.01"/>
                <has_text text="--filter-haplotype-wise"/>
                <has_text text="--min-maf 0.01"/>
                <has_text text="--min-mac 1"/>
                <has_text text="--max-obs-het 0.8"/>
                <has_text text="-B "/>
            </assert_command>
            <output ftype="txt" name="output_log"><assert_contents><has_text text="Populations is done."/></assert_contents></output>
            <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text=""/></assert_contents></output>
        </test>
        <!-- test w gstacks output as input and non default filtering (needed because filter-haplotype-wise is incompatible with write_random_snp/write_single_snp) -->
        <test expect_num_outputs="6">
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <section name="options_filtering">
                <param name="minperc" value="0.1"/>
                <param name="minpop" value="2"/>
                <param name="min_samples_overall" value="0.01"/>
                <param name="min_maf" value="0.01"/>
                <param name="min_mac" value="1"/>
                <param name="max_obs_het" value="0.8"/>
                <!-- since write_random_snp runs in infinite loop switch to write_single -->
                <param name="filter_single_snp" value="--write-single-snp"/>
                <param name="blacklist" value="populations/blacklist.tsv" ftype="tabular"/>
            </section>
            <assert_command>
                <has_text text="--min-samples-per-pop 0.1"/>
                <has_text text="--min-populations 2"/>
                <has_text text="--min-samples-overall 0.01"/>
                <has_text text="--min-maf 0.01"/>
                <has_text text="--min-mac 1"/>
                <has_text text="--max-obs-het 0.8"/>
                <!-- the selected single-SNP mode must be passed, and the random mode must not be -->
                <has_text text="--write-single-snp"/>
                <not_has_text text="--write-random-snp"/>
                <has_text text="-B "/>
            </assert_command>
            <output ftype="txt" name="output_log"><assert_contents><has_text text="Populations is done."/></assert_contents></output>
            <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text=""/></assert_contents></output>
        </test>
        <!-- test w gstacks output as input and non default merging/hwe -->
        <test expect_num_outputs="6">
            <!-- inputs: gstacks catalog collection and population map -->
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <!-- non default merging options -->
            <section name="merge_phase">
                <param name="enzyme" value="ecoRI"/>
                <param name="merge_sites" value="yes"/>
                <param name="merge_prune_lim" value="0.9"/>
            </section>
            <!-- enable the Hardy-Weinberg statistics -->
            <section name="locus_stats">
                <param name="hwe" value="yes"/>
            </section>
            <!-- every option set above has to show up in the generated command line -->
            <assert_command>
                <has_text text="-e ecoRI"/>
                <has_text text="--merge-sites"/>
                <has_text text="--merge-prune-lim 0.9"/>
                <has_text text="--hwe"/>
            </assert_command>
            <!-- the log must report a finished run -->
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="Populations is done."/>
                </assert_contents>
            </output>
            <!-- remaining outputs: presence check only (empty text always matches) -->
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
        </test>
        <!-- test w gstacks output as input and non default fstats/kernel/bootstrap -->
        <test expect_num_outputs="9">
            <!-- inputs: catalog files from the refmap test data and the population map -->
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="refmap/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="refmap/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <!-- F statistics switched on, with correction and a custom p-value cutoff -->
            <conditional name="fstats_conditional">
                <param name="fstats" value="yes"/>
                <param name="fst_correction" value="--fst-correction"/>
                <param name="p_value_cutoff" value="0.01"/>
            </conditional>
            <!-- kernel smoothing with a non default sigma -->
            <conditional name="options_kernel">
                <param name="kernel" value="-k"/>
                <param name="sigma" value="150001"/>
            </conditional>
            <!-- bootstrap resampling with a non default number of replicates -->
            <conditional name="bootstrap_resampling_mode">
                <param name="bootstrap" value="--bootstrap"/>
            </conditional>
            <param name="bootstrap_reps" value="23"/>
            <!-- all of the above must be visible in the generated command line -->
            <assert_command>
                <has_text text="--fstats"/>
                <has_text text="--fst-correction"/>
                <has_text text="--p-value-cutoff 0.01"/>
                <has_text text="-k "/>
                <has_text text="--sigma 150001"/>
                <has_text text="--bootstrap"/>
                <has_text text="--bootstrap-reps 23"/>
            </assert_command>
            <!-- the log must report a finished run -->
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="Populations is done."/>
                </assert_contents>
            </output>
            <!-- standard outputs: presence check only (empty text always matches) -->
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <!-- F statistics outputs are compared against the stored reference files -->
            <output name="out_phistats" ftype="tabular" value="populations/populations.phistats.tsv"/>
            <output name="out_phistats_sum" ftype="tabular" value="populations/populations.phistats_summary.tsv"/>
            <output name="out_fststats_sum" ftype="tabular" value="populations/populations.fst_summary.tsv"/>
        </test>
        <!-- same as previous test but without fstats parameter in order to check if the filter
             for out_phistats* and out_fststats_sum works (also instead of -k \-\-smooth-fstats
             is used) -->
        <test expect_num_outputs="9">
            <!-- inputs: catalog files from the refmap test data and the population map -->
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="refmap/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="refmap/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <!-- F statistics are explicitly switched off here -->
            <conditional name="fstats_conditional">
                <param name="fstats" value="no"/>
            </conditional>
            <!-- kernel smoothing restricted to the F statistics, non default sigma -->
            <conditional name="options_kernel">
                <param name="kernel" value="--smooth-fstats"/>
                <param name="sigma" value="150001"/>
            </conditional>
            <!-- bootstrap resampling with a non default number of replicates -->
            <conditional name="bootstrap_resampling_mode">
                <param name="bootstrap" value="--bootstrap"/>
            </conditional>
            <param name="bootstrap_reps" value="23"/>
            <!-- none of the fstats options may be emitted, the smoothing and bootstrap ones must be -->
            <assert_command>
                <not_has_text text="--fstats"/>
                <not_has_text text="--fst-correction"/>
                <not_has_text text="--p-value-cutoff 0.01"/>
                <has_text text="--smooth-fstats "/>
                <has_text text="--sigma 150001"/>
                <has_text text="--bootstrap"/>
                <has_text text="--bootstrap-reps 23"/>
            </assert_command>
            <!-- the log must report a finished run -->
            <output name="output_log" ftype="txt">
                <assert_contents>
                    <has_text text="Populations is done."/>
                </assert_contents>
            </output>
            <!-- standard outputs: presence check only (empty text always matches) -->
            <output name="out_haplotypes" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_hapstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_populations_log_distribs" ftype="txt">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats_sum" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="out_sumstats" ftype="tabular">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <!-- the three F statistics outputs are still expected although fstats is set to no -->
            <output name="out_phistats" ftype="tabular" value="populations/populations.phistats.tsv"/>
            <output name="out_phistats_sum" ftype="tabular" value="populations/populations.phistats_summary.tsv"/>
            <output name="out_fststats_sum" ftype="tabular" value="populations/populations.fst_summary.tsv"/>
        </test>
        <!-- test w gstacks output as input and default options -->
        <test expect_num_outputs="6">
            <!-- inputs: gstacks catalog collection and population map; all other options keep their defaults -->
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/>
            <param name="add_log" value="yes"/>
            <!-- the log is compared to the reference with up to 4 differing lines allowed -->
            <output name="output_log" ftype="txt" value="populations/populations.log" lines_diff="4"/>
            <!-- every other output has to match its reference file -->
            <output name="out_haplotypes" ftype="tabular" value="populations/populations.haplotypes.tsv"/>
            <output name="out_hapstats" ftype="tabular" value="populations/populations.hapstats.tsv"/>
            <output name="out_populations_log_distribs" ftype="txt" value="populations/populations.log.distribs"/>
            <output name="out_sumstats_sum" ftype="tabular" value="populations/populations.sumstats_summary.tsv"/>
            <output name="out_sumstats" ftype="tabular" value="populations/populations.sumstats.tsv"/>
        </test>
        <!-- test w gstacks output as input, the popmap_cstacks_genotypes.tsv population map, and the genetic map output options that have been added in 2.5 -->
        <test expect_num_outputs="7">
            <!-- inputs: gstacks catalog collection and the popmap_cstacks_genotypes.tsv population map -->
            <param name="options_usage|input_type" value="stacks"/>
            <param name="options_usage|input_aln">
                <collection type="list">
                    <element name="catalog.calls.vcf" ftype="vcf" value="gstacks/catalog.calls.vcf"/>
                    <element name="catalog.fa.gz" ftype="fasta.gz" value="gstacks/catalog.fa.gz"/>
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks_genotypes.tsv"/>
            <!-- request a genetic map of type CP in joinmap format -->
            <section name="genetic_map_options">
                <param name="map_type" value="CP"/>
                <param name="map_format" value="joinmap"/>
            </section>
            <param name="add_log" value="yes"/>
            <output ftype="txt" name="output_log"><assert_contents><has_text text="Populations is done."/></assert_contents></output>
            <!-- genetic map file, compared to the reference with up to 4 differing lines allowed.
                 NOTE(review): the JoinMap file is checked through the output named out_sql;
                 confirm against the outputs section that this is the intended output -->
            <output ftype="txt" name="out_sql" value="populations/populations.CP.joinmap.loc" lines_diff="4"/>
            <!-- remaining outputs: presence check only (empty text always matches) -->
            <output ftype="tabular" name="out_haplotypes"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_hapstats"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="txt" name="out_populations_log_distribs"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats_sum"><assert_contents><has_text text=""/></assert_contents></output>
            <output ftype="tabular" name="out_sumstats"><assert_contents><has_text text=""/></assert_contents></output>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program will be executed in place of the genotypes program when a population is being processed through the pipeline. A map specifying which individuals belong to which population is submitted to the program and the program will then calculate population genetics statistics, expected/observed heterozygosity, π, and FIS at each nucleotide position. The populations program will compare all populations pairwise to compute FST. If a set of data is reference aligned, then a kernel-smoothed FST will also be calculated.

--------

**Input files**

Output from denovo_map or ref_map

- Population map::

    indv_01    1
    indv_02    1
    indv_03    1
    indv_04    2
    indv_05    2
    indv_06    2


**Output files**

- XXX.tags.tsv file

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_


- XXX.matches.tsv file

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.


- other files:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation"/>
</tool>
author	iuc
date	Thu, 14 Apr 2022 09:21:25 +0000
parents	887cf585cafc
children	564c33e5c3e3