view stacks_kmerfilter.xml @ 4:c9b77ced9a78 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 75f83e7b16127aecc68a58df2cb75062f2a9296a"
author iuc
date Wed, 01 Sep 2021 11:30:57 +0000
parents 1544278c272e
children 3d70c2996730
line wrap: on
line source

<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
<description>Filter reads by rare or abundant k-mers</description>
    <!-- macros.xml is the only import; the macros expanded in this file are presumably defined there -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- tool dependencies and the version command are shared via macros -->
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="aggressive"><![CDATA[
@FASTQ_INPUT_FUNCTIONS@
## print the log file to stderr when a command fails and when the shell exits
trap ">&2 cat '$output_log'" err exit &&
mkdir stacks_inputs stacks_outputs &&

## the helper returns: shell commands to emit before kmer_filter (presumably staging the
## inputs, confirm in the macros), the forward and reverse read paths and the value for -i
#set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
$link_command

kmer_filter
#if $input_type.input_type_select == 'single':
    -f '$fwd_path'
#else
    -1 '$fwd_path'
    -2 '$rev_path'
#end if
## TODO    $options_kmer_char.read_k_freq
-i $inputype
-o stacks_outputs
$capture
-y fastq
$options_filtering.rare
$options_filtering.abundant
--k_len $options_filtering.k_len
--max_k_freq $options_advanced_filtering.max_k_freq
## optional integers are only passed when the user supplied a value
#if str($options_advanced_filtering.min_lim)!="":
    --min_lim $options_advanced_filtering.min_lim
#end if
#if str($options_advanced_filtering.max_lim)!="":
    --max_lim $options_advanced_filtering.max_lim
#end if
#if str($options_normalization.normalize)!="":
    --normalize $options_normalization.normalize
#end if
## NOTE(review): the flag is spelled with hyphens here while the parameter declares
## --write_k_freq; confirm that kmer_filter accepts both spellings
#if $options_kmer_char.write_k_freq
    --write-k-freq '$kfreqdist'
#end if
## TODO read_k_freq
$options_kmer_char.k_dist
## with k_dist the standard output is redirected into the kfreq dataset
#if $options_kmer_char.k_dist
    > '$kfreq'
#end if
@TEE_APPEND_LOG@

## post-processing: rewrite the header line of the characterization outputs,
## otherwise move the produced read files to the Galaxy datasets
#if $options_kmer_char.k_dist
    && sed -i -e 's/KmerFrequency/# KmerFrequency/' '$kfreq'
#elif $options_kmer_char.write_k_freq
    && sed -i -e 's/# Kmer Count/#Kmer\tCount/; s/ /\t/' '$kfreqdist'
#else
    ## move outputs such that Galaxy can find them
    ## if filtering is on then ...filt...fq is created
    ## if normalization is on then ...norm...fq is created
    ## if both are active then both files are created, but only norm is needed
    #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
        #if str($options_normalization.normalize)!="":
            #set infix="norm"
        #else
            #set infix="fil"
        #end if
        #if $capture:
            #if $input_type.input_type_select == "single"
                && mv stacks_outputs/*.discards.fastq '$discarded'
            #else
                && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
                && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
            #end if
        #end if
        #if $input_type.input_type_select == "single"
            && mv stacks_outputs/*.${infix}.fastq '$clean'
        #else
            && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
            && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
        #end if
    #end if
#end if
    ]]></command>
    <inputs>
        <!-- read input (single dataset or paired collection) comes from a shared macro -->
        <expand macro="fastq_input_bc"/>
        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/>
        <section name="options_filtering" title="Filtering options" expanded="False">
            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers"/>
            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers"/>
            <param argument="--k_len" type="integer" value="15" label="K-mer size"/>
        </section>
        <section name="options_advanced_filtering" title="Advanced filtering options" expanded="False">
            <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant"/>
            <!-- optional limits: when left empty the command template omits the flag -->
            <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occurring in a row required to discard a read" help="(default: 80% of the k-mer length)."/>
            <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)"/>
        </section>
        <section name="options_normalization" title="Normalization options" expanded="False">
            <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage"/>
        </section>
        <!-- both options below make the tool produce a tabular report instead of filtered reads -->
        <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit"/>
            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit"/>
        </section>
        <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files"/>
        </section>-->
        <expand macro="in_log"/>
    </inputs>
    <outputs>
        <expand macro="out_log"/>
        <!-- read outputs are only offered when neither characterization option is selected -->
        <expand macro="fastq_output_filter" format="fastq">
            <filter>not options_kmer_char['k_dist'] and not options_kmer_char['write_k_freq']</filter>
        </expand>
        <!-- The dataset names are kept unchanged for compatibility, but they do not match the content:
             "kfreq" receives the frequency distribution printed with k_dist,
             "kfreqdist" receives the k-mer/count table written with write_k_freq.
             The labels describe what each dataset actually contains. -->
        <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequency distribution">
            <filter>options_kmer_char['k_dist']</filter>
        </data>
        <data format="tabular" name="kfreqdist" label="${tool.name} on ${on_string} kmer frequencies">
            <filter>options_kmer_char['write_k_freq']</filter>
        </data>
    </outputs>
    <tests>
        <!-- single end input, rare and abundant filtering, log requested -->
        <test expect_num_outputs="2">
            <conditional name="input_type">
                <param name="input_type_select" value="single"/>
                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
            </conditional>
            <section name="options_filtering">
                <param name="rare" value="--rare"/>
                <param name="abundant" value="--abundant"/>
                <param name="k_len" value="16"/>
            </section>
            <param name="add_log" value="yes"/>
            <assert_command>
                <has_text text="--rare"/>
                <has_text text="--abundant"/>
                <has_text text="--k_len 16"/>
            </assert_command>
            <!-- the log is compared against the reference file and checked for the read count -->
            <output name="output_log" ftype="txt" file="kmerfilter/kmerfilter.log" lines_diff="8">
                <assert_contents><has_text text="5 retained reads."/></assert_contents>
            </output>
            <output name="clean" compare="diff" ftype="fastq" file="kmerfilter/Removed1_0001.1.1.fq.single"/>
        </test>
        <!-- paired input, normalization, discarded reads captured -->
        <test expect_num_outputs="7">
            <conditional name="input_type">
                <param name="input_type_select" value="paired"/>
                <param name="fqinputs">
                    <collection type="paired">
                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz"/>
                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz"/>
                    </collection>
                </param>
            </conditional>
            <param name="capture" value="-D"/>
            <section name="options_normalization">
                <param name="normalize" value="1"/>
            </section>
            <param name="add_log" value="yes"/>
            <assert_command>
                <has_text text="--normalize 1"/>
            </assert_command>
            <output name="output_log"><assert_contents><has_text text="8 retained reads."/></assert_contents></output>
            <output_collection name="clean_pair" type="paired">
                <element name="forward" compare="diff" ftype="fastq" file="kmerfilter/Removed1_0001.1.1.fq"/>
                <element name="reverse" compare="diff" ftype="fastq" file="kmerfilter/Removed2_0001.2.2.fq"/>
            </output_collection>
            <output_collection name="discarded_pair" type="paired">
                <element name="forward" compare="diff" ftype="fastq" file="kmerfilter/Discarded1_0001.1.1.fq"/>
                <element name="reverse" compare="diff" ftype="fastq" file="kmerfilter/Discarded2_0001.2.2.fq"/>
            </output_collection>
        </test>
        <!-- write_k_freq: the k-mer/count table is stored in the kfreqdist output -->
        <test expect_num_outputs="2">
            <conditional name="input_type">
                <param name="input_type_select" value="single"/>
                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
            </conditional>
            <section name="options_kmer_char">
                <param name="write_k_freq" value="--write_k_freq"/>
            </section>
            <param name="add_log" value="yes"/>
            <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output>
            <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
        </test>
        <!-- k_dist: the frequency distribution is stored in the kfreq output, no log requested -->
        <test expect_num_outputs="1">
            <conditional name="input_type">
                <param name="input_type_select" value="single"/>
                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
            </conditional>
            <section name="options_kmer_char">
                <param name="k_dist" value="--k_dist"/>
            </section>
            <param name="add_log" value="no"/>
            <assert_stderr><has_text text="Generating kmer distribution..."/></assert_stderr>
            <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.

@STACKS_INFOS@
]]>
    </help>
    <!-- citation entries are supplied by the "citation" macro -->
    <expand macro="citation"/>
</tool>