view windowmasker_ustat.xml @ 0:d90ae4a02efd draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/windowmasker/ commit d529dafd5a2d3a017de226cd59e42cf630348e06
author iuc
date Thu, 14 Dec 2023 16:04:27 +0000
parents
children
line wrap: on
line source

<tool id="windowmasker_ustat" name="WindowMasker ustat" version="1.0" profile="22.01">
    <!-- One-line summary of the tool. -->
    <description>Mask sequences using a WindowMasker unit counts table</description>
    <!-- Shared macro definitions are imported from macros.xml (not visible in this file). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro; presumably defined in macros.xml (verify there). -->
    <expand macro="requirements" />
    <command detect_errors="exit_code">
<![CDATA[
    ## Run WindowMasker stage 2 (masking) on the FASTA input using the unit counts
    ## table given by the mkcount_input parameter.
    windowmasker -ustat '${mkcount_input}'
        -infmt fasta
        -in '${fasta_input}'
        -outfmt '${output_format}'
        ${parse_seqids}

        ## DUST flags are only emitted when the selector is "yes".
        #if str($use_dust.use_dust_selector) == "yes":
            -dust true
            -dust_level ${use_dust.dust_level}
        #end if

        ## Optional advanced overrides. Each guard compares the string form with ""
        ## instead of testing truthiness, so that an explicit 0 (allowed by min="0")
        ## is still passed on the command line. Every flag is guarded by, and
        ## receives, its own parameter.
        #if str($adv.set_t_high) != "":
            -set_t_high $adv.set_t_high
        #end if
        #if str($adv.set_t_low) != "":
            -set_t_low $adv.set_t_low
        #end if
        #if str($adv.t_extend) != "":
            -t_extend $adv.t_extend
        #end if
        #if str($adv.t_low) != "":
            -t_low $adv.t_low
        #end if
        #if str($adv.t_high) != "":
            -t_high $adv.t_high
        #end if
        #if str($adv.t_thres) != "":
            -t_thres $adv.t_thres
        #end if
        #if str($adv.window) != "":
            -window $adv.window
        #end if

        ## Convert WindowMasker interval output to BED format: without -out the
        ## result is piped through the bundled Perl converter. Every other format
        ## is written directly to the output dataset.
        #if str($output_format) == "interval":
            | perl '${__tool_directory__}/windowmasker_to_bed.pl' > '${mask_output}'
        #else
            -out '${mask_output}'
        #end if
]]>
    </command>
    <inputs>
        <!-- Sequences to be masked. -->
        <param
            name="fasta_input"
            argument="-in"
            type="data"
            format="fasta"
            label="FASTA sequence file" />

        <!-- Unit counts table passed to -ustat. -->
        <param
            name="mkcount_input"
            type="data"
            format="txt"
            label="Unit counts produced by WindowMasker mkcount" />

        <!-- Emits the bare -parse_seqids flag when checked, nothing otherwise. -->
        <param
            argument="-parse_seqids"
            type="boolean"
            checked="false"
            truevalue="-parse_seqids"
            falsevalue=""
            label="Parse Seq-ids in FASTA input" />

        <!-- DUST low-complexity masking; the level is only offered when DUST is enabled. -->
        <conditional name="use_dust">
            <param
                name="use_dust_selector"
                argument="-dust"
                type="select"
                label="Use DUST to mask low complexity sequences?">
                <option value="yes">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param
                    argument="-dust_level"
                    type="integer"
                    min="1"
                    value="20"
                    label="DUST level"
                    help="Score threshold for subwindows" />
            </when>
            <when value="no" />
        </conditional>

        <!-- Value handed to -outfmt; "interval" is presented to the user as BED. -->
        <param name="output_format" type="select" label="Output format">
            <option value="fasta">FASTA</option>
            <option value="interval" selected="true">BED</option>
            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
            <option value="maskinfo_xml">maskinfo XML</option>
        </param>

        <!-- Optional non-negative integer overrides; all are unset by default. -->
        <section name="adv" title="Advanced options" expanded="false">
            <param
                argument="-set_t_high"
                type="integer"
                min="0"
                optional="true"
                label="set_t_high"
                help="Score for units with unit count greater than T_high" />

            <param
                argument="-set_t_low"
                type="integer"
                min="0"
                optional="true"
                label="set_t_low"
                help="Score for units with unit count less than T_low" />

            <param
                argument="-t_extend"
                type="integer"
                min="0"
                optional="true"
                label="t_extend"
                help="Override the t_extend value in the unit counts file" />

            <param
                argument="-t_low"
                type="integer"
                min="0"
                optional="true"
                label="t_low"
                help="Override the t_low value in the unit counts file" />

            <param
                argument="-t_high"
                type="integer"
                min="0"
                optional="true"
                label="t_high"
                help="Override the t_high value in the unit counts file" />

            <param
                argument="-t_thres"
                type="integer"
                min="0"
                optional="true"
                label="Score threshold"
                help="Override the score threshold value in the unit counts file" />

            <param
                argument="-window"
                type="integer"
                min="0"
                optional="true"
                label="Size of the sliding window"
                help="The default is unit_size + 4" />
        </section>
    </inputs>
    <outputs>
        <!-- Datatype defaults to BED and is switched whenever output_format selects another format. -->
        <data format="bed" name="mask_output">
            <change_format>
                <when format="fasta" input="output_format" value="fasta" />
                <when format="maskinfo-asn1-binary" input="output_format" value="maskinfo_asn1_bin" />
                <when format="maskinfo-asn1" input="output_format" value="maskinfo_asn1_text" />
                <when format="xml" input="output_format" value="maskinfo_xml" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!--
            All tests share the same FASTA input and unit counts table; each one varies a
            single aspect of the tool. Parameters that live inside the "adv" section or the
            "use_dust" conditional are addressed through their enclosing element, and the
            boolean parameter is set with true/false rather than its command-line truevalue.
        -->
        <test>
            <!-- Test WindowMasker ustat with fasta output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="fasta" />
            <output name="mask_output" file="contigs.wm.fa" ftype="fasta" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with ASN.1 text output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_asn1_text" />
            <output name="mask_output" file="contigs.wm.asn1" ftype="maskinfo-asn1" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with ASN.1 binary output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_asn1_bin" />
            <output name="mask_output" file="contigs.wm.asnb" ftype="maskinfo-asn1-binary" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with XML output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_xml" />
            <output name="mask_output" file="contigs.wm.xml" ftype="xml" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with advanced settings -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <section name="adv">
                <param name="set_t_high" value="10" />
                <param name="set_t_low" value="9" />
                <param name="t_extend" value="5" />
                <param name="t_low" value="9" />
                <param name="t_high" value="10" />
                <param name="t_thres" value="20" />
                <param name="window" value="50" />
            </section>
            <output name="mask_output" file="contigs.advanced_wm.bed" ftype="bed" />
        </test>
        <test>
            <!-- Test WindowMasker ustat without dust -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <conditional name="use_dust">
                <param name="use_dust_selector" value="no" />
            </conditional>
            <output name="mask_output" file="contigs.nodust_wm.bed" ftype="bed" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with parse Seq-ids -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="parse_seqids" value="true" />
            <output name="mask_output" file="contigs.seqid_wm.bed" ftype="bed" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool runs `stage 2 <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/>`_
of the WindowMasker analysis to identify repeats within the input sequences.

.. class:: infomark

**Output formats:**

* Use the **binary or text maskinfo ASN.1** output formats to generate the mask file for
  the `NCBI BLAST+ makeblastdb tool <https://www.ncbi.nlm.nih.gov/books/NBK279681/#_cookbook_Create_BLAST_database_with_the_>`_
* Use the BED output format to generate a list of masked regions


.. class:: infomark

**Advanced options:**

* See the `WindowMasker README file <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/README>`_
  for additional details on the WindowMasker repeat masking options

    ]]></help>
    <expand macro="citations" />
</tool>