diff windowmasker_ustat.xml @ 0:f80c9e6700ba draft default tip

planemo upload commit 91a780909d1eda07d17f6aebf7f08f0c024b6a25
author yating-l
date Tue, 16 May 2017 13:18:12 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/windowmasker_ustat.xml	Tue May 16 13:18:12 2017 -0400
@@ -0,0 +1,203 @@
+<?xml version="1.0"?>
+<tool id="windowmasker_ustat" name="WindowMasker_ustat" version="1.0">
+    <description>Mask sequences using a WindowMasker unit counts table</description>
+
+    <macros>
+        <import>windowmasker_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <command detect_errors="exit_code">
+<![CDATA[
+    @OPTIONAL_PARAM_FUNC@
+
+    windowmasker -ustat "${mkcount_input}"
+        -infmt fasta
+        -in "${fasta_input}"
+        -outfmt "${output_format}"
+        ${parse_seqids}
+
+        #if str($use_dust.use_dust_selector) == "yes":
+            -dust true -dust_level ${use_dust.dust_level}
+        #end if
+
+        $optional_param("-set_t_high", $adv.set_t_high)
+        $optional_param("-set_t_low",  $adv.set_t_low)
+        $optional_param("-t_extend",   $adv.t_extend)
+        $optional_param("-t_low",      $adv.t_low)
+        $optional_param("-t_high",     $adv.t_high)
+        $optional_param("-t_thres",    $adv.t_thres)
+        $optional_param("-window",     $adv.window)
+
+        ## Convert WindowMasker interval output to BED format
+        #if str($output_format) == "interval":
+            | ${__tool_directory__}/windowmasker_to_bed.pl > "${mask_output}"
+        #else
+            -out "${mask_output}"
+        #end if
+]]>
+    </command>
+    <inputs>
+        <param name="fasta_input" type="data" format="fasta"
+                label="FASTA sequence file" />
+
+        <param name="mkcount_input" type="data" format="txt"
+                label="Unit counts produced by WindowMasker mkcount" />
+
+        <param name="parse_seqids" type="boolean"  checked="false"
+                truevalue="-parse_seqids" falsevalue=""
+                label="Parse Seq-ids in FASTA input"
+                    help="-parse_seqids" />
+
+        <conditional name="use_dust">
+            <param name="use_dust_selector" type="select"
+                    label="Use DUST to mask low complexity sequences?"
+                    help="-dust">
+                <option value="yes">Yes</option>
+                <option value="no">No</option>
+            </param>
+
+            <when value="yes">
+                <param name="dust_level" type="integer"
+                    min="1" value="20"
+                    label="DUST level"
+                    help="Score threshold for subwindows" />
+            </when>
+
+            <when value="no"></when>
+        </conditional>
+
+        <param name="output_format" type="select" label="Output format">
+            <option value="fasta">FASTA</option>
+            <option value="interval" selected="true">BED</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo XML</option>
+        </param>
+
+        <section name="adv" title="Advanced options" expanded="false">
+
+            <param name="set_t_high" type="integer" label="set_t_high"
+                    min="0" optional="true"
+                    help="Score for units with unit count greater than T_high" />
+
+            <param name="set_t_low" type="integer" label="set_t_low"
+                    min="0" optional="true"
+                    help="Score for units with unit count less than T_low" />
+
+            <param name="t_extend" type="integer" label="t_extend"
+                    min="0" optional="true"
+                    help="Override the t_extend value in the unit counts file" />
+
+            <param name="t_low" type="integer" label="t_low"
+                    min="0" optional="true"
+                    help="Override the t_low value in the unit counts file" />
+
+            <param name="t_high" type="integer" label="t_high"
+                    min="0" optional="true"
+                    help="Override the t_high value in the unit counts file" />
+
+            <param name="t_thres" type="integer" label="t_threshold"
+                    min="0" optional="true"
+                    help="Override the score threshold value in the unit counts file" />
+
+            <param name="window" type="integer" label="window"
+                    min="0" optional="true"
+                    help="Size of the sliding window (default = unit_size + 4)" />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="mask_output" format="bed">
+            <change_format>
+                <when input="output_format" value="fasta" format="fasta" />
+                <when input="output_format" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+                <when input="output_format" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+                <when input="output_format" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Test WindowMasker ustat with fasta output -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="output_format" value="fasta" />
+            <output name="mask_output" file="contigs.wm.fa" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat with ASN.1 text output -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="output_format" value="maskinfo_asn1_text" />
+            <output name="mask_output" file="contigs.wm.asn1" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat with ASN.1 binary output -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="output_format" value="maskinfo_asn1_bin" />
+            <output name="mask_output" file="contigs.wm.asnb" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat with XML output -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="output_format" value="maskinfo_xml" />
+            <output name="mask_output" file="contigs.wm.xml" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat with advanced settings -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="set_t_high" value="10" />
+            <param name="set_t_low" value="9" />
+            <param name="t_extend" value="5" />
+            <param name="t_low" value="9" />
+            <param name="t_high" value="10" />
+            <param name="t_thres" value="20" />
+            <param name="window" value="50" />
+            <output name="mask_output" file="contigs.advanced_wm.bed" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat without dust -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="use_dust_selector" value="no" />
+            <output name="mask_output" file="contigs.nodust_wm.bed" />
+        </test>
+        <test>
+            <!-- Test WindowMasker ustat with parse Seq-ids -->
+            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
+            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
+            <param name="parse_seqids" value="-parse_seqids" />
+            <output name="mask_output" file="contigs.seqid_wm.bed" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+This tool runs `stage 2 <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/>`_
+of the WindowMasker analysis to identify repeats within the input sequences.
+
+.. class:: infomark
+
+**Output formats:**
+
+* Use the **binary or text maskinfo ASN.1** output formats to generate the mask file for
+  the `NCBI BLAST+ makeblastdb tool <https://www.ncbi.nlm.nih.gov/books/NBK279681/#_cookbook_Create_BLAST_database_with_the_>`_
+* Use the BED output format to generate a list of masked regions
+
+
+.. class:: infomark
+
+**Advanced options:**
+
+* See the `WindowMasker README file <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/README>`_
+  for additional details on the WindowMasker repeat masking options
+
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>