diff wtdbg.xml @ 0:6a060928f7ff draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/wtdbg commit c97be39112be9dc6118a3e12e51dcb15ed554274
author bgruening
date Tue, 12 Jun 2018 13:40:49 -0400
parents
children e100f3f4d80e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wtdbg.xml	Tue Jun 12 13:40:49 2018 -0400
@@ -0,0 +1,167 @@
+<tool id="wtdbg" name="WTDBG" version="1.2.8.1">
+    <description>De novo assembler AND consensuser for long noisy sequences</description>
+    <macros> <!-- macros.xml is the only macro file imported by this wrapper -->
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" /> <!-- expands the macro named "requirements" -->
+    <version_command>wtdbg -help | grep 'Version:'</version_command> <!-- keeps only the 'Version:' line of the wtdbg -help output -->
+    <command detect_errors="exit_code"><![CDATA[
+    wtdbg
+        -t \${GALAXY_SLOTS:-4}
+        -i '$i'
+        -o 'dbg'
+        #if $I:
+            -I '$I'
+        #end if
+        #if $load_alignments:
+            --load-alignments '$load_alignments'
+        #end if
+        ## Dataset paths above are single-quoted; the options below come from typed form params (integer, float, boolean).
+        -k $k
+        -p $p
+        -K $K
+        -E $E
+        $F
+        -S $S
+        -X $X
+        -Y $Y
+        -x $x
+        -y $y
+        -l $l
+        -m $m
+        -s $s
+        --tidy-reads $tidy_reads
+        --edge-min $edge_min
+        $rescue_low_cov_edges
+    &&
+    wtdbg-cns
+        -t \${GALAXY_SLOTS:-4}
+        -o dbg.ctg.lay.fa
+        -i dbg.ctg.lay
+        -j $cns.j
+        -k $cns.k
+        -Z $cns.Z
+        -W $cns.W
+        -H $cns.H
+        -L $cns.L
+        -c $cns.c
+        -M $cns.M
+        -X $cns.X
+        -I $cns.I
+        -D $cns.D
+        -E $cns.E
+        -m $cns.m
+        -S $cns.S
+    ## The consensus step runs only if wtdbg succeeds; it takes dbg.ctg.lay as input and writes dbg.ctg.lay.fa.
+    ]]></command>
+    <inputs>
+        <param type="data" argument="-i" format="fasta,fasta.gz" label="Long reads sequences file"/>
+        <param type="data" argument="-I" format="fasta,fasta.gz" optional="True" label="Error-free sequences file"/>
+        <param type="data" argument="--load-alignments" name="load_alignments" format="tabular" optional="True" label="Load pre-computed alignments"/>
+        <!-- wtdbg options: "argument" carries the real flag (leading dash), as for -i/-I and the cns params; the derived param names (k, p, ...) stay the same -->
+        <param argument="-k" type="integer" value="0" min="0" max="25" label="Kmer fsize" />
+        <param argument="-p" type="integer" value="21" min="0" max="25" label="Kmer psize" />
+        <param argument="-K" type="float" value="1000" min="0" max="65535" label="Filter high frequency kmers" />
+        <param argument="-E" type="integer" value="2" label="Min kmer frequency" />
+        <param argument="-F" type="boolean" truevalue="-F" falsevalue="" checked="False" label="Filter low frequency kmers by a 4G-bytes array" />
+        <param argument="-S" type="integer" value="4" label="Subsampling kmers, 1/S kmers are indexed" />
+        <param argument="-X" type="integer" value="4" label="Max number of bin (256bp) in one gap" />
+        <param argument="-Y" type="integer" value="4" label="Max number of bin (256bp) in one deviation" />
+        <param argument="-x" type="integer" value="-7" label="penalty for BIN gap" />
+        <param argument="-y" type="integer" value="-21" label="penalty for BIN deviation" />
+        <param argument="-l" type="float" value="2048" min="1" label="Min length of alignment" />
+        <param argument="-m" type="float" value="200" label="Min matched" />
+        <param argument="-s" type="float" value="0.2" label="Max length variation of two aligned fragments" />
+        <!-- long options: the param name is given explicitly and is what the command template references -->
+        <param argument="--tidy-reads" name="tidy_reads" type="integer" value="0" label="Filter reads less than tidy-reads" />
+        <param argument="--edge-min" name="edge_min" type="integer" value="3" label="The minimal depth of a valid edge set to" />
+        <param argument="--rescue-low-cov-edges" name="rescue_low_cov_edges" type="boolean" truevalue="--rescue-low-cov-edges" 
+            falsevalue="" label="Try to rescue low coverage edges" />
+
+        <section name="cns" title="Consensus options">
+            <!-- optional inputs -->
+            <!-- <param argument="-i" type="data" format="utg.cns" label="Input file(s) *.utg.cns" /> -->
+            <!-- params in this section are referenced as $cns.<name> by the wtdbg-cns call in the command template -->
+            <param argument="-j" type="integer" value="1000" label="Expected length of node" />
+            <param argument="-k" type="integer" value="15" label="Kmer size for long reads" />
+            <param argument="-Z" type="integer" value="4" label="Z-cutoff, drop the lower" />
+            <param argument="-W" type="integer" value="48" label="W-cutoff, drop the lagger (position)" />
+            <param argument="-H" type="integer" value="1" label="High coverage bonus" />
+            <param argument="-L" type="integer" value="10" label="High coverage cutoff" />
+            <param argument="-c" type="select" label="Candidate strategy">
+                <option value="0" selected="true">best-kmers</option>
+                <option value="1" >median length</option>
+                <option value="2" >first (include)</option>
+                <option value="3" >first (exclude)</option>
+                <option value="4" >longest</option>
+                <option value="5" >shortest</option>
+            </param>
+
+            <param argument="-M" type="integer" value="2" label="Match score" />
+            <param argument="-X" type="integer" value="-7" label="Mismatch score" />
+            <param argument="-I" type="integer" value="-3" label="Insertion score" />
+            <param argument="-D" type="integer" value="-4" label="Deletion score" />
+            <param argument="-E" type="integer" value="-2" label="Gap extension score" />
+            <param argument="-m" type="select" label="Correction mode">
+                <option value="1" selected="true">DBG correction</option>
+                <option value="2" >DAG correction</option>
+            </param>
+            <param argument="-S" type="integer" value="1" label="Correct structure before error correction" />
+        </section>
+
+    </inputs>
+    <outputs>
+        <data name="output_alignments" format="tabular" label="${tool.name}  alignments" from_work_dir="dbg.alignments" /> <!-- tabular, the datatype the load_alignments input accepts, so this output can be reused there -->
+        <data name="output_ctglay" format="txt" label="${tool.name}  contigs layout" from_work_dir="dbg.ctg.lay" /> <!-- layout file that the command passes to wtdbg-cns -i -->
+        <data name="output_consensus" format="fasta" label="${tool.name} consensus" from_work_dir="dbg.ctg.lay.fa" /> <!-- file named by wtdbg-cns -o -->
+    </outputs>
+    <tests>
+        <test> <!-- only the reads input is set; all three outputs are compared -->
+            <param name="i" value="ecoli-reads.fa" />
+            <output name="output_consensus" file="consensus_result1.fa" />
+            <output name="output_ctglay" file="result1.ctg.lay" />
+            <output name="output_alignments" file="result1.alignments" />
+        </test>
+        <test> <!-- sets the three long-option assembler params; only the consensus is compared -->
+            <param name="i" value="ecoli-reads.fa" />
+            <param name="edge_min" value="2" />
+            <param name="rescue_low_cov_edges" value="True" />
+            <param name="tidy_reads" value="5000" />
+            <output name="output_consensus" file="consensus_result2.fa" />
+        </test>
+        <test> <!-- sets params of the cns section; only the consensus is compared -->
+            <param name="i" value="ecoli-reads.fa" />
+            <param name="cns.j" value="500" />
+            <param name="cns.k" value="5" />
+            <param name="cns.c" value="1" />
+            <param name="cns.E" value="-3" />
+            <param name="cns.m" value="2" />
+            <output name="output_consensus" file="consensus_result3.fa" />
+        </test>
+    <!-- NOTE(review): section params are addressed as cns.X here; confirm that Galaxy resolves this form (a nested section element may be required) -->
+    </tests>
+    
+    <help><![CDATA[
+**What it does**
+
+WTDBG is a de novo assembler for long noisy sequences, based on fuzzy Bruijn graphs (FBG).
+
+**Alignment**
+
+KBM (Kmer-BIN-Mapping) groups the k-mers from each non-overlapping 256 bp sliding fragment of a long read into a bin.
+Bins in which most k-mers are high-frequency are filtered out as highly repetitive.
+Then, KBM searches for synteny of matched bin pairs between sequences by dynamic programming.
+A matched bin pair in two sequences is defined as two bins of different origin that share a set of k-mers.
+The resulting KBM alignments have the same features as traditional sequence alignments, except that the unit of
+a KBM alignment is a 256 bp bin instead of a single base.
+
+**Assembly**
+
+An FBG (Fuzzy Bruijn Graph) is composed of vertices of length 1024 bp taken from reads, and edges connecting vertices
+in their order on read paths. Compared with a DBG, the vertices in an FBG are much larger and are therefore not
+sensitive to small repeats. To tolerate high sequencing error rates, FBG vertices are found using gapped
+sequence alignments from KBM or other aligners, rather than by searching for identical k-mers as in a DBG.
+
+  ]]></help>
+    <expand macro="citations" /> <!-- expands the macro named "citations" -->
+</tool>