wtdbg: wtdbg.xml comparison

comparison wtdbg.xml @ 0:6a060928f7ff draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/wtdbg commit c97be39112be9dc6118a3e12e51dcb15ed554274

author	bgruening
date	Tue, 12 Jun 2018 13:40:49 -0400
parents
children	e100f3f4d80e

comparison

equal deleted inserted replaced

--1:000000000000
+:6a060928f7ff
+<tool id="wtdbg" name="WTDBG" version="1.2.8.1">
+<description>De novo assembler and consensuser for long noisy sequences</description>
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="requirements" />
+<!-- Version probe: invokes wtdbg with -help and keeps only the output lines that contain 'Version:'. -->
+<version_command>wtdbg -help | grep 'Version:'</version_command>
+<command detect_errors="exit_code"><![CDATA[
+## Step 1: run wtdbg on the reads; 'dbg' is passed as the output prefix.
+wtdbg
+-t \${GALAXY_SLOTS:-4}
+## Dataset paths are single-quoted so a path with spaces or shell metacharacters stays a single argument.
+-i '$i'
+-o 'dbg'
+## The two optional datasets are only passed when the user selected one.
+#if $I:
+-I '$I'
+#end if
+#if $load_alignments:
+--load-alignments '$load_alignments'
+#end if
+-k $k
+-p $p
+-K $K
+-E $E
+## Boolean param: expands to its flag when checked, to an empty string otherwise.
+$F
+-S $S
+-X $X
+-Y $Y
+-x $x
+-y $y
+-l $l
+-m $m
+-s $s
+--tidy-reads $tidy_reads
+--edge-min $edge_min
+## Boolean param: expands to its flag when checked, to an empty string otherwise.
+$rescue_low_cov_edges
+&&
+## Step 2: run wtdbg-cns, which is only reached when step 1 exits successfully.
+## It reads dbg.ctg.lay (presumably written by step 1 under the 'dbg' prefix) and writes dbg.ctg.lay.fa.
+wtdbg-cns
+-t \${GALAXY_SLOTS:-4}
+-o dbg.ctg.lay.fa
+-i dbg.ctg.lay
+-j $cns.j
+-k $cns.k
+-Z $cns.Z
+-W $cns.W
+-H $cns.H
+-L $cns.L
+-c $cns.c
+-M $cns.M
+-X $cns.X
+-I $cns.I
+-D $cns.D
+-E $cns.E
+-m $cns.m
+-S $cns.S
+]]></command>
+<inputs>
+<!-- wtdbg options. Where no explicit name is given, Galaxy derives the param name from `argument`
+     by dropping the leading dashes, so argument="-k" is still referenced as $k in the command. -->
+<param type="data" argument="-i" format="fasta,fasta.gz" label="Long reads sequences file"/>
+<param type="data" argument="-I" format="fasta,fasta.gz" optional="True" label="Error-free sequences file"/>
+<param type="data" argument="--load-alignments" name="load_alignments" format="tabular" optional="True" label="Load pre-computed alignments"/>
+<param argument="-k" type="integer" value="0" min="0" max="25" label="Kmer fsize" />
+<param argument="-p" type="integer" value="21" min="0" max="25" label="Kmer psize" />
+<param argument="-K" type="float" value="1000" min="0" max="65535" label="Filter high frequency kmers" />
+<param argument="-E" type="integer" value="2" label="Min kmer frequency" />
+<param argument="-F" type="boolean" truevalue="-F" falsevalue="" checked="False" label="Filter low frequency kmers by a 4G-bytes array" />
+<param argument="-S" type="integer" value="4" label="Subsampling kmers, 1/S kmers are indexed" />
+<param argument="-X" type="integer" value="4" label="Max number of bin (256bp) in one gap" />
+<param argument="-Y" type="integer" value="4" label="Max number of bin (256bp) in one deviation" />
+<param argument="-x" type="integer" value="-7" label="penalty for BIN gap" />
+<param argument="-y" type="integer" value="-21" label="penalty for BIN deviation" />
+<param argument="-l" type="float" value="2048" min="1" label="Min length of alignment" />
+<param argument="-m" type="float" value="200" label="Min matched" />
+<param argument="-s" type="float" value="0.2" label="Max length variation of two aligned fragments" />
+<param argument="--tidy-reads" name="tidy_reads" type="integer" value="0" label="Filter reads less than tidy-reads" />
+<param argument="--edge-min" name="edge_min" type="integer" value="3" label="The minimal depth of a valid edge set to" />
+<param argument="--rescue-low-cov-edges" name="rescue_low_cov_edges" type="boolean" truevalue="--rescue-low-cov-edges"
+falsevalue="" checked="False" label="Try to rescue low coverage edges" />
+<!-- wtdbg-cns options, grouped in their own section; the command reads them as $cns.<name>. -->
+<section name="cns" title="Consensus options">
+<!-- optional inputs -->
+<!-- <param argument="-i" type="data" format="utg.cns" label="Input file(s) *.utg.cns" /> -->
+<param argument="-j" type="integer" value="1000" label="Expected length of node" />
+<param argument="-k" type="integer" value="15" label="Kmer size for long reads" />
+<param argument="-Z" type="integer" value="4" label="Z-cutoff, drop the lower" />
+<param argument="-W" type="integer" value="48" label="W-cutoff, drop the lagger (position)" />
+<param argument="-H" type="integer" value="1" label="High coverage bonus" />
+<param argument="-L" type="integer" value="10" label="High coverage cutoff" />
+<param argument="-c" type="select" label="Candidate strategy">
+<option value="0" selected="true">best-kmers</option>
+<option value="1" >median length</option>
+<option value="2" >first (include)</option>
+<option value="3" >first (exclude)</option>
+<option value="4" >longest</option>
+<option value="5" >shortest</option>
+</param>
+<param argument="-M" type="integer" value="2" label="Match score" />
+<param argument="-X" type="integer" value="-7" label="Mismatch score" />
+<param argument="-I" type="integer" value="-3" label="Insertion score" />
+<param argument="-D" type="integer" value="-4" label="Deletion score" />
+<param argument="-E" type="integer" value="-2" label="Gap extension score" />
+<param argument="-m" type="select" label="Correction mode">
+<option value="1" selected="true">DBG correction</option>
+<option value="2" >DAG correction</option>
+</param>
+<param argument="-S" type="integer" value="1" label="Correct structure before error correction" />
+</section>
+</inputs>
+<outputs>
+<!-- Each output is collected from a file in the job working directory via from_work_dir. -->
+<!-- The alignments dataset is declared tabular so that it matches the datatype accepted by this
+     tool's own load_alignments input and can be fed back into a later run. -->
+<data name="output_alignments" format="tabular" label="${tool.name}  alignments" from_work_dir="dbg.alignments" />
+<data name="output_ctglay" format="txt" label="${tool.name}  contigs layout" from_work_dir="dbg.ctg.lay" />
+<data name="output_consensus" format="fasta" label="${tool.name} consensus" from_work_dir="dbg.ctg.lay.fa" />
+</outputs>
+<tests>
+<!-- Test 1: only the reads file is set; all three outputs are compared with stored files. -->
+<test>
+<param name="i" value="ecoli-reads.fa" />
+<output name="output_alignments" file="result1.alignments" />
+<output name="output_ctglay" file="result1.ctg.lay" />
+<output name="output_consensus" file="consensus_result1.fa" />
+</test>
+<!-- Test 2: overrides three wtdbg options; only the consensus output is compared. -->
+<test>
+<param name="i" value="ecoli-reads.fa" />
+<param name="rescue_low_cov_edges" value="True" />
+<param name="edge_min" value="2" />
+<param name="tidy_reads" value="5000" />
+<output name="output_consensus" file="consensus_result2.fa" />
+</test>
+<!-- Test 3: overrides five options of the cns section; only the consensus output is compared. -->
+<!-- NOTE(review): the section params are addressed with a dotted cns.<name> form; confirm that the
+     targeted Galaxy release resolves this notation before relying on these overrides. -->
+<test>
+<param name="i" value="ecoli-reads.fa" />
+<param name="cns.j" value="500" />
+<param name="cns.k" value="5" />
+<param name="cns.c" value="1" />
+<param name="cns.E" value="-3" />
+<param name="cns.m" value="2" />
+<output name="output_consensus" file="consensus_result3.fa" />
+</test>
+</tests>
+<help><![CDATA[
+**What it does**
+
+WTDBG is a de novo assembler for long noisy sequences, based on fuzzy Bruijn graphs (FBG).
+
+**Alignment**
+
+KBM (Kmer-BIN-Mapping) groups the k-mers from each non-overlapping, sliding 256 bp fragment of a long read into a bin.
+Bins in which most k-mers are high-frequency are filtered out as highly repetitive.
+KBM then searches for synteny among matched bin pairs between sequences using dynamic programming.
+A matched bin pair in two sequences is defined as two bins of different origin that share a set of k-mers.
+The resulting KBM alignments have the same features as traditional sequence alignments, except that the unit of
+a KBM alignment is a 256 bp bin instead of a single base.
+
+**Assembly**
+
+An FBG (Fuzzy Bruijn Graph) is composed of vertices, each 1024 bp long and taken from reads, and edges connecting
+vertices in their order on read paths. Compared with a DBG, the vertices in an FBG are much larger and are
+therefore not sensitive to small repeats. To tolerate high sequencing error rates, FBG vertices are found using
+gapped sequence alignments from KBM or other aligners, rather than by searching for identical k-mers as in a DBG.
+]]></help>
+<expand macro="citations" />
+</tool>

Mercurial > repos > bgruening > wtdbg

comparison wtdbg.xml @ 0:6a060928f7ff draft