Mercurial > repos > bgruening > wtdbg
diff wtdbg.xml @ 0:6a060928f7ff draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/wtdbg commit c97be39112be9dc6118a3e12e51dcb15ed554274
author | bgruening |
---|---|
date | Tue, 12 Jun 2018 13:40:49 -0400 |
parents | |
children | e100f3f4d80e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wtdbg.xml Tue Jun 12 13:40:49 2018 -0400 @@ -0,0 +1,167 @@ +<tool id="wtdbg" name="WTDBG" version="1.2.8.1"> + <description>De novo assembler AND consensuser for long noisy sequences</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>wtdbg -help | grep 'Version:'</version_command> + <command detect_errors="exit_code"><![CDATA[ + wtdbg + -t \${GALAXY_SLOTS:-4} + -i $i + -o 'dbg' + #if $I: + -I '$I' + #end if + #if $load_alignments: + --load-alignments '$load_alignments' + #end if + + -k $k + -p $p + -K $K + -E $E + $F + -S $S + -X $X + -Y $Y + -x $x + -y $y + -l $l + -m $m + -s $s + --tidy-reads $tidy_reads + --edge-min $edge_min + $rescue_low_cov_edges + && + wtdbg-cns + -t \${GALAXY_SLOTS:-4} + -o dbg.ctg.lay.fa + -i dbg.ctg.lay + -j $cns.j + -k $cns.k + -Z $cns.Z + -W $cns.W + -H $cns.H + -L $cns.L + -c $cns.c + -M $cns.M + -X $cns.X + -I $cns.I + -D $cns.D + -E $cns.E + -m $cns.m + -S $cns.S + + ]]></command> + <inputs> + <param type="data" argument="-i" format="fasta,fasta.gz" label="Long reads sequences file"/> + <param type="data" argument="-I" format="fasta,fasta.gz" optional="True" label="Error-free sequences file"/> + <param type="data" argument="--load-alignments" name="load_alignments" format="tabular" optional="True" label="Load pre-computed alignments"/> + + <param argument="k" type="integer" value="0" min="0" max="25" label="Kmer fsize" /> + <param argument="p" type="integer" value="21" min="0" max="25" label="Kmer psize" /> + <param argument="K" type="float" value="1000" min="0" max="65535" label="Filter high frequency kmers" /> + <param argument="E" type="integer" value="2" label="Min kmer frequency" /> + <param argument="F" type="boolean" truevalue="-F" falsevalue="" checked="False" label="Filter low frequency kmers by a 4G-bytes array" /> + <param argument="S" type="integer" value="4" label="Subsampling kmers, 1/S kmers are indexed" /> + <param argument="X" type="integer" value="4" label="Max number of bin (256bp) in one gap" /> + <param argument="Y" type="integer" value="4" label="Max number of bin (256bp) in one deviation" /> + <param argument="x" type="integer" value="-7" label="penalty for BIN gap" /> + <param argument="y" type="integer" value="-21" label="penalty for BIN deviation" /> + <param argument="l" type="float" value="2048" min="1" label="Min length of alignment" /> + <param argument="m" type="float" value="200" label="Min matched" /> + <param argument="s" type="float" value="0.2" label="Max length variation of two aligned fragments" /> + + <param argument="--tidy-reads" name="tidy_reads" type="integer" value="0" label="Filter reads less than tidy-reads" /> + <param argument="--edge-min" name="edge_min" type="integer" value="3" label="The minimal depth of a valid edge set to" /> + <param argument="--rescue-low-cov-edges" name="rescue_low_cov_edges" type="boolean" truevalue="--rescue-low-cov-edges" + falsevalue="" label="Try to rescue low coverage edges" /> + + <section name="cns" title="Consensus options"> + <!-- optional inputs --> + <!-- <param argument="-i" type="data" format="utg.cns" label="Input file(s) *.utg.cns" /> --> + + <param argument="-j" type="integer" value="1000" label="Expected length of node" /> + <param argument="-k" type="integer" value="15" label="Kmer size for long reads" /> + <param argument="-Z" type="integer" value="4" label="Z-cutoff, drop the lower" /> + <param argument="-W" type="integer" value="48" label="W-cutoff, drop the lagger (position)" /> + <param argument="-H" type="integer" value="1" label="High coverage bonus" /> + <param argument="-L" type="integer" value="10" label="High coverage cutoff" /> + <param argument="-c" type="select" label="Candidate strategy"> + <option value="0" selected="true">best-kmers</option> + <option value="1" >median length</option> + <option value="2" >first (include)</option> + <option value="3" >first (exclude)</option> + <option value="4" >longest</option> + <option value="5" >shortest</option> + </param> + + <param argument="-M" type="integer" value="2" label="Match score" /> + <param argument="-X" type="integer" value="-7" label="Mismatch score" /> + <param argument="-I" type="integer" value="-3" label="Insertion score" /> + <param argument="-D" type="integer" value="-4" label="Deletion score" /> + <param argument="-E" type="integer" value="-2" label="Gap extension score" /> + <param argument="-m" type="select" label="Correction mode"> + <option value="1" selected="true">DBG correction</option> + <option value="2" >DAG correction</option> + </param> + <param argument="-S" type="integer" value="1" label="Correct structure before error correction" /> + </section> + + </inputs> + <outputs> + <data name="output_alignments" format="fasta" label="${tool.name} alignments" from_work_dir="dbg.alignments" /> + <data name="output_ctglay" format="txt" label="${tool.name} contigs layout" from_work_dir="dbg.ctg.lay" /> + <data name="output_consensus" format="fasta" label="${tool.name} consensus" from_work_dir="dbg.ctg.lay.fa" /> + </outputs> + <tests> + <test> + <param name="i" value="ecoli-reads.fa"/> + <output name="output_alignments" file="result1.alignments"/> + <output name="output_ctglay" file="result1.ctg.lay"/> + <output name="output_consensus" file="consensus_result1.fa"/> + </test> + <test> + <param name="i" value="ecoli-reads.fa"/> + <param name="tidy_reads" value="5000"/> + <param name="edge_min" value="2"/> + <param name="rescue_low_cov_edges" value="True"/> + <output name="output_consensus" file="consensus_result2.fa"/> + </test> + <test> + <param name="i" value="ecoli-reads.fa"/> + <param name="cns.c" value="1"/> + <param name="cns.E" value="-3"/> + <param name="cns.j" value="500"/> + <param name="cns.m" value="2"/> + <param name="cns.k" value="5"/> + <output name="output_consensus" file="consensus_result3.fa"/> + </test> + + </tests> + + <help><![CDATA[ +**What it does** + +WTDBG is a de novo assembler for long noisy sequences, based on fuzzy Bruijn graphs (FBG). + +**Alignment** + +KBM (Kmer-BIN-Mapping) groups k-mers from each non-overlapped sliding 256 bp fragments in long reads into bins. +Bins of which most k-mers are high frequency, are filtered as highly repetitive ones. +Then, KBM searches synteny of matched bin pairs in sequences in a dynamic programming way. +A matched bin pair in two sequences is defined as two bins different by original but share a set of k-mers. +The result of alignments in KBM have the same features of traditional sequence alignment, excepting the unit of +KBM alignments is 256 bp bin instead of single base. + +**Assembly** + +FBG (Fuzzy Bruijn Graph) is composed of vertices in length of 1024 bp from reads, and edges connecting vertices +in their order on read paths. Comparing with DBG, the size of vertices in FBG are much bigger, thus won't be +sensitive to small repeat. To tolerate high sequencing errors, FBG's vertices are found using gapped +sequence alignments from KBM or other aligners, comparing with searching identical k-mers in DBG. + + ]]></help> + <expand macro="citations" /> +</tool>