Mercurial > repos > iuc > seqwish
changeset 0:7f7a074326ac draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqwish/ commit eff85ef0f4297e12deda88b534627b231e218bde"
author | iuc |
---|---|
date | Wed, 08 Apr 2020 11:23:40 -0400 |
parents | |
children | 189ae8d7e08e |
files | seqwish.xml test-data/A-3105.fa.gz test-data/A-3105.paf.gz |
diffstat | 3 files changed, 72 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seqwish.xml Wed Apr 08 11:23:40 2020 -0400 @@ -0,0 +1,72 @@ +<tool id="seqwish" name="seqwish" version="@TOOL_VERSION@" > + <description>Alignment to variation graph inducer</description> + <macros> + <token name="@TOOL_VERSION@">0.4</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">seqwish</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +seqwish +-p '$paf_alns' +-s ${ ' '.join(["'%s'" % x for x in $seqs]) } +--gfa='$out_gfa' +#if $m: + -m '$m' +#end if +--threads=\${GALAXY_SLOTS:-1} +#if str($r): + -r $r +#end if +#if str($k): + -k $k +#end if +#if str($B): + -B $B +#end if + ]]></command> + <inputs> + <param argument="--paf-alns" type="data" format="paf" label="Induce the graph from these PAF formatted alignments" /> + <param argument="--seqs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="The sequences used to generate the alignments" /> + <param argument="-m" type="data" optional="true" format="txt,tabular" label="Subset the input alignments" + help="" /> + <param argument="-r" type="integer" min="1" optional="true" label="Limit copies of a given input base" + help="Limit transitive closure to include no more than N copies of a given input base" /> + <param argument="-k" type="integer" min="1" optional="true" label="Filter exact matches below this length" + help="This can smooth the graph locally and prevent the formation of complex local graph topologies from forming due to differential alignments." /> + <param argument="-B" type="integer" min="1" optional="true" label="Number of bp to use for transitive closure batch" + help="" /> + </inputs> + + <outputs> + <data name="out_gfa" format="gfa1" label="${tool.name} on ${on_string}" /> + </outputs> + + <tests> + <test> + <param name="seqs" value="A-3105.fa.gz" /> + <param name="paf_alns" value="A-3105.paf.gz" /> + <output name="out_gfa" md5="176f9b4be76a2e69550da9b9c39ac5eb" /> + </test> + <test> + <param name="seqs" value="A-3105.fa.gz" /> + <param name="paf_alns" value="A-3105.paf.gz" /> + <param name="r" value="3000" /> + <param name="k" value="1" /> + <param name="B" value="1000000" /> + <output name="out_gfa" md5="176f9b4be76a2e69550da9b9c39ac5eb" /> + </test> + </tests> + <help><![CDATA[ +These **seq**uences **wish** they were squished into a graph. + +seqwish implements a lossless conversion from pairwise alignments between sequences to a variation graph encoding the sequences +and their alignments. As input we typically take all-versus-all alignments, but the exact structure of the alignment set may +be defined in an application specific way. This algorithm uses a series of disk-backed sorts and passes over the alignment +and sequence inputs to allow the graph to be constructed from very large inputs that are commonly encountered when working with +large numbers of noisy input sequences. Memory usage during construction and traversal is limited by the use of sorted +disk-backed arrays and succinct rank/select dictionaries to record a queryable version of the graph. + ]]></help> + <citations> + </citations> +</tool>