mashmap: mashmap.xml comparison

comparison mashmap.xml @ 0:a3a6b0b31f2d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mashmap commit 4d07f324b25c62ef0b56b22dfff84af87d54d142

author	iuc
date	Mon, 26 Feb 2024 11:41:43 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:a3a6b0b31f2d
+<tool name="mashmap" id="mashmap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
+<description>Fast local alignment boundaries</description>
+<macros>
+    <!-- Upstream MashMap release wrapped by this tool; also used as the package requirement version. -->
+    <token name="@TOOL_VERSION@">3.1.3</token>
+    <!-- Wrapper revision appended after "+galaxy" in the tool version string. -->
+    <token name="@VERSION_SUFFIX@">0</token>
+</macros>
+<requirements>
+    <!-- The package version is taken from the @TOOL_VERSION@ token. -->
+    <requirement type="package" version="@TOOL_VERSION@">mashmap</requirement>
+</requirements>
+<!-- Command whose output is recorded as the tool version. -->
+<version_command>mashmap --version</version_command>
+<command><![CDATA[
+## When more than one reference dataset is selected, write their paths
+## (one per line) to ./reflist so they can be handed over with --rl.
+## The list is echoed to stdout with cat.
+#if len($reflist) > 1:
+    #for $r in $reflist:
+        echo '$r' >> ./reflist &&
+    #end for
+    cat ./reflist &&
+#end if
+## Same handling for the query datasets, written to ./query for --ql.
+#if len($query) > 1:
+    #for $q in $query:
+        echo '$q' >> ./query &&
+    #end for
+    cat ./query &&
+#end if
+mashmap
+--threads \${GALAXY_SLOTS:-1}
+--perc_identity $perc_identity
+## The Galaxy parameter is named seqLength; the flag passed here is --segLength.
+--segLength $seqLength
+--filter_mode $filter_mode
+## Boolean parameters expand to their flag or to an empty string.
+$reportPercentage
+$dense
+$noMerge
+$noHgFilter
+## Optional floats: compare the string form against '' so that an explicit
+## 0 / 0.0 is still passed on instead of being treated as "not set".
+#if str($kmerThreshold) != '':
+    --kmerThreshold $kmerThreshold
+#end if
+#if str($kmerComplexity) != '':
+    --kmerComplexity $kmerComplexity
+#end if
+## A sketch size of 0 (or less) means the option is not passed at all.
+#if int($sketchSize) > 0:
+    -J $sketchSize
+#end if
+## A single dataset is passed directly; several go through the list file.
+#if len($reflist) == 1:
+    -r '$reflist'
+#else
+    --rl ./reflist
+#end if
+#if len($query) == 1:
+    -q '$query'
+#else
+    --ql ./query
+#end if
+]]></command>
+<inputs>
+    <!-- Query and reference datasets; each accepts one or several files. -->
+    <param name="query" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true"
+        label="Query sequences to mash against the references supplied below"/>
+    <param name="reflist" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true"
+        label="Reference or references to mash the query sequences on"
+        help="Choose one or more reference sequences to mash the query sequences against."/>
+    <param argument="--perc_identity" type="float" value="85.0" label="Identity threshold"
+        help="By default, it is set to 85, implying mappings with 85 or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/>
+    <!-- NOTE(review): this parameter is named seqLength while the command section passes it as segLength;
+         the name is kept unchanged because the command template and the tests refer to it. -->
+    <param argument="--seqLength" type="integer" value="5000" min="1" label="Minimum segment length"
+        help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/>
+    <param argument="--sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on the identity threshold"
+        help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/>
+    <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" label="Dense sketching"
+        help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI."/>
+    <!-- The value is a percentage (see label), so the accepted range is 0 to 100. -->
+    <param argument="--kmerThreshold" type="float" min="0.0" max="100.0" optional="true" label="Ignore the top % most-frequent kmer window" />
+    <param argument="--kmerComplexity" type="float" min="0.0" max="1.0" optional="true" label="Threshold for kmer complexity" />
+    <param argument="--filter_mode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings.">
+        <option value="map" selected="true">map - best mapping for long reads</option>
+        <option value="one-to-one">one-to-one - best for mapping orthologous reads</option>
+        <option value="none">None</option>
+    </param>
+    <!-- Boolean switches: each expands to its flag when checked and to nothing otherwise. -->
+    <param argument="--reportPercentage" type="boolean" truevalue="--reportPercentage" falsevalue="" checked="false"
+        label="Report predicted ANI values in [0, 100]"
+        help="instead of [0,1]" />
+    <param argument="--noMerge" type="boolean" truevalue="--noMerge" falsevalue="" checked="false"
+        label="Don't merge consecutive segment-level mappings" />
+    <param argument="--noHgFilter" type="boolean" truevalue="--noHgFilter" falsevalue="" checked="false" label="Use MashMap2 first pass filtering"
+        help="Don't use the hypergeometric filtering and instead use the MashMap2 first pass filtering." />
+</inputs>
+<outputs>
+    <!-- Single PAF dataset collected from mashmap.out in the job working directory. -->
+    <data format="paf" name="mashout" from_work_dir="mashmap.out" />
+</outputs>
+<tests>
+    <!-- Test 1: one uncompressed FASTA query against one uncompressed FASTA reference. -->
+    <test expect_num_outputs="1">
+        <param name="query" ftype="fasta" value="query_sample.fasta"/>
+        <param name="reflist" ftype="fasta" value="reflist_sample.fasta"/>
+        <param name="perc_identity" value="85.0"/>
+        <param name="seqLength" value="5000"/>
+        <param name="sketchSize" value="0"/>
+        <param name="dense" value="true"/>
+        <param name="filter_mode" value="map"/>
+        <output name="mashout" ftype="paf" value="mashout_sample.paf"/>
+    </test>
+    <!-- Test 2: same settings with gzip-compressed inputs; compared against the same expected PAF. -->
+    <test expect_num_outputs="1">
+        <param name="query" ftype="fasta.gz" value="query_sample.fasta.gz"/>
+        <param name="reflist" ftype="fasta.gz" value="reflist_sample.fasta.gz"/>
+        <param name="perc_identity" value="85.0"/>
+        <param name="seqLength" value="5000"/>
+        <param name="sketchSize" value="0"/>
+        <param name="dense" value="true"/>
+        <param name="filter_mode" value="map"/>
+        <output name="mashout" ftype="paf" value="mashout_sample.paf"/>
+    </test>
+    <!-- Test 3: two query datasets and two reference datasets, exercising the list-file branches of the command. -->
+    <test expect_num_outputs="1">
+        <param name="query" ftype="fasta.gz" value="query_sample.fasta.gz,query_sample.fasta.gz"/>
+        <param name="reflist" ftype="fasta.gz" value="reflist_1_sample.fasta.gz,reflist_2_sample.fasta.gz"/>
+        <param name="perc_identity" value="85.0"/>
+        <param name="seqLength" value="5000"/>
+        <param name="sketchSize" value="0"/>
+        <param name="dense" value="true"/>
+        <param name="filter_mode" value="map"/>
+        <output name="mashout" ftype="paf" value="mashout_multi_sample.paf"/>
+    </test>
+</tests>
+<help><![CDATA[
+*MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences.
+It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s).
+Given a minimum alignment length and an identity threshold for the desired local alignments,
+Mashmap computes alignment boundaries and identity estimates using k-mers. It does not compute the alignments explicitly,
+but rather estimates an unbiased k-mer based Jaccard similarity using a combination of minmers (a novel winnowing scheme) and MinHash.
+This is then converted to an estimate of sequence identity using the Mash distance. An appropriate k-mer sampling rate
+is automatically determined using the given minimum local alignment length and identity thresholds.
+As an example, Mashmap can map a human genome assembly to the human reference genome in about one minute total execution
+time and < 4 GB memory using just 8 CPU threads, achieving more than an order of magnitude improvement in both runtime and
+memory over alternative methods. We describe the algorithms associated with Mashmap, and report on speed, scalability, and
+accuracy of the software in the publications listed below. Unlike traditional mappers, MashMap does not compute exact sequence alignments.
+In the future, we plan to add optional alignment support to generate base-to-base alignments.
+The output is space-delimited with each line consisting of query name, length, 0-based start, end, strand, target name,
+length, start, end and mapping nucleotide identity.
+]]></help>
+<citations>
+    <!-- Publications referenced by DOI. -->
+    <citation type="doi">10.1093/bioinformatics/btad512</citation>
+    <citation type="doi">10.1093/bioinformatics/bts573</citation>
+</citations>
+</tool>

Mercurial > repos > iuc > mashmap

comparison mashmap.xml @ 0:a3a6b0b31f2d draft default tip