Mercurial > repos > iuc > mashmap
comparison mashmap.xml @ 0:a3a6b0b31f2d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mashmap commit 4d07f324b25c62ef0b56b22dfff84af87d54d142
| author | iuc |
|---|---|
| date | Mon, 26 Feb 2024 11:41:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a3a6b0b31f2d |
|---|---|
| 1 <tool name="mashmap" id="mashmap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> | |
| 2 <description>Fast local alignment boundaries</description> | |
| 3 <macros> <!-- Tokens expanded in the tool's version attribute; @TOOL_VERSION@ also pins the package requirement below. --> | |
| 4 <token name="@TOOL_VERSION@">3.1.3</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 </macros> | |
| 7 <requirements> <!-- The mashmap package is requested at exactly @TOOL_VERSION@. --> | |
| 8 <requirement version="@TOOL_VERSION@" type="package">mashmap</requirement> | |
| 9 </requirements> | |
| 10 <version_command>mashmap --version</version_command> <!-- Command whose output reports the installed mashmap version. --> | |
| 11 <command><![CDATA[ | |
| 12 #if len($reflist) > 1: | |
| 13 #for $r in $reflist: | |
| 14 echo '$r' >> ./reflist && | |
| 15 #end for | |
| 16 cat ./reflist && | |
| 17 #end if | |
| 18 #if len($query) > 1: | |
| 19 #for $q in $query: | |
| 20 echo '$q' >> ./query && | |
| 21 #end for | |
| 22 cat ./query && | |
| 23 #end if | |
| 24 mashmap | |
| 25 --threads \${GALAXY_SLOTS:-1} | |
| 26 --perc_identity $perc_identity | |
| 27 --segLength $seqLength | |
| 28 --filter_mode $filter_mode | |
| 29 $reportPercentage | |
| 30 $dense | |
| 31 $noMerge | |
| 32 $noHgFilter | |
| 33 #if str($kmerThreshold) != '': | |
| 34 --kmerThreshold $kmerThreshold | |
| 35 #end if | |
| 36 #if str($kmerComplexity) != '': | |
| 37 --kmerComplexity $kmerComplexity | |
| 38 #end if | |
| 39 #if int($sketchSize) > 0: | |
| 40 -J $sketchSize | |
| 41 #end if | |
| 42 #if len($reflist) == 1: | |
| 43 -r '$reflist' | |
| 44 #else | |
| 45 --rl ./reflist | |
| 46 #end if | |
| 47 #if len($query) == 1: | |
| 48 -q '$query' | |
| 49 #else | |
| 50 --ql ./query | |
| 51 #end if | |
| 52 ## Notes: with several inputs the dataset paths are written one per line to ./reflist and ./query and passed via --rl/--ql; a single input goes straight to -r/-q. The optional floats kmerThreshold and kmerComplexity are compared with str(...) != '' so an explicit 0 is still forwarded instead of being dropped as falsy. | |
| 53 ]]> </command> | |
| 54 <inputs> | |
| 55 <param name="query" type="data" multiple="true" | |
| 56 format="fasta,fasta.gz,fastq,fastq.gz" label="Query sequences to mash against the references supplied below" /> | |
| 57 <param name="reflist" type="data" multiple="true" | |
| 58 format="fasta,fasta.gz,fastq,fastq.gz" label="Reference or references to mash the query sequences on" | |
| 59 help="Choose one or more reference sequences to mash the query sequences against." /> | |
| 60 <param argument="--perc_identity" type="float" value="85.0" label="Identity threshold" | |
| 61 help="By default, it is set to 85, implying mappings with 85% or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/> | |
| 62 <param name="seqLength" argument="--segLength" type="integer" value="5000" min="1" label="Minimum segment length" | |
| 63 help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/> | |
| 64 <param argument="--sketchSize" type="integer" value="0" min="0" label="Sketch size - leave 0 for automatic setting" | |
| 65 help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/> | |
| 66 <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" label="Dense sketching" | |
| 67 help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI."/> | |
| 68 <param argument="--kmerThreshold" type="float" min="0.0" max="100.0" optional="true" label="Ignore the top % most-frequent kmer window" help="Percentage between 0 and 100. Leave empty to omit the option and use the MashMap default." /> | |
| 69 <param argument="--kmerComplexity" type="float" optional="true" min="0.0" max="1.0" label="Threshold for kmer complexity"/> | |
| 70 <param argument="--filter_mode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings."> | |
| 71 <option value="map" selected="true">map - best mapping for long reads</option> | |
| 72 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option> | |
| 73 <option value="none">None</option> | |
| 74 </param> | |
| 75 <param argument="--reportPercentage" type="boolean" checked="false" truevalue="--reportPercentage" falsevalue="" | |
| 76 label="Report predicted ANI values in [0, 100]" | |
| 77 help="instead of [0,1]"/> | |
| 78 <param argument="--noMerge" type="boolean" checked="false" truevalue="--noMerge" falsevalue="" | |
| 79 label="Don't merge consecutive segment-level mappings"/> | |
| 80 <param argument="--noHgFilter" type="boolean" checked="false" truevalue="--noHgFilter" falsevalue="" label="Use MashMap2 first pass filtering" | |
| 81 help="Don't use the hypergeometric filtering and instead use the MashMap2 first pass filtering."/> | |
| 82 </inputs> | |
| 83 <outputs> | |
| 84 <data name="mashout" format="paf" from_work_dir="mashmap.out" /> <!-- Collected from mashmap.out in the job working directory; the command passes no output option, so this is presumably mashmap's default output file (confirm). --> | |
| 85 </outputs> | |
| 86 <tests> | |
| 87 <test expect_num_outputs="1"> <!-- One uncompressed FASTA query against one uncompressed FASTA reference (-q / -r path). --> | |
| 88 <param name="query" value="query_sample.fasta" ftype="fasta"/> | |
| 89 <param name="reflist" value="reflist_sample.fasta" ftype="fasta"/> | |
| 90 <param name="perc_identity" value="85.0"/> | |
| 91 <param name="seqLength" value="5000"/> | |
| 92 <param name="sketchSize" value="0"/> | |
| 93 <param name="dense" value="true"/> | |
| 94 <param name="filter_mode" value="map"/> | |
| 95 <output name="mashout" value="mashout_sample.paf" ftype="paf"/> | |
| 96 </test> | |
| 97 <test expect_num_outputs="1"> <!-- Same settings with gzipped FASTA inputs; compared against the same expected PAF as the first test. --> | |
| 98 <param name="query" value="query_sample.fasta.gz" ftype="fasta.gz"/> | |
| 99 <param name="reflist" value="reflist_sample.fasta.gz" ftype="fasta.gz"/> | |
| 100 <param name="perc_identity" value="85.0"/> | |
| 101 <param name="seqLength" value="5000"/> | |
| 102 <param name="sketchSize" value="0"/> | |
| 103 <param name="dense" value="true"/> | |
| 104 <param name="filter_mode" value="map"/> | |
| 105 <output name="mashout" value="mashout_sample.paf" ftype="paf"/> | |
| 106 </test> | |
| 107 <test expect_num_outputs="1"> <!-- Two query entries (the same file listed twice) and two reference files, exercising the list-file path (ql / rl options). --> | |
| 108 <param name="query" value="query_sample.fasta.gz,query_sample.fasta.gz" ftype="fasta.gz"/> | |
| 109 <param name="reflist" value="reflist_1_sample.fasta.gz,reflist_2_sample.fasta.gz" ftype="fasta.gz"/> | |
| 110 <param name="perc_identity" value="85.0"/> | |
| 111 <param name="seqLength" value="5000"/> | |
| 112 <param name="sketchSize" value="0"/> | |
| 113 <param name="dense" value="true"/> | |
| 114 <param name="filter_mode" value="map"/> | |
| 115 <output name="mashout" value="mashout_multi_sample.paf" ftype="paf"/> | |
| 116 </test> | |
| 117 </tests> | |
| 118 <help><![CDATA[ | |
| 119 *MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences. | |
| 120 It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s). | |
| 121 | |
| 122 Given a minimum alignment length and an identity threshold for the desired local alignments, | |
| 123 MashMap computes alignment boundaries and identity estimates using k-mers. It does not compute the alignments explicitly, | |
| 124 but rather estimates an unbiased k-mer based Jaccard similarity using a combination of minmers (a novel winnowing scheme) and MinHash. | |
| 125 This is then converted to an estimate of sequence identity using the Mash distance. An appropriate k-mer sampling rate | |
| 126 is automatically determined using the given minimum local alignment length and identity thresholds. | |
| 127 | |
| 128 As an example, Mashmap can map a human genome assembly to the human reference genome in about one minute total execution | |
| 129 time and < 4 GB memory using just 8 CPU threads, achieving more than an order of magnitude improvement in both runtime and | |
| 130 memory over alternative methods. We describe the algorithms associated with Mashmap, and report on speed, scalability, and | |
| 131 accuracy of the software in the publications listed below. Unlike traditional mappers, MashMap does not compute exact sequence alignments. | |
| 132 In the future, we plan to add optional alignment support to generate base-to-base alignments. | |
| 133 | |
| 134 The output is space-delimited with each line consisting of query name, length, 0-based start, end, strand, target name, | |
| 135 length, start, end and mapping nucleotide identity. | |
| 136 | |
| 137 ]]></help> | |
| 138 <citations> | |
| 139 <citation type="doi">10.1093/bioinformatics/btad512</citation> | |
| 140 <citation type="doi">10.1093/bioinformatics/bts573</citation> <!-- NOTE(review): confirm this DOI resolves to the intended MashMap-related publication. --> | |
| 141 </citations> | |
| 142 </tool> | |
| 143 |
