comparison mashmap.xml @ 3:aa2234f3b23a draft

planemo upload
author fubar
date Fri, 23 Feb 2024 09:13:49 +0000 (11 months ago)
parents 6c6bf2bee1ca
children fba99cb9b0ef
comparison
equal deleted inserted replaced
2:6c6bf2bee1ca 3:aa2234f3b23a
1 <tool name="mashmap" id="mashmap" version="3.1.3" profile="22.05"> 1 <tool name="mashmap" id="mashmap" version="3.1.3" profile="22.05">
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> 2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
3 <!--Created by toolfactory@galaxy.org at 22/02/2024 21:43:25 using the Galaxy Tool Factory.--> 3 <!--Created by toolfactory@galaxy.org at 23/02/2024 20:10:51 using the Galaxy Tool Factory.-->
4 <description>Fast local alignment boundaries</description> 4 <description>Fast local alignment boundaries</description>
5 <requirements> 5 <requirements>
6 <requirement version="3.1.3" type="package">mashmap</requirement> 6 <requirement version="3.1.3" type="package">mashmap</requirement>
7 </requirements> 7 </requirements>
8 <version_command><![CDATA[echo "3.1.3"]]></version_command> 8 <version_command><![CDATA[echo "3.1.3"]]></version_command>
10 <configfiles> 10 <configfiles>
11 <configfile name="runme"><![CDATA[mashmap --pi '$perc_identity' -s '$seqLength' -f '$filtermode' \ 11 <configfile name="runme"><![CDATA[mashmap --pi '$perc_identity' -s '$seqLength' -f '$filtermode' \
12 #if int($sketchSize) > 0: 12 #if int($sketchSize) > 0:
13 -J '$sketchSize' \ 13 -J '$sketchSize' \
14 #end if 14 #end if
15 #if '$dense': 15 #if '$dense' == 'set':
16 --dense \ 16 --dense \
17 #end if 17 #end if
18 #if len($reflist) == 1: 18 #if len($reflist) == 1:
19 -r '$reflist' -q '$query' 19 -r $reflist -q '$query'
20 #else 20 #else
21 rm -rf 'reflist'
22 #for i, mash in enumerate($reflist): 21 #for i, mash in enumerate($reflist):
23 #if i == 0: 22 #if i == 0:
24 echo '$mash' > 'reflist' 23 echo $mash > 'reflist'
25 #else: 24 #else:
26 echo '$mash' >> 'reflist' 25 echo $mash >> 'reflist'
27 #end if 26 #end if
28 #end for 27 #end for
29 --rl 'reflist' -q '$query' 28 --rl 'reflist' -q '$query'
30 #end if 29 #end if
31 cp 'mashmap.out' '$mashout']]></configfile> 30 cp 'mashmap.out' $mashout]]></configfile>
32 </configfiles> 31 </configfiles>
33 <inputs> 32 <inputs>
34 <param name="query" type="data" optional="false" label="Query sequences (as fasta) to mash against the references supplied below" help="" format="fasta" multiple="false"/> 33 <param name="query" type="data" optional="false" label="Query sequences (as fasta) to mash against the references supplied below" help="" format="fasta" multiple="false"/>
35 <param name="reflist" type="data" optional="false" label="Reference or references to mash the query sequences on" help="Choose one or more reference sequences to mash the query sequences against." format="fasta" multiple="true"/> 34 <param name="reflist" type="data" optional="false" label="Reference or references to mash the query sequences on" help="Choose one or more reference sequences to mash the query sequences against." format="fasta" multiple="true"/>
36 <param name="perc_identity" type="float" value="85.0" label="Identity threshold" help="By default, it is set to 85, implying mappings with 85 or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/> 35 <param name="perc_identity" type="float" value="85.0" label="Identity threshold" help="By default, it is set to 85, implying mappings with 85 or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/>
37 <param name="seqLength" type="integer" value="5000" label="Minimum segment length" help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/> 36 <param name="seqLength" type="integer" value="5000" label="Minimum segment length" help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/>
38 <param name="sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on --pi" help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/> 37 <param name="sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on --pi" help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/>
39 <param name="dense" type="boolean" value="false" label="Dense sketching" help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI." checked="false" truevalue="--dense" falsevalue=""/> 38 <param name="dense" type="select" label="Dense sketching" help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI." display="radio">
39 <option value="notset">Do not set this flag</option>
40 <option value="set">Set this flag</option>
41 </param>
40 <param name="filtermode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings."> 42 <param name="filtermode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings.">
41 <option value="map">map - best mapping for long reads</option> 43 <option value="map">map - best mapping for long reads</option>
42 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option> 44 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option>
43 <option value="none">None</option> 45 <option value="none">None</option>
44 </param> 46 </param>
52 <param name="query" value="query_sample"/> 54 <param name="query" value="query_sample"/>
53 <param name="reflist" value="reflist_sample"/> 55 <param name="reflist" value="reflist_sample"/>
54 <param name="perc_identity" value="85.0"/> 56 <param name="perc_identity" value="85.0"/>
55 <param name="seqLength" value="5000"/> 57 <param name="seqLength" value="5000"/>
56 <param name="sketchSize" value="0"/> 58 <param name="sketchSize" value="0"/>
57 <param name="dense" value="false"/> 59 <param name="dense" value="notset"/>
58 <param name="filtermode" value="map"/> 60 <param name="filtermode" value="map"/>
59 </test> 61 </test>
60 </tests> 62 </tests>
61 <help><![CDATA[ 63 <help><![CDATA[
62 *MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences. It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s). Given a minimum alignment length and an identity threshold for the desired local alignments, 64 *MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences. It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s). Given a minimum alignment length and an identity threshold for the desired local alignments,