comparison nucmer.xml @ 0:a18fb4f826fc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mummer4 commit 8133565adbfc012fa54b96449c2a18d044049107
author iuc
date Wed, 05 Dec 2018 02:37:36 -0500
parents
children 5b0b49b5421c
comparison
equal deleted inserted replaced
-1:000000000000 0:a18fb4f826fc
1 <tool id="mummer_nucmer" name="Nucmer" version="@MUMMER_VERSION@">
2 <description>Align two or more sequences</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"> <!-- expands the "requirements" macro; its definition is not in this file (macros.xml is imported above) -->
7 <requirement type="package" version="5.2.3">gnuplot</requirement> <!-- extra package passed into the macro; presumably needed for the mummerplot PNG step in the command (TODO confirm) -->
8 </expand>
9 <command detect_errors="exit_code">
10 <![CDATA[
11 ln -s '$reference_sequence' reference.fa &&   ## stage both inputs under fixed names; dataset paths are quoted for the shell
12 ln -s '$query_sequence' query.fa &&
13 nucmer   ## alignment step; select and boolean params below expand to a flag or to an empty string
14 $anchoring
15 -b '$breaklen'
16 -c '$mincluster'
17 -D '$diagdiff'
18 -d '$diagfactor'
19 $noextend
20 $direction
21 -g '$maxgap'
22 -l '$minmatch'
23 -L '$minalign'
24 $nooptimize
25 $nosimplify
26 --threads "\${GALAXY_SLOTS:-1}"
27 #if $options.advanced == 'enable':
28 $options.banded
29 $options.large
30 $options.genome
31 -M '$options.max_chunk'
32 #end if
33 'reference.fa' 'query.fa'
34 #if $mumplot.plot == 'yes':
35 && mummerplot   ## optional plotting step, chained so it only runs when nucmer succeeded
36 #if $mumplot.sequences.seq_input == 'yes':
37 -R '$reference_sequence'   ## NOTE(review): unqualified names are used here although mumplot.sequences declares its own reference_sequence and query_sequence; confirm which datasets are intended
38 -Q '$query_sequence'
39 $mumplot.sequences.layout
40 #end if
41 -b '$mumplot.breaklen'
42 $mumplot.color
43 $mumplot.coverage
44 $mumplot.filter
45 $mumplot.fat
46 #if $mumplot.labels.IDs == 'yes':
47 -IdR '$mumplot.labels.ref_id'
48 -IdQ '$mumplot.labels.query_id'
49 #end if
50 -s '$mumplot.size'
51 -terminal png
52 -title '$mumplot.title'
53 $mumplot.snp
54 #if $mumplot.range.custom == 'yes':
55 -x '[$mumplot.range.min_x:$mumplot.range.max_x]'   ## quoted so the shell does not treat the brackets as a glob pattern
56 -y '[$mumplot.range.min_y:$mumplot.range.max_y]'
57 #end if
58 'out.delta'
59 #end if
60 ]]>
61 </command>
62 <inputs> <!-- top-level parameters feed the nucmer flags assembled in the command template -->
63 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" />
64 <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" />
65 <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy">
66 <option value="">Use default</option>
67 <option value="--mum">Unique matches only (--mum)</option>
68 <option value="--maxmatch">All matches (--maxmatch)</option>
69 </param>
70 <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length"
71 help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up. (-b)" />
72 <param name="mincluster" type="integer" argument="-c" value="65" label="Minimum Cluster Length" help="Sets the minimum length of a cluster of matches. (-c)" />
73 <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference"
74 help="Set the maximum diagonal difference between two adjacent anchors in a cluster. (-D)" />
75 <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference Factor"
76 help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length. (-d)" />
77 <param name="noextend" type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step. (--noextend)" />
78 <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use">
79 <option value="">Use forward and reverse sequences</option>
80 <option value="-f">Use only forward sequence of query (-f)</option>
81 <option value="-r">Use only reverse sequence of query (-r)</option>
82 </param>
83 <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster. (-g)" />
84 <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match. (-l)" />
85 <param name="minalign" type="integer" argument="-L" value="0" label="Minimum Alignment Length" help="Minimum length of an alignment, after clustering and extension. (-L)" />
86 <param name="nooptimize" type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization"
87 help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" />
88 <param name="nosimplify" type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments"
89 help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats. (--nosimplify)" />
90 <conditional name="options"> <!-- the flags below are only emitted when "advanced" is set to "enable" -->
91 <param name="advanced" type="select" label="Additional options">
92 <option value="defaults">Use defaults</option>
93 <option value="enable">Select additional options</option>
94 </param>
95 <when value="enable">
96 <param name="banded" type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding"
97 help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" />
98 <param name="large" type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" />
99 <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" />
100 <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" />
101 </when>
102 <when value="defaults" />
103 </conditional>
104 <conditional name="mumplot" > <!-- choosing "yes" appends a mummerplot call to the command and enables the PNG output -->
105 <param name="plot" type="select" label="Do you want to output a 2-D dotplot of the input sequences? (mummerplot)" >
106 <option value="yes">YES</option>
107 <option value="no">NO</option>
108 </param>
109 <when value="yes" >
110 <expand macro="mumplot_input" > <!-- macro body is defined outside this file; the nested conditional is passed into it -->
111 <conditional name="sequences" > <!-- NOTE(review): the params below reuse the top-level names reference_sequence and query_sequence; confirm which ones the command template is meant to read -->
112 <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" >
113 <option value="no">NO</option>
114 <option value="yes">YES</option>
115 </param>
116 <when value="yes">
117 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" />
118 <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" />
119 <param name="layout" type="boolean" argument="--layout" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion. (--layout)" />
120 </when>
121 <when value="no" />
122 </conditional>
123 </expand>
124 </when>
125 <when value="no" />
126 </conditional>
127 </inputs>
128 <outputs> <!-- both datasets are collected from fixed file names in the job working directory -->
129 <data name="delta_output" label="${tool.name} on ${on_string}: alignment" format="tabular" from_work_dir="out.delta"/>
130 <data name="png_output" label="${tool.name} on ${on_string}: plot" format="png" from_work_dir="out.png">
131 <filter>mumplot['plot'] == 'yes'</filter> <!-- the plot dataset only exists when the dotplot option is set to "yes" -->
132 </data>
133 </outputs>
134 <tests>
135 <test> <!-- default alignment options with the dotplot enabled; checks both the delta file and the PNG -->
136 <param name="advanced" value="defaults"/>
137 <param name="plot" value="yes"/>
138 <param name="seq_input" value="yes"/>
139 <param name="reference_sequence" value="human_aqp3.fasta" ftype="fasta"/>
140 <param name="query_sequence" value="mouse_aqp3.fasta" ftype="fasta"/>
141 <output name="delta_output" value="nucmer.txt" ftype="tabular" compare="diff" lines_diff="2"/>
142 <output name="png_output" value="plot.png" ftype="png" compare="sim_size"/>
143 </test>
144 </tests>
145 <help><![CDATA[
146 nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequences that may have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications.
147
148 All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that are unique in the reference sequence by default; use a different Anchoring option to change this behavior.
149
150 **Options:**::
151
152 Defaults in parentheses
153
154 nucmer
155
156 --mum Use anchor matches that are unique in both the reference and query (false)
157
158 --maxmatch Use all anchor matches regardless of their uniqueness (false)
159
160 -b Set the distance an alignment extension will attempt to extend poor scoring regions
161 before giving up (200)
162
163 -c Sets the minimum length of a cluster of matches (65)
164
165 -D Set the maximum diagonal difference between two adjacent anchors in a cluster (5)
166
167 -d Set the maximum diagonal difference between two adjacent anchors in a cluster as a
168 differential fraction of the gap length (0.12)
169
170 --noextend Do not perform cluster extension step (false)
171
172 -f Use only the forward strand of the Query sequences (false)
173
174 -r Use only the reverse complement of the Query sequences (false)
175
176 -g Set the maximum gap between two adjacent matches in a cluster (90)
177
178 -l Set the minimum length of a single exact match (20)
179
180 -L Minimum length of an alignment, after clustering and extension (0)
181
182 --nooptimize No alignment score optimization, i.e. if an alignment extension reaches the end of a
183 sequence, it will not backtrack to optimize the alignment score and instead terminate
184 the alignment at the end of the sequence (false)
185
186 --nosimplify Don't simplify alignments by removing shadowed clusters. Use this option when aligning
187 a sequence to itself to look for repeats (false)
188
189 --banded Enforce absolute banding of dynamic programming matrix based on diagdiff parameter (false)
190
191 --large Force the use of large offsets (false)
192
193 -G Map genome to genome (long query sequences) (false)
194
195 -M Max chunk. Stop adding sequence for a thread if more than MAX already. (50000)
196
197 mummerplot
198
199 -b Highlight alignments with breakpoints further than breaklen nucleotides from the nearest
200 sequence end
201
202 -color Color plot lines with a percent similarity gradient or turn off all plot color (default
203 color by match dir). If the plot is very sparse, edit the .gp script to plot with
204 'linespoints' instead of 'lines'
205
206 -c Generate a reference coverage plot (default for .tiling)
207
208 --filter Only display .delta alignments which represent the "best" hit to any particular spot on
209 either sequence, i.e. a one-to-one mapping of reference and query subsequences
210
211 --fat Layout sequences using fattest alignment only
212
213 -IdR Plot a particular reference sequence ID on the X-axis
214
215 -IdQ Plot a particular query sequence ID on the Y-axis
216
217 -s Set the output size to small, medium or large (--small) (--medium) (--large) (default 'small')
218
219 --SNP Highlight SNP locations in each alignment
220
221 -title Specify the gnuplot plot title (default none)
222
223 -x Set the xrange for the plot '[min:max]'
224
225 -y Set the yrange for the plot '[min:max]'
226
227 -R Plot an ordered set of reference sequences from Rfile
228
229 -Q Plot an ordered set of query sequences from Qfile
230
231 --layout Layout a .delta multiplot in an intelligible fashion, this option requires the -R -Q options
232
233 ]]></help>
234 <expand macro="citation" />
235 </tool>