comparison nucmer.xml @ 4:7cd7a55a678d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mummer4 commit 026db7297e987c1b7ce7f5dd4f8746d1bd435538
author iuc
date Mon, 18 Mar 2024 12:41:25 +0000
parents e18267f90096
children
comparison
equal deleted inserted replaced
3:e18267f90096 4:7cd7a55a678d
9 </expand> 9 </expand>
10 <command detect_errors="exit_code"> 10 <command detect_errors="exit_code">
11 <![CDATA[ 11 <![CDATA[
12 ln -s $reference_sequence reference.fa && 12 ln -s $reference_sequence reference.fa &&
13 ln -s $query_sequence query.fa && 13 ln -s $query_sequence query.fa &&
14 nucmer 14 nucmer
15 $anchoring 15 $anchoring
16 #if $outform.out_format != "delta":
17 --sam-long=outsam.sam
18 #end if
16 -b '$breaklen' 19 -b '$breaklen'
17 -c '$mincluster' 20 -c '$mincluster'
18 -D '$diagdiff' 21 -D '$diagdiff'
19 -d '$diagfactor' 22 -d '$diagfactor'
20 $noextend 23 $noextend
29 $options.banded 32 $options.banded
30 $options.large 33 $options.large
31 $options.genome 34 $options.genome
32 -M '$options.max_chunk' 35 -M '$options.max_chunk'
33 #end if 36 #end if
34 'reference.fa' 'query.fa' 37 'reference.fa' 'query.fa'
35 #if $mumplot.plot == 'yes': 38 #if $outform.out_format == "delta":
39 #if $mumplot.plot == 'yes' :
36 && mummerplot 40 && mummerplot
37 #if $mumplot.sequences.seq_input == 'yes': 41 #if $outform.mumplot.sequences.seq_input == 'yes':
38 -R '$reference_sequence' 42 -R '$reference_sequence'
39 -Q '$query_sequence' 43 -Q '$query_sequence'
40 $mumplot.sequences.layout 44 $outform.mumplot.sequences.layout
41 #end if 45 #end if
42 -b '$mumplot.breaklen' 46 -b '$outform.mumplot.breaklen'
43 $mumplot.color 47 $outform.mumplot.color
44 $mumplot.coverage 48 $outform.mumplot.coverage
45 $mumplot.filter 49 $outform.mumplot.filter
46 $mumplot.fat 50 $outform.mumplot.fat
47 #if $mumplot.labels.IDs == 'yes': 51 #if $outform.mumplot.labels.IDs == 'yes':
48 -IdR '$mumplot.labels.ref_id' 52 -IdR '$outform.mumplot.labels.ref_id'
49 -IdQ '$mumplot.labels.query_id' 53 -IdQ '$outform.mumplot.labels.query_id'
50 #end if 54 #end if
51 -s '$mumplot.size' 55 -s '$outform.mumplot.size'
52 -terminal png 56 -terminal png
53 -title '$mumplot.title' 57 -title '$outform.mumplot.title'
54 $mumplot.snp 58 $outform.mumplot.snp
55 #if $mumplot.range.custom == 'yes': 59 #if $outform.mumplot.range.custom == 'yes':
56 -x [$mumplot.range.min_x:$mumplot.range.max_x] 60 -x [$outform.mumplot.range.min_x:$outform.mumplot.range.max_x]
57 -y [$mumplot.range.min_y:$mumplot.range.max_y] 61 -y [$outform.mumplot.range.min_y:$outform.mumplot.range.max_y]
58 #end if 62 #end if
59 'out.delta' 63 'out.delta'
60 @MUMMER_GNUPLOT_MANUAL@ 64 @MUMMER_GNUPLOT_MANUAL@
65 #end if
66 #else:
67 && samtools dict reference.fa > outsamhead
68 && tail -n +3 outsam.sam >> outsamhead
69 && samtools sort -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" outsamhead |
70 #if $outform.out_format == 'bam-long':
71 samtools calmd -b --threads \${GALAXY_SLOTS:-1} - reference.fa > outsam
72 #else if $outform.out_format == 'cram-long':
73 samtools view -C --reference reference.fa -o outsam -
74 #end if
61 #end if 75 #end if
62 ]]> 76 ]]>
63 </command> 77 </command>
64 <inputs> 78 <inputs>
65 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" /> 79 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" />
66 <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" /> 80 <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" />
81 <conditional name="outform">
82 <param name="out_format" type="select" label="Output format" help="Select delta format if a plot is needed. Jbrowse is a good choice to view cram and bam tracks">
83 <option value="bam-long">bam format</option>
84 <option value="cram-long">cram format</option>
85 <option value="delta">Mummer delta format - allows plots</option>
86 </param>
87 <when value="delta">
88 <conditional name="mumplot" >
89 <param name="plot" type="select" label="Create a 2-D dotplot of the input sequences?" >
90 <option value="no">No plot</option>
91 <option value="yes">Plot</option>
92 </param>
93 <when value="yes" >
94 <expand macro="mumplot_input" >
95 <conditional name="sequences" >
96 <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" >
97 <option value="no">NO</option>
98 <option value="yes">YES</option>
99 </param>
100 <when value="yes">
101 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" />
102 <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" />
103 <param argument="--layout" type="boolean" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion." />
104 </when>
105 <when value="no" />
106 </conditional>
107 </expand>
108 </when>
109 <when value="no" />
110 </conditional>
111 </when>
112 <when value="bam-long"/>
113 <when value="cram-long"/>
114 </conditional>
67 <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy"> 115 <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy">
68 <option value="">Use default</option> 116 <option value="">Use default</option>
69 <option value="--mum">Unique matches only (--mum)</option> 117 <option value="--mum">Unique matches only (--mum)</option>
70 <option value="--maxmatch">All matches (--maxmatch)</option> 118 <option value="--maxmatch">All matches (--maxmatch)</option>
71 </param> 119 </param>
120
72 <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length" 121 <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length"
73 help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up. (-b)" /> 122 help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up." />
74 <param name="mincluster" type="integer" argument="-c" value="65" label="Minumum Cluster Length" help="Sets the minimum length of a cluster of matches. (-c)" /> 123 <param name="mincluster" type="integer" argument="-c" value="65" label="Minimum Cluster Length" help="Sets the minimum length of a cluster of matches." />
75 <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference" 124 <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference"
76 help="Set the maximum diagonal difference between two adjacent anchors in a cluster. (-D)" /> 125 help="Set the maximum diagonal difference between two adjacent anchors in a cluster." />
77 <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference" 126 <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference"
78 help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length. (-d)" /> 127 help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length." />
79 <param name="noextend" type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step. (--noextend)" /> 128 <param type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step." />
80 <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use"> 129 <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use">
81 <option value="">Use foward and reverse sequences</option> 130 <option value="">Use forward and reverse sequences</option>
82 <option value="-f">Use only forward sequence of query (-f)</option> 131 <option value="-f">Use only forward sequence of query (-f)</option>
83 <option value="-r">Use only reverese sequence of query (-r)</option> 132 <option value="-r">Use only reverse sequence of query (-r)</option>
84 </param> 133 </param>
85 <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster. (-g)" /> 134 <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster." />
86 <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match. (-l)" /> 135 <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match." />
87 <param name="minalign" type="integer" argument="-L" value="0" label="Minumum Alignment Length" help="Minimum length of an alignment, after clustering and extension. (-L)" /> 136 <param name="minalign" type="integer" argument="-L" value="0" label="Minimum Alignment Length" help="Minimum length of an alignment, after clustering and extension." />
88 <param name="nooptimize" type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization" 137 <param type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization"
89 help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" /> 138 help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" />
90 <param name="nosimplify" type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments" 139 <param type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments"
91 help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats. (--nosimplify)" /> 140 help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats." />
92 <conditional name="options"> 141 <conditional name="options">
93 <param name="advanced" type="select" label="Additional options"> 142 <param name="advanced" type="select" label="Additional options">
94 <option value="defaults">Use defaults</option> 143 <option value="defaults">Use defaults</option>
95 <option value="enable">Select additional options</option> 144 <option value="enable">Select additional options</option>
96 </param> 145 </param>
97 <when value="enable"> 146 <when value="enable">
98 <param name="banded" type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding" 147 <param type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding"
99 help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" /> 148 help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" />
100 <param name="large" type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" /> 149 <param type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" />
101 <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" /> 150 <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" />
102 <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" /> 151 <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" />
103 </when> 152 </when>
104 <when value="defaults" /> 153 <when value="defaults" />
105 </conditional> 154 </conditional>
106 <conditional name="mumplot" >
107 <param name="plot" type="select" label="Do you want to output a 2-D dotplot of the input sequences? (mummerplot)" >
108 <option value="yes">YES</option>
109 <option value="no">NO</option>
110 </param>
111 <when value="yes" >
112 <expand macro="mumplot_input" >
113 <conditional name="sequences" >
114 <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" >
115 <option value="no">NO</option>
116 <option value="yes">YES</option>
117 </param>
118 <when value="yes">
119 <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" />
120 <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" />
121 <param name="layout" type="boolean" argument="--layout" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion. (--layout)" />
122 </when>
123 <when value="no" />
124 </conditional>
125 </expand>
126 </when>
127 <when value="no" />
128 </conditional>
129 </inputs> 155 </inputs>
130 <outputs> 156 <outputs>
131 <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: alignment" /> 157 <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: delta format">
158 <filter>outform["out_format"] == "delta"</filter>
159 </data>
160 <data name="sam_output" format="bam" from_work_dir="outsam" label="${tool.name} on ${on_string}">
161 <filter>outform["out_format"] != "delta"</filter>
162 <change_format>
163 <when input="outform.out_format" value="cram-long" format="cram" />
164 </change_format>
165 </data>
132 <data name="png_output" format="png" from_work_dir="out.png" label="${tool.name} on ${on_string}: plot" > 166 <data name="png_output" format="png" from_work_dir="out.png" label="${tool.name} on ${on_string}: plot" >
133 <filter>mumplot['plot'] == 'yes'</filter> 167 <filter>outform["out_format"] == "delta" and outform['mumplot']['plot'] == 'yes'</filter>
134 </data> 168 </data>
135 </outputs> 169 </outputs>
136 <tests> 170 <tests>
137 <test> 171 <test expect_num_outputs="2">
138 <param name="advanced" value="defaults" /> 172 <param name="advanced" value="defaults" />
173 <conditional name="outform">
174 <param name="out_format" value="delta" />
175 </conditional>
139 <param name="plot" value="yes" /> 176 <param name="plot" value="yes" />
140 <param name="seq_input" value="yes" /> 177 <param name="seq_input" value="yes" />
141 <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/> 178 <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/>
142 <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" /> 179 <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" />
143 <output name="delta_output" ftype="tabular" compare="diff" lines_diff="2" value="nucmer.txt"/> 180 <output name="delta_output" ftype="tabular" compare="diff" lines_diff="2" value="nucmer.txt"/>
144 <output name="png_output" ftype="png" compare="sim_size" value="plot.png" /> 181 <output name="png_output" ftype="png" compare="sim_size" value="plot.png" />
145 </test> 182 </test>
183 <test expect_num_outputs="1">
184 <param name="advanced" value="defaults" />
185 <conditional name="outform">
186 <param name="out_format" value="bam-long" />
187 </conditional>
188 <param name="seq_input" value="yes" />
189 <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/>
190 <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" />
191 <output name="sam_output" ftype="bam" compare="sim_size" value="out.bam" />
192 </test>
193 <test expect_num_outputs="1">
194 <param name="advanced" value="defaults" />
195 <conditional name="outform">
196 <param name="out_format" value="cram-long" />
197 </conditional>
198 <param name="seq_input" value="yes" />
199 <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/>
200 <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" />
201 <output name="sam_output" ftype="cram" compare="sim_size" value="out.cram" />
202 </test>
146 </tests> 203 </tests>
147 <help><![CDATA[ 204 <help><![CDATA[
148 nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications. 205 nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may
149 206 have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly
150 All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that are unique in the reference sequence by default, use different Anchoring options to change this behavior. 207 to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications.
208
209 All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that
210 are unique in the reference sequence by default, use different Anchoring options to change this behavior.
151 211
152 **Options:**:: 212 **Options:**::
153 213
154 Defaults in parentheses 214 Defaults in parentheses
155 215
156 nucmer 216 nucmer
157 217
218 --sam-long The original output format of nucmer, the delta format, contains only the minimum information necessary to quickly recreate the alignment.
219 It contains the name of the matching sequences, the length of the match, number of errors and positions of indels.
220 With --sam-long, it additionally reports the MD string (which specifies the mismatching positions), the sequence and, if applicable,
221 the quality values of the matching sequence. The long format is more expensive to compute and it generates larger output files,
222 but this option allows nucmer4 to match the behavior of other aligners such as Bowtie2 or BWA.
223
158 --mum Use anchor matches that are unique in both the reference and query (false) 224 --mum Use anchor matches that are unique in both the reference and query (false)
159 225
160 --maxmatch Use all anchor matches regardless of their uniqueness (false) 226 --maxmatch Use all anchor matches regardless of their uniqueness (false)
161 227
162 -b Set the distance an alignment extension will attempt to extend poor scoring regions 228 -b Set the distance an alignment extension will attempt to extend poor scoring regions
163 before giving up (200) 229 before giving up (200)
164 230
165 -c Sets the minimum length of a cluster of matches (65) 231 -c Sets the minimum length of a cluster of matches (65)
166 232
167 -D Set the maximum diagonal difference between two adjacent anchors in a cluster (5) 233 -D Set the maximum diagonal difference between two adjacent anchors in a cluster (5)
168 234
169 -d Set the maximum diagonal difference between two adjacent anchors in a cluster as a 235 -d Set the maximum diagonal difference between two adjacent anchors in a cluster as a
170 differential fraction of the gap length (0.12) 236 differential fraction of the gap length (0.12)
171 237
172 --noextend Do not perform cluster extension step (false) 238 --noextend Do not perform cluster extension step (false)
173 239
174 -f Use only the forward strand of the Query sequences (false) 240 -f Use only the forward strand of the Query sequences (false)
179 245
180 -l Set the minimum length of a single exact match (20) 246 -l Set the minimum length of a single exact match (20)
181 247
182 -L Minimum length of an alignment, after clustering and extension (0) 248 -L Minimum length of an alignment, after clustering and extension (0)
183 249
184 --nooptimize No alignment score optimization, i.e. if an alignment extension reaches the end of a 250 --nooptimize No alignment score optimization, i.e. if an alignment extension reaches the end of a
185 sequence, it will not backtrack to optimize the alignment score and instead terminate 251 sequence, it will not backtrack to optimize the alignment score and instead terminate
186 the alignment at the end of the sequence (false) 252 the alignment at the end of the sequence (false)
187 253
188 --nosimplify Don't simplify alignments by removing shadowed clusters. Use this option when aligning 254 --nosimplify Don't simplify alignments by removing shadowed clusters. Use this option when aligning
189 a sequence to itself to look for repeats (false) 255 a sequence to itself to look for repeats (false)
196 262
197 -M Max chunk. Stop adding sequence for a thread if more than MAX already. (50000) 263 -M Max chunk. Stop adding sequence for a thread if more than MAX already. (50000)
198 264
199 mummerplot 265 mummerplot
200 266
201 -b Highlight alignments with breakpoints further than breaklen nucleotides from the nearest 267 -b Highlight alignments with breakpoints further than breaklen nucleotides from the nearest
202 sequence end 268 sequence end
203 269
204 -color Color plot lines with a percent similarity gradient or turn off all plot color (default 270 -color Color plot lines with a percent similarity gradient or turn off all plot color (default
205 color by match dir) If the plot is very sparse, edit the .gp script to plot with 271 color by match dir) If the plot is very sparse, edit the .gp script to plot with
206 'linespoints' instead of 'lines' 272 'linespoints' instead of 'lines'
207 273
208 -c Generate a reference coverage plot (default for .tiling) 274 -c Generate a reference coverage plot (default for .tiling)
209 275
210 --filter Only display .delta alignments which represent the "best" hit to any particular spot on 276 --filter Only display .delta alignments which represent the "best" hit to any particular spot on
211 either sequence, i.e. a one-to-one mapping of reference and query subsequences 277 either sequence, i.e. a one-to-one mapping of reference and query subsequences
212 278
213 --fat Layout sequences using fattest alignment only 279 --fat Layout sequences using fattest alignment only
214 280
215 -IdR Plot a particular reference sequence ID on the X-axis 281 -IdR Plot a particular reference sequence ID on the X-axis