comparison ragtag.xml @ 0:a04e64efa43a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ragtag commit 4c4b2a548b4ce46da88810992459b3ac8581d035"
author iuc
date Wed, 10 Nov 2021 23:33:13 +0000
parents
children d110a4141898
comparison
equal deleted inserted replaced
-1:000000000000 0:a04e64efa43a
1 <tool id='ragtag' name='RagTag' version='@TOOL_VERSION@+galaxy@VERSION_SUFFIX@' profile='20.01'>
2 <description>reference-guided scaffolding of draft genomes</description>
3 <macros>
4 <import>macros.xml</import> <!-- external macro file; the xrefs, requirements, input_options, common_parameters and citations macros used in this tool are presumably defined there (not visible here) -->
5 </macros>
6 <expand macro='xrefs' />
7 <expand macro='requirements' />
8 <command detect_errors='exit_code'><![CDATA[
9 #if $mode_conditional.mode_option != 'merge'
10 #if $mode_conditional.advanced_options.mapping_conditional.mapping_option == 'nucmer'
11 #set $nucmer_params = '%s -l %s -c %s' % ($mode_conditional.advanced_options.mapping_conditional.anchor_mode,
12 $mode_conditional.advanced_options.mapping_conditional.l,
13 $mode_conditional.advanced_options.mapping_conditional.c)
14 #end if
15 #end if
16 #if $mode_conditional.mode_option == 'merge'
17 #set $input_files = list()
18 mkdir merge_files && ## staging directory: every AGP input is symlinked here under a name ending in .agp
19 #for $i, $j in enumerate($mode_conditional.scaffold_files)
20 #set $out_file = './merge_files/scaffold_%s.agp' % $i
21 ln -s '${j}' $out_file &&
22 #silent $input_files.append($out_file)
23 #end for
24 #set $merge_files = " ".join($input_files)
25 #end if
26 ragtag.py $mode_conditional.mode_option -u ## the selected mode value is used directly as the ragtag.py subcommand
27 #if $mode_conditional.mode_option == 'correct'
28 @INPUTS@
29 @COMMON_PARAMETERS@
30 #if $mode_conditional.validation_conditional.validation_option == 'true'
31 -R '${mode_conditional.validation_conditional.R}'
32 -T $mode_conditional.validation_conditional.read_type
33 -v $mode_conditional.validation_conditional.v
34 #if str($mode_conditional.validation_conditional.max_cov) != ''
35 --max-cov $mode_conditional.validation_conditional.max_cov
36 #end if
37 #if str($mode_conditional.validation_conditional.min_cov) != ''
38 --min-cov $mode_conditional.validation_conditional.min_cov
39 #end if
40 #end if
41 -b $mode_conditional.advanced_options.b
42 #if $mode_conditional.advanced_options.missasembly_break
43 $mode_conditional.advanced_options.missasembly_break
44 #end if
45 #if $mode_conditional.advanced_options.gff
46 --gff '${mode_conditional.advanced_options.gff}'
47 #end if
48 --read-aligner 'minimap2' ## minimap2 is the only read aligner allowed here
49 #else if $mode_conditional.mode_option == 'scaffold'
50 @INPUTS@
51 @COMMON_PARAMETERS@
52 -i $mode_conditional.advanced_options.i
53 -a $mode_conditional.advanced_options.a
54 -s $mode_conditional.advanced_options.s
55 #if $mode_conditional.advanced_options.gap_conditional.gap_option == 'true'
56 -r
57 -g '${mode_conditional.advanced_options.gap_conditional.g}'
58 -m '${mode_conditional.advanced_options.gap_conditional.m}'
59 #end if
60 #if $mode_conditional.advanced_options.unplaced_conditional.unplaced_option == 'true'
61 -C
62 #if $mode_conditional.advanced_options.unplaced_conditional.J
63 -J '${mode_conditional.advanced_options.unplaced_conditional.J}'
64 #end if
65 #end if
66 #else if $mode_conditional.mode_option == 'patch'
67 @INPUTS@
68 @COMMON_PARAMETERS@
69 -s $mode_conditional.advanced_options.s
70 -i $mode_conditional.advanced_options.i
71 #if $mode_conditional.advanced_options.patching_mode
72 $mode_conditional.advanced_options.patching_mode
73 #end if
74 #else
75 '$mode_conditional.assembly_fasta' ## fully qualified: the parameter is declared inside mode_conditional
76 #if $mode_conditional.scaffold_files
77 $merge_files
78 #end if
79 #if $mode_conditional.merging_options.j
80 -j '$mode_conditional.merging_options.j'
81 #end if
82 -l $mode_conditional.merging_options.l
83 -e $mode_conditional.merging_options.e
84 --gap-func $mode_conditional.merging_options.function_merging
85 #if $mode_conditional.hic_options.b
86 -b '$mode_conditional.hic_options.b'
87 -r '$mode_conditional.hic_options.r' ## quoted: the value may contain square brackets, which an unquoted shell word treats as a glob pattern
88 -p $mode_conditional.hic_options.p
89 #end if
90 #end if
91 -o ./
92 #if $mode_conditional.mode_option != 'merge'
93 -t \${GALAXY_SLOTS:-2}
94 #end if
95 #if $mode_conditional.mode_option == 'patch'
96 && mv ragtag.patch.asm.paf.log ragtag.patch.log
97 #end if
98 ]]> </command>
99 <inputs>
100 <conditional name="mode_conditional">
101 <param name="mode_option" type="select" label="Operation mode"> <!-- the selected value is placed on the command line as the ragtag.py subcommand and drives every mode-specific branch and output filter -->
102 <option value="correct">Correct: homology-based misassembly correction</option>
103 <option value="scaffold">Scaffold: homology-based assembly scaffolding</option>
104 <option value="patch">Patch: homology-based assembly patching</option>
105 <option value="merge">Merge: scaffolding merging</option>
106 </param>
107 <when value="correct"> <!-- parameters shown when the correct mode is selected -->
108 <expand macro="input_options"/>
109 <conditional name="validation_conditional">
110 <param name="validation_option" type="select" label="Use validation reads">
111 <option value="true">Enabled</option>
112 <option value="false" selected="true">Disabled</option>
113 </param>
114 <when value="true">
115 <param argument="-R" type="data" format="fastq,fastqsanger" label="Validation reads"
116 help="Without validation, the module will break at any point of reference discordance as defined by the 'correction options'.
117 With validation, RagTag maps reads to the query assembly and verifies putative break points if they are near regions of
118 exceptionally low or high coverage. The reads used for validation should come from the same genotype as the query
119 assembly to ensure that coverage abnormalities don't arise from true biological variation" />
120 <param name="read_type" type="select" label="Read type">
121 <option value="sr">Illumina</option>
122 <option value="ont">Nanopore</option>
123 <option value="corr">Error corrected long-reads</option>
124 </param>
125 <param argument="-v" type="integer" min="0" value="10000" label="Coverage validation window size"
126 help="This parameter specifies the window around the putative misassembly break point that RagTag examines
127 for exceptionally low or high read coverage. The larger this window size, the more likely
128 it is to find an unrelated coverage abnormality"/>
129 <param argument="--max-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or above this coverage level"/>
130 <param argument="--min-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or below this coverage level"/>
131 </when>
132 <when value="false"/>
133 </conditional>
134 <section name="advanced_options" title="Advanced options">
135 <expand macro="common_parameters"/>
136 <param argument="-b" type="integer" min="0" value="5000" label="Minimum break distance from contig ends"
137 help="Breaks will not be made within -b bp of query sequence termini"/>
138 <param name="missasembly_break" type="select" optional="true" label="Break misassembly option"
139 help="One can also direct RagTag to only break misassemblies between (--inter, query maps to >1 reference sequence) or within
140 (--intra, query maps discordantly to 1 reference sequence) reference sequences"> <!-- the parameter name keeps its historical spelling because the command template and the tests refer to it -->
141 <option value="--inter">Only break misassemblies between reference sequences (--inter)</option>
142 <option value="--intra">Only break misassemblies within reference sequences (--intra)</option>
143 </param>
144 <param argument="--gff" type="data" format="gff" optional="true" label="Don't break sequences within GFF intervals"
145 help=" If one has annotations associated with the query assembly, provide them with the --gff option to ensure that the query assembly
146 is never broken within annotation intervals. "/>
147 </section>
148 <param name="output_correct" type="select" multiple="true" label="Output files"> <!-- the selected values are tested by the output filters of the correct mode -->
149 <option value="fasta" selected="true">The corrected query assembly in FASTA format</option>
150 <option value="agp" selected="true">The AGP file defining the exact coordinates of query sequence breaks</option>
151 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option>
152 <option value="log">Log file</option>
153 </param>
154 </when>
155 <when value="scaffold"> <!-- parameters shown when the scaffold mode is selected -->
156 <expand macro="input_options"/>
157 <section name="advanced_options" title="advanced options">
158 <expand macro="common_parameters"/>
159 <param argument="-i" type="float" min="0" max="1" value="0.2" label="Minimum grouping confidence score"
160 help="The grouping confidence score is the number of base pairs a contig covered in its assigned reference chromosome
161 divided by the total number of covered base pairs in the entire reference genome"/>
162 <param argument="-a" type="float" min="0" max="1" value="0" label="Minimum location confidence score"
163 help="To create a metric associated with contig ordering confidence, RagTag defines a location confidence. First, the smallest
164 and largest alignment positions, with respect to the reference, between a contig and its assigned reference chromosome are found.
165 The location confidence is then calculated as the number of covered base pairs in this range divided by the total number of
166 base pairs in the range"/>
167 <param argument="-s" type="float" min="0" max="1" value="0" label="Minimum orientation confidence score"
168 help="To calculate the orientation confidence, each base pair in each alignment between a contig and its assigned reference chromosome
169 casts a vote for the orientation of its alignment. The orientation confidence is the number of votes for the assigned orientation of
170 the contig divided by the total number of votes"/>
171 <conditional name="gap_conditional"> <!-- when enabled, the command adds -r together with the -g and -m bounds -->
172 <param name="gap_option" type="select" label="Infer gap sizes" help="When disabled, all gaps are 100 bp (-r)">
173 <option value="true" selected="true">Enabled</option>
174 <option value="false">Disabled</option>
175 </param>
176 <when value="true">
177 <param argument="-g" type="integer" min="0" value="100" label="Minimum inferred gap size" />
178 <param argument="-m" type="integer" min="0" value="100000" label="Maximum inferred gap size"/>
179 </when>
180 <when value="false"/>
181 </conditional>
182 <conditional name="unplaced_conditional"> <!-- when enabled, the command adds -C and, if a list is supplied, -J -->
183 <param name="unplaced_option" type="select" label="Concatenate unplaced contigs and make 'chr0' (-C)">
184 <option value="true">Enabled</option>
185 <option value="false" selected="true">Disabled</option>
186 </param>
187 <when value="true">
188 <param argument="-J" type="data" format="txt" optional="true" label="List of query headers to leave unplaced and exclude from 'chr0'"/>
189 </when>
190 <when value="false"/>
191 </conditional>
192 </section>
193 <param name="output_scaffold" type="select" multiple="true" label="Output files"> <!-- the selected values are tested by the output filters of the scaffold mode -->
194 <option value="fasta" selected="true">The scaffolds in FASTA format, defined by the ordering and orientations of the sequences contained in the AGP file</option>
195 <option value="agp" selected="true">The ordering and orientations of query sequences in AGP format</option>
196 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option>
197 <option value="confidence">Confidence score values</option>
198 <option value="stats">Summary statistics for the scaffolding process</option>
199 <option value="log">Log file</option>
200 </param>
201 </when>
202 <when value="patch"> <!-- parameters shown when the patch mode is selected -->
203 <expand macro="input_options"/>
204 <section name="advanced_options" title="advanced options">
205 <expand macro="common_parameters"/>
206 <param argument="-s" type="integer" min="0" value="50000" label="Minimum merged alignment length"
207 help="After merging, alignments less than -s bp long will be removed"/>
208 <param argument="-i" type="float" min="0" max="1" value="0.05" label="Maximum merged alignment distance"
209 help="Maximum merged alignment distance from sequence terminus as fraction of the sequence length. Alignments must
210 be within -i bp of a target sequence terminus or gap to be considered for patching"/>
211 <param name="patching_mode" type="select" optional="true" label="Patching mode"> <!-- the selected value is emitted verbatim on the command line, so it must match the ragtag.py patch flag spelling exactly (hyphens, not underscores) -->
212 <option value="--fill-only">Only fill existing target gaps. Do not join target sequences</option>
213 <option value="--join-only">Only join and patch target sequences. Do not fill existing gaps</option>
214 </param>
215 </section>
216 <param name="output_patch" type="select" multiple="true" label="Output files"> <!-- the selected values are tested by the output filters of the patch mode -->
217 <option value="final_fasta" selected="true">The final FASTA file containing the patched assembly</option>
218 <option value="final_agp" selected="true">The final AGP file defining how final FASTA is built</option>
219 <option value="assembly_file" selected="true">Assembly alignment files</option>
220 <option value="split_assembly">The split target assembly and the renamed query assembly combined into one FASTA file</option>
221 <option value="split_description">An AGP file defining how the target assembly was split at gaps</option>
222 <option value="target_gaps">The target assembly split at gaps</option>
223 <option value="agp_renamed">An AGP file defining the new names for query sequences</option>
224 <option value="fasta_renamed">A FASTA file with the original query sequence, but with new names</option>
225 <option value="log">Log file</option>
226 </param>
227 </when>
228 <when value="merge"> <!-- parameters shown when the merge mode is selected -->
229 <param name="assembly_fasta" type="data" format="fasta" label="Assembly FASTA file"/>
230 <param name="scaffold_files" type="data" format="agp" multiple="true" optional="true" label="Scaffold AGP files"/>
231 <section name="merging_options" title="Merging options">
232 <param argument="-j" type="data" format="txt" optional="true" label="List of query headers to leave unplaced"/>
233 <param argument="-l" type="integer" min="0" value="100000" label="Minimum assembly sequence length"
234 help="Assembly sequences shorter than -l will also be left unplaced."/>
235 <param argument="-e" type="float" min="0" value="0" label="Minimum edge weight"
236 help="The edges in the merging graph represent scaffolding adjacencies. If an AGP file supports a particular adjacency,
237 its weight is added to the edge weight. Any edges with a weight lower than the minimum edge weight will be removed from the graph"/>
238 <param name="function_merging" type="select" label="Function for merging gap lengths"
239 help="Scaffold gaps can differ between input AGP files. For example, a Hi-C derived AGP file might place 100 bp gaps between sequences
240 while a reference-guided AGP file might infer gap sizes based on a reference genome. Use this parameter to specify how gap sizes
241 should be computed from the supporting AGP files (--gap-func)">
242 <option value="min" selected="true">Min</option>
243 <option value="max">Max</option>
244 <option value="mean">Mean</option>
245 </param>
246 </section>
247 <section name="hic_options" title="HI-C options"> <!-- the command only emits -b, -r and -p when a Hi-C BAM is supplied -->
248 <param argument="-b" type="data" format="bam" optional="true" label="Hi-C alignments" help="Sorted by read name"/>
249 <param argument="-r" type="text" value="" optional="true" label="Restriction enzymes/sites or 'DNase'" help="List of restriction enzymes/sites or 'DNase', separated by comma. E.g. GATC,GACC">
250 <sanitizer invalid_char="">
251 <valid initial="string.letters,string.digits">
252 <add value="," />
253 <add value="[" />
254 <add value="]" />
255 </valid>
256 </sanitizer>
257 <validator type="regex">[0-9a-zA-Z,\]\[]+</validator>
258 </param>
259 <param argument="-p" type="float" min="0" max="1" value="1" optional="true" label="Portion of the sequence termini to consider for links"/>
260 </section>
261 </when>
262 </conditional>
263 </inputs>
264 <outputs>
265 <!-- Correct mode outputs: each dataset is created only when mode_option is "correct" and its key is selected in output_correct; files are collected from the working directory via from_work_dir -->
266 <data format="paf" name="correct_paf" from_work_dir="ragtag.correct.asm.paf" label="${tool.name} on ${on_string}: PAF">
267 <filter>mode_conditional["mode_option"] == "correct" and "paf" in mode_conditional["output_correct"]</filter>
268 </data>
269 <data format="agp" name="correct_agp" from_work_dir="ragtag.correct.agp" label="${tool.name} on ${on_string}: AGP">
270 <filter>mode_conditional["mode_option"] == "correct" and "agp" in mode_conditional["output_correct"]</filter>
271 </data>
272 <data format="fasta" name="correct_fasta" from_work_dir="ragtag.correct.fasta" label="${tool.name} on ${on_string}: FASTA">
273 <filter>mode_conditional["mode_option"] == "correct" and "fasta" in mode_conditional["output_correct"]</filter>
274 </data>
275 <data format="txt" name="correct_log" from_work_dir="ragtag.correct.asm.paf.log" label="${tool.name} on ${on_string}: log">
276 <filter>mode_conditional["mode_option"] == "correct" and "log" in mode_conditional["output_correct"]</filter>
277 </data>
278 <!-- Scaffold mode outputs: each dataset is created only when mode_option is "scaffold" and its key is selected in output_scaffold -->
279 <data format="paf" name="scaffold_paf" from_work_dir="ragtag.scaffold.asm.paf" label="${tool.name} on ${on_string}: PAF">
280 <filter>mode_conditional["mode_option"] == "scaffold" and "paf" in mode_conditional["output_scaffold"]</filter>
281 </data>
282 <data format="agp" name="scaffold_agp" from_work_dir="ragtag.scaffold.agp" label="${tool.name} on ${on_string}: AGP">
283 <filter>mode_conditional["mode_option"] == "scaffold" and "agp" in mode_conditional["output_scaffold"]</filter>
284 </data>
285 <data format="fasta" name="scaffold_fasta" from_work_dir="ragtag.scaffold.fasta" label="${tool.name} on ${on_string}: FASTA">
286 <filter>mode_conditional["mode_option"] == "scaffold" and "fasta" in mode_conditional["output_scaffold"]</filter>
287 </data>
288 <data format="txt" name="scaffold_log" from_work_dir="ragtag.scaffold.asm.paf.log" label="${tool.name} on ${on_string}: log">
289 <filter>mode_conditional["mode_option"] == "scaffold" and "log" in mode_conditional["output_scaffold"]</filter>
290 </data>
291 <data format="tabular" name="scaffold_stats" from_work_dir="ragtag.scaffold.stats" label="${tool.name} on ${on_string}: stats">
292 <filter>mode_conditional["mode_option"] == "scaffold" and "stats" in mode_conditional["output_scaffold"]</filter>
293 </data>
294 <data format="tabular" name="scaffold_confidence" from_work_dir="ragtag.scaffold.confidence.txt" label="${tool.name} on ${on_string}: confidence">
295 <filter>mode_conditional["mode_option"] == "scaffold" and "confidence" in mode_conditional["output_scaffold"]</filter>
296 </data>
297 <!-- Patch mode outputs: each dataset is created only when mode_option is "patch" and its key is selected in output_patch; the log is read from ragtag.patch.log, the name the command section gives the file with its final mv step -->
298 <data format="agp" name="patch_agp" from_work_dir="ragtag.patch.agp" label="${tool.name} on ${on_string}: final AGP">
299 <filter>mode_conditional["mode_option"] == "patch" and "final_agp" in mode_conditional["output_patch"]</filter>
300 </data>
301 <data format="paf" name="patch_paf" from_work_dir="ragtag.patch.asm.paf" label="${tool.name} on ${on_string}: final PAF">
302 <filter>mode_conditional["mode_option"] == "patch" and "assembly_file" in mode_conditional["output_patch"]</filter>
303 </data>
304 <data format="txt" name="patch_log" from_work_dir="ragtag.patch.log" label="${tool.name} on ${on_string}: log">
305 <filter>mode_conditional["mode_option"] == "patch" and "log" in mode_conditional["output_patch"]</filter>
306 </data>
307 <data format="fasta" name="patch_comps_fasta" from_work_dir="ragtag.patch.comps.fasta" label="${tool.name} on ${on_string}: components FASTA">
308 <filter>mode_conditional["mode_option"] == "patch" and "split_assembly" in mode_conditional["output_patch"]</filter>
309 </data>
310 <data format="agp" name="patch_ctg_agp" from_work_dir="ragtag.patch.ctg.agp" label="${tool.name} on ${on_string}: contigs AGP">
311 <filter>mode_conditional["mode_option"] == "patch" and "split_description" in mode_conditional["output_patch"]</filter>
312 </data>
313 <data format="fasta" name="patch_ctg_fasta" from_work_dir="ragtag.patch.ctg.fasta" label="${tool.name} on ${on_string}: contigs FASTA">
314 <filter>mode_conditional["mode_option"] == "patch" and "target_gaps" in mode_conditional["output_patch"]</filter>
315 </data>
316 <data format="fasta" name="patch_fasta" from_work_dir="ragtag.patch.fasta" label="${tool.name} on ${on_string}: final FASTA">
317 <filter>mode_conditional["mode_option"] == "patch" and "final_fasta" in mode_conditional["output_patch"]</filter>
318 </data>
319 <data format="agp" name="patch_rename_agp" from_work_dir="ragtag.patch.rename.agp" label="${tool.name} on ${on_string}: renamed AGP">
320 <filter>mode_conditional["mode_option"] == "patch" and "agp_renamed" in mode_conditional["output_patch"]</filter>
321 </data>
322 <data format="fasta" name="patch_rename_fasta" from_work_dir="ragtag.patch.rename.fasta" label="${tool.name} on ${on_string}: renamed FASTA">
323 <filter>mode_conditional["mode_option"] == "patch" and "fasta_renamed" in mode_conditional["output_patch"]</filter>
324 </data>
325 <!-- Merge mode outputs: there is no output selector for this mode, so both datasets are always created when mode_option is "merge" -->
326 <data format="agp" name="merge_agp" from_work_dir="ragtag.merge.agp" label="${tool.name} on ${on_string}: merged AGP">
327 <filter>mode_conditional["mode_option"] == "merge"</filter>
328 </data>
329 <data format="fasta" name="merge_fasta" from_work_dir="ragtag.merge.fasta" label="${tool.name} on ${on_string}: merged FASTA">
330 <filter>mode_conditional["mode_option"] == "merge"</filter>
331 </data>
332 </outputs>
333 <tests>
334 <test expect_num_outputs="4">
335 <!-- Test 01: correct mode with minimap2 (asm5), reference/query skip lists, a GFF file and inter-only breaking; all four outputs are requested and the log is compared with a 20-line tolerance -->
336 <conditional name="mode_conditional">
337 <param name="mode_option" value="correct"/>
338 <param name="reference" value="genome.fna"/>
339 <param name="query" value="contigs.fna"/>
340 <param name="output_correct" value="fasta,agp,paf,log"/>
341 <section name="advanced_options">
342 <param name="e" value="reference_headers_skip.txt"/>
343 <param name="j" value="query_headers_skip.txt"/>
344 <param name="f" value="1000"/>
345 <conditional name="mapping_conditional">
346 <param name="mapping_option" value="minimap2"/>
347 <param name="mm2_params" value="asm5"/>
348 </conditional>
349 <param name="remove_small" value="false"/>
350 <param name="q" value="10"/>
351 <param name="d" value="100000"/>
352 <param name="b" value="5000"/>
353 <param name="missasembly_break" value="--inter"/>
354 <param name="gff" value="annotation.gff"/>
355 </section>
356 </conditional>
357 <output name="correct_paf" file="correct_paf_01.paf" ftype="paf"/>
358 <output name="correct_agp" file="correct_agp_01.agp" ftype="agp"/>
359 <output name="correct_fasta" file="correct_fasta_01.fasta" ftype="fasta"/>
360 <output name="correct_log" file="correct_log_01.txt" ftype="txt" lines_diff="20"/>
361 </test>
362 <!-- Test 02: correct mode with the nucmer aligner and inter-only breaking; only the FASTA and AGP outputs are requested -->
363 <test expect_num_outputs="2">
364 <conditional name="mode_conditional">
365 <param name="mode_option" value="correct"/>
366 <param name="reference" value="genome.fna"/>
367 <param name="query" value="contigs.fna"/>
368 <param name="output_correct" value="fasta,agp"/>
369 <section name="advanced_options">
370 <param name="f" value="1000"/>
371 <conditional name="mapping_conditional">
372 <param name="mapping_option" value="nucmer"/>
373 </conditional>
374 <param name="remove_small" value="true"/>
375 <param name="q" value="10"/>
376 <param name="d" value="100000"/>
377 <param name="b" value="5000"/>
378 <param name="missasembly_break" value="--inter"/>
379 </section>
380 </conditional>
381 <output name="correct_fasta" file="correct_fasta_02.fasta" ftype="fasta"/>
382 <output name="correct_agp" file="correct_agp_02.agp" ftype="agp"/>
383
384 </test>
385 <!-- Test 03: scaffold mode with the confidence thresholds set to the same values as the parameter defaults (0.2, 0, 0); all six outputs are requested and the log is compared with a 20-line tolerance -->
386 <test expect_num_outputs="6">
387 <conditional name="mode_conditional">
388 <param name="mode_option" value="scaffold"/>
389 <param name="reference" value="genome.fna"/>
390 <param name="query" value="contigs.fna"/>
391 <param name="output_scaffold" value="fasta,agp,paf,confidence,log,stats"/>
392 <section name="advanced_options">
393 <param name="f" value="1000"/>
394 <param name="remove_small" value="true"/>
395 <param name="q" value="10"/>
396 <param name="d" value="100000"/>
397 <param name="i" value="0.2"/>
398 <param name="a" value="0"/>
399 <param name="s" value="0"/>
400 </section>
401 </conditional>
402 <output name="scaffold_paf" file="scaffold_paf_03.paf" ftype="paf"/>
403 <output name="scaffold_agp" file="scaffold_apg.03.agp" ftype="agp"/>
404 <output name="scaffold_fasta" file="scaffold_fasta_03.fasta" ftype="fasta"/>
405 <output name="scaffold_log" file="scaffold_log_03.txt" ftype="txt" lines_diff="20"/>
406 <output name="scaffold_stats" file="scaffold_stats_03.tabular" ftype="tabular"/>
407 <output name="scaffold_confidence" file="scaffold_confidence_03.tabular" ftype="tabular"/>
408 </test>
409 <!-- Test 04: patch mode with all nine outputs requested; the components FASTA is checked by size only (603691 bytes, delta 100) and the log with a 20-line tolerance -->
410 <test expect_num_outputs="9">
411 <conditional name="mode_conditional">
412 <param name="mode_option" value="patch"/>
413 <param name="reference" value="genome.fna"/>
414 <param name="query" value="contigs.fna"/>
415 <param name="output_patch" value="final_fasta,final_agp,assembly_file,split_assembly,split_description,target_gaps,agp_renamed,fasta_renamed,log"/>
416 <section name="advanced_options">
417 <param name="s" value="50000"/>
418 <param name="i" value="0.05"/>
419 </section>
420 </conditional>
421 <output name="patch_agp" file="patch_agp_04.agp" ftype="agp"/>
422 <output name="patch_paf" file="patch_paf_04.paf" ftype="paf"/>
423 <output name="patch_log" file="patch_log_04.txt" ftype="txt" lines_diff="20"/>
424 <output name="patch_comps_fasta" ftype="fasta">
425 <assert_contents>
426 <has_size value="603691" delta="100" />
427 </assert_contents>
428 </output>
429 <output name="patch_ctg_fasta" file="patch_ctg_fasta_04.fasta" ftype="fasta"/>
430 <output name="patch_ctg_agp" file="patch_ctg_fasta_04.agp" ftype="agp"/>
431 <output name="patch_fasta" file="patch_fasta_04.fasta" ftype="fasta"/>
432 <output name="patch_rename_agp" file="patch_rename_agp.agp" ftype="agp"/>
433 <output name="patch_rename_fasta" file="patch_rename_fasta.fasta" ftype="fasta"/>
434 </test>
435 <test expect_num_outputs="2">
436 <!-- Test 05: merge mode, feeding the expected FASTA of test 01 and the expected AGP files of tests 01 and 02 back in as inputs; no Hi-C options are set -->
437 <conditional name="mode_conditional">
438 <param name="mode_option" value="merge"/>
439 <param name="assembly_fasta" value="correct_fasta_01.fasta"/>
440 <param name="scaffold_files" value="correct_agp_01.agp,correct_agp_02.agp"/>
441 <section name="merging_options">
442 <param name="l" value="100000"/>
443 <param name="e" value="0"/>
444 <param name="function_merging" value="min"/>
445 </section>
446 </conditional>
447 <output name="merge_agp" file="merge_agp_05.agp" ftype="agp"/>
448 <output name="merge_fasta" file="merge_fasta_05.fasta" ftype="fasta"/>
449 </test>
450 </tests>
451 <help><![CDATA[
452 .. class:: infomark
453
454 **Purpose**
455
456 RagTag is a collection of software tools for scaffolding and improving modern genome assemblies. Tasks include:
457
458 - Homology-based misassembly correction
459 - Homology-based assembly scaffolding and patching
460 - Scaffold merging
461
462 ----
463
464 .. class:: infomark
465
466 **Correct mode**
467
468 RagTag offers a correction module that uses a reference genome to identify and correct potential misassemblies in a query assembly.
469 RagTag also provides the option to verify putative misassemblies by aligning reads (from the same genotype) to the query assembly
470 and observing read coverage near misassembly break points. In all cases, sequence is never added or subtracted. Query sequences
471 are only broken at points of putative misassembly.
472
473 *Misassemblies vs true variation*
474
475 Reference-guided misassembly signatures are sometimes caused by true biological structural variation if the reference and query assemblies
476 represent distinct genotypes (or haplotypes). The read validation feature should help to avoid some of these misassembly false positives,
477 and the validation sensitivity can be tuned with command line parameters. However, it is ultimately up to the discretion of the user to decide
478 if misassembly correction is appropriate. One should validate all RagTag results with independent data (usually physical, optical, or genetic
479 maps), when possible.
480
481 ----
482
483 .. class:: infomark
484
485 **Scaffold mode**
486
487 Scaffolding is the process of ordering and orienting draft assembly (query) sequences into longer sequences. Gaps (stretches of "N" characters)
488 are placed between adjacent query sequences to indicate the presence of unknown sequence. RagTag uses whole-genome alignments to a reference
489 assembly to scaffold query sequences. RagTag does not alter input query sequence in any way and only orders and orients sequences, joining them with gaps.
490
491 ----
492
493 .. class:: infomark
494
495 **Patch mode**
496
497 This mode uses one genome assembly to *patch* another genome assembly. We define two types of patches:
498
499 - Fills are patches that fill assembly gaps. This process is like traditional gap-filling, though it uses an assembly instead of WGS sequencing reads.
500 - Joins are patches that join distinct contigs. This is essentially scaffolding and gap-filling in a single step.
501
502 ----
503
504 .. class:: infomark
505
506 **Merge mode**
507
508 Draft genome assemblies are often scaffolded multiple times using different approaches. For example, one might scaffold an assembly using different genome
509 maps (physical, linkage, Hi-C, etc.), different methods, or different method parameters. RagTag merge is a tool to merge and reconcile different scaffoldings
510 of the same assembly. In this way, one can leverage the advantages of multiple techniques to synergistically improve scaffolding.
511
512 Most tools write scaffolding results in the AGP file format, which encodes adjacency and gap information in a plain text file. To run RagTag merge,
513 one must supply the assembly in FASTA format and at least two AGP files that define a scaffolding of the assembly. Each AGP file can optionally be
514 assigned a weight, allowing users to assign the relative influence of each AGP on the final result.
515
516 If available, users can supply Hi-C alignments to the draft assembly to resolve conflicts in the merging graph. In this scenario, the input AGP
517 files are used to build the initial graph, but then Hi-C alignments are used to re-weight the graph before computing the scaffolding solution.
518
519
520 **List of accepted restriction enzymes**
521
522 List of all accepted restriction enzymes and their restriction sites:
523
524 - HindIII: AAGCTT
525 - Sau3AI: GATC
526 - MboI: GATC
527 - DpnII: GATC
528 - HinfI: GA[ATCG]TC
529 - DdeI: CT[ATCG]AG
530 - MseI: TTAA
531
532 For RagTag, use a comma separated list of enzymes or sites (or a mix). For example:
533
534 - Arima Hi-C v1.0: *Sau3AI,HinfI* or *GATC,GA[ATCG]TC*
535 - Arima Hi-C v2.0: *Sau3AI,HinfI,DdeI,MseI* or *GATC,GA[ATCG]TC,CT[ATCG]AG,TTAA*
536
537 Note that for restriction sites, wildcards are represented with python regex syntax, not IUPAC ambiguity codes. e.g. '[ATCG]' instead of 'N'.
538
539 Restriction enzymes are not necessarily the enzyme used for sample prep. Each is only an enzyme that cuts at the corresponding restriction site.
540
541 ]]> </help>
542 <expand macro="citations" />
543 </tool>