comparison tgsgapcloser.xml @ 0:86fa46d3ce2e draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tgsgapcloser commit dcc6bd722244004ed2d5ac49d53a4e1d71366b1a"
author bgruening
date Sun, 14 Nov 2021 21:28:36 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:86fa46d3ce2e
1 <tool id="tgsgapcloser" name="TGS-GapCloser" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>fills the N-gap of error-prone long reads</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="xrefs"/>
8 <version_command>tgsgapcloser --version</version_command>
9 <command detect_errors="exit_code"><![CDATA[
10 PILON=\$(which pilon)
11 PILON_JAR=\$(readlink -f \$PILON).jar
12 && tgsgapcloser
13 --scaff '$scaff'
14 --reads '$reads'
15 --output output
16 #if $error_conditional.error_options == 'pilon'
17 --pilon \$PILON_JAR
18 --ngs '$error_conditional.ngs'
19 --samtools `which samtools`
20 --java `which java`
21 --pilon_mem \${GALAXY_MEMORY_MB:-4096}M --p_round $error_conditional.p_round
22 #elif $error_conditional.error_options == 'racon'
23 --racon `which racon` --r_round $error_conditional.r_round
24 #else
25 --ne
26 #end if
27 --tgstype $tgstype_conditional.tgstype
28 --min_idy $tgstype_conditional.min_idy
29 --min_match $tgstype_conditional.min_match
30 --chunk $chunk
31 $g_check
32 --thread \${GALAXY_SLOTS:-16}
33 ]]></command> <!-- Builds the tgsgapcloser call. The selected error-correction branch adds its tool paths and its round count (p_round for Pilon, r_round for Racon); dataset paths are single-quoted; shell variables are backslash-escaped so the shell, not the Cheetah template, expands them. -->
34 <inputs> <!-- User-facing parameters; their names are the variables referenced by the command template. -->
35 <param argument="--scaff" type="data" format="fasta" multiple="true" label="Scaffold file" /> <!-- NOTE(review): multiple="true" lets several datasets be selected; confirm tgsgapcloser accepts more than one scaffold file. -->
36 <param argument="--reads" type="data" format="fasta" multiple="true" label="Input reads" /> <!-- NOTE(review): same question as for the scaffold input; confirm several read files are supported. -->
37 <conditional name="error_conditional"> <!-- Chooses the error-correction mode: ne (none), racon or pilon. -->
38 <param name="error_options" type="select" label="Error correction">
39 <option value="ne">Do not error correct</option>
40 <option value="racon">Racon</option>
41 <option value="pilon">Pilon</option>
42 </param>
43 <when value="ne"/> <!-- No additional parameters when correction is disabled. -->
44 <when value="racon">
45 <param argument="--r_round" type="integer" min="0" max="10" value="1" label="Number of Racon error-correction rounds"
46 help="Although multiple rounds of racon can increase the quality of an assembly there are indications that it also
47 fragments the assembly and may decrease quality by removing structural variants and SNPs. Published assembly workflows
48 differ in the number of rounds but rarely apply more than 4 rounds of racon" />
49 </when>
50 <when value="pilon"> <!-- Pilon needs short reads (ngs) in addition to its round count. -->
51 <param argument="--ngs" type="data" format="fastq,fastq.gz" label="Illumina reads"
52 help="Pilon can utilize Illumina short reads mapped to the draft assembly to
53 improve the local accuracy of the sequence by correcting sequence errors,
54 fixing misassemblies, and filling gaps"/>
55 <param argument="--p_round" type="integer" min="0" max="10" value="3" label="Number of Pilon error-correction rounds" />
56 </when>
57 </conditional>
58 <conditional name="tgstype_conditional"> <!-- Read technology; each branch defines the same two filters with different default values. -->
59 <param argument="--tgstype" type="select" label="Type of third generation reads">
60 <option value="ont" selected="true">Oxford Nanopore Technologies (ONT)</option>
61 <option value="pb">PacBio (pb)</option>
62 </param>
63 <when value="ont">
64 <param argument="--min_idy" type="float" min="0" max="1" value="0.3" label="Minimum identity for filtering candidate sequences"/>
65 <param argument="--min_match" type="integer" min="0" max="1000" value="300" label="Minimum matched length for filtering candidate sequences"/>
66 </when>
67 <when value="pb">
68 <param argument="--min_idy" type="float" min="0" max="1" value="0.2" label="Minimum identity for filtering candidate sequences"/>
69 <param argument="--min_match" type="integer" min="0" max="1000" value="200" label="Minimum matched length for filtering candidate sequences"/>
70 </when>
71 </conditional>
72 <param argument="--chunk" type="integer" min="0" max="20" value="3" label="Chunks for error correction" help="Split candidates into # of chunks to separately correct errors" />
73 <param argument="--g_check" type="boolean" truevalue="--g_check" falsevalue="" label="Gap-size diff check"/>
74 <param name="output_options" type="select" multiple="true" optional="true" display="checkboxes" label="Output files"> <!-- Optional outputs; the option values are tested by the filter expressions in the outputs section. -->
75 <option value="log_file">General log file</option>
76 <option value="gapfill_log">Gapfill log file</option>
77 </param>
78 </inputs>
79 <outputs> <!-- final_assembly is always collected; log and fill_details are only kept when selected in output_options. -->
80 <data name="log" format="txt" from_work_dir="output.fill.log" label="${tool.name} on ${on_string}: log">
81 <filter>output_options and 'log_file' in output_options</filter>
82 </data>
83 <data name="final_assembly" format="fasta" from_work_dir="output.scaff_seqs" label="${tool.name} on ${on_string}: final assembly"/>
84
85 <data name="fill_details" format="txt" from_work_dir="output.gap_fill_detail" label="${tool.name} on ${on_string}: gap fill details">
86 <filter>output_options and 'gapfill_log' in output_options</filter>
87 </data>
88 </outputs>
89 <tests>
90 <!--Test 01: ONT reads without error correction; both optional logs are requested, so three outputs are expected-->
91 <test expect_num_outputs="3">
92 <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
93 <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
94 <conditional name="error_conditional">
95 <param name="error_options" value="ne"/>
96 </conditional>
97 <conditional name="tgstype_conditional">
98 <param name="tgstype" value="ont"/>
99 <param name="min_idy" value="0.3"/>
100 <param name="min_match" value="300"/>
101 </conditional>
102 <param name="chunk" value="3"/>
103 <param name="g_check" value="false"/>
104 <param name="output_options" value="log_file,gapfill_log"/>
105 <output name="final_assembly" file="test_01_final_assembly.fasta" ftype="fasta"/>
106 <output name="log" ftype="txt">
107 <assert_contents>
108 <has_text text="TGSGapCloser start now"/>
109 <has_text text="the one read provide filler choose count freq for a gap"/>
110 </assert_contents>
111 </output>
112 <output name="fill_details" file="test_01_gapfill.log" ftype="txt"/>
113 </test>
114 <!--Test 02: ONT reads with Racon error correction; no optional logs requested, so only the final assembly is expected-->
115 <test expect_num_outputs="1">
116 <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
117 <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
118 <conditional name="error_conditional">
119 <param name="error_options" value="racon"/>
120 <param name="r_round" value="2"/>
121 </conditional>
122 <conditional name="tgstype_conditional">
123 <param name="tgstype" value="ont"/>
124 <param name="min_idy" value="0.3"/>
125 <param name="min_match" value="300"/>
126 </conditional>
127 <param name="chunk" value="3"/>
128 <param name="g_check" value="false"/>
129 <output name="final_assembly" file="test_02_final_assembly.fasta" ftype="fasta"/>
130 </test>
131 <!--Test 03: Pilon error correction with uncompressed FASTQ short reads; the round count uses p_round, the parameter of the pilon branch-->
132 <test expect_num_outputs="1">
133 <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
134 <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
135 <conditional name="error_conditional">
136 <param name="error_options" value="pilon"/>
137 <param name="ngs" value="ngs_reads.fastq" ftype="fastq"/>
138 <param name="p_round" value="1"/>
139 </conditional>
140 <conditional name="tgstype_conditional">
141 <param name="tgstype" value="ont"/>
142 <param name="min_idy" value="0"/>
143 <param name="min_match" value="0"/>
144 </conditional>
145 <param name="chunk" value="1"/>
146 <param name="g_check" value="true"/>
147 <output name="final_assembly" file="test_03_final_assembly.fasta" ftype="fasta"/>
148 </test>
149 <!--Test 04: Racon error correction with the PacBio read type; tgstype must be "pb", the value declared by the select option-->
150 <test expect_num_outputs="1">
151 <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
152 <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
153 <conditional name="error_conditional">
154 <param name="error_options" value="racon"/>
155 <param name="r_round" value="2"/>
156 </conditional>
157 <conditional name="tgstype_conditional">
158 <param name="tgstype" value="pb"/>
159 <param name="min_idy" value="0.2"/>
160 <param name="min_match" value="200"/>
161 </conditional>
162 <param name="chunk" value="2"/>
163 <param name="g_check" value="false"/>
164 <output name="final_assembly" file="test_04_final_assembly.fasta" ftype="fasta"/>
165 </test>
166 <!--Test 05: Pilon error correction with gzip-compressed FASTQ short reads; the round count uses p_round, the parameter of the pilon branch-->
167 <test expect_num_outputs="1">
168 <param name="scaff" value="scaffold.fasta" ftype="fasta"/>
169 <param name="reads" value="ont_reads.fasta" ftype="fasta"/>
170 <conditional name="error_conditional">
171 <param name="error_options" value="pilon"/>
172 <param name="ngs" value="ngs_reads.fastq.gz" ftype="fastq.gz"/>
173 <param name="p_round" value="1"/>
174 </conditional>
175 <conditional name="tgstype_conditional">
176 <param name="tgstype" value="ont"/>
177 <param name="min_idy" value="0"/>
178 <param name="min_match" value="0"/>
179 </conditional>
180 <param name="chunk" value="1"/>
181 <param name="g_check" value="false"/>
182 <output name="final_assembly" file="test_05_final_assembly.fasta" ftype="fasta"/>
183 </test>
184 </tests>
185 <help><![CDATA[
186
187 .. class:: infomark
188
189 **Purpose**
190
191 TGS-GapCloser is a gap-closing software tool that uses error-prone long reads generated by third-generation sequencing technologies (PacBio,
192 Oxford Nanopore, etc.) or preassembled contigs to fill N-gaps in the genome assembly. This tool can close gaps in large genome assemblies
193 using raw long reads quickly and cost-effectively. The final assemblies generated by TGS-GapCloser have improved contiguity and
194 completeness while maintaining high accuracy.
195
196 ----
197
198 .. class:: infomark
199
200 **Quick usage**
201
202 Input reads can only be in FASTA format. Both raw reads and pre-error-corrected reads are acceptable as input. If only raw long reads are
203 provided, it polishes raw TGS reads by calling Racon. If additional NGS short reads are available, it polishes raw TGS reads by calling Pilon.
204
205 ----
206
207 .. class:: infomark
208
209 **Gap fill details**
210
211
212 Format of the detailed gap fill report:
213
214 - Each scaffold name is followed by its data lines.
215 - A data line consists of 3 or 5 columns and describes the source of each segment in the final sequence:
216 - Column 1 is the segment's first bp position in the final sequence.
217 - Column 2 is the segment's last bp position in the final sequence.
218 - Column 3 is the segment's type: 'S', 'N' or 'F'.
219 - 'S' means this segment is a segment of the input sequence and this line includes two more columns:
220 - Column 4 is the segment's first bp position in the input sequence.
221 - Column 5 is the segment's last bp position in the input sequence.
222 - 'N' means this segment is a N area.
223 - 'F' means this segment is a filled sequence from TGS reads.
224
225 ----
226
227 .. class:: infomark
228
229 **Algorithm and implementation of TGS-GapCloser**
230
231 This is a brief description of the TGS-GapCloser algorithm. Please refer to the manuscript for more detailed information.
232
233 TGS-GapCloser is coded in the C++ programming language (requires GCC 4.4+). It uses minimap2 to obtain alignments, and Pilon (requires Java runtime 1.7+)
234 or Racon (requires GCC 4.8+) to correct candidate fragments. The algorithm automatically identifies gaps and tries to find the best matched long-read
235 fragments to close gaps or merge adjacent scaftigs. To accelerate the gap closure without losing efficiency and accuracy, TGS-GapCloser only selects a
236 limited number of fragmented long reads as candidates for subsequent error correction and competition.
237
238 TGS-GapCloser can accept as input any type of TGS long reads or other pre-assembled contigs to fill gaps in a draft assembly in the following 4 steps:
239 (i) Identification of gap regions in the draft assembly;
240 (ii) Acquisition of candidates from the alignments of long reads against gaps;
241 (iii) Base-level error correction of alternative sub-long reads; and
242 (iv) Gap closure using the error-corrected candidates with the highest score for each gap or linkage of the neighboring scaftigs with overlaps.
243
244 ]]></help>
245 <expand macro="citations" />
246 </tool>