Mercurial > repos > bgruening > tgsgapcloser
comparison tgsgapcloser.xml @ 0:86fa46d3ce2e draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tgsgapcloser commit dcc6bd722244004ed2d5ac49d53a4e1d71366b1a"
author | bgruening |
---|---|
date | Sun, 14 Nov 2021 21:28:36 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:86fa46d3ce2e |
---|---|
1 <tool id="tgsgapcloser" name="TGS-GapCloser" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | |
2 <description>fills N-gaps in genome assemblies using error-prone long reads</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="xrefs"/> | |
8 <version_command>tgsgapcloser --version</version_command> | |
9 <!-- Builds the tgsgapcloser call. Each error-correction branch passes its helper tool and the round count chosen in error_conditional; dataset paths are quoted. --> <command detect_errors="exit_code"><![CDATA[ | |
10 PILON=\$(which pilon) | |
11 PILON_JAR=\$(readlink -f \$PILON).jar | |
12 && tgsgapcloser | |
13 --scaff '$scaff' | |
14 --reads '$reads' | |
15 --output output | |
16 #if $error_conditional.error_options == 'pilon' | |
17 --pilon \$PILON_JAR | |
18 --ngs '$error_conditional.ngs' | |
19 --samtools `which samtools` | |
20 --java `which java` | |
21 --pilon_mem \${GALAXY_MEMORY_MB:-4096}M --p_round $error_conditional.p_round | |
22 #elif $error_conditional.error_options == 'racon' | |
23 --racon `which racon` --r_round $error_conditional.r_round | |
24 #else | |
25 --ne | |
26 #end if | |
27 --tgstype $tgstype_conditional.tgstype | |
28 --min_idy $tgstype_conditional.min_idy | |
29 --min_match $tgstype_conditional.min_match | |
30 --chunk $chunk | |
31 $g_check | |
32 --thread \${GALAXY_SLOTS:-16} | |
33 ]]></command> | |
34 <!-- Form parameters: input files, error-correction mode, read technology, chunking, gap-size check and optional log outputs. NOTE(review): scaff/reads allow multiple datasets but are passed to a single CLI option each; confirm this is intended. --> <inputs> | |
35 <param type="data" argument="--scaff" label="Scaffold file" format="fasta" multiple="true"/> | |
36 <param type="data" argument="--reads" label="Input reads" format="fasta" multiple="true"/> | |
37 <conditional name="error_conditional"> | |
38 <param type="select" name="error_options" label="Error correction"> | |
39 <option value="ne">Do not error correct</option> | |
40 <option value="racon">Racon</option> | |
41 <option value="pilon">Pilon</option> | |
42 </param> | |
43 <when value="ne"/> | |
44 <when value="racon"> | |
45 <param type="integer" argument="--r_round" value="1" min="0" max="10" label="Number of Racon error-correction rounds" | |
46 help="Although multiple rounds of racon can increase the quality of an assembly there are indications that it also | |
47 fragments the assembly and may decrease quality by removing structural variants and SNPs. Published assembly workflows | |
48 differ in the number of rounds but rarely apply more than 4 rounds of racon" /> | |
49 </when> | |
50 <when value="pilon"> | |
51 <param type="data" argument="--ngs" format="fastq,fastq.gz" label="Illumina reads" | |
52 help="Pilon can utilize Illumina short reads mapped to the draft assembly to | |
53 improve the local accuracy of the sequence by correcting sequence errors, | |
54 fixing misassemblies, and filling gaps"/> | |
55 <param type="integer" argument="--p_round" value="3" min="0" max="10" label="Number of Pilon error-correction rounds"/> | |
56 </when> | |
57 </conditional> | |
58 <conditional name="tgstype_conditional"> | |
59 <param type="select" argument="--tgstype" label="Type of third generation reads"> | |
60 <option selected="true" value="ont">Oxford Nanopore Technologies (ONT)</option> | |
61 <option value="pb">PacBio (pb)</option> | |
62 </param> | |
63 <when value="ont"> | |
64 <param type="float" argument="--min_idy" value="0.3" min="0" max="1" label="Minimum identity for filtering candidate sequences"/> | |
65 <param type="integer" argument="--min_match" value="300" min="0" max="1000" label="Minimum matched length for filtering candidate sequences"/> | |
66 </when> | |
67 <when value="pb"> | |
68 <param type="float" argument="--min_idy" value="0.2" min="0" max="1" label="Minimum identity for filtering candidate sequences"/> | |
69 <param type="integer" argument="--min_match" value="200" min="0" max="1000" label="Minimum matched length for filtering candidate sequences"/> | |
70 </when> | |
71 </conditional> | |
72 <param type="integer" argument="--chunk" value="3" min="0" max="20" label="Chunks for error correction" help="Split candidates into # of chunks to separately correct errors"/> | |
73 <param type="boolean" argument="--g_check" label="Gap-size diff check" truevalue="--g_check" falsevalue=""/> | |
74 <param type="select" name="output_options" label="Output files" display="checkboxes" multiple="true" optional="true"> | |
75 <option value="log_file">General log file</option> | |
76 <option value="gapfill_log">Gapfill log file</option> | |
77 </param> | |
78 </inputs> | |
79 <!-- Output datasets: final_assembly has no filter; log and fill_details are created only when ticked in output_options. --> <outputs> | |
80 <data format="txt" name="log" label="${tool.name} on ${on_string}: log" from_work_dir="output.fill.log"> | |
81 <filter>output_options and 'log_file' in output_options</filter> | |
82 </data> | |
83 <data format="fasta" name="final_assembly" label="${tool.name} on ${on_string}: final assembly" from_work_dir="output.scaff_seqs"/> | |
84 | |
85 <data format="txt" name="fill_details" label="${tool.name} on ${on_string}: gap fill details" from_work_dir="output.gap_fill_detail"> | |
86 <filter>output_options and 'gapfill_log' in output_options</filter> | |
87 </data> | |
88 </outputs> | |
89 <tests> | |
90 <!--Test 01: ONT reads, no error correction, both optional log outputs requested (three outputs in total)--> | |
91 <test expect_num_outputs="3"> | |
92 <param ftype="fasta" name="scaff" value="scaffold.fasta"/> | |
93 <param ftype="fasta" name="reads" value="ont_reads.fasta"/> | |
94 <conditional name="error_conditional"> | |
95 <param value="ne" name="error_options"/> | |
96 </conditional> | |
97 <conditional name="tgstype_conditional"> | |
98 <param value="ont" name="tgstype"/> | |
99 <param value="0.3" name="min_idy"/> | |
100 <param value="300" name="min_match"/> | |
101 </conditional> | |
102 <param value="3" name="chunk"/> | |
103 <param value="false" name="g_check"/> | |
104 <param value="log_file,gapfill_log" name="output_options"/> | |
105 <output ftype="fasta" name="final_assembly" file="test_01_final_assembly.fasta"/> | |
106 <output ftype="txt" name="log"> | |
107 <assert_contents> | |
108 <has_text text="TGSGapCloser start now"/> | |
109 <has_text text="the one read provide filler choose count freq for a gap"/> | |
110 </assert_contents> | |
111 </output> | |
112 <output ftype="txt" name="fill_details" file="test_01_gapfill.log"/> | |
113 </test> | |
114 <!--Test 02: ONT reads corrected with racon; only the final assembly is expected--> | |
115 <test expect_num_outputs="1"> | |
116 <param ftype="fasta" name="scaff" value="scaffold.fasta"/> | |
117 <param ftype="fasta" name="reads" value="ont_reads.fasta"/> | |
118 <conditional name="error_conditional"> | |
119 <param value="racon" name="error_options"/> | |
120 <param value="2" name="r_round"/> | |
121 </conditional> | |
122 <conditional name="tgstype_conditional"> | |
123 <param value="ont" name="tgstype"/> | |
124 <param value="0.3" name="min_idy"/> | |
125 <param value="300" name="min_match"/> | |
126 </conditional> | |
127 <param value="3" name="chunk"/> | |
128 <param value="false" name="g_check"/> | |
129 <output ftype="fasta" name="final_assembly" file="test_02_final_assembly.fasta"/> | |
130 </test> | |
131 <!--Test 03: correction with pilon using uncompressed fastq short reads; the round count uses p_round, the parameter defined in the pilon branch--> | |
132 <test expect_num_outputs="1"> | |
133 <param name="scaff" value="scaffold.fasta" ftype="fasta"/> | |
134 <param name="reads" value="ont_reads.fasta" ftype="fasta"/> | |
135 <conditional name="error_conditional"> | |
136 <param name="error_options" value="pilon"/> | |
137 <param name="ngs" value="ngs_reads.fastq" ftype="fastq"/> | |
138 <param name="p_round" value="1"/> | |
139 </conditional> | |
140 <conditional name="tgstype_conditional"> | |
141 <param name="tgstype" value="ont"/> | |
142 <param name="min_idy" value="0"/> | |
143 <param name="min_match" value="0"/> | |
144 </conditional> | |
145 <param name="chunk" value="1"/> | |
146 <param name="g_check" value="true"/> | |
147 <output name="final_assembly" file="test_03_final_assembly.fasta" ftype="fasta"/> | |
148 </test> | |
149 <!--Test 04: correction with racon and the PacBio read type; 'pb' is the select value declared for tgstype--> | |
150 <test expect_num_outputs="1"> | |
151 <param name="scaff" value="scaffold.fasta" ftype="fasta"/> | |
152 <param name="reads" value="ont_reads.fasta" ftype="fasta"/> | |
153 <conditional name="error_conditional"> | |
154 <param name="error_options" value="racon"/> | |
155 <param name="r_round" value="2"/> | |
156 </conditional> | |
157 <conditional name="tgstype_conditional"> | |
158 <param name="tgstype" value="pb"/> | |
159 <param name="min_idy" value="0.2"/> | |
160 <param name="min_match" value="200"/> | |
161 </conditional> | |
162 <param name="chunk" value="2"/> | |
163 <param name="g_check" value="false"/> | |
164 <output name="final_assembly" file="test_04_final_assembly.fasta" ftype="fasta"/> | |
165 </test> | |
166 <!--Test 05: correction with pilon using gzip-compressed short reads (fastq.gz); the round count uses p_round, the parameter defined in the pilon branch--> | |
167 <test expect_num_outputs="1"> | |
168 <param name="scaff" value="scaffold.fasta" ftype="fasta"/> | |
169 <param name="reads" value="ont_reads.fasta" ftype="fasta"/> | |
170 <conditional name="error_conditional"> | |
171 <param name="error_options" value="pilon"/> | |
172 <param name="ngs" value="ngs_reads.fastq.gz" ftype="fastq.gz"/> | |
173 <param name="p_round" value="1"/> | |
174 </conditional> | |
175 <conditional name="tgstype_conditional"> | |
176 <param name="tgstype" value="ont"/> | |
177 <param name="min_idy" value="0"/> | |
178 <param name="min_match" value="0"/> | |
179 </conditional> | |
180 <param name="chunk" value="1"/> | |
181 <param name="g_check" value="false"/> | |
182 <output name="final_assembly" file="test_05_final_assembly.fasta" ftype="fasta"/> | |
183 </test> | |
184 </tests> | |
185 <help><![CDATA[ | |
186 | |
187 .. class:: infomark | |
188 | |
189 **Purpose** | |
190 | |
191 TGS-GapCloser is a gap-closing software tool that uses error-prone long reads generated by third-generation sequencing technologies (PacBio, | |
192 Oxford Nanopore, etc.) or preassembled contigs to fill N-gaps in the genome assembly. This tool can close gaps in large genome assemblies | |
193 using raw long reads quickly and cost-effectively. The final assemblies generated by TGS-GapCloser have improved contiguity and | |
194 completeness while maintaining high accuracy. | |
195 | |
196 ---- | |
197 | |
198 .. class:: infomark | |
199 | |
200 **Quick usage** | |
201 | |
202 Input reads can only be in FASTA format. Both raw reads and pre-error-corrected reads are acceptable as input. If only raw long reads are | |
203 provided, it polishes raw TGS reads by calling Racon. If additional NGS short reads are available, it polishes raw TGS reads by calling Pilon. | |
204 | |
205 ---- | |
206 | |
207 .. class:: infomark | |
208 | |
209 **Gap fill details** | |
210 | |
211 | |
212 Format of the detailed gap fill report: | |
213 | |
214 - Each scaffold name is followed by its data lines. | |
215 - A data line consists of 3 or 5 columns and describes the source of each segment in the final sequence: | |
216 - Column 1 is the segment's first bp position in the final sequence. | |
217 - Column 2 is the segment's last bp position in the final sequence. | |
218 - Column 3 is the segment's type: 'S', 'N' or 'F'. | |
219 - 'S' means this segment is a segment of the input sequence and this line includes two more columns: | |
220 - Column 4 is the segment's first bp position in the input sequence. | |
221 - Column 5 is the segment's last bp position in the input sequence. | |
222 - 'N' means this segment is a N area. | |
223 - 'F' means this segment is a filled sequence from TGS reads. | |
224 | |
225 ---- | |
226 | |
227 .. class:: infomark | |
228 | |
229 **Algorithm and implementation of TGS-GapCloser** | |
230 | |
231 This is a brief description of the TGS-GapCloser algorithm. Please refer to the manuscript for more detailed information. | |
232 | |
233 TGS-GapCloser is coded in the C++ programming language (requires GCC 4.4+). It uses minimap2 to obtain alignments, and Pilon (requires Java runtime 1.7+) | |
234 or Racon (requires GCC 4.8+) to correct candidate fragments. The algorithm automatically identifies gaps and tries to find the best matched long-read | |
235 fragments to close gaps or merge adjacent scaftigs. To accelerate the gap closure without losing efficiency and accuracy, TGS-GapCloser only selects a | |
236 limited number of fragmented long reads as candidates for subsequent error correction and competition. | |
237 | |
238 TGS-GapCloser can accept as input any type of TGS long reads or other pre-assembled contigs to fill gaps in a draft assembly in 4 steps: | |
239 (i) Identification of gap regions in the draft assembly; | |
240 (ii) Acquisition of candidates from the alignments of long reads against gaps; | |
241 (iii) Base-level error correction of alternative sub-long reads; and | |
242 (iv) Gap closure using the error-corrected candidates with the highest score for each gap or linkage of the neighboring scaftigs with overlaps. | |
243 | |
244 ]]></help> | |
245 <expand macro="citations" /> | |
246 </tool> |