comparison edta.xml @ 0:f1a157358d4d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edta commit 24cb0421330e54b144b3e6f1be4ae35ac0e48c1c
author bgruening
date Sun, 16 Oct 2022 12:41:19 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f1a157358d4d
1 <tool id="edta" name="edta" version="@WRAPPER_VERSION@+@VERSION_SUFFIX@" profile="20.01">
2 <description>
3 Whole-genome de-novo TE annotation
4 </description>
5 <macros>
6 <token name="@WRAPPER_VERSION@">2.1.0</token>
7 <token name="@VERSION_SUFFIX@">galaxy0</token>
8 <import>edta_macros.xml</import>
9 </macros>
10 <expand macro="bio_tools"></expand>
11 <requirements>
12 <requirement type="package" version="@WRAPPER_VERSION@">edta</requirement>
13 <!--container type="docker">oushujun/edta:@WRAPPER_VERSION@</container-->
14 </requirements>
15 <command detect_errors="exit_code"><![CDATA[
16 ## Genome to TE annotations (EDTA.pl on a single genome)
17 #if $function_select.function == 'genome':
18 ln -s '$function_select.genome' ./input.fa &&
19 EDTA.pl
20 --genome input.fa
21 --species '$function_select.species'
22 --step '$function_select.step'
23 #if $function_select.cds:
24 --cds '$function_select.cds'
25 #end if
26 #if $function_select.curatedlib:
27 --curatedlib '$function_select.curatedlib'
28 #end if
29 $function_select.sensitive
30 #if $function_select.mutation_rate:
31 --u '$function_select.mutation_rate'
32 #end if
33 #if $function_select.anno_select.anno == 'yes':
34 --anno 1
35 $function_select.anno_select.evaluate
36 #if $function_select.anno_select.exclude:
37 --exclude '$function_select.anno_select.exclude'
38 #end if
39 && mv ./input.fa.mod.EDTA.anno/*.sum .
40 #end if
41 ## Find elements of a particular TE type (EDTA_raw.pl)
42 #else if $function_select.function == 'te':
43 ln -s '$function_select.genome' ./input.fa &&
44 EDTA_raw.pl
45 --genome input.fa
46 --species '$function_select.species'
47 --type '$function_select.te_type'
48 && mv ./input.fa.mod.EDTA.raw/input.fa.mod* .
49 ## pan-EDTA: run EDTA.pl once per te_library entry, then filter each result and build the pan library
50 #else if $function_select.function == 'pan-edta':
51 #set $cds_list = []
52 #for $i, $s in enumerate($function_select.te_library)
53 ln -s '${s.genome}' ./${i}_input.fa &&
54 EDTA.pl
55 --genome ${i}_input.fa
56 --species '${s.species}'
57 --anno 1
58 #if $s.cds:
59 --cds '${s.cds}'
60 #end if
61 #if $s.curatedlib:
62 --curatedlib '${s.curatedlib}'
63 #end if
64 $s.sensitive
65 #if $s.mutation_rate:
66 --u '${s.mutation_rate}'
67 #end if
68 $s.evaluate
69 #if $s.exclude:
70 --exclude '${s.exclude}'
71 #end if
72 #silent$cds_list.append(str($i) + '_input.fa')
73 && mv ./${i}_input.fa.mod.EDTA.anno/*.mod.out .
74 &&
75 #end for
76 #for $i in $cds_list
77 bash '$__tool_directory__/filter_out_single_copies.sh' $i &&
78 #end for
79 #if $function_select.known_te:
80 bash '$__tool_directory__/make_pan_library.sh' '${function_select.known_te}'
81 #else:
82 bash '$__tool_directory__/make_pan_library.sh'
83 #end if
84 #end if
85 ]]></command>
86 <inputs>
87 <conditional name="function_select">
88 <param name="function" type="select" label="Which Function should be run">
89 <option value="genome">Whole Genome</option>
90 <option value="te">Specific TE</option>
91 <option value="pan-edta">Pan-EDTA</option>
92 </param>
93 <!-- pan-EDTA run (parameters come from the pan_edta macro) -->
94 <when value="pan-edta">
95 <expand macro="pan_edta"></expand>
96 </when>
97 <!-- Default EDTA run (parameters come from the edta_main_param macro) -->
98 <when value="genome">
99 <expand macro="edta_main_param"></expand>
100 </when>
101 <!-- Find elements of a particular TE type (parameters come from the te_only macro) -->
102 <when value="te">
103 <expand macro="te_only"></expand>
104 </when>
105 </conditional>
106 </inputs>
107 <outputs>
108 <!-- Genome to TE Annotations (the MAKER-masked genome is a FASTA file, not GFF3) -->
109 <data name="outfile_library" from_work_dir="*.mod.EDTA.TElib.fa" format="fasta" label="${tool.name} on ${on_string}: Non-Redundant TE Library">
110 <filter>function_select['function'] == "genome"</filter>
111 </data>
112 <data name="Novel_TE_Families" from_work_dir="*.mod.EDTA.TElib.novel.fa" format="fasta" label="${tool.name} on ${on_string}: Novel TE Families">
113 <filter>function_select['function'] == "genome" and function_select['curatedlib']</filter>
114 </data>
115 <data name="Whole_Genome_TE_Annotation" from_work_dir="*.mod.EDTA.TEanno.gff3" format="gff3" label="${tool.name} on ${on_string}: Whole Genome TE Annotation">
116 <filter>function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes" </filter>
117 </data>
118 <data name="Summary_Whole_Genome_TE_Annotation" from_work_dir="*.mod.EDTA.TEanno.sum" format="xml" label="${tool.name} on ${on_string}: Summary of Whole Genome TE Annotation">
119 <filter>function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes"</filter>
120 </data>
121 <data name="Low_Threshold_TE_Masking" from_work_dir="*.mod.MAKER.masked" format="fasta" label="${tool.name} on ${on_string}: Low_Threshold_TE_Masking">
122 <filter>function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes"</filter>
123 </data>
124 <data name="Annotation_Inconsistency_Simple_TEs" from_work_dir="*.mod.EDTA.TE.fa.stat.redun.sum" format="xml" label="${tool.name} on ${on_string}: Simple TE Annotation Inconsistency">
125 <filter> function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes" and function_select['anno_select']['evaluate'] is True </filter>
126 </data>
127 <data name="Annotation_Inconsistency_Nested_TEs" from_work_dir="*.mod.EDTA.TE.fa.stat.nested.sum" format="xml" label="${tool.name} on ${on_string}: Nested TE Annotation Inconsistency">
128 <filter> function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes" and function_select['anno_select']['evaluate'] is True </filter>
129 </data>
130 <data name="Overall_Annotation_Inconsistency" from_work_dir="*.mod.EDTA.TE.fa.stat.all.sum" format="xml" label="${tool.name} on ${on_string}: Overall Annotation Inconsistency">
131 <filter> function_select['function'] == "genome" and function_select['anno_select']['anno'] == "yes" and function_select['anno_select']['evaluate'] is True</filter>
132 </data>
133 <!-- Find elements of a particular TE type: one set per te_type, plus the all_* set for te_type 'all' -->
134 <data name='ltr_intact_fa' from_work_dir="*.LTR.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_LTR.fa">
135 <filter>function_select['function'] == "te" and function_select['te_type'] == 'ltr' </filter>
136 </data>
137 <data name='ltr_intact_gff3' from_work_dir="*.LTR.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_LTR.gff3">
138 <filter>function_select['function'] == "te" and function_select['te_type'] == 'ltr' </filter>
139 </data>
140 <data name='ltr_raw_fa' from_work_dir="*.LTR.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_LTR.fa">
141 <filter>function_select['function'] == "te" and function_select['te_type'] == 'ltr' </filter>
142 </data>
143 <data name='tir_intact_fa' from_work_dir="*.TIR.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_TIR.fa">
144 <filter>function_select['function'] == "te" and function_select['te_type'] == 'tir' </filter>
145 </data>
146 <data name='tir_intact_gff3' from_work_dir="*.TIR.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_TIR.gff3">
147 <filter>function_select['function'] == "te" and function_select['te_type'] == 'tir' </filter>
148 </data>
149 <data name='tir_raw_fa' from_work_dir="*.TIR.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_TIR.fa">
150 <filter>function_select['function'] == "te" and function_select['te_type'] == 'tir' </filter>
151 </data>
152 <data name='helitron_intact_fa' from_work_dir="*.Helitron.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_Helitron.fa">
153 <filter>function_select['function'] == "te" and function_select['te_type'] == 'helitron' </filter>
154 </data>
155 <data name='helitron_intact_gff3' from_work_dir="*.Helitron.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_Helitron.gff3">
156 <filter>function_select['function'] == "te" and function_select['te_type'] == 'helitron' </filter>
157 </data>
158 <data name='helitron_raw_fa' from_work_dir="*.Helitron.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_Helitron.fa">
159 <filter>function_select['function'] == "te" and function_select['te_type'] == 'helitron' </filter>
160 </data>
161 <data name='all_ltr_intact_fa' from_work_dir="*.LTR.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_LTR.fa">
162 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
163 </data>
164 <data name='all_ltr_intact_gff3' from_work_dir="*.LTR.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_LTR.gff3">
165 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
166 </data>
167 <data name='all_ltr_raw_fa' from_work_dir="*.LTR.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_LTR.fa">
168 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
169 </data>
170 <data name='all_tir_intact_fa' from_work_dir="*.TIR.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_TIR.fa">
171 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
172 </data>
173 <data name='all_tir_intact_gff3' from_work_dir="*.TIR.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_TIR.gff3">
174 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
175 </data>
176 <data name='all_tir_raw_fa' from_work_dir="*.TIR.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_TIR.fa">
177 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
178 </data>
179 <data name='all_helitron_intact_fa' from_work_dir="*.Helitron.intact.fa" format="fasta" label="${tool.name} on ${on_string}: Intact_Helitron.fa">
180 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
181 </data>
182 <data name='all_helitron_intact_gff3' from_work_dir="*.Helitron.intact.gff3" format="gff3" label="${tool.name} on ${on_string}: Intact_Helitron.gff3">
183 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
184 </data>
185 <data name='all_helitron_raw_fa' from_work_dir="*.Helitron.raw.fa" format="fasta" label="${tool.name} on ${on_string}: Raw_Helitron.fa">
186 <filter>function_select['function'] == "te" and function_select['te_type'] == 'all' </filter>
187 </data>
188 <!-- pan-EDTA outputs (only produced when function is 'pan-edta') -->
189 <data name="pan_outfile_library" from_work_dir="*.mod.EDTA.TElib.fa" format="fasta" label="${tool.name} on ${on_string}: Non-Redundant TE Library">
190 <filter>function_select['function'] == 'pan-edta'</filter>
191 </data>
192 <data name='filter_copy' from_work_dir="*.mod.EDTA.TElib.novel.fa.real" format="fasta" label="${tool.name} on ${on_string}: filter out copy">
193 <filter>function_select['function'] == 'pan-edta'</filter>
194 </data>
195 <data name='get_classification_info' from_work_dir="*.mod.EDTA.TElib.novel.fa.real.ori" format="fasta" label="${tool.name} on ${on_string}: aggregate novel TE libraries">
196 <filter>function_select['function'] == 'pan-edta'</filter>
197 </data>
198 </outputs>
199 <tests>
200 <!-- Genome to TE annotations: curated library, annotation and evaluation enabled, so all 8 genome-mode outputs are expected -->
201 <test expect_num_outputs="8">
202 <conditional name="function_select">
203 <param name='genome' value='test_genome.fa' ftype='fasta'/>
204 <param name='cds' value='test_genome.cds.fa' ftype='fasta' />
205 <param name='species' value='Others' />
206 <param name='step' value='all' />
207 <param name ='curatedlib' value='rice6.9.5.liban' ftype='fasta'/>
208 <param name ='sensitive' value='1' />
209 <conditional name ='anno_select'>
210 <param name='anno' value='yes'/>
211 <param name='evaluate' value='1' />
212 <param name='exclude' value='test_genome.exclude.bed' ftype='bed' />
213 </conditional>
214 </conditional>
215 <output name='outfile_library'>
216 <assert_contents>
217 <has_text text='>RST-Osativa-Cluster'></has_text>
218 <has_text text="match=NEW"></has_text>
219 </assert_contents>
220 </output>
221 <output name='Novel_TE_Families'>
222 <assert_contents>
223 <has_text text='Helitron'></has_text>
224 </assert_contents>
225 </output>
226 <output name="Whole_Genome_TE_Annotation">
227 <assert_contents>
228 <has_text text="Chr2"></has_text>
229 <has_text text="EDTA"></has_text>
230 </assert_contents>
231 </output>
232 <output name="Summary_Whole_Genome_TE_Annotation">
233 <assert_contents>
234 <has_text text="Repeat Classes"></has_text>
235 <has_text text="Repeat Stats"></has_text>
236 </assert_contents>
237 </output>
238 <output name="Low_Threshold_TE_Masking">
239 <assert_contents>
240 <has_text text=">Chr2"></has_text>
241 </assert_contents>
242 </output>
243 <output name="Annotation_Inconsistency_Simple_TEs">
244 <assert_contents>
245 <has_text text="LTR/Gypsy"></has_text>
246 </assert_contents>
247 </output>
248 <output name="Annotation_Inconsistency_Nested_TEs">
249 <assert_contents>
250 <has_text text="TIR/Mutator"></has_text>
251 </assert_contents>
252 </output>
253 <output name="Overall_Annotation_Inconsistency">
254 <assert_contents>
255 <has_text text="LTR/Copia"></has_text>
256 </assert_contents>
257 </output>
258 </test>
259 <test expect_num_outputs="5"> <!-- annotation without evaluate: the three inconsistency reports are filtered out -->
260 <conditional name="function_select">
261 <param name='genome' value='test_genome.fa' ftype='fasta'/>
262 <param name='cds' value='test_genome.cds.fa' ftype='fasta' />
263 <param name='species' value='Others' />
264 <param name='step' value='all' />
265 <param name ='curatedlib' value='rice6.9.5.liban' ftype='fasta'/>
266 <param name ='sensitive' value='1' />
267 <conditional name ='anno_select'>
268 <param name='anno' value='yes'/>
269 <param name='evaluate' value='0' />
270 <param name='exclude' value='test_genome.exclude.bed' ftype='bed' />
271 </conditional>
272 </conditional>
273 </test>
274 <test expect_num_outputs="1"> <!-- no annotation and no curated library: only the TE library output is expected -->
275 <conditional name="function_select">
276 <param name='genome' value='test_genome.fa' ftype='fasta'/>
277 <param name='species' value='Others' />
278 <param name='step' value='all' />
279 <param name ='sensitive' value='1' />
280 <conditional name ='anno_select'>
281 <param name='anno' value='no'/>
282 </conditional>
283 </conditional>
284 </test>
285 <!-- Find elements of a particular TE type -->
286 <test expect_num_outputs="3">
287 <conditional name="function_select">
288 <param name="function" value="te"/>
289 <param name='genome' value='test_genome.fa' ftype='fasta'/>
290 <param name='species' value='Others'/>
291 <param name="te_type" value="ltr"/>
292 </conditional>
293 <output name="ltr_intact_fa">
294 <assert_contents>
295 <has_text text=">Chr2:"></has_text>
296 </assert_contents>
297 </output>
298 <output name="ltr_intact_gff3">
299 <assert_contents>
300 <has_text text="##gff-version 3"></has_text>
301 </assert_contents>
302 </output>
303 <output name="ltr_raw_fa">
304 <assert_contents>
305 <has_text text=">Chr2:"></has_text>
306 </assert_contents>
307 </output>
308 </test>
309 <test expect_num_outputs="9"> <!-- te_type 'all': the nine all_* outputs (LTR, TIR and Helitron, three each) -->
310 <conditional name="function_select">
311 <param name="function" value="te"/>
312 <param name='genome' value='test_genome.fa' ftype='fasta'/>
313 <param name='species' value='Others'/>
314 <param name="te_type" value="all"/>
315 </conditional>
316 </test>
317 <!-- pan-EDTA: a single te_library entry, expecting the three pan-EDTA outputs -->
318 <test expect_num_outputs="3">
319 <conditional name="function_select">
320 <param name="function" value="pan-edta"></param>
321 <repeat name="te_library">
322 <param name='genome' value='test_genome.fa' ftype='fasta'/>
323 <param name='cds' value='test_genome.cds.fa' ftype='fasta' />
324 <param name='species' value='Others' />
325 <param name ='curatedlib' value='rice6.9.5.liban' ftype='fasta'/>
326 <param name ='sensitive' value='1' />
327 <param name='evaluate' value='1' />
328 <param name='exclude' value='test_genome.exclude.bed' ftype='bed' />
329 </repeat>
330 </conditional>
331 </test>
332 </tests>
333 <help><![CDATA[
334
335 ===============================================
336 **The Extensive *de novo* TE Annotator (EDTA)**
337 ===============================================
338
339 .. class:: infomark
340
341 **What it does**
342
343 This package is developed for automated whole-genome de-novo TE annotation and benchmarking the annotation performance of TE libraries.
344
345 ----
346
347 The EDTA package was designed to filter out false discoveries in raw TE candidates and generate a high-quality non-redundant TE library for whole-genome TE annotations. Selection of initial search programs was based on benchmarking of the annotation performance using a manually curated TE library in the rice genome.
348
349 .. image:: https://github.com/oushujun/EDTA/blob/master/development/EDTA%20workflow.png?raw=true
350 :alt: EDTA workflow diagram
351
352 ''''''''''
353 **Inputs**
354 ''''''''''
355
356
357 **Required**
358
359 1. **The genome file [FASTA]**. Please make sure sequence names are short (<=13 characters) and simple (i.e, letters, numbers, and underscore).
360
361 **Optional**
362
363 1. Coding sequence of the species or a closely related species [FASTA]. This file helps to purge gene sequences in the TE library.
364 2. Known gene positions of this version of the genome assembly [BED]. Coordinates specified in this file will be excluded from TE annotation to avoid over-masking.
365 3. Curated TE library of the species [FASTA]. This file is trusted 100%. Please make sure it's curated. If you only have a couple of curated sequences, that's fine. It doesn't need to be complete. Providing curated TE sequences, especially for those under annotated TE types (i.e., SINEs and LINEs), will greatly improve the annotation quality.
366
367 '''''''''''
368 **Outputs**
369 '''''''''''
370
371 1. **A non-redundant TE library.** The curated library will be included in this file if provided. TEs are classified into the superfamily level and using the three-letter naming system reported in Wicker et al. (2007) (https://www.nature.com/articles/nrg2165). Each sequence can be considered as a TE family.
372
373 **Optional**
374
375 1. Novel TE families: This file contains TE sequences that are not included in the curated library (`curatedlib` required).
376 2. Whole-genome TE annotation: This file contains both structurally intact and fragmented TE annotations (`anno` required).
377 3. Summary of whole-genome TE annotation: (`anno` required).
378 4. Low-threshold TE masking: This is a genome file with only long TEs (>=1 kb) being masked. You may use this for de novo gene annotations. In practice, this approach will reduce overmasking for genic regions, which can improve gene prediction quality. However, initial gene models should contain TEs and need further filtering (`anno` required).
379 5. Annotation inconsistency for simple TEs: (`anno and evaluate` required).
380 6. Annotation inconsistency for nested TEs: (`anno and evaluate` required).
381 7. Overall annotation inconsistency: (`anno and evaluate` required).
382
383 '''''''''''''''
384 **EDTA Usage**
385 '''''''''''''''
386 `````````````````````
387 **From head to toe:**
388 `````````````````````
389 *You got a genome and you want to get a high-quality TE annotation:*
390
391
392 - **--genome** [File] The genome FASTA file. Required.
393
394 - **--species** [Rice|Maize|others] Specify the species for identification of TIR candidates. Default: others
395
396 - **--step** [all|filter|final|anno] Specify which steps you want to run EDTA.
397
398 1. all: run the entire pipeline (default).
399 2. filter: start from raw TEs to the end.
400 3. final: start from filtered TEs to finalizing the run.
401 4. anno: perform whole-genome annotation/analysis after TE library construction.
402
403 - **--overwrite** [0|1] If previous results are found, decide to overwrite (1, rerun) or not (0, default).
404
405 - **--cds** [File] Provide a FASTA file containing the coding sequence (no introns, UTRs, nor TEs) of this genome or its close relative.
406
407 - **--curatedlib** [file] Provide a curated library to keep consistent naming and classification for known TEs. All TEs in this file will be trusted 100%, so please ONLY provide MANUALLY CURATED ones here. This option is not mandatory. It's totally OK if no file is provided (default).
408
409 - **--sensitive** [0|1] Use RepeatModeler to identify remaining TEs (1) or not (0, default). This step is very slow and MAY help to recover some TEs.
410
411 - **--anno** [0|1] Perform (1) or not perform (0, default) whole-genome TE annotation after TE library construction.
412
413 - **--rmout** [File] Provide your own homology-based TE annotation instead of using the EDTA library for masking. File is in RepeatMasker .out format. This file will be merged with the structural-based TE annotation. (--anno 1 required). Default: use the EDTA library for annotation.
414
415 - **--evaluate** [0|1] Evaluate (1) classification consistency of the TE annotation. (--anno 1 required). Default: 0. This step is slow and does not affect the annotation result.
416
417 - **--exclude** [File] Exclude bed format regions from TE annotation. Default: undef. (--anno 1 required).
418
419 - **--u** [float] Neutral mutation rate to calculate the age of intact LTR elements.
420
421 ----
422
423 ```````````````````````
424 **Divide and conquer:**
425 ```````````````````````
426 *Identify intact elements of a particular TE type*
427
428
429 - **--genome** [File] The genome FASTA
430
431 - **--species** [Rice|Maize|others] Specify the species for identification of TIR candidates. Default: others
432
433 - **--type** [ltr|tir|helitron|all] Specify which type of raw TE candidates you want to get. Default: all
434
435 ----
436
437 ````````````
438 **PAN-EDTA**
439 ````````````
440
441 For pan-genome annotations, you need to annotate each genome with EDTA, generate a pan-genome library, then reannotate each genome with the pan-genome library.
442
443 .. image:: https://github.com/HuffordLab/NAM-genomes/raw/master/te-annotation/assets/Pan-EDTA_scheme.png
444 :alt: Pan-EDTA workflow scheme
445 :width: 70%
446
447 For more information see the EDTA documentation_.
448
449 .. _documentation: https://github.com/oushujun/EDTA
450
451 ]]></help>
452 <citations>
453 <citation type="doi">10.1186/s13059-019-1905-y</citation>
454 </citations>
455 </tool>