comparison busco.xml @ 9:cf13a1e03e5b draft

"planemo upload commit e9c6496b181bbd2665e953a1f9ede35921707e2a"
author iuc
date Mon, 15 Mar 2021 21:27:17 +0000
parents 602fb8e63aa7
children 0d243f458b53
comparison
equal deleted inserted replaced
8:602fb8e63aa7 9:cf13a1e03e5b
1 <tool id="busco" name="Busco" profile="18.01" version="@TOOL_VERSION@"> 1 <tool id="busco" name="Busco" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>assess genome assembly and annotation completeness</description> 2 <description>assess genome assembly and annotation completeness</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
7 <requirement type="package" version="@TOOL_VERSION@">busco</requirement> 7 <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
8 <requirement type="package" version="1.32">tar</requirement> 8 <requirement type="package" version="1.32">tar</requirement>
9 <requirement type="package" version="1">fonts-conda-ecosystem</requirement>
9 </requirements> 10 </requirements>
11 <version_command>busco --version</version_command>
10 <command><![CDATA[ 12 <command><![CDATA[
11 if [ -z "\$AUGUSTUS_CONFIG_PATH" ] ; then BUSCO_PATH=\$(dirname \$(which busco)) ; export AUGUSTUS_CONFIG_PATH=\$(realpath \${BUSCO_PATH}/../config) ; fi && 13 if [ -z "\$AUGUSTUS_CONFIG_PATH" ] ; then BUSCO_PATH=\$(dirname \$(which busco)) ; export AUGUSTUS_CONFIG_PATH=\$(realpath \${BUSCO_PATH}/../config) ; fi &&
12 cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ && 14 cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ &&
13 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ && 15 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
14 16
15 #if $adv.aug_prediction.augustus_mode == 'history': 17 #if $busco_mode.mode == 'geno' and $busco_mode.use_augustus.use_augustus_selector == 'yes' and $busco_mode.use_augustus.aug_prediction.augustus_mode == 'history':
16 ## Using an augustus model from history, we need to unzip it and let augustus find it 18 ## Using an augustus model from history, we need to unzip it and let augustus find it
17 mkdir -p 'augustus_dir/species/' && 19 mkdir -p 'augustus_dir/species/' &&
18 tar -C 'augustus_dir/species/' -xzf '${adv.aug_prediction.augustus_model}' && 20 tar -C 'augustus_dir/species/' -xzf '${busco_mode.use_augustus.aug_prediction.augustus_model}' &&
19 #end if 21 #end if
20 22
21 busco 23 busco
22 --in '${input}' 24 --in '${input}'
23 --lineage_dataset '${lineage_dataset}' 25 --lineage_dataset '${lineage_dataset}'
24 --update-data 26 --update-data
25 --mode '${mode}' 27 --mode '${busco_mode.mode}'
26 -o busco_galaxy 28 --out busco_galaxy
27 --cpu \${GALAXY_SLOTS:-4} 29 --cpu \${GALAXY_SLOTS:-4}
28 --evalue ${adv.evalue} 30 --evalue ${adv.evalue}
29 ${adv.long}
30 --limit ${adv.limit} 31 --limit ${adv.limit}
31 #if $adv.aug_prediction.augustus_mode == 'builtin': 32
32 --augustus_species '${adv.aug_prediction.augustus_species}' 33 #if $adv.auto_lineage:
33 #else if $adv.aug_prediction.augustus_mode == 'history': 34 $adv.auto_lineage
34 --augustus_species local 35 #end if
35 #end if 36 #if $busco_mode.mode == 'geno' and $busco_mode.use_augustus.use_augustus_selector == 'yes':
36 ]]></command> 37
37 38 ${busco_mode.use_augustus.long}
39 --augustus
40
41 #if $busco_mode.use_augustus.aug_prediction.augustus_mode == 'builtin':
42 --augustus_species '${busco_mode.use_augustus.aug_prediction.augustus_species}'
43 #else if $busco_mode.use_augustus.aug_prediction.augustus_mode == 'history':
44 --augustus_species local
45 #end if
46 #end if
47
48 #if $adv.outputs and 'image' in $adv.outputs:
49 &&
50 mkdir BUSCO_summaries
51 &&
52 ls -l busco_galaxy/run_*/ &&
53 cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
54 &&
55 generate_plot.py -wd BUSCO_summaries -rt specific
56 #end if
57
58 ]]> </command>
38 <inputs> 59 <inputs>
39 <param type="data" name="input" format="fasta" label="Sequences to analyse" help="genome, transcriptome or proteome" /> 60 <param type="data" name="input" format="fasta" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set." />
40 <param argument="--mode" type="select" label="Mode"> 61 <conditional name="busco_mode">
41 <option value="geno">Genome</option> 62 <param argument="--mode" type="select" label="Mode">
42 <option value="tran">Transcriptome</option> 63 <option value="geno">Genome assemblies (DNA)</option>
43 <option value="prot">Proteome</option> 64 <option value="tran">Transcriptome assemblies (DNA)</option>
65 <option value="prot">annotated gene sets (protein)</option>
66 </param>
67 <when value="geno">
68 <conditional name="use_augustus">
69 <param name="use_augustus_selector" type="select" label="Use Augustus instead of Metaeuk">
70 <option value="yes">Yes, use Augustus</option>
71 <option value="no" selected="true">Use Metaeuk</option>
72 </param>
73 <when value="no" />
74 <when value="yes">
75 <conditional name="aug_prediction">
76 <param name="augustus_mode" type="select" label="Augustus species model">
77 <option value="no" selected="true">Use the default species for selected lineage</option>
78 <option value="builtin">Use another predefined species model</option>
79 <option value="history">Use a custom species model</option>
80 </param>
81 <when value="no" />
82 <when value="history">
83 <param name="augustus_model" type="data" format="augustus" label="Augustus model" />
84 </when>
85 <when value="builtin">
86 <param name="augustus_species" type="select" label="Augustus species model">
87 <expand macro="augustus_species" />
88 </param>
89 </when>
90 </conditional>
91 <param argument="--long" type="boolean" checked="false" truevalue="--long" falsevalue="" label="Optimization mode Augustus self-training" help="Adds considerably to run time, but can improve results for some non-model organisms" />
92 </when>
93 </conditional>
94 </when>
95 <when value="tran" />
96 <when value="prot" />
97 </conditional>
98
99 <param argument="--lineage_dataset" type="select" label="Lineage">
100 <expand macro="lineages" />
44 </param> 101 </param>
45 102
46 <param argument="--lineage_dataset" type="select" label="Lineage">
47 <expand macro="lineages"/>
48 </param>
49
50 <section name="adv" title="Advanced Options" expanded="False"> 103 <section name="adv" title="Advanced Options" expanded="False">
51 <param argument="--evalue" type="float" value="0.01" label="E-value cutoff for BLAST searches."/> 104 <param argument="--evalue" type="float" value="0.001" min="0" max="1" label="E-value cutoff for BLAST searches." />
52 <param argument="--limit" type="integer" value="3" label="How many candidate regions to consider"/> 105 <param argument="--limit" type="integer" value="3" label="How many candidate regions to consider" />
53 106 <param name="auto_lineage" type="select" optional="true" label="Run auto-lineage to find optimal lineage path">
54 <conditional name="aug_prediction"> 107 <option value="--auto-lineage">Run auto-lineage to find optimum lineage path</option>
55 <param name="augustus_mode" type="select" label="Augustus species model"> 108 <option value="--auto-lineage-prok">Run auto-lineage just on non-eukaryote trees to find optimum lineage path</option>
56 <option value="no" selected="true">Use the default species for selected lineage</option> 109 <option value="--auto-lineage-euk">Run auto-placement just on eukaryote tree to find optimum lineage path</option>
57 <option value="builtin">Use another predefined species model</option> 110 </param>
58 <option value="history">Use a custom species model</option> 111 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
59 </param> 112 <option value="short_summary">short summary text</option>
60 <when value="no"/> 113 <option value="missing">list with missing IDs</option>
61 <when value="history"> 114 <option value="image">summary image</option>
62 <param name="augustus_model" type="data" format="augustus" label="Augustus model"/> 115 </param>
63 </when>
64 <when value="builtin">
65 <param name="augustus_species" type="select" label="Augustus species model">
66 <expand macro="augustus_species"/>
67 </param>
68 </when>
69 </conditional>
70 <param argument="--long" type="boolean" checked="false" truevalue="--long" falsevalue="" label="Optimization mode Augustus self-training" help="Adds considerably to run time, but can improve results for some non-model organisms"/>
71 </section> 116 </section>
72 </inputs> 117 </inputs>
73 <outputs> 118 <outputs>
74 <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt"/> 119 <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt">
75 <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv"/> 120 <filter>adv['outputs'] and 'short_summary' in adv['outputs']</filter>
76 <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv"/> 121 </data>
122 <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv" />
123 <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv">
124 <filter>adv['outputs'] and 'missing' in adv['outputs']</filter>
125 </data>
126 <data name='summary_image' format='png' label="${tool.name} on ${on_string}: summary image" from_work_dir="BUSCO_summaries/busco_figure.png">
127 <filter>adv['outputs'] and 'image' in adv['outputs']</filter>
128 </data>
77 </outputs> 129 </outputs>
78 <tests> 130 <tests>
79 <test> 131 <test expect_num_outputs="3">
80 <param name="input" value="genome.fa"/> 132 <param name="input" value="genome.fa" />
81 <param name="lineage_dataset" value="arthropoda_odb10"/> 133 <param name="lineage_dataset" value="arthropoda_odb10" />
82 <param name="mode" value="geno"/> 134 <conditional name="busco_mode">
83 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> 135 <param name="mode" value="geno" />
84 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> 136 <conditional name="use_augustus">
137 <param name="use_augustus_selector" value="yes" />
138 </conditional>
139 </conditional>
140 <section name="adv">
141 <param name="outputs" value="short_summary,missing" />
142 </section>
143 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" />
144 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
85 <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"> 145 <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4">
86 <assert_contents> 146 <assert_contents>
87 <has_text text="# BUSCO version is: @TOOL_VERSION@" /> 147 <has_text text="# BUSCO version is: @TOOL_VERSION@" />
88 </assert_contents> 148 </assert_contents>
89 </output> 149 </output>
90 </test> 150 </test>
91 <test> 151 <test expect_num_outputs="4">
92 <param name="input" value="proteome.fa"/> 152 <param name="input" value="proteome.fa" />
93 <param name="lineage_dataset" value="arthropoda_odb10"/> 153 <param name="lineage_dataset" value="arthropoda_odb10" />
94 <param name="mode" value="prot"/> 154 <conditional name="busco_mode">
95 <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="4"/> 155 <param name="mode" value="prot" />
96 <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4"/> 156 </conditional>
97 <output name="busco_missing" file="proteome_results/missing_buscos_list" compare="diff" lines_diff="4"/> 157 <section name="adv">
98 </test> 158 <param name="outputs" value="short_summary,missing,image" />
99 <test> 159 </section>
100 <param name="input" value="transcriptome.fa"/> 160 <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="4" />
101 <param name="lineage_dataset" value="arthropoda_odb10"/> 161 <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4" />
102 <param name="mode" value="tran"/> 162 <output name="busco_missing" file="proteome_results/missing_buscos_list" compare="diff" lines_diff="4" />
103 <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="4"/> 163 <output name="summary_image" file="proteome_results/summary.png" compare="sim_size" />
104 <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="4"/> 164 </test>
105 <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="4"/> 165 <test expect_num_outputs="4">
106 </test> 166 <param name="input" value="transcriptome.fa" />
107 <test> 167 <param name="lineage_dataset" value="arthropoda_odb10" />
108 <param name="input" value="genome.fa"/> 168 <conditional name="busco_mode">
109 <param name="lineage_dataset" value="arthropoda_odb10"/> 169 <param name="mode" value="tran" />
110 <param name="mode" value="geno"/> 170 </conditional>
111 <param name="adv|aug_prediction|augustus_mode" value="builtin"/> 171 <section name="adv">
112 <param name="adv|aug_prediction|augustus_species" value="human"/> 172 <param name="auto_lineage" value="--auto-lineage" />
113 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> 173 <param name="outputs" value="short_summary,missing,image" />
114 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> 174 </section>
115 <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> 175 <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="4" />
116 </test> 176 <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="4" />
117 <test> 177 <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="4" />
118 <param name="input" value="genome.fa"/> 178 <output name="summary_image" file="transcriptome_results/summary.png" compare="sim_size" />
119 <param name="lineage_dataset" value="arthropoda_odb10"/> 179 </test>
120 <param name="mode" value="geno"/> 180 <test expect_num_outputs="2">
121 <param name="adv|aug_prediction|augustus_mode" value="history"/> 181 <param name="input" value="genome.fa" />
122 <param name="adv|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> 182 <param name="lineage_dataset" value="arthropoda_odb10" />
123 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> 183 <conditional name="busco_mode">
124 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> 184 <param name="mode" value="geno" />
125 <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> 185 <conditional name="use_augustus">
186 <param name="use_augustus_selector" value="yes" />
187 <conditional name="aug_prediction">
188 <param name="augustus_mode" value="builtin" />
189 <param name="augustus_species" value="human" />
190 </conditional>
191 </conditional>
192 </conditional>
193 <section name="adv">
194 <param name="outputs" value="short_summary" />
195 </section>
196 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" />
197 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
198 </test>
199 <test expect_num_outputs="3">
200 <param name="input" value="genome.fa" />
201 <param name="lineage_dataset" value="arthropoda_odb10" />
202 <conditional name="busco_mode">
203 <param name="mode" value="geno" />
204 <conditional name="use_augustus">
205 <param name="use_augustus_selector" value="yes" />
206 <conditional name="aug_prediction">
207 <param name="augustus_mode" value="history" />
208 <param name="augustus_model" value="local.tar.gz" ftype="augustus" />
209 </conditional>
210 </conditional>
211 </conditional>
212 <section name="adv">
213 <param name="outputs" value="short_summary,missing" />
214 </section>
215 <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" />
216 <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" />
217 <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4" />
218 </test>
219 <test expect_num_outputs="4">
220 <param name="input" value="genome.fa" />
221 <param name="lineage_dataset" value="arthropoda_odb10" />
222 <conditional name="busco_mode">
223 <param name="mode" value="geno" />
224 <conditional name="use_augustus">
225 <param name="use_augustus_selector" value="no" />
226 </conditional>
227 </conditional>
228 <section name="adv">
229 <param name="outputs" value="short_summary,missing,image" />
230 </section>
231 <output name="busco_sum" file="genome_results_metaeuk/short_summary" compare="diff" lines_diff="4" />
232 <output name="busco_table" file="genome_results_metaeuk/full_table" compare="diff" lines_diff="4" />
233 <output name="busco_missing" file="genome_results_metaeuk/missing_buscos_list" compare="diff" lines_diff="4" />
234 <output name="summary_image" file="genome_results_metaeuk/summary.png" compare="sim_size" />
126 </test> 235 </test>
127 </tests> 236 </tests>
128 <help> 237 <help><![CDATA[
238
239
129 BUSCO: assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs 240 BUSCO: assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs
241 --------------------------------------------------------------------------------------------------------------
242
243 Interpreting the results
244 ^^^^^^^^^^^^^^^^^^^^^^^^
245
246 BUSCO_ attempts to provide a quantitative assessment of the completeness in terms of the expected gene content of a
247 genome assembly, transcriptome, or annotated gene set. The results are simplified into categories of Complete
248 and single-copy, Complete and duplicated, Fragmented, or Missing BUSCOs.
249
250 BUSCO completeness results make sense only in the context of the biology of your organism.
251 You have to understand whether missing or duplicated genes are of biological or technical origin.
252 For instance, a high level of duplication may be explained by a recent whole-genome duplication event
253 (biological) or a chimeric assembly of haplotypes (technical).
254 Transcriptomes and protein sets that are not filtered for isoforms will lead to a high proportion of duplicates.
255 Therefore you should filter them before a BUSCO analysis.
256 Finally, focusing on specific tissues or specific life stages and conditions in a transcriptomic experiment
257 is unlikely to produce a BUSCO-complete transcriptome. In this case, consistency across your samples
258 is what you will be aiming for.
259
260 For more information please refer to the BUSCO_ `user guide <https://busco.ezlab.org/busco_userguide.html#interpreting-the-results>`_
261 .
130 262
131 .. _BUSCO: http://busco.ezlab.org/ 263 .. _BUSCO: http://busco.ezlab.org/
132 </help> 264
133 <expand macro="citations"/> 265 ]]> </help>
266 <expand macro="citations" />
134 </tool> 267 </tool>