Mercurial > repos > iuc > busco
diff busco.xml @ 9:cf13a1e03e5b draft
"planemo upload commit e9c6496b181bbd2665e953a1f9ede35921707e2a"
author | iuc |
---|---|
date | Mon, 15 Mar 2021 21:27:17 +0000 |
parents | 602fb8e63aa7 |
children | 0d243f458b53 |
line wrap: on
line diff
--- a/busco.xml Wed Dec 30 14:07:37 2020 +0000 +++ b/busco.xml Mon Mar 15 21:27:17 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="busco" name="Busco" profile="18.01" version="@TOOL_VERSION@"> +<tool id="busco" name="Busco" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>assess genome assembly and annotation completeness</description> <macros> <import>macros.xml</import> @@ -6,129 +6,262 @@ <requirements> <requirement type="package" version="@TOOL_VERSION@">busco</requirement> <requirement type="package" version="1.32">tar</requirement> + <requirement type="package" version="1">fonts-conda-ecosystem</requirement> </requirements> + <version_command>busco --version</version_command> <command><![CDATA[ if [ -z "\$AUGUSTUS_CONFIG_PATH" ] ; then BUSCO_PATH=\$(dirname \$(which busco)) ; export AUGUSTUS_CONFIG_PATH=\$(realpath \${BUSCO_PATH}/../config) ; fi && cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ && export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ && -#if $adv.aug_prediction.augustus_mode == 'history': +#if $busco_mode.mode == 'geno' and $busco_mode.use_augustus.use_augustus_selector == 'yes' and $busco_mode.use_augustus.aug_prediction.augustus_mode == 'history': ## Using an augustus model from history, we need to unzip it and let augustus find it mkdir -p 'augustus_dir/species/' && - tar -C 'augustus_dir/species/' -xzf '${adv.aug_prediction.augustus_model}' && + tar -C 'augustus_dir/species/' -xzf '${busco_mode.use_augustus.aug_prediction.augustus_model}' && #end if busco --in '${input}' --lineage_dataset '${lineage_dataset}' --update-data ---mode '${mode}' --o busco_galaxy +--mode '${busco_mode.mode}' +--out busco_galaxy --cpu \${GALAXY_SLOTS:-4} --evalue ${adv.evalue} -${adv.long} --limit ${adv.limit} -#if $adv.aug_prediction.augustus_mode == 'builtin': - --augustus_species '${adv.aug_prediction.augustus_species}' -#else if $adv.aug_prediction.augustus_mode == 'history': - --augustus_species local + +#if $adv.auto_lineage: + 
$adv.auto_lineage +#end if +#if $busco_mode.mode == 'geno' and $busco_mode.use_augustus.use_augustus_selector == 'yes': + + ${busco_mode.use_augustus.long} + --augustus + + #if $busco_mode.use_augustus.aug_prediction.augustus_mode == 'builtin': + --augustus_species '${busco_mode.use_augustus.aug_prediction.augustus_species}' + #else if $busco_mode.use_augustus.aug_prediction.augustus_mode == 'history': + --augustus_species local + #end if +#end if + +#if $adv.outputs and 'image' in $adv.outputs: + && + mkdir BUSCO_summaries + && + ls -l busco_galaxy/run_*/ && + cp busco_galaxy/short_summary.*.txt BUSCO_summaries/ + && + generate_plot.py -wd BUSCO_summaries -rt specific #end if - ]]></command> +]]> </command> <inputs> - <param type="data" name="input" format="fasta" label="Sequences to analyse" help="genome, transcriptome or proteome" /> - <param argument="--mode" type="select" label="Mode"> - <option value="geno">Genome</option> - <option value="tran">Transcriptome</option> - <option value="prot">Proteome</option> - </param> + <param type="data" name="input" format="fasta" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set." 
/> + <conditional name="busco_mode"> + <param argument="--mode" type="select" label="Mode"> + <option value="geno">Genome assemblies (DNA)</option> + <option value="tran">Transcriptome assemblies (DNA)</option> + <option value="prot">annotated gene sets (protein)</option> + </param> + <when value="geno"> + <conditional name="use_augustus"> + <param name="use_augustus_selector" type="select" label="Use Augustus instead of Metaeuk"> + <option value="yes">Yes, use Augustus</option> + <option value="no" selected="true">Use Metaeuk</option> + </param> + <when value="no" /> + <when value="yes"> + <conditional name="aug_prediction"> + <param name="augustus_mode" type="select" label="Augustus species model"> + <option value="no" selected="true">Use the default species for selected lineage</option> + <option value="builtin">Use another predefined species model</option> + <option value="history">Use a custom species model</option> + </param> + <when value="no" /> + <when value="history"> + <param name="augustus_model" type="data" format="augustus" label="Augustus model" /> + </when> + <when value="builtin"> + <param name="augustus_species" type="select" label="Augustus species model"> + <expand macro="augustus_species" /> + </param> + </when> + </conditional> + <param argument="--long" type="boolean" checked="false" truevalue="--long" falsevalue="" label="Optimization mode Augustus self-training" help="Adds considerably to run time, but can improve results for some non-model organisms" /> + </when> + </conditional> + </when> + <when value="tran" /> + <when value="prot" /> + </conditional> <param argument="--lineage_dataset" type="select" label="Lineage"> - <expand macro="lineages"/> + <expand macro="lineages" /> </param> <section name="adv" title="Advanced Options" expanded="False"> - <param argument="--evalue" type="float" value="0.01" label="E-value cutoff for BLAST searches."/> - <param argument="--limit" type="integer" value="3" label="How many candidate regions to 
consider"/> - - <conditional name="aug_prediction"> - <param name="augustus_mode" type="select" label="Augustus species model"> - <option value="no" selected="true">Use the default species for selected lineage</option> - <option value="builtin">Use another predefined species model</option> - <option value="history">Use a custom species model</option> - </param> - <when value="no"/> - <when value="history"> - <param name="augustus_model" type="data" format="augustus" label="Augustus model"/> - </when> - <when value="builtin"> - <param name="augustus_species" type="select" label="Augustus species model"> - <expand macro="augustus_species"/> - </param> - </when> - </conditional> - <param argument="--long" type="boolean" checked="false" truevalue="--long" falsevalue="" label="Optimization mode Augustus self-training" help="Adds considerably to run time, but can improve results for some non-model organisms"/> + <param argument="--evalue" type="float" value="0.001" min="0" max="1" label="E-value cutoff for BLAST searches." 
/> + <param argument="--limit" type="integer" value="3" label="How many candidate regions to consider" /> + <param name="auto_lineage" type="select" optional="true" label="Run auto-lineage to find optimal lineage path"> + <option value="--auto-lineage">Run auto-lineage to find optimum lineage path</option> + <option value="--auto-lineage-prok">Run auto-lineage just on non-eukaryote trees to find optimum lineage path</option> + <option value="--auto-lineage-euk">Run auto-placement just on eukaryote tree to find optimum lineage path</option> + </param> + <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> + <option value="short_summary">short summary text</option> + <option value="missing">list with missing IDs</option> + <option value="image">summary image</option> + </param> </section> </inputs> <outputs> - <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt"/> - <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv"/> - <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv"/> + <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt"> + <filter>adv['outputs'] and 'short_summary' in adv['outputs']</filter> + </data> + <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv" /> + <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv"> + <filter>adv['outputs'] and 'missing' in adv['outputs']</filter> + </data> + <data name='summary_image' format='png' label="${tool.name} on 
${on_string}: summary image" from_work_dir="BUSCO_summaries/busco_figure.png"> + <filter>adv['outputs'] and 'image' in adv['outputs']</filter> + </data> </outputs> <tests> - <test> - <param name="input" value="genome.fa"/> - <param name="lineage_dataset" value="arthropoda_odb10"/> - <param name="mode" value="geno"/> - <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> - <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> + <test expect_num_outputs="3"> + <param name="input" value="genome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="geno" /> + <conditional name="use_augustus"> + <param name="use_augustus_selector" value="yes" /> + </conditional> + </conditional> + <section name="adv"> + <param name="outputs" value="short_summary,missing" /> + </section> + <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" /> <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"> <assert_contents> <has_text text="# BUSCO version is: @TOOL_VERSION@" /> </assert_contents> </output> </test> - <test> - <param name="input" value="proteome.fa"/> - <param name="lineage_dataset" value="arthropoda_odb10"/> - <param name="mode" value="prot"/> - <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="4"/> - <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4"/> - <output name="busco_missing" file="proteome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + <test expect_num_outputs="4"> + <param name="input" value="proteome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="prot" /> + 
</conditional> + <section name="adv"> + <param name="outputs" value="short_summary,missing,image" /> + </section> + <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4" /> + <output name="busco_missing" file="proteome_results/missing_buscos_list" compare="diff" lines_diff="4" /> + <output name="summary_image" file="proteome_results/summary.png" compare="sim_size" /> </test> - <test> - <param name="input" value="transcriptome.fa"/> - <param name="lineage_dataset" value="arthropoda_odb10"/> - <param name="mode" value="tran"/> - <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="4"/> - <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="4"/> - <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + <test expect_num_outputs="4"> + <param name="input" value="transcriptome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="tran" /> + </conditional> + <section name="adv"> + <param name="auto_lineage" value="--auto-lineage" /> + <param name="outputs" value="short_summary,missing,image" /> + </section> + <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="4" /> + <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="4" /> + <output name="summary_image" file="transcriptome_results/summary.png" compare="sim_size" /> </test> - <test> - <param name="input" value="genome.fa"/> - <param name="lineage_dataset" value="arthropoda_odb10"/> - <param name="mode" value="geno"/> - <param name="adv|aug_prediction|augustus_mode" value="builtin"/> 
- <param name="adv|aug_prediction|augustus_species" value="human"/> - <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> - <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> - <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + <test expect_num_outputs="2"> + <param name="input" value="genome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="geno" /> + <conditional name="use_augustus"> + <param name="use_augustus_selector" value="yes" /> + <conditional name="aug_prediction"> + <param name="augustus_mode" value="builtin" /> + <param name="augustus_species" value="human" /> + </conditional> + </conditional> + </conditional> + <section name="adv"> + <param name="outputs" value="short_summary" /> + </section> + <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" /> </test> - <test> - <param name="input" value="genome.fa"/> - <param name="lineage_dataset" value="arthropoda_odb10"/> - <param name="mode" value="geno"/> - <param name="adv|aug_prediction|augustus_mode" value="history"/> - <param name="adv|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> - <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> - <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> - <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + <test expect_num_outputs="3"> + <param name="input" value="genome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="geno" /> + <conditional name="use_augustus"> + <param 
name="use_augustus_selector" value="yes" /> + <conditional name="aug_prediction"> + <param name="augustus_mode" value="history" /> + <param name="augustus_model" value="local.tar.gz" ftype="augustus" /> + </conditional> + </conditional> + </conditional> + <section name="adv"> + <param name="outputs" value="short_summary,missing" /> + </section> + <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4" /> + <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4" /> + </test> + <test expect_num_outputs="4"> + <param name="input" value="genome.fa" /> + <param name="lineage_dataset" value="arthropoda_odb10" /> + <conditional name="busco_mode"> + <param name="mode" value="geno" /> + <conditional name="use_augustus"> + <param name="use_augustus_selector" value="no" /> + </conditional> + </conditional> + <section name="adv"> + <param name="outputs" value="short_summary,missing,image" /> + </section> + <output name="busco_sum" file="genome_results_metaeuk/short_summary" compare="diff" lines_diff="4" /> + <output name="busco_table" file="genome_results_metaeuk/full_table" compare="diff" lines_diff="4" /> + <output name="busco_missing" file="genome_results_metaeuk/missing_buscos_list" compare="diff" lines_diff="4" /> + <output name="summary_image" file="genome_results_metaeuk/summary.png" compare="sim_size" /> </test> </tests> - <help> + <help><![CDATA[ + + BUSCO: assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs +-------------------------------------------------------------------------------------------------------------- + +Interpreting the results +^^^^^^^^^^^^^^^^^^^^^^^^ + +BUSCO_ attempts to provide a quantitative assessment of the completeness in terms of the expected gene content of a +genome assembly, transcriptome, or annotated gene set. 
The results are simplified into categories of Complete +and single-copy, Complete and duplicated, Fragmented, or Missing BUSCOs. + +BUSCO completeness results make sense only in the context of the biology of your organism. +You have to understand whether missing or duplicated genes are of biological or technical origin. +For instance, a high level of duplication may be explained by a recent whole duplication event +(biological) or a chimeric assembly of haplotypes (technical). +Transcriptomes and protein sets that are not filtered for isoforms will lead to a high proportion of duplicates. +Therefore you should filter them before a BUSCO analysis. +Finally, focusing on specific tissues or specific life stages and conditions in a transcriptomic experiment +is unlikely to produce a BUSCO-complete transcriptome. In this case, consistency across your samples +is what you will be aiming for. + +For more information please refer to the Busco_ `user guide <https://busco.ezlab.org/busco_userguide.html#interpreting-the-results>`_. + .. _BUSCO: http://busco.ezlab.org/ - </help> - <expand macro="citations"/> + + ]]> </help> + <expand macro="citations" /> </tool>