Mercurial > repos > iuc > busco
changeset 2:382a4c19007f draft
planemo upload commit 2896dcfd180800d00ea413a59264ef8b11788b8e
author | iuc |
---|---|
date | Fri, 20 Oct 2017 03:54:35 -0400 |
parents | 87983967544b |
children | f18f0b887c31 |
files | busco.xml test-data/busco.loc test-data/genome_results/full_table test-data/genome_results/missing_buscos_list test-data/genome_results/short_summary test-data/local.tar.gz test-data/proteome_results/full_table test-data/proteome_results/missing_buscos_list test-data/proteome_results/short_summary test-data/transcriptome_results/full_table test-data/transcriptome_results/missing_buscos_list test-data/transcriptome_results/short_summary tool-data/busco.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 15 files changed, 185 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/busco.xml Tue Jan 24 12:08:38 2017 -0500 +++ b/busco.xml Fri Oct 20 03:54:35 2017 -0400 @@ -1,22 +1,39 @@ -<tool id="busco" name="Busco" version="2.0"> +<tool id="busco" name="Busco" version="3.0.2"> <description>assess genome assembly and annotation completeness</description> - <requirements> - <requirement type="package" version="2.0">busco</requirement> + <requirement type="package" version="3.0.2">busco</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> </requirements> + <command><![CDATA[ + export BUSCO_CONFIG_FILE='busco_config.ini' - <command><![CDATA[ - BUSCO.py + && + + #if $adv.aug_prediction.augustus_mode == 'history': + ## Using an augustus model from history, we need to unzip it and let augustus find it + + cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ && + + mkdir -p 'augustus_dir/species/' && + + tar -C 'augustus_dir/species/' -xzvf '${adv.aug_prediction.augustus_model}' > /dev/null && + + export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ && + #end if + + run_BUSCO.py --in '${input}' - --lineage '${lineage.fields.path}' + --lineage_path '${lineage_path.fields.path}/${lineage_path.fields.value}' --mode '${mode}' -o "busco_galaxy" --cpu \${GALAXY_SLOTS:-4} --evalue ${adv.evalue} ${adv.long} --limit ${adv.limit} - #if $adv.species: - --species '${adv.species}' + #if $adv.aug_prediction.augustus_mode == 'builtin': + --species '${adv.aug_prediction.augustus_species}' + #else if $adv.aug_prediction.augustus_mode == 'history': + --species local #end if --tarzip ]]></command> @@ -24,14 +41,13 @@ <inputs> <param type="data" name="input" format="fasta" label="Sequences to analyse" help="genome, transcriptome or proteome" /> - <param argument="--mode" type="select" label="Mode"> <option value="geno">Genome</option> <option value="tran">Transcriptome</option> <option value="prot">Proteome</option> </param> - <param argument="--lineage" type="select" label="Lineage"> + <param argument="--lineage_path" type="select" 
label="Lineage"> <options from_data_table="busco"> <filter type="sort_by" column="2" /> <validator type="no_options" message="No indexes are available" /> @@ -41,7 +57,118 @@ <section name="adv" title="Advanced Options" expanded="False"> <param argument="--evalue" type="float" value="0.01" label="E-value cutoff for BLAST searches."/> <param argument="--limit" type="integer" value="3" label="How many candidate regions to consider"/> - <param argument="--species" type="text" optional="True" label="Name of existing Augustus species gene finding metaparameters"/> + + <conditional name="aug_prediction"> + <param name="augustus_mode" type="select" label="Augustus species model"> + <option value="no" selected="true">Use the default species for selected lineage</option> + <option value="builtin">Use another predefined species model</option> + <option value="history">Use a custom species model</option> + </param> + <when value="no"/> + <when value="history"> + <param name="augustus_model" type="data" format="augustus" label="Augustus model"/> + </when> + <when value="builtin"> + <param name="augustus_species" type="select" label="Augustus species model"> + <!-- If you update this list, please also update it in maker and augustus tools (../maker/maker.xml and ../augustus/augustus.xml) --> + <option value="human">Homo sapiens</option> + <option value="fly">Drosophila melanogaster</option> + <option value="arabidopsis">Arabidopsis thaliana</option> + <option value="brugia ">Brugia malayi</option> + <option value="aedes">Aedes aegypti</option> + <option value="tribolium2012">Tribolium castaneum</option> + <option value="schistosoma">Schistosoma mansoni</option> + <option value="tetrahymena">Tetrahymena thermophila</option> + <option value="galdieria">Galdieria sulphuraria</option> + <option value="maize">Zea mays</option> + <option value="toxoplasma">Toxoplasma gondii</option> + <option value="caenorhabditis ">Caenorhabditis elegans</option> + <option 
value="aspergillus_fumigatus">Aspergillus fumigatus</option> + <option value="aspergillus_nidulans ">Aspergillus nidulans</option> + <option value="aspergillus_oryzae ">Aspergillus oryzae</option> + <option value="aspergillus_terreus">Aspergillus terreus</option> + <option value="botrytis_cinerea ">Botrytis cinerea</option> + <option value="candida_albicans ">Candida albicans</option> + <option value="candida_guilliermondii ">Candida guilliermondii</option> + <option value="candida_tropicalis ">Candida tropicalis</option> + <option value="chaetomium_globosum">Chaetomium globosum</option> + <option value="coccidioides_immitis">Coccidioides immitis</option> + <option value="coprinus">Coprinus cinereus</option> + <option value="coprinus_cinereus">Coprinus cinereus</option> + <option value="cryptococcus_neoformans_gattii">Cryptococcus neoformans gattii</option> + <option value="cryptococcus_neoformans_neoformans_B">Cryptococcus neoformans neoformans</option> + <option value="cryptococcus_neoformans_neoformans_JEC21">Cryptococcus neoformans neoformans</option> + <option value="cryptococcus">Cryptococcus neoformans</option> + <option value="debaryomyces_hansenii">Debaryomyces hansenii</option> + <option value="encephalitozoon_cuniculi_GB">Encephalitozoon cuniculi</option> + <option value="eremothecium_gossypii">Eremothecium gossypii</option> + <option value="fusarium_graminearum ">Fusarium graminearum</option> + <option value="histoplasma_capsulatum ">Histoplasma capsulatum</option> + <option value="histoplasma">Histoplasma capsulatum</option> + <option value="kluyveromyces_lactis ">Kluyveromyces lactis</option> + <option value="laccaria_bicolor ">Laccaria bicolor</option> + <option value="lamprey">Petromyzon marinus</option> + <option value="leishmania_tarentolae">Leishmania tarentolae</option> + <option value="lodderomyces_elongisporus">Lodderomyces elongisporus</option> + <option value="magnaporthe_grisea ">Magnaporthe grisea</option> + <option 
value="neurospora_crassa">Neurospora crassa</option> + <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option> + <option value="pichia_stipitis">Pichia stipitis</option> + <option value="rhizopus_oryzae">Rhizopus oryzae</option> + <option value="saccharomyces_cerevisiae_S288C">Saccharomyces cerevisiae</option> + <option value="saccharomyces_cerevisiae_rm11-1a_1">Saccharomyces cerevisiae</option> + <option value="saccharomyces">Saccharomyces cerevisiae</option> + <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option> + <option value="trichinella">Trichinella spiralis</option> + <option value="ustilago_maydis">Ustilago maydis</option> + <option value="yarrowia_lipolytica">Yarrowia lipolytica</option> + <option value="nasonia">Nasonia vitripennis</option> + <option value="tomato">Solanum lycopersicum</option> + <option value="chlamydomonas">Chlamydomonas reinhardtii</option> + <option value="amphimedon">Amphimedon queenslandica</option> + <option value="pneumocystis">Pneumocystis jirovecii</option> + <option value="chicken">Gallus gallus domesticus (chicken)</option> + <option value="cacao">Theobroma cacao (cacao)</option> + <option value="heliconius_melpomene1">Heliconius melpomene</option> + <option value="xenoturbella">Xenoturbella</option> + <option value="E_coli_K12">E coli K12</option> + <option value="c_elegans_trsk">c elegans trsk</option> + <option value="camponotus_floridanus">Camponotus floridanus</option> + <option value="coyote_tobacco">Coyote tobacco</option> + <option value="s_aureus">Staphylococcus aureus</option> + <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option> + <option value="wheat">wheat</option> + <option value="zebrafish">Danio rerio</option> + <option value="anidulans">Aspergillus nidulans</option> + <option value="bombus_impatiens1">Bombus impatiens1</option> + <option value="bombus_terrestris2">Bombus terrestris2</option> + <option 
value="botrytis_cinerea">Botrytis cinerea</option> + <option value="brugia_malayi">Brugia malayi</option> + <option value="conidiobolus_coronatus">Conidiobolus coronatus</option> + <option value="cryptococcus_neoformans">Cryptococcus neoformans</option> + <option value="culex_pipiens">Culex pipiens</option> + <option value="elephant_shark">Callorhinchus milii</option> + <option value="honeybee1">Apis mellifera</option> + <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option> + <option value="pea_aphid">Acyrthosiphon pisum</option> + <option value="rhodnius_prolixus">Rhodnius prolixus</option> + <option value="ustilago_maydis">Ustilago maydis</option> + <option value="verticillium_albo_atrum1">Verticillium albo atrum1</option> + <option value="verticillium_longisporum1">Verticillium longisporum1</option> + <option value="Xipophorus_maculatus">Xipophorus_maculatus</option> + <option value="adorsata">adorsata</option> + <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option> + <option value="maker2_athal1">maker2_athal1</option> + <option value="maker2_c_elegans1">maker2_c_elegans1</option> + <option value="maker2_dmel1">maker2_dmel1</option> + <option value="maker2_spomb1">maker2_spomb1</option> + <option value="parasteatoda">parasteatoda</option> + <option value="rice">rice</option> + <option value="schistosoma2">schistosoma2</option> + <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option> + </param> + </when> + </conditional> <param argument="--long" type="boolean" checked="false" truevalue="--long" falsevalue="" label="Optimization mode Augustus self-training" help="Adds considerably to run time, but can improve results for some non-model organisms"/> </section> </inputs> @@ -53,7 +180,7 @@ <tests> <test> <param name="input" value="genome.fa"/> - <param name="lineage" value="arthropoda_2.0"/> + <param name="lineage_path" value="arthropoda"/> <param name="mode" value="geno"/> <output name="busco_sum" 
file="genome_results/short_summary" compare="diff" lines_diff="4"/> <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> @@ -61,7 +188,7 @@ </test> <test> <param name="input" value="proteome.fa"/> - <param name="lineage" value="arthropoda_2.0"/> + <param name="lineage_path" value="arthropoda"/> <param name="mode" value="prot"/> <output name="busco_sum" file="proteome_results/short_summary" compare="diff" lines_diff="4"/> <output name="busco_table" file="proteome_results/full_table" compare="diff" lines_diff="4"/> @@ -69,12 +196,32 @@ </test> <test> <param name="input" value="transcriptome.fa"/> - <param name="lineage" value="arthropoda_2.0"/> + <param name="lineage_path" value="arthropoda"/> <param name="mode" value="tran"/> <output name="busco_sum" file="transcriptome_results/short_summary" compare="diff" lines_diff="4"/> <output name="busco_table" file="transcriptome_results/full_table" compare="diff" lines_diff="4"/> <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="4"/> </test> + <test> + <param name="input" value="genome.fa"/> + <param name="lineage_path" value="arthropoda"/> + <param name="mode" value="geno"/> + <param name="adv|aug_prediction|augustus_mode" value="builtin"/> + <param name="adv|aug_prediction|augustus_species" value="human"/> + <output name="busco_sum" file="genome_results/short_summary" compare="diff" lines_diff="4"/> + <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> + <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + </test> + <test> + <param name="input" value="genome.fa"/> + <param name="lineage_path" value="arthropoda"/> + <param name="mode" value="geno"/> + <param name="adv|aug_prediction|augustus_mode" value="history"/> + <param name="adv|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> + <output name="busco_sum" 
file="genome_results/short_summary" compare="diff" lines_diff="4"/> + <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="4"/> + <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="4"/> + </test> </tests> <help> BUSCO: assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs
--- a/test-data/busco.loc Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/busco.loc Fri Oct 20 03:54:35 2017 -0400 @@ -1,1 +1,1 @@ -arthropoda_2.0 arthropoda_2.0 Arthropoda (v2) ${__HERE__}/arthropoda/ +arthropoda Arthropoda data ${__HERE__}/
--- a/test-data/genome_results/full_table Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/genome_results/full_table Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpeQaU9k/job_working_directory/000/2/conda-env/bin/BUSCO.py -i /tmp/tmpeQaU9k/files/000/dataset_1.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m genome -c 1 -sp generic -e 0.01 -z # # Busco id Status Contig Start End Score Length
--- a/test-data/genome_results/missing_buscos_list Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/genome_results/missing_buscos_list Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpeQaU9k/job_working_directory/000/2/conda-env/bin/BUSCO.py -i /tmp/tmpeQaU9k/files/000/dataset_1.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m genome -c 1 -sp generic -e 0.01 -z # BUSCOaEOG7B0HST
--- a/test-data/genome_results/short_summary Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/genome_results/short_summary Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpeQaU9k/job_working_directory/000/2/conda-env/bin/BUSCO.py -i /tmp/tmpeQaU9k/files/000/dataset_1.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m genome -c 1 -sp generic -e 0.01 -z # # Summarized benchmarking in BUSCO notation for file /tmp/tmpeQaU9k/files/000/dataset_1.dat
--- a/test-data/proteome_results/full_table Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/proteome_results/full_table Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/4/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_5.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m proteins -c 1 -sp generic -e 0.01 -z # # Busco id Status Sequence Score Length
--- a/test-data/proteome_results/missing_buscos_list Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/proteome_results/missing_buscos_list Fri Oct 20 03:54:35 2017 -0400 @@ -1,4 +1,4 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/4/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_5.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m proteins -c 1 -sp generic -e 0.01 -z #
--- a/test-data/proteome_results/short_summary Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/proteome_results/short_summary Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/4/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_5.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m proteins -c 1 -sp generic -e 0.01 -z # # Summarized benchmarking in BUSCO notation for file /tmp/tmpCgx5rU/files/000/dataset_5.dat
--- a/test-data/transcriptome_results/full_table Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/transcriptome_results/full_table Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/6/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_9.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m tran -c 1 -sp generic -e 0.01 -z # # Busco id Status Sequence Score Length
--- a/test-data/transcriptome_results/missing_buscos_list Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/transcriptome_results/missing_buscos_list Fri Oct 20 03:54:35 2017 -0400 @@ -1,5 +1,5 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/6/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_9.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m tran -c 1 -sp generic -e 0.01 -z # BUSCOaEOG7B0HST
--- a/test-data/transcriptome_results/short_summary Tue Jan 24 12:08:38 2017 -0500 +++ b/test-data/transcriptome_results/short_summary Fri Oct 20 03:54:35 2017 -0400 @@ -1,9 +1,9 @@ -# BUSCO version is: 2.0 -# The lineage dataset is: arthropoda (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) +# BUSCO version is: 3.0.2 +# The lineage dataset is: N/A (Creation date: N/A, number of species: N/A, number of BUSCOs: N/A) # To reproduce this run: python /tmp/tmpCgx5rU/job_working_directory/000/6/conda-env/bin/BUSCO.py -i /tmp/tmpCgx5rU/files/000/dataset_9.dat -o busco_galaxy -l /home/abretaud/iuc_tools_abretaud/tools/busco/test-data/arthropoda/ -m tran -c 1 -sp generic -e 0.01 -z # # Summarized benchmarking in BUSCO notation for file /tmp/tmpCgx5rU/files/000/dataset_9.dat -# BUSCO was run in mode: tran +# BUSCO was run in mode: transcriptome C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:1
--- a/tool-data/busco.loc.sample Tue Jan 24 12:08:38 2017 -0500 +++ b/tool-data/busco.loc.sample Fri Oct 20 03:54:35 2017 -0400 @@ -1,13 +1,13 @@ # This is a sample file distributed with Galaxy that is used to define a # list of busco datasets, using three tab-separated columns: # -# <unique_build_id> <dbkey> <display_name> <genome_fasta_file_path> +# <unique_build_id> <display_name> <genome_fasta_file_path> # # Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html # # "/some/path/arthropoda/" would be the last column in the line # If this were for the arthropoda lineage dataset, the resulting entry would look like: # -#arthropoda_2.0 arthropoda_2.0 Arthropoda (v2) /some/path/arthropoda/ +#arthropoda_2.0 arthropoda_2.0 /some/path/arthropoda/ # #
--- a/tool_data_table_conf.xml.sample Tue Jan 24 12:08:38 2017 -0500 +++ b/tool_data_table_conf.xml.sample Fri Oct 20 03:54:35 2017 -0400 @@ -1,6 +1,6 @@ <tables> <table name="busco" comment_char="#"> - <columns>value, dbkey, name, path</columns> + <columns>value, name, path</columns> <file path="tool-data/busco.loc" /> </table> </tables>
--- a/tool_data_table_conf.xml.test Tue Jan 24 12:08:38 2017 -0500 +++ b/tool_data_table_conf.xml.test Fri Oct 20 03:54:35 2017 -0400 @@ -1,6 +1,6 @@ <tables> <table name="busco" comment_char="#"> - <columns>value, dbkey, name, path</columns> + <columns>value, name, path</columns> <file path="${__HERE__}/test-data/busco.loc" /> </table> </tables>