Mercurial > repos > bgruening > augustus
view augustus.xml @ 7:09855551d713 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
author | iuc |
---|---|
date | Thu, 15 Jul 2021 17:16:12 +0000 |
parents | ca6d970d931c |
children | bd0e53f3a891 |
line wrap: on
line source
<tool id="augustus" name="Augustus" profile="20.01" version="@VERSION@+galaxy@SUFFIX_VERSION@">
    <!--
        Galaxy wrapper around the `augustus` gene predictor.

        The command template
          1. optionally unpacks a user supplied training archive next to a copy of the
             stock AUGUSTUS config directory and points AUGUSTUS_CONFIG_PATH at it,
          2. runs augustus with the switches selected in the form and tees its standard
             output into the main GTF/GFF dataset,
          3. pipes the same stream into extract_features.py when protein and/or coding
             sequence FASTA outputs were requested.

        Tokens (@VERSION@, @SUFFIX_VERSION@) and the "requirements" / "citations"
        macros are defined in macros.xml.
    -->
    <description>gene prediction for prokaryotic and eukaryotic genomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
        #if $model.augustus_mode == 'history'
            ## Using an augustus model from history, we need to unzip it and let augustus find it
            cp -r `command -v augustus | xargs dirname`/../config/ augustus_dir/ &&
            mkdir -p 'augustus_dir/species/' &&
            tar -C 'augustus_dir/species/' -xzvf '${model.custom_model}' > /dev/null &&
            export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
        #end if

        ## Augustus writes the protein and coding sequences as comment into the gff/gtf file an external
        ## script is used to extract the sequences into additional files

        augustus
            --strand=$strand
            $noInFrameStop
            $gff
            --uniqueGeneId=true

            ## Every output feature is switched on or off explicitly so the result never
            ## depends on the defaults of the installed augustus build.
            #if 'protein' in str($outputs).split(','):
                --protein=on
            #else:
                --protein=off
            #end if

            #if 'codingseq' in str($outputs).split(','):
                --codingseq=on
            #else:
                --codingseq=off
            #end if

            #if 'introns' in str($outputs).split(','):
                --introns=on
            #else:
                --introns=off
            #end if

            ## The else branch used to emit --stop=off, which left start codon reporting
            ## untouched when "start" was deselected.
            #if 'start' in str($outputs).split(','):
                --start=on
            #else:
                --start=off
            #end if

            #if 'stop' in str($outputs).split(','):
                --stop=on
            #else:
                --stop=off
            #end if

            #if 'cds' in str($outputs).split(','):
                --cds=on
            #else:
                --cds=off
            #end if

            $singlestrand
            '$input_genome'
            $utr
            --genemodel=$genemodel
            --softmasking=$softmasking

            #if $hints.usehints == 'T'
                --hintsfile='$hints.hintsfile'
                --extrinsicCfgFile='$hints.extrinsiccfg'
            #end if

            #if $range.userange == 'T'
                --predictionStart=$range.start
                --predictionEnd=$range.stop
            #end if

            #if $model.augustus_mode == 'history'
                --species=local
            #else
                ## Deliberately left unquoted: some option values of the organism select end
                ## with a space, which the shell drops here but would keep inside quotes.
                --species=$model.organism
            #end if
            | tee '$output'

            ## The FASTA outputs are extracted from the comment lines of the augustus stream.
            #if 'protein' in str($outputs).split(',') or 'codingseq' in str($outputs).split(','):
                | python '$__tool_directory__/extract_features.py'
                    #if 'protein' in str($outputs).split(','):
                        --protein '$protein_output'
                    #end if
                    #if 'codingseq' in str($outputs).split(','):
                        --codingseq '$codingseq_output'
                    #end if
            #end if
    ]]></command>
    <inputs>
        <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/>
        <!-- Boolean switches: truevalue/falsevalue are the literal command line fragments. -->
        <param name="noInFrameStop" argument="--noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons"
            truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false"
            help="Otherwise, intron-spanning stop codons could occur." />
        <param name="singlestrand" type="boolean" argument="--singlestrand" label="Predict genes independently on each strand"
            help="This allows overlapping genes on opposite strands."
            truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" />
        <param name="utr" type="boolean" argument="--UTR" label="Predict the untranslated regions in addition to the coding sequence"
            truevalue="--UTR=on" falsevalue="--UTR=off" checked="false"
            help="This currently works only for human, galdieria, toxoplasma and caenorhabditis." />

        <!-- Either a built-in species parameter set or a trained model archive from the history. -->
        <conditional name="model">
            <param name="augustus_mode" type="select" label="Trainingset">
                <option value="builtin">Run Augustus with a predefined trainingset</option>
                <option value="history">Run Augustus with a custom trainingset</option>
            </param>
            <when value="history">
                <param name="custom_model" type="data" format="augustus" label="Augustus model" help="Archive created with the 'Train Augustus' tool"/>
            </when>
            <when value="builtin">
                <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset.">
                    <!-- If you update this list, please also update it in maker and busco tools (../maker/maker.xml and ../busco/busco.xml) -->
                    <!--
                        NOTE(review): several values below end with a space (for example "brugia ").
                        They are kept byte-identical so that stored workflows and API calls keep
                        validating; confirm whether they can be normalised in a later revision.
                        NOTE(review): the labels of Gonapodya_prolifera and maker2_spomb1 do not
                        match their values; verify against the AUGUSTUS species list.
                        The second, identical entries for phanerochaete_chrysosporium and
                        ustilago_maydis were dropped; the first occurrences remain.
                    -->
                    <option value="human">Homo sapiens</option>
                    <option value="fly">Drosophila melanogaster</option>
                    <option value="maker2_dmel1">Drosophila melanogaster (maker2_dmel1)</option>
                    <option value="arabidopsis">Arabidopsis thaliana</option>
                    <option value="brugia ">Brugia malayi</option>
                    <option value="aedes">Aedes aegypti</option>
                    <option value="tribolium2012">Tribolium castaneum</option>
                    <option value="schistosoma">Schistosoma mansoni</option>
                    <option value="schistosoma2">Schistosoma mansoni (schistosoma2)</option>
                    <option value="tetrahymena">Tetrahymena thermophila</option>
                    <option value="galdieria">Galdieria sulphuraria</option>
                    <option value="maize">Zea mays</option>
                    <option value="toxoplasma">Toxoplasma gondii</option>
                    <option value="caenorhabditis ">Caenorhabditis elegans</option>
                    <option value="aspergillus_fumigatus">Aspergillus fumigatus</option>
                    <option value="aspergillus_nidulans ">Aspergillus nidulans</option>
                    <option value="aspergillus_oryzae ">Aspergillus oryzae</option>
                    <option value="aspergillus_terreus">Aspergillus terreus</option>
                    <option value="botrytis_cinerea ">Botrytis cinerea</option>
                    <option value="candida_albicans ">Candida albicans</option>
                    <option value="candida_guilliermondii ">Candida guilliermondii</option>
                    <option value="candida_tropicalis ">Candida tropicalis</option>
                    <option value="chaetomium_globosum">Chaetomium globosum</option>
                    <option value="coccidioides_immitis">Coccidioides immitis</option>
                    <option value="coprinus">Coprinus cinereus</option>
                    <option value="coprinus_cinereus">Coprinus cinereus</option>
                    <option value="cryptococcus_neoformans_gattii">Cryptococcus neoformans gattii</option>
                    <option value="cryptococcus_neoformans_neoformans_B">Cryptococcus neoformans neoformans</option>
                    <option value="cryptococcus_neoformans_neoformans_JEC21">Cryptococcus neoformans neoformans</option>
                    <option value="cryptococcus">Cryptococcus neoformans</option>
                    <option value="debaryomyces_hansenii">Debaryomyces hansenii</option>
                    <option value="encephalitozoon_cuniculi_GB">Encephalitozoon cuniculi</option>
                    <option value="eremothecium_gossypii">Eremothecium gossypii</option>
                    <option value="fusarium_graminearum ">Fusarium graminearum</option>
                    <option value="histoplasma_capsulatum ">Histoplasma capsulatum</option>
                    <option value="histoplasma">Histoplasma capsulatum</option>
                    <option value="kluyveromyces_lactis ">Kluyveromyces lactis</option>
                    <option value="laccaria_bicolor ">Laccaria bicolor</option>
                    <option value="lamprey">Petromyzon marinus</option>
                    <option value="leishmania_tarentolae">Leishmania tarentolae</option>
                    <option value="lodderomyces_elongisporus">Lodderomyces elongisporus</option>
                    <option value="magnaporthe_grisea ">Magnaporthe grisea</option>
                    <option value="neurospora_crassa">Neurospora crassa</option>
                    <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option>
                    <option value="pichia_stipitis">Pichia stipitis</option>
                    <option value="rhizopus_oryzae">Rhizopus oryzae</option>
                    <option value="saccharomyces_cerevisiae_S288C">Saccharomyces cerevisiae</option>
                    <option value="saccharomyces_cerevisiae_rm11-1a_1">Saccharomyces cerevisiae</option>
                    <option value="saccharomyces">Saccharomyces cerevisiae</option>
                    <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option>
                    <option value="trichinella">Trichinella spiralis</option>
                    <option value="ustilago_maydis">Ustilago maydis</option>
                    <option value="yarrowia_lipolytica">Yarrowia lipolytica</option>
                    <option value="nasonia">Nasonia vitripennis</option>
                    <option value="tomato">Solanum lycopersicum</option>
                    <option value="chlamydomonas">Chlamydomonas reinhardtii</option>
                    <option value="amphimedon">Amphimedon queenslandica</option>
                    <option value="pneumocystis">Pneumocystis jirovecii</option>
                    <option value="chicken">Gallus gallus domesticus (chicken)</option>
                    <option value="cacao">Theobroma cacao (cacao)</option>
                    <option value="heliconius_melpomene1">Heliconius melpomene</option>
                    <option value="xenoturbella">Xenoturbella</option>
                    <option value="E_coli_K12">Escherichia coli K12</option>
                    <option value="c_elegans_trsk">c elegans trsk</option>
                    <option value="camponotus_floridanus">Camponotus floridanus</option>
                    <option value="coyote_tobacco">Coyote tobacco</option>
                    <option value="s_aureus">Staphylococcus aureus</option>
                    <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option>
                    <option value="wheat">Triticum aestivum</option>
                    <option value="zebrafish">Danio rerio</option>
                    <option value="anidulans">Aspergillus nidulans</option>
                    <option value="bombus_impatiens1">Bombus impatiens1</option>
                    <option value="bombus_terrestris2">Bombus terrestris2</option>
                    <option value="botrytis_cinerea">Botrytis cinerea</option>
                    <option value="brugia_malayi">Brugia malayi</option>
                    <option value="conidiobolus_coronatus">Conidiobolus coronatus</option>
                    <option value="cryptococcus_neoformans">Cryptococcus neoformans</option>
                    <option value="culex_pipiens">Culex pipiens</option>
                    <option value="elephant_shark">Callorhinchus milii</option>
                    <option value="honeybee1">Apis mellifera</option>
                    <option value="pea_aphid">Acyrthosiphon pisum</option>
                    <option value="rhodnius_prolixus">Rhodnius prolixus</option>
                    <option value="verticillium_albo_atrum1">Verticillium albo-atrum</option>
                    <option value="verticillium_longisporum1">Verticillium longisporum</option>
                    <option value="Xipophorus_maculatus">Xipophorus maculatus</option>
                    <option value="adorsata">Apis dorsata</option>
                    <option value="ancylostoma_ceylanicum">Ancylostoma ceylanicum</option>
                    <option value="maker2_athal1">Arabidopsis thaliana (maker2_athal1)</option>
                    <option value="maker2_c_elegans1">Caenorhabditis elegans (maker2_c_elegans1)</option>
                    <option value="maker2_spomb1">Saccharomyces cerevisiae (maker2_spomb1)</option>
                    <option value="parasteatoda">Parasteatoda SP.</option>
                    <option value="rice">Oryza sp.</option>
                    <option value="Cassiopea_xamachana">Cassiopea xamachana</option>
                    <option value="Ptychodera_flava">Ptychodera flava</option>
                    <option value="Argopecten_irridians">Argopecten irridians</option>
                    <option value="Nemopilema_nomurai">Nemopilema nomurai</option>
                    <option value="Notospermus_geniculatus">Notospermus geniculatus</option>
                    <option value="Chrysaora_chesapeakeij">Chrysaora chesapeakeij</option>
                    <option value="Ectocarpus_siliculosus">Ectocarpus siliculosus</option>
                    <option value="Trichoplax_adhaerens">Trichoplax adhaerens</option>
                    <option value="Aurelia_aurita">Aurelia aurita</option>
                    <option value="Rhopilema_esculentum">Rhopilema esculentum</option>
                    <option value="Encephalitozoon_cuniculi">Encephalitozoon cuniculi</option>
                    <option value="Gonapodya_prolifera">Dunaliella salina</option>
                    <option value="Sordaria_macrospora">Sordaria macrospora</option>
                    <option value="Sphaceloma_murrayae">Sphaceloma murrayae</option>
                    <option value="Vitrella_brassicaformis">Vitrella brassicaformis</option>
                    <option value="Monoraphidium_neglectum">Monoraphidium_neglectum</option>
                    <option value="Raphidocelis_subcapita">Raphidocelis subcapita</option>
                    <option value="Ostreococcus_tauri">Ostreococcus tauri</option>
                    <option value="Ostreococcus_sp_lucimarinus">Ostreococcus sp. lucimarinus</option>
                    <option value="Micromonas_pusilla">Micromonas pusilla</option>
                    <option value="Micromonas_commoda">Micromonas commoda</option>
                    <option value="Chlamydomonas_eustigma">Chlamydomonas eustigma</option>
                    <option value="Thalassiosira_pseudonana">Thalassiosira pseudonana</option>
                    <option value="Pseudo-nitzschia_multistriata">Pseudo-nitzschia multistriata</option>
                    <option value="Phaeodactylum_tricornutum">Phaeodactylum tricornutum</option>
                    <option value="Fragilariopsis_cylindrus">Fragilariopsis cylindrus</option>
                    <option value="Fistulifera_solaris">Fistulifera solaris</option>
                    <option value="Bathycoccus_prasinos">Bathycoccus prasinos</option>
                    <option value="Chloropicon_primus">Chloropicon primus</option>
                </param>
            </when>
        </conditional>

        <!-- truevalue/falsevalue are substituted after "softmasking=" in the command. -->
        <param name="softmasking" type="boolean" argument="--softmasking" label="Softmasking" truevalue="1" falsevalue="0" checked="true"
            help="If this option is enabled, lowercase letters are considered as repeated regions." />
        <param name="strand" type="select" argument="--strand" label="Predict genes on specific strands">
            <option value="both">both</option>
            <option value="forward">forward</option>
            <option value="backward">backward</option>
        </param>
        <param name="genemodel" label="Gene Model" type="select" help="Gene Model to predict, for more information please refer to the help.">
            <option value="complete">complete</option>
            <option value="partial">partial</option>
            <option value="intronless">intronless</option>
            <option value="atleastone">atleastone</option>
            <option value="exactlyone">exactlyone</option>
        </param>

        <!-- Optional extrinsic hints: both files are only passed to augustus when usehints is "T". -->
        <conditional name="hints">
            <param name="usehints" label="Use hints (extrinsic information)?" type="select" display="radio">
                <option value="F">No</option>
                <option value="T">Yes</option>
            </param>
            <when value="T">
                <param name="hintsfile" label="Select hints file from history" type="data" format="gff" help="A file containing hints in gff format (--hintsfile)"/>
                <param name="extrinsiccfg" label="Select extrinsic configuration file from history" type="data" format="txt" help="A .cfg file listing hint sources and their boni and mali (--extrinsicCfgFile)"/>
            </when>
            <when value="F">
            </when>
        </conditional>

        <!-- Optional restriction of the prediction to a coordinate window. -->
        <conditional name="range">
            <param name="userange" label="Specify prediction sequence range?" type="select" display="radio">
                <option value="F">No</option>
                <option value="T">Yes</option>
            </param>
            <when value="T">
                <param name="start" label="Starting position" type="integer" value="" help="The beginning of the search range (--predictionStart)"/>
                <param name="stop" label="Ending position" type="integer" value="" help="The end of the search range (--predictionEnd); must be greater than starting position"/>
            </when>
            <when value="F">
            </when>
        </conditional>

        <!-- The truevalue below is also what the change_format rule of the main output matches on. -->
        <param name="gff" type="boolean" label="GFF formated output" help="Standard output is GTF."
            truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" />

        <!-- The command template tests membership of each value in this comma separated selection. -->
        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
            <option value="protein" selected="True">predicted protein sequences (--protein)</option>
            <option value="codingseq" selected="True">coding sequence as comment in the output file (--codingseq)</option>
            <option value="introns">predicted intron sequences (--introns)</option>
            <option value="start">predicted start codons (--start)</option>
            <option value="stop">predicted stop codons (--stop)</option>
            <option value="cds" selected="true">CDS region (--cds)</option>
        </param>
    </inputs>
    <outputs>
        <!-- Main prediction file; switches to gff3 when the GFF checkbox is ticked. -->
        <data format="gtf" name="output" label="${tool.name} on ${on_string}: GTF/GFF">
            <change_format>
                <when input="gff" value="--gff3=on" format="gff3" />
            </change_format>
        </data>
        <!--
            The FASTA outputs only exist when the matching output option is selected.
            The leading "outputs and" guards the membership test against an empty
            selection (presumably None in that case; confirm against the Galaxy version in use).
        -->
        <data format="fasta" name="protein_output" label="${tool.name} on ${on_string}: Protein sequence">
            <filter>outputs and 'protein' in outputs</filter>
        </data>
        <data format="fasta" name="codingseq_output" label="${tool.name} on ${on_string}: Coding sequence">
            <filter>outputs and 'codingseq' in outputs</filter>
        </data>
    </outputs>
    <!--
        NOTE(review): unselected "start" now really disables start codon lines, so the
        expected files of tests that do not select "start" may need to be regenerated.
    -->
    <tests>
        <!-- UTR prediction, default GTF output -->
        <test>
            <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
            <param name="organism" value="human" />
            <param name="utr" value="True" />
            <param name="softmasking" value="False"/>
            <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="6"/>
        </test>
        <!-- UTR prediction, GFF3 output -->
        <test>
            <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
            <param name="organism" value="human" />
            <param name="utr" value="True" />
            <param name="gff" value="True" />
            <param name="softmasking" value="False"/>
            <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="6"/>
        </test>
        <!-- All output options selected, including both FASTA outputs -->
        <test>
            <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
            <param name="organism" value="human" />
            <param name="outputs" value="protein,codingseq,introns,cds,start,stop" />
            <param name="softmasking" value="False"/>
            <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gtf" lines_diff="6"/>
            <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" />
            <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" />
        </test>
        <!-- Hints file plus extrinsic configuration, no optional outputs -->
        <test>
            <param name="input_genome" value="chr2R.truncated.fa" ftype="fasta" />
            <param name="organism" value="fly" />
            <param name="usehints" value="T" />
            <param name="hintsfile" value="hints.truncated.adjusted.gff" />
            <param name="extrinsiccfg" value="extrinsic.truncated.cfg" />
            <param name="outputs" value="" />
            <param name="softmasking" value="False"/>
            <output name="output" file="augustus.hints.output.gtf" ftype="gtf" lines_diff="12">
                <assert_contents>
                    <has_text_matching expression="chr2R\tAUGUSTUS\tgene\t7560\t9303\t0\.(7[8-9]|8[0-5])\t-\t.\tchr2R.g1" />
                    <has_text_matching expression="chr2R\tAUGUSTUS\ttranscript\t7560\t9303\t0\.(7[8-9]|8[0-5])\t-\t.\tchr2R.g1.t1" />
                </assert_contents>
            </output>
        </test>
        <!-- Hints combined with a restricted prediction range -->
        <test>
            <param name="input_genome" value="chr2R.truncated.fa" ftype="fasta" />
            <param name="organism" value="fly" />
            <param name="usehints" value="T" />
            <param name="hintsfile" value="hints.truncated.adjusted.gff" />
            <param name="extrinsiccfg" value="extrinsic.truncated.cfg" />
            <param name="userange" value="T" />
            <param name="start" value="7000" />
            <param name="stop" value="9000" />
            <param name="outputs" value="" />
            <param name="softmasking" value="False"/>
            <output name="output" file="augustus.hints_and_range.output.gtf" ftype="gtf" lines_diff="12">
                <assert_contents>
                    <has_text_matching expression="chr2R\tAUGUSTUS\tgene\t7560\t8931\t0.8[2-5]\t-\t.\tchr2R.g1" />
                    <has_text_matching expression="chr2R\tAUGUSTUS\ttranscript\t7560\t8931\t0.8[2-5]\t-\t.\tchr2R.g1.t1" />
                </assert_contents>
            </output>
        </test>
        <!-- Test softmasking parameter-->
        <test>
            <param name="input_genome" value="human_augustus.fa" ftype="fasta" />
            <param name="organism" value="human" />
            <param name="utr" value="True" />
            <param name="softmasking" value="True"/>
            <output name="output" file="human_augustus_utr-on_softmasking.gtf" ftype="gtf" lines_diff="6"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

AUGUSTUS is a gene prediction program for prokaryotes and eukaryotes written by Mario Stanke and Oliver Keller.
It can be used as an ab initio program, which means it bases its prediction purely on the
sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources
such as EST, MS/MS, protein alignments and syntenic genomic alignments.

**Input**

Input data for the gene prediction tool Augustus is a FASTA file with a genomic nucleotide sequence.

**Output**

Augustus produces three output files: a FASTA file with predicted coding sequences, a FASTA file with
predicted protein sequences and a gtf/GFF output file if selected.

**Parameters**

Gene Model::

    partial:      allow prediction of incomplete genes at the sequence boundaries (default of the AUGUSTUS command line)
    intronless:   only predict single-exon genes like in prokaryotes and some eukaryotes
    complete:     only predict complete genes
    atleastone:   predict at least one complete gene
    exactlyone:   predict exactly one complete gene

**Example**

Suppose you have the following DNA FASTA sequence::

    >Seq1
    cccgcggagcgggtaccacatcgctgcgcgatgtgcgagcgaacacccgggctgcgcccg
    ggtgttgcgctcccgctccgcgggagcgctggcgggacgctgcgcgtcccgctcaccaag
    cccgcttcgcgggcttggtgacgctccgtccgctgcgcttccggagttgcggggcttcgc
    cccgctaaccctgggcctcgcttcgctccgccttgggcctgcggcgggtccgctgcgctc
    ccccgcctcaagggcccttccggctgcgcctccaggacccaaccgcttgcgcgggcctgg

Running this tool will produce this::

    # ----- prediction on sequence number 1 (length = 1992969, name = scaffold1|size1992969) -----
    #
    # Constraints/Hints:
    # (none)
    # Predicted genes for sequence number 1 on both strands
    # start gene g1
    scaffold1|size1992969   AUGUSTUS   gene          17453   19382   0.11   +   .   g6
    scaffold1|size1992969   AUGUSTUS   transcript    17453   19382   0.11   +   .   g6.t1
    scaffold1|size1992969   AUGUSTUS   start_codon   17453   17455   .      +   0   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   intron        17615   17660   0.38   +   .   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   intron        17708   17772   0.54   +   .   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   intron        17902   18035   0.58   +   .   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   intron        18313   18367   0.99   +   .   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   intron        19014   19080   0.44   +   .   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           17453   17614   0.55   +   0   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           17661   17707   0.38   +   0   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           17773   17901   0.54   +   1   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           18036   18312   0.52   +   1   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           18368   19013   0.99   +   0   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   CDS           19081   19379   0.31   +   2   transcript_id "g6.t1"; gene_id "g6";
    scaffold1|size1992969   AUGUSTUS   stop_codon    19380   19382   .      +   0   transcript_id "g6.t1"; gene_id "g6";
    ]]></help>
    <expand macro="citations"/>
</tool>