Mercurial > repos > bjoern-gruening > augustus
comparison augustus.xml @ 5:a4fab0c1ae1a
Uploaded
author | bjoern-gruening |
---|---|
date | Sun, 09 Jun 2013 07:54:25 -0400 |
parents | 796814f16b12 |
children |
comparison
equal
deleted
inserted
replaced
4:796814f16b12 | 5:a4fab0c1ae1a |
---|---|
1 <tool id="augustus" name="Augustus" version="0.2"> | 1 <tool id="augustus" name="Augustus" version="0.3"> |
2 <description>gene prediction for eukaryotic genomes</description> | 2 <description>gene prediction for eukaryotic genomes</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.6.1">augustus</requirement> | 4 <requirement type="package" version="2.7">augustus</requirement> |
5 <requirement type="set_environment">AUGUSTUS_SCRIPT_PATH</requirement> | |
5 </requirements> | 6 </requirements> |
6 <command>augustus | 7 <command> |
7 --strand=$strand | 8 ## please set export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config |
8 $noInFrameStop | 9 ## or use the --AUGUSTUS_CONFIG_PATH=path if you are not installing through the toolshed |
9 $gff | 10 ## Augustus writes the protein and coding sequences as comments into the gff/gtf file; an external script is used to extract the sequences into additional files |
10 $protein | 11 |
11 $introns | 12 augustus |
12 $start | 13 --strand=$strand |
13 $stop | 14 $noInFrameStop |
14 $cds | 15 $gff |
15 $codingseq | 16 $protein |
16 $singlestrand | 17 $introns |
17 $input_genome | 18 $start |
18 --genemodel=$genemodel | 19 $stop |
19 --species=$organism | 20 $cds |
20 --outfile=$output | 21 $codingseq |
21 | 22 $singlestrand |
22 #please set export AUGUSTUS_CONFIG_PATH=/path_to_augustus/augustus/config | 23 $input_genome |
23 #or use the --AUGUSTUS_CONFIG_PATH=path switch | 24 $mea |
24 | 25 $utr |
26 --genemodel=$genemodel | |
27 --species=$organism | |
28 ##--outfile=$output | |
29 | tee $output | |
30 #if $protein or $codingseq: | |
31 | python \$AUGUSTUS_SCRIPT_PATH/extract_features.py | |
32 #if $protein: | |
33 --protein $protein_output | |
34 #end if | |
35 #if $codingseq: | |
36 --codingseq $codingseq_output | |
37 #end if | |
38 #end if | |
25 </command> | 39 </command> |
26 <inputs> | 40 <inputs> |
27 <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/> | 41 <param name="input_genome" type="data" format="fasta" label="Genome Sequence"/> |
28 <param name="noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons. Otherwise, intron-spanning stop codons could occur" truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false" /> | 42 <param name="noInFrameStop" type="boolean" label="Don't report transcripts with in-frame stop codons (--noInFrameStop)" truevalue="--noInFrameStop=true" falsevalue="--noInFrameStop=false" checked="false" help="Otherwise, intron-spanning stop codons could occur" /> |
29 <param name="gff" type="boolean" label="GFF formated output, standard is GTF" truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" /> | 43 <param name="singlestrand" type="boolean" label="Predict genes independently on each strand, allow overlapping genes on opposite strands (--singlestrand)" truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" /> |
30 <param name="protein" type="boolean" label="Output predicted protein sequences" truevalue="--protein=on" falsevalue="--protein=off" checked="false" /> | 44 <param name="mea" type="boolean" label="Using the maximum expected accuracy approach (--mea)" truevalue="--mea=1" falsevalue="" checked="false" help="MEA is an alternative decoding approach." /> |
31 <param name="introns" type="boolean" label="Output predicted intron sequences" truevalue="--introns=on" falsevalue="--introns=off" checked="false" /> | 45 <param name="utr" type="boolean" label="Predict the untranslated regions in addition to the coding sequence (--UTR)" truevalue="--UTR=on" falsevalue="--UTR=off" checked="false" help="This currently works only for human, galdieria, toxoplasma and caenorhabditis." /> |
32 <param name="start" type="boolean" label="Output predicted start codons" truevalue="--start=on" falsevalue="--start=off" checked="false" /> | |
33 <param name="stop" type="boolean" label="Output predicted stop codons" truevalue="--stop=on" falsevalue="--stop=off" checked="false" /> | |
34 <param name="cds" type="boolean" label="Output CDS region" truevalue="--cds=on" falsevalue="--cds=off" checked="true" /> | |
35 <param name="codingseq" type="boolean" label="Output coding sequence as comment in the output file" truevalue="--codingseq=on" falsevalue="--codingseq=off" checked="false" /> | |
36 | |
37 <param name="singlestrand" type="boolean" label="Predict genes independently on each strand, allow overlapping genes on opposite strands" truevalue="--singlestrand=true" falsevalue="--singlestrand=false" checked="false" /> | |
38 | 46 |
39 <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset."> | 47 <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset."> |
40 <option value="human">Homo sapiens</option> | 48 <option value="human">Homo sapiens</option> |
41 <option value="fly">Drosophila melanogaster</option> | 49 <option value="fly">Drosophila melanogaster</option> |
42 <option value="arabidopsis">Arabidopsis thaliana</option> | 50 <option value="arabidopsis">Arabidopsis thaliana</option> |
43 <option value="brugia ">Brugia malayi</option> | 51 <option value="brugia ">Brugia malayi</option> |
44 <option value="aedes">Aedes aegypti</option> | 52 <option value="aedes">Aedes aegypti</option> |
45 <option value="tribolium">Tribolium castaneum</option> | 53 <option value="tribolium2012">Tribolium castaneum</option> |
46 <option value="schistosoma">Schistosoma mansoni</option> | 54 <option value="schistosoma">Schistosoma mansoni</option> |
47 <option value="tetrahymena">Tetrahymena thermophila</option> | 55 <option value="tetrahymena">Tetrahymena thermophila</option> |
48 <option value="galdieria">Galdieria sulphuraria</option> | 56 <option value="galdieria">Galdieria sulphuraria</option> |
49 <option value="maize">Zea mays</option> | 57 <option value="maize">Zea mays</option> |
50 <option value="toxoplasma ">Toxoplasma gondii</option> | 58 <option value="toxoplasma ">Toxoplasma gondii</option> |
86 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option> | 94 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option> |
87 <option value="trichinella">Trichinella spiralis</option> | 95 <option value="trichinella">Trichinella spiralis</option> |
88 <option value="ustilago_maydis">Ustilago maydis</option> | 96 <option value="ustilago_maydis">Ustilago maydis</option> |
89 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option> | 97 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option> |
90 <option value="nasonia">Nasonia vitripennis</option> | 98 <option value="nasonia">Nasonia vitripennis</option> |
91 <option value="tomato ">Solanum lycopersicum</option> | 99 <option value="tomato">Solanum lycopersicum</option> |
92 <option value="chlamydomonas">Chlamydomonas reinhardtii</option> | 100 <option value="chlamydomonas">Chlamydomonas reinhardtii</option> |
93 <option value="amphimedon ">Amphimedon queenslandica</option> | 101 <option value="amphimedon">Amphimedon queenslandica</option> |
94 <option value="pneumocystis ">Pneumocystis jirovecii</option> | 102 <option value="pneumocystis">Pneumocystis jirovecii</option> |
103 <option value="chicken">Gallus gallus domesticus (chicken)</option> | |
104 <option value="cacao">Theobroma cacao (cacao)</option> | |
105 <option value="heliconius_melpomene1">Heliconius melpomene</option> | |
106 <option value="xenoturbella">Xenoturbella</option> | |
95 </param> | 107 </param> |
96 | 108 |
97 <param name="strand" type="select" multiple="false" format="text" help="Report predicted genes on both strands, just the forward or just the backward strand."> | 109 <param name="strand" type="select" multiple="false" format="text" help="Report predicted genes on both strands, just the forward or just the backward strand."> |
98 <option value="both">both</option> | 110 <option value="both">both</option> |
99 <option value="forward">forward</option> | 111 <option value="forward">forward</option> |
104 <option value="complete">complete</option> | 116 <option value="complete">complete</option> |
105 <option value="partial">partial</option> | 117 <option value="partial">partial</option> |
106 <option value="intronless">intronless</option> | 118 <option value="intronless">intronless</option> |
107 <option value="atleastone">atleastone</option> | 119 <option value="atleastone">atleastone</option> |
108 <option value="exactlyone">exactlyone</option> | 120 <option value="exactlyone">exactlyone</option> |
121 <option value="bacterium">bacterium (beta version)</option> | |
109 </param> | 122 </param> |
110 | 123 |
111 </inputs> | 124 <param name="protein" type="boolean" label="Output predicted protein sequences (--protein)" truevalue="--protein=on" falsevalue="--protein=off" checked="true" /> |
112 <outputs> | 125 <param name="codingseq" type="boolean" label="Output coding sequence as comment in the output file (codingseq)" truevalue="--codingseq=on" falsevalue="--codingseq=off" checked="true" /> |
126 <param name="introns" type="boolean" label="Output predicted intron sequences (--introns)" truevalue="--introns=on" falsevalue="--introns=off" checked="false" /> | |
127 <param name="start" type="boolean" label="Output predicted start codons (--start)" truevalue="--start=on" falsevalue="--start=off" checked="false" /> | |
128 <param name="stop" type="boolean" label="Output predicted stop codons (--stop)" truevalue="--stop=on" falsevalue="--stop=off" checked="false" /> | |
129 <param name="cds" type="boolean" label="Output CDS region (--cds)" truevalue="--cds=on" falsevalue="--cds=off" checked="true" /> | |
130 <param name="gff" type="boolean" label="GFF formated output, standard is GTF (--gff3)" truevalue="--gff3=on" falsevalue="--gff3=off" checked="false" /> | |
131 | |
132 </inputs> | |
133 <outputs> | |
113 <data format="gtf" name="output"> | 134 <data format="gtf" name="output"> |
114 <change_format> | 135 <change_format> |
115 <when input="gff" value="--gff3=on" format="gff" /> | 136 <when input="gff" value="--gff3=on" format="gff" /> |
116 </change_format> | 137 </change_format> |
117 </data> | 138 </data> |
139 <data format="fasta" name="protein_output"> | |
140 <filter>protein == True</filter> | |
141 </data> | |
142 <data format="fasta" name="codingseq_output"> | |
143 <filter>codingseq == True</filter> | |
144 </data> | |
118 </outputs> | 145 </outputs> |
146 <tests> | |
147 <test> | |
148 <param name="input_genome" value="human_augustus.fa" ftype="fasta" /> | |
149 <param name="organism" value="human" /> | |
150 <param name="utr" value="--UTR=on" /> | |
151 <output name="output" file="human_augustus_utr-on.gtf" ftype="gtf" lines_diff="2"/> | |
152 </test> | |
153 <test> | |
154 <param name="input_genome" value="human_augustus.fa" ftype="fasta" /> | |
155 <param name="organism" value="human" /> | |
156 <param name="utr" value="--UTR=on" /> | |
157 <param name="gff" value="--gff3=on" /> | |
158 <output name="output" file="human_augustus_utr-on.gff" ftype="gff3" lines_diff="2"/> | |
159 </test> | |
160 <test> | |
161 <param name="input_genome" value="arabidopsis_augustus.fa" ftype="fasta" /> | |
162 <param name="organism" value="arabidopsis" /> | |
163 <param name="singlestrand" value="--singlestrand=true" /> | |
164 <param name="mea" value="--mea=1" /> | |
165 <output name="output" file="arabidopsis_augustus_utr-off_singlestrand-on_mea-on.gtf" ftype="gtf" lines_diff="2"/> | |
166 </test> | |
167 <test> | |
168 <param name="input_genome" value="human_augustus.fa" ftype="fasta" /> | |
169 <param name="organism" value="human" /> | |
170 <param name="protein" value="--protein=on" /> | |
171 <param name="codingseq" value="--codingseq=on" /> | |
172 <param name="introns" value="--introns=on" /> | |
173 <param name="cds" value="--cds=on" /> | |
174 <output name="output" file="human_augustus_protein_codingseq_introns_cds_main.gtf" ftype="gff" lines_diff="2"/> | |
175 <output name="codingseq_output" file="human_augustus_protein_codingseq_introns_cds_codingseq.fasta" ftype="fasta" /> | |
176 <output name="protein_output" file="human_augustus_protein_codingseq_introns_cds_protein.fasta" ftype="fasta" /> | |
177 </test> | |
178 </tests> | |
119 <help> | 179 <help> |
120 | 180 |
121 **What it does** | 181 **What it does** |
122 | 182 |
123 AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller. | 183 AUGUSTUS is a gene prediction program for eukaryotes written by Mario Stanke and Oliver Keller. |
124 It can be used as an ab initio program, which means it bases its prediction purely on the | 184 It can be used as an ab initio program, which means it bases its prediction purely on the |
125 sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources | 185 sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources |
126 such as EST, MS/MS, protein alignments and syntenic genomic alignments. | 186 such as EST, MS/MS, protein alignments and syntenic genomic alignments. |
127 | 187 |
128 ----- | 188 ----- |
129 | 189 |
130 **Parameters** | 190 **Parameters** |
131 | 191 |
132 Gene Model:: | 192 Gene Model:: |
133 | 193 |
180 Mario Stanke and Stephan Waack (2003) | 240 Mario Stanke and Stephan Waack (2003) |
181 Gene Prediction with a Hidden-Markov Model and a new Intron Submodel. | 241 Gene Prediction with a Hidden-Markov Model and a new Intron Submodel. |
182 Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225 | 242 Bioinformatics, Vol. 19, Suppl. 2, pages ii215-ii225 |
183 | 243 |
184 | 244 |
185 </help> | 245 </help> |
186 </tool> | 246 </tool> |