comparison mitos/mitos2.xml @ 2:de4408d88c67 draft default tip

Uploaded
author menegidio
date Thu, 17 Jun 2021 15:27:29 +0000
parents
children
comparison
equal deleted inserted replaced
1:d472d16b99c5 2:de4408d88c67
1 <tool id="mitos2" name="@MITOS_NAME@" version="@MITOS_VERSION@">
2 <description>de-novo annotation of metazoan mitochondrial genomes</description>
3 <macros> <!-- macro definitions: one external import plus tokens that are substituted wherever @NAME@ appears in this file -->
4 <import>macros.xml</import> <!-- NOTE(review): @COMMON_HELP@ used in the help section is not defined in this file, so it presumably comes from macros.xml; confirm -->
5 <token name="@MITOS_NAME@">MITOS2</token> <!-- display name, used in the name attribute of the tool element -->
6 <token name="@MITOS_VERSION@">2.0.6</token> <!-- used both as the wrapper version and as the version of the mitos package requirement -->
7 </macros>
8 <requirements> <!-- packages the job environment must provide; every requirement carries a version so that the environment is reproducible -->
9 <requirement type="package" version="@MITOS_VERSION@">mitos</requirement>
10 <requirement type="package" version="3.0">zip</requirement> <!-- zip builds the optional "raw" output archive in the command section -->
11 </requirements>
12 <version_command>python -c "import mitos; print(mitos.__version__)"</version_command> <!-- prints the __version__ attribute of the installed mitos Python module -->
13 <command detect_errors="aggressive"><![CDATA[
14 mkdir outdir &&
15
16 runmitos.py
17 --input '$input'
18 --code $code
19 --outdir outdir
20 --refdir '/'
21 --refseqver '$refseqver.fields.path'
22 $linear
23 #for tpe in ["prot", "trna", "rrna", "intron", "oril", "orih"]
24 #if not $tpe in str($advanced.featuretypes).split(',')
25 --$tpe 0
26 #end if
27 #end for
28 --finovl $advanced.finovl
29 $advanced.best
30 #set fragovl=float($advanced.fragovl)/100.0
31 --fragovl $fragovl
32 --fragfac $advanced.fragfac
33
34 --evalue $advanced_prot.evalue
35 #set cutoff=float($advanced_prot.cutoff)/100.0
36 --cutoff $cutoff
37 --clipfac $advanced_prot.clipfac
38 $advanced_prot.ncbicode
39 $advanced_prot.alarab
40 $advanced_prot.oldstst
41 $advanced_ncrna.locandgloc
42 --ncev $advanced_ncrna.ncev
43 $advanced_ncrna.sensitive
44 --maxtrnaovl $advanced_ncrna.maxtrnaovl
45 --maxrrnaovl $advanced_ncrna.maxrrnaovl
46
47 #if not ("protein_plot" in str($addoutputs).split(',') or "ncRNA_plot" in str($addoutputs).split(',')):
48 --noplots
49 #end if
50
51 #if "raw" in str($addoutputs).split(','):
52 && zip -9 -y -r output.zip outdir/ > /dev/null
53 #end if
54 ]]></command> <!-- Command summary: creates outdir and runs runmitos.py on the input; the reference directory is fixed to "/" and the path field of the selected data table entry is passed as refseqver; every feature type that is not selected in advanced.featuretypes is switched off with "TYPE 0"; fragovl and cutoff are entered in percent and divided by 100 before being passed on; noplots is added unless one of the two prediction plots was requested; outdir is zipped to output.zip only when the raw output was requested. -->
55 <inputs>
56 <param argument="--input" label="Sequence" type="data" format="fasta" help="A single sequence in FASTA format"> <!-- the options filter below limits the selectable datasets by their "sequences" metadata (value 1, i.e. one sequence per file) -->
57 <options options_filter_attribute="metadata.sequences">
58 <filter type="add_value" value="1"/>
59 </options>
60 </param>
61 <param argument="--code" label="Genetic code" type="select"> <!-- the value of the chosen option is the genetic code number handed to runmitos.py -->
62 <option value="2">Vertebrate (2)</option>
63 <option value="3">Fungi (3)</option>
64 <option value="4">Mold, Protozoan, Coelenterate (4)</option>
65 <option value="5">Invertebrate (5)</option>
66 <option value="9">Echinoderm, Flatworm (9)</option>
67 <option value="13">Ascidian (13)</option>
68 <option value="14">Alternative Flatworm (14)</option>
69 </param>
70 <param argument="--refseqver" label="Reference data" type="select" help="contact the administrator of this Galaxy instance if you miss reference data"> <!-- options come from the "mitos" data table; the command section passes the path field of the selected entry -->
71 <options from_data_table="mitos">
72 <filter type="static_value" value="mitos2" column="2"/> <!-- keep only data table rows whose column 2 equals "mitos2" -->
73 </options>
74 <validator message="No reference annotation is available for MITOS2" type="no_options" /> <!-- message shown when the data table yields no option -->
75 </param>
76 <param argument="--linear" checked="false" label="Treat sequence as linear" type="boolean" truevalue="--linear" falsevalue=""/> <!-- unchecked by default; when checked the flag itself is inserted into the command line, otherwise nothing is added -->
77 <param name="addoutputs" type="select" multiple="true" label="Outputs"> <!-- each selected value enables the output whose filter looks for it; only BED is selected by default -->
78 <option value="bed" selected="true">BED</option>
79 <option value="mito" selected="false">mito</option>
80 <option value="gff" selected="false">GFF file</option>
81 <option value="seq" selected="false">SEQ</option>
82 <option value="fas" selected="false">nucleotide FASTA</option>
83 <option value="faa" selected="false">protein FASTA</option>
84 <option value="geneorder" selected="false">geneorder</option>
85 <option value="protein_plot" selected="false">Protein prediction plot</option> <!-- selecting this or ncRNA_plot keeps the command section from adding the noplots switch -->
86 <option value="ncRNA_plot" selected="false">ncRNA prediction plot</option>
87 <!--<option value="ncRNA_structure_ps_plots" selected="false">ncRNA structure plots - postscript</option>-->
88 <option value="ncRNA_structure_svg_plots" selected="false">ncRNA structure plots - svg</option>
89 <option value="raw" selected="false">zipped raw results</option> <!-- makes the command section zip the whole outdir into output.zip -->
90 </param>
91 <section name="advanced" title="Advanced options"> <!-- general annotation settings, referenced as $advanced.NAME in the command section -->
92 <param name="featuretypes" label="Feature types" help="Feature types that should be predicted by MITOS (--noprot,--notrna,--norrna)" type="select" multiple="true"> <!-- types left unselected are disabled on the command line with "TYPE 0"; prot, trna and rrna are selected by default -->
93 <option value="prot" selected="true">Protein coding genes</option>
94 <option value="trna" selected="true">tRNAs</option>
95 <option value="rrna" selected="true">rRNAs</option>
96 <option value="intron" selected="false">Introns</option>
97 <option value="oril" selected="false">Origin of light strand replication</option>
98 <option value="orih" selected="false">Origin of heavy strand replication</option>
99 </param>
100 <param argument="--finovl" label="Final overlap (nt)" help="Maximum number of nucleotides by which genes of different types may overlap" type="integer" value="50" min="0"/>
101 <param argument="--best" checked="false" label="Annotate only the best copy of each feature" type="boolean" truevalue="--best" falsevalue=""/> <!-- the flag is emitted only when checked -->
102 <param argument="--fragovl" label="Fragment overlap" help="Maximum allowed overlap of proteins in the query (in percent of the shorter query range) for two hits to be counted as fragments of the same gene" type="integer" value="20" min="0" max="100"/> <!-- entered in percent; the command section divides the value by 100 -->
103 <param argument="--fragfac" label="Fragment quality factor" help="Maximum factor by which fragments of the same protein may differ in their quality" type="float" min="0" value="10"/>
104 </section>
105 <section name="advanced_prot" title="Advanced options for protein coding gene prediction"> <!-- settings for protein coding gene prediction, referenced as $advanced_prot.NAME in the command section -->
106 <param argument="--evalue" label="BLAST E-value Exponent" help="Negation of the exponent of the E-value threshold used by BLAST, i.e. a value X gives an E-value of 10^(-X)" type="float" value="2" min="1"/>
107 <param argument="--cutoff" label="Quality cutoff" help="Minimum allowed quality in % of the maximum quality value per reading frame" type="integer" value="50" min="0" max="100"/> <!-- entered in percent; the command section divides the value by 100 -->
108 <param argument="--clipfac" label="Clipping factor" help="Clip overlapping proteins with the same name that differ by less than the specified factor" type="float" value="10" min="0"/>
109 <param argument="--ncbicode" checked="false" label="use start/stop codons as in NCBI (default: learned start/stop codons)" type="boolean" truevalue="--ncbicode" falsevalue=""/> <!-- this and the two booleans below emit their flag only when checked -->
110 <param argument="--alarab" checked="false" label="Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac" type="boolean" truevalue="--alarab" falsevalue=""/>
111 <param argument="--oldstst" checked="false" label="Use the old start/stop prediction method of MITOS1" type="boolean" truevalue="--oldstst" falsevalue=""/>
112 </section>
113 <section name="advanced_ncrna" title="Advanced options for ncRNA gene prediction"> <!-- settings for the ncRNA search, referenced as $advanced_ncrna.NAME in the command section -->
114 <param argument="--locandgloc" checked="false" label="Run mitfi in glocal and local mode (default: local only)" type="boolean" truevalue="--locandgloc" falsevalue=""/> <!-- the flag is emitted only when checked -->
115 <param argument="--ncev" label="e-value to use for infernal fast mode" type="float" min="0" value="0.01"/>
116 <param argument="--sensitive" checked="false" label="Use infernal's sensitive mode only" type="boolean" truevalue="--sensitive" falsevalue=""/> <!-- the flag is emitted only when checked -->
117 <param argument="--maxtrnaovl" label="Allow tRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
118 <param argument="--maxrrnaovl" label="Allow rRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
119 </section>
120 </inputs>
121 <outputs> <!-- every output is created only when its filter finds the matching value in the addoutputs selection; from_work_dir names the file collected from the job working directory -->
122 <data name="bedout" format="bed" from_work_dir="outdir/result.bed" label="${tool.name} on ${on_string}: BED"> <!-- labelled like its siblings so the BED dataset can be told apart when several outputs are selected -->
123 <filter>"bed" in str(addoutputs)</filter>
124 </data>
125 <data name="mitoout" format="tabular" from_work_dir="outdir/result.mitos" label="${tool.name} on ${on_string}: mito">
126 <filter>"mito" in str(addoutputs)</filter>
127 </data>
128 <data name="gffout" format="gff" from_work_dir="outdir/result.gff" label="${tool.name} on ${on_string}: GFF">
129 <filter>"gff" in str(addoutputs)</filter>
130 </data>
131 <data name="seqout" format="txt" from_work_dir="outdir/result.seq" label="${tool.name} on ${on_string}: TBL">
132 <filter>"seq" in str(addoutputs)</filter>
133 </data>
134 <data name="faa" format="fasta" from_work_dir="outdir/result.faa" label="${tool.name} on ${on_string}: aa FASTA">
135 <filter>"faa" in str(addoutputs)</filter>
136 </data>
137 <data name="fas" format="fasta" from_work_dir="outdir/result.fas" label="${tool.name} on ${on_string}: nt FASTA">
138 <filter>"fas" in str(addoutputs)</filter>
139 </data>
140 <data name="geneorderout" format="fasta" from_work_dir="outdir/result.geneorder" label="${tool.name} on ${on_string}: geneorder">
141 <filter>"geneorder" in str(addoutputs)</filter>
142 </data>
143 <data name="protein_plot_out" format="pdf" from_work_dir="outdir/plots/prot.pdf" label="${tool.name} on ${on_string}: Protein prediction plot">
144 <filter>"protein_plot" in str(addoutputs)</filter>
145 </data>
146 <data name="ncRNA_plot_out" format="pdf" from_work_dir="outdir/plots/rna.pdf" label="${tool.name} on ${on_string}: ncRNA prediction plot">
147 <filter>"ncRNA_plot" in str(addoutputs)</filter>
148 </data>
149 <!--<collection name="ncRNA_structure_plot_ps_out" type="list" label="${tool.name} on ${on_string}: ncRNA postscript structure plots">
150 <discover_datasets pattern="(?P&lt;name&gt;.+)\.ps" format="ps" directory="outdir/plots" />
151 <filter>"ncRNA_structure_ps_plots" in str(addoutputs)</filter>
152 </collection>-->
153 <collection name="ncRNA_structure_plot_svg_out" type="list" label="${tool.name} on ${on_string}: ncRNA svg structure plots"> <!-- one list element per .svg file found in outdir/plots; the element name is the file name without the extension -->
154 <discover_datasets pattern="(?P&lt;name&gt;.+)\.svg" format="svg" directory="outdir/plots" />
155 <filter>"ncRNA_structure_svg_plots" in str(addoutputs)</filter>
156 </collection>
157 <data name="rawout" format="zip" from_work_dir="output.zip" label="${tool.name} on ${on_string}: raw data"> <!-- output.zip is written by the zip step at the end of the command section -->
158 <filter>"raw" in str(addoutputs)</filter>
159 </data>
160 </outputs>
161 <tests>
162 <!-- default options: only the three required parameters are set -->
163 <test expect_num_outputs="1"> <!-- BED is the only output selected by default -->
164 <param name="input" value="NC_012920.fasta"/>
165 <param name="code" value="2"/>
166 <param name="refseqver" value="mitos2-refdata" />
167 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed"/>
168 <assert_command> <!-- checks the generated command line, not the tool results -->
169 <has_text text="--code 2"/>
170 <has_text text="--finovl 50"/>
171 <not_has_text text="--trna"/> <!-- prot, trna and rrna are selected by default, so none of them is switched off -->
172 <not_has_text text="--rrna"/>
173 <not_has_text text="--prot"/>
174 <has_text text="--intron 0"/> <!-- intron, oril and orih are unselected by default and therefore disabled -->
175 <has_text text="--oril 0"/>
176 <has_text text="--orih 0"/>
177 <has_text text="--evalue 2.0"/>
178 <has_text text="--cutoff 0.5"/> <!-- default of 50 percent expressed as a fraction -->
179 <has_text text="--clipfac 10.0"/>
180 <not_has_text text="--best"/>
181 <has_text text="--fragovl 0.2"/> <!-- default of 20 percent expressed as a fraction -->
182 <has_text text="--fragfac 10.0"/>
183 <has_text text="--ncev 0.01"/>
184 <has_text text="--maxtrnaovl 50"/>
185 <has_text text="--maxrrnaovl 50"/>
186 <has_text text="--noplots"/> <!-- no plot output was requested -->
187 </assert_command>
188 </test>
189 <!-- different main options: another genetic code and non-default values in the "advanced" section -->
190 <test expect_num_outputs="1"> <!-- the output selection is untouched, so BED is still the only output -->
191 <param name="input" value="NC_012920.fasta"/>
192 <param name="code" value="5"/>
193 <param name="refseqver" value="mitos2-refdata" />
194 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed" compare="sim_size"/>
195 <section name="advanced">
196 <param name="featuretypes" value="prot,trna,rrna,intron,oril,orih"/> <!-- all six feature types are selected -->
197 <param name="finovl" value="49"/>
198 <param name="best" value="true"/>
199 <param name="fragovl" value="10"/>
200 <param name="fragfac" value="9"/>
201 </section>
202 <assert_command> <!-- checks the generated command line, not the tool results -->
203 <has_text text="--code 5"/>
204 <has_text text="--finovl 49"/>
205 <not_has_text text="--trna"/> <!-- with every feature type selected no "TYPE 0" switch may appear -->
206 <not_has_text text="--rrna"/>
207 <not_has_text text="--prot"/>
208 <not_has_text text="--intron"/>
209 <not_has_text text="--oril"/>
210 <not_has_text text="--orih"/>
211 <has_text text="--evalue 2.0"/>
212 <has_text text="--cutoff 0.5"/>
213 <has_text text="--clipfac 10.0"/>
214 <has_text text="--best"/>
215 <has_text text="--fragovl 0.1"/> <!-- 10 percent expressed as a fraction -->
216 <has_text text="--fragfac 9.0"/>
217 <has_text text="--ncev 0.01"/>
218 <has_text text="--maxtrnaovl 50"/>
219 <has_text text="--maxrrnaovl 50"/>
220 <has_text text="--noplots"/> <!-- no plot output was requested -->
221 </assert_command>
222 </test>
223 <!-- different protein coding gene (pcg) and ncRNA options, with every output type requested -->
224 <test expect_num_outputs="11"> <!-- ten datasets plus the SVG collection -->
225 <param name="input" value="NC_012920.fasta"/>
226 <param name="code" value="2"/>
227 <param name="refseqver" value="mitos2-refdata" />
228 <section name="advanced_prot">
229 <param name="evalue" value="3"/>
230 <param name="cutoff" value="49"/>
231 <param name="clipfac" value="9"/>
232 <param name="ncbicode" value="true"/>
233 <param name="alarab" value="true"/>
234 <param name="oldstst" value="true"/>
235 </section>
236 <section name="advanced_ncrna">
237 <!-- <param name="locandgloc" value="true"/> should be possible from 2.0.5 https://gitlab.com/Bernt/MITOS/-/commit/9b4c55c29961c307dce02ac0319dadbd76f6b9e5-->
238 <param name="ncev" value="0.1"/>
239 <param name="sensitive" value="true"/>
240 <param name="maxtrnaovl" value="51"/>
241 <param name="maxrrnaovl" value="49"/>
242 </section>
243 <param name="addoutputs" value="bed,mito,gff,seq,fas,faa,geneorder,protein_plot,ncRNA_plot,ncRNA_structure_svg_plots,raw"/>
244 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed" compare="sim_size"/>
245 <output name="mitoout" file="mitos2_NC_012920.mitos" ftype="tabular"/>
246 <output name="gffout" file="mitos2_NC_012920.gff" ftype="gff"/>
247 <output name="seqout" file="mitos2_NC_012920.seq" ftype="txt"/>
248 <output name="faa" file="mitos2_NC_012920.faa" ftype="fasta"/>
249 <output name="fas" file="mitos2_NC_012920.fas" ftype="fasta"/>
250 <output name="geneorderout" file="mitos2_NC_012920.geneorder" ftype="fasta"/>
251 <output name="protein_plot_out" file="mitos2_NC_012920_prot.pdf" ftype="pdf" compare="sim_size"/>
252 <output name="ncRNA_plot_out" file="mitos2_NC_012920_ncrna.pdf" ftype="pdf" compare="sim_size"/>
253 <output name="rawout" ftype="zip"> <!-- the archive is only checked for containing a result.bed member -->
254 <assert_contents>
255 <has_archive_member path=".*/result.bed"/>
256 </assert_contents>
257 </output>
258 <output_collection name="ncRNA_structure_plot_svg_out" type="list" count="17"/> <!-- expects 17 SVG structure plots in the collection -->
259 <assert_command> <!-- checks the generated command line, not the tool results -->
260 <has_text text="--code 2"/>
261 <has_text text="--finovl 50"/>
262 <not_has_text text="--trna"/> <!-- the "advanced" section keeps its defaults: prot, trna, rrna on; intron, oril, orih off -->
263 <not_has_text text="--rrna"/>
264 <not_has_text text="--prot"/>
265 <has_text text="--intron 0"/>
266 <has_text text="--oril 0"/>
267 <has_text text="--orih 0"/>
268 <has_text text="--evalue 3.0"/>
269 <has_text text="--cutoff 0.49"/> <!-- 49 percent expressed as a fraction -->
270 <has_text text="--clipfac 9.0"/>
271 <has_text text="--alarab"/>
272 <has_text text="--oldstst"/>
273 <has_text text="--ncbicode"/>
274 <not_has_text text="--best"/>
275 <has_text text="--fragovl 0.2"/>
276 <has_text text="--fragfac 10.0"/>
277 <!--<has_text text="\-\-locandgloc"/>-->
278 <has_text text="--ncev 0.1"/>
279 <has_text text="--sensitive"/>
280 <has_text text="--maxtrnaovl 51"/>
281 <has_text text="--maxrrnaovl 49"/>
282 <not_has_text text="--noplots"/> <!-- both prediction plots were requested, so plotting must stay enabled -->
283 </assert_command>
284 </test>
285 </tests>
286 <help>@COMMON_HELP@
287 <![CDATA[
288
289
290 **Advanced options**
291
292 - Feature types
293
294 Select the feature types that should be annotated. By default this is protein coding genes, tRNA and rRNA which is useful for metazoan mitogenomes. In addition also the replication origins of the light (OL) and heavy (OH) strand and introns can be annotated. The annotation of the replication origins is most useful for chordate mitogenomes. Introns are usually only found in mitogenomes of non-metazoans and basal Metazoa.
295
296 - Final overlap (nt)
297
298 Maximum number of nucleotides by which genes of different types may overlap. Applies to merging of the final predictions.
299
300 - Annotate only the best copy of each feature
301
302 If there are several copies of the same feature type, only the one with the lowest e-value (for ncRNAs and OL) or the highest quality score (for protein coding genes and OH) is annotated.
303
304 - Fragment overlap
305
306 Maximum overlap (in percent of the shorter feature) that two hits may have in the query and still be counted as fragments of the same gene.
307
308 - Fragment quality factor
309
310 Maximum factor by which fragments may differ in their quality scores. Higher values allow that parts of a gene can differ more in their quality.
311
312 **Advanced options for protein coding gene prediction**
313
314 - BLAST E-value Exponent
315
316 The statistical significance threshold for considering matches in the BLASTX search. The value entered here is the negation of the exponent of the E-value threshold that should be used by BLAST, i.e. a value X gives an E-value of 10^(-X).
317
318 - Quality cutoff
319
320 Minimum allowed quality value (in percent) of the maximum quality value per reading frame. Higher values correspond to shorter protein predictions and therefore a reduced risk of conflicts with other features.
321
322 - Clipping factor
323
324 Clipping is started if overlapping predictions of hits with the same name differ by less than a factor X in their quality value.
325
326 - use start/stop codons as in NCBI (default: learned start/stop codons)
327
328 Instead of the codon probabilities derived from the protein coding genes annotated in RefSeq the codons listed at NCBI taxonomy are used with equal probabilities (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
329
330 - Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac parameters
331
332 Note: 1) this only works for Metazoa RefSeq release 63 reference data set. 2) This will only predict the protein coding genes that are typical for metazoan mitochondrial genomes.
333
334 - Use the old start/stop prediction method of MITOS1
335
336 The search for start and stop codons just takes the closest to the initial start / stop positions within 6aa (i.e. the method used in MITOS1)
337
338 **Advanced options for ncRNA gene prediction**
339
340 - Run mitfi in glocal and local mode (default: local only)
341
342 By default mitfi uses infernal's cmsearch in local search mode only. By enabling this option mitfi will also invoke cmsearch in glocal mode if a feature is missing.
343
344 - e-value to use for infernal fast mode
345
346 The e-value passed to the first pass of cmsearch (the fast mode). In the second pass (the sensitive search) an e-value of 0.1 is used.
347
348 - Use infernal's sensitive mode only
349
350 By default mitfi searches for ncRNAs using cmsearch's default fast mode first. If a ncRNA type is missing it is searched using the sensitive mode. Using the sensitive mode only can be useful if low scoring copies are expected which might be missed when searching in the two stage mode.
351
352 - Allow tRNA/rRNA overlap of up to X nt for mitfi
353
354 Allow that a tRNA/rRNA overlaps with another feature by this number of nucleotides.
355
356 ]]></help>
357 <citations> <!-- publications to cite when using this tool, each referenced by its DOI -->
358 <citation type="doi">10.1093/nar/gkz833</citation>
359 <citation type="doi">10.1016/j.ympev.2016.09.024</citation>
360 </citations>
361 </tool>
362