view mitos2.xml @ 0:dd589aa77943 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mitos commit 791f28c8a7194fdd1ecec05ad166932d461899b2"
author iuc
date Fri, 27 Mar 2020 17:53:42 -0400
parents
children 80323066acd4
line wrap: on
line source

<tool id="mitos2" name="@MITOS_NAME@" version="@MITOS_VERSION@">
  <!-- Short description of the tool. -->
  <description>de-novo annotation of metazoan mitochondrial genomes</description>
  <macros>
    <!-- Shared definitions are imported first; the local tokens follow. -->
    <import>macros.xml</import>
    <!-- @MITOS_NAME@ is used as the tool name; @MITOS_VERSION@ is used as the tool version and as the version of the mitos package requirement. -->
    <token name="@MITOS_NAME@">MITOS2</token>
    <token name="@MITOS_VERSION@">2.0.6</token>
  </macros>
  <requirements>
    <!-- The mitos package version follows the @MITOS_VERSION@ token. -->
    <requirement type="package" version="@MITOS_VERSION@">mitos</requirement>
    <!-- zip is used by the command to pack the raw result directory; the version is pinned so that the resolved environment is reproducible. -->
    <requirement type="package" version="3.0">zip</requirement>
  </requirements>
  <!-- Prints the version attribute of the installed mitos Python package. -->
  <version_command>python -c "import mitos; print(mitos.__version__)"</version_command>
  <!--
    Command template (Cheetah). It creates the directory "outdir" and runs
    runmitos.py on the selected input, writing all results into that directory.
    * refdir is set to "/" and refseqver to the "path" field of the selected data
      table entry (presumably an absolute path, so that both together address
      the reference data directory; confirm against the mitos data table).
    * Every feature type that is not selected in advanced.featuretypes is passed
      with the value 0.
    * fragovl and cutoff are entered in percent and divided by 100 here.
    * The noplots flag is added unless the protein plot or the ncRNA plot is
      requested. NOTE(review): requesting only the svg structure plots does not
      remove the flag; confirm that the svg files are still produced then.
    * If the raw output is requested, outdir is zipped into output.zip.
  -->
  <command detect_errors="aggressive"><![CDATA[
mkdir outdir &&

runmitos.py
--input '$input'
--code $code
--outdir outdir
--refdir '/'
--refseqver '$refseqver.fields.path'
$linear
#for tpe in ["prot", "trna", "rrna", "intron", "oril", "orih"]
  #if not $tpe in str($advanced.featuretypes).split(',')
    --$tpe 0
  #end if
#end for
--finovl $advanced.finovl
$advanced.best
#set fragovl=float($advanced.fragovl)/100.0
--fragovl $fragovl
--fragfac $advanced.fragfac

--evalue $advanced_prot.evalue
#set cutoff=float($advanced_prot.cutoff)/100.0
--cutoff $cutoff
--clipfac $advanced_prot.clipfac
$advanced_prot.ncbicode
$advanced_prot.alarab
$advanced_prot.oldstst
$advanced_ncrna.locandgloc
--ncev $advanced_ncrna.ncev
$advanced_ncrna.sensitive
--maxtrnaovl $advanced_ncrna.maxtrnaovl
--maxrrnaovl $advanced_ncrna.maxrrnaovl

#if not ("protein_plot" in str($addoutputs).split(',') or "ncRNA_plot" in str($addoutputs).split(',')):
  --noplots
#end if

#if "raw" in str($addoutputs).split(','):
    && zip -9 -y -r output.zip outdir/ > /dev/null
#end if
  ]]></command>
  <inputs>
    <!-- Input sequence. The options filter on metadata.sequences with the added value 1 is presumably meant to offer only datasets that contain a single sequence (TODO confirm). -->
    <param argument="--input" label="Sequence" type="data" format="fasta" help="A single sequence in FASTA format">
      <options options_filter_attribute="metadata.sequences">
        <filter type="add_value" value="1"/>
      </options>
    </param>
    <!-- Genetic code table; the option value is the number handed to the code option of runmitos.py. -->
    <param argument="--code" label="Genetic code" type="select">
      <option value="2">Vertebrate (2)</option>
      <option value="4">Mold, Protozoan, Coelenterate (4)</option>
      <option value="5">Invertebrate (5)</option>
      <option value="9">Echinoderm, Flatworm (9)</option>
      <option value="13">Ascidian (13)</option>
      <option value="14">Alternative Flatworm (14)</option>
    </param>
    <!-- Reference data is read from the "mitos" data table; only rows whose column 2 equals "mitos2" are offered, and the validator rejects an empty list. -->
    <param argument="--refseqver" type="select" label="Reference data" help="contact the administrator of this Galaxy instance if you miss reference data">
      <options from_data_table="mitos">
        <filter type="static_value" column="2" value="mitos2"/>
      </options>
      <validator type="no_options" message="No reference annotation is available for MITOS2"/>
    </param>
    <!-- Flag parameter: contributes its truevalue to the command line when checked, nothing otherwise. -->
    <param argument="--linear" type="boolean" truevalue="--linear" falsevalue="" checked="false" label="Treat sequence as linear"/>
    <!-- Result files to add to the history; the values are matched by the command template and by the output filters. Only BED is preselected. -->
    <param name="addoutputs" type="select" multiple="true" label="Outputs">
      <option selected="true" value="bed">BED</option>
      <option selected="false" value="mito">mito</option>
      <option selected="false" value="gff">GFF file</option>
      <option selected="false" value="seq">SEQ</option>
      <option selected="false" value="fas">nucleotide FASTA</option>
      <option selected="false" value="faa">protein FASTA</option>
      <option selected="false" value="geneorder">geneorder</option>
      <option selected="false" value="protein_plot">Protein prediction plot</option>
      <option selected="false" value="ncRNA_plot">ncRNA prediction plot</option>
      <!--<option value="ncRNA_structure_ps_plots" selected="false">ncRNA structure plots - postscript</option>-->
      <option selected="false" value="ncRNA_structure_svg_plots">ncRNA structure plots - svg</option>
      <option selected="false" value="raw">zipped raw results</option>
    </param>
    <!-- General annotation options. The command template passes every feature type that is left unselected with the value 0, and converts fragovl from percent to a fraction. -->
    <section name="advanced" title="Advanced options">
      <param name="featuretypes" label="Feature types" help="Feature types that should be predicted by MITOS (unselected types are passed as --prot 0, --trna 0, --rrna 0, ...)" type="select" multiple="true">
        <option value="prot" selected="true">Protein coding genes</option>
        <option value="trna" selected="true">tRNAs</option>
        <option value="rrna" selected="true">rRNAs</option>
        <option value="intron" selected="false">Introns</option>
        <option value="oril" selected="false">Origin of light strand replication</option>
        <option value="orih" selected="false">Origin of heavy strand replication</option>
      </param>
      <param argument="--finovl" label="Final overlap (nt)" help="Maximum number of nucleotides by which genes of different types may overlap" type="integer" value="50" min="0"/>
      <param argument="--best" checked="false" label="Annotate only the best copy of each feature" type="boolean" truevalue="--best" falsevalue=""/>
      <param argument="--fragovl" label="Fragment overlap" help="Maximum allowed overlap of proteins in the query (in percent of the shorter query range) for two hits to be counted as fragments of the same gene" type="integer" value="20" min="0" max="100"/>
      <param argument="--fragfac" label="Fragment quality factor" help="Maximum factor by which fragments of the same protein may differ in their quality" type="float" min="0" value="10"/>
    </section>
    <!-- Options for protein coding gene prediction. The command template divides cutoff by 100 before passing it on; the three boolean flags contribute their truevalue only when checked. -->
    <section name="advanced_prot" title="Advanced options for protein coding gene prediction">
      <param argument="--evalue" type="float" value="2" min="1" label="BLAST E-value Exponent" help="Negation of the exponent of the E-value threshold used by BLAST, i.e. a value X gives an E-value of 10^(-X)"/>
      <param argument="--cutoff" type="integer" value="50" min="0" max="100" label="Quality cutoff" help="Minimum allowed quality in % of the maximum quality value per reading frame"/>
      <param argument="--clipfac" type="float" value="10" min="0" label="Clipping factor" help="Clip overlapping proteins with the same name that differ by less than the specified factor"/>
      <param argument="--ncbicode" type="boolean" truevalue="--ncbicode" falsevalue="" checked="false" label="use start/stop codons as in NCBI (default: learned start/stop codons)"/>
      <param argument="--alarab" type="boolean" truevalue="--alarab" falsevalue="" checked="false" label="Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac"/>
      <param argument="--oldstst" type="boolean" truevalue="--oldstst" falsevalue="" checked="false" label="Use the old start/stop prediction method of MITOS1"/>
    </section>
    <!-- Options for ncRNA gene prediction; all values are passed to runmitos.py unchanged. -->
    <section name="advanced_ncrna" title="Advanced options for ncRNA gene prediction">
      <param argument="--locandgloc" checked="false" label="Run mitfi in glocal and local mode (default: local only)" type="boolean" truevalue="--locandgloc" falsevalue=""/>
      <param argument="--ncev" label="e-value to use for infernal fast mode" type="float" min="0" value="0.01"/>
      <param argument="--sensitive" checked="false" label="Use infernal's sensitive mode only" type="boolean" truevalue="--sensitive" falsevalue=""/>
      <param argument="--maxtrnaovl" label="Allow tRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
      <param argument="--maxrrnaovl" label="Allow rRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
    </section>
  </inputs>
  <!--
    Each output is created only when the matching value of the "addoutputs"
    parameter is selected. The filters are substring tests on str(addoutputs),
    so no option value may be a substring of another option value.
    Every output carries a label suffix naming its content.
  -->
  <outputs>
    <data name="bedout" format="bed" from_work_dir="outdir/result.bed" label="${tool.name} on ${on_string}: BED">
      <filter>"bed" in str(addoutputs)</filter>
    </data>
    <data name="mitoout" format="tabular" from_work_dir="outdir/result.mitos" label="${tool.name} on ${on_string}: mito">
      <filter>"mito" in str(addoutputs)</filter>
    </data>
    <data name="gffout" format="gff" from_work_dir="outdir/result.gff" label="${tool.name} on ${on_string}: GFF">
      <filter>"gff" in str(addoutputs)</filter>
    </data>
    <data name="seqout" format="txt" from_work_dir="outdir/result.seq" label="${tool.name} on ${on_string}: TBL">
      <filter>"seq" in str(addoutputs)</filter>
    </data>
    <data name="faa" format="fasta" from_work_dir="outdir/result.faa" label="${tool.name} on ${on_string}: aa FASTA">
      <filter>"faa" in str(addoutputs)</filter>
    </data>
    <data name="fas" format="fasta" from_work_dir="outdir/result.fas" label="${tool.name} on ${on_string}: nt FASTA">
      <filter>"fas" in str(addoutputs)</filter>
    </data>
    <data name="geneorderout" format="fasta" from_work_dir="outdir/result.geneorder" label="${tool.name} on ${on_string}: geneorder">
      <filter>"geneorder" in str(addoutputs)</filter>
    </data>
    <data name="protein_plot_out" format="pdf" from_work_dir="outdir/plots/prot.pdf" label="${tool.name} on ${on_string}: Protein prediction plot">
      <filter>"protein_plot" in str(addoutputs)</filter>
    </data>
    <data name="ncRNA_plot_out" format="pdf" from_work_dir="outdir/plots/rna.pdf" label="${tool.name} on ${on_string}: ncRNA prediction plot">
      <filter>"ncRNA_plot" in str(addoutputs)</filter>
    </data>
    <!--<collection name="ncRNA_structure_plot_ps_out" type="list" label="${tool.name} on ${on_string}: ncRNA postscript structure plots">
      <discover_datasets pattern="(?P&lt;name&gt;.+)\.ps" format="ps" directory="outdir/plots" />
      <filter>"ncRNA_structure_ps_plots" in str(addoutputs)</filter>
    </collection>-->
    <!-- One collection element per svg file found in outdir/plots. -->
    <collection name="ncRNA_structure_plot_svg_out" type="list" label="${tool.name} on ${on_string}: ncRNA svg structure plots">
      <discover_datasets pattern="(?P&lt;name&gt;.+)\.svg" format="svg" directory="outdir/plots" />
      <filter>"ncRNA_structure_svg_plots" in str(addoutputs)</filter>
    </collection>
    <!-- output.zip is written by the zip step of the command when "raw" is selected. -->
    <data name="rawout" format="zip" from_work_dir="output.zip" label="${tool.name} on ${on_string}: raw data">
      <filter>"raw" in str(addoutputs)</filter>
    </data>
  </outputs>
  <tests>
    <!-- Defaults only: a single BED output, compared exactly with the reference file. -->
    <test expect_num_outputs="1">
      <param name="input" value="NC_012920.fasta"/>
      <param name="refseqver" value="mitos2-refdata"/>
      <param name="code" value="2"/>
      <output name="bedout" ftype="bed" file="mitos2_NC_012920.bed"/>
      <assert_command>
        <!-- options that must be present -->
        <has_text text="--code 2"/>
        <has_text text="--finovl 50"/>
        <has_text text="--fragovl 0.2"/>
        <has_text text="--fragfac 10.0"/>
        <has_text text="--evalue 2.0"/>
        <has_text text="--cutoff 0.5"/>
        <has_text text="--clipfac 10.0"/>
        <has_text text="--ncev 0.01"/>
        <has_text text="--maxtrnaovl 50"/>
        <has_text text="--maxrrnaovl 50"/>
        <has_text text="--intron 0"/>
        <has_text text="--oril 0"/>
        <has_text text="--orih 0"/>
        <has_text text="--noplots"/>
        <!-- options that must be absent -->
        <not_has_text text="--prot"/>
        <not_has_text text="--trna"/>
        <not_has_text text="--rrna"/>
        <not_has_text text="--best"/>
      </assert_command>
    </test>
    <!-- Non-default general options: genetic code 5, all six feature types selected, and changed overlap/fragment settings. The BED output is only compared by size. -->
    <test expect_num_outputs="1">
      <param name="input" value="NC_012920.fasta"/>
      <param name="refseqver" value="mitos2-refdata"/>
      <param name="code" value="5"/>
      <section name="advanced">
        <param name="featuretypes" value="prot,trna,rrna,intron,oril,orih"/>
        <param name="finovl" value="49"/>
        <param name="best" value="true"/>
        <param name="fragovl" value="10"/>
        <param name="fragfac" value="9"/>
      </section>
      <output name="bedout" ftype="bed" file="mitos2_NC_012920.bed" compare="sim_size"/>
      <assert_command>
        <!-- options that must be present -->
        <has_text text="--code 5"/>
        <has_text text="--finovl 49"/>
        <has_text text="--best"/>
        <has_text text="--fragovl 0.1"/>
        <has_text text="--fragfac 9.0"/>
        <has_text text="--evalue 2.0"/>
        <has_text text="--cutoff 0.5"/>
        <has_text text="--clipfac 10.0"/>
        <has_text text="--ncev 0.01"/>
        <has_text text="--maxtrnaovl 50"/>
        <has_text text="--maxrrnaovl 50"/>
        <has_text text="--noplots"/>
        <!-- no feature type may be switched off, since all of them are selected -->
        <not_has_text text="--prot"/>
        <not_has_text text="--trna"/>
        <not_has_text text="--rrna"/>
        <not_has_text text="--intron"/>
        <not_has_text text="--oril"/>
        <not_has_text text="--orih"/>
      </assert_command>
    </test>
    <!-- Non-default protein coding gene and ncRNA options with every optional output requested: ten datasets plus the svg collection. -->
    <test expect_num_outputs="11">
      <param name="input" value="NC_012920.fasta"/>
      <param name="refseqver" value="mitos2-refdata"/>
      <param name="code" value="2"/>
      <param name="addoutputs" value="bed,mito,gff,seq,fas,faa,geneorder,protein_plot,ncRNA_plot,ncRNA_structure_svg_plots,raw"/>
      <section name="advanced_prot">
        <param name="evalue" value="3"/>
        <param name="cutoff" value="49"/>
        <param name="clipfac" value="9"/>
        <param name="ncbicode" value="true"/>
        <param name="alarab" value="true"/>
        <param name="oldstst" value="true"/>
      </section>
      <section name="advanced_ncrna">
        <!-- <param name="locandgloc" value="true"/> should be possible from 2.0.5 https://gitlab.com/Bernt/MITOS/-/commit/9b4c55c29961c307dce02ac0319dadbd76f6b9e5-->
        <param name="ncev" value="0.1"/>
        <param name="sensitive" value="true"/>
        <param name="maxtrnaovl" value="51"/>
        <param name="maxrrnaovl" value="49"/>
      </section>
      <!-- text results are compared exactly, except the BED file; BED and the PDF plots are compared by size -->
      <output name="bedout" ftype="bed" file="mitos2_NC_012920.bed" compare="sim_size"/>
      <output name="mitoout" ftype="tabular" file="mitos2_NC_012920.mitos"/>
      <output name="gffout" ftype="gff" file="mitos2_NC_012920.gff"/>
      <output name="seqout" ftype="txt" file="mitos2_NC_012920.seq"/>
      <output name="faa" ftype="fasta" file="mitos2_NC_012920.faa"/>
      <output name="fas" ftype="fasta" file="mitos2_NC_012920.fas"/>
      <output name="geneorderout" ftype="fasta" file="mitos2_NC_012920.geneorder"/>
      <output name="protein_plot_out" ftype="pdf" file="mitos2_NC_012920_prot.pdf" compare="sim_size"/>
      <output name="ncRNA_plot_out" ftype="pdf" file="mitos2_NC_012920_ncrna.pdf" compare="sim_size"/>
      <!-- the archive only has to contain the BED result -->
      <output name="rawout" ftype="zip">
        <assert_contents>
          <has_archive_member path=".*/result.bed"/>
        </assert_contents>
      </output>
      <output_collection name="ncRNA_structure_plot_svg_out" type="list" count="17"/>
      <assert_command>
        <!-- options that must be present -->
        <has_text text="--code 2"/>
        <has_text text="--finovl 50"/>
        <has_text text="--fragovl 0.2"/>
        <has_text text="--fragfac 10.0"/>
        <has_text text="--intron 0"/>
        <has_text text="--oril 0"/>
        <has_text text="--orih 0"/>
        <has_text text="--evalue 3.0"/>
        <has_text text="--cutoff 0.49"/>
        <has_text text="--clipfac 9.0"/>
        <has_text text="--ncbicode"/>
        <has_text text="--alarab"/>
        <has_text text="--oldstst"/>
        <!--<has_text text="\-\-locandgloc"/>-->
        <has_text text="--ncev 0.1"/>
        <has_text text="--sensitive"/>
        <has_text text="--maxtrnaovl 51"/>
        <has_text text="--maxrrnaovl 49"/>
        <!-- options that must be absent; plots are requested, so plotting stays enabled -->
        <not_has_text text="--prot"/>
        <not_has_text text="--trna"/>
        <not_has_text text="--rrna"/>
        <not_has_text text="--best"/>
        <not_has_text text="--noplots"/>
      </assert_command>
    </test>
  </tests>
  <help>@COMMON_HELP@
  <![CDATA[


**Advanced options**

- Feature types

  Select the feature types that should be annotated. By default this is protein coding genes, tRNA and rRNA which is useful for metazoan mitogenomes. In addition also the replication origins of the light (OL) and heavy (OH) strand and introns can be annotated. The annotation of the replication origins is most useful for chordate mitogenomes. Introns are usually only found in mitogenomes of non-metazoans and basal Metazoa.

- Final overlap (nt)

  Maximum number of nucleotides by which genes of different types may overlap. Applies to merging of the final predictions. 

- Annotate only the best copy of each feature

  If there are copies of the same feature type, only the one with the lowest e-value (for ncRNAs and OL) or highest quality score (protein coding genes and OH) is annotated.

- Fragment overlap

  Maximum fraction (of the shorter feature) by which two hits may overlap in the query and still be counted as fragments of the same gene.

- Fragment quality factor

  Maximum factor by which fragments may differ in their quality scores. Higher values allow that parts of a gene can differ more in their quality.

**Advanced options for protein coding gene prediction**

- BLAST E-value Exponent

  The statistical significance threshold for considering matches in the BLASTX search. The value entered here is the negation of the exponent of the E-value threshold that should be used by BLAST, i.e. a value X gives an E-value of 10^(-X). 

- Quality cutoff

  Minimum allowed quality value (in percent) of the maximum quality value per reading frame. Higher values correspond to shorter protein predictions and therefore a reduced risk of conflicts with other features.

- Clipping factor

  Clipping is started if overlapping prediction of hits with the same name differ by less than a factor X in their quality value.

- use start/stop codons as in NCBI (default: learned start/stop codons)

  Instead of the codon probabilities derived from the protein coding genes annotated in RefSeq the codons listed at NCBI taxonomy are used with equal probabilities (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)

- Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac parameters

  Note: 1) this only works for Metazoa RefSeq release 63 reference data set. 2) This will only predict the protein coding genes that are typical for metazoan mitochondrial genomes.

- Use the old start/stop prediction method of MITOS1

  The search for start and stop codons just takes the closest to the initial start / stop positions within 6aa (i.e. the method used in MITOS1)

**Advanced options for ncRNA gene prediction**

- Run mitfi in glocal and local mode (default: local only)

  By default mitfi uses infernal's cmsearch in local search mode only. By enabling this option mitfi will also invoke cmsearch in glocal mode if a feature is missing.

- e-value to use for infernal fast mode

  The e-value passed to the first (fast) pass of cmsearch. In the second pass (the sensitive search) an e-value of 0.1 is used.

- Use infernal's sensitive mode only

  By default mitfi searches for ncRNAs using cmsearch's default fast mode first. If a ncRNA type is missing it is searched using the sensitive mode. This can be useful if low scoring copies are expected which might be missed when searching in the two stage mode. 

- Allow tRNA/rRNA overlap of up to X nt for mitfi

  Allow that a tRNA/rRNA overlaps with another feature by this number of nucleotides.

]]></help>
  <citations>
    <!-- presumably the MITOS2 publication; confirm against the DOI -->
    <citation type="doi">10.1093/nar/gkz833</citation>
    <!-- presumably the Al Arab et al. method referred to by the alarab option; confirm against the DOI -->
    <citation type="doi">10.1016/j.ympev.2016.09.024</citation>
  </citations>
</tool>