view edge_pro.xml @ 5:407b894abb08 draft

Uploaded
author crs4
date Thu, 03 Nov 2016 10:56:44 -0400
parents d5464c9e1723
children e6e6f5f9b9c4
line wrap: on
line source

<tool id="edge_pro" name="EDGE-pro" version="1.0.1">
  <description>Gene expression in Prokaryotes</description>
  <!-- External packages that must be resolved before the command runs:
       edge-pro provides edge.pl, which in turn calls bowtie2 for the alignment step. -->
  <requirements>
    <requirement type="package" version="2.1.0">bowtie2</requirement>
    <requirement type="package" version="1.3.1">edge-pro</requirement>
  </requirements>
  <command><![CDATA[
    ## Cheetah template for the job command line. The body is wrapped in CDATA so
    ## shell operators (>, &&) can be written literally instead of as XML entities.
    ## All dataset paths are single-quoted so that paths containing whitespace or
    ## shell metacharacters are passed to edge.pl as a single argument.

    ## -t receives the number of slots allocated by Galaxy (falling back to 4);
    ## edge_out is the prefix of the files picked up below and in the outputs section.
    edge.pl -t \${GALAXY_SLOTS:-4} -o edge_out
    ## Mandatory input parameters
    -g '$genome'
    -p '$ptt'
    -r '$rnt'

    ## Read files: -u alone for single-end, -u/-v for the two mates of a paired library
    #if $singlePaired.sPaired == "single"
      -u '$singlePaired.input1'
    #else if $singlePaired.sPaired == "paired"
      -u '$singlePaired.input1'
      -v '$singlePaired.input2'
    #else if $singlePaired.sPaired == "pairedCollection"
      -u '$singlePaired.input.forward'
      -v '$singlePaired.input.reverse'
    #end if

    ## Insert size limits only exist for the paired modes; both are optional, so an
    ## empty value (empty string after str()) means the flag is omitted.
    #if $singlePaired.sPaired in ["paired", "pairedCollection"]
      #if str($singlePaired.minInsertSize)
        -m $singlePaired.minInsertSize
      #end if
      #if str($singlePaired.maxInsertSize)
        -M $singlePaired.maxInsertSize
      #end if
    #end if

    ## Optional input parameters (only emitted when the full parameter list is selected
    ## and the individual field is not left empty)
    #if $params.settingsType == "full"
      #if str($params.window)
        -w $params.window
      #end if
      #if str($params.utrSize)
        -i $params.utrSize
      #end if
      #if str($params.similarity)
        -x $params.similarity
      #end if
      #if str($params.readLength)
        -l $params.readLength
      #end if
      #if str($params.minCoverage)
        -c $params.minCoverage
      #end if
    #end if
    ## need to redirect stderr because edge.pl calls bowtie2 and count which write some logging info there
    > '$out_log' 2>&1
    ## Concatenate the edge_out.rpkm_* files into the single RPKM output dataset
    && cat edge_out.rpkm_* > '$out_rpkm'
  ]]></command>
  <!-- Job failure detection: any exit code of 1 or higher is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>
  <inputs>
    <!-- Library layout: selects which read inputs are shown and, for the paired
         modes, exposes the optional insert size limits passed as -m / -M. -->
    <conditional name="singlePaired">
      <param name="sPaired" type="select" label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
        <option value="pairedCollection">Paired-end collection</option>
      </param>
      <when value="single">
        <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="FASTQ format with Sanger-scaled quality values (Galaxy fastqsanger datatype)"/>
      </when>
      <when value="paired">
        <param format="fastqsanger" name="input1" type="data" label="Forward FASTQ file" help="FASTQ format with Sanger-scaled quality values (Galaxy fastqsanger datatype)" />
        <param format="fastqsanger" name="input2" type="data" label="Reverse FASTQ file" help="FASTQ format with Sanger-scaled quality values (Galaxy fastqsanger datatype)" />
        <param name="minInsertSize" type="integer" optional="true" min="0" value="0" label="Minimum insert size in paired-end library for Bowtie2 (-m)" />
        <param name="maxInsertSize" type="integer" optional="true" min="1" value="500" label="Maximum insert size in paired-end library for Bowtie2 (-M)" />
      </when>
      <when value="pairedCollection">
        <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="FASTQ paired collection" help="FASTQ format with Sanger-scaled quality values (Galaxy fastqsanger datatype)" />
        <param name="minInsertSize" type="integer" optional="true" min="0" value="0" label="Minimum insert size in paired-end library for Bowtie2 (-m)" />
        <param name="maxInsertSize" type="integer" optional="true" min="1" value="500" label="Maximum insert size in paired-end library for Bowtie2 (-M)" />
      </when>
    </conditional>

    <!-- Mandatory reference data, passed to edge.pl as -g, -p and -r. -->
    <param format="fasta" name="genome" type="data" label="Select the reference genome from your history (-g)" help="FASTA format" />
    <param format="ptt" name="ptt" type="data" label="Coordinates of coding genes (PTT file)" help="PTT file with coordinates of coding genes (-p)" />
    <param format="rnt" name="rnt" type="data" label="Coordinates of structural RNAs (RNT file)" help="RNT file with coordinates of structural RNA (-r)" />

    <!-- Advanced settings: every field is optional; a field left empty is not
         passed on the command line. -->
    <conditional name="params">
      <param name="settingsType" type="select" label="Parameter settings" help="For most needs, use default settings. If you want full control use Full Parameter List">
        <option value="preSet">Use defaults</option>
        <option value="full">Full parameter list</option>
      </param>
      <when value="preSet" />
      <when value="full">
        <param name="window" type="integer" optional="true" value="100" label="Window length for coverage distribution (-w)" help="Used to distribute the coverage between two overlapping genes. See help below for details" />
        <param name="utrSize" type="integer" optional="true" value="40" label="Size of the untranslated region (-i)" help="Enter the size of the untranslated region between the initial transcription site and the start codon" />
        <param name="similarity" type="float" optional="true" value="0.15" label="Percentage for similar coverage (-x)" help="Enter the percentage used to determine when two coverage values are considered similar. See help below for details" />
        <param name="readLength" type="integer" optional="true" value="" label="Read length (-l)" help="If not specified, the first 1000 reads are used to approximate the read length" />
        <param name="minCoverage" type="integer" optional="true" value="3" label="Minimum average coverage for expressed genes (-c)" help="Coverage less than specified is assumed to be noise and gene is considered to not be expressed" />
      </when>
    </conditional>
  </inputs>

  <!-- Output datasets:
       out_aln  is collected from edge_out.alignments in the job working directory;
       out_rpkm is written by the command (concatenation of the edge_out.rpkm_* files);
       out_log  receives the redirected stdout and stderr of edge.pl. -->
  <outputs>
    <data format="sam" name="out_aln" label="${tool.name} on ${on_string}: alignment" from_work_dir="edge_out.alignments" />
    <data format="tabular" name="out_rpkm" label="${tool.name} on ${on_string}: rpkm"/>
    <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log"/>
  </outputs>

  <help>

**What it does**

`EDGE-pro`_ (Estimated Degree of Gene Expression in PROkaryotes) is an efficient software system to estimate gene expression levels in prokaryotic genomes from RNA-seq data. EDGE-pro uses Bowtie2 for alignment and then estimates expression directly from the alignment results.
EDGE-pro includes routines to assign reads aligning to overlapping gene regions accurately. 15% or more of bacterial genes overlap other genes, making this a significant problem for bacterial RNA-seq, one that is generally ignored by programs designed for eukaryotic RNA-seq experiments.

**Input files:**

.. class:: infomark

Input files with gene coordinates in PTT and RNT format can be retrieved with the Get EDGE-pro Files tool available in Galaxy, or downloaded from the `NCBI ftp repository`_.
This tool accepts files in Sanger FASTQ format (Galaxy *fastqsanger* datatype). Use the FASTQ Groomer tool to prepare your files.

.. _NCBI ftp repository: ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/

.. class:: warningmark

All 3 types of files (FASTA reference genome, PTT and RNT) must list the chromosomes/plasmids in the same order (e.g. if chr1 is before chr2 in the genome.fasta file, then chr1 must be before chr2 in the PTT and RNT files as well). If there is no PTT or RNT file for one of the chromosomes/plasmids, place this chromosome/plasmid at the end of the file.

**License and citation**

This Galaxy tool is Copyright © 2012-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

You can use this tool only if you agree to the license terms of: `EDGE-pro`_.

.. _EDGE-pro: http://ccb.jhu.edu/software/EDGE-pro/

If you use this tool, please cite:

- |Cuccuru2014|_
- |Magoc2013|_

.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
.. |Magoc2013| replace:: Magoc, T., Wood, D., Salzberg, S. L. (2013) EDGE-pro: Estimated Degree of Gene Expression in Prokaryotic Genomes. *Evol. Bioinform.* 2013:9, 127-136
.. _Magoc2013: http://www.la-press.com/edge-pro-estimated-degree-of-gene-expression-in-prokaryotic-genomes-article-a3586
  </help>
</tool>