changeset 0:b1be94418db9 draft

Uploaded
author crs4
date Tue, 17 Sep 2013 09:41:18 -0400
parents
children 93fa7ecb5292
files COPYING sspace.py sspace.xml tool_dependencies.xml
diffstat 4 files changed, 291 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COPYING	Tue Sep 17 09:41:18 2013 -0400
@@ -0,0 +1,24 @@
+Copyright © 2012-2013 CRS4 Srl. http://www.crs4.it/
+Created by:
+Massimiliano Orsini <massimiliano.orsini@crs4.it>
+Gianmauro Cuccuru <gianmauro.cuccuru@crs4.it>
+Nicola Soranzo <nicola.soranzo@crs4.it>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sspace.py	Tue Sep 17 09:41:18 2013 -0400
@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+"""
+Command-line wrapper that runs the SSPACE scaffolder (SSPACE_Basic_v2.0.pl).
+"""
+
+import logging
+import optparse
+import os
+import shutil
+import subprocess
+import tempfile
+
+
+def which(name, flags=os.X_OK):
+    """
+    Search PATH for files called `name` that pass os.access(path, flags).
+
+    Each PATH directory is probed for the bare name and for the name plus
+    every PATHEXT suffix. Returns the list of matches in PATH order, or []
+    when PATH is unset.
+    """
+    path = os.environ.get('PATH')
+    if path is None:
+        return []
+    # A real list: a Python 3 filter() iterator would be spent after one directory.
+    exts = [e for e in os.environ.get('PATHEXT', '').split(os.pathsep) if e]
+    candidates = [os.path.join(d, str(name)) for d in path.split(os.pathsep)]
+    # Probe the suffixed names even when the bare name itself is missing.
+    return [c + e for c in candidates for e in [''] + exts if os.access(c + e, flags)]
+
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'  # pipe-separated record: timestamp|level padded to 8 chars|message
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'  # timestamp layout handed to logging.basicConfig as datefmt
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']  # accepted values of --loglevel
+
+
+def __main__():
+    """
+    Parse the wrapper options, run SSPACE in a scratch directory and collect its outputs.
+
+    Writes the one-line SSPACE library file to --lib, runs SSPACE_Basic_v2.0.pl (looked
+    up on PATH) inside a temporary directory, copies the SSPACE log into this script's
+    log and moves the evidence, scaffolds and summary files to --fe, --fs and --summ.
+    Bad or missing options end in parser.error(); RuntimeError is raised when SSPACE is
+    not on PATH and subprocess.CalledProcessError when it exits with a non-zero status.
+    """
+    parser = optparse.OptionParser()
+    parser.add_option('-c', dest='contigs', help='contigs mandatory (-s)')
+    parser.add_option('--r1', dest='reads1', help='')
+    parser.add_option('--r2', dest='reads2', help='')
+    parser.add_option('-i', dest='insert', type='int', help='')
+    parser.add_option('-e', dest='error', type='float', help='')
+    parser.add_option('-o', dest='orientation', choices=['FF', 'FR', 'RF', 'RR'], help='')
+    parser.add_option('-x', action='store_true', dest='extension', help='whether to extend the contigs (-x)')
+    parser.add_option('--minoverlap', dest='minoverlap', type='int', help='minoverlap (-m)')
+    parser.add_option('--numofreads', dest='numofreads', type='int', help='num of reads to call an extension (-o)')
+    parser.add_option('-t', dest='max_trim', type='int', help='maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)')
+    parser.add_option('-u', dest='unpaired', help='unpaired reads (-u, optional)')
+    parser.add_option('-r', dest='min_base_ratio', type='float', help='minimum base ratio used to accept a overhang consensus base (-r)')
+    parser.add_option('--minlink', dest='minlink', type='int', help='min link (-k)')
+    parser.add_option('--maxratio', dest='maxratio', type='float', help='max ratio (-a)')
+    parser.add_option('--contigoverlap', dest='contigoverlap', type='int', help='contigoverlap (-n)')
+    parser.add_option('--mincontig', dest='mincontig', type='int', help='mincontig (-z)')
+    parser.add_option('-T', dest='n_threads', type='int', help='number of threads to use in Bowtie (-T)')
+    parser.add_option('-p', dest='prefix', default='sspace_pre', help='prefix (-b)')
+    parser.add_option('--lib', dest='libraryname', default='galx', help='libraryfile')
+    parser.add_option('--fe', dest='finalevidence', help='')
+    parser.add_option('--fs', dest='finalscaffolds', help='')
+    parser.add_option('--lg', dest='logfile', help='')
+    parser.add_option('--summ', dest='summaryfile', help='')
+    parser.add_option('--loglevel', choices=LOG_LEVELS, help='logging level (default: %default)', default='DEBUG')
+    (options, args) = parser.parse_args()
+    if args:
+        parser.error('Wrong number of arguments')
+    # These have no usable default: fail with a usage message, not a TypeError later on.
+    required = (('-c', 'contigs'), ('--r1', 'reads1'), ('--r2', 'reads2'), ('-i', 'insert'), ('-e', 'error'), ('-o', 'orientation'), ('--fe', 'finalevidence'), ('--fs', 'finalscaffolds'), ('--summ', 'summaryfile'))
+    missing = [flag for flag, dest in required if getattr(options, dest) is None]
+    if missing:
+        parser.error('Missing mandatory option(s): %s' % ', '.join(missing))
+
+    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': getattr(logging, options.loglevel)}
+    if options.logfile:
+        kwargs['filename'] = options.logfile
+    logging.basicConfig(**kwargs)
+    logger = logging.getLogger('SSPACE scaffold assembly')
+
+    exe = which("SSPACE_Basic_v2.0.pl", os.R_OK)
+    if not exe:
+        raise RuntimeError('SSPACE_Basic_v2.0.pl not found in PATH')
+    # SSPACE runs in a scratch directory, so every path handed to it must be absolute.
+    libraryname = os.path.abspath(options.libraryname)
+    with open(libraryname, 'w') as out:  # one-line SSPACE library description
+        out.write('lib1 %s %s %d %s %s\n' % (os.path.abspath(options.reads1), os.path.abspath(options.reads2), options.insert, options.error, options.orientation))
+
+    # An argument list (no shell) keeps paths with spaces or metacharacters intact;
+    # exe[0] is the first match, i.e. the one PATH precedence selects.
+    command = ['perl', exe[0], '-l', libraryname, '-s', os.path.abspath(options.contigs)]
+    if options.extension:
+        command += ['-x', '1']
+    if options.unpaired:
+        command += ['-u', os.path.abspath(options.unpaired)]
+    for flag, dest in (('-m', 'minoverlap'), ('-o', 'numofreads'), ('-t', 'max_trim'), ('-r', 'min_base_ratio'), ('-k', 'minlink'), ('-a', 'maxratio'), ('-n', 'contigoverlap'), ('-z', 'mincontig'), ('-T', 'n_threads')):
+        value = getattr(options, dest)
+        if value is not None:
+            command += [flag, str(value)]
+    command += ['-b', options.prefix]
+
+    logger.debug('Creating temp dir')
+    wd = tempfile.mkdtemp()
+    try:
+        logger.info("SSPACE running")
+        logger.debug("executing %s" % ' '.join(command))
+        # cwd= confines SSPACE's files to wd without changing this process's directory.
+        subprocess.check_call(command, cwd=wd)
+
+        result_base = os.path.join(wd, options.prefix)
+        # Text mode, so the content joins with the str header on Python 3 as well.
+        with open("%s.logfile.txt" % result_base, 'r') as sspace_log_file:
+            logger.info("\n".join(["Log from SSPACE", sspace_log_file.read()]))
+        logger.info("Moving result files")
+        shutil.move("%s.final.evidence" % result_base, options.finalevidence)
+        shutil.move("%s.final.scaffolds.fasta" % result_base, options.finalscaffolds)
+        shutil.move("%s.summaryfile.txt" % result_base, options.summaryfile)
+    finally:
+        logger.debug("Removing temp dir: %s" % wd)
+        shutil.rmtree(wd)
+
+if __name__ == "__main__":  # run the wrapper only when executed as a script, not on import
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sspace.xml	Tue Sep 17 09:41:18 2013 -0400
@@ -0,0 +1,113 @@
+<tool id="sspace" name="SSPACE" version="1.0.5">
+  <description>scaffolder</description>
+  <requirements>
+    <requirement type="package" version="2.0">sspace</requirement>
+  </requirements>
+
+  <command interpreter="python">
+    sspace.py
+    \${SSPACE_SITE_OPTIONS:--T 1}
+    -c $contigs --r1 $reads1 --r2 $reads2 -i $insert -e $error -o $orientation
+    #if $exten
+      -x
+    #end if
+    #if str($minoverlap)
+      --minoverlap $minoverlap
+    #end if
+    #if str($numofreads)
+      --numofreads $numofreads
+    #end if
+    #if str($max_trim)
+      -t $max_trim
+    #end if
+    #if $unpaired
+      -u $unpaired
+    #end if
+    #if str($min_base_ratio)
+      -r $min_base_ratio
+    #end if
+    #if str($minlink)
+      --minlink $minlink
+    #end if
+    #if str($maxratio)
+      --maxratio $maxratio
+    #end if
+    #if str($contigoverlap)
+      --contigoverlap $contigoverlap
+    #end if
+    #if str($mincontig)
+      --mincontig $mincontig
+    #end if
+    --lib $libraryname --fe $finalevidence --fs $finalscaffolds --lg $logfile --summ $summaryfile
+  </command>
+
+  <inputs>
+    <param name="contigs" type="data" format="fasta" label="Contigs FASTA file (-s)" />
+    <param name="reads1" type="data" format="fasta,fastq" label="Paired-end reads 1" help="FASTA or FASTQ format" />
+    <param name="reads2" type="data" format="fasta,fastq" label="Paired-end reads 2" help="FASTA or FASTQ format" />
+    <param name="insert" type="integer" value="" label="Insert size">
+      <validator type="in_range" min="1" />
+    </param>
+    <param name="error" type="float" min="0" max="1" value="" label="Variability (e.g. 0.25 for 25%)" />
+    <param name="orientation" type="select" label="Orientation">
+      <option value="FF">FF</option>
+      <option value="FR" selected="true">FR</option>
+      <option value="RF">RF</option>
+      <option value="RR">RR</option>
+    </param>
+    <param name="exten" type="boolean" checked="true" label="Extension (-x)" help="Uncheck for scaffolding only" />
+    <param name="minoverlap" type="integer" optional="true" min="10" max="50" value="35" label="Extension: minimum number of overlapping bases with the seed/contig during overhang consensus build up (-m)" help="E.g. 32-35 for 36bp reads" />
+    <param name="numofreads" type="integer" optional="true" value="8" label="Extension: minimum number of reads needed to call an extension (-o)" help="Higher numbers increase reliability of the extension">
+      <validator type="in_range" min="1" />
+    </param>
+    <param name="max_trim" type="integer" optional="true" value="0" label="Extension: maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)">
+      <validator type="in_range" min="0" />
+    </param>
+    <param name="unpaired" type="data" format="fasta,fastq" optional="true" label="Extension: unpaired reads (-u, optional)" help="FASTA or FASTQ format" />
+    <param name="min_base_ratio" type="float" optional="true" min="0" max="1" value="0.9" label="Extension: minimum base ratio used to accept an overhang consensus base (-r)" />
+    <param name="minlink" type="integer" optional="true" value="5" label="Scaffolding: minimum number of links (read pairs) to compute scaffold (-k)">
+      <validator type="in_range" min="0" />
+    </param>
+    <param name="maxratio" type="float" optional="true" min="0" max="1" value="0.7" label="Scaffolding: maximum link ratio between two best contig pairs (-a)" help="Higher values lead to less accurate scaffolding" />
+    <param name="contigoverlap" type="integer" optional="true" value="30" label="Scaffolding: minimum overlap required between contigs to merge adjacent contigs in a scaffold (-n)">
+      <validator type="in_range" min="0" />
+    </param>
+    <param name="mincontig" type="integer" optional="true" value="200" label="Scaffolding: minimum contig size used for scaffold (-z)" help="Filters out contigs below this size">
+      <validator type="in_range" min="0" />
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="libraryname" hidden="True" format="tabular" label="${tool.name} on ${on_string}: libraryfile" />
+    <data name="finalevidence" format="txt" label="${tool.name} on ${on_string}: final evidence" />
+    <data name="finalscaffolds" format="fasta" label="${tool.name} on ${on_string}: final scaffolds" />
+    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
+    <data name="summaryfile" format="txt" label="${tool.name} on ${on_string}: summary" />
+  </outputs>
+  <tests>
+
+  </tests>
+  <help>
+**What it does**
+
+SSPACE is a script that extends and scaffolds pre-assembled contigs using one or more mate-pair or paired-end libraries, or a combination of both.
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2012-2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+If you use this tool in Galaxy, please cite |Cuccuru2013|_.
+
+.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
+.. _Cuccuru2013: http://orione.crs4.it/
+
+This tool uses `SSPACE basic`_, which is licensed separately. Please cite |Boetzer2011|_.
+
+.. _SSPACE basic: http://www.baseclear.com/landingpages/basetools-a-wide-range-of-bioinformatics-solutions/sspacev12/
+.. |Boetzer2011| replace:: Boetzer, M., Henkel, C. V., Jansen, H. J., Butler, D., Pirovano, W. (2011) Scaffolding pre-assembled contigs using SSPACE. *Bioinformatics* 27(4), 578-579
+.. _Boetzer2011: http://bioinformatics.oxfordjournals.org/content/27/4/578
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Sep 17 09:41:18 2013 -0400
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="sspace" version="2.0">
+    <install version="1.0">
+      <actions>
+        <action type="download_by_url" target_filename="SSPACE-BASIC-2.0_linux-x86_64.zip">http://www.baseclear.com/download.php?file_id=1038</action>
+        <!-- fix for Perl >= 5.16.0: replace the legacy getopts.pl library with Getopt::Std -->
+        <action type="shell_command">sed -i -e 's/require "getopts.pl"/use Getopt::Std/' -e 's/&amp;Getopts/getopts/' SSPACE_Basic_v2.0.pl</action>
+        <action type="move_directory_files">
+          <source_directory>.</source_directory>
+          <destination_directory>$INSTALL_DIR</destination_directory>
+        </action>
+        <action type="set_environment">
+          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
+        </action>
+        <action type="set_environment">
+          <environment_variable name="SSPACE_SITE_OPTIONS" action="set_to">"-T 1"</environment_variable>
+        </action>
+      </actions>
+    </install>
+    <readme>
+Change the SSPACE_SITE_OPTIONS variable in the installed env.sh file to adjust the number of threads to use in Bowtie alignment (-T).
+    </readme>
+  </package>
+</tool_dependency>