Mercurial > repos > crs4 > sspace
changeset 0:b1be94418db9 draft
Uploaded
author | crs4 |
---|---|
date | Tue, 17 Sep 2013 09:41:18 -0400 |
parents | |
children | 93fa7ecb5292 |
files | COPYING sspace.py sspace.xml tool_dependencies.xml |
diffstat | 4 files changed, 291 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Tue Sep 17 09:41:18 2013 -0400 @@ -0,0 +1,24 @@ +Copyright © 2012-2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Massimiliano Orsini <massimiliano.orsini@crs4.it> +Gianmauro Cuccuru <gianmauro.cuccuru@crs4.it> +Nicola Soranzo <nicola.soranzo@crs4.it> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# -*- coding: utf-8 -*-
# sspace.py as added by changeset 0:b1be94418db9
# (--- /dev/null  +++ b/sspace.py  Tue Sep 17 09:41:18 2013 -0400)
"""
SSPACE wrapper

Command line front end that writes an SSPACE library file, runs
``SSPACE_Basic_v2.0.pl`` (looked up on PATH) through ``perl`` inside a
temporary working directory and moves the result files to the locations
given on the command line.
"""

import logging
import optparse
import os
import shutil
import subprocess
import tempfile


def which(name, flags=os.X_OK):
    """Search PATH for files with the given name.

    :param name: base name of the file to look for
    :param flags: ``os.access`` mode the file must satisfy (default: executable)
    :return: list of matching paths, in PATH order; for every PATH entry the
             plain name is checked first, then the name followed by each
             non-empty suffix listed in the PATHEXT environment variable.
             An empty list is returned when PATH is not set.
    """
    path = os.environ.get('PATH', None)
    if path is None:
        return []
    # Build a real list: filter() is a one-shot iterator on Python 3, which
    # would make the suffixes apply to the first PATH entry only.
    exts = [ext for ext in os.environ.get('PATHEXT', '').split(os.pathsep) if ext]
    result = []
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, str(name))
        if os.access(candidate, flags):
            result.append(candidate)
        for ext in exts:
            candidate_ext = candidate + ext
            if os.access(candidate_ext, flags):
                result.append(candidate_ext)
    return result


LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
EXE_NAME = "SSPACE_Basic_v2.0.pl"


def _build_parser():
    """Create the option parser describing the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option('-c', dest='contigs', help='contigs mandatory (-s)')
    parser.add_option('--r1', dest='reads1', help='')
    parser.add_option('--r2', dest='reads2', help='')
    parser.add_option('-i', dest='insert', type='int', help='')
    parser.add_option('-e', dest='error', type='float', help='')
    parser.add_option('-o', dest='orientation', choices=['FF', 'FR', 'RF', 'RR'], help='')
    parser.add_option('-x', action='store_true', dest='extension', help='whether to extend the contigs (-x)')
    parser.add_option('--minoverlap', dest='minoverlap', type='int', help='minoverlap (-m)')
    parser.add_option('--numofreads', dest='numofreads', type='int', help='num of reads to call an extension (-o)')
    parser.add_option('-t', dest='max_trim', type='int', help='maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)')
    parser.add_option('-u', dest='unpaired', help='unpaired reads (-u, optional)')
    parser.add_option('-r', dest='min_base_ratio', type='float', help='minimum base ratio used to accept a overhang consensus base (-r)')
    parser.add_option('--minlink', dest='minlink', type='int', help='min link (-k)')
    parser.add_option('--maxratio', dest='maxratio', type='float', help='max ratio (-a)')
    parser.add_option('--contigoverlap', dest='contigoverlap', type='int', help='contigoverlap (-n)')
    parser.add_option('--mincontig', dest='mincontig', type='int', help='mincontig (-z)')
    parser.add_option('-T', dest='n_threads', type='int', help='number of threads to use in Bowtie (-T)')
    parser.add_option('-p', dest='prefix', default='sspace_pre', help='prefix (-b)')
    parser.add_option('--lib', dest='libraryname', default='galx', help='libraryfile')
    parser.add_option('--fe', dest='finalevidence', help='')
    parser.add_option('--fs', dest='finalscaffolds', help='')
    parser.add_option('--lg', dest='logfile', help='')
    parser.add_option('--summ', dest='summaryfile', help='')
    # The help text used to announce INFO although the default is DEBUG.
    parser.add_option('--loglevel', choices=LOG_LEVELS, help='logging level (default: DEBUG)', default='DEBUG')
    return parser


def _missing_required(options):
    """Return the flags of the mandatory options that were not supplied."""
    required = [
        ('-c', options.contigs),
        ('--r1', options.reads1),
        ('--r2', options.reads2),
        ('-i', options.insert),
        ('-e', options.error),
        ('-o', options.orientation),
        ('--fe', options.finalevidence),
        ('--fs', options.finalscaffolds),
        ('--summ', options.summaryfile),
    ]
    return [flag for flag, value in required if value is None]


def _build_command(exe, libraryfile, options):
    """Return the SSPACE invocation as an argument list (no shell involved).

    Optional settings are only emitted when they were given on the command
    line; file arguments are made absolute because SSPACE is started from a
    different working directory.
    """
    def render(fmt, value):
        return None if value is None else fmt % value

    optional = [
        ('-m', render('%d', options.minoverlap)),
        ('-o', render('%d', options.numofreads)),
        ('-t', render('%d', options.max_trim)),
        ('-u', os.path.abspath(options.unpaired) if options.unpaired else None),
        ('-r', render('%s', options.min_base_ratio)),
        ('-k', render('%d', options.minlink)),
        ('-a', render('%s', options.maxratio)),
        ('-n', render('%d', options.contigoverlap)),
        ('-z', render('%d', options.mincontig)),
        ('-T', render('%d', options.n_threads)),
    ]
    command = ['perl', exe, '-l', libraryfile, '-s', os.path.abspath(options.contigs)]
    if options.extension:
        command.extend(['-x', '1'])
    for flag, text in optional:
        if text is not None:
            command.extend([flag, text])
    command.extend(['-b', options.prefix])
    return command


def __main__():
    """Parse the command line, run SSPACE and collect its output files.

    Exits with a usage error when positional arguments are given or a
    mandatory option is missing, and with an error message when the SSPACE
    script cannot be found on PATH. ``subprocess.CalledProcessError`` is
    raised when SSPACE returns a non-zero exit status. The temporary working
    directory is always removed.
    """
    parser = _build_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    missing = _missing_required(options)
    if missing:
        # Fail early instead of crashing later on '%d' % None or after the run.
        parser.error('Missing required option(s): %s' % ', '.join(missing))

    log_level = getattr(logging, options.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if options.logfile:
        kwargs['filename'] = options.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('SSPACE scaffold assembly')

    matches = which(EXE_NAME, os.R_OK)
    if not matches:
        raise SystemExit("%s not found on PATH" % EXE_NAME)
    exe = matches[0]  # first PATH entry wins, as in a shell lookup

    # Set library; paths are absolute because SSPACE runs in the temp dir.
    libraryfile = os.path.abspath(options.libraryname)
    outline = 'lib1 %s %s %d %s %s\n' % (
        os.path.abspath(options.reads1), os.path.abspath(options.reads2),
        options.insert, options.error, options.orientation)
    with open(libraryfile, 'w') as out:
        out.write(outline)

    command = _build_command(exe, libraryfile, options)
    prefix = options.prefix

    logger.debug('Creating temp dir')
    wd = tempfile.mkdtemp()
    try:
        logger.info("SSPACE running")
        logger.debug("executing %s", " ".join(command))
        # cwd= keeps this process out of the directory that is removed below,
        # and the argument list avoids shell quoting/injection problems.
        subprocess.check_call(command, cwd=wd)

        # Text mode: the content is merged into a str log message.
        with open(os.path.join(wd, "%s.logfile.txt" % prefix)) as sspace_log_file:
            logger.info("Log from SSPACE\n%s", sspace_log_file.read())
        logger.info("Moving result files")
        shutil.move(os.path.join(wd, "%s.final.evidence" % prefix), options.finalevidence)
        shutil.move(os.path.join(wd, "%s.final.scaffolds.fasta" % prefix), options.finalscaffolds)
        shutil.move(os.path.join(wd, "%s.summaryfile.txt" % prefix), options.summaryfile)
        logger.debug("Removing temp dir: %s", wd)
    finally:
        shutil.rmtree(wd)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sspace.xml Tue Sep 17 09:41:18 2013 -0400 @@ -0,0 +1,113 @@ +<tool id="sspace" name="SSPACE" version="1.0.5"> <!-- Galaxy tool definition that runs sspace.py; it requires the "sspace" package, version 2.0. Site options come from the SSPACE_SITE_OPTIONS environment variable and fall back to a single thread (-T 1). Optional parameters are forwarded only when they are set. The hidden "libraryname" output is handed to the wrapper as its library file. --> + <description>scaffolder</description> + <requirements> + <requirement type="package" version="2.0">sspace</requirement> + </requirements> + + <command interpreter="python"> + sspace.py + \${SSPACE_SITE_OPTIONS:--T 1} + -c $contigs --r1 $reads1 --r2 $reads2 -i $insert -e $error -o $orientation + #if $exten + -x + #end if + #if str($minoverlap) + --minoverlap $minoverlap + #end if + #if str($numofreads) + --numofreads $numofreads + #end if + #if str($max_trim) + -t $max_trim + #end if + #if $unpaired + -u $unpaired + #end if + #if str($min_base_ratio) + -r $min_base_ratio + #end if + #if str($minlink) + --minlink $minlink + #end if + #if str($maxratio) + --maxratio $maxratio + #end if + #if str($contigoverlap) + --contigoverlap $contigoverlap + #end if + #if str($mincontig) + --mincontig $mincontig + #end if + --lib $libraryname --fe $finalevidence --fs $finalscaffolds --lg $logfile --summ $summaryfile + </command> + + <inputs> + <param name="contigs" type="data" format="fasta" label="Contigs FASTA file (-s)" /> + <param name="reads1" type="data" format="fasta,fastq" label="Paired-end reads 1" help="FASTA or FASTQ format" /> + <param name="reads2" type="data" format="fasta,fastq" label="Paired-end reads 2" help="FASTA or FASTQ format" /> + <param name="insert" type="integer" value="" label="Insert size"> + <validator type="in_range" min="1" /> + </param> + <param name="error" type="float" min="0" max="1" value="" label="Variability (e.g. 
0.25 for 25%)" /> + <param name="orientation" type="select" label="Orientation"> + <option value="FF">FF</option> + <option value="FR" selected="true">FR</option> + <option value="RF">RF</option> + <option value="RR">RR</option> + </param> + <param name="exten" type="boolean" checked="true" label="Extension (-x)" help="Uncheck for scaffolding only" /> + <param name="minoverlap" type="integer" optional="true" min="10" max="50" value="35" label="Extension: minimum number of overlapping bases with the seed/contig during overhang consensus build up (-m)" help="E.g. 32-35 for 36bp reads" /> + <param name="numofreads" type="integer" optional="true" value="8" label="Extension: minimum number of reads needed to call an extension (-o)" help="Higher numbers increase reliability of the extension"> + <validator type="in_range" min="1" /> + </param> + <param name="max_trim" type="integer" optional="true" value="0" label="Extension: maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)"> + <validator type="in_range" min="0" /> + </param> + <param name="unpaired" type="data" format="fasta,fastq" optional="true" label="Extension: unpaired reads (-u, optional)" help="FASTA or FASTQ format" /> + <param name="min_base_ratio" type="float" optional="true" min="0" max="1" value="0.9" label="Extension: minimum base ratio used to accept a overhang consensus base (-r)" /> + <param name="minlink" type="integer" optional="true" value="5" label="Scaffolding: minimum number of links (read pairs) to compute scaffold (-k)"> + <validator type="in_range" min="0" /> + </param> + <param name="maxratio" type="float" optional="true" min="0" max="1" value="0.7" label="Scaffolding: maximum link ratio between two best contig pairs (-a)" help="Higher values lead to less accurate scaffolding" /> + <param name="contigoverlap" type="integer" optional="true" value="30" label="Scaffolding: minimum overlap required between contigs to merge adjacent 
contigs in a scaffold (-n)"> + <validator type="in_range" min="0" /> + </param> + <param name="mincontig" type="integer" optional="true" value="200" label="Scaffolding: minimum contig size used for scaffold (-z)" help="Filters out contigs below this size"> + <validator type="in_range" min="0" /> + </param> + </inputs> + + <outputs> + <data name="libraryname" hidden="True" format="tabular" label="${tool.name} on ${on_string}: libraryfile" /> + <data name="finalevidence" format="txt" label="${tool.name} on ${on_string}: final evidence" /> + <data name="finalscaffolds" format="fasta" label="${tool.name} on ${on_string}: final scaffolds" /> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="summaryfile" format="txt" label="${tool.name} on ${on_string}: summary" /> + </outputs> + <tests> + + </tests> + <help> +**What it does** + +SSPACE is a script able to extend and scaffold pre-assembled contigs using one or more mate pairs or paired-end libraries, or even a combination. + +**License and citation** + +This Galaxy tool is Copyright © 2012-2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `SSPACE basic`_, which is licensed separately. Please cite |Boetzer2011|_. + +.. _SSPACE basic: http://www.baseclear.com/landingpages/basetools-a-wide-range-of-bioinformatics-solutions/sspacev12/ +.. |Boetzer2011| replace:: Boetzer, M., Henkel, C. V., Jansen, H. J., Butler, D., Pirovano, W. (2011) Scaffolding pre-assembled contigs using SSPACE. *Bioinformatics* 27(4), 578-579 +.. 
_Boetzer2011: http://bioinformatics.oxfordjournals.org/content/27/4/578 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Sep 17 09:41:18 2013 -0400 @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<tool_dependency> <!-- Install recipe for the "sspace" 2.0 package: download the archive, patch SSPACE_Basic_v2.0.pl with sed, move the files to $INSTALL_DIR, prepend $INSTALL_DIR to PATH and set SSPACE_SITE_OPTIONS to "-T 1". --> + <package name="sspace" version="2.0"> + <install version="1.0"> + <actions> + <action type="download_by_url" target_filename="SSPACE-BASIC-2.0_linux-x86_64.zip">http://www.baseclear.com/download.php?file_id=1038</action> + <!-- fix for Perl >= 5.16.0--> + <action type="shell_command">sed -i -e 's/require "getopts.pl"/use Getopt::Std/' -e 's/&Getopts/getopts/' SSPACE_Basic_v2.0.pl</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable> + </action> + <action type="set_environment"> + <environment_variable name="SSPACE_SITE_OPTIONS" action="set_to">"-T 1"</environment_variable> + </action> + </actions> + </install> + <readme> +Change the SSPACE_SITE_OPTIONS variable in the installed env.sh file to adjust the number of threads to use in Bowtie alignment (-T). + </readme> + </package> +</tool_dependency>