Mercurial > repos > crs4 > sspace
diff sspace.py @ 0:b1be94418db9 draft
Uploaded
author | crs4 |
---|---|
date | Tue, 17 Sep 2013 09:41:18 -0400 |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
SSPACE wrapper

Builds an SSPACE library file from the command-line options, runs
``SSPACE_Basic_v2.0.pl`` through ``perl`` inside a scratch directory and moves
the result files to the locations requested by the caller.
"""

import logging
import optparse
import os
import shutil
import subprocess
import sys
import tempfile


def which(name, flags=os.X_OK):
    """Search PATH for files with the given name that pass an access check.

    :param name: file name to look for (converted with ``str``).
    :param flags: mode passed to ``os.access`` (``os.X_OK`` by default).
    :return: list of matching paths in PATH order; for every directory the
        bare name is tested first, then the name followed by each non-empty
        PATHEXT suffix. Empty list when PATH is unset or nothing matches.
    """
    path = os.environ.get('PATH', None)
    if path is None:
        return []
    # Materialise the suffixes: a lazy filter() object would be exhausted
    # after the first PATH directory on Python 3.
    exts = [e for e in os.environ.get('PATHEXT', '').split(os.pathsep) if e]
    result = []
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, str(name))
        if os.access(candidate, flags):
            result.append(candidate)
        for ext in exts:
            with_ext = candidate + ext
            if os.access(with_ext, flags):
                result.append(with_ext)
    return result


LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

# (dest, flag) pairs of the options the wrapper cannot run without: they feed
# the library line, the SSPACE ``-s`` argument or the result destinations.
_REQUIRED_OPTIONS = [
    ('contigs', '-c'),
    ('reads1', '--r1'),
    ('reads2', '--r2'),
    ('insert', '-i'),
    ('error', '-e'),
    ('orientation', '-o'),
    ('finalevidence', '--fe'),
    ('finalscaffolds', '--fs'),
    ('summaryfile', '--summ'),
]


def _build_parser():
    """Return the option parser describing the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option('-c', dest='contigs', help='contigs mandatory (-s)')
    parser.add_option('--r1', dest='reads1', help='')
    parser.add_option('--r2', dest='reads2', help='')
    parser.add_option('-i', dest='insert', type='int', help='')
    parser.add_option('-e', dest='error', type='float', help='')
    parser.add_option('-o', dest='orientation', choices=['FF', 'FR', 'RF', 'RR'], help='')
    parser.add_option('-x', action='store_true', dest='extension', help='whether to extend the contigs (-x)')
    parser.add_option('--minoverlap', dest='minoverlap', type='int', help='minoverlap (-m)')
    parser.add_option('--numofreads', dest='numofreads', type='int', help='num of reads to call an extension (-o)')
    parser.add_option('-t', dest='max_trim', type='int', help='maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)')
    parser.add_option('-u', dest='unpaired', help='unpaired reads (-u, optional)')
    parser.add_option('-r', dest='min_base_ratio', type='float', help='minimum base ratio used to accept a overhang consensus base (-r)')
    parser.add_option('--minlink', dest='minlink', type='int', help='min link (-k)')
    parser.add_option('--maxratio', dest='maxratio', type='float', help='max ratio (-a)')
    parser.add_option('--contigoverlap', dest='contigoverlap', type='int', help='contigoverlap (-n)')
    parser.add_option('--mincontig', dest='mincontig', type='int', help='mincontig (-z)')
    parser.add_option('-T', dest='n_threads', type='int', help='number of threads to use in Bowtie (-T)')
    parser.add_option('-p', dest='prefix', default='sspace_pre', help='prefix (-b)')
    parser.add_option('--lib', dest='libraryname', default='galx', help='libraryfile')
    parser.add_option('--fe', dest='finalevidence', help='')
    parser.add_option('--fs', dest='finalscaffolds', help='')
    parser.add_option('--lg', dest='logfile', help='')
    parser.add_option('--summ', dest='summaryfile', help='')
    # The help text used to advertise INFO although the default is DEBUG.
    parser.add_option('--loglevel', choices=LOG_LEVELS, help='logging level (default: DEBUG)', default='DEBUG')
    return parser


def _sspace_command(exe, library, options):
    """Return the argument list that runs SSPACE for the parsed options.

    :param exe: path of the SSPACE perl script.
    :param library: path of the library file passed with ``-l``.
    :param options: parsed optparse values.
    :return: list of strings suitable for ``subprocess`` without a shell.
    """
    command = ['perl', exe, '-l', library, '-s', os.path.abspath(options.contigs)]
    if options.extension:
        command += ['-x', '1']
    # An empty -u value is treated as "not given".
    unpaired = os.path.abspath(options.unpaired) if options.unpaired else None
    valued_flags = [
        ('-m', options.minoverlap),
        ('-o', options.numofreads),
        ('-t', options.max_trim),
        ('-u', unpaired),
        ('-r', options.min_base_ratio),
        ('-k', options.minlink),
        ('-a', options.maxratio),
        ('-n', options.contigoverlap),
        ('-z', options.mincontig),
        ('-T', options.n_threads),
    ]
    for flag, value in valued_flags:
        if value is not None:
            command += [flag, str(value)]
    command += ['-b', options.prefix]
    return command


def __main__():
    """Run SSPACE as described by the command line and collect its results.

    Writes the one-line library file, executes SSPACE with a fresh temporary
    directory as its working directory, copies the SSPACE log into the
    wrapper's log and moves the evidence, scaffolds and summary files to the
    ``--fe``, ``--fs`` and ``--summ`` destinations. The temporary directory
    is always removed.

    Exits through ``parser.error`` on bad or missing options, through
    ``sys.exit`` when the SSPACE script is not found on PATH, and lets
    ``subprocess.CalledProcessError`` propagate when SSPACE fails.
    """
    parser = _build_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail before the (long) SSPACE run instead of crashing on a None later.
    missing = [flag for dest, flag in _REQUIRED_OPTIONS
               if getattr(options, dest) is None]
    if missing:
        parser.error('Missing mandatory option(s): %s' % ', '.join(missing))

    log_level = getattr(logging, options.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if options.logfile:
        kwargs['filename'] = options.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('SSPACE scaffold assembly')

    exe_name = "SSPACE_Basic_v2.0.pl"
    # The script is handed to perl, so it only has to be readable.
    candidates = which(exe_name, os.R_OK)
    if not candidates:
        message = "%s not found on PATH" % exe_name
        logger.critical(message)
        sys.exit(message)
    # First match wins, following the usual PATH precedence.
    exe = os.path.abspath(candidates[0])

    # Set library. SSPACE runs in another directory, so every path it
    # receives is made absolute.
    library = os.path.abspath(options.libraryname)
    outline = 'lib1 %s %s %d %s %s\n' % (os.path.abspath(options.reads1),
                                         os.path.abspath(options.reads2),
                                         options.insert,
                                         options.error,
                                         options.orientation)
    with open(library, 'w') as out:
        out.write(outline)

    command = _sspace_command(exe, library, options)
    logger.debug('Creating temp dir')
    wd = tempfile.mkdtemp()
    try:
        logger.info("SSPACE running")
        logger.debug("executing %s", ' '.join(command))
        # cwd= keeps the wrapper's own working directory untouched, so the
        # destination paths below resolve exactly as the caller gave them.
        subprocess.check_call(command, cwd=wd)

        # Clean environment
        produced = os.path.join(wd, options.prefix)
        with open("%s.logfile.txt" % produced, 'r') as sspace_log_file:
            logger.info("\n".join(["Log from SSPACE", sspace_log_file.read()]))
        logger.info("Moving result files")
        shutil.move("%s.final.evidence" % produced, options.finalevidence)
        shutil.move("%s.final.scaffolds.fasta" % produced, options.finalscaffolds)
        shutil.move("%s.summaryfile.txt" % produced, options.summaryfile)
        logger.debug("Removing temp dir: %s", wd)
    finally:
        shutil.rmtree(wd)


if __name__ == "__main__":
    __main__()