Mercurial > repos > crs4 > ssake
diff ssake.py @ 0:0ec408bcfc80 draft
Uploaded
author | crs4 |
---|---|
date | Wed, 11 Sep 2013 12:51:21 -0400 |
parents | |
children | 386166019772 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ssake.py Wed Sep 11 12:51:21 2013 -0400 @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +""" +SSAKE wrapper +""" + +import logging +import optparse +import os +import shutil +import subprocess +import tempfile + +def execute(cmd): + """ """ + subprocess.check_call(args=cmd, stdout=open(os.devnull, 'w'), shell=True) + + +def which(name, flags=os.X_OK): + """ + Search PATH for executable files with the given name. + """ + result = [] + exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep)) + path = os.environ.get('PATH', None) + if path is None: + return [] + for p in os.environ.get('PATH', '').split(os.pathsep): + p = os.path.join(p, str(name)) + if os.access(p, flags): + result.append(p) + for e in exts: + pext = p + e + if os.access(pext, flags): + result.append(pext) + return result + + +class SSAKE: + def __init__(self, logger, options): + self.logger = logger + self.executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl') + self.logger.debug(which(self.executables[0])) + self.logger.debug(which(self.executables[1])) + self.logger.debug(which(self.executables[2])) + self.logger.debug('Creating temp dir') + self.wd = tempfile.mkdtemp() + + self.kind_of_reads = int(options.kind_of_reads) + if not (self.kind_of_reads): + self.infile = options.if_unpaired + self.paired = 0 + else: + self.infile_r1 = options.if_paired_r1 + self.infile_r2 = options.if_paired_r2 + self.paired = 1 + self.insert_size = options.insert_size + self.minnumlinks = options.minnumlinks + self.error = options.error + self.maxlinkratio = options.maxlinkratio + self.minoverlap = options.minoverlap + self.mindepthofcoverage = options.mindepthofcoverage + self.minoverlappingbases = options.minoverlappingbases + self.mincall = options.mincall + self.baseratio = options.baseratio + self.ignore_header = options.ignore_header + self.prefix = options.prefix + self.contigs = options.contigs + self.log = options.logfile + self.short = options.short + self.singlets = options.singlets + if options.seeds_file: + self.seeds_file = options.seeds_file + + def run(self): + """ """ + os.chdir(self.wd) + seeds = '' + if hasattr(self, 'seeds_file'): + seeds = " -s %s" % self.seeds_file + if self.kind_of_reads == 1: + cmd = "%s %s %s %d" % ( + self.executables[1], self.infile_r1, self.infile_r2, + self.insert_size) + self.logger.info("Preparing data") + execute(cmd) + paired_file = "%s/paired.fa" % self.wd + command = "%s -f %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) + elif self.kind_of_reads == 2: + cmd = "%s %s %s %d" % ( + self.executables[2], self.infile_r1, self.infile_r2, + self.insert_size) + self.logger.info("Preparing data") + execute(cmd) + paired_file = "%s/paired.fa" % self.wd + unpaired_file = "%s/unpaired.fa" % self.wd + command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, unpaired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) + else: + command = "%s -f %s" % (self.executables[0], self.infile) + command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, self.mindepthofcoverage, self.minoverlappingbases, self.mincall, self.baseratio, self.ignore_header, self.prefix, self.paired) + self.logger.debug(command) + self.logger.info("Executing SSAKE") + execute(command) + + with open("%s.log" % os.path.join(self.wd, self.prefix), 'rb') as ssake_log_file: + self.logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()])) + self.logger.info("Moving result files") + shutil.move("%s.contigs" % os.path.join(self.wd, self.prefix), self.contigs) + shutil.move("%s.short" % os.path.join(self.wd, self.prefix), self.short) + shutil.move("%s.singlets" % os.path.join(self.wd, self.prefix), self.singlets) + + def __del__(self): + shutil.rmtree(self.wd) + + +LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' +LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' +LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + + +def __main__(): + """ main function """ + parser = optparse.OptionParser() + parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name') + parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name') + parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name') + parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name') + parser.add_option('-w', dest='mindepthofcoverage', type='int', help='minimum depth of coverage allowed for contigs') + parser.add_option('-m', dest='minoverlappingbases', type='int', default=20, help='Minimum number of overlapping bases with the seed/contig during overhang consensus build up (default -m 20)') + parser.add_option('-o', dest='mincall', type='int', default=2, help='mincall -o ') + parser.add_option('-r', dest='baseratio', type='float', default=0.7, help='baseratio -r') + parser.add_option('-k', dest='minnumlinks', type='int', default=4, help='Minimum number of links (read pairs) to compute scaffold -k') + parser.add_option('-e', dest='error', type='float', default=0.75, help='Error (%) allowed on mean distance -e') + parser.add_option('-a', dest='maxlinkratio', type='float', default=0.5, help='Maximum link ratio between two best contig pairs -a') + parser.add_option('-x', dest='minoverlap', type='int', default=20, help='Minimum overlap required between contigs to merge adjacent contigs in a scaffold -x') + parser.add_option('--ignore_header', dest='ignore_header', choices=['0', '1'], default='1', help='Ignore read name/header *will use less RAM if set to 1* -h') + parser.add_option('--kind_of_reads', dest='kind_of_reads', choices=['0', '1', '2'], help='Kind of reads (-p)') + parser.add_option('--iz', dest='insert_size', type='int', help='Library insert size') + parser.add_option('--prefix', dest='prefix', default='ssake_pre', help='prefix') + parser.add_option('--out1', dest='contigs', help='contig file') + parser.add_option('--out2', dest='short', help='short file') + parser.add_option('--out3', dest='singlets', help='singlets file') + parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)') + parser.add_option('--logfile', help='log file (default=stderr)') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + log_level = getattr(logging, options.loglevel) + kwargs = {'format': LOG_FORMAT, + 'datefmt': LOG_DATEFMT, + 'level': log_level} + if options.logfile: + kwargs['filename'] = options.logfile + logging.basicConfig(**kwargs) + logger = logging.getLogger('SSAKE scaffold assembly') + + S = SSAKE(logger, options) + S.run() + return + +if __name__ == "__main__": + __main__()