view ssake.py @ 2:f20db7280f89 draft default tip

Fix SSAKE download URL (thanks to Graham Etherington for reporting this). Update Orione citation.
author crs4
date Thu, 06 Mar 2014 11:50:08 -0500
parents 386166019772
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
SSAKE wrapper
"""

import logging
import optparse
import os
import shutil
import subprocess
import tempfile

def execute(cmd):
    """ """
    subprocess.check_call(args=cmd, stdout=open(os.devnull, 'w'), shell=True)


def which(name, flags=os.X_OK):
    """
    Search PATH for executable files with the given name.
    """
    result = []
    exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep))
    path = os.environ.get('PATH', None)
    if path is None:
        return []
    for p in os.environ.get('PATH', '').split(os.pathsep):
        p = os.path.join(p, str(name))
        if os.access(p, flags):
            result.append(p)
            for e in exts:
                pext = p + e
                if os.access(pext, flags):
                    result.append(pext)
    return result


LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']


def __main__():
    parser = optparse.OptionParser()
    parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name')
    parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name')
    parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name')
    parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name')
    parser.add_option('-w', dest='mindepthofcoverage', type='int', help='minimum depth of coverage allowed for contigs')
    parser.add_option('-m', dest='minoverlappingbases', type='int', default=20, help='Minimum number of overlapping bases with the seed/contig during overhang consensus build up (default -m 20)')
    parser.add_option('-o', dest='mincall', type='int', default=2, help='mincall -o ')
    parser.add_option('-r', dest='baseratio', type='float', default=0.7, help='baseratio -r')
    parser.add_option('-k', dest='minnumlinks', type='int', default=4, help='Minimum number of links (read pairs) to compute scaffold -k')
    parser.add_option('-e', dest='error', type='float', default=0.75, help='Error (%) allowed on mean distance -e')
    parser.add_option('-a', dest='maxlinkratio', type='float', default=0.5, help='Maximum link ratio between two best contig pairs -a')
    parser.add_option('-x', dest='minoverlap', type='int', default=20, help='Minimum overlap required between contigs to merge adjacent contigs in a scaffold -x')
    parser.add_option('--ignore_header', dest='ignore_header', choices=['0', '1'], default='1', help='Ignore read name/header *will use less RAM if set to 1* -h')
    parser.add_option('--kind_of_reads', dest='kind_of_reads', choices=['0', '1', '2'], help='Kind of reads (-p)')
    parser.add_option('--iz', dest='insert_size', type='int', help='Library insert size')
    parser.add_option('--prefix', dest='prefix', default='ssake_pre', help='prefix')
    parser.add_option('--out1', dest='contigs', help='contig file')
    parser.add_option('--out2', dest='short', help='short file')
    parser.add_option('--out3', dest='singlets', help='singlets file')
    parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)')
    parser.add_option('--logfile', help='log file (default=stderr)')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')

    log_level = getattr(logging, options.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if options.logfile:
        kwargs['filename'] = options.logfile
        logging.basicConfig(**kwargs)
        logger = logging.getLogger('SSAKE scaffold assembly')

    executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl')
    logger.debug(which(executables[0]))
    logger.debug(which(executables[1]))
    logger.debug(which(executables[2]))
    logger.debug('Creating temp dir')
    kind_of_reads = int(options.kind_of_reads)
    if not (kind_of_reads):
        infile = options.if_unpaired
        paired = 0
    else:
        infile_r1 = options.if_paired_r1
        infile_r2 = options.if_paired_r2
        paired = 1
        insert_size = options.insert_size
        minnumlinks = options.minnumlinks
        error = options.error
        maxlinkratio = options.maxlinkratio
        minoverlap = options.minoverlap
    mindepthofcoverage = options.mindepthofcoverage
    minoverlappingbases = options.minoverlappingbases
    mincall = options.mincall
    baseratio = options.baseratio
    ignore_header = options.ignore_header
    prefix = options.prefix
    contigs = options.contigs
    short = options.short
    singlets = options.singlets
    seeds = " -s %s" % options.seeds_file if options.seeds_file else ''
    wd = tempfile.mkdtemp()
    try:
        os.chdir(wd)
        if kind_of_reads == 1:
            cmd = "%s %s %s %d" % (
                executables[1], infile_r1, infile_r2,
                insert_size)
            logger.info("Preparing data")
            execute(cmd)
            paired_file = "%s/paired.fa" % wd
            command = "%s -f %s -k %d -e %s -a %s -x %d" % (executables[0], paired_file, minnumlinks, error, maxlinkratio, minoverlap)
        elif kind_of_reads == 2:
            cmd = "%s %s %s %d" % (
                executables[2], infile_r1, infile_r2,
                insert_size)
            logger.info("Preparing data")
            execute(cmd)
            paired_file = "%s/paired.fa" % wd
            unpaired_file = "%s/unpaired.fa" % wd
            command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (executables[0], paired_file, unpaired_file, minnumlinks, error, maxlinkratio, minoverlap)
        else:
            command = "%s -f %s" % (executables[0], infile)
        command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, mindepthofcoverage, minoverlappingbases, mincall, baseratio, ignore_header, prefix, paired)
        logger.debug(command)
        logger.info("Executing SSAKE")
        execute(command)

        with open("%s.log" % os.path.join(wd, prefix), 'rb') as ssake_log_file:
            logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()]))
        logger.info("Moving result files")
        shutil.move("%s.contigs" % os.path.join(wd, prefix), contigs)
        shutil.move("%s.short" % os.path.join(wd, prefix), short)
        shutil.move("%s.singlets" % os.path.join(wd, prefix), singlets)
    finally:
        shutil.rmtree(wd)


if __name__ == "__main__":
    __main__()