0
|
1 # -*- coding: utf-8 -*-
|
|
2 """
|
|
3 SSAKE wrapper
|
|
4 """
|
|
5
|
|
6 import logging
|
|
7 import optparse
|
|
8 import os
|
|
9 import shutil
|
|
10 import subprocess
|
|
11 import tempfile
|
|
12
|
|
13 def execute(cmd):
|
|
14 """ """
|
|
15 subprocess.check_call(args=cmd, stdout=open(os.devnull, 'w'), shell=True)
|
|
16
|
|
17
|
|
18 def which(name, flags=os.X_OK):
|
|
19 """
|
|
20 Search PATH for executable files with the given name.
|
|
21 """
|
|
22 result = []
|
|
23 exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep))
|
|
24 path = os.environ.get('PATH', None)
|
|
25 if path is None:
|
|
26 return []
|
|
27 for p in os.environ.get('PATH', '').split(os.pathsep):
|
|
28 p = os.path.join(p, str(name))
|
|
29 if os.access(p, flags):
|
|
30 result.append(p)
|
|
31 for e in exts:
|
|
32 pext = p + e
|
|
33 if os.access(pext, flags):
|
|
34 result.append(pext)
|
|
35 return result
|
|
36
|
|
37
|
|
38 class SSAKE:
|
|
39 def __init__(self, logger, options):
|
|
40 self.logger = logger
|
|
41 self.executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl')
|
|
42 self.logger.debug(which(self.executables[0]))
|
|
43 self.logger.debug(which(self.executables[1]))
|
|
44 self.logger.debug(which(self.executables[2]))
|
|
45 self.logger.debug('Creating temp dir')
|
|
46 self.wd = tempfile.mkdtemp()
|
|
47
|
|
48 self.kind_of_reads = int(options.kind_of_reads)
|
|
49 if not (self.kind_of_reads):
|
|
50 self.infile = options.if_unpaired
|
|
51 self.paired = 0
|
|
52 else:
|
|
53 self.infile_r1 = options.if_paired_r1
|
|
54 self.infile_r2 = options.if_paired_r2
|
|
55 self.paired = 1
|
|
56 self.insert_size = options.insert_size
|
|
57 self.minnumlinks = options.minnumlinks
|
|
58 self.error = options.error
|
|
59 self.maxlinkratio = options.maxlinkratio
|
|
60 self.minoverlap = options.minoverlap
|
|
61 self.mindepthofcoverage = options.mindepthofcoverage
|
|
62 self.minoverlappingbases = options.minoverlappingbases
|
|
63 self.mincall = options.mincall
|
|
64 self.baseratio = options.baseratio
|
|
65 self.ignore_header = options.ignore_header
|
|
66 self.prefix = options.prefix
|
|
67 self.contigs = options.contigs
|
|
68 self.log = options.logfile
|
|
69 self.short = options.short
|
|
70 self.singlets = options.singlets
|
|
71 if options.seeds_file:
|
|
72 self.seeds_file = options.seeds_file
|
|
73
|
|
74 def run(self):
|
|
75 """ """
|
|
76 os.chdir(self.wd)
|
|
77 seeds = ''
|
|
78 if hasattr(self, 'seeds_file'):
|
|
79 seeds = " -s %s" % self.seeds_file
|
|
80 if self.kind_of_reads == 1:
|
|
81 cmd = "%s %s %s %d" % (
|
|
82 self.executables[1], self.infile_r1, self.infile_r2,
|
|
83 self.insert_size)
|
|
84 self.logger.info("Preparing data")
|
|
85 execute(cmd)
|
|
86 paired_file = "%s/paired.fa" % self.wd
|
|
87 command = "%s -f %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap)
|
|
88 elif self.kind_of_reads == 2:
|
|
89 cmd = "%s %s %s %d" % (
|
|
90 self.executables[2], self.infile_r1, self.infile_r2,
|
|
91 self.insert_size)
|
|
92 self.logger.info("Preparing data")
|
|
93 execute(cmd)
|
|
94 paired_file = "%s/paired.fa" % self.wd
|
|
95 unpaired_file = "%s/unpaired.fa" % self.wd
|
|
96 command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, unpaired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap)
|
|
97 else:
|
|
98 command = "%s -f %s" % (self.executables[0], self.infile)
|
|
99 command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, self.mindepthofcoverage, self.minoverlappingbases, self.mincall, self.baseratio, self.ignore_header, self.prefix, self.paired)
|
|
100 self.logger.debug(command)
|
|
101 self.logger.info("Executing SSAKE")
|
|
102 execute(command)
|
|
103
|
|
104 with open("%s.log" % os.path.join(self.wd, self.prefix), 'rb') as ssake_log_file:
|
|
105 self.logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()]))
|
|
106 self.logger.info("Moving result files")
|
|
107 shutil.move("%s.contigs" % os.path.join(self.wd, self.prefix), self.contigs)
|
|
108 shutil.move("%s.short" % os.path.join(self.wd, self.prefix), self.short)
|
|
109 shutil.move("%s.singlets" % os.path.join(self.wd, self.prefix), self.singlets)
|
|
110
|
|
111 def __del__(self):
|
|
112 shutil.rmtree(self.wd)
|
|
113
|
|
114
|
|
115 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
|
|
116 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
|
|
117 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
|
|
118
|
|
119
|
|
120 def __main__():
|
|
121 """ main function """
|
|
122 parser = optparse.OptionParser()
|
|
123 parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name')
|
|
124 parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name')
|
|
125 parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name')
|
|
126 parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name')
|
|
127 parser.add_option('-w', dest='mindepthofcoverage', type='int', help='minimum depth of coverage allowed for contigs')
|
|
128 parser.add_option('-m', dest='minoverlappingbases', type='int', default=20, help='Minimum number of overlapping bases with the seed/contig during overhang consensus build up (default -m 20)')
|
|
129 parser.add_option('-o', dest='mincall', type='int', default=2, help='mincall -o ')
|
|
130 parser.add_option('-r', dest='baseratio', type='float', default=0.7, help='baseratio -r')
|
|
131 parser.add_option('-k', dest='minnumlinks', type='int', default=4, help='Minimum number of links (read pairs) to compute scaffold -k')
|
|
132 parser.add_option('-e', dest='error', type='float', default=0.75, help='Error (%) allowed on mean distance -e')
|
|
133 parser.add_option('-a', dest='maxlinkratio', type='float', default=0.5, help='Maximum link ratio between two best contig pairs -a')
|
|
134 parser.add_option('-x', dest='minoverlap', type='int', default=20, help='Minimum overlap required between contigs to merge adjacent contigs in a scaffold -x')
|
|
135 parser.add_option('--ignore_header', dest='ignore_header', choices=['0', '1'], default='1', help='Ignore read name/header *will use less RAM if set to 1* -h')
|
|
136 parser.add_option('--kind_of_reads', dest='kind_of_reads', choices=['0', '1', '2'], help='Kind of reads (-p)')
|
|
137 parser.add_option('--iz', dest='insert_size', type='int', help='Library insert size')
|
|
138 parser.add_option('--prefix', dest='prefix', default='ssake_pre', help='prefix')
|
|
139 parser.add_option('--out1', dest='contigs', help='contig file')
|
|
140 parser.add_option('--out2', dest='short', help='short file')
|
|
141 parser.add_option('--out3', dest='singlets', help='singlets file')
|
|
142 parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)')
|
|
143 parser.add_option('--logfile', help='log file (default=stderr)')
|
|
144 (options, args) = parser.parse_args()
|
|
145 if len(args) > 0:
|
|
146 parser.error('Wrong number of arguments')
|
|
147
|
|
148 log_level = getattr(logging, options.loglevel)
|
|
149 kwargs = {'format': LOG_FORMAT,
|
|
150 'datefmt': LOG_DATEFMT,
|
|
151 'level': log_level}
|
|
152 if options.logfile:
|
|
153 kwargs['filename'] = options.logfile
|
|
154 logging.basicConfig(**kwargs)
|
|
155 logger = logging.getLogger('SSAKE scaffold assembly')
|
|
156
|
|
157 S = SSAKE(logger, options)
|
|
158 S.run()
|
|
159 return
|
|
160
|
|
161 if __name__ == "__main__":
|
|
162 __main__()
|