comparison ssake.py @ 1:386166019772 draft

Uploaded
author crs4
date Tue, 07 Jan 2014 04:48:50 -0500
parents 0ec408bcfc80
children
comparison
equal deleted inserted replaced
0:0ec408bcfc80 1:386166019772
33 if os.access(pext, flags): 33 if os.access(pext, flags):
34 result.append(pext) 34 result.append(pext)
35 return result 35 return result
36 36
37 37
class SSAKE:
    """Run the SSAKE assembler inside a private temporary directory.

    The constructor copies the run parameters from ``options`` onto the
    instance and creates the temporary working directory; ``run()`` builds
    and executes the command lines and moves the result files to their
    destinations; the working directory is removed when the instance is
    destroyed.

    ``options.kind_of_reads`` selects the input mode:

    * ``0`` -- a single input file (``options.if_unpaired``);
    * ``1`` -- paired input, prepared with makePairedOutput2EQUALfiles.pl;
    * ``2`` -- paired input, prepared with makePairedOutput2UNEQUALfiles.pl.

    Any other non-zero value is not supported: ``run()`` falls through to
    the single-file branch, which needs ``self.infile`` that was never set.
    """

    def __init__(self, logger, options):
        """Store the logger, read the run parameters and create the work dir.

        :param logger: logger object used for debug/info messages.
        :param options: parsed command line options (attribute access).
        """
        self.logger = logger
        self.executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl')
        # Log what which() reports for each required executable.
        for executable in self.executables:
            self.logger.debug(which(executable))
        self.logger.debug('Creating temp dir')
        self.wd = tempfile.mkdtemp()

        self.kind_of_reads = int(options.kind_of_reads)
        if not self.kind_of_reads:
            self.infile = options.if_unpaired
            self.paired = 0
        else:
            self.infile_r1 = options.if_paired_r1
            self.infile_r2 = options.if_paired_r2
            self.paired = 1
            self.insert_size = options.insert_size
            self.minnumlinks = options.minnumlinks
            self.error = options.error
            self.maxlinkratio = options.maxlinkratio
            self.minoverlap = options.minoverlap
        self.mindepthofcoverage = options.mindepthofcoverage
        self.minoverlappingbases = options.minoverlappingbases
        self.mincall = options.mincall
        self.baseratio = options.baseratio
        self.ignore_header = options.ignore_header
        self.prefix = options.prefix
        self.contigs = options.contigs
        self.log = options.logfile
        self.short = options.short
        self.singlets = options.singlets
        # The attribute only exists when a seeds file was given; run()
        # relies on its presence to decide whether to pass "-s".
        if options.seeds_file:
            self.seeds_file = options.seeds_file

    def _paired_command(self, prepare_script, with_unpaired):
        """Run the pairing script and return the paired part of the SSAKE command.

        :param prepare_script: name of the preparation script to execute.
        :param with_unpaired: when true, also pass ``unpaired.fa`` via ``-g``.
        :return: the SSAKE command line up to and including ``-x``.
        """
        # NOTE(review): commands are assembled as plain strings from option
        # values; confirm how execute() runs them before feeding it
        # untrusted file names.
        cmd = "%s %s %s %d" % (
            prepare_script, self.infile_r1, self.infile_r2,
            self.insert_size)
        self.logger.info("Preparing data")
        execute(cmd)
        inputs = "-f %s/paired.fa" % self.wd
        if with_unpaired:
            inputs += " -g %s/unpaired.fa" % self.wd
        return "%s %s -k %d -e %s -a %s -x %d" % (
            self.executables[0], inputs, self.minnumlinks, self.error,
            self.maxlinkratio, self.minoverlap)

    def run(self):
        """Execute SSAKE in the work dir and move its outputs into place.

        The process working directory is switched to the temporary
        directory for the duration of the run and restored afterwards, so
        the caller is not left inside a directory that is about to be
        deleted.
        """
        try:
            previous_dir = os.getcwd()
        except OSError:
            # The current directory may no longer exist; nothing to restore.
            previous_dir = None
        os.chdir(self.wd)
        try:
            seeds = ''
            if hasattr(self, 'seeds_file'):
                seeds = " -s %s" % self.seeds_file
            if self.kind_of_reads == 1:
                command = self._paired_command(self.executables[1], False)
            elif self.kind_of_reads == 2:
                command = self._paired_command(self.executables[2], True)
            else:
                command = "%s -f %s" % (self.executables[0], self.infile)
            command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (
                seeds, self.mindepthofcoverage, self.minoverlappingbases,
                self.mincall, self.baseratio, self.ignore_header,
                self.prefix, self.paired)
            self.logger.debug(command)
            self.logger.info("Executing SSAKE")
            execute(command)

            output_base = os.path.join(self.wd, self.prefix)
            # Text mode: the content is joined with a str below, which
            # fails for bytes on Python 3.
            with open("%s.log" % output_base, 'r') as ssake_log_file:
                self.logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()]))
            self.logger.info("Moving result files")
            shutil.move("%s.contigs" % output_base, self.contigs)
            shutil.move("%s.short" % output_base, self.short)
            shutil.move("%s.singlets" % output_base, self.singlets)
        finally:
            if previous_dir is not None:
                os.chdir(previous_dir)

    def __del__(self):
        """Remove the temporary working directory, if one was created."""
        # __del__ also runs for instances whose __init__ failed before
        # self.wd was assigned, and may run more than once in practice
        # (explicit call + garbage collection); never raise from here.
        wd = getattr(self, 'wd', None)
        if wd:
            shutil.rmtree(wd, ignore_errors=True)
113
114
115 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' 38 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
116 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' 39 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
117 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] 40 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
118 41
119 42
120 def __main__(): 43 def __main__():
121 """ main function """
122 parser = optparse.OptionParser() 44 parser = optparse.OptionParser()
123 parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name') 45 parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name')
124 parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name') 46 parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name')
125 parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name') 47 parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name')
126 parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name') 48 parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name')
152 if options.logfile: 74 if options.logfile:
153 kwargs['filename'] = options.logfile 75 kwargs['filename'] = options.logfile
154 logging.basicConfig(**kwargs) 76 logging.basicConfig(**kwargs)
155 logger = logging.getLogger('SSAKE scaffold assembly') 77 logger = logging.getLogger('SSAKE scaffold assembly')
156 78
157 S = SSAKE(logger, options) 79 executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl')
158 S.run() 80 logger.debug(which(executables[0]))
159 return 81 logger.debug(which(executables[1]))
82 logger.debug(which(executables[2]))
83 logger.debug('Creating temp dir')
84 kind_of_reads = int(options.kind_of_reads)
85 if not (kind_of_reads):
86 infile = options.if_unpaired
87 paired = 0
88 else:
89 infile_r1 = options.if_paired_r1
90 infile_r2 = options.if_paired_r2
91 paired = 1
92 insert_size = options.insert_size
93 minnumlinks = options.minnumlinks
94 error = options.error
95 maxlinkratio = options.maxlinkratio
96 minoverlap = options.minoverlap
97 mindepthofcoverage = options.mindepthofcoverage
98 minoverlappingbases = options.minoverlappingbases
99 mincall = options.mincall
100 baseratio = options.baseratio
101 ignore_header = options.ignore_header
102 prefix = options.prefix
103 contigs = options.contigs
104 short = options.short
105 singlets = options.singlets
106 seeds = " -s %s" % options.seeds_file if options.seeds_file else ''
107 wd = tempfile.mkdtemp()
108 try:
109 os.chdir(wd)
110 if kind_of_reads == 1:
111 cmd = "%s %s %s %d" % (
112 executables[1], infile_r1, infile_r2,
113 insert_size)
114 logger.info("Preparing data")
115 execute(cmd)
116 paired_file = "%s/paired.fa" % wd
117 command = "%s -f %s -k %d -e %s -a %s -x %d" % (executables[0], paired_file, minnumlinks, error, maxlinkratio, minoverlap)
118 elif kind_of_reads == 2:
119 cmd = "%s %s %s %d" % (
120 executables[2], infile_r1, infile_r2,
121 insert_size)
122 logger.info("Preparing data")
123 execute(cmd)
124 paired_file = "%s/paired.fa" % wd
125 unpaired_file = "%s/unpaired.fa" % wd
126 command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (executables[0], paired_file, unpaired_file, minnumlinks, error, maxlinkratio, minoverlap)
127 else:
128 command = "%s -f %s" % (executables[0], infile)
129 command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, mindepthofcoverage, minoverlappingbases, mincall, baseratio, ignore_header, prefix, paired)
130 logger.debug(command)
131 logger.info("Executing SSAKE")
132 execute(command)
133
134 with open("%s.log" % os.path.join(wd, prefix), 'rb') as ssake_log_file:
135 logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()]))
136 logger.info("Moving result files")
137 shutil.move("%s.contigs" % os.path.join(wd, prefix), contigs)
138 shutil.move("%s.short" % os.path.join(wd, prefix), short)
139 shutil.move("%s.singlets" % os.path.join(wd, prefix), singlets)
140 finally:
141 shutil.rmtree(wd)
142
160 143
161 if __name__ == "__main__": 144 if __name__ == "__main__":
162 __main__() 145 __main__()