annotate bin/bsfcall.py @ 0:06f8460885ff

migrate from GitHub
author yutaka-saito
date Sun, 19 Apr 2015 20:51:13 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #!/usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 Bisulfighter::bsf-call
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5 Bisulfighter (http://epigenome.cbrc.jp/bisulfighter)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 by National Institute of Advanced Industrial Science and Technology (AIST)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8 http://creativecommons.org/licenses/by-nc-sa/3.0/
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11 __version__= "1.3"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13 import sys
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14 import os
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 import glob
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16 import threading
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
17 import subprocess
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
18 import Queue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
19 from datetime import datetime
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
20 import hashlib
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
21 from string import maketrans
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
22 import gzip
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
23 import bz2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
24 import zipfile
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
25 import logging
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
26 from time import sleep
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
27 from shutil import copy
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
28 import re
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
29 # import pysam
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
30
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
31 class BsfCallBase(object):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
32 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
33 base class for BsfCall, LastExecutor, McDetector.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
34 define functions that is used in each sub classes.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
35 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
37 def splitFilePath(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
38 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
39 split file path to directory path, file name (with file extension),
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
40 file name (without file extension) and file extension.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
41 if extension of filePath is '.gz', '.gz' extension is ignored.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
42 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
43 dir_name, file_name = os.path.split(filePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
44 base_name, ext = os.path.splitext(file_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
45 prog = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
46 if (ext == '.gz' or ext == '.gzip' or ext == '.bz2' or ext == '.bzip2' or ext == '.zip'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
47 prog = ext[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
48 base_name, ext = os.path.splitext(base_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
49 if len(ext) > 1:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
50 ext = ext[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
51 return (dir_name, file_name, base_name, ext, prog)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
53
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
54 def readNameByReadFile(self, readFilePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
55 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
56 get read name by read file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
57 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58 dir_name, file_name, read_name, ext, prog = self.splitFilePath(readFilePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
59 return read_name
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
60
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
61
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
62 def secondReadFilePathByFirstReadFilePath(self, readFile, secondReadType = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
63 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
64 get second read file path by first read file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
65 if first read file path is '/path/to/read_file_1.fastq' second read file
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
66 path is '/path/to/read_file_2.fastq'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
67 if secondReadType is specified, the extension of second read file is its
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
68 value.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
69 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
70 fpath = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
71 dir_name, file_name, basename, ext, prog = self.splitFilePath(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
72 if secondReadType:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
73 ext = ".%s" % secondReadType
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
74 if prog is not None:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
75 fpath = "%s/%s2%s.%s" % (dir_name, basename[0:-1], ext, prog)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
76 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
77 fpath = "%s/%s2%s" % (dir_name, basename[0:-1], ext)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
78 return fpath
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
79
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
80
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
81 def pairedEndReadNumbers(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
82 return (1, 2)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
83
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
84
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
85 def clearGap(self, seq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
86 return seq.replace("-", "")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
87
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
88
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
89 def complementStartPosition(self, genomeLen, subseqStart, subseqLen):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
90 return genomeLen - subseqStart - subseqLen
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
91
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
93 def complementSeq(self, seq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
94 return seq.translate(maketrans("ATGCatgc", "TACGtacg"))[::-1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
95
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
96
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
97 def mcContextType(self, genomeSeq, cBasePos, strand='+'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
98 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
99 get mC context type (CG, CHG, CHH) by genome sequence and C base position.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
100 if no mC context found, return None.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
101 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
102
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
103 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
104 if strand == '+':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
105 if genomeSeq[cBasePos + 1] == "G":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
106 return "CG"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
107 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
108 if genomeSeq[cBasePos + 2] == "G":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
109 return "CHG"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
110 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
111 return "CHH"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
112 elif strand == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
113 if genomeSeq[cBasePos - 1] == "C":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
114 return "CG"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
115 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
116 if genomeSeq[cBasePos - 2] == "C":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
117 return "CHG"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
118 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
119 return "CHH"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
120 return None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
121 except IndexError:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
122 return None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
123
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
124
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
125 def chrSort(self, a, b):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
126 return cmp(a, b)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
127
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
128
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
129 def strands(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
130 return ("+", "-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
131
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
132
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
133 def mcContextTypes(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
134 return ("CG", "CHG", "CHH")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
135
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
136
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
137 def bzip2File(self, filePath, wait = True, log = False):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
138 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
139 bzip2 file. If wait argument is False, without waiting for bzip2 process to be
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
140 completed, this function returns immediately.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
141 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
142
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
143 if log:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
144 logging.info("bzip2 start: %s" % filePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
145
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
146 dirpath, fname = os.path.split(filePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
147 cmd = "bzip2 %s" % fname
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
148 p = subprocess.Popen(cmd, shell = True, cwd = dirpath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
149 if wait:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
150 p.wait()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
151
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
152 if log:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
153 logging.info("bzip2 done: %s" % filePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
154
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
155
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
156 def isGzipFile(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
157 return filePath[-3:] == ".gz" or filePath[-5:] == ".gzip"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
158
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
159
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
160 def isBzip2File(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
161 return filePath[-4:] == ".bz2" or filePath[-6:] == ".bzip2"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
162
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
163
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
164 def isZipFile(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
165 return filePath[-4:] == ".zip"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
166
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
167
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
168 def isMafFile(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
169 f = open(filePath, "rb")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
170 data = f.read(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
171 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
172 if re.match("[\x20-\x7E]", data):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
173 f = open(filePath, "r")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
174 first_line = f.readline()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
175 if first_line[0:5] != "track" and first_line[0] != "#" and first_line[0:2] != "a ":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
176 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
177 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
178
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
179 if first_line[0:2] == "a ":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
180 cnt = 2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
181 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
182 cnt = 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
183
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
184 while True:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
185 line = f.readline()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
186 if line == "":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
187 break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
188 if line[0] == "#" or line.strip() == "":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
189 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
190
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
191 if cnt == 1 and line[0:2] != "a ":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
192 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
193 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
194
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
195 if cnt == 2 and line[0:2] != "s ":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
196 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
197 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
198
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
199 if cnt == 3:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
200 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
201 if line[0:2] == "s ":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
202 return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
203 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
204 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
205
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
206 cnt += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
207
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
208 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
209 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
210 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
211 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
212
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
213
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
214 def isBamFile(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
215 bgzf_magic = b"\x1f\x8b\x08\x04"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
216
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
217 f = open(filePath, "rb")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
218 data = f.read(4)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
219 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
220
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
221 return data == bgzf_magic
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
222
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
223
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
224 def isSamFile(self, filePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
225 f = open(filePath, "rb")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
226 data = f.read(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
227 if data == "@":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
228 tag = f.read(2)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
229 if tag == "HD" or tag == "SQ" or tag == "RG" or tag == "CO":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
230 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
231 return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
232
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
233 f.seek(0)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
234 data = f.read(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
235 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
236 if re.match("[\x20-\x7E]", data):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
237 f = open(filePath, "r")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
238 line = f.readline()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
239 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
240 return len(line.split("\t")) > 10
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
241 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
242 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
243
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
244
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
245 def scriptDir(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
246 return os.path.dirname(os.path.abspath(sys.argv[0]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
247
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
248
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
249 def chrnoFromFastaDescription(self, description):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
250 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
251 get chromosome number by fasta description line.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
252 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
253
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
254 return description.strip()[1:].strip()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
255
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
256
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
257 def chrsByRefGenome(self, refGenome):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
258 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
259 get all chromosome numbers that is included in the reference genome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
260 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
261
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
262 chrs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
263 for line in open(refGenome, "r"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
264 line = line.strip()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
265 if line[0] == ">":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
266 chrs.append(self.chrnoFromFastaDescription(line))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
267
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
268 return chrs
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
269
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
270
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
271 def readRefGenome(self, refGenome, refGenomeBuf, refGenomeChr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
272 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
273 read the reference genome fasta file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
274 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
275
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
276 logging.info("BsfCallBase::readRefGenome: %s" % refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
277 chr = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
278 buf = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
279 fin = open(refGenome, 'r')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
280 for line in fin:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
281 if line[0] == '>':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
282 chr = self.chrnoFromFastaDescription(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
283 logging.info("BsfCallBase::readRefGenome: chr=%s" % chr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
284 if len(buf) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
285 refGenomeBuf[refGenomeChr[-1]]=''.join(buf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
286 del buf[:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
287 refGenomeChr.append(chr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
288 elif chr != None:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
289 buf.append(line.strip().upper())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
290 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
291 logging.fatal("BsfCallBase::readRefGenome: the specified reference genome file \"%s\" is malformed." % refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
292 fin.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
293 refGenomeBuf[refGenomeChr[-1]]=''.join(buf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
294 logging.info("BsfCallBase::readRefGenome: done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
295 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
296
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
297
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
298 def lastalOpts(self, lastOpt):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
299 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
300 get options for lastal by bsf-call --last option
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
301 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
302
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
303 return " ".join(lastOpt.split(","))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
304
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
305
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
306 def mergeOpts(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
307 return ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
308
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
309
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
310 def filterOpts(self, mismapProb, scoreThres, isPairedEnd):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
311 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
312 get filtering option. this option is specified to last-map-probs or
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
313 last-pair-probs.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
314 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
315
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
316 option = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
317 if isPairedEnd:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
318 option = "-m%s" % str(mismapProb)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
319 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
320 option = "-s%d -m%s" % (scoreThres, str(mismapProb))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
321
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
322 return option
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
323
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
324
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
325 def isPairedEnd(self, readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
326 return self.pairedEndReadNumbers()[1] in readAttr
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
327
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
328
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
329 def jobIdByQsubOutput(self, qsubOutput):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
330 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
331 get job id submitted by qsub command and its output.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
332 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
333
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
334 fields = qsubOutput.strip().split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
335
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
336 return fields[2]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
337
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
338
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
339 def waitForSubmitJobs(self, jobIds, checkInterval = 10):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
340 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
341 wait for all jobs that have been submitted with qsub command to finish.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
342 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
343
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
344 error_jobs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
345 while True:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
346 all_done = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
347 qstat = os.popen("qstat")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
348 for line in qstat:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
349 fields = line.strip().split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
350 if fields[0] in jobIds:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
351 if fields[4].find('E') > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
352 if fields[0] not in error_jobs:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
353 logging.fatal("Error has occurred: Job ID=%s" % fields[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
354 error_jobs.append(fields[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
355 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
356 all_done = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
357 break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
358 qstat.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
359
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
360 if all_done:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
361 break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
362 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
363 sleep(checkInterval)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
364
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
365 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
366
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
367
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
368 def logJobSubmit(self, msg, jobId, cmd = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
369 logging.info("Submit job: %s --> Job ID = %s" % (msg, jobId))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
370 if cmd:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
371 logging.info(cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
372
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
373
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
374 def bamMapq2Mismap(self, mapq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
375 return pow(0.1, (float(mapq) / 10))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
376
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
377
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
378 def getAllMappingResultFiles(self, resultDirs):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
379 mapping_result_files = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
380
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
381 for result_dir in resultDirs:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
382 for root, dirs, files in os.walk(result_dir):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
383 for filename in files:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
384 logging.info("McDetector::getAllMappingResultFiles: %s" % filename)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
385 mapping_result_file = os.path.join(root, filename)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
386 mapping_result_files.append(mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
387
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
388 return mapping_result_files
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
389
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
390
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
391 class BsfCall(BsfCallBase):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
392 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
393 class to execute bsf-call process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
394 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
395
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
396 def __init__(self, refGenome, readFilePaths, cmdOpts):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
397 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
398 constructor of BsfCall
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
399 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
400
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
401 self.refGenome = refGenome
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
402 self.readFilePaths = readFilePaths
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
403 self.reads = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
404 self.opts = cmdOpts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
405
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
406 self.dataDir = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
407 self.genomeDir = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
408 self.mcContextDir = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
409
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
410 self.readInFh1 = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
411 self.readInFh2 = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
412 self.numReads = {1: 0, 2: 0}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
413
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
414 self.mappingResultDirs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
415 self.mappingResultFiles = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
416
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
417 self.setDataDir()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
418 self.setLogger()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
419
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
420 logging.info("bsf-call start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
421 self.logOption()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
422
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
423 # self.numReadsPerFile = self.sizeForSplitRead(self.opts["split_read_size"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
424
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
425 if self.opts["mapping_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
426 self.opts["only_mcdetection"] = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
427 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
428 self.opts["only_mcdetection"] = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
429
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
430
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
431 def execute(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
432 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
433 execute bsf-call process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
434 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
435
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
436 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
437 if self.opts["mapping_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
438 # Only mc detection
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
439 self.mappingResultDirs = self.opts["mapping_dir"].split(",")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
440 self.mappingResultFiles = self.getAllMappingResultFiles(self.mappingResultDirs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
441 self.opts["mapping_result_files"] = self.mappingResultFiles
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
442 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
443 self.makeIndexFile()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
444 self.prepareForReads()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
445 self.mappingResultDirs = self.processMapping()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
446
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
447 logging.debug("BsfCall:execute: mapping result directories are: %s" % ','.join(self.mappingResultDirs))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
448 self.processMcDetection(self.mappingResultDirs, self.opts["local_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
449
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
450 logging.info("bsf-call done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
451 except:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
452 logging.exception("Exception has occurred.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
453
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
454
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
455 def processMapping(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
456 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
457 run read mapping and filtering process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
458 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
459
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
460 logging.info("Mapping and filtering process start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
461
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
462 result_dirs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
463 for read_attr in self.reads:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
464 self.runLast(read_attr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
465 result_dirs.append(read_attr["results_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
466
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
467 logging.info("Mapping and filtering process done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
468
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
469 return result_dirs
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
470
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
471
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
472 def processMcDetection(self, resultDirs, localDir = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
473 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
474 run mC detection process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
475 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
476
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
477 logging.info("mC detection process start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
478
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
479 mc_detector = McDetector(self.refGenome, resultDirs, self.mcContextDir, self.opts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
480 mc_detector.execute(self.opts["output"], self.opts["num_threads"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
481
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
482 logging.info("mC detection process done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
483
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
484
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
485 def setDataDir(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
486 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
487 create directries to store the files of the bsf-call process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
488 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
489
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
490 if self.opts["work_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
491 self.dataDir = self.opts["work_dir"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
492 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
493 self.dataDir = self.autoWorkDir()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
494
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
495 self.mcContextDir = "%s/mc_contexts" % self.dataDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
496
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
497 if not os.path.exists(self.dataDir):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
498 os.makedirs(self.dataDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
499
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
500 if not os.path.exists(self.mcContextDir):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
501 os.makedirs(self.mcContextDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
502
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
503
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
504 def setLogger(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
505 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
506 create logger to store the logs of the bsf-call process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
507 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
508
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
509 log_level = logging.INFO
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
510 # log_level = logging.DEBUG
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
511
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
512 log_file = "%s/bsf-call.log" % self.dataDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
513 file_logger = logging.FileHandler(filename=log_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
514
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
515 file_logger.setLevel(log_level)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
516 file_logger.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
517
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
518 logging.getLogger().addHandler(file_logger)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
519 logging.getLogger().setLevel(log_level)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
520
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
521
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
522 def logOption(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
523 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
524 output bsf-call option values and arguments to the log.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
525 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
526
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
527 if self.opts["mapping_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
528 logging.info("Mapping result directory is specified. Only mC detection is executed.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
529 logging.info(" Mapping result directory: %s" % self.opts["mapping_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
530 logging.info(" Reference genome: %s" % self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
531 # logging.info(" Read BAM file: %s" % ("Yes" if self.opts["read_bam"] else "No"))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
532 # logging.info(" Read SAM file: %s" % ("Yes" if self.opts["read_sam"] else "No"))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
533 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
534 logging.info("Reference genome: %s" % self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
535 logging.info("Read files: %s" % self.readFilePaths)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
536 logging.info("Working directory: %s" % self.dataDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
537 logging.info("Options:")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
538 logging.info(" Threshold of the alignment score at filtering: %d" % self.opts["aln_score_thres"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
539 # logging.info(" Paired-end direction: %s" % self.opts["pe_direction"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
540 # logging.info(" Options for LAST: %s" % self.opts["last_opts"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
541
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
542 logging.info(" Threshold of read coverate: %d" % self.opts["coverage"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
543 logging.info(" Threshold of mC ratio: %s" % str(self.opts["lower_bound"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
544 logging.info(" Threshold of the mismap probability at filtering: %s" % str(self.opts["aln_mismap_prob_thres"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
545 logging.info(" Working directory: %s" % self.dataDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
546 logging.info(" Local directory: %s" % self.opts["local_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
547 logging.info(" Output file: %s" % (self.opts["output"] if self.opts["output"] else "(stdout)"))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
548 # logging.info(" Use cluster: %s" % ("Yes" if self.opts["use_cluster"] else "No"))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
549 # logging.info(" Queue name: %s" % self.opts["queue_list"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
550 logging.info(" Number of threads: %d" % self.opts["num_threads"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
551 # logging.info(" Split read size: %s" % self.opts["split_read_size"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
552
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
553
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
554 def prepareForReads(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
555 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
556 create directories to store split reads and result files.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
557 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
558
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
559 for read_no, read_path in enumerate(self.readFilePaths):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
560 readNo = read_no + 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
561 logging.info("Preparations for a read file start: %d: %s" % (readNo, read_path))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
562
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
563 data_dir = "%s/%d" % (self.dataDir, readNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
564 read = {"base_dir": data_dir, "path": read_path, "reads_dir": data_dir + "/reads", "results_dir": data_dir + "/results"}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
565
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
566 if not os.path.exists(data_dir):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
567 os.makedirs(data_dir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
568 os.makedirs(read["reads_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
569 os.makedirs(read["results_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
570
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
571 pe_no = self.pairedEndReadNumbers()[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
572 for readpath in read_path.split(","):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
573 dir_name, file_name, base_name, ext, prog = self.splitFilePath(readpath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
574 file_type = self.checkReadFileType(readpath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
575 read[pe_no] = {"name": base_name, "fname": file_name, "type": file_type, "path": readpath}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
576 pe_no += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
577
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
578 is_paired_end = self.isPairedEnd(read)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
579 logging.info("Paired-end: %s" % is_paired_end)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
580 logging.info(" Forward: %s" % read[1]["path"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
581 if is_paired_end:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
582 logging.info(" Reverse: %s" % read[2]["path"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
583
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
584 logging.info("Preparations for a read file done")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
585 self.reads.append(read)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
586 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
587
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
588
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
589 def runLast(self, readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
590 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
591 run LAST programs to map reads and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
592 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
593
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
594 is_paired_end = self.isPairedEnd(readAttr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
595 filter_option = self.filterOpts(self.opts["aln_mismap_prob_thres"], self.opts["aln_score_thres"], is_paired_end)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
596
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
597 if is_paired_end:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
598 logging.info('BsfCall::runLast: PairedEnd')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
599 last_exec = LastExecutorPairedEnd(self.refGenome, self.dataDir, readAttr["reads_dir"], readAttr["results_dir"], self.opts["num_threads"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
600 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
601 logging.info('BsfCall::runLast: Not PairedEnd')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
602 last_exec = LastExecutorSingle(self.refGenome, self.dataDir, readAttr["reads_dir"], readAttr["results_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
603
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
604 # last_exec.execute(readAttr, self.opts["num_threads"], self.lastalOpts(self.opts["last_opts"]), self.mergeOpts(), filter_option)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
605 # last_exec.execute(readAttr, 1, self.lastalOpts(self.opts["last_opts"]), self.mergeOpts(), filter_option)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
606 last_exec.execute(readAttr, 1, "", self.mergeOpts(), filter_option)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
607
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
608
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
609 def makeIndexFile(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
610 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
611 create index file of reference genome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
612 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
613
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
614 directions = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
615 if not os.path.exists("%s.f.prj" % self.refGenome):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
616 directions.append("f")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
617 if not os.path.exists("%s.r.prj" % self.refGenome):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
618 directions.append("r")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
619
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
620 if len(directions) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
621 logging.info("Make index file start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
622 last_executor = LastExecutor(self.refGenome, self.dataDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
623 last_executor.lastdb(directions, self.opts["num_threads"] > 1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
624 logging.info("Make index file done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
625
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
626
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
627 def checkReadFileType(self, readFilePath):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
628 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
629 get read file type.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
630 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
631
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
632 name, ext = os.path.splitext(readFilePath)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
633 if ext == ".gz":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
634 name, ext = os.path.splitext(name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
635
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
636 if len(ext) > 1:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
637 ext = ext[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
638
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
639 file_type = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
640
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
641 if ext == "sra" or "fastq" or "fasta":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
642 file_type = ext
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
643 elif ext == "fa":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
644 file_type = "fasta"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
645 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
646 f = open(readFilePath, "r")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
647 first_char = f.read(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
648 if first_char == "@":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
649 file_type = "fastq"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
650 elif first_char == ">":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
651 file_type = "fasta"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
652 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
653 file_type = "sra"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
654 f.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
655
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
656 return file_type
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
657
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
658
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
659 def splitedReadFilePath(self, outputDir, start, end, readDirection, ext):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
660 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
661 get splitted read file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
662 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
663
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
664 return "%s/%010d-%010d_%d.%s" % (outputDir, start, end, readDirection, ext)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
665
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
666
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
667 def fastqDumpedFilePath(self, outputDir, readName, readDirection = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
668 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
669 get output file path for fastq-dump command.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
670 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
671
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
672 path = "%s/%s" % (outputDir, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
673 if readDirection:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
674 path += "_%d" % readDirection
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
675
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
676 return path + ".fastq"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
677
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
678
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
679 def autoWorkDir(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
680 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
681 get working directory path determined automatically.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
682 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
683
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
684 now = datetime.now()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
685
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
686 s = ",".join(self.readFilePaths) + self.refGenome
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
687 for key, value in self.opts.items():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
688 s += ("%s:%s" % (key, value))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
689 h = hashlib.md5(s).hexdigest()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
690
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
691 return "%s-%06d-%s" % (now.strftime("%Y%m%d-%H%M%S"), now.microsecond, h[0:16])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
692
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
693
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
694 class BsfCallCluster(BsfCall):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
695 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
696 class to execute bsf-call process on pc cluster.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
697 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
698
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
699 def __init__(self, refGenome, readFilePaths, cmdOpts):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
700 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
701 constructor of BsfCallCluster
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
702 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
703
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
704 BsfCall.__init__(self, refGenome, readFilePaths, cmdOpts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
705
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
706 self.lastExecutor = LastExecutorCluster(self.refGenome, self.opts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
707 self.mappingJobIds = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
708
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
709
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
710 def processMapping(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
711 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
712 if bsf-call is executed on cluster, mapping and filtering job is submitted
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
713 when read file is splitted.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
714 therefore, in this function, only wait for all jobs to finish.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
715 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
716
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
717 logging.info("Waiting for all jobs to finish.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
718
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
719 self.waitForSubmitJobs(self.mappingJobIds)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
720
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
721 logging.info("Mapping and filtering process done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
722
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
723 return self.lastExecutor.resultDirs
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
724
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
725
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
726 def processMcDetection(self, resultDirs, localDir = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
727 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
728 for each chromosome number, submit mC detection process job to the cluster.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
729 after all jobs have been finished, output mC detection result.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
730 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
731
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
732 logging.info("mC detection process start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
733
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
734 chrs = self.chrsByRefGenome(self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
735 job_ids = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
736 for chr_no in chrs:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
737 job_id = self.submitMcDetectionJob(resultDirs, chr_no)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
738 job_ids.append(job_id)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
739 sleep(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
740 logging.info("Submitted jobs: %s" % ",".join(job_ids))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
741
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
742 self.waitForSubmitJobs(job_ids)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
743
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
744 mc_detector = McDetector(self.refGenome, resultDirs, self.mcContextDir, self.opts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
745
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
746 mc_detector.chrs = chrs
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
747 mc_detector.output(self.opts["output"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
748
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
749 logging.info("mC detection process done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
750
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
751
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
752 def submitMcDetectionJob(self, resultDirs, chrNo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
753 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
754 submit mC detection process job to the cluster.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
755 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
756
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
757 argv = self.qsubRemoteCommandArgv(resultDirs, chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
758 cmd = self.qsubCommand(chrNo, " ".join(map((lambda s: '"' + s + '"'), argv)))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
759
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
760 qsub = os.popen(cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
761 out = qsub.read()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
762 qsub.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
763
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
764 job_id = self.jobIdByQsubOutput(out)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
765 self.logJobSubmit("mC detection job: %s: Mapping result directories: %s" % (chrNo, resultDirs), job_id)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
766
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
767 return job_id
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
768
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
769
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
770 def qsubRemoteCommandArgv(self, resultDirs, chrNo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
771 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
772 get arguments for mC detection program (mc-detector.py).
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
773 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
774
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
775 argv = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
776
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
777 argv.append(self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
778 argv.append(",".join(resultDirs))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
779 argv.append(self.mcContextDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
780 argv.append(chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
781 argv.append(str(self.opts["only_mcdetection"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
782 argv.append(str(self.opts["lower_bound"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
783 argv.append(str(self.opts["coverage"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
784 argv.append(str(self.opts["aln_mismap_prob_thres"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
785
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
786 # if self.opts["read_bam"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
787 # argv.append("BAM")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
788 # elif self.opts["read_sam"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
789 # argv.append("SAM")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
790 # else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
791 # argv.append("MAF")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
792 argv.append("MAF")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
793
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
794 if self.opts["local_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
795 argv.append(self.opts["local_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
796
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
797 return argv
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
798
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
799
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
800 def qsubCommand(self, chrNo, cmdArgs):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
801 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
802 get qsub command to submit mC detection job.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
803 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
804
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
805 remote_cmd = os.path.join(self.scriptDir(), "mc-detector.py")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
806 out_file = os.path.join(self.mcContextDir, "mc-detector-%s.out" % chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
807 err_file = os.path.join(self.mcContextDir, "mc-detector-%s.err" % chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
808
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
809 if self.opts["queue_list"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
810 cmd = "qsub -o %s -e %s -q %s -pe openmpi %d -cwd -b y %s %s" % (out_file, err_file, self.opts["queue_list"], self.opts['num_threads'], remote_cmd, cmdArgs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
811 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
812 cmd = "qsub -o %s -e %s -pe openmpi %d -cwd -b y %s %s" % (out_file, err_file, self.opts['num_threads'], remote_cmd, cmdArgs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
813
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
814 return cmd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
815
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
816
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
817 def afterProcessSplitRead(self, readFile, readAttr = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
818 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
819 this function is called after output splitted one read file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
820 if bsf-call is executed on pc cluster, mapping and filtering job is submitted
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
821 after output one splitted read file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
822 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
823
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
824 if self.isPairedEnd(readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
825 fpath, ext = os.path.splitext(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
826 if fpath[-1:] == "2":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
827 read_file = "%s1.%s" % (fpath[0:-1], readAttr[1]["type"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
828 job_id = self.lastExecutor.executeOneRead(readFile, readAttr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
829 self.mappingJobIds.append(job_id)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
830 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
831 job_id = self.lastExecutor.executeOneRead(readFile, readAttr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
832 self.mappingJobIds.append(job_id)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
833
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
834
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
835 class LastExecutor(BsfCallBase):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
836 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
837 class to run LAST programs to map read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
838 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
839
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
840 def __init__(self, refGenome, baseDir = ".", readsDir = None, resultsDir = None, numThreads = 1):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
841 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
842 constructor of LastExecutor
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
843 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
844
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
845 self.refGenome = refGenome
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
846 self.baseDir = baseDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
847 self.readsDir = readsDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
848 self.resultsDir = resultsDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
849 self.queue = Queue.Queue()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
850 self.lock = threading.Lock()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
851 self.numThreads = numThreads
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
852
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
853
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
854 def execute(self, readAttr, numThreads = 1, lastalOpts = "", mergeOpts = "", filterOpts = ""):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
855 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
856 enqueue all splited read files to the queue.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
857 create and start threads to execute read mapping and filtering process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
858 wait for all threads to finish.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
859 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
860
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
861 self.enqueue(readAttr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
862
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
863 if self.queue.qsize()==0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
864 logging.fatal("LastExecutor::execute: Error: queue size=%d" % self.queue.qsize())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
865 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
866
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
867 logging.info("Queued %d files." % self.queue.qsize())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
868
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
869 threads = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
870 for i in range(numThreads):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
871 t = threading.Thread(target=self.worker, args=(readAttr, lastalOpts, mergeOpts, filterOpts))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
872 t.daemon = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
873 threads.append(t)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
874 t.start()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
875
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
876 i = 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
877 for thread in threads:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
878 logging.info("Waiting for %d th thread." % i)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
879 thread.join()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
880 logging.info("Joined %d th thread." % i)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
881 i+=1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
882
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
883
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
884 def worker(self, readAttr, lastalOpts, mergeOpts, filterOpts):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
885 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
886 thread worker to execute LAST.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
887 dequeue read file path from the queue and execute read mapping filtering
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
888 process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
889 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
890 if self.queue.empty():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
891 logging.info("LastExecutor::worker: Queue is empty.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
892 while not self.queue.empty():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
893 fpath = self.queue.get_nowait()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
894 self.runLast(fpath, readAttr, lastalOpts, mergeOpts, filterOpts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
895 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
896
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
897
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
898 def runLast(self, readFile, readAttr, lastalOpts, mergeOpts, filterOpts, rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
899 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
900 execute LAST programs to map read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
901 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
902
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
903 cmd = self.batchCmd(readFile, readAttr, lastalOpts, mergeOpts, filterOpts, rmInFiles)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
904 logging.info("LastExecutor::runLast: command=%s" % cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
905 p = subprocess.Popen(cmd, shell = True, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
906 # out, error = p.communicate()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
907 p.wait()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
908
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
909 error_msg = p.communicate()[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
910 if len(error_msg) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
911 logging.fatal(error_msg)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
912
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
913
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
914 def enqueue(self, readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
915 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
916 enqueue all splitted read files to the queue.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
917 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
918
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
919 # logging.info("%s/*_1.%s.bz2" % (self.readsDir, readAttr[1]["type"]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
920 # for read_file in glob.glob("%s/*_1.%s" % (self.readsDir, readAttr[1]["type"])):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
921 # self.queue.put(read_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
922 self.queue.put(readAttr[1]["path"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
923
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
924
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
925 def lastdb(self, directions, parallel = False):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
926 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
927 execute lastdb command to create index file of reference genome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
928 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
929
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
930 cmds = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
931 for direction in directions:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
932 cmds.append(self.lastdbCmd(direction))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
933
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
934 if parallel:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
935 processes = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
936 for cmd in cmds:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
937 logging.info(cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
938 p = subprocess.Popen(cmd, shell = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
939 out = p.stdout
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
940 processes.append({"p": p, "out": out})
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
941 for process in processes:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
942 process["p"].wait()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
943 out_data = process["out"].read()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
944 if len(out_data) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
945 logging.info(out_data)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
946 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
947 for cmd in cmds:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
948 logging.info(cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
949 p = subprocess.Popen(cmd, shell = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
950 out = p.stdout
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
951 p.wait()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
952 out_data = out.read()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
953 if len(out_data) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
954 logging.info(out_data)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
955
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
956
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
957 def lastdbCmd(self, direction):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
958 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
959 get lastdb command to create index file of reference genome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
960 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
961 # return "lastdb -w2 -u bisulfite_%s.seed %s.%s %s" % (direction, self.refGenome, direction, self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
962 return "lastdb -uBIS%s %s.%s %s" % (direction.upper(), self.refGenome, direction, self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
963
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
964
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
965 def lastalBsCmd(self, readFile, opts = ""):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
966 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
967 get lastal command to map read.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
968 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
969 # s_opt = self.lastalSopt(direction)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
970 read_name = self.readNameByReadFile(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
971 return "TMPDIR=%s last-bisulfite.sh %s.%s %s.%s %s > %s" % (self.readsDir, self.refGenome, 'f', self.refGenome, 'r', readFile, self.mappingResultFilePath(read_name))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
972
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
973 def lastalBsPairCmd(self, readFile1, readFile2, opts = ""):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
974 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
975 get lastal command to map read.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
976 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
977 # s_opt = self.lastalSopt(direction)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
978 read_name = self.readNameByReadFile(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
979 return "PARALLEL=-j%d TMPDIR=%s last-bisulfite-pair.sh %s.%s %s.%s %s %s > %s" % (self.numThreads, self.readsDir, self.refGenome, 'f', self.refGenome, 'r', readFile1, readFile2, self.mappingResultFilePath(read_name))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
980
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
981
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
982 # def mergeCmd(self, forwardFile, reverseFile, outputFile, opts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
983 def mergeCmd(self, inputFile, outputFile, opts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
984 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
985 get command to merge lastal output.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
986 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
987 cmd = "last-merge-batches %s > %s" % (inputFile, outputFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
988 if rmInFiles:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
989 cmd += "; rm %s" % inputFile
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
990 return cmd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
991
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
992
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
993 # def mappingAndMergeCmd(self, readFile, lastalOpts = "", mergeOpts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
994 def mappingPairCmd(self, readFile1, readFile2, lastalOpts = "", mergeOpts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
995 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
996 get read mapping and filtering command.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
997 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
998 read_name = self.readNameByReadFile(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
999 n, ext = os.path.splitext(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1000 if ext == ".gz":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1001 n, ext = os.path.splitext(n)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1002 lastal_qopt = self.lastalQopt(ext[1:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1003 lastal_opt = "%s %s" % (lastalOpts, lastal_qopt)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1004 mapping_file = self.mappingResultFilePath(read_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1005
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1006 return self.lastalBsPairCmd(readFile1, readFile2, lastal_opt)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1007
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1008
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1009 # def mappingResultFilePath(self, readName, direction):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1010 def mappingResultFilePath(self, readName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1011 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1012 get read mapping result file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1013 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1014
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1015 return "%s/%s.maf" % (self.resultsDir, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1016
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1017
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1018 def mergeResultFilePath(self, readName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1019 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1020 get merge result file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1021 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1022
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1023 return "%s/%s.merge.maf" % (self.resultsDir, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1024
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1025
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1026 def filterResultFilePath(self, readName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1027 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1028 get filtering result file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1029 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1030
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1031 return "%s/%s.maf" % (self.resultsDir, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1032
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1033
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1034 def lastalSopt(self, direction):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1035 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1036 get -s option for lastal.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1037 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1038
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1039 opt = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1040 if direction == "f":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1041 opt = "-s1"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1042 elif direction == "r":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1043 opt = "-s0"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1044
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1045 return opt
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1046
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1047
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1048 def lastalQopt(self, fileType):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1049 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1050 get -Q option for lastal.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1051 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1052
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1053 opt = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1054 if fileType == "fasta":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1055 opt = "-Q0"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1056 elif fileType == "fastq":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1057 opt = "-Q1"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1058
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1059 return opt
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1060
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1061
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1062 class LastExecutorCluster(LastExecutor):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1063 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1064 class to run LAST programs on pc cluster.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1065 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1066
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1067 def __init__(self, refGenome, bsfCallOpts):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1068 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1069 constructor of LastExecutorCluster
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1070 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1071
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1072 self.refGenome = refGenome
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1073 self.opts = bsfCallOpts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1074
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1075 self.resultDirs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1076
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1077
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1078 def executeOneRead(self, readFile, readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1079 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1080 execute read mapping and filtering process with specified read.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1081 on pc cluster, submit mapping and filtering job and return job id.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1082 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1083
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1084 if readAttr["results_dir"] not in self.resultDirs:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1085 self.resultDirs.append(readAttr["results_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1086
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1087 lastal_opts = self.lastalOpts(self.opts["last_opts"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1088 merge_opts = self.mergeOpts()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1089 filter_opts = self.filterOpts(self.opts["aln_mismap_prob_thres"], self.opts["aln_score_thres"], self.isPairedEnd(readAttr))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1090 job_id = self.submitJob(readFile, readAttr, lastal_opts, merge_opts, filter_opts, self.opts["queue_list"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1091
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1092 return job_id
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1093
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1094
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1095 def submitJob(self, readFile, readAttr, lastalOpts = "", mergeOpts = "", filterOpts = "", queueName = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1096 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1097 submit read mapping and filtering process job.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1098 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1099
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1100 job_id = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1101
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1102 read_name = self.readNameByReadFile(readFile)[0:-2]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1103 out_file = self.qsubStdoutFilePath(readAttr["base_dir"], read_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1104 err_file = self.qsubStderrFilePath(readAttr["base_dir"], read_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1105 remote_cmd = self.remoteCommand(readAttr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1106 remote_cmd_args = " ".join(map((lambda s: '"' + s + '"'), self.remoteCommandArgv(read_name, readAttr, lastalOpts, filterOpts)))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1107
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1108 if queueName:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1109 cmd = "qsub -o %s -e %s -q %s -cwd %s %s" % (out_file, err_file, queueName, remote_cmd, remote_cmd_args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1110 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1111 cmd = "qsub -o %s -e %s -cwd %s %s" % (out_file, err_file, remote_cmd, remote_cmd_args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1112
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1113 qsub = os.popen(cmd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1114 out = qsub.read()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1115 qsub.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1116
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1117 job_id = self.jobIdByQsubOutput(out)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1118
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1119 dir_path, file_name, base_name, ext, prog = self.splitFilePath(readFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1120 self.logJobSubmit("Mapping and filtering: read: %s/%s" % (dir_path, base_name[0:-2]), job_id)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1121
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1122 return job_id
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1123
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1124
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1125 def remoteCommand(self, readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1126 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1127 get read mapping and filtering command path to submit by qsub command.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1128 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1129
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1130 if self.isPairedEnd(readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1131 return os.path.join(self.scriptDir(), "mapping-p.sh")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1132 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1133 return os.path.join(self.scriptDir(), "mapping-s.sh")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1134
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1135
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1136 def remoteCommandArgv(self, readName, readAttr, lastalOpts, filterOpts):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1137 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1138 get read mapping and filtering command arguments.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1139 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1140
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1141 argv = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1142
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1143 argv.append(readAttr["reads_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1144 argv.append(readAttr["results_dir"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1145 argv.append(self.refGenome)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1146 argv.append(filterOpts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1147
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1148 argv.append(readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1149 argv.append(readAttr[1]["type"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1150 argv.append("%s %s" % (lastalOpts, self.lastalQopt(readAttr[1]["type"])))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1151
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1152 if self.isPairedEnd(readAttr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1153 argv.append(readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1154 argv.append(readAttr[2]["type"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1155 argv.append("%s %s" % (lastalOpts, self.lastalQopt(readAttr[2]["type"])))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1156
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1157 return argv
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1158
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1159
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1160 def qsubStdoutFilePath(self, dirPath, readName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1161 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1162 get qsub command standard output file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1163 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1164
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1165 return "%s/mapping_%s.out" % (dirPath, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1166
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1167
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1168 def qsubStderrFilePath(self, dirPath, readName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1169 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1170 get qsub command standard error file path.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1171 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1172
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1173 return "%s/mapping_%s.err" % (dirPath, readName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1174
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1175
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1176 class LastExecutorSingle(LastExecutor):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1177 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1178 class to run LAST programs to map single read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1179 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1180
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1181 def __init__(self, refGenome, baseDir, readsDir, resultsDir):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1182 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1183 constructor of LastExecutorSingle
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1184 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1185
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1186 LastExecutor.__init__(self, refGenome, baseDir, readsDir, resultsDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1187
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1188
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1189 def batchCmd(self, readFile1, readAttr, lastalOpts = "", mergeOpts = "", filterOpts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1190 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1191 get batch command to map read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1192 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1193 read_name = self.readNameByReadFile(readFile1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1194 out_file = self.filterResultFilePath(read_name[0:-2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1195 return "TMPDIR=%s last-bisulfite.sh %s.f %s.r %s > %s/%s.maf" % (self.readsDir, self.refGenome, self.refGenome, readFile1, self.resultsDir, read_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1196 # cmds = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1197 # cmds.append(self.mappingAndMergeCmd(readFile, lastalOpts, mergeOpts, rmInFiles))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1198 # cmds.append(self.mappingPairCmd(readFile1, readFile2, lastalOpts, mergeOpts, rmInFiles))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1199 # cmds.append(self.filterCmd(self.mergeResultFilePath(read_name), out_file, filterOpts, rmInFiles))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1200 # cmds.append(self.filterCmd(self.mappingResultFilePath(read_name), out_file, filterOpts, rmInFiles))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1201 # cmds.append("bzip2 %s" % out_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1202 # return "; ".join(cmds)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1203
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1204
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1205 def filterCmd(self, inputFile, outputFile, opts = "", rmInFile = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1206 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1207 get filter command.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1208 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1209
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1210 cmd = "last-map-probs %s %s > %s" % (opts, inputFile, outputFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1211 if rmInFile:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1212 cmd += "; rm %s" % inputFile
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1213
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1214 return cmd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1215
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1216
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1217 class LastExecutorPairedEnd(LastExecutor):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1218 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1219 class to run LAST programs to map paired-end read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1220 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1221
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1222 def __init__(self, refGenome, baseDir, readsDir, resultsDir, numThreads):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1223 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1224 constructor of LastExecutorPairedEnd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1225 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1226 LastExecutor.__init__(self, refGenome, baseDir, readsDir, resultsDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1227
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1228
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1229 def batchCmd(self, readFile1, readAttr, lastalOpts = "", mergeOpts = "", filterOpts = "", rmInFiles = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1230 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1231 get batch command to map read and filtering.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1232 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1233 # readFile2 = self.secondReadFilePathByFirstReadFilePath(readFile1, readAttr[2]["type"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1234 # read_name1 = self.readNameByReadFile(readFile1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1235 # read_name2 = self.readNameByReadFile(read_files[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1236 # merge_result_file = "%s %s" % (self.mergeResultFilePath(read_name1), self.mergeResultFilePath(read_name2))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1237 # mapping_result_file = "%s %s" % (self.mappingResultFilePath(read_name1), self.mappingResultFilePath(read_name2))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1238 # out_file = self.filterResultFilePath(read_name1[0:-2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1239 # return "TMPDIR=%s last-bisulfite-paired.sh %s.f %s.r %s %s > %s" % (self.baseDir, self.refGenome, self.refGenome, readFile1, readFile2, out_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1240 read_name1 = self.readNameByReadFile(readAttr[1]["path"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1241 read_name2 = self.readNameByReadFile(readAttr[2]["path"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1242 return "PARALLEL=-j%d TMPDIR=%s last-bisulfite-paired.sh %s.f %s.r %s %s > %s/%s,%s.maf" % (self.numThreads, self.readsDir, self.refGenome, self.refGenome, readAttr[1]["path"], readAttr[2]["path"], self.resultsDir, read_name1, read_name2)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1243
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1244
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1245 def filterCmd(self, inputFile, outputFile, opts = "", rmInFile = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1246 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1247 get filter command.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1248 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1249
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1250 cmd = "last-pair-probs %s %s > %s" % (opts, inputFile, outputFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1251 if rmInFile:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1252 cmd += "; rm %s" % inputFile
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1253
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1254 return cmd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1255
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1256
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1257 class McDetector(BsfCallBase):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1258 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1259 class to execute mC detection process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1260 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1261
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1262 def __init__(self, refGenome, resultDirs, mcContextDir, options):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1263 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1264 constructor of McDetector
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1265 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1266
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1267 self.refGenome = refGenome
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1268 self.refGenomeBuf = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1269 self.refGenomeChr = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1270
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1271 self.mappingResultDirs = resultDirs
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1272 self.mcContextDir = mcContextDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1273 self.lowerBound = options["lower_bound"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1274 self.coverageThreshold = options["coverage"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1275 self.onlyMcDetection = options["only_mcdetection"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1276
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1277 self.opts = options
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1278
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1279 self.mappingResultFiles = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1280
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1281 if self.onlyMcDetection:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1282 self.mismapThreshold = options["aln_mismap_prob_thres"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1283 # self.readBam = options["read_bam"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1284 # self.readSam = options["read_sam"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1285 self.readBam = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1286 self.readSam = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1287 if "mapping_result_files" in options:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1288 self.mappingResultFiles = options["mapping_result_files"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1289 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1290 self.mappingResultFiles = self.getAllMappingResultFiles(resultDirs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1291 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1292 self.mismapThreshold = 1e-10
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1293 self.readBam = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1294 self.readSam = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1295 self.mappingResultFiles = self.getAllMappingResultFiles(resultDirs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1296
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1297 if len(self.mappingResultFiles)==0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1298 logging.fatal("McDetector::__init__: error: no mapping result file found.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1299 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1300
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1301 if options["local_dir"]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1302 self.localDir = options["local_dir"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1303 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1304 self.localDir = mcContextDir
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1305
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1306 self.readRefGenome(self.refGenome, self.refGenomeBuf, self.refGenomeChr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1307
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1308 logging.debug("McDetector::__init__: mappingResultDirs=%s" % ','.join(self.mappingResultDirs))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1309
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1310
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1311 def execute(self, outputFile, numWorkers = 1):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1312 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1313 execute mC detection process and output result.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1314 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1315
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1316 self.process()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1317 self.output(outputFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1318
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1319
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1320 def process(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1321 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1322 execute mC detection process.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1323 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1324
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1325 logging.info("mC detection process start")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1326
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1327 if len(self.mappingResultFiles)==0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1328 logging.fatal("McDetector::process: error: no mapping result found.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1329 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1330
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1331 for mapping_result_file in self.mappingResultFiles:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1332 logging.info("Parsing mapping result file: %s" % mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1333 if self.onlyMcDetection:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1334 if not (self.readBam or self.readSam):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1335 if self.isGzipFile(mapping_result_file) or self.isMafFile(mapping_result_file):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1336 self.processMafFile(mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1337 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1338 # BAM or SAM
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1339 if self.readBam and self.isBamFile(mapping_result_file):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1340 self.processSamFile(mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1341 elif self.readSam and self.isSamFile(mapping_result_file):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1342 self.processSamFile(mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1343 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1344 self.processMafFile(mapping_result_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1345
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1346 logging.info("Parsing mapping result file done")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1347
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1348 if self.mcContextDir != self.localDir:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1349 copy(self.mcContextLocalFilePath(self.targetChr), self.mcContextDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1350
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1351
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1352 def output(self, outputFile):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1353 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1354 output mC detection result.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1355 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1356
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1357 logging.info("McDetector::output: outputFile=%s" % outputFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1358 popen_args = ['sort', '-k1']
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1359 list = glob.glob("%s/*.bsf" % self.localDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1360 if len(list)==0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1361 logging.fatal("McDetect::output: no *._bsf_ files found in %s" % self.localDir)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1362 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1363 for bsf_file in list:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1364 if os.path.getsize(bsf_file) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1365 popen_args.append(bsf_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1366 logging.info("McDetector::output: added bsf_file=\"%s\"" % bsf_file)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1367 logging.debug("McDetector::output: popen_args=%s" % ' '.join(popen_args))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1368 fout = open(outputFile, 'w')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1369 if len(popen_args) > 2:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1370 pipe = subprocess.Popen(popen_args, stdout=subprocess.PIPE)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1371 block = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1372 for line in pipe.stdout:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1373 (chr, ctx_pos, strand, mc_ctx, base, conf) = line.strip().split("\t")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1374 ctx_pos = int(ctx_pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1375 conf = float(conf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1376 if len(block)==0 or block[-1][0]==chr:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1377 block.append([chr, ctx_pos, strand, mc_ctx, base, conf])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1378 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1379 self.outputOneChrBlock(fout, block)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1380 del block[:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1381 block.append([chr, ctx_pos, strand, mc_ctx, base, conf])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1382 if len(block) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1383 self.outputOneChrBlock(fout, block)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1384 pipe.stdout.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1385 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1386 logging.info("McDetector::output: no result files found.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1387 fout.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1388
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1389
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1390 def outputOneChrBlock(self, fout, block):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1391 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1392 output mC detection result for one chromosome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1393 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1394
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1395 chr = block[0][0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1396 logging.info("McDetector::outputOneChrBlock: chr=%s" % chr)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1397 mc_contexts = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1398 for b in sorted(block, key=lambda block: block[1]):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1399 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1400 ctx_pos, strand, mc_ctx, base, conf = b[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1401 if not ctx_pos in mc_contexts:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1402 mc_contexts[ctx_pos] = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1403 if not strand in mc_contexts[ctx_pos]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1404 mc_contexts[ctx_pos][strand] = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1405 if not mc_ctx in mc_contexts[ctx_pos][strand]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1406 mc_contexts[ctx_pos][strand][mc_ctx] = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1407 mc_contexts[ctx_pos][strand][mc_ctx].append([base, conf])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1408 except ValueError, e:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1409 logging.warning("McDetect::outputOneChrBlock: value error: %s: %s -> %s" % (fpath, line.strip(), e.args[0]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1410
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1411 num_entry = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1412 if len(mc_contexts.keys()) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1413 for pos in sorted(mc_contexts.keys()):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1414 for strand in mc_contexts[pos].keys():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1415 for mc_ctx in mc_contexts[pos][strand].keys():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1416 coverage, mc_ratio = self.calcCoverage(mc_contexts[pos][strand][mc_ctx], strand)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1417 if coverage >= self.coverageThreshold and mc_ratio >= self.lowerBound:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1418 fout.write("%s\t%d\t%s\t%s\t%g\t%d\n" % (chr, pos, strand, mc_ctx, mc_ratio, coverage))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1419 num_entry += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1420 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1421 logging.info("McDetect::outputOneChrBlock: rejected: chr=%s pos=%d strand=%s coverage=%g mc_ratio=%g" % (chr, pos, strand, coverage, mc_ratio))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1422 # self.bzip2File(fpath, False)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1423 logging.info("McDetector::outputOneChrBlock: number of entries %s" % num_entry)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1424
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1425
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1426 def processMafFile(self, resultFile):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1427 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1428 read mapping result file, and output mC context data file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1429 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1430
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1431 file_name = self.splitFilePath(resultFile)[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1432 outputFile = "%s/%s.bsf" % (self.localDir, file_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1433 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1434 fin = open(resultFile, 'r')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1435 fout = open(outputFile, 'w')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1436 block = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1437 for line in fin:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1438 if line[0] == '#' or line[0] == 'p' or line[0] == "\n":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1439 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1440 if line[0] == 'a' or line[0] == 's' or line[0] == 'q':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1441 block.append(line.strip())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1442 if len(block)==4:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1443 if block[0][0]=='a' and block[1][0]=='s' and block[2][0]=='s' and block[3][0]=='q':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1444 mismap_prob = float(block[0].split('=')[2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1445 if mismap_prob <= self.mismapThreshold:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1446 b1 = block[1].split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1447 chr = b1[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1448 ref_seq = b1[-1].upper()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1449 ref_start = int(b1[2]) # 0-based position
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1450 read_seq = block[2].split()[-1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1451 read_len = len(read_seq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1452 read_qual = block[3].split()[-1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1453 strand = self.findStrandFromAlignment(chr, ref_start, read_seq, read_len)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1454 # strand = block[2].split()[4]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1455 if strand == '+' or strand == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1456 lines = self.extractMcContextsByOneRead(chr, strand, mismap_prob, ref_seq, ref_start, read_seq, read_qual, read_len)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1457 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1458 fout.write(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1459 logging.debug("processMafFile: a maf block(%s) is successfully captured." % strand)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1460 elif strand == '+-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1461 for st in ('+', '-'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1462 lines = self.extractMcContextsByOneRead(chr, st, mismap_prob, ref_seq, ref_start, read_seq, read_qual, read_len)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1463 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1464 fout.write(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1465 logging.debug("processMafFile: a maf block(%s) is successfully captured." % strand)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1466 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1467 logging.debug("processMafFile: a maf block does not show strand-specific info.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1468 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1469 logging.info("processMafFile: alignment \"%s\" has greater mismap prob. than the threshold." % block[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1470 del block[:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1471 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1472 logging.fatal("processMafFile: error: unexpected malformed maf block is found.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1473 logging.fatal("block 1: \"%s\"\n" % block[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1474 logging.fatal("block 2: \"%s\"\n" % block[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1475 logging.fatal("block 3: \"%s\"\n" % block[2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1476 logging.fatal("block 4: \"%s\"\n" % block[3])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1477 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1478 fin.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1479 fout.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1480
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1481 if len(block) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1482 logging.fatal("McDetect::processMafFile: error: possible malformed MAF file.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1483 for b in block:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1484 logging.fatal(b)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1485 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1486 except IOError:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1487 logging.fatal("McDetect::processMafFile: error: unable to read a MAF file \"%s\"" % resultFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1488 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1489 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1490
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1491
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1492 def processSamFile(self, samFile):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1493 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1494 read mapping BAM/SAM file, and output mC context data file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1495 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1496
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1497 logging.info("Process BAM/SAM file start: %s" % samFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1498
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1499 samfile = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1500 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1501 if self.readBam:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1502 samfile = pysam.Samfile(samFile, "rb")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1503 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1504 samfile = pysam.Samfile(samFile, "r")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1505
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1506 counter = 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1507 for aln in samfile.fetch(until_eof = True):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1508 samaln = SamAlnParser(samfile, aln)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1509 samaln.setRefGenome(self.targetChr, self.targetSeqD, self.targetSeqLen)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1510 samaln.parse()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1511
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1512 chr = samaln.referenceName()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1513
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1514 if (chr is None) or (chr != self.targetChr):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1515 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1516
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1517 if aln.mapq:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1518 mismap = self.bamMapq2Mismap(aln.mapq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1519 if mismap - self.mismapThreshold > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1520 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1521
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1522 read_seq = samaln.alnReadSeq.replace("t", "C")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1523 self.extractMcContextsByOneRead(chr, samaln.strand, 0, samaln.alnRefSeq.upper(), samaln.alnRefStart, self.targetSeqLen, read_seq, read_qual)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1524
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1525 counter += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1526 if counter > 500000:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1527 self.outputMcContextData()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1528 counter = 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1529
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1530 samfile.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1531 self.outputMcContextData()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1532 except Exception, e:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1533 logging.fatal(samFile)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1534 logging.fatal(" %s" % str(type(e)))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1535 logging.fatal(" %s" % str(e))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1536 if samfile:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1537 samfile.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1538
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1539 def extractMcContextsByOneRead(self, chr, strand, mismapProb, refSeq, refStart, readSeq, readQual, readLen):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1540 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1541 extract mC context by one read.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1542 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1543
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1544 lines = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1545 if strand == '+':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1546 real_pos = refStart
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1547 for i in range(readLen):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1548 if readSeq[i] == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1549 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1550 # if refSeq[i] == 'C' and (readSeq[i] == 'C' or readSeq[i] == 'T'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1551 if refSeq[i] == 'C':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1552 baseConf = self.qualityCharToErrorProb(readQual[i])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1553 ctx_type = self.mcContextType(self.refGenomeBuf[chr], real_pos, strand)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1554 line = "%s\t%d\t%s\t%s\t%s\t%g\n" % (chr, real_pos, strand, ctx_type, readSeq[i], (1-mismapProb) * (1-baseConf))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1555 # logging.debug("extractMcContextsByOneRead: line=%s" % line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1556 lines.append(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1557 real_pos += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1558 if strand == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1559 real_pos = refStart
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1560 for i in range(readLen):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1561 if readSeq[i] == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1562 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1563 # if refSeq[i] == 'G' and (readSeq[i] == 'G' or readSeq[i] == 'A'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1564 if refSeq[i] == 'G':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1565 baseConf = self.qualityCharToErrorProb(readQual[i])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1566 ctx_type = self.mcContextType(self.refGenomeBuf[chr], real_pos, strand)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1567 read_base = self.complementaryBase(readSeq[i])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1568 line = "%s\t%d\t%s\t%s\t%s\t%g\n" % (chr, real_pos, strand, ctx_type, readSeq[i], (1-mismapProb) * (1-baseConf))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1569 # logging.debug("extractMcContextsByOneRead: line=%s" % line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1570 lines.append(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1571 real_pos += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1572 return lines
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1573
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1574
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1575 def complementaryBase(self, base):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1576 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1577 compute a complemetary base.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1578 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1579
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1580 if base == 'A': return 'T'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1581 if base == 'C': return 'G'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1582 if base == 'G': return 'C'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1583 if base == 'T': return 'A'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1584 if base == 'N': return 'N'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1585 if base == 'a': return 't'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1586 if base == 'c': return 'g'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1587 if base == 'g': return 'c'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1588 if base == 't': return 'a'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1589 if base == 'n': return 'n'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1590 sys.exit(1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1591
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1592
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1593 def findStrandFromAlignment(self, chr, ref_start, read_seq, read_len):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1594 plus_sup = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1595 minus_sup = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1596 other_mismatch = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1597 base_size = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1598 ref_seq = self.refGenomeBuf[chr]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1599 for i in range(read_len):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1600 j = ref_start + i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1601 if ref_seq[j]=='C' and read_seq[i]=='T':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1602 plus_sup += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1603 base_size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1604 elif ref_seq[j]=='G' and read_seq[i]=='A':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1605 minus_sup += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1606 base_size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1607 elif (ref_seq[j]!='-' and read_seq[i]!='-') and ref_seq[j]!=read_seq[i]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1608 other_mismatch += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1609 base_size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1610 if base_size==0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1611 return '.'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1612 mismatch_rate = float(other_mismatch)/float(base_size)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1613 # if plus_sup > minus_sup and plus_sup > other_mismatch:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1614 # if plus_sup > minus_sup and mismatch_rate < 0.05:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1615 if plus_sup > minus_sup:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1616 return '+'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1617 # if minus_sup > plus_sup and minus_sup > other_mismatch:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1618 # if minus_sup > plus_sup and mismatch_rate < 0.05:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1619 if minus_sup > plus_sup:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1620 return '-'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1621 if plus_sup == minus_sup:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1622 return '+-'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1623 return '.'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1624
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1625
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1626 def __extractMcContextsByOneRead(self, chr, strand, mismapProb, refSeq, refStart, refSeqLen, readSeq, readQual):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1627 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1628 extract mC context by one read.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1629 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1630
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1631 logging.debug("extractMcContextsByOneRead(%s, %s, %g, %s, %d, %d, %s, %s)" % (chr, strand, mismapProb, refSeq, refStart, refSeqLen, readSeq, readQual))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1632
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1633 nogap_refseq = self.clearGap(refSeq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1634 bases = list(refSeq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1635 last_pos = len(nogap_refseq) - 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1636 pos = -1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1637 while True:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1638 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1639 pos = bases.index("C", pos + 1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1640 num_gaps = refSeq.count("-", 0, pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1641 real_pos = pos - num_gaps
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1642 ctx_type = self.mcContextType(nogap_refseq, real_pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1643 ctx_pos = refStart + real_pos
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1644 if ctx_type == None:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1645 if strand == "+":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1646 ctx_type = self.mcContextType(self.targetSeqD, ctx_pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1647 elif strand == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1648 ctx_type = self.mcContextType(self.targetSeqC, ctx_pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1649 if ctx_type == None:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1650 continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1651 if strand == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1652 ctx_pos = refSeqLen - ctx_pos - 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1653 line = "%d\t%s\t%s\t%s\n" % (ctx_pos, strand, ctx_type, readSeq[pos])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1654 base_name = self.mcContextFileBaseName(ctx_pos)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1655 if base_name not in self.mcDetectData:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1656 self.mcDetectData[base_name] = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1657 self.mcDetectData[base_name].append(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1658 except IndexError:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1659 logging.debug("extractMcContextsByOneRead#IndexError: %s %d %s %s %s %d" % (chr, ctx_pos, strand, ctx_type, readSeq, pos))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1660 except ValueError:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1661 break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1662
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1663 def qualityCharToErrorProb(self, qualityChar):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1664 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1665 convert FASTQ (Sanger) quality character to error probability.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1666 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1667 return 10**((ord('!')-ord(qualityChar))*0.1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1668
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1669 def outputMcContextData(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1670 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1671 output mC context data to file.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1672 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1673
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1674 logging.info("Output mC detection data start.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1675
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1676 for base_name in self.mcDetectData.keys():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1677 fpath = self.mcContextFilePathByName(base_name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1678 logging.debug("%s: %d" % (fpath, len(self.mcDetectData[base_name])))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1679 fo = open(fpath, "a")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1680 fo.write("".join(self.mcDetectData[base_name]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1681 fo.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1682 self.mcDetectData.clear()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1683
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1684 logging.info("Output mC detection data done.")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1685
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1686
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1687 def mcContextHash(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1688 h = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1689 for strand in self.strands():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1690 h[strand] = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1691 for mc_ctx in self.mcContextTypes():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1692 h[strand][mc_ctx] = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1693
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1694 return h
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1695
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1696
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1697 def isC(self, seq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1698 return seq[0:1].upper() == "C"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1699
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1700
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1701 def isT(self, seq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1702 return seq[0:1].upper() == "T"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1703
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1704
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1705 def calcCoverage(self, seqAry, strand):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1706 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1707 count the number of C and T (or G and A), calculate mC ratio.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1708 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1709
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1710 num_c = 0.0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1711 num_t = 0.0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1712 num_all = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1713 (C, T) = ('C', 'T')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1714 if strand == '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1715 (C, T) = ('G', 'A')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1716 for i in seqAry:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1717 num_all += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1718 if i[0] == C:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1719 num_c += i[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1720 elif i[0] == T:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1721 num_t += i[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1722
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1723 num_ct = num_c + num_t
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1724 if num_all == 0 or num_ct == 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1725 return (0, 0)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1726 # return (num_all, num_c/num_all)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1727 return (num_all, num_c/num_ct)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1728
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1729
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1730 def mafMismapValue(self, aLine):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1731 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1732 get mismap value by maf "a" line.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1733 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1734
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1735 mismap_fields = filter((lambda s: s.startswith('mismap=')), aLine.split())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1736 if len(mismap_fields) > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1737 return float(mismap_fields[0][7:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1738 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1739 return None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1740
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1741
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1742 def mcContextFilePath(self, pos):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1743 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1744 get mC context data file path with specified position.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1745 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1746
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1747 return self.mcContextFilePathByName(self.mcContextFileBaseName(pos))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1748
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1749
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1750 def mcContextFilePathByName(self, name):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1751 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1752 get mC context data file path with specified name.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1753 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1754
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1755 return "%s/%s/%s.tsv" % (self.localDir, self.targetChr, name)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1756
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1757
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1758 def mcContextFileBaseName(self, pos):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1759 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1760 get mC context data file name with specified chromosome number.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1761 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1762
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1763 return "%010d" % (int(pos) / 100000)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1764
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1765
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1766 def mcContextLocalFilePath(self, chrNo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1767 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1768 get local mC context data file path with specified chromosome number.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1769 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1770
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1771 return "%s/%s.tsv" % (self.localDir, chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1772
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1773
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1774 def mcContextGlobalFilePath(self, chrNo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1775 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1776 get global mC context data file path with specified chromosome number.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1777 """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1778
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1779 return "%s/%s.tsv" % (self.mcContextDir, chrNo)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1780
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1781
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1782 class SamAlnParser(BsfCallBase):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1783
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1784 def __init__(self, samfile, aln):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1785 self.samfile = samfile
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1786 self.aln = aln
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1787
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1788 self.refName = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1789 self.refSeq = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1790 self.refSeqLen = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1791
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1792 self.strand = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1793
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1794 self.alnRefStart = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1795 self.alnRefSeq = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1796 self.alnRefSeqLen = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1797
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1798 self.alnReadSeq = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1799
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1800
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1801 def setRefGenome(self, name, sequence, length = None):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1802 if length is None:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1803 length = len(sequence)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1804
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1805 self.refName = name
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1806 self.refSeq = sequence
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1807 self.refSeqLen = length
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1808
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1809
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1810 def referenceName(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1811 if self.aln.tid >= 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1812 return self.samfile.getrname(self.aln.tid)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1813 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1814 return None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1815
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1816
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1817 def getStrand(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1818 if self.aln.is_reverse:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1819 return "-"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1820 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1821 return "+"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1822
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1823
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1824 def parseCigar(self, cigar):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1825 return [{"op": v[-1:], "num": int(v[:-1])} for v in re.findall('\d+[A-Z=]', cigar)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1826
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1827
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1828 def alignmentSequences(self, refSeqPos, readSeq, cigars):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1829 refs = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1830 rest_refseq = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1831
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1832 reads = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1833 read_pos = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1834
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1835 for cigar in cigars:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1836 if cigar["op"] == "M":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1837 refs.append(self.refSeq[refSeqPos:refSeqPos+cigar["num"]])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1838 refSeqPos += cigar["num"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1839 reads.append(readSeq[read_pos:read_pos+cigar["num"]])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1840 read_pos += cigar["num"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1841 elif cigar["op"] == "I":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1842 refs.append("-" * cigar["num"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1843 reads.append(readSeq[read_pos:read_pos+cigar["num"]])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1844 read_pos += cigar["num"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1845 elif cigar["op"] == "P":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1846 refs.append("-" * cigar["num"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1847 reads.append("-" * cigar["num"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1848 elif cigar["op"] == "D" or cigar["op"] == "N":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1849 rest_refseq += cigar["num"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1850 reads.append("-" * cigar["num"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1851 elif cigar["op"] == "S":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1852 read_pos += cigar["num"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1853
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1854 if rest_refseq > 0:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1855 refs.append(self.refSeq[refSeqPos:refSeqPos+rest_refseq])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1856
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1857 return {"reference": "".join(refs), "read": "".join(reads)}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1858
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1859
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1860 def parse(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1861 self.strand = self.getStrand()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1862
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1863 self.alnRefStart = self.aln.pos
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1864 read_seq = self.aln.seq
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1865 cigars = self.parseCigar(self.aln.cigarstring)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1866 alignment = self.alignmentSequences(self.alnRefStart, read_seq, cigars)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1867
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1868 if self.strand == "+":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1869 self.alnRefSeq = alignment["reference"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1870 self.alnReadSeq = alignment["read"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1871 elif self.strand == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1872 nogap_refseq = self.clearGap(alignment["reference"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1873 self.alnRefStart = self.complementStartPosition(self.refSeqLen, self.alnRefStart, len(nogap_refseq))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1874 self.alnRefSeq = self.complementSeq(alignment["reference"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1875 self.alnReadSeq = self.complementSeq(alignment["read"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1876