annotate _modules/readsCoverage_res.py @ 0:69e8f12c8b31 draft

"planemo upload"
author bioit_sciensano
date Fri, 11 Mar 2022 15:06:20 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
1 ##@file readsCoverage_res.py
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
2 # Compact structure to store partial results of readsCoverage for later processing; used in multi machine mode and for checkpoints.
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
3 #
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
4 #@author vlegrand@pasteur.fr
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
5 import numpy as np
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
6 import os
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
7 import time
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
8
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
9 base_chk_fname="chk_"
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
10 chk_fname_sep="_"
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
11
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
12
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
13 ## Utility classes for testing the checkpoint implementation
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
14 # class checkpoint_visitor:
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
15 # def __str__(self):
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
16 # return self.__class__.__name__
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
17 #
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
18 # class checkpoint_visitor_11150_Cos5(checkpoint_visitor):
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
19 # def visit(self,chk_res):
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
20 # if chk_res.host_len!=0 or chk_res.gen!=25 or chk_res.reads_tested!=2:
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
21 # return False
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
22 # return True
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
23 #
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
24 # class checkpoint_visitor_38_Cos5(checkpoint_visitor):
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
25 # def visit(self,chk_res):
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
26 # if chk_res.host_len!=0 or chk_res.gen!=25 or chk_res.reads_tested!=2:
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
27 # return False
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
28 # return True
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
29
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
30
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
31
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
32
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
33
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
34
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
def loadArr(arr_idx0,arr_val0,arr_idx1,arr_val1,arr2D):
    """Write sparse (index, value) pairs into the two rows of arr2D, in place.

    arr_idx0/arr_val0 are paired element by element and stored in arr2D[0];
    arr_idx1/arr_val1 are stored the same way in arr2D[1]. Pairing uses zip,
    so surplus entries of the longer sequence of a pair are ignored.
    Nothing is returned.
    """
    sparse_rows = ((arr_idx0, arr_val0), (arr_idx1, arr_val1))
    for row, (positions, values) in enumerate(sparse_rows):
        for pos, value in zip(positions, values):
            arr2D[row][pos] = value
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
41
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
42
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
def loadRCRes(filename):
    """Rebuild an RCRes object from an .npz file written by RCRes.save.

    The file stores, for each of the six coverage tables, the indices and
    values of the non-zero cells of row 0 and row 1 (keys <table>_idx0,
    <table>_val0, <table>_idx1, <table>_val1), plus gen_len, host_len,
    list_hybrid, insert, paired_mismatch and reads_tested.

    Each table is re-created as a 2 x length array of zeros and the stored
    cells are written back with loadArr. The four phage-side tables use
    gen_len as length, the two host-side tables use host_len.

    @param filename path of the .npz file to read.
    @return an RCRes instance. insert is converted to a list; list_hybrid,
            paired_mismatch and reads_tested are passed on as the NumPy
            arrays read from the file.
    """
    phage_tables = ('termini_coverage', 'whole_coverage',
                    'paired_whole_coverage', 'phage_hybrid_coverage')
    host_tables = ('host_hybrid_coverage', 'host_whole_coverage')

    # The with-block closes the archive once everything has been read.
    with np.load(filename) as npzfile:
        gen_len = int(npzfile['gen_len'])
        host_len = int(npzfile['host_len'])

        coverages = {}
        for name in phage_tables + host_tables:
            length = gen_len if name in phage_tables else host_len
            dense = np.zeros((2, length), dtype=int)
            # Every table reads its own four keys; in particular row 1 of
            # phage_hybrid_coverage is restored from its own idx1/val1 arrays.
            loadArr(npzfile[name + '_idx0'], npzfile[name + '_val0'],
                    npzfile[name + '_idx1'], npzfile[name + '_val1'],
                    dense)
            coverages[name] = dense

        list_hybrid = npzfile['list_hybrid']
        insert = list(npzfile['insert'])
        paired_mismatch = npzfile['paired_mismatch']
        reads_tested = npzfile['reads_tested']

    return RCRes(coverages['termini_coverage'], coverages['whole_coverage'],
                 coverages['paired_whole_coverage'],
                 coverages['phage_hybrid_coverage'],
                 coverages['host_hybrid_coverage'],
                 coverages['host_whole_coverage'],
                 list_hybrid, insert, paired_mismatch, reads_tested)
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
104
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
105 ##
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
106 # Working structure for readsCoverage (encapsulating temporary results)
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
class RCWorkingS:
    """Holder for a partial readsCoverage result and its two counters.

    Attributes set by the constructor:
      interm_res -- the rc_res argument (the partial result object),
      count_line -- the cnt_line argument,
      read_match -- the read_match argument.
    """
    def __init__(self,rc_res,cnt_line,read_match):
        self.interm_res, self.count_line, self.read_match = rc_res, cnt_line, read_match
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
112
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
class RCRes:
    """Partial results of readsCoverage, with a compact on-disk form.

    Holds six 2-row coverage tables plus list_hybrid, insert,
    paired_mismatch and reads_tested. gen_len and host_len are derived from
    the length of row 0 of termini_coverage and host_hybrid_coverage.
    """

    # Attribute names of the coverage tables, in the order save() writes them.
    _TABLES = ('termini_coverage', 'whole_coverage', 'paired_whole_coverage',
               'phage_hybrid_coverage', 'host_hybrid_coverage',
               'host_whole_coverage')

    def __init__(self,termini_coverage,whole_coverage,paired_whole_coverage,
                 phage_hybrid_coverage, host_hybrid_coverage,
                 host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested):
        self.termini_coverage = termini_coverage
        self.whole_coverage = whole_coverage
        self.paired_whole_coverage = paired_whole_coverage
        self.phage_hybrid_coverage = phage_hybrid_coverage
        self.host_hybrid_coverage = host_hybrid_coverage
        self.host_whole_coverage = host_whole_coverage

        self.list_hybrid = list_hybrid
        self.insert = insert
        self.paired_mismatch = paired_mismatch
        self.reads_tested = reads_tested

        self.gen_len = len(self.termini_coverage[0])
        self.host_len = len(self.host_hybrid_coverage[0])

    def save(self,filename):
        """Write this result to filename with np.savez.

        For each row of each coverage table only the positions of the
        non-zero cells (<table>_idx<row>) and their values
        (<table>_val<row>) are stored; gen_len and host_len are stored so
        the full-size tables can be rebuilt.
        """
        payload = {'gen_len': np.array(self.gen_len),
                   'host_len': np.array(self.host_len)}
        for table in self._TABLES:
            matrix = getattr(self, table)
            for row in (0, 1):
                nonzero_pos = np.flatnonzero(matrix[row])
                payload['%s_idx%d' % (table, row)] = nonzero_pos
                payload['%s_val%d' % (table, row)] = matrix[row][nonzero_pos]
        payload['list_hybrid'] = self.list_hybrid
        payload['insert'] = self.insert
        payload['paired_mismatch'] = np.array(self.paired_mismatch)
        payload['reads_tested'] = self.reads_tested
        np.savez(filename, **payload)
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
185
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
186
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
class RCCheckpoint:
    """One checkpoint: an RCRes plus the counters encoded in its file name.

    The checkpoint file is named
    base_chk_fname + core_id + sep + idx_seq + sep + count_line + sep + read_match
    (np.savez adds the ".npz" extension).
    """
    def __init__(self,count_line,core_id,idx_seq,termini_coverage,whole_coverage,paired_whole_coverage,
                 phage_hybrid_coverage, host_hybrid_coverage,
                 host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match):
        self.count_line=count_line
        self.core_id=core_id
        self.idx_seq=idx_seq
        self.read_match=read_match
        self.res=RCRes(termini_coverage,whole_coverage,paired_whole_coverage,
                       phage_hybrid_coverage, host_hybrid_coverage,
                       host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested)

    def save(self,dir_chk,core_id,idx_refseq):
        """Write the checkpoint into dir_chk, then delete older ones.

        The new file name is built from this object's own core_id, idx_seq,
        count_line and read_match. Afterwards every other file in dir_chk
        whose name starts with the checkpoint prefix for (core_id,
        idx_refseq) is removed.

        @param dir_chk directory holding the checkpoint files.
        @param core_id core identifier used to select old files to remove.
        @param idx_refseq sequence index used to select old files to remove.
        """
        filename=base_chk_fname+str(self.core_id)+chk_fname_sep+str(self.idx_seq)+chk_fname_sep+\
                 str(self.count_line)+chk_fname_sep+str(self.read_match)
        self.res.save(os.path.join(dir_chk, filename))

        # Remove the previous checkpoint file(s). The match is anchored at the
        # start of the name so that unrelated files which merely contain the
        # prefix somewhere in their name are never deleted.
        new_file = filename+".npz"
        stale_prefix=base_chk_fname+ str(core_id) + chk_fname_sep + str(idx_refseq) + chk_fname_sep
        for f in os.listdir(dir_chk):
            if f!=new_file and f.startswith(stale_prefix):
                os.remove(os.path.join(dir_chk,f))
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
211
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
212
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
class RCCheckpoint_handler:
    """Creates, finds, loads and removes checkpoint files in one directory.

    Checkpointing is active when chk_freq is non-zero or test_mode is set.
    In test mode a checkpoint is written on every call to check(); otherwise
    one is written when at least chk_freq has elapsed since the previous one.

    NOTE(review): chk_freq is compared against elapsed seconds, following
    the seconds conversion in the original code -- confirm the unit used by
    callers.
    """
    def __init__(self,chk_freq,dir_chk,test_mode=False):
        """Store the settings and validate the checkpoint directory.

        @param chk_freq 0 disables periodic checkpoints.
        @param dir_chk checkpoint directory. In test mode it is created when
               missing; otherwise, if chk_freq is non-zero, it must exist.
        @param test_mode when true, checkpoint on every check() call.
        @raise RuntimeError if dir_chk exists but is not a directory, or is
               missing while chk_freq is non-zero outside test mode.
        """
        self.chk_freq=chk_freq
        self.test_mode = test_mode
        self.start_t=0
        self.dir_chk = dir_chk
        if self.test_mode:
            self.start_t = time.perf_counter_ns()
        # Reference instant (ns) for the periodic test in check(); moved
        # forward every time a checkpoint is written.
        self._last_chk_t = time.perf_counter_ns()

        if self.test_mode or self.chk_freq!=0:
            if os.path.exists(dir_chk):
                if not os.path.isdir(dir_chk):
                    raise RuntimeError("dir_chk must point to a directory")
            elif self.test_mode:
                os.mkdir(dir_chk)
            else:
                raise RuntimeError("dir_chk must point to an existing directory")

    def _is_active(self):
        """Return True when checkpoint files may exist or must be written."""
        return bool(self.chk_freq!=0 or self.test_mode)

    def _find_chk_file(self,prefix):
        """Return the first file name in dir_chk starting with prefix, or ""."""
        for fname in os.listdir(self.dir_chk):
            if fname.startswith(prefix):
                return fname
        return ""

    def getIdxSeq(self,core_id):
        """Return the sequence index stored in core_id's checkpoint file name.

        Returns 0 when checkpointing is inactive or no file is found.
        """
        idx_seq=0
        if self._is_active():
            chk_f = self._find_chk_file(base_chk_fname+ str(core_id) + chk_fname_sep)
            if chk_f != "":
                # Name layout: chk_<core>_<idx_seq>_<count_line>_<read_match>.npz
                idx_seq=int(chk_f.split(chk_fname_sep)[2])
        return idx_seq

    def load(self,core_id,idx_refseq):
        """Load the checkpoint of (core_id, idx_refseq), if there is one.

        @return an RCWorkingS built from the file content plus the
                count_line and read_match values parsed from the file name;
                None when checkpointing is inactive or no file matches.
        """
        if not self._is_active():
            return None
        prefix = base_chk_fname+ str(core_id) + chk_fname_sep + str(idx_refseq) + chk_fname_sep
        chk_f = self._find_chk_file(prefix)
        if chk_f == "":
            # No checkpoint found for this sequence, start from beginning.
            return None
        interm_res=loadRCRes(os.path.join(self.dir_chk,chk_f))
        fields=chk_f.split(chk_fname_sep)
        cnt_line=int(fields[-2])
        # The last field carries the .npz extension; keep what precedes the dot.
        read_match=int(fields[-1].split(".")[0])
        return RCWorkingS(interm_res,cnt_line,read_match)

    def check(self,count_line,core_id,idx_seq,termini_coverage,whole_coverage,paired_whole_coverage,
              phage_hybrid_coverage, host_hybrid_coverage,
              host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match):
        """Write a checkpoint if one is due.

        A checkpoint is due on every call in test mode; otherwise when
        chk_freq is non-zero and at least chk_freq seconds have passed since
        the handler was created or since the last checkpoint it wrote.
        """
        cur_t = time.perf_counter_ns()
        # perf_counter_ns counts nanoseconds; compare in seconds.
        elapsed_s = (cur_t - self._last_chk_t) * 1e-9
        if self.test_mode or (self.chk_freq!=0 and elapsed_s >= self.chk_freq):
            chkp=RCCheckpoint(count_line,core_id,idx_seq,termini_coverage,whole_coverage,paired_whole_coverage,
                              phage_hybrid_coverage, host_hybrid_coverage,
                              host_whole_coverage,list_hybrid,insert,paired_mismatch,reads_tested,read_match)
            chkp.save(self.dir_chk,core_id,idx_seq)
            self._last_chk_t = time.perf_counter_ns()

    def end(self,core_id):
        """Delete core_id's checkpoint files (not in test mode, chk_freq!=0)."""
        if (not self.test_mode) and self.chk_freq!=0:
            prefix=base_chk_fname+str(core_id)+chk_fname_sep
            for f in os.listdir(self.dir_chk):
                # Anchored match: only files that begin with this core's
                # checkpoint prefix are removed.
                if f.startswith(prefix):
                    os.remove(os.path.join(self.dir_chk, f))
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
303
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
304
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
305
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
306
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
307
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
308
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
309
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
310
69e8f12c8b31 "planemo upload"
bioit_sciensano
parents:
diff changeset
311