Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/SubexonCorrelation.hpp @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER | |
2 #define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER | |
3 | |
4 #include "SubexonGraph.hpp" | |
5 | |
6 #include <stdio.h> | |
7 #include <math.h> | |
8 | |
9 class SubexonCorrelation | |
10 { | |
11 private: | |
12 struct _subexon *lastSubexons ; | |
13 std::vector<FILE *> fileList ; | |
14 int offset ; | |
15 int prevSeCnt ; | |
16 int seCnt ; | |
17 | |
18 double **correlation ; | |
19 | |
20 int OverlapSize( int s0, int e0, int s1, int e1 ) | |
21 { | |
22 int s = -1, e = -1 ; | |
23 if ( e0 < s1 || s0 > e1 ) | |
24 return 0 ; | |
25 s = s0 > s1 ? s0 : s1 ; | |
26 e = e0 < e1 ? e0 : e1 ; | |
27 return e - s + 1 ; | |
28 } | |
29 public: | |
30 SubexonCorrelation() | |
31 { | |
32 offset = 0 ; | |
33 lastSubexons = NULL ; | |
34 correlation = NULL ; | |
35 prevSeCnt = 0 ; | |
36 } | |
37 | |
38 ~SubexonCorrelation() | |
39 { | |
40 int cnt = fileList.size() ; | |
41 int i ; | |
42 for ( i = 0 ; i < cnt ; ++i ) | |
43 fclose( fileList[i] ) ; | |
44 delete[] lastSubexons ; | |
45 } | |
46 | |
47 void Initialize( char *f ) | |
48 { | |
49 FILE *fpSl ; | |
50 fpSl = fopen( f, "r" ) ; | |
51 char buffer[1024] ; | |
52 while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL ) | |
53 { | |
54 int len = strlen( buffer ) ; | |
55 if ( buffer[len - 1] == '\n' ) | |
56 { | |
57 buffer[len - 1] = '\0' ; | |
58 --len ; | |
59 | |
60 } | |
61 FILE *fp = fopen( buffer, "r" ) ; | |
62 fileList.push_back( fp ) ; | |
63 } | |
64 | |
65 lastSubexons = new struct _subexon[ fileList.size() ] ; | |
66 int i, cnt ; | |
67 cnt = fileList.size() ; | |
68 for ( i = 0 ; i < cnt ; ++i ) | |
69 lastSubexons[i].chrId = -1 ; | |
70 | |
71 } | |
72 | |
73 // We assume the subexons only contains the subexons we are interested in. | |
74 // And we assume that each time we call this function, the subexons we are interested are | |
75 // sorted in order. | |
76 void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments ) | |
77 { | |
78 int sampleCnt = fileList.size() ; | |
79 if ( sampleCnt <= 1 ) | |
80 return ; | |
81 int i, j, k ; | |
82 seCnt = cnt ; | |
83 | |
84 // Obtain the depth matrix. | |
85 double **depth ; | |
86 depth = new double* [sampleCnt] ; | |
87 for ( i = 0 ; i < sampleCnt ; ++i ) | |
88 { | |
89 depth[i] = new double[ cnt ] ; | |
90 memset( depth[i], 0, sizeof( double ) * cnt ) ; | |
91 } | |
92 | |
93 char buffer[2048] ; | |
94 for ( i = 0 ; i < sampleCnt ; ++i ) | |
95 { | |
96 struct _subexon se ; | |
97 bool useLastSubexon = false ; | |
98 if ( lastSubexons[i].chrId != -1 ) | |
99 { | |
100 se = lastSubexons[i] ; | |
101 useLastSubexon = true ; | |
102 } | |
103 else | |
104 se.chrId = -1 ; | |
105 | |
106 // Locate the subexon | |
107 while ( 1 ) | |
108 { | |
109 if ( se.chrId < subexons[0].chrId | |
110 || ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) ) | |
111 { | |
112 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) | |
113 { | |
114 buffer[0] = '\0' ; | |
115 break ; | |
116 } | |
117 if ( buffer[0] == '#' ) | |
118 continue ; | |
119 | |
120 SubexonGraph::InputSubexon( buffer, alignments, se ) ; | |
121 --se.start ; --se.end ; | |
122 useLastSubexon = false ; | |
123 continue ; | |
124 } | |
125 break ; | |
126 } | |
127 if ( buffer[0] == '\0' ) | |
128 { | |
129 lastSubexons[i] = se ; | |
130 continue ; | |
131 } | |
132 | |
133 // Process the subexon. | |
134 int tag = 0 ; | |
135 while ( 1 ) | |
136 { | |
137 lastSubexons[i] = se ; | |
138 | |
139 if ( useLastSubexon == false ) | |
140 { | |
141 SubexonGraph::InputSubexon( buffer, alignments, se ) ; | |
142 --se.start ; --se.end ; | |
143 } | |
144 | |
145 if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end ) | |
146 break ; | |
147 | |
148 while ( 1 ) | |
149 { | |
150 if ( tag > cnt || subexons[tag].end >= se.start ) | |
151 break ; | |
152 ++tag ; | |
153 } | |
154 | |
155 for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j ) | |
156 { | |
157 int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ; | |
158 depth[i][j] += overlap * se.avgDepth ; | |
159 } | |
160 | |
161 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) | |
162 break ; | |
163 } | |
164 } | |
165 // Normalize the depth | |
166 for ( i = 0 ; i < sampleCnt ; ++i ) | |
167 { | |
168 for ( j = 0 ; j < cnt ; ++j ) | |
169 depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ; | |
170 } | |
171 | |
172 // Compute the correlation. | |
173 double *avg = new double[cnt] ; | |
174 double *var = new double[cnt] ; | |
175 if ( correlation != NULL ) | |
176 { | |
177 for ( i = 0 ; i < prevSeCnt ; ++i ) | |
178 delete[] correlation[i] ; | |
179 delete[] correlation ; | |
180 } | |
181 | |
182 correlation = new double*[cnt] ; | |
183 for ( i = 0 ; i < cnt ; ++i ) | |
184 correlation[i] = new double[cnt] ; | |
185 | |
186 memset( avg, 0, sizeof( double ) * cnt ) ; | |
187 memset( var, 0, sizeof( double ) * cnt ) ; | |
188 for ( i = 0 ; i < cnt ; ++i ) | |
189 { | |
190 //avg[i] = 0 ; | |
191 //var[i] = 0 ; | |
192 memset( correlation[i], 0, sizeof( double ) * cnt ) ; | |
193 correlation[i][i] = 1 ; | |
194 } | |
195 | |
196 for ( i = 0 ; i < sampleCnt ; ++i ) | |
197 for ( j = 0 ; j < cnt ; ++j ) | |
198 { | |
199 avg[j] += depth[i][j] ; | |
200 var[j] += depth[i][j] * depth[i][j] ; | |
201 } | |
202 for ( i = 0 ; i < cnt ; ++i ) | |
203 { | |
204 avg[i] /= sampleCnt ; | |
205 var[i] = var[i] / sampleCnt - avg[i] * avg[i] ; | |
206 } | |
207 | |
208 for ( i = 0 ; i < sampleCnt ; ++i ) | |
209 for ( j = 0 ; j < cnt ; ++j ) | |
210 for ( k = j + 1 ; k < cnt ; ++k ) | |
211 { | |
212 //if ( subexons[j].geneId == subexons[k].geneId ) | |
213 correlation[j][k] += depth[i][j] * depth[i][k] ; | |
214 } | |
215 for ( j = 0 ; j < cnt ; ++j ) | |
216 for ( k = j + 1 ; k < cnt ; ++k ) | |
217 { | |
218 if ( var[j] > 1e-6 && var[k] > 1e-6 ) | |
219 { | |
220 correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ; | |
221 } | |
222 else | |
223 correlation[j][k] = 0 ; | |
224 //printf( "%d %d %d\n", j, k, cnt ) ; | |
225 correlation[k][j] = correlation[j][k] ; | |
226 } | |
227 //printf( "%lf\n", correlation[0][1] ) ; | |
228 // Release the memory. | |
229 for ( i = 0 ; i < sampleCnt ; ++i ) | |
230 delete[] depth[i] ; | |
231 delete[] depth ; | |
232 delete[] avg ; | |
233 delete[] var ; | |
234 | |
235 prevSeCnt = cnt ; | |
236 } | |
237 | |
238 double Query( int i, int j ) | |
239 { | |
240 if ( fileList.size() > 1 ) | |
241 return correlation[i][j] ; | |
242 else | |
243 return 0 ; | |
244 } | |
245 | |
246 void Assign( const SubexonCorrelation &c ) | |
247 { | |
248 int i ; | |
249 fileList = c.fileList ; | |
250 if ( fileList.size() <= 1 ) | |
251 return ; | |
252 | |
253 if ( correlation != NULL ) | |
254 { | |
255 for ( i = 0 ; i < seCnt ; ++i ) | |
256 delete[] correlation[i] ; | |
257 delete[] correlation ; | |
258 } | |
259 | |
260 seCnt = c.seCnt ; | |
261 correlation = new double*[seCnt] ; | |
262 for ( i = 0 ; i < seCnt ; ++i ) | |
263 { | |
264 correlation[i] = new double[seCnt] ; | |
265 memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ; | |
266 } | |
267 } | |
268 } ; | |
269 | |
270 #endif |