comparison PsiCLASS-1.0.2/SubexonCorrelation.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 #ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
2 #define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
3
4 #include "SubexonGraph.hpp"
5
6 #include <stdio.h>
7 #include <math.h>
8
9 class SubexonCorrelation
10 {
11 private:
12 struct _subexon *lastSubexons ;
13 std::vector<FILE *> fileList ;
14 int offset ;
15 int prevSeCnt ;
16 int seCnt ;
17
18 double **correlation ;
19
20 int OverlapSize( int s0, int e0, int s1, int e1 )
21 {
22 int s = -1, e = -1 ;
23 if ( e0 < s1 || s0 > e1 )
24 return 0 ;
25 s = s0 > s1 ? s0 : s1 ;
26 e = e0 < e1 ? e0 : e1 ;
27 return e - s + 1 ;
28 }
29 public:
30 SubexonCorrelation()
31 {
32 offset = 0 ;
33 lastSubexons = NULL ;
34 correlation = NULL ;
35 prevSeCnt = 0 ;
36 }
37
38 ~SubexonCorrelation()
39 {
40 int cnt = fileList.size() ;
41 int i ;
42 for ( i = 0 ; i < cnt ; ++i )
43 fclose( fileList[i] ) ;
44 delete[] lastSubexons ;
45 }
46
47 void Initialize( char *f )
48 {
49 FILE *fpSl ;
50 fpSl = fopen( f, "r" ) ;
51 char buffer[1024] ;
52 while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL )
53 {
54 int len = strlen( buffer ) ;
55 if ( buffer[len - 1] == '\n' )
56 {
57 buffer[len - 1] = '\0' ;
58 --len ;
59
60 }
61 FILE *fp = fopen( buffer, "r" ) ;
62 fileList.push_back( fp ) ;
63 }
64
65 lastSubexons = new struct _subexon[ fileList.size() ] ;
66 int i, cnt ;
67 cnt = fileList.size() ;
68 for ( i = 0 ; i < cnt ; ++i )
69 lastSubexons[i].chrId = -1 ;
70
71 }
72
73 // We assume the subexons only contains the subexons we are interested in.
74 // And we assume that each time we call this function, the subexons we are interested are
75 // sorted in order.
76 void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments )
77 {
78 int sampleCnt = fileList.size() ;
79 if ( sampleCnt <= 1 )
80 return ;
81 int i, j, k ;
82 seCnt = cnt ;
83
84 // Obtain the depth matrix.
85 double **depth ;
86 depth = new double* [sampleCnt] ;
87 for ( i = 0 ; i < sampleCnt ; ++i )
88 {
89 depth[i] = new double[ cnt ] ;
90 memset( depth[i], 0, sizeof( double ) * cnt ) ;
91 }
92
93 char buffer[2048] ;
94 for ( i = 0 ; i < sampleCnt ; ++i )
95 {
96 struct _subexon se ;
97 bool useLastSubexon = false ;
98 if ( lastSubexons[i].chrId != -1 )
99 {
100 se = lastSubexons[i] ;
101 useLastSubexon = true ;
102 }
103 else
104 se.chrId = -1 ;
105
106 // Locate the subexon
107 while ( 1 )
108 {
109 if ( se.chrId < subexons[0].chrId
110 || ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) )
111 {
112 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
113 {
114 buffer[0] = '\0' ;
115 break ;
116 }
117 if ( buffer[0] == '#' )
118 continue ;
119
120 SubexonGraph::InputSubexon( buffer, alignments, se ) ;
121 --se.start ; --se.end ;
122 useLastSubexon = false ;
123 continue ;
124 }
125 break ;
126 }
127 if ( buffer[0] == '\0' )
128 {
129 lastSubexons[i] = se ;
130 continue ;
131 }
132
133 // Process the subexon.
134 int tag = 0 ;
135 while ( 1 )
136 {
137 lastSubexons[i] = se ;
138
139 if ( useLastSubexon == false )
140 {
141 SubexonGraph::InputSubexon( buffer, alignments, se ) ;
142 --se.start ; --se.end ;
143 }
144
145 if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end )
146 break ;
147
148 while ( 1 )
149 {
150 if ( tag > cnt || subexons[tag].end >= se.start )
151 break ;
152 ++tag ;
153 }
154
155 for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j )
156 {
157 int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ;
158 depth[i][j] += overlap * se.avgDepth ;
159 }
160
161 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
162 break ;
163 }
164 }
165 // Normalize the depth
166 for ( i = 0 ; i < sampleCnt ; ++i )
167 {
168 for ( j = 0 ; j < cnt ; ++j )
169 depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ;
170 }
171
172 // Compute the correlation.
173 double *avg = new double[cnt] ;
174 double *var = new double[cnt] ;
175 if ( correlation != NULL )
176 {
177 for ( i = 0 ; i < prevSeCnt ; ++i )
178 delete[] correlation[i] ;
179 delete[] correlation ;
180 }
181
182 correlation = new double*[cnt] ;
183 for ( i = 0 ; i < cnt ; ++i )
184 correlation[i] = new double[cnt] ;
185
186 memset( avg, 0, sizeof( double ) * cnt ) ;
187 memset( var, 0, sizeof( double ) * cnt ) ;
188 for ( i = 0 ; i < cnt ; ++i )
189 {
190 //avg[i] = 0 ;
191 //var[i] = 0 ;
192 memset( correlation[i], 0, sizeof( double ) * cnt ) ;
193 correlation[i][i] = 1 ;
194 }
195
196 for ( i = 0 ; i < sampleCnt ; ++i )
197 for ( j = 0 ; j < cnt ; ++j )
198 {
199 avg[j] += depth[i][j] ;
200 var[j] += depth[i][j] * depth[i][j] ;
201 }
202 for ( i = 0 ; i < cnt ; ++i )
203 {
204 avg[i] /= sampleCnt ;
205 var[i] = var[i] / sampleCnt - avg[i] * avg[i] ;
206 }
207
208 for ( i = 0 ; i < sampleCnt ; ++i )
209 for ( j = 0 ; j < cnt ; ++j )
210 for ( k = j + 1 ; k < cnt ; ++k )
211 {
212 //if ( subexons[j].geneId == subexons[k].geneId )
213 correlation[j][k] += depth[i][j] * depth[i][k] ;
214 }
215 for ( j = 0 ; j < cnt ; ++j )
216 for ( k = j + 1 ; k < cnt ; ++k )
217 {
218 if ( var[j] > 1e-6 && var[k] > 1e-6 )
219 {
220 correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ;
221 }
222 else
223 correlation[j][k] = 0 ;
224 //printf( "%d %d %d\n", j, k, cnt ) ;
225 correlation[k][j] = correlation[j][k] ;
226 }
227 //printf( "%lf\n", correlation[0][1] ) ;
228 // Release the memory.
229 for ( i = 0 ; i < sampleCnt ; ++i )
230 delete[] depth[i] ;
231 delete[] depth ;
232 delete[] avg ;
233 delete[] var ;
234
235 prevSeCnt = cnt ;
236 }
237
238 double Query( int i, int j )
239 {
240 if ( fileList.size() > 1 )
241 return correlation[i][j] ;
242 else
243 return 0 ;
244 }
245
246 void Assign( const SubexonCorrelation &c )
247 {
248 int i ;
249 fileList = c.fileList ;
250 if ( fileList.size() <= 1 )
251 return ;
252
253 if ( correlation != NULL )
254 {
255 for ( i = 0 ; i < seCnt ; ++i )
256 delete[] correlation[i] ;
257 delete[] correlation ;
258 }
259
260 seCnt = c.seCnt ;
261 correlation = new double*[seCnt] ;
262 for ( i = 0 ; i < seCnt ; ++i )
263 {
264 correlation[i] = new double[seCnt] ;
265 memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ;
266 }
267 }
268 } ;
269
270 #endif