0
|
1 #ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
|
|
2 #define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
|
|
3
|
|
4 #include "SubexonGraph.hpp"
|
|
5
|
|
6 #include <stdio.h>
|
|
7 #include <math.h>
|
|
8
|
|
9 class SubexonCorrelation
|
|
10 {
|
|
11 private:
|
|
12 struct _subexon *lastSubexons ;
|
|
13 std::vector<FILE *> fileList ;
|
|
14 int offset ;
|
|
15 int prevSeCnt ;
|
|
16 int seCnt ;
|
|
17
|
|
18 double **correlation ;
|
|
19
|
|
20 int OverlapSize( int s0, int e0, int s1, int e1 )
|
|
21 {
|
|
22 int s = -1, e = -1 ;
|
|
23 if ( e0 < s1 || s0 > e1 )
|
|
24 return 0 ;
|
|
25 s = s0 > s1 ? s0 : s1 ;
|
|
26 e = e0 < e1 ? e0 : e1 ;
|
|
27 return e - s + 1 ;
|
|
28 }
|
|
29 public:
|
|
30 SubexonCorrelation()
|
|
31 {
|
|
32 offset = 0 ;
|
|
33 lastSubexons = NULL ;
|
|
34 correlation = NULL ;
|
|
35 prevSeCnt = 0 ;
|
|
36 }
|
|
37
|
|
38 ~SubexonCorrelation()
|
|
39 {
|
|
40 int cnt = fileList.size() ;
|
|
41 int i ;
|
|
42 for ( i = 0 ; i < cnt ; ++i )
|
|
43 fclose( fileList[i] ) ;
|
|
44 delete[] lastSubexons ;
|
|
45 }
|
|
46
|
|
47 void Initialize( char *f )
|
|
48 {
|
|
49 FILE *fpSl ;
|
|
50 fpSl = fopen( f, "r" ) ;
|
|
51 char buffer[1024] ;
|
|
52 while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL )
|
|
53 {
|
|
54 int len = strlen( buffer ) ;
|
|
55 if ( buffer[len - 1] == '\n' )
|
|
56 {
|
|
57 buffer[len - 1] = '\0' ;
|
|
58 --len ;
|
|
59
|
|
60 }
|
|
61 FILE *fp = fopen( buffer, "r" ) ;
|
|
62 fileList.push_back( fp ) ;
|
|
63 }
|
|
64
|
|
65 lastSubexons = new struct _subexon[ fileList.size() ] ;
|
|
66 int i, cnt ;
|
|
67 cnt = fileList.size() ;
|
|
68 for ( i = 0 ; i < cnt ; ++i )
|
|
69 lastSubexons[i].chrId = -1 ;
|
|
70
|
|
71 }
|
|
72
|
|
73 // We assume the subexons only contains the subexons we are interested in.
|
|
74 // And we assume that each time we call this function, the subexons we are interested are
|
|
75 // sorted in order.
|
|
76 void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments )
|
|
77 {
|
|
78 int sampleCnt = fileList.size() ;
|
|
79 if ( sampleCnt <= 1 )
|
|
80 return ;
|
|
81 int i, j, k ;
|
|
82 seCnt = cnt ;
|
|
83
|
|
84 // Obtain the depth matrix.
|
|
85 double **depth ;
|
|
86 depth = new double* [sampleCnt] ;
|
|
87 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
88 {
|
|
89 depth[i] = new double[ cnt ] ;
|
|
90 memset( depth[i], 0, sizeof( double ) * cnt ) ;
|
|
91 }
|
|
92
|
|
93 char buffer[2048] ;
|
|
94 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
95 {
|
|
96 struct _subexon se ;
|
|
97 bool useLastSubexon = false ;
|
|
98 if ( lastSubexons[i].chrId != -1 )
|
|
99 {
|
|
100 se = lastSubexons[i] ;
|
|
101 useLastSubexon = true ;
|
|
102 }
|
|
103 else
|
|
104 se.chrId = -1 ;
|
|
105
|
|
106 // Locate the subexon
|
|
107 while ( 1 )
|
|
108 {
|
|
109 if ( se.chrId < subexons[0].chrId
|
|
110 || ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) )
|
|
111 {
|
|
112 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
|
|
113 {
|
|
114 buffer[0] = '\0' ;
|
|
115 break ;
|
|
116 }
|
|
117 if ( buffer[0] == '#' )
|
|
118 continue ;
|
|
119
|
|
120 SubexonGraph::InputSubexon( buffer, alignments, se ) ;
|
|
121 --se.start ; --se.end ;
|
|
122 useLastSubexon = false ;
|
|
123 continue ;
|
|
124 }
|
|
125 break ;
|
|
126 }
|
|
127 if ( buffer[0] == '\0' )
|
|
128 {
|
|
129 lastSubexons[i] = se ;
|
|
130 continue ;
|
|
131 }
|
|
132
|
|
133 // Process the subexon.
|
|
134 int tag = 0 ;
|
|
135 while ( 1 )
|
|
136 {
|
|
137 lastSubexons[i] = se ;
|
|
138
|
|
139 if ( useLastSubexon == false )
|
|
140 {
|
|
141 SubexonGraph::InputSubexon( buffer, alignments, se ) ;
|
|
142 --se.start ; --se.end ;
|
|
143 }
|
|
144
|
|
145 if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end )
|
|
146 break ;
|
|
147
|
|
148 while ( 1 )
|
|
149 {
|
|
150 if ( tag > cnt || subexons[tag].end >= se.start )
|
|
151 break ;
|
|
152 ++tag ;
|
|
153 }
|
|
154
|
|
155 for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j )
|
|
156 {
|
|
157 int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ;
|
|
158 depth[i][j] += overlap * se.avgDepth ;
|
|
159 }
|
|
160
|
|
161 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
|
|
162 break ;
|
|
163 }
|
|
164 }
|
|
165 // Normalize the depth
|
|
166 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
167 {
|
|
168 for ( j = 0 ; j < cnt ; ++j )
|
|
169 depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ;
|
|
170 }
|
|
171
|
|
172 // Compute the correlation.
|
|
173 double *avg = new double[cnt] ;
|
|
174 double *var = new double[cnt] ;
|
|
175 if ( correlation != NULL )
|
|
176 {
|
|
177 for ( i = 0 ; i < prevSeCnt ; ++i )
|
|
178 delete[] correlation[i] ;
|
|
179 delete[] correlation ;
|
|
180 }
|
|
181
|
|
182 correlation = new double*[cnt] ;
|
|
183 for ( i = 0 ; i < cnt ; ++i )
|
|
184 correlation[i] = new double[cnt] ;
|
|
185
|
|
186 memset( avg, 0, sizeof( double ) * cnt ) ;
|
|
187 memset( var, 0, sizeof( double ) * cnt ) ;
|
|
188 for ( i = 0 ; i < cnt ; ++i )
|
|
189 {
|
|
190 //avg[i] = 0 ;
|
|
191 //var[i] = 0 ;
|
|
192 memset( correlation[i], 0, sizeof( double ) * cnt ) ;
|
|
193 correlation[i][i] = 1 ;
|
|
194 }
|
|
195
|
|
196 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
197 for ( j = 0 ; j < cnt ; ++j )
|
|
198 {
|
|
199 avg[j] += depth[i][j] ;
|
|
200 var[j] += depth[i][j] * depth[i][j] ;
|
|
201 }
|
|
202 for ( i = 0 ; i < cnt ; ++i )
|
|
203 {
|
|
204 avg[i] /= sampleCnt ;
|
|
205 var[i] = var[i] / sampleCnt - avg[i] * avg[i] ;
|
|
206 }
|
|
207
|
|
208 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
209 for ( j = 0 ; j < cnt ; ++j )
|
|
210 for ( k = j + 1 ; k < cnt ; ++k )
|
|
211 {
|
|
212 //if ( subexons[j].geneId == subexons[k].geneId )
|
|
213 correlation[j][k] += depth[i][j] * depth[i][k] ;
|
|
214 }
|
|
215 for ( j = 0 ; j < cnt ; ++j )
|
|
216 for ( k = j + 1 ; k < cnt ; ++k )
|
|
217 {
|
|
218 if ( var[j] > 1e-6 && var[k] > 1e-6 )
|
|
219 {
|
|
220 correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ;
|
|
221 }
|
|
222 else
|
|
223 correlation[j][k] = 0 ;
|
|
224 //printf( "%d %d %d\n", j, k, cnt ) ;
|
|
225 correlation[k][j] = correlation[j][k] ;
|
|
226 }
|
|
227 //printf( "%lf\n", correlation[0][1] ) ;
|
|
228 // Release the memory.
|
|
229 for ( i = 0 ; i < sampleCnt ; ++i )
|
|
230 delete[] depth[i] ;
|
|
231 delete[] depth ;
|
|
232 delete[] avg ;
|
|
233 delete[] var ;
|
|
234
|
|
235 prevSeCnt = cnt ;
|
|
236 }
|
|
237
|
|
238 double Query( int i, int j )
|
|
239 {
|
|
240 if ( fileList.size() > 1 )
|
|
241 return correlation[i][j] ;
|
|
242 else
|
|
243 return 0 ;
|
|
244 }
|
|
245
|
|
246 void Assign( const SubexonCorrelation &c )
|
|
247 {
|
|
248 int i ;
|
|
249 fileList = c.fileList ;
|
|
250 if ( fileList.size() <= 1 )
|
|
251 return ;
|
|
252
|
|
253 if ( correlation != NULL )
|
|
254 {
|
|
255 for ( i = 0 ; i < seCnt ; ++i )
|
|
256 delete[] correlation[i] ;
|
|
257 delete[] correlation ;
|
|
258 }
|
|
259
|
|
260 seCnt = c.seCnt ;
|
|
261 correlation = new double*[seCnt] ;
|
|
262 for ( i = 0 ; i < seCnt ; ++i )
|
|
263 {
|
|
264 correlation[i] = new double[seCnt] ;
|
|
265 memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ;
|
|
266 }
|
|
267 }
|
|
268 } ;
|
|
269
|
|
270 #endif
|