0
|
1 // The class manage the blocks
|
|
2 // Li Song
|
|
3
|
|
4 #ifndef _LSONG_CLASSES_BLOCKS_HEADER
|
|
5 #define _LSONG_CLASSES_BLOCKS_HEADER
|
|
6
|
|
7 #include <stdlib.h>
|
|
8 #include <vector>
|
|
9 #include <map>
|
|
10 #include <assert.h>
|
|
11 #include <math.h>
|
|
12 #include <set>
|
|
13 #include <inttypes.h>
|
|
14
|
|
15 #include "defs.h"
|
|
16
|
|
17 extern bool VERBOSE ;
|
|
18 extern FILE *fpOut ;
|
|
19
|
|
20 extern int gMinDepth ;
|
|
21
|
|
22
|
|
23 struct _splitSite // means the next position belongs to another block
|
|
24 {
|
|
25 int64_t pos ;
|
|
26 char strand ;
|
|
27 int chrId ;
|
|
28 int type ; // 1-start of an exon. 2-end of an exon.
|
|
29
|
|
30 int support ;
|
|
31 int uniqSupport ;
|
|
32
|
|
33 int mismatchSum ;
|
|
34
|
|
35 int64_t oppositePos ; // the position of the other sites to form the intron.
|
|
36 } ;
|
|
37
|
|
38 struct _block
|
|
39 {
|
|
40 int chrId ;
|
|
41 int contigId ;
|
|
42 int64_t start, end ;
|
|
43 int64_t leftSplice, rightSplice ; // Some information about the left splice site and right splice site.
|
|
44 // record the leftmost and rightmost coordinates of a splice site within this block
|
|
45 //or the length of read alignment support the splice sites.
|
|
46 //Or the number of support.
|
|
47 int64_t depthSum ;
|
|
48
|
|
49 int leftType, rightType ; //0-soft boundary, 1-start of an exon, 2-end of an exon.
|
|
50
|
|
51 double leftRatio, rightRatio ; // the adjusted ratio-like value to the left and right anchor subexons.
|
|
52 double ratio ;
|
|
53
|
|
54 char leftStrand, rightStrand ;
|
|
55
|
|
56 int *depth ;
|
|
57 int prevCnt, nextCnt ; // Some connection information for the subexons.
|
|
58 int *prev ;
|
|
59 int *next ;
|
|
60 } ;
|
|
61
|
|
62 // adjacent graph
|
|
63 struct _adj
|
|
64 {
|
|
65 int ind ;
|
|
66 int info ;
|
|
67 int support ;
|
|
68 int next ;
|
|
69 } ;
|
|
70
|
|
71 class Blocks
|
|
72 {
|
|
73 private:
|
|
74 std::map<int, int> exonBlocksChrIdOffset ;
|
|
75
|
|
76 int64_t Overlap( int64_t s0, int64_t e0, int64_t s1, int64_t e1, int64_t &s, int64_t &e )
|
|
77 {
|
|
78 s = e = -1 ;
|
|
79 if ( e0 < s1 || s0 > e1 )
|
|
80 return 0 ;
|
|
81 s = s0 > s1 ? s0 : s1 ;
|
|
82 e = e0 < e1 ? e0 : e1 ;
|
|
83 return e - s + 1 ;
|
|
84 }
|
|
85
|
|
86 void Split( const char *s, char delimit, std::vector<std::string> &fields )
|
|
87 {
|
|
88 int i ;
|
|
89 fields.clear() ;
|
|
90 if ( s == NULL )
|
|
91 return ;
|
|
92
|
|
93 std::string f ;
|
|
94 for ( i = 0 ; s[i] ; ++i )
|
|
95 {
|
|
96 if ( s[i] == delimit || s[i] == '\n' )
|
|
97 {
|
|
98 fields.push_back( f ) ;
|
|
99 f.clear() ;
|
|
100 }
|
|
101 else
|
|
102 f.append( 1, s[i] ) ;
|
|
103 }
|
|
104 fields.push_back( f ) ;
|
|
105 f.clear() ;
|
|
106 }
|
|
107
|
|
108 void BuildBlockChrIdOffset()
|
|
109 {
|
|
110 // Build the map for the offsets of chr id in the exonBlock list.
|
|
111 exonBlocksChrIdOffset[ exonBlocks[0].chrId] = 0 ;
|
|
112 int cnt = exonBlocks.size() ;
|
|
113 for ( int i = 1 ; i < cnt ; ++i )
|
|
114 {
|
|
115 if ( exonBlocks[i].chrId != exonBlocks[i - 1].chrId )
|
|
116 exonBlocksChrIdOffset[ exonBlocks[i].chrId ] = i ;
|
|
117 }
|
|
118 }
|
|
119
|
|
120 bool CanReach( int from, int to, struct _adj *adj, bool *visited )
|
|
121 {
|
|
122 if ( visited[from] )
|
|
123 return false ;
|
|
124 visited[from] = true ;
|
|
125 int p ;
|
|
126 p = adj[from].next ;
|
|
127 while ( p != -1 )
|
|
128 {
|
|
129 if ( adj[p].ind == to )
|
|
130 return true ;
|
|
131 if ( adj[p].ind < to
|
|
132 && CanReach( adj[p].ind, to, adj, visited ) )
|
|
133 return true ;
|
|
134 p = adj[p].next ;
|
|
135 }
|
|
136 return false ;
|
|
137 }
|
|
138
|
|
139 void AdjustAndCreateExonBlocks( int tag, std::vector<struct _block> &newExonBlocks )
|
|
140 {
|
|
141 int i, j ;
|
|
142 if ( exonBlocks[tag].depth != NULL )
|
|
143 {
|
|
144 // Convert the increment and decrement into actual depth.
|
|
145 int len = exonBlocks[tag].end - exonBlocks[tag].start + 1 ;
|
|
146 int *depth = exonBlocks[tag].depth ;
|
|
147 for ( i = 1 ; i < len ; ++i )
|
|
148 depth[i] = depth[i - 1] + depth[i] ;
|
|
149
|
|
150 struct _block island ; // the portion created by the hollow.
|
|
151 island.start = island.end = -1 ;
|
|
152 island.depthSum = 0 ;
|
|
153
|
|
154 /*if ( exonBlocks[tag].start == 1562344 )
|
|
155 {
|
|
156 for ( i = 0 ; i < len ; ++i )
|
|
157 printf( "%d\n", depth[i] ) ;
|
|
158 }*/
|
|
159 // Adjust boundary accordingly.
|
|
160 int64_t adjustStart = exonBlocks[tag].start ;
|
|
161 int64_t adjustEnd = exonBlocks[tag].end ;
|
|
162
|
|
163 if ( exonBlocks[tag].leftType == 0 && exonBlocks[tag].rightType != 0 )
|
|
164 {
|
|
165 for ( i = len - 1 ; i >= 0 ; --i )
|
|
166 if ( depth[i] < gMinDepth )
|
|
167 break ;
|
|
168 ++i ;
|
|
169 if ( exonBlocks[tag].rightType == 2 && i + exonBlocks[tag].start > exonBlocks[tag].rightSplice )
|
|
170 i = exonBlocks[tag].rightSplice - exonBlocks[tag].start ;
|
|
171 adjustStart = i + exonBlocks[tag].start ;
|
|
172
|
|
173 if ( adjustStart > exonBlocks[tag].start )
|
|
174 {
|
|
175 int s, e ;
|
|
176 // firstly [s,e] is the range of depth array.
|
|
177 for ( s = 0 ; s < i ; ++s )
|
|
178 if ( depth[s] >= gMinDepth )
|
|
179 break ;
|
|
180 for ( e = i - 1 ; e >= s ; -- e )
|
|
181 if ( depth[e] >= gMinDepth )
|
|
182 break ;
|
|
183 if ( e >= s )
|
|
184 {
|
|
185 island = exonBlocks[tag] ;
|
|
186 island.depthSum = 0 ;
|
|
187 island.leftType = island.rightType = 0 ;
|
|
188 for ( j = s ; j <= e ; ++j )
|
|
189 island.depthSum += depth[j] ;
|
|
190 island.start = s + exonBlocks[tag].start ; // offset the coordinate.
|
|
191 island.end = e + exonBlocks[tag].start ;
|
|
192 }
|
|
193 }
|
|
194 }
|
|
195 else if ( exonBlocks[tag].leftType != 0 && exonBlocks[tag].rightType == 0 )
|
|
196 {
|
|
197 for ( i = 0 ; i < len ; ++i )
|
|
198 if ( depth[i] < gMinDepth )
|
|
199 break ;
|
|
200 --i ;
|
|
201 if ( exonBlocks[tag].leftType == 1 && i + exonBlocks[tag].start < exonBlocks[tag].leftSplice )
|
|
202 i = exonBlocks[tag].leftSplice - exonBlocks[tag].start ;
|
|
203 adjustEnd = i + exonBlocks[tag].start ;
|
|
204
|
|
205 if ( adjustEnd < exonBlocks[tag].end )
|
|
206 {
|
|
207 int s, e ;
|
|
208 // firstly [s,e] is the range of depth array.
|
|
209 for ( s = i + 1 ; s < len ; ++s )
|
|
210 if ( depth[s] >= gMinDepth )
|
|
211 break ;
|
|
212 for ( e = len - 1 ; e >= s ; --e )
|
|
213 if ( depth[e] >= gMinDepth )
|
|
214 break ;
|
|
215 if ( e >= s )
|
|
216 {
|
|
217 island = exonBlocks[tag] ;
|
|
218 island.depthSum = 0 ;
|
|
219 island.leftType = island.rightType = 0 ;
|
|
220 for ( j = s ; j <= e ; ++j )
|
|
221 island.depthSum += depth[j] ;
|
|
222 island.start = s + exonBlocks[tag].start ; // offset the coordinate.
|
|
223 island.end = e + exonBlocks[tag].start ;
|
|
224 }
|
|
225 }
|
|
226 }
|
|
227 else if ( exonBlocks[tag].leftType == 0 && exonBlocks[tag].rightType == 0 )
|
|
228 {
|
|
229 for ( i = 0 ; i < len ; ++i )
|
|
230 if ( depth[i] >= gMinDepth )
|
|
231 break ;
|
|
232 adjustStart = i + exonBlocks[tag].start ;
|
|
233
|
|
234 for ( i = len - 1 ; i >= 0 ; --i )
|
|
235 if ( depth[i] >= gMinDepth )
|
|
236 break ;
|
|
237 adjustEnd = i + exonBlocks[tag].start ;
|
|
238 }
|
|
239 else if ( exonBlocks[tag].leftType == 1 && exonBlocks[tag].rightType == 2
|
|
240 && exonBlocks[tag].end - exonBlocks[tag].start + 1 >= 1000 )
|
|
241 {
|
|
242 // The possible merge of two genes or merge of UTRs, if we don't break the low coverage part.
|
|
243 // If we decide to cut, I'll reuse the variable "island" to represent the subexon on right hand side.
|
|
244 int l ;
|
|
245 int gapSize = 30 ;
|
|
246 for ( i = 0 ; i <= len - ( gapSize + 1 ) ; ++i )
|
|
247 {
|
|
248 if ( depth[i] < gMinDepth )
|
|
249 {
|
|
250 for ( l = i ; l < i + ( gapSize + 1 ) ; ++l )
|
|
251 if ( depth[l] >= gMinDepth )
|
|
252 break ;
|
|
253 if ( l < i + ( gapSize + 1 ) )
|
|
254 {
|
|
255 i = l - 1 ;
|
|
256 continue ;
|
|
257 }
|
|
258 else
|
|
259 break ;
|
|
260 }
|
|
261 }
|
|
262
|
|
263 for ( j = len - 1 ; j >= ( gapSize + 1 ) - 1 ; --j )
|
|
264 {
|
|
265 if ( depth[j] < gMinDepth )
|
|
266 {
|
|
267 for ( l = j ; l >= j - ( gapSize + 1 ) + 1 ; --l )
|
|
268 if ( depth[l] >= gMinDepth )
|
|
269 break ;
|
|
270
|
|
271 if ( l >= j - ( gapSize + 1 ) + 1 )
|
|
272 {
|
|
273 j = l + 1 ;
|
|
274 continue ;
|
|
275 }
|
|
276 else
|
|
277 break ;
|
|
278 }
|
|
279 }
|
|
280
|
|
281 if ( j - i + 1 > gapSize )
|
|
282 {
|
|
283 // we break.
|
|
284 --i ; ++j ;
|
|
285
|
|
286 adjustEnd = i + exonBlocks[tag].start ;
|
|
287
|
|
288 if ( j < len - 1 )
|
|
289 {
|
|
290 island = exonBlocks[tag] ;
|
|
291 island.depthSum = 0 ;
|
|
292 island.leftType = 0 ;
|
|
293 for ( l = j ; l <= len - 1 ; ++l )
|
|
294 island.depthSum += depth[j] ;
|
|
295 island.start = j + exonBlocks[tag].start ; // offset the coordinate.
|
|
296 island.end = exonBlocks[tag].end ;
|
|
297 }
|
|
298
|
|
299 exonBlocks[tag].rightType = 0 ; // put it here, so the island can get the right info
|
|
300 //fprintf( stderr, "break %d %d %d %d.\n", (int)exonBlocks[tag].start, (int)adjustEnd, i + (int)exonBlocks[tag].start, j + (int)exonBlocks[tag].start ) ;
|
|
301 }
|
|
302 }
|
|
303
|
|
304 int lostDepthSum = 0 ;
|
|
305 for ( i = exonBlocks[tag].start ; i < adjustStart ; ++i )
|
|
306 lostDepthSum += depth[i - exonBlocks[tag].start ] ;
|
|
307 for ( i = adjustEnd + 1 ; i < exonBlocks[tag].end ; ++i )
|
|
308 lostDepthSum += depth[i - exonBlocks[tag].start ] ;
|
|
309 exonBlocks[tag].depthSum -= lostDepthSum ;
|
|
310
|
|
311 delete[] exonBlocks[tag].depth ;
|
|
312 exonBlocks[tag].depth = NULL ;
|
|
313
|
|
314 //if ( exonBlocks[tag].start == 1562344 )
|
|
315 // printf( "%d %d\n", adjustStart, adjustEnd ) ;
|
|
316 if ( ( len > 1 && adjustEnd - adjustStart + 1 <= 1 ) || ( adjustEnd - adjustStart + 1 <= 0 ) )
|
|
317 return ;
|
|
318 exonBlocks[tag].start = adjustStart ;
|
|
319 exonBlocks[tag].end = adjustEnd ;
|
|
320
|
|
321 if ( island.start != -1 && island.end < exonBlocks[tag].start )
|
|
322 newExonBlocks.push_back( island ) ;
|
|
323
|
|
324 newExonBlocks.push_back( exonBlocks[tag] ) ;
|
|
325 if ( island.start != -1 && island.start > exonBlocks[tag].end )
|
|
326 newExonBlocks.push_back( island ) ;
|
|
327 }
|
|
328 }
|
|
329 public:
|
|
330 std::vector<struct _block> exonBlocks ;
|
|
331
|
|
332 Blocks() { }
|
|
333 ~Blocks()
|
|
334 {
|
|
335 int blockCnt = exonBlocks.size() ;
|
|
336 for ( int i = 0 ; i < blockCnt ; ++i )
|
|
337 {
|
|
338 if ( exonBlocks[i].next != NULL )
|
|
339 {
|
|
340 delete[] exonBlocks[i].next ;
|
|
341 }
|
|
342 if ( exonBlocks[i].prev != NULL )
|
|
343 {
|
|
344 delete[] exonBlocks[i].prev ;
|
|
345 }
|
|
346 }
|
|
347 }
|
|
348
|
|
349 double GetAvgDepth( const struct _block &block )
|
|
350 {
|
|
351 return block.depthSum / (double)( block.end - block.start + 1 ) ;
|
|
352 }
|
|
353
|
|
354 int BuildExonBlocks( Alignments &alignments )
|
|
355 {
|
|
356 int tag = 0 ;
|
|
357 while ( alignments.Next() )
|
|
358 {
|
|
359 int i, j, k ;
|
|
360 int segCnt = alignments.segCnt ;
|
|
361 struct _pair *segments = alignments.segments ;
|
|
362 int eid = 0 ; // the exonblock id that the segment update
|
|
363
|
|
364 // locate the first exonblock that beyond the start of the read.
|
|
365 while ( tag < (int)exonBlocks.size() && ( exonBlocks[tag].end < segments[0].a - 1
|
|
366 || exonBlocks[tag].chrId != alignments.GetChromId() ) )
|
|
367 {
|
|
368 ++tag ;
|
|
369 }
|
|
370 // due to the merge of exon blocks, we might need roll back
|
|
371 --tag ;
|
|
372 while ( tag >= 0 && ( exonBlocks[tag].end >= segments[0].a - 1
|
|
373 && exonBlocks[tag].chrId == alignments.GetChromId() ) )
|
|
374 {
|
|
375 --tag ;
|
|
376 }
|
|
377 ++tag ;
|
|
378
|
|
379 for ( i = 0 ; i < segCnt ; ++i )
|
|
380 {
|
|
381 //if ( i == 0 )
|
|
382 // printf( "hi %d %s %d %d\n", i, alignments.GetReadId(), segments[i].a, segments[i].b ) ;
|
|
383 for ( j = tag ; j < (int)exonBlocks.size() ; ++j )
|
|
384 {
|
|
385 if ( exonBlocks[j].end >= segments[i].a - 1 )
|
|
386 break ;
|
|
387 }
|
|
388 if ( j >= (int)exonBlocks.size() )
|
|
389 {
|
|
390 // Append a new block
|
|
391 struct _block newSeg ;
|
|
392 newSeg.chrId = alignments.GetChromId() ;
|
|
393 newSeg.start = segments[i].a ;
|
|
394 newSeg.end = segments[i].b ;
|
|
395
|
|
396 newSeg.leftSplice = -1 ;
|
|
397 newSeg.rightSplice = -1 ;
|
|
398 if ( i > 0 )
|
|
399 newSeg.leftSplice = segments[i].a ;
|
|
400 if ( i < segCnt - 1 )
|
|
401 newSeg.rightSplice = segments[i].b ;
|
|
402
|
|
403 exonBlocks.push_back( newSeg ) ;
|
|
404
|
|
405 eid = exonBlocks.size() - 1 ;
|
|
406 }
|
|
407 else if ( exonBlocks[j].end < segments[i].b ||
|
|
408 ( exonBlocks[j].start > segments[i].a && exonBlocks[j].start <= segments[i].b + 1 ) )
|
|
409 {
|
|
410 // If overlaps with a current exon block, so we extend it
|
|
411 if ( exonBlocks[j].end < segments[i].b )
|
|
412 {
|
|
413 // extends toward right
|
|
414 exonBlocks[j].end = segments[i].b ;
|
|
415 if ( i > 0 && ( exonBlocks[j].leftSplice == -1 || segments[i].a < exonBlocks[j].leftSplice ) )
|
|
416 exonBlocks[j].leftSplice = segments[i].a ;
|
|
417 if ( i < segCnt - 1 && segments[i].b > exonBlocks[j].rightSplice )
|
|
418 exonBlocks[j].rightSplice = segments[i].b ;
|
|
419 eid = j ;
|
|
420
|
|
421 // Merge with next few exon blocks
|
|
422 for ( k = j + 1 ; k < (int)exonBlocks.size() ; ++k )
|
|
423 {
|
|
424 if ( exonBlocks[k].start <= exonBlocks[j].end + 1 )
|
|
425 {
|
|
426 if ( exonBlocks[k].end > exonBlocks[j].end )
|
|
427 exonBlocks[j].end = exonBlocks[k].end ;
|
|
428
|
|
429 if ( exonBlocks[k].leftSplice != -1 && ( exonBlocks[j].leftSplice == -1 || exonBlocks[k].leftSplice < exonBlocks[j].leftSplice ) )
|
|
430 exonBlocks[j].leftSplice = exonBlocks[k].leftSplice ;
|
|
431 if ( exonBlocks[k].rightSplice != -1 && exonBlocks[k].rightSplice > exonBlocks[j].rightSplice )
|
|
432 exonBlocks[j].rightSplice = exonBlocks[k].rightSplice ;
|
|
433
|
|
434 }
|
|
435 else
|
|
436 break ;
|
|
437 }
|
|
438
|
|
439 if ( k > j + 1 )
|
|
440 {
|
|
441 // Remove the merged blocks
|
|
442 int a, b ;
|
|
443 for ( a = j + 1, b = k ; b < (int)exonBlocks.size() ; ++a, ++b )
|
|
444 exonBlocks[a] = exonBlocks[b] ;
|
|
445 for ( a = 0 ; a < k - ( j + 1 ) ; ++a )
|
|
446 exonBlocks.pop_back() ;
|
|
447 }
|
|
448 }
|
|
449 else if ( exonBlocks[j].start > segments[i].a && exonBlocks[j].start <= segments[i].b + 1 )
|
|
450 {
|
|
451 // extends toward left
|
|
452 exonBlocks[j].start = segments[i].a ;
|
|
453 if ( i > 0 && ( exonBlocks[j].leftSplice == -1 || segments[i].a < exonBlocks[j].leftSplice ) )
|
|
454 exonBlocks[j].leftSplice = segments[i].a ;
|
|
455 if ( i < segCnt - 1 && segments[i].b > exonBlocks[j].rightSplice )
|
|
456 exonBlocks[j].rightSplice = segments[i].b ;
|
|
457 eid = j ;
|
|
458
|
|
459 // Merge with few previous exon blocks
|
|
460 for ( k = j - 1 ; k >= 0 ; --k )
|
|
461 {
|
|
462 if ( exonBlocks[k].end >= exonBlocks[k + 1].start - 1 )
|
|
463 {
|
|
464 if ( exonBlocks[k + 1].start < exonBlocks[k].start )
|
|
465 {
|
|
466 exonBlocks[k].start = exonBlocks[k + 1].start ;
|
|
467 }
|
|
468
|
|
469 if ( exonBlocks[k].leftSplice != -1 && ( exonBlocks[j].leftSplice == -1 || exonBlocks[k].leftSplice < exonBlocks[j].leftSplice ) )
|
|
470 exonBlocks[j].leftSplice = exonBlocks[k].leftSplice ;
|
|
471 if ( exonBlocks[k].rightSplice != -1 && exonBlocks[k].rightSplice > exonBlocks[j].rightSplice )
|
|
472 exonBlocks[j].rightSplice = exonBlocks[k].rightSplice ;
|
|
473
|
|
474 }
|
|
475 else
|
|
476 break ;
|
|
477 }
|
|
478
|
|
479 if ( k < j - 1 )
|
|
480 {
|
|
481 int a, b ;
|
|
482 eid = k + 1 ;
|
|
483 for ( a = k + 2, b = j + 1 ; b < (int)exonBlocks.size() ; ++a, ++b )
|
|
484 exonBlocks[a] = exonBlocks[b] ;
|
|
485 for ( a = 0 ; a < ( j - 1 ) - k ; ++a )
|
|
486 exonBlocks.pop_back() ;
|
|
487
|
|
488 }
|
|
489 }
|
|
490 }
|
|
491 else if ( exonBlocks[j].start > segments[i].b + 1 )
|
|
492 {
|
|
493 int size = exonBlocks.size() ;
|
|
494 int a ;
|
|
495 // No overlap, insert a new block
|
|
496 struct _block newSeg ;
|
|
497 newSeg.chrId = alignments.GetChromId() ;
|
|
498 newSeg.start = segments[i].a ;
|
|
499 newSeg.end = segments[i].b ;
|
|
500
|
|
501 newSeg.leftSplice = -1 ;
|
|
502 newSeg.rightSplice = -1 ;
|
|
503 if ( i > 0 )
|
|
504 newSeg.leftSplice = segments[i].a ;
|
|
505 if ( i < segCnt - 1 )
|
|
506 newSeg.rightSplice = segments[i].b ;
|
|
507
|
|
508 // Insert at position j
|
|
509 exonBlocks.push_back( newSeg ) ;
|
|
510 for ( a = size ; a > j ; --a )
|
|
511 exonBlocks[a] = exonBlocks[a - 1] ;
|
|
512 exonBlocks[a] = newSeg ;
|
|
513
|
|
514 eid = a ;
|
|
515 }
|
|
516 else
|
|
517 {
|
|
518 // The segment is contained in j
|
|
519 eid = -1 ;
|
|
520 }
|
|
521
|
|
522 // Merge the block with the mate if the gap is very small
|
|
523 // Note that since reads are already sorted by coordinate,
|
|
524 // the previous exon blocks is built completely.
|
|
525 /*int64_t matePos ;
|
|
526 int mateChrId ;
|
|
527 alignments.GetMatePosition( mateChrId, matePos ) ;
|
|
528 if ( i == 0 && eid > 0 && mateChrId == exonBlocks[ eid ].chrId
|
|
529 && matePos < segments[0].a
|
|
530 && matePos >= exonBlocks[eid - 1].start
|
|
531 && matePos <= exonBlocks[eid - 1].end
|
|
532 && segments[0].a - matePos + 1 <= 500
|
|
533 && exonBlocks[eid-1].chrId == exonBlocks[eid].chrId
|
|
534 && exonBlocks[eid].start - exonBlocks[eid - 1].end - 1 <= 30 )
|
|
535 {
|
|
536 printf( "%d: (%d-%d) (%d-%d). %d %d\n", ( segments[0].a + alignments.readLen - 1 ) - matePos + 1, exonBlocks[eid - 1].start, exonBlocks[eid - 1].end,
|
|
537 exonBlocks[eid].start,
|
|
538 exonBlocks[eid].end, eid, exonBlocks.size() ) ;
|
|
539 exonBlocks[eid - 1].end = exonBlocks[eid].end ;
|
|
540
|
|
541 if ( exonBlocks[eid].leftSplice != -1 && ( exonBlocks[eid - 1].leftSplice == -1 || exonBlocks[eid].leftSplice < exonBlocks[eid - 1].leftSplice ) )
|
|
542 exonBlocks[eid - 1].leftSplice = exonBlocks[k].leftSplice ;
|
|
543 if ( exonBlocks[eid].rightSplice != -1 && exonBlocks[eid].rightSplice > exonBlocks[eid - 1].rightSplice )
|
|
544 exonBlocks[eid - 1].rightSplice = exonBlocks[eid].rightSplice ;
|
|
545
|
|
546 int size = exonBlocks.size() ;
|
|
547 for ( j = eid ; j < size - 1 ; ++j )
|
|
548 exonBlocks[j] = exonBlocks[j + 1] ;
|
|
549 exonBlocks.pop_back() ;
|
|
550 }*/
|
|
551 }
|
|
552 }
|
|
553 /*for ( int i = 0 ; i < (int)exonBlocks.size() ; ++i )
|
|
554 {
|
|
555 printf( "%d %d\n", exonBlocks[i].start, exonBlocks[i].end ) ;
|
|
556 }*/
|
|
557
|
|
558 if ( exonBlocks.size() > 0 )
|
|
559 {
|
|
560 BuildBlockChrIdOffset() ;
|
|
561 int cnt = exonBlocks.size() ;
|
|
562 for ( int i = 0 ; i < cnt ; ++i )
|
|
563 {
|
|
564 exonBlocks[i].contigId = exonBlocks[i].chrId ;
|
|
565
|
|
566 exonBlocks[i].leftType = 0 ;
|
|
567 exonBlocks[i].rightType = 0 ;
|
|
568 exonBlocks[i].depth = NULL ;
|
|
569 exonBlocks[i].nextCnt = 0 ;
|
|
570 exonBlocks[i].prevCnt = 0 ;
|
|
571 exonBlocks[i].leftStrand = '.' ;
|
|
572 exonBlocks[i].rightStrand = '.' ;
|
|
573 }
|
|
574 }
|
|
575 return exonBlocks.size() ;
|
|
576 }
|
|
577
|
|
578 void FilterSplitSitesInRegions( std::vector<struct _splitSite> &sites )
|
|
579 {
|
|
580 int i, j, k ;
|
|
581 int size = sites.size() ;
|
|
582 int bsize = exonBlocks.size() ;
|
|
583 int tag = 0 ;
|
|
584
|
|
585 for ( i = 0 ; i < bsize ; ++i )
|
|
586 {
|
|
587 while ( tag < size && ( sites[tag].chrId < exonBlocks[i].chrId ||
|
|
588 ( sites[tag].chrId == exonBlocks[i].chrId && sites[tag].pos < exonBlocks[i].start ) ) )
|
|
589 {
|
|
590 ++tag ;
|
|
591 }
|
|
592 if ( tag >= size )
|
|
593 break ;
|
|
594
|
|
595 if ( sites[tag].chrId != exonBlocks[i].chrId || sites[tag].pos > exonBlocks[i].end )
|
|
596 continue ;
|
|
597
|
|
598 for ( j = tag + 1 ; j < size ; ++j )
|
|
599 if ( sites[j].chrId != exonBlocks[i].chrId || sites[j].pos > exonBlocks[i].end )
|
|
600 break ;
|
|
601
|
|
602 // [tag,j) holds the split sites in this region.
|
|
603 int leftStrandSupport[2] = {0, 0} ;
|
|
604 int rightStrandSupport[2] = {0, 0} ;
|
|
605 int strandCnt[2] = { 0, 0 } ;
|
|
606 for ( k = tag ; k < j ; ++k )
|
|
607 {
|
|
608 if ( sites[k].strand == '-' )
|
|
609 {
|
|
610 ++strandCnt[0] ;
|
|
611 if ( sites[k].oppositePos < exonBlocks[i].start )
|
|
612 leftStrandSupport[0] += sites[k].support ;
|
|
613 else if ( sites[k].oppositePos > exonBlocks[i].end )
|
|
614 rightStrandSupport[0] += sites[k].support ;
|
|
615 }
|
|
616 else if ( sites[k].strand == '+' )
|
|
617 {
|
|
618 ++strandCnt[1] ;
|
|
619 if ( sites[k].oppositePos < exonBlocks[i].start )
|
|
620 leftStrandSupport[1] += sites[k].support ;
|
|
621 else if ( sites[k].oppositePos > exonBlocks[i].end )
|
|
622 rightStrandSupport[1] += sites[k].support ;
|
|
623 }
|
|
624
|
|
625 }
|
|
626
|
|
627
|
|
628 if ( leftStrandSupport[0] == 0 && rightStrandSupport[0] == 0
|
|
629 && leftStrandSupport[1] != 0 && rightStrandSupport[1] != 0 && strandCnt[0] == 2 )
|
|
630 {
|
|
631 // check whether a different strand accidentally show up in this region.
|
|
632 bool remove = false ;
|
|
633 for ( k = tag ; k < j ; ++k )
|
|
634 {
|
|
635 if ( sites[k].strand == '-' &&
|
|
636 sites[k].oppositePos >= sites[k].pos && sites[k].oppositePos <= exonBlocks[i].end &&
|
|
637 sites[k].support <= 2 )
|
|
638 {
|
|
639 remove = true ;
|
|
640 break ;
|
|
641 }
|
|
642 }
|
|
643
|
|
644 if ( remove )
|
|
645 for ( k = tag ; k < j ; ++k )
|
|
646 if ( sites[k].strand == '-' )
|
|
647 sites[k].support = -1 ;
|
|
648 }
|
|
649 else if ( leftStrandSupport[1] == 0 && rightStrandSupport[1] == 0
|
|
650 && leftStrandSupport[0] != 0 && rightStrandSupport[0] != 0 && strandCnt[1] == 2 )
|
|
651 {
|
|
652 // check whether a different strand accidentally show up in this region.
|
|
653 bool remove = false ;
|
|
654 for ( k = tag ; k < j ; ++k )
|
|
655 {
|
|
656 if ( sites[k].strand == '+' &&
|
|
657 sites[k].oppositePos >= sites[k].pos && sites[k].oppositePos <= exonBlocks[i].end &&
|
|
658 sites[k].support <= 2 )
|
|
659 {
|
|
660 remove = true ;
|
|
661 break ;
|
|
662 }
|
|
663 }
|
|
664
|
|
665 if ( remove )
|
|
666 for ( k = tag ; k < j ; ++k )
|
|
667 if ( sites[k].strand == '+' )
|
|
668 sites[k].support = -1 ;
|
|
669 }
|
|
670
|
|
671 tag = j ;
|
|
672 }
|
|
673
|
|
674 k = 0 ;
|
|
675 for ( i = 0 ; i < size ; ++i )
|
|
676 if ( sites[i].support > 0 )
|
|
677 {
|
|
678 sites[k] = sites[i] ;
|
|
679 ++k ;
|
|
680 }
|
|
681 sites.resize( k ) ;
|
|
682 }
|
|
683
|
|
684 // Filter the pair of split sites that is likely merge two genes.
|
|
685 // We filter the case like [..]------------------------[..]
|
|
686 // [..]--[..] [..]-[...]
|
|
687 void FilterGeneMergeSplitSites( std::vector<struct _splitSite> &sites )
|
|
688 {
|
|
689 int i, j, k ;
|
|
690 int bsize = exonBlocks.size() ;
|
|
691 int ssize = sites.size() ;
|
|
692
|
|
693 struct _pair32 *siteToBlock = new struct _pair32[ ssize ] ;
|
|
694 struct _adj *adj = new struct _adj[ ssize / 2 + bsize ] ;
|
|
695 bool *visited = new bool[bsize] ;
|
|
696
|
|
697 int adjCnt = 0 ;
|
|
698 for ( i = 0 ; i < bsize ; ++i )
|
|
699 {
|
|
700 adj[i].info = 0 ; // in the header, info means the number of next block
|
|
701 adj[i].support = 0 ;
|
|
702 adj[i].next = -1 ;
|
|
703 }
|
|
704 adjCnt = i ;
|
|
705 memset( siteToBlock, -1, sizeof( struct _pair32 ) * ssize ) ;
|
|
706 memset( visited, false, sizeof( bool ) * bsize ) ;
|
|
707
|
|
708 // Build the graph.
|
|
709 k = 0 ;
|
|
710 for ( i = 0 ; i < bsize ; ++i )
|
|
711 {
|
|
712 for ( ; k < ssize ; ++k )
|
|
713 {
|
|
714 if ( sites[k].oppositePos < sites[k].pos )
|
|
715 continue ;
|
|
716 if ( sites[k].chrId < exonBlocks[i].chrId
|
|
717 || ( sites[k].chrId == exonBlocks[i].chrId && sites[k].pos < exonBlocks[i].start ) )
|
|
718 continue ;
|
|
719 break ;
|
|
720 }
|
|
721
|
|
722 for ( ; k < ssize ; ++k )
|
|
723 {
|
|
724 if ( sites[k].chrId > exonBlocks[i].chrId
|
|
725 || sites[k].pos > exonBlocks[i].end )
|
|
726 break ;
|
|
727
|
|
728 if ( sites[k].oppositePos <= exonBlocks[i].end ) // ignore self-loop
|
|
729 continue ;
|
|
730
|
|
731 for ( j = i + 1 ; j < bsize ; ++j )
|
|
732 if ( sites[k].oppositePos >= exonBlocks[j].start && sites[k].oppositePos <= exonBlocks[j].end )
|
|
733 break ;
|
|
734 if ( sites[k].oppositePos >= exonBlocks[j].start && sites[k].oppositePos <= exonBlocks[j].end )
|
|
735 {
|
|
736 int p ;
|
|
737 p = adj[i].next ;
|
|
738 while ( p != -1 )
|
|
739 {
|
|
740 if ( adj[p].ind == j )
|
|
741 {
|
|
742 ++adj[p].info ;
|
|
743 adj[p].support += sites[k].uniqSupport ;
|
|
744 break ;
|
|
745 }
|
|
746 p = adj[p].next ;
|
|
747 }
|
|
748 if ( p == -1 )
|
|
749 {
|
|
750 adj[ adjCnt ].ind = j ;
|
|
751 adj[ adjCnt ].info = 1 ;
|
|
752 adj[ adjCnt ].support = sites[k].uniqSupport ;
|
|
753 adj[ adjCnt ].next = adj[i].next ;
|
|
754 adj[i].next = adjCnt ;
|
|
755 ++adj[i].info ;
|
|
756 ++adjCnt ;
|
|
757 }
|
|
758
|
|
759 siteToBlock[k].a = i ;
|
|
760 siteToBlock[k].b = j ;
|
|
761 }
|
|
762 }
|
|
763 }
|
|
764 for ( k = 0 ; k < ssize ; ++k )
|
|
765 {
|
|
766 if ( sites[k].oppositePos - sites[k].pos + 1 < 20000 || sites[k].uniqSupport >= 30 )
|
|
767 continue ;
|
|
768
|
|
769 int from = siteToBlock[k].a ;
|
|
770 int to = siteToBlock[k].b ;
|
|
771 if ( to - from - 1 < 2 || adj[from].info <= 1 )
|
|
772 continue ;
|
|
773
|
|
774 memset( &visited[from], false, sizeof( bool ) * ( to - from + 1 ) ) ;
|
|
775
|
|
776 int cnt = 0 ;
|
|
777 int p = adj[from].next ;
|
|
778 int max = -1 ;
|
|
779 int maxP = 0 ;
|
|
780 while ( p != -1 )
|
|
781 {
|
|
782 if ( adj[p].support >= 10 && adj[p].ind <= to )
|
|
783 ++cnt ;
|
|
784 if ( adj[p].support > max || ( adj[p].support == max && adj[p].ind == to ) )
|
|
785 {
|
|
786 max = adj[p].support ;
|
|
787 maxP = p ;
|
|
788 }
|
|
789
|
|
790 if ( adj[p].ind == to && adj[p].info > 1 )
|
|
791 {
|
|
792 cnt = 0 ;
|
|
793 break ;
|
|
794 }
|
|
795 p = adj[p].next ;
|
|
796 }
|
|
797 if ( cnt <= 1 )
|
|
798 continue ;
|
|
799
|
|
800 if ( max != -1 && adj[maxP].ind == to )
|
|
801 continue ;
|
|
802
|
|
803 // No other path can reach "to"
|
|
804 p = adj[from].next ;
|
|
805 while ( p != -1 )
|
|
806 {
|
|
807 if ( adj[p].ind != to )
|
|
808 {
|
|
809 //if ( sites[k].pos ==43917158 )
|
|
810 // printf( "hi %d %d. (%d %d)\n", from, adj[p].ind, exonBlocks[ adj[p].ind ].start, exonBlocks[ adj[p].ind ].end ) ;
|
|
811 if ( CanReach( adj[p].ind, to, adj, visited ) )
|
|
812 break ;
|
|
813 }
|
|
814 p = adj[p].next ;
|
|
815 }
|
|
816 if ( p != -1 )
|
|
817 continue ;
|
|
818
|
|
819 //if ( sites[k].pos == 34073267 )
|
|
820 // printf( "hi %d %d. (%d %d)\n", from, to, exonBlocks[to].start, exonBlocks[to].end ) ;
|
|
821
|
|
822 // There are some blocks between that can reach "to"
|
|
823 for ( i = from + 1 ; i < to ; ++i )
|
|
824 {
|
|
825 p = adj[i].next ;
|
|
826 while ( p != -1 )
|
|
827 {
|
|
828 if ( adj[p].ind == to && adj[p].support >= 10 )
|
|
829 break ;
|
|
830 p = adj[p].next ;
|
|
831 }
|
|
832 if ( p != -1 )
|
|
833 break ;
|
|
834 }
|
|
835 if ( i >= to )
|
|
836 continue ;
|
|
837
|
|
838 // Filter the site
|
|
839 //printf( "filtered: %d %d\n", sites[k].pos, sites[k].oppositePos ) ;
|
|
840 sites[k].support = -1 ;
|
|
841 for ( j = k + 1 ; j < ssize ; ++j )
|
|
842 {
|
|
843 if ( sites[j].pos == sites[k].oppositePos && sites[j].oppositePos == sites[k].pos )
|
|
844 sites[j].support = -1 ;
|
|
845 if ( sites[j].pos > sites[k].oppositePos )
|
|
846 break ;
|
|
847 }
|
|
848 }
|
|
849
|
|
850 k = 0 ;
|
|
851 for ( i = 0 ; i < ssize ; ++i )
|
|
852 if ( sites[i].support > 0 )
|
|
853 {
|
|
854 sites[k] = sites[i] ;
|
|
855 ++k ;
|
|
856 }
|
|
857 sites.resize( k ) ;
|
|
858
|
|
859 // Filter the intron on the different strand of a gene.
|
|
860 ssize = k ;
|
|
861 k = 0 ;
|
|
862 for ( i = 0 ; i < bsize ; )
|
|
863 {
|
|
864 int farthest = exonBlocks[i].end ;
|
|
865
|
|
866 for ( j = i ; j < bsize ; ++j )
|
|
867 {
|
|
868 if ( exonBlocks[j].start > farthest || exonBlocks[i].chrId != exonBlocks[j].chrId )
|
|
869 break ;
|
|
870 int p = adj[i].next ;
|
|
871 while ( p != -1 )
|
|
872 {
|
|
873 if ( exonBlocks[ adj[p].ind ].end > farthest )
|
|
874 farthest = exonBlocks[ adj[p].ind ].end ;
|
|
875 p = adj[p].next ;
|
|
876 }
|
|
877 }
|
|
878
|
|
879 for ( ; k < ssize ; ++k )
|
|
880 {
|
|
881 if ( sites[k].chrId < exonBlocks[i].chrId ||
|
|
882 ( sites[k].chrId == exonBlocks[i].chrId && sites[k].pos < exonBlocks[i].start ) )
|
|
883 continue ;
|
|
884 break ;
|
|
885 }
|
|
886
|
|
887 if ( sites[k].chrId > exonBlocks[i].chrId || sites[k].pos > farthest )
|
|
888 {
|
|
889 i = j ;
|
|
890 continue ;
|
|
891 }
|
|
892 //printf( "%d %d. %d %d. %d %d\n", i, j, exonBlocks[i].start, farthest, k, sites[k].pos ) ;
|
|
893
|
|
894
|
|
895 int from = k ;
|
|
896 int to ;
|
|
897 for ( to = k ; to < ssize ; ++to )
|
|
898 if ( sites[to].chrId != exonBlocks[i].chrId || sites[to].pos > farthest )
|
|
899 break ;
|
|
900
|
|
901 int strandSupport[2] = {0, 0};
|
|
902 int strandCount[2] = {0, 0};
|
|
903 for ( k = from ; k < to ; ++k )
|
|
904 {
|
|
905 if ( sites[k].oppositePos <= sites[k].pos )
|
|
906 continue ;
|
|
907
|
|
908 int tag = ( sites[k].strand == '+' ? 1 : 0 ) ;
|
|
909 strandSupport[tag] += sites[k].support ;
|
|
910 ++strandCount[tag] ;
|
|
911 }
|
|
912
|
|
913 // Not mixtured.
|
|
914 if ( strandCount[0] * strandCount[1] == 0 )
|
|
915 {
|
|
916 i = j ;
|
|
917 k = to ;
|
|
918 continue ;
|
|
919 }
|
|
920
|
|
921 char removeStrand = 0 ;
|
|
922 if ( strandCount[0] == 1 && strandCount[1] >= 3
|
|
923 && strandSupport[1] >= 20 * strandSupport[0] && strandSupport[0] <= 5 )
|
|
924 {
|
|
925 removeStrand = '-' ;
|
|
926 }
|
|
927 else if ( strandCount[1] == 1 && strandCount[0] >= 3
|
|
928 && strandSupport[0] >= 20 * strandSupport[1] && strandSupport[1] <= 5 )
|
|
929 {
|
|
930 removeStrand = '+' ;
|
|
931 }
|
|
932
|
|
933 if ( removeStrand == 0 )
|
|
934 {
|
|
935 i = j ; k = to ;
|
|
936 continue ;
|
|
937 }
|
|
938
|
|
939 for ( k = from ; k < to ; ++k )
|
|
940 {
|
|
941 if ( sites[k].strand == removeStrand && sites[k].oppositePos >= exonBlocks[i].start && sites[k].oppositePos <= farthest )
|
|
942 {
|
|
943 //printf( "filtered: %d %d\n", sites[k].pos, sites[k].oppositePos ) ;
|
|
944 sites[k].support = -1 ;
|
|
945 }
|
|
946 }
|
|
947
|
|
948 i = j ;
|
|
949 k = to ;
|
|
950 }
|
|
951
|
|
952 k = 0 ;
|
|
953 for ( i = 0 ; i < ssize ; ++i )
|
|
954 if ( sites[i].support > 0 )
|
|
955 {
|
|
956 sites[k] = sites[i] ;
|
|
957 ++k ;
|
|
958 }
|
|
959 sites.resize( k ) ;
|
|
960
|
|
961 delete[] visited ;
|
|
962 delete[] siteToBlock ;
|
|
963 delete[] adj ;
|
|
964 }
|
|
965
|
|
966 void SplitBlocks( Alignments &alignments, std::vector< struct _splitSite > &splitSites )
|
|
967 {
|
|
968 std::vector<struct _block> rawExonBlocks = exonBlocks ;
|
|
969 int i, j ;
|
|
970 int tag = 0 ;
|
|
971 int bsize = rawExonBlocks.size() ;
|
|
972 int ssize = splitSites.size() ;
|
|
973
|
|
974 // Make sure not overflow
|
|
975 struct _splitSite tmp ;
|
|
976 tmp.pos = -1 ;
|
|
977 splitSites.push_back( tmp ) ;
|
|
978
|
|
979 exonBlocks.clear() ;
|
|
980 for ( i = 0 ; i < bsize ; ++i )
|
|
981 {
|
|
982 while ( tag < ssize && ( splitSites[tag].chrId < rawExonBlocks[i].chrId ||
|
|
983 ( splitSites[tag].chrId == rawExonBlocks[i].chrId && splitSites[tag].pos < rawExonBlocks[i].start ) ) )
|
|
984 ++tag ;
|
|
985
|
|
986 int l ;
|
|
987 for ( l = tag ; l < ssize ; ++l )
|
|
988 if ( splitSites[l].chrId != rawExonBlocks[i].chrId || splitSites[l].pos > rawExonBlocks[i].end )
|
|
989 break ;
|
|
990 int64_t start = rawExonBlocks[i].start ;
|
|
991 int64_t end = rawExonBlocks[i].end ;
|
|
992 //printf( "%lld %lld\n", start, end ) ;
|
|
993 //printf( "%s %s: %d %d\n", alignments.GetChromName( splitSites[tag].chrId ), alignments.GetChromName( rawExonBlocks[i].chrId ), tag, l ) ;
|
|
994 for ( j = tag ; j <= l ; ++j )
|
|
995 {
|
|
996 int leftType = 0 ;
|
|
997 int rightType = 0 ;
|
|
998 if ( j > tag )
|
|
999 {
|
|
1000 start = splitSites[j - 1].pos ; // end is from previous stage
|
|
1001 leftType = splitSites[j - 1].type ;
|
|
1002 }
|
|
1003 else
|
|
1004 start = rawExonBlocks[i].start ;
|
|
1005
|
|
1006 if ( j <= l - 1 )
|
|
1007 {
|
|
1008 end = splitSites[j].pos ;
|
|
1009 rightType = splitSites[j].type ;
|
|
1010 }
|
|
1011 else
|
|
1012 end = rawExonBlocks[i].end ;
|
|
1013
|
|
1014 if ( leftType == 2 )
|
|
1015 ++start ;
|
|
1016 if ( rightType == 1 )
|
|
1017 --end ;
|
|
1018
|
|
1019 struct _block tmpB ;
|
|
1020 tmpB = rawExonBlocks[i] ;
|
|
1021 tmpB.start = start ;
|
|
1022 tmpB.end = end ;
|
|
1023 tmpB.depthSum = 0 ;
|
|
1024 tmpB.ratio = 0 ;
|
|
1025 //printf( "\t%lld %lld\n", start, end ) ;
|
|
1026 tmpB.leftType = leftType ;
|
|
1027 tmpB.rightType = rightType ;
|
|
1028 tmpB.depth = NULL ;
|
|
1029
|
|
1030 exonBlocks.push_back( tmpB ) ;
|
|
1031 // handle the soft boundary is the same as the hard boundary case
|
|
1032 // or adjacent splice sites
|
|
1033 /*if ( j == tag && start == end )
|
|
1034 {
|
|
1035 exonBlocks.pop_back() ;
|
|
1036 --end ;
|
|
1037 }
|
|
1038 else if ( j == l && start > end )
|
|
1039 {
|
|
1040 exonBlocks.pop_back() ;
|
|
1041 }*/
|
|
1042 if ( start > end
|
|
1043 //|| ( tmpB.start == rawExonBlocks[i].start && tmpB.end != rawExonBlocks[i].end && tmpB.end - tmpB.start + 1 <= 7 )
|
|
1044 //|| ( tmpB.end == rawExonBlocks[i].end && tmpB.start != rawExonBlocks[i].start && tmpB.end - tmpB.start + 1 <= 7 )) // the case of too short overhang
|
|
1045 || ( tmpB.leftType == 0 && tmpB.rightType == 2 && tmpB.end - tmpB.start + 1 <= 9 )
|
|
1046 || ( tmpB.leftType == 1 && tmpB.rightType == 0 && tmpB.end - tmpB.start + 1 <= 9 ) )
|
|
1047 {
|
|
1048 exonBlocks.pop_back() ;
|
|
1049 }
|
|
1050 /*else if ( start == end )
|
|
1051 {
|
|
1052
|
|
1053 }*/
|
|
1054 }
|
|
1055 }
|
|
1056 BuildBlockChrIdOffset() ;
|
|
1057 }
|
|
1058
|
|
1059 void ComputeDepth( Alignments &alignments )
|
|
1060 {
|
|
1061 // Go through the alignment list again to fill in the depthSum;
|
|
1062 int i ;
|
|
1063 int j ;
|
|
1064 int tag = 0 ;
|
|
1065 int blockCnt = exonBlocks.size() ;
|
|
1066
|
|
1067 std::vector<struct _block> newExonBlocks ;
|
|
1068 while ( alignments.Next() )
|
|
1069 {
|
|
1070 int segCnt = alignments.segCnt ;
|
|
1071 struct _pair *segments = alignments.segments ;
|
|
1072
|
|
1073 while ( tag < blockCnt && ( exonBlocks[tag].chrId < alignments.GetChromId() ||
|
|
1074 ( exonBlocks[tag].chrId == alignments.GetChromId() && exonBlocks[tag].end < segments[0].a ) ) )
|
|
1075 {
|
|
1076 AdjustAndCreateExonBlocks( tag, newExonBlocks ) ;
|
|
1077 ++tag ;
|
|
1078 }
|
|
1079 for ( i = 0 ; i < segCnt ; ++i )
|
|
1080 {
|
|
1081 for ( j = tag ; j < blockCnt && ( exonBlocks[j].chrId == alignments.GetChromId() && exonBlocks[j].start <= segments[i].b ) ; ++j )
|
|
1082 {
|
|
1083 int64_t s = -1, e = -1 ;
|
|
1084 exonBlocks[j].depthSum += Overlap( segments[i].a, segments[i].b, exonBlocks[j].start, exonBlocks[j].end, s, e ) ;
|
|
1085 if ( s == -1 )
|
|
1086 continue ;
|
|
1087
|
|
1088 if ( exonBlocks[j].depth == NULL )
|
|
1089 {
|
|
1090 int len = exonBlocks[j].end - exonBlocks[j].start + 1 ;
|
|
1091 exonBlocks[j].depth = new int[len + 1] ;
|
|
1092 memset( exonBlocks[j].depth, 0, sizeof( int ) * ( len + 1 ) ) ;
|
|
1093 exonBlocks[j].leftSplice = exonBlocks[j].start ;
|
|
1094 exonBlocks[j].rightSplice = exonBlocks[j].end ;
|
|
1095 }
|
|
1096 int *depth = exonBlocks[j].depth ;
|
|
1097 ++depth[s - exonBlocks[j].start] ;
|
|
1098 --depth[e - exonBlocks[j].start + 1] ; // notice the +1 here, since the decrease of coverage actually happens at next base.
|
|
1099
|
|
1100 // record the longest alignment stretch support the splice site.
|
|
1101 if ( exonBlocks[j].leftType == 1 && segments[i].a == exonBlocks[j].start
|
|
1102 && e > exonBlocks[j].leftSplice )
|
|
1103 {
|
|
1104 exonBlocks[j].leftSplice = e ;
|
|
1105 }
|
|
1106 if ( exonBlocks[j].rightType == 2 && segments[i].b == exonBlocks[j].end
|
|
1107 && s < exonBlocks[j].rightSplice )
|
|
1108 {
|
|
1109 exonBlocks[j].rightSplice = s ;
|
|
1110 }
|
|
1111 }
|
|
1112 }
|
|
1113 }
|
|
1114
|
|
1115 for ( ; tag < blockCnt ; ++tag )
|
|
1116 AdjustAndCreateExonBlocks( tag, newExonBlocks ) ;
|
|
1117 exonBlocks.clear() ;
|
|
1118
|
|
1119 // Due to multi-alignment, we may skip some alignments that determines leftSplice and
|
|
1120 // rightSplice, hence filtered some subexons. As a result, some subexons' anchor subexon will disapper
|
|
1121 // and we need to change its boundary type.
|
|
1122 blockCnt = newExonBlocks.size() ;
|
|
1123 for ( i = 0 ; i < blockCnt ; ++i )
|
|
1124 {
|
|
1125 if ( ( i == 0 && newExonBlocks[i].leftType == 2 ) ||
|
|
1126 ( i > 0 && newExonBlocks[i].leftType == 2 && newExonBlocks[i - 1].end + 1 != newExonBlocks[i].start ) )
|
|
1127 {
|
|
1128 newExonBlocks[i].leftType = 0 ;
|
|
1129 }
|
|
1130
|
|
1131 if ( ( i == blockCnt - 1 && newExonBlocks[i].rightType == 1 ) ||
|
|
1132 ( i < blockCnt - 1 && newExonBlocks[i].rightType == 1 &&
|
|
1133 newExonBlocks[i].end + 1 != newExonBlocks[i + 1].start ) )
|
|
1134 newExonBlocks[i].rightType = 0 ;
|
|
1135 }
|
|
1136 exonBlocks = newExonBlocks ;
|
|
1137 }
|
|
1138
|
|
1139 // If two blocks whose soft boundary are close to each other, we can merge them.
|
|
1140 void MergeNearBlocks()
|
|
1141 {
|
|
1142 std::vector<struct _block> rawExonBlocks = exonBlocks ;
|
|
1143 int i, k ;
|
|
1144 int bsize = rawExonBlocks.size() ;
|
|
1145 int *newIdx = new int[bsize] ; // used for adjust prev and next. Note that by merging, it won't change the number of prevCnt or nextCnt.
|
|
1146
|
|
1147 exonBlocks.clear() ;
|
|
1148 exonBlocks.push_back( rawExonBlocks[0] ) ;
|
|
1149 k = 0 ;
|
|
1150 newIdx[0] = 0 ;
|
|
1151 for ( i = 1 ; i < bsize ; ++i )
|
|
1152 {
|
|
1153 if ( rawExonBlocks[i].chrId == exonBlocks[k].chrId
|
|
1154 && rawExonBlocks[i].leftType == 0 && exonBlocks[k].rightType == 0
|
|
1155 && rawExonBlocks[i].start - exonBlocks[k].end - 1 <= 50
|
|
1156 && rawExonBlocks[i].depthSum / double( rawExonBlocks[i].end - rawExonBlocks[i].start + 1 ) < 3.0
|
|
1157 && exonBlocks[k].depthSum / double( exonBlocks[k].end - exonBlocks[i].start + 1 ) < 3.0 )
|
|
1158 {
|
|
1159 exonBlocks[k].end = rawExonBlocks[i].end ;
|
|
1160 exonBlocks[k].rightType = rawExonBlocks[i].rightType ;
|
|
1161 exonBlocks[k].rightStrand = rawExonBlocks[i].rightStrand ;
|
|
1162 exonBlocks[k].depthSum += rawExonBlocks[i].depthSum ;
|
|
1163 if ( rawExonBlocks[i].rightSplice != -1 )
|
|
1164 exonBlocks[k].rightSplice = rawExonBlocks[i].rightSplice ;
|
|
1165
|
|
1166 if ( exonBlocks[k].nextCnt > 0 )
|
|
1167 delete[] exonBlocks[k].next ;
|
|
1168 exonBlocks[k].nextCnt = rawExonBlocks[i].nextCnt ;
|
|
1169 exonBlocks[k].next = rawExonBlocks[i].next ;
|
|
1170 }
|
|
1171 else
|
|
1172 {
|
|
1173 exonBlocks.push_back( rawExonBlocks[i] ) ;
|
|
1174 ++k ;
|
|
1175 }
|
|
1176 newIdx[i] = k ;
|
|
1177 }
|
|
1178 BuildBlockChrIdOffset() ;
|
|
1179 bsize = exonBlocks.size() ;
|
|
1180 for ( i = 0 ; i < bsize ; ++i )
|
|
1181 {
|
|
1182 int cnt = exonBlocks[i].prevCnt ;
|
|
1183 for ( k = 0 ; k < cnt ; ++k )
|
|
1184 exonBlocks[i].prev[k] = newIdx[ exonBlocks[i].prev[k] ] ;
|
|
1185
|
|
1186 cnt = exonBlocks[i].nextCnt ;
|
|
1187 for ( k = 0 ; k < cnt ; ++k )
|
|
1188 exonBlocks[i].next[k] = newIdx[ exonBlocks[i].next[k] ] ;
|
|
1189 }
|
|
1190 delete[] newIdx ;
|
|
1191 }
|
|
1192
|
|
1193 void AddIntronInformation( std::vector<struct _splitSite> &sites, Alignments &alignments )
|
|
1194 {
|
|
1195 // Add the connection information, support information and the strand information.
|
|
1196 int i, j, k, tag ;
|
|
1197 tag = 0 ;
|
|
1198 int scnt = sites.size() ;
|
|
1199 int exonBlockCnt = exonBlocks.size() ;
|
|
1200 bool flag ;
|
|
1201
|
|
1202 for ( i = 0 ; i < exonBlockCnt ; ++i )
|
|
1203 {
|
|
1204 exonBlocks[i].prevCnt = exonBlocks[i].nextCnt = 0 ;
|
|
1205 exonBlocks[i].prev = exonBlocks[i].next = NULL ;
|
|
1206 exonBlocks[i].leftStrand = exonBlocks[i].rightStrand = '.' ;
|
|
1207 exonBlocks[i].leftSplice = exonBlocks[i].rightSplice = 0 ;
|
|
1208 }
|
|
1209
|
|
1210 // Now, we add the region id and compute the length of each region, here the region does not contain possible IR event.
|
|
1211 int regionId = 0 ;
|
|
1212 for ( i = 0 ; i < exonBlockCnt ; )
|
|
1213 {
|
|
1214 if ( exonBlocks[i].leftType == 2 && exonBlocks[i].rightType == 1 )
|
|
1215 {
|
|
1216 j = i + 1 ;
|
|
1217 }
|
|
1218 else
|
|
1219 for ( j = i + 1 ; j < exonBlockCnt ; ++j )
|
|
1220 if ( exonBlocks[j].start > exonBlocks[j - 1].end + 1 || ( exonBlocks[j].leftType == 2 && exonBlocks[j].rightType == 1 ) )
|
|
1221 break ;
|
|
1222 for ( k = i ; k < j ; ++k )
|
|
1223 exonBlocks[k].contigId = regionId ;
|
|
1224 ++regionId ;
|
|
1225 i = j ;
|
|
1226 }
|
|
1227 int *regionLength = new int[regionId] ;
|
|
1228 memset( regionLength, 0, sizeof( int ) * regionId ) ;
|
|
1229 for ( i = 0 ; i < exonBlockCnt ; ++i )
|
|
1230 {
|
|
1231 regionLength[ exonBlocks[i].contigId ] += exonBlocks[i].end - exonBlocks[i].start + 1 ;
|
|
1232 }
|
|
1233
|
|
1234 for ( i = 0 ; i < scnt ; )
|
|
1235 {
|
|
1236 for ( j = i ; j < scnt ; ++j )
|
|
1237 {
|
|
1238 if ( sites[j].chrId != sites[i].chrId ||
|
|
1239 sites[j].pos != sites[i].pos ||
|
|
1240 sites[j].type != sites[i].type )
|
|
1241 break ;
|
|
1242 }
|
|
1243 // [i,j-1] are the indices of the sites with same coordinate
|
|
1244 // It is possible a sites corresponds to 2 strands, then we should only keep one of them.
|
|
1245 char strand = '.' ;
|
|
1246 int strandSupport[2] = {0, 0};
|
|
1247 for ( k = i ; k < j ; ++k )
|
|
1248 {
|
|
1249 if ( sites[k].strand == '-' )
|
|
1250 strandSupport[0] += sites[k].support ;
|
|
1251 else if ( sites[k].strand == '+' )
|
|
1252 strandSupport[1] += sites[k].support ;
|
|
1253 }
|
|
1254 if ( strandSupport[0] > strandSupport[1] )
|
|
1255 strand = '-' ;
|
|
1256 else if ( strandSupport[1] > strandSupport[0] )
|
|
1257 strand = '+' ;
|
|
1258
|
|
1259
|
|
1260 int cnt = j - i ;
|
|
1261 // Locate the first subexon that can overlap with this site
|
|
1262 while ( tag < exonBlockCnt )
|
|
1263 {
|
|
1264 if ( ( exonBlocks[tag].chrId == sites[i].chrId && exonBlocks[tag].end >= sites[i].pos )
|
|
1265 || exonBlocks[tag].chrId > sites[i].chrId )
|
|
1266 break ;
|
|
1267 ++tag ;
|
|
1268 }
|
|
1269 flag = false ;
|
|
1270 for ( k = tag; k < exonBlockCnt ; ++k )
|
|
1271 {
|
|
1272 if ( exonBlocks[tag].start > sites[i].pos ||
|
|
1273 exonBlocks[tag].chrId > sites[i].chrId )
|
|
1274 break ;
|
|
1275
|
|
1276 if ( exonBlocks[tag].start == sites[i].pos ||
|
|
1277 exonBlocks[tag].end == sites[i].pos )
|
|
1278 {
|
|
1279 flag = true ;
|
|
1280 break ;
|
|
1281 }
|
|
1282 }
|
|
1283
|
|
1284 if ( !flag )
|
|
1285 {
|
|
1286 i = j ;
|
|
1287 continue ;
|
|
1288 }
|
|
1289
|
|
1290 if ( sites[i].type == 1 && exonBlocks[tag].start == sites[i].pos )
|
|
1291 {
|
|
1292 exonBlocks[tag].prevCnt = 0 ;
|
|
1293 exonBlocks[tag].prev = new int[cnt] ;
|
|
1294 exonBlocks[tag].leftStrand = strand ;
|
|
1295
|
|
1296 // And we also need to put the "next" here.
|
|
1297 // Here we assume the oppositePos sorted in increasing order
|
|
1298 k = tag - 1 ;
|
|
1299 for ( int l = j - 1 ; l >= i ; --l )
|
|
1300 {
|
|
1301 for ( ; k >= 0 ; --k )
|
|
1302 {
|
|
1303 if ( exonBlocks[k].end < sites[l].oppositePos || exonBlocks[k].chrId != sites[l].chrId )
|
|
1304 break ;
|
|
1305 if ( exonBlocks[k].end == sites[l].oppositePos ) //&& exonBlocks[k].rightType == sites[l].type )
|
|
1306 {
|
|
1307 if ( sites[l].strand != strand ||
|
|
1308 sites[l].strand != exonBlocks[k].rightStrand )
|
|
1309 break ;
|
|
1310 exonBlocks[k].next[ exonBlocks[k].nextCnt ] = tag ;
|
|
1311 exonBlocks[tag].prev[ exonBlocks[tag].prevCnt] = k ; // Note that the prev are sorted in decreasing order
|
|
1312 ++exonBlocks[k].nextCnt ;
|
|
1313 ++exonBlocks[tag].prevCnt ;
|
|
1314 double factor = 1.0 ;
|
|
1315 if ( regionLength[ exonBlocks[k].contigId ] < alignments.readLen )
|
|
1316 {
|
|
1317 int left ;
|
|
1318 for ( left = k ; left >= 0 ; --left )
|
|
1319 if ( exonBlocks[left].contigId != exonBlocks[k].contigId )
|
|
1320 break ;
|
|
1321 ++left ;
|
|
1322
|
|
1323 int prevType = 0 ;
|
|
1324 // only consider the portion upto k.
|
|
1325 for ( int m = left ; m <= k ; ++m )
|
|
1326 if ( exonBlocks[m].leftType == 1 )
|
|
1327 ++prevType ;
|
|
1328
|
|
1329 if ( prevType == 0 )
|
|
1330 factor = 2 * (double)alignments.readLen / regionLength[ exonBlocks[k].contigId ] ;
|
|
1331 }
|
|
1332 if ( regionLength[ exonBlocks[tag].contigId ] < alignments.readLen )
|
|
1333 {
|
|
1334 int right ;
|
|
1335 for ( right = tag ; right < exonBlockCnt ; ++right )
|
|
1336 if ( exonBlocks[right].contigId != exonBlocks[tag].contigId )
|
|
1337 break ;
|
|
1338 --right ;
|
|
1339
|
|
1340 int nextType = 0 ;
|
|
1341 for ( int m = tag ; m <= right ; ++m )
|
|
1342 if ( exonBlocks[m].rightType == 2 )
|
|
1343 ++nextType ;
|
|
1344
|
|
1345 if ( nextType == 0 )
|
|
1346 factor = 2 * (double)alignments.readLen / regionLength[ exonBlocks[tag].contigId ] ;
|
|
1347 }
|
|
1348 if ( factor > 10 )
|
|
1349 factor = 10 ;
|
|
1350
|
|
1351 if ( exonBlocks[k].contigId != exonBlocks[tag].contigId ) // the support of introns between regions.
|
|
1352 {
|
|
1353 // Adjust the support if one of the anchor exon is too short.
|
|
1354 // This avoid the creation of alternative 3'/5' end due to low coverage from too short anchor.
|
|
1355 exonBlocks[tag].leftSplice += sites[l].support * factor ;
|
|
1356 exonBlocks[k].rightSplice += sites[l].support * factor ;
|
|
1357 }
|
|
1358 break ;
|
|
1359 }
|
|
1360 }
|
|
1361 }
|
|
1362 }
|
|
1363 else if ( sites[i].type == 2 && exonBlocks[tag].end == sites[i].pos )
|
|
1364 {
|
|
1365 exonBlocks[tag].nextCnt = 0 ; // cnt ; it should reach cnt after putting the ids in
|
|
1366 exonBlocks[tag].next = new int[cnt] ;
|
|
1367 exonBlocks[tag].rightStrand = strand ;
|
|
1368 }
|
|
1369
|
|
1370 i = j ;
|
|
1371 }
|
|
1372 // Reverse the prev list
|
|
1373 for ( i = 0 ; i < exonBlockCnt ; ++i )
|
|
1374 {
|
|
1375 for ( j = 0, k = exonBlocks[i].prevCnt - 1 ; j < k ; ++j, --k )
|
|
1376 {
|
|
1377 int tmp = exonBlocks[i].prev[j] ;
|
|
1378 exonBlocks[i].prev[j] = exonBlocks[i].prev[k] ;
|
|
1379 exonBlocks[i].prev[k] = tmp ;
|
|
1380 }
|
|
1381 }
|
|
1382
|
|
1383 // If all of the subexon anchored the intron are filtered, we need to change the type of the other anchor.
|
|
1384 for ( i = 0 ; i < exonBlockCnt ; ++i )
|
|
1385 {
|
|
1386 if ( exonBlocks[i].leftType == 1 && exonBlocks[i].prevCnt == 0 )
|
|
1387 {
|
|
1388 exonBlocks[i].leftType = 0 ;
|
|
1389 exonBlocks[i].leftStrand = '.' ;
|
|
1390 if ( i > 0 && exonBlocks[i - 1].rightType == 1 )
|
|
1391 exonBlocks[i - 1].rightType = 0 ;
|
|
1392 }
|
|
1393 if ( exonBlocks[i].rightType == 2 && exonBlocks[i].nextCnt == 0 )
|
|
1394 {
|
|
1395 exonBlocks[i].rightType = 0 ;
|
|
1396 exonBlocks[i].rightStrand = '.' ;
|
|
1397 if ( i < exonBlockCnt - 1 && exonBlocks[i + 1].leftType == 2 )
|
|
1398 exonBlocks[i + 1].leftType = 0 ;
|
|
1399 }
|
|
1400 }
|
|
1401 MergeNearBlocks() ;
|
|
1402
|
|
1403 delete[] regionLength ;
|
|
1404 }
|
|
1405
|
|
1406 double PickLeftAndRightRatio( double l, double r )
|
|
1407 {
|
|
1408 if ( l >= 0 && r >= 0 )
|
|
1409 return l < r ? l : r ;
|
|
1410 else if ( l < 0 && r < 0 )
|
|
1411 return -1 ;
|
|
1412 else if ( l < 0 )
|
|
1413 return r ;
|
|
1414 else
|
|
1415 return l ;
|
|
1416 }
|
|
1417
|
|
1418 double PickLeftAndRightRatio( const struct _block &b )
|
|
1419 {
|
|
1420 return PickLeftAndRightRatio( b.leftRatio, b.rightRatio ) ;
|
|
1421 }
|
|
1422
|
|
1423 void ComputeRatios()
|
|
1424 {
|
|
1425 int i, j ;
|
|
1426 int exonBlockCnt = exonBlocks.size() ;
|
|
1427 for ( i = 0 ; i < exonBlockCnt ; ++i )
|
|
1428 {
|
|
1429 exonBlocks[i].leftRatio = -1 ;
|
|
1430 exonBlocks[i].rightRatio = -1 ;
|
|
1431 if ( exonBlocks[i].leftType == 2 && exonBlocks[i].rightType == 1 )
|
|
1432 {
|
|
1433 // ]...[
|
|
1434 double anchorAvg = 0 ;
|
|
1435 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
|
|
1436 if ( i >= 1 && exonBlocks[i - 1].chrId == exonBlocks[i].chrId )
|
|
1437 {
|
|
1438 anchorAvg = GetAvgDepth( exonBlocks[i - 1] ) ;
|
|
1439 if ( anchorAvg > 1 && avgDepth > 1 )
|
|
1440 exonBlocks[i].leftRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
|
|
1441 }
|
|
1442 if ( i < exonBlockCnt - 1 && exonBlocks[i + 1].chrId == exonBlocks[i].chrId )
|
|
1443 {
|
|
1444 anchorAvg = GetAvgDepth( exonBlocks[i + 1] ) ;
|
|
1445 if ( anchorAvg > 1 && avgDepth > 1 )
|
|
1446 exonBlocks[i].rightRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
|
|
1447 }
|
|
1448 }
|
|
1449
|
|
1450 if ( exonBlocks[i].leftType == 0 && exonBlocks[i].rightType == 1 )
|
|
1451 {
|
|
1452 // (..[ , overhang subexon.
|
|
1453 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
|
|
1454 double anchorAvg = GetAvgDepth( exonBlocks[i + 1] ) ;
|
|
1455 if ( avgDepth > 1 && anchorAvg > 1 )
|
|
1456 exonBlocks[i].rightRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
|
|
1457 }
|
|
1458
|
|
1459 /*if ( exonBlocks[i].leftType == 1 )
|
|
1460 {
|
|
1461 // For the case (...[, the leftRatio is actuall the leftratio of the subexon on its right.
|
|
1462 int len = 0 ;
|
|
1463 double depthSum = 0 ;
|
|
1464 int tag = i ;
|
|
1465
|
|
1466 for ( j = 0 ; j < exonBlocks[tag].prevCnt ; ++j )
|
|
1467 {
|
|
1468 int k = exonBlocks[tag].prev[j] ;
|
|
1469 len += ( exonBlocks[k].end - exonBlocks[k].start + 1 ) ;
|
|
1470 depthSum += exonBlocks[k].depthSum ;
|
|
1471 }
|
|
1472 double otherAvgDepth = depthSum / len ;
|
|
1473 double avgDepth = GetAvgDepth( exonBlocks[tag] ) ;
|
|
1474
|
|
1475 // adjust avgDepth by looking into the following exon blocks(subexon) in the same region whose left side is not hard end
|
|
1476 for ( j = tag + 1 ; j < exonBlockCnt ; ++j )
|
|
1477 {
|
|
1478 if ( exonBlocks[j].start != exonBlocks[j - 1].end + 1 || exonBlocks[j].leftType == 1 )
|
|
1479 break ;
|
|
1480 double tmp = GetAvgDepth( exonBlocks[j] ) ;
|
|
1481 if ( tmp > avgDepth )
|
|
1482 avgDepth = tmp ;
|
|
1483 }
|
|
1484
|
|
1485 if ( avgDepth < 1 )
|
|
1486 avgDepth = 1 ;
|
|
1487 if ( otherAvgDepth < 1 )
|
|
1488 otherAvgDepth = 1 ;
|
|
1489
|
|
1490 //exonBlocks[i].leftRatio = pow( log( avgDepth ) / log(2.0), 2.0 / 3.0 ) - pow( log( otherAvgDepth ) / log( 2.0 ), 2.0 / 3.0 );
|
|
1491 exonBlocks[i].leftRatio = sqrt( avgDepth ) - log( avgDepth ) - ( sqrt( otherAvgDepth ) + log( otherAvgDepth ) ) ;
|
|
1492
|
|
1493 }*/
|
|
1494 if ( exonBlocks[i].rightType == 0 && exonBlocks[i].leftType == 2 )
|
|
1495 {
|
|
1496 // for overhang subexon.
|
|
1497 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
|
|
1498 double anchorAvg = GetAvgDepth( exonBlocks[i - 1] ) ;
|
|
1499 if ( avgDepth > 1 && anchorAvg > 1 )
|
|
1500 exonBlocks[i].leftRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
|
|
1501
|
|
1502 }
|
|
1503 /*if ( exonBlocks[i].rightType == 2 )
|
|
1504 {
|
|
1505 int len = 0 ;
|
|
1506 double depthSum = 0 ;
|
|
1507 int tag = i ;
|
|
1508
|
|
1509 for ( j = 0 ; j < exonBlocks[tag].nextCnt ; ++j )
|
|
1510 {
|
|
1511 int k = exonBlocks[tag].next[j] ;
|
|
1512 len += ( exonBlocks[k].end - exonBlocks[k].start + 1 ) ;
|
|
1513 depthSum += exonBlocks[k].depthSum ;
|
|
1514 }
|
|
1515 double otherAvgDepth = depthSum / len ;
|
|
1516 double avgDepth = GetAvgDepth( exonBlocks[tag] ) ;
|
|
1517
|
|
1518 // adjust avgDepth by looking into the earlier exon blocks(subexon) in the same region whose right side is not hard end
|
|
1519 for ( j = tag - 1 ; j >= 0 ; --j )
|
|
1520 {
|
|
1521 if ( exonBlocks[j].end + 1 != exonBlocks[j + 1].start || exonBlocks[j].rightType == 2 )
|
|
1522 break ;
|
|
1523 double tmp = GetAvgDepth( exonBlocks[j] ) ;
|
|
1524 if ( tmp > avgDepth )
|
|
1525 avgDepth = tmp ;
|
|
1526 }
|
|
1527
|
|
1528 if ( avgDepth < 1 )
|
|
1529 avgDepth = 1 ;
|
|
1530 if ( otherAvgDepth < 1 )
|
|
1531 otherAvgDepth = 1 ;
|
|
1532
|
|
1533 exonBlocks[i].rightRatio = sqrt( avgDepth ) - log( avgDepth ) - ( sqrt( otherAvgDepth ) + log( otherAvgDepth ) ) ;
|
|
1534 //pow( log( avgDepth ) / log(2.0), 2.0 / 3.0 ) - pow( log( otherAvgDepth ) / log(2.0), 2.0 / 3.0 );
|
|
1535 }*/
|
|
1536 // The remaining case the islands, (...)
|
|
1537 }
|
|
1538
|
|
1539 // go through each region of subexons, and only compute the ratio for the first and last hard boundary
|
|
1540 for ( i = 0 ; i < exonBlockCnt ; )
|
|
1541 {
|
|
1542 // the blocks from [i,j) corresponds to a region, namely consecutive subexons.
|
|
1543 for ( j = i + 1 ; j < exonBlockCnt ; ++j )
|
|
1544 if ( exonBlocks[j].contigId != exonBlocks[i].contigId )
|
|
1545 break ;
|
|
1546
|
|
1547 int leftSupport = 0 ;
|
|
1548 int rightSupport = 0 ;
|
|
1549 int leftTag = j, rightTag = i - 1 ;
|
|
1550 for ( int k = i ; k < j ; ++k )
|
|
1551 {
|
|
1552 if ( exonBlocks[k].leftType == 1 && exonBlocks[k].leftSplice > 0 )
|
|
1553 {
|
|
1554 leftSupport += exonBlocks[k].leftSplice ;
|
|
1555 if ( k < leftTag )
|
|
1556 leftTag = k ;
|
|
1557 }
|
|
1558 if ( exonBlocks[k].rightType == 2 && exonBlocks[k].rightSplice > 0 )
|
|
1559 {
|
|
1560 rightSupport += exonBlocks[k].rightSplice ;
|
|
1561 if ( k > rightTag )
|
|
1562 rightTag = k ;
|
|
1563 }
|
|
1564 }
|
|
1565
|
|
1566 if ( leftSupport > 0 && rightSupport > 0 )
|
|
1567 {
|
|
1568 // when the right splice support is much greater than that of the left side, there should be a soft boundary for the left side.
|
|
1569 // Wether we want to include this soft boundary or not will be determined in class, when it looked at whether the overhang block
|
|
1570 // should be kept or not.
|
|
1571 exonBlocks[ leftTag ].leftRatio = sqrt( rightSupport ) - log( ( rightSupport + leftSupport ) * 0.5 ) - ( sqrt( leftSupport ) ) ; //+ log( leftSupport ) ) ;
|
|
1572 exonBlocks[ rightTag ].rightRatio = sqrt( leftSupport ) - log( ( rightSupport + leftSupport ) * 0.5 ) - ( sqrt( rightSupport ) ) ; //+ log( rightSupport ) ) ;
|
|
1573 }
|
|
1574
|
|
1575 i = j ; // don't forget this.
|
|
1576 }
|
|
1577 }
|
|
1578 } ;
|
|
1579
|
|
1580 #endif
|