annotate PsiCLASS-1.0.2/blocks.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 // The class manage the blocks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 // Li Song
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #ifndef _LSONG_CLASSES_BLOCKS_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #define _LSONG_CLASSES_BLOCKS_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 #include <vector>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include <map>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include <assert.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include <math.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #include <set>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 #include <inttypes.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 #include "defs.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 extern bool VERBOSE ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 extern FILE *fpOut ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 extern int gMinDepth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 struct _splitSite // means the next position belongs to another block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 int64_t pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 char strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 int type ; // 1-start of an exon. 2-end of an exon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 int support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 int uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 int mismatchSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 int64_t oppositePos ; // the position of the other sites to form the intron.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 struct _block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 int contigId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 int64_t start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 int64_t leftSplice, rightSplice ; // Some information about the left splice site and right splice site.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 // record the leftmost and rightmost coordinates of a splice site within this block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 //or the length of read alignment support the splice sites.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 //Or the number of support.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 int64_t depthSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 int leftType, rightType ; //0-soft boundary, 1-start of an exon, 2-end of an exon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 double leftRatio, rightRatio ; // the adjusted ratio-like value to the left and right anchor subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 double ratio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 char leftStrand, rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 int *depth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 int prevCnt, nextCnt ; // Some connection information for the subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 int *prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 int *next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 // adjacent graph
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 struct _adj
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 int ind ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 int info ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 int support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 int next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 class Blocks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 private:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 std::map<int, int> exonBlocksChrIdOffset ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 int64_t Overlap( int64_t s0, int64_t e0, int64_t s1, int64_t e1, int64_t &s, int64_t &e )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 s = e = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 if ( e0 < s1 || s0 > e1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 s = s0 > s1 ? s0 : s1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 e = e0 < e1 ? e0 : e1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 return e - s + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 void Split( const char *s, char delimit, std::vector<std::string> &fields )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 fields.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 if ( s == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 std::string f ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 for ( i = 0 ; s[i] ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if ( s[i] == delimit || s[i] == '\n' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 fields.push_back( f ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 f.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 f.append( 1, s[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 fields.push_back( f ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 f.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 void BuildBlockChrIdOffset()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 // Build the map for the offsets of chr id in the exonBlock list.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 exonBlocksChrIdOffset[ exonBlocks[0].chrId] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 int cnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 for ( int i = 1 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 if ( exonBlocks[i].chrId != exonBlocks[i - 1].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 exonBlocksChrIdOffset[ exonBlocks[i].chrId ] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 bool CanReach( int from, int to, struct _adj *adj, bool *visited )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 if ( visited[from] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 visited[from] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 int p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 p = adj[from].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 if ( adj[p].ind == to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 if ( adj[p].ind < to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 && CanReach( adj[p].ind, to, adj, visited ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 void AdjustAndCreateExonBlocks( int tag, std::vector<struct _block> &newExonBlocks )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 if ( exonBlocks[tag].depth != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 // Convert the increment and decrement into actual depth.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 int len = exonBlocks[tag].end - exonBlocks[tag].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 int *depth = exonBlocks[tag].depth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 for ( i = 1 ; i < len ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 depth[i] = depth[i - 1] + depth[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 struct _block island ; // the portion created by the hollow.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 island.start = island.end = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 island.depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 /*if ( exonBlocks[tag].start == 1562344 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 for ( i = 0 ; i < len ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 printf( "%d\n", depth[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 // Adjust boundary accordingly.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 int64_t adjustStart = exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 int64_t adjustEnd = exonBlocks[tag].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 if ( exonBlocks[tag].leftType == 0 && exonBlocks[tag].rightType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 for ( i = len - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 if ( depth[i] < gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 if ( exonBlocks[tag].rightType == 2 && i + exonBlocks[tag].start > exonBlocks[tag].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 i = exonBlocks[tag].rightSplice - exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 adjustStart = i + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 if ( adjustStart > exonBlocks[tag].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 int s, e ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 // firstly [s,e] is the range of depth array.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 for ( s = 0 ; s < i ; ++s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 if ( depth[s] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 for ( e = i - 1 ; e >= s ; -- e )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 if ( depth[e] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 if ( e >= s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 island = exonBlocks[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 island.depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 island.leftType = island.rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 for ( j = s ; j <= e ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 island.depthSum += depth[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 island.start = s + exonBlocks[tag].start ; // offset the coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 island.end = e + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 else if ( exonBlocks[tag].leftType != 0 && exonBlocks[tag].rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 for ( i = 0 ; i < len ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 if ( depth[i] < gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 --i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 if ( exonBlocks[tag].leftType == 1 && i + exonBlocks[tag].start < exonBlocks[tag].leftSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 i = exonBlocks[tag].leftSplice - exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 adjustEnd = i + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 if ( adjustEnd < exonBlocks[tag].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 int s, e ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 // firstly [s,e] is the range of depth array.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 for ( s = i + 1 ; s < len ; ++s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 if ( depth[s] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 for ( e = len - 1 ; e >= s ; --e )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 if ( depth[e] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 if ( e >= s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 island = exonBlocks[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 island.depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 island.leftType = island.rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 for ( j = s ; j <= e ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 island.depthSum += depth[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 island.start = s + exonBlocks[tag].start ; // offset the coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 island.end = e + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 else if ( exonBlocks[tag].leftType == 0 && exonBlocks[tag].rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 for ( i = 0 ; i < len ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 if ( depth[i] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 adjustStart = i + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 for ( i = len - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 if ( depth[i] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 adjustEnd = i + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 else if ( exonBlocks[tag].leftType == 1 && exonBlocks[tag].rightType == 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 && exonBlocks[tag].end - exonBlocks[tag].start + 1 >= 1000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 // The possible merge of two genes or merge of UTRs, if we don't break the low coverage part.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 // If we decide to cut, I'll reuse the variable "island" to represent the subexon on right hand side.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 int gapSize = 30 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 for ( i = 0 ; i <= len - ( gapSize + 1 ) ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 if ( depth[i] < gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 for ( l = i ; l < i + ( gapSize + 1 ) ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 if ( depth[l] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 if ( l < i + ( gapSize + 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 i = l - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 for ( j = len - 1 ; j >= ( gapSize + 1 ) - 1 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 if ( depth[j] < gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 for ( l = j ; l >= j - ( gapSize + 1 ) + 1 ; --l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 if ( depth[l] >= gMinDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 if ( l >= j - ( gapSize + 1 ) + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 j = l + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 if ( j - i + 1 > gapSize )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 // we break.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 --i ; ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 adjustEnd = i + exonBlocks[tag].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 if ( j < len - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 island = exonBlocks[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 island.depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 island.leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 for ( l = j ; l <= len - 1 ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 island.depthSum += depth[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 island.start = j + exonBlocks[tag].start ; // offset the coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 island.end = exonBlocks[tag].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 exonBlocks[tag].rightType = 0 ; // put it here, so the island can get the right info
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 //fprintf( stderr, "break %d %d %d %d.\n", (int)exonBlocks[tag].start, (int)adjustEnd, i + (int)exonBlocks[tag].start, j + (int)exonBlocks[tag].start ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 int lostDepthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 for ( i = exonBlocks[tag].start ; i < adjustStart ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 lostDepthSum += depth[i - exonBlocks[tag].start ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 for ( i = adjustEnd + 1 ; i < exonBlocks[tag].end ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 lostDepthSum += depth[i - exonBlocks[tag].start ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 exonBlocks[tag].depthSum -= lostDepthSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 delete[] exonBlocks[tag].depth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 exonBlocks[tag].depth = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 //if ( exonBlocks[tag].start == 1562344 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 // printf( "%d %d\n", adjustStart, adjustEnd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 if ( ( len > 1 && adjustEnd - adjustStart + 1 <= 1 ) || ( adjustEnd - adjustStart + 1 <= 0 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 exonBlocks[tag].start = adjustStart ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 exonBlocks[tag].end = adjustEnd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 if ( island.start != -1 && island.end < exonBlocks[tag].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 newExonBlocks.push_back( island ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 newExonBlocks.push_back( exonBlocks[tag] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 if ( island.start != -1 && island.start > exonBlocks[tag].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 newExonBlocks.push_back( island ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 public:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 std::vector<struct _block> exonBlocks ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 Blocks() { }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 ~Blocks()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 int blockCnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 for ( int i = 0 ; i < blockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 if ( exonBlocks[i].next != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 delete[] exonBlocks[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 if ( exonBlocks[i].prev != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 delete[] exonBlocks[i].prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 double GetAvgDepth( const struct _block &block )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 return block.depthSum / (double)( block.end - block.start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 int BuildExonBlocks( Alignments &alignments )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 while ( alignments.Next() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 int segCnt = alignments.segCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 struct _pair *segments = alignments.segments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 int eid = 0 ; // the exonblock id that the segment update
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 // locate the first exonblock that beyond the start of the read.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 while ( tag < (int)exonBlocks.size() && ( exonBlocks[tag].end < segments[0].a - 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 || exonBlocks[tag].chrId != alignments.GetChromId() ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 // due to the merge of exon blocks, we might need roll back
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 --tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 while ( tag >= 0 && ( exonBlocks[tag].end >= segments[0].a - 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 && exonBlocks[tag].chrId == alignments.GetChromId() ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 --tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 for ( i = 0 ; i < segCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 //if ( i == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 // printf( "hi %d %s %d %d\n", i, alignments.GetReadId(), segments[i].a, segments[i].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 for ( j = tag ; j < (int)exonBlocks.size() ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 if ( exonBlocks[j].end >= segments[i].a - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 if ( j >= (int)exonBlocks.size() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 // Append a new block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 struct _block newSeg ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 newSeg.chrId = alignments.GetChromId() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 newSeg.start = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 newSeg.end = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 newSeg.leftSplice = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 newSeg.rightSplice = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 if ( i > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 newSeg.leftSplice = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if ( i < segCnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 newSeg.rightSplice = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 exonBlocks.push_back( newSeg ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 eid = exonBlocks.size() - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 else if ( exonBlocks[j].end < segments[i].b ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 ( exonBlocks[j].start > segments[i].a && exonBlocks[j].start <= segments[i].b + 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 // If overlaps with a current exon block, so we extend it
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 if ( exonBlocks[j].end < segments[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 // extends toward right
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 exonBlocks[j].end = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 if ( i > 0 && ( exonBlocks[j].leftSplice == -1 || segments[i].a < exonBlocks[j].leftSplice ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 exonBlocks[j].leftSplice = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 if ( i < segCnt - 1 && segments[i].b > exonBlocks[j].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 exonBlocks[j].rightSplice = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 eid = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 // Merge with next few exon blocks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 for ( k = j + 1 ; k < (int)exonBlocks.size() ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 if ( exonBlocks[k].start <= exonBlocks[j].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 if ( exonBlocks[k].end > exonBlocks[j].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 exonBlocks[j].end = exonBlocks[k].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 if ( exonBlocks[k].leftSplice != -1 && ( exonBlocks[j].leftSplice == -1 || exonBlocks[k].leftSplice < exonBlocks[j].leftSplice ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 exonBlocks[j].leftSplice = exonBlocks[k].leftSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 if ( exonBlocks[k].rightSplice != -1 && exonBlocks[k].rightSplice > exonBlocks[j].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 exonBlocks[j].rightSplice = exonBlocks[k].rightSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 if ( k > j + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 // Remove the merged blocks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 int a, b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 for ( a = j + 1, b = k ; b < (int)exonBlocks.size() ; ++a, ++b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 exonBlocks[a] = exonBlocks[b] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 for ( a = 0 ; a < k - ( j + 1 ) ; ++a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 else if ( exonBlocks[j].start > segments[i].a && exonBlocks[j].start <= segments[i].b + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 // extends toward left
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 exonBlocks[j].start = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 if ( i > 0 && ( exonBlocks[j].leftSplice == -1 || segments[i].a < exonBlocks[j].leftSplice ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 exonBlocks[j].leftSplice = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 if ( i < segCnt - 1 && segments[i].b > exonBlocks[j].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 exonBlocks[j].rightSplice = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 eid = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 // Merge with few previous exon blocks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 for ( k = j - 1 ; k >= 0 ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 if ( exonBlocks[k].end >= exonBlocks[k + 1].start - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 if ( exonBlocks[k + 1].start < exonBlocks[k].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 exonBlocks[k].start = exonBlocks[k + 1].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 if ( exonBlocks[k].leftSplice != -1 && ( exonBlocks[j].leftSplice == -1 || exonBlocks[k].leftSplice < exonBlocks[j].leftSplice ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 exonBlocks[j].leftSplice = exonBlocks[k].leftSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 if ( exonBlocks[k].rightSplice != -1 && exonBlocks[k].rightSplice > exonBlocks[j].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 exonBlocks[j].rightSplice = exonBlocks[k].rightSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 if ( k < j - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 int a, b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 eid = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 for ( a = k + 2, b = j + 1 ; b < (int)exonBlocks.size() ; ++a, ++b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 exonBlocks[a] = exonBlocks[b] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 for ( a = 0 ; a < ( j - 1 ) - k ; ++a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 else if ( exonBlocks[j].start > segments[i].b + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 int size = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 int a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 // No overlap, insert a new block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 struct _block newSeg ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 newSeg.chrId = alignments.GetChromId() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 newSeg.start = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 newSeg.end = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 newSeg.leftSplice = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 newSeg.rightSplice = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 if ( i > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 newSeg.leftSplice = segments[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 if ( i < segCnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 newSeg.rightSplice = segments[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 // Insert at position j
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 exonBlocks.push_back( newSeg ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 for ( a = size ; a > j ; --a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 exonBlocks[a] = exonBlocks[a - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 exonBlocks[a] = newSeg ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 eid = a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 // The segment is contained in j
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 eid = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 // Merge the block with the mate if the gap is very small
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 // Note that since reads are already sorted by coordinate,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 // the previous exon blocks is built completely.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 /*int64_t matePos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 int mateChrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 alignments.GetMatePosition( mateChrId, matePos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 if ( i == 0 && eid > 0 && mateChrId == exonBlocks[ eid ].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 && matePos < segments[0].a
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 && matePos >= exonBlocks[eid - 1].start
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 && matePos <= exonBlocks[eid - 1].end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 && segments[0].a - matePos + 1 <= 500
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 && exonBlocks[eid-1].chrId == exonBlocks[eid].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 && exonBlocks[eid].start - exonBlocks[eid - 1].end - 1 <= 30 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 printf( "%d: (%d-%d) (%d-%d). %d %d\n", ( segments[0].a + alignments.readLen - 1 ) - matePos + 1, exonBlocks[eid - 1].start, exonBlocks[eid - 1].end,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 exonBlocks[eid].start,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 exonBlocks[eid].end, eid, exonBlocks.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 exonBlocks[eid - 1].end = exonBlocks[eid].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 if ( exonBlocks[eid].leftSplice != -1 && ( exonBlocks[eid - 1].leftSplice == -1 || exonBlocks[eid].leftSplice < exonBlocks[eid - 1].leftSplice ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 exonBlocks[eid - 1].leftSplice = exonBlocks[k].leftSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 if ( exonBlocks[eid].rightSplice != -1 && exonBlocks[eid].rightSplice > exonBlocks[eid - 1].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 exonBlocks[eid - 1].rightSplice = exonBlocks[eid].rightSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 int size = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 for ( j = eid ; j < size - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 exonBlocks[j] = exonBlocks[j + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 /*for ( int i = 0 ; i < (int)exonBlocks.size() ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 printf( "%d %d\n", exonBlocks[i].start, exonBlocks[i].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 if ( exonBlocks.size() > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 BuildBlockChrIdOffset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 int cnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 for ( int i = 0 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 exonBlocks[i].contigId = exonBlocks[i].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 exonBlocks[i].leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 exonBlocks[i].rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568 exonBlocks[i].depth = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 exonBlocks[i].nextCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 exonBlocks[i].prevCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 exonBlocks[i].leftStrand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 exonBlocks[i].rightStrand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 return exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 void FilterSplitSitesInRegions( std::vector<struct _splitSite> &sites )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 int size = sites.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 int bsize = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 for ( i = 0 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 while ( tag < size && ( sites[tag].chrId < exonBlocks[i].chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 ( sites[tag].chrId == exonBlocks[i].chrId && sites[tag].pos < exonBlocks[i].start ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 if ( tag >= size )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 if ( sites[tag].chrId != exonBlocks[i].chrId || sites[tag].pos > exonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 for ( j = tag + 1 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 if ( sites[j].chrId != exonBlocks[i].chrId || sites[j].pos > exonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 // [tag,j) holds the split sites in this region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 int leftStrandSupport[2] = {0, 0} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 int rightStrandSupport[2] = {0, 0} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 int strandCnt[2] = { 0, 0 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 if ( sites[k].strand == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 ++strandCnt[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 if ( sites[k].oppositePos < exonBlocks[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 leftStrandSupport[0] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 else if ( sites[k].oppositePos > exonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 rightStrandSupport[0] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 else if ( sites[k].strand == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 ++strandCnt[1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 if ( sites[k].oppositePos < exonBlocks[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 leftStrandSupport[1] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 else if ( sites[k].oppositePos > exonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 rightStrandSupport[1] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 if ( leftStrandSupport[0] == 0 && rightStrandSupport[0] == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 && leftStrandSupport[1] != 0 && rightStrandSupport[1] != 0 && strandCnt[0] == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 // check whether a different strand accidentally show up in this region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 bool remove = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 if ( sites[k].strand == '-' &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 sites[k].oppositePos >= sites[k].pos && sites[k].oppositePos <= exonBlocks[i].end &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 sites[k].support <= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 remove = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 if ( remove )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 if ( sites[k].strand == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 sites[k].support = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 else if ( leftStrandSupport[1] == 0 && rightStrandSupport[1] == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 && leftStrandSupport[0] != 0 && rightStrandSupport[0] != 0 && strandCnt[1] == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 // check whether a different strand accidentally show up in this region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 bool remove = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 if ( sites[k].strand == '+' &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 sites[k].oppositePos >= sites[k].pos && sites[k].oppositePos <= exonBlocks[i].end &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 sites[k].support <= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 remove = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 if ( remove )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 if ( sites[k].strand == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 sites[k].support = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 tag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 if ( sites[i].support > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 sites[k] = sites[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 sites.resize( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 // Filter the pair of split sites that is likely merge two genes.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 // We filter the case like [..]------------------------[..]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 // [..]--[..] [..]-[...]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 void FilterGeneMergeSplitSites( std::vector<struct _splitSite> &sites )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 int bsize = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 int ssize = sites.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 struct _pair32 *siteToBlock = new struct _pair32[ ssize ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 struct _adj *adj = new struct _adj[ ssize / 2 + bsize ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 bool *visited = new bool[bsize] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697 int adjCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698 for ( i = 0 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700 adj[i].info = 0 ; // in the header, info means the number of next block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 adj[i].support = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 adj[i].next = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704 adjCnt = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 memset( siteToBlock, -1, sizeof( struct _pair32 ) * ssize ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706 memset( visited, false, sizeof( bool ) * bsize ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 // Build the graph.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 for ( i = 0 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712 for ( ; k < ssize ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714 if ( sites[k].oppositePos < sites[k].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716 if ( sites[k].chrId < exonBlocks[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
717 || ( sites[k].chrId == exonBlocks[i].chrId && sites[k].pos < exonBlocks[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
718 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
719 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
720 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
721
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
722 for ( ; k < ssize ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
723 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
724 if ( sites[k].chrId > exonBlocks[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
725 || sites[k].pos > exonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
726 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
727
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
728 if ( sites[k].oppositePos <= exonBlocks[i].end ) // ignore self-loop
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
729 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
730
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
731 for ( j = i + 1 ; j < bsize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
732 if ( sites[k].oppositePos >= exonBlocks[j].start && sites[k].oppositePos <= exonBlocks[j].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 if ( sites[k].oppositePos >= exonBlocks[j].start && sites[k].oppositePos <= exonBlocks[j].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736 int p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737 p = adj[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 if ( adj[p].ind == j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 ++adj[p].info ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743 adj[p].support += sites[k].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 if ( p == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 adj[ adjCnt ].ind = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 adj[ adjCnt ].info = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752 adj[ adjCnt ].support = sites[k].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 adj[ adjCnt ].next = adj[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 adj[i].next = adjCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755 ++adj[i].info ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756 ++adjCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759 siteToBlock[k].a = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 siteToBlock[k].b = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 for ( k = 0 ; k < ssize ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 if ( sites[k].oppositePos - sites[k].pos + 1 < 20000 || sites[k].uniqSupport >= 30 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 int from = siteToBlock[k].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 int to = siteToBlock[k].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771 if ( to - from - 1 < 2 || adj[from].info <= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
772 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
773
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
774 memset( &visited[from], false, sizeof( bool ) * ( to - from + 1 ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
775
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
776 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
777 int p = adj[from].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
778 int max = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779 int maxP = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 if ( adj[p].support >= 10 && adj[p].ind <= to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784 if ( adj[p].support > max || ( adj[p].support == max && adj[p].ind == to ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 max = adj[p].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787 maxP = p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 if ( adj[p].ind == to && adj[p].info > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792 cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
794 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
795 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
796 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
797 if ( cnt <= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
798 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
799
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
800 if ( max != -1 && adj[maxP].ind == to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
801 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
802
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
803 // No other path can reach "to"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
804 p = adj[from].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
805 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
806 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
807 if ( adj[p].ind != to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
808 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
809 //if ( sites[k].pos ==43917158 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
810 // printf( "hi %d %d. (%d %d)\n", from, adj[p].ind, exonBlocks[ adj[p].ind ].start, exonBlocks[ adj[p].ind ].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
811 if ( CanReach( adj[p].ind, to, adj, visited ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
812 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
813 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
814 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
815 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
816 if ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
817 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
818
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
819 //if ( sites[k].pos == 34073267 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
820 // printf( "hi %d %d. (%d %d)\n", from, to, exonBlocks[to].start, exonBlocks[to].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
821
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
822 // There are some blocks between that can reach "to"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
823 for ( i = from + 1 ; i < to ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
824 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
825 p = adj[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
826 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
827 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
828 if ( adj[p].ind == to && adj[p].support >= 10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
829 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
830 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
831 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
832 if ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
833 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
834 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
835 if ( i >= to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
836 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
837
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
838 // Filter the site
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
839 //printf( "filtered: %d %d\n", sites[k].pos, sites[k].oppositePos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
840 sites[k].support = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
841 for ( j = k + 1 ; j < ssize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
842 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
843 if ( sites[j].pos == sites[k].oppositePos && sites[j].oppositePos == sites[k].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
844 sites[j].support = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
845 if ( sites[j].pos > sites[k].oppositePos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
846 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
847 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
848 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
849
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
850 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
851 for ( i = 0 ; i < ssize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
852 if ( sites[i].support > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
853 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
854 sites[k] = sites[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
855 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
856 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
857 sites.resize( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
858
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
859 // Filter the intron on the different strand of a gene.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
860 ssize = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
861 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
862 for ( i = 0 ; i < bsize ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
863 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
864 int farthest = exonBlocks[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
865
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
866 for ( j = i ; j < bsize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
867 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
868 if ( exonBlocks[j].start > farthest || exonBlocks[i].chrId != exonBlocks[j].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
869 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
870 int p = adj[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
871 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
872 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
873 if ( exonBlocks[ adj[p].ind ].end > farthest )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
874 farthest = exonBlocks[ adj[p].ind ].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
875 p = adj[p].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
876 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
877 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
878
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
879 for ( ; k < ssize ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
880 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
881 if ( sites[k].chrId < exonBlocks[i].chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
882 ( sites[k].chrId == exonBlocks[i].chrId && sites[k].pos < exonBlocks[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
883 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
884 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
885 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
886
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
887 if ( sites[k].chrId > exonBlocks[i].chrId || sites[k].pos > farthest )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
888 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
889 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
890 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
891 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
892 //printf( "%d %d. %d %d. %d %d\n", i, j, exonBlocks[i].start, farthest, k, sites[k].pos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
893
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
894
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
895 int from = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
896 int to ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
897 for ( to = k ; to < ssize ; ++to )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
898 if ( sites[to].chrId != exonBlocks[i].chrId || sites[to].pos > farthest )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
899 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
900
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
901 int strandSupport[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
902 int strandCount[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
903 for ( k = from ; k < to ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
904 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
905 if ( sites[k].oppositePos <= sites[k].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
906 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
907
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
908 int tag = ( sites[k].strand == '+' ? 1 : 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
909 strandSupport[tag] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
910 ++strandCount[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
911 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
912
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
913 // Not mixtured.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
914 if ( strandCount[0] * strandCount[1] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
915 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
916 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
917 k = to ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
918 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
919 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
920
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
921 char removeStrand = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
922 if ( strandCount[0] == 1 && strandCount[1] >= 3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
923 && strandSupport[1] >= 20 * strandSupport[0] && strandSupport[0] <= 5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
924 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
925 removeStrand = '-' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
926 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
927 else if ( strandCount[1] == 1 && strandCount[0] >= 3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
928 && strandSupport[0] >= 20 * strandSupport[1] && strandSupport[1] <= 5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
929 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
930 removeStrand = '+' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
931 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
932
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
933 if ( removeStrand == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
934 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
935 i = j ; k = to ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
936 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
937 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
938
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
939 for ( k = from ; k < to ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
940 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
941 if ( sites[k].strand == removeStrand && sites[k].oppositePos >= exonBlocks[i].start && sites[k].oppositePos <= farthest )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
942 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
943 //printf( "filtered: %d %d\n", sites[k].pos, sites[k].oppositePos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
944 sites[k].support = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
945 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
946 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
947
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
948 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
949 k = to ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
950 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
951
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
952 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
953 for ( i = 0 ; i < ssize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
954 if ( sites[i].support > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
955 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
956 sites[k] = sites[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
957 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
958 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
959 sites.resize( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
960
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
961 delete[] visited ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
962 delete[] siteToBlock ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
963 delete[] adj ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
964 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
965
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
966 void SplitBlocks( Alignments &alignments, std::vector< struct _splitSite > &splitSites )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
967 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
968 std::vector<struct _block> rawExonBlocks = exonBlocks ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
969 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
970 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
971 int bsize = rawExonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
972 int ssize = splitSites.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
973
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
974 // Make sure not overflow
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
975 struct _splitSite tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
976 tmp.pos = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
977 splitSites.push_back( tmp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
978
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
979 exonBlocks.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
980 for ( i = 0 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
981 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
982 while ( tag < ssize && ( splitSites[tag].chrId < rawExonBlocks[i].chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
983 ( splitSites[tag].chrId == rawExonBlocks[i].chrId && splitSites[tag].pos < rawExonBlocks[i].start ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
984 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
985
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
986 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
987 for ( l = tag ; l < ssize ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
988 if ( splitSites[l].chrId != rawExonBlocks[i].chrId || splitSites[l].pos > rawExonBlocks[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
989 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
990 int64_t start = rawExonBlocks[i].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
991 int64_t end = rawExonBlocks[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
992 //printf( "%lld %lld\n", start, end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
993 //printf( "%s %s: %d %d\n", alignments.GetChromName( splitSites[tag].chrId ), alignments.GetChromName( rawExonBlocks[i].chrId ), tag, l ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
994 for ( j = tag ; j <= l ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
995 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
996 int leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
997 int rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
998 if ( j > tag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
999 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1000 start = splitSites[j - 1].pos ; // end is from previous stage
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1001 leftType = splitSites[j - 1].type ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1002 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1003 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1004 start = rawExonBlocks[i].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1005
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1006 if ( j <= l - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1007 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1008 end = splitSites[j].pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1009 rightType = splitSites[j].type ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1010 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1011 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1012 end = rawExonBlocks[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1013
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1014 if ( leftType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1015 ++start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1016 if ( rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1017 --end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1018
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1019 struct _block tmpB ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1020 tmpB = rawExonBlocks[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1021 tmpB.start = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1022 tmpB.end = end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1023 tmpB.depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1024 tmpB.ratio = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1025 //printf( "\t%lld %lld\n", start, end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1026 tmpB.leftType = leftType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1027 tmpB.rightType = rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1028 tmpB.depth = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1029
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1030 exonBlocks.push_back( tmpB ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1031 // handle the soft boundary is the same as the hard boundary case
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1032 // or adjacent splice sites
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1033 /*if ( j == tag && start == end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1034 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1035 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1036 --end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1037 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1038 else if ( j == l && start > end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1039 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1040 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1041 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1042 if ( start > end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1043 //|| ( tmpB.start == rawExonBlocks[i].start && tmpB.end != rawExonBlocks[i].end && tmpB.end - tmpB.start + 1 <= 7 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1044 //|| ( tmpB.end == rawExonBlocks[i].end && tmpB.start != rawExonBlocks[i].start && tmpB.end - tmpB.start + 1 <= 7 )) // the case of too short overhang
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1045 || ( tmpB.leftType == 0 && tmpB.rightType == 2 && tmpB.end - tmpB.start + 1 <= 9 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1046 || ( tmpB.leftType == 1 && tmpB.rightType == 0 && tmpB.end - tmpB.start + 1 <= 9 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1047 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1048 exonBlocks.pop_back() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1049 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1050 /*else if ( start == end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1051 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1052
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1053 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1054 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1055 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1056 BuildBlockChrIdOffset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1057 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1058
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1059 void ComputeDepth( Alignments &alignments )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1060 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1061 // Go through the alignment list again to fill in the depthSum;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1062 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1063 int j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1064 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1065 int blockCnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1066
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1067 std::vector<struct _block> newExonBlocks ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1068 while ( alignments.Next() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1069 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1070 int segCnt = alignments.segCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1071 struct _pair *segments = alignments.segments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1072
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1073 while ( tag < blockCnt && ( exonBlocks[tag].chrId < alignments.GetChromId() ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1074 ( exonBlocks[tag].chrId == alignments.GetChromId() && exonBlocks[tag].end < segments[0].a ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1075 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1076 AdjustAndCreateExonBlocks( tag, newExonBlocks ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1077 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1078 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1079 for ( i = 0 ; i < segCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1080 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1081 for ( j = tag ; j < blockCnt && ( exonBlocks[j].chrId == alignments.GetChromId() && exonBlocks[j].start <= segments[i].b ) ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1082 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1083 int64_t s = -1, e = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1084 exonBlocks[j].depthSum += Overlap( segments[i].a, segments[i].b, exonBlocks[j].start, exonBlocks[j].end, s, e ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1085 if ( s == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1086 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1087
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1088 if ( exonBlocks[j].depth == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1089 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1090 int len = exonBlocks[j].end - exonBlocks[j].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1091 exonBlocks[j].depth = new int[len + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1092 memset( exonBlocks[j].depth, 0, sizeof( int ) * ( len + 1 ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1093 exonBlocks[j].leftSplice = exonBlocks[j].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1094 exonBlocks[j].rightSplice = exonBlocks[j].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1095 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1096 int *depth = exonBlocks[j].depth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1097 ++depth[s - exonBlocks[j].start] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1098 --depth[e - exonBlocks[j].start + 1] ; // notice the +1 here, since the decrease of coverage actually happens at next base.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1099
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1100 // record the longest alignment stretch support the splice site.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1101 if ( exonBlocks[j].leftType == 1 && segments[i].a == exonBlocks[j].start
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1102 && e > exonBlocks[j].leftSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1103 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1104 exonBlocks[j].leftSplice = e ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1105 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1106 if ( exonBlocks[j].rightType == 2 && segments[i].b == exonBlocks[j].end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1107 && s < exonBlocks[j].rightSplice )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1108 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1109 exonBlocks[j].rightSplice = s ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1110 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1112 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1113 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1114
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1115 for ( ; tag < blockCnt ; ++tag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1116 AdjustAndCreateExonBlocks( tag, newExonBlocks ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1117 exonBlocks.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1118
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1119 // Due to multi-alignment, we may skip some alignments that determines leftSplice and
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1120 // rightSplice, hence filtered some subexons. As a result, some subexons' anchor subexon will disapper
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1121 // and we need to change its boundary type.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1122 blockCnt = newExonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1123 for ( i = 0 ; i < blockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1124 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1125 if ( ( i == 0 && newExonBlocks[i].leftType == 2 ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1126 ( i > 0 && newExonBlocks[i].leftType == 2 && newExonBlocks[i - 1].end + 1 != newExonBlocks[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1127 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1128 newExonBlocks[i].leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1129 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1130
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1131 if ( ( i == blockCnt - 1 && newExonBlocks[i].rightType == 1 ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1132 ( i < blockCnt - 1 && newExonBlocks[i].rightType == 1 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1133 newExonBlocks[i].end + 1 != newExonBlocks[i + 1].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1134 newExonBlocks[i].rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1135 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1136 exonBlocks = newExonBlocks ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1138
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1139 // If two blocks whose soft boundary are close to each other, we can merge them.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1140 void MergeNearBlocks()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1141 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1142 std::vector<struct _block> rawExonBlocks = exonBlocks ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1143 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1144 int bsize = rawExonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1145 int *newIdx = new int[bsize] ; // used for adjust prev and next. Note that by merging, it won't change the number of prevCnt or nextCnt.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1146
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1147 exonBlocks.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1148 exonBlocks.push_back( rawExonBlocks[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1149 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1150 newIdx[0] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1151 for ( i = 1 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1152 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1153 if ( rawExonBlocks[i].chrId == exonBlocks[k].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1154 && rawExonBlocks[i].leftType == 0 && exonBlocks[k].rightType == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1155 && rawExonBlocks[i].start - exonBlocks[k].end - 1 <= 50
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1156 && rawExonBlocks[i].depthSum / double( rawExonBlocks[i].end - rawExonBlocks[i].start + 1 ) < 3.0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1157 && exonBlocks[k].depthSum / double( exonBlocks[k].end - exonBlocks[i].start + 1 ) < 3.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1158 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1159 exonBlocks[k].end = rawExonBlocks[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1160 exonBlocks[k].rightType = rawExonBlocks[i].rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1161 exonBlocks[k].rightStrand = rawExonBlocks[i].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1162 exonBlocks[k].depthSum += rawExonBlocks[i].depthSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1163 if ( rawExonBlocks[i].rightSplice != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1164 exonBlocks[k].rightSplice = rawExonBlocks[i].rightSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1165
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1166 if ( exonBlocks[k].nextCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1167 delete[] exonBlocks[k].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1168 exonBlocks[k].nextCnt = rawExonBlocks[i].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1169 exonBlocks[k].next = rawExonBlocks[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1170 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1171 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1172 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1173 exonBlocks.push_back( rawExonBlocks[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1174 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1175 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1176 newIdx[i] = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1177 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1178 BuildBlockChrIdOffset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1179 bsize = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1180 for ( i = 0 ; i < bsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1181 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1182 int cnt = exonBlocks[i].prevCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1183 for ( k = 0 ; k < cnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1184 exonBlocks[i].prev[k] = newIdx[ exonBlocks[i].prev[k] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1185
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1186 cnt = exonBlocks[i].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1187 for ( k = 0 ; k < cnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1188 exonBlocks[i].next[k] = newIdx[ exonBlocks[i].next[k] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1189 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1190 delete[] newIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1191 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1192
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1193 void AddIntronInformation( std::vector<struct _splitSite> &sites, Alignments &alignments )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1194 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1195 // Add the connection information, support information and the strand information.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1196 int i, j, k, tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1197 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1198 int scnt = sites.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1199 int exonBlockCnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1200 bool flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1201
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1202 for ( i = 0 ; i < exonBlockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1203 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1204 exonBlocks[i].prevCnt = exonBlocks[i].nextCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1205 exonBlocks[i].prev = exonBlocks[i].next = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1206 exonBlocks[i].leftStrand = exonBlocks[i].rightStrand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1207 exonBlocks[i].leftSplice = exonBlocks[i].rightSplice = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1208 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1209
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1210 // Now, we add the region id and compute the length of each region, here the region does not contain possible IR event.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1211 int regionId = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1212 for ( i = 0 ; i < exonBlockCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1213 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1214 if ( exonBlocks[i].leftType == 2 && exonBlocks[i].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1215 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1216 j = i + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1217 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1218 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1219 for ( j = i + 1 ; j < exonBlockCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1220 if ( exonBlocks[j].start > exonBlocks[j - 1].end + 1 || ( exonBlocks[j].leftType == 2 && exonBlocks[j].rightType == 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1221 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1222 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1223 exonBlocks[k].contigId = regionId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1224 ++regionId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1225 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1226 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1227 int *regionLength = new int[regionId] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1228 memset( regionLength, 0, sizeof( int ) * regionId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1229 for ( i = 0 ; i < exonBlockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1230 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1231 regionLength[ exonBlocks[i].contigId ] += exonBlocks[i].end - exonBlocks[i].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1233
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1234 for ( i = 0 ; i < scnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1235 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1236 for ( j = i ; j < scnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1237 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1238 if ( sites[j].chrId != sites[i].chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1239 sites[j].pos != sites[i].pos ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1240 sites[j].type != sites[i].type )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1241 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1242 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1243 // [i,j-1] are the indices of the sites with same coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1244 // It is possible a sites corresponds to 2 strands, then we should only keep one of them.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1245 char strand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1246 int strandSupport[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1247 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1248 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1249 if ( sites[k].strand == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1250 strandSupport[0] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1251 else if ( sites[k].strand == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1252 strandSupport[1] += sites[k].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1253 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1254 if ( strandSupport[0] > strandSupport[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1255 strand = '-' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1256 else if ( strandSupport[1] > strandSupport[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1257 strand = '+' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1258
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1259
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1260 int cnt = j - i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1261 // Locate the first subexon that can overlap with this site
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1262 while ( tag < exonBlockCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1263 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1264 if ( ( exonBlocks[tag].chrId == sites[i].chrId && exonBlocks[tag].end >= sites[i].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1265 || exonBlocks[tag].chrId > sites[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1266 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1267 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1268 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1269 flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1270 for ( k = tag; k < exonBlockCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1271 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1272 if ( exonBlocks[tag].start > sites[i].pos ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1273 exonBlocks[tag].chrId > sites[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1274 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1275
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1276 if ( exonBlocks[tag].start == sites[i].pos ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1277 exonBlocks[tag].end == sites[i].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1278 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1279 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1280 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1281 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1282 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1283
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1284 if ( !flag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1285 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1286 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1287 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1288 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1289
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1290 if ( sites[i].type == 1 && exonBlocks[tag].start == sites[i].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1291 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1292 exonBlocks[tag].prevCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1293 exonBlocks[tag].prev = new int[cnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1294 exonBlocks[tag].leftStrand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1295
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1296 // And we also need to put the "next" here.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1297 // Here we assume the oppositePos sorted in increasing order
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1298 k = tag - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1299 for ( int l = j - 1 ; l >= i ; --l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1300 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1301 for ( ; k >= 0 ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1302 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1303 if ( exonBlocks[k].end < sites[l].oppositePos || exonBlocks[k].chrId != sites[l].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1304 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1305 if ( exonBlocks[k].end == sites[l].oppositePos ) //&& exonBlocks[k].rightType == sites[l].type )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1306 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1307 if ( sites[l].strand != strand ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1308 sites[l].strand != exonBlocks[k].rightStrand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1309 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1310 exonBlocks[k].next[ exonBlocks[k].nextCnt ] = tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1311 exonBlocks[tag].prev[ exonBlocks[tag].prevCnt] = k ; // Note that the prev are sorted in decreasing order
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1312 ++exonBlocks[k].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1313 ++exonBlocks[tag].prevCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1314 double factor = 1.0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1315 if ( regionLength[ exonBlocks[k].contigId ] < alignments.readLen )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1316 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1317 int left ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1318 for ( left = k ; left >= 0 ; --left )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1319 if ( exonBlocks[left].contigId != exonBlocks[k].contigId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1320 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1321 ++left ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1322
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1323 int prevType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1324 // only consider the portion upto k.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1325 for ( int m = left ; m <= k ; ++m )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1326 if ( exonBlocks[m].leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1327 ++prevType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1328
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1329 if ( prevType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1330 factor = 2 * (double)alignments.readLen / regionLength[ exonBlocks[k].contigId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1331 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1332 if ( regionLength[ exonBlocks[tag].contigId ] < alignments.readLen )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1333 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1334 int right ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1335 for ( right = tag ; right < exonBlockCnt ; ++right )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1336 if ( exonBlocks[right].contigId != exonBlocks[tag].contigId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1337 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1338 --right ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1339
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1340 int nextType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1341 for ( int m = tag ; m <= right ; ++m )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1342 if ( exonBlocks[m].rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1343 ++nextType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1344
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1345 if ( nextType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1346 factor = 2 * (double)alignments.readLen / regionLength[ exonBlocks[tag].contigId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1347 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1348 if ( factor > 10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1349 factor = 10 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1350
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1351 if ( exonBlocks[k].contigId != exonBlocks[tag].contigId ) // the support of introns between regions.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1352 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1353 // Adjust the support if one of the anchor exon is too short.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1354 // This avoid the creation of alternative 3'/5' end due to low coverage from too short anchor.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1355 exonBlocks[tag].leftSplice += sites[l].support * factor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1356 exonBlocks[k].rightSplice += sites[l].support * factor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1357 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1358 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1360 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1361 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1362 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1363 else if ( sites[i].type == 2 && exonBlocks[tag].end == sites[i].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1364 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1365 exonBlocks[tag].nextCnt = 0 ; // cnt ; it should reach cnt after putting the ids in
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1366 exonBlocks[tag].next = new int[cnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1367 exonBlocks[tag].rightStrand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1368 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1369
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1370 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1371 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1372 // Reverse the prev list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1373 for ( i = 0 ; i < exonBlockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1374 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1375 for ( j = 0, k = exonBlocks[i].prevCnt - 1 ; j < k ; ++j, --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1376 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1377 int tmp = exonBlocks[i].prev[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1378 exonBlocks[i].prev[j] = exonBlocks[i].prev[k] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1379 exonBlocks[i].prev[k] = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1380 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1381 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1382
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1383 // If all of the subexon anchored the intron are filtered, we need to change the type of the other anchor.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1384 for ( i = 0 ; i < exonBlockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1385 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1386 if ( exonBlocks[i].leftType == 1 && exonBlocks[i].prevCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1387 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1388 exonBlocks[i].leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1389 exonBlocks[i].leftStrand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1390 if ( i > 0 && exonBlocks[i - 1].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1391 exonBlocks[i - 1].rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1392 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1393 if ( exonBlocks[i].rightType == 2 && exonBlocks[i].nextCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1394 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1395 exonBlocks[i].rightType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1396 exonBlocks[i].rightStrand = '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1397 if ( i < exonBlockCnt - 1 && exonBlocks[i + 1].leftType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1398 exonBlocks[i + 1].leftType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1399 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1400 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1401 MergeNearBlocks() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1402
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1403 delete[] regionLength ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1404 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1405
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1406 double PickLeftAndRightRatio( double l, double r )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1407 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1408 if ( l >= 0 && r >= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1409 return l < r ? l : r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1410 else if ( l < 0 && r < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1411 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1412 else if ( l < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1413 return r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1414 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1415 return l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1416 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1417
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1418 double PickLeftAndRightRatio( const struct _block &b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1419 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1420 return PickLeftAndRightRatio( b.leftRatio, b.rightRatio ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1421 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1422
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1423 void ComputeRatios()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1424 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1425 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1426 int exonBlockCnt = exonBlocks.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1427 for ( i = 0 ; i < exonBlockCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1428 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1429 exonBlocks[i].leftRatio = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1430 exonBlocks[i].rightRatio = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1431 if ( exonBlocks[i].leftType == 2 && exonBlocks[i].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1432 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1433 // ]...[
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1434 double anchorAvg = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1435 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1436 if ( i >= 1 && exonBlocks[i - 1].chrId == exonBlocks[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1437 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1438 anchorAvg = GetAvgDepth( exonBlocks[i - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1439 if ( anchorAvg > 1 && avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1440 exonBlocks[i].leftRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1441 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1442 if ( i < exonBlockCnt - 1 && exonBlocks[i + 1].chrId == exonBlocks[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1443 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1444 anchorAvg = GetAvgDepth( exonBlocks[i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1445 if ( anchorAvg > 1 && avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1446 exonBlocks[i].rightRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1447 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1448 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1449
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1450 if ( exonBlocks[i].leftType == 0 && exonBlocks[i].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1451 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1452 // (..[ , overhang subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1453 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1454 double anchorAvg = GetAvgDepth( exonBlocks[i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1455 if ( avgDepth > 1 && anchorAvg > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1456 exonBlocks[i].rightRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1457 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1458
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1459 /*if ( exonBlocks[i].leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1460 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1461 // For the case (...[, the leftRatio is actuall the leftratio of the subexon on its right.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1462 int len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1463 double depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1464 int tag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1465
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1466 for ( j = 0 ; j < exonBlocks[tag].prevCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1467 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1468 int k = exonBlocks[tag].prev[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1469 len += ( exonBlocks[k].end - exonBlocks[k].start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1470 depthSum += exonBlocks[k].depthSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1471 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1472 double otherAvgDepth = depthSum / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1473 double avgDepth = GetAvgDepth( exonBlocks[tag] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1474
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1475 // adjust avgDepth by looking into the following exon blocks(subexon) in the same region whose left side is not hard end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1476 for ( j = tag + 1 ; j < exonBlockCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1477 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1478 if ( exonBlocks[j].start != exonBlocks[j - 1].end + 1 || exonBlocks[j].leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1479 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1480 double tmp = GetAvgDepth( exonBlocks[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1481 if ( tmp > avgDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1482 avgDepth = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1483 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1484
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1485 if ( avgDepth < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1486 avgDepth = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1487 if ( otherAvgDepth < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1488 otherAvgDepth = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1489
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1490 //exonBlocks[i].leftRatio = pow( log( avgDepth ) / log(2.0), 2.0 / 3.0 ) - pow( log( otherAvgDepth ) / log( 2.0 ), 2.0 / 3.0 );
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1491 exonBlocks[i].leftRatio = sqrt( avgDepth ) - log( avgDepth ) - ( sqrt( otherAvgDepth ) + log( otherAvgDepth ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1492
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1493 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1494 if ( exonBlocks[i].rightType == 0 && exonBlocks[i].leftType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1495 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1496 // for overhang subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1497 double avgDepth = GetAvgDepth( exonBlocks[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1498 double anchorAvg = GetAvgDepth( exonBlocks[i - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1499 if ( avgDepth > 1 && anchorAvg > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1500 exonBlocks[i].leftRatio = ( avgDepth - 1 ) / ( anchorAvg - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1501
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1502 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1503 /*if ( exonBlocks[i].rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1504 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1505 int len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1506 double depthSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1507 int tag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1508
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1509 for ( j = 0 ; j < exonBlocks[tag].nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1510 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1511 int k = exonBlocks[tag].next[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1512 len += ( exonBlocks[k].end - exonBlocks[k].start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1513 depthSum += exonBlocks[k].depthSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1514 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1515 double otherAvgDepth = depthSum / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1516 double avgDepth = GetAvgDepth( exonBlocks[tag] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1517
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1518 // adjust avgDepth by looking into the earlier exon blocks(subexon) in the same region whose right side is not hard end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1519 for ( j = tag - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1520 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1521 if ( exonBlocks[j].end + 1 != exonBlocks[j + 1].start || exonBlocks[j].rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1522 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1523 double tmp = GetAvgDepth( exonBlocks[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1524 if ( tmp > avgDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1525 avgDepth = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1526 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1527
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1528 if ( avgDepth < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1529 avgDepth = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1530 if ( otherAvgDepth < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1531 otherAvgDepth = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1532
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1533 exonBlocks[i].rightRatio = sqrt( avgDepth ) - log( avgDepth ) - ( sqrt( otherAvgDepth ) + log( otherAvgDepth ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1534 //pow( log( avgDepth ) / log(2.0), 2.0 / 3.0 ) - pow( log( otherAvgDepth ) / log(2.0), 2.0 / 3.0 );
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1535 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1536 // The remaining case the islands, (...)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1537 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1538
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1539 // go through each region of subexons, and only compute the ratio for the first and last hard boundary
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1540 for ( i = 0 ; i < exonBlockCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1541 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1542 // the blocks from [i,j) corresponds to a region, namely consecutive subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1543 for ( j = i + 1 ; j < exonBlockCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1544 if ( exonBlocks[j].contigId != exonBlocks[i].contigId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1545 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1546
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1547 int leftSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1548 int rightSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1549 int leftTag = j, rightTag = i - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1550 for ( int k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1551 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1552 if ( exonBlocks[k].leftType == 1 && exonBlocks[k].leftSplice > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1553 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1554 leftSupport += exonBlocks[k].leftSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1555 if ( k < leftTag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1556 leftTag = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1557 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1558 if ( exonBlocks[k].rightType == 2 && exonBlocks[k].rightSplice > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1559 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1560 rightSupport += exonBlocks[k].rightSplice ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1561 if ( k > rightTag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1562 rightTag = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1563 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1564 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1565
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1566 if ( leftSupport > 0 && rightSupport > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1567 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1568 // when the right splice support is much greater than that of the left side, there should be a soft boundary for the left side.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1569 // Wether we want to include this soft boundary or not will be determined in class, when it looked at whether the overhang block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1570 // should be kept or not.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1571 exonBlocks[ leftTag ].leftRatio = sqrt( rightSupport ) - log( ( rightSupport + leftSupport ) * 0.5 ) - ( sqrt( leftSupport ) ) ; //+ log( leftSupport ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1572 exonBlocks[ rightTag ].rightRatio = sqrt( leftSupport ) - log( ( rightSupport + leftSupport ) * 0.5 ) - ( sqrt( rightSupport ) ) ; //+ log( rightSupport ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1573 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1574
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1575 i = j ; // don't forget this.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1576 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1577 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1578 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1579
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1580 #endif