annotate PsiCLASS-1.0.2/SubexonGraph.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #ifndef _MOURISL_CLASSES_SUBEXONGRAPH_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #define _MOURISL_CLASSES_SUBEXONGRAPH_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include "alignments.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include "blocks.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// One subexon record, filled in by SubexonGraph::InputSubexon() from a line
// of the subexon file.  The field order is part of the layout; keep it.
struct _subexon
{
	int chrId ;  // id returned by Alignments::GetChromIdFromName() for the chromosome name
	int geneId ;

	// Coordinate range; the length is computed as end - start + 1.
	// SubexonGraph's constructor decrements both by one after loading.
	int start ;
	int end ;

	// Boundary types.  The code in this header tests for the values 0, 1 and 2;
	// it treats leftType == 1 and rightType == 2 as a "hard boundary".
	int leftType ;
	int rightType ;

	double avgDepth ;
	//double ratio, classifier ;
	double leftRatio ;
	double rightRatio ;
	double leftClassifier ;
	double rightClassifier ;

	// Both are reset to 0 by InputSubexon(); their use is not shown in this header.
	int lcCnt ;
	int rcCnt ;

	// Strand of each side: 1 for '+', -1 for '-', 0 for anything else.
	int leftStrand ;
	int rightStrand ;

	// Number of entries in next[] and prev[].
	int nextCnt ;
	int prevCnt ;

	// Arrays allocated with new[] by InputSubexon() when the lists are requested
	// (NULL otherwise).  They hold coordinates as read from the file; SubexonGraph's
	// constructor rewrites them into indices of SubexonGraph::subexons.
	int *next ;
	int *prev ;

	bool canBeStart ;
	bool canBeEnd ;
} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// A gene interval expressed both as a pair of indices and as a coordinate range.
struct _geneInterval
{
	int startIdx ;
	int endIdx ;

	// The start and end of a gene interval might be adjusted, so they do not
	// need to match the corresponding subexons.
	int start ;
	int end ;
} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 class SubexonGraph
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 private:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 int *visit ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 double classifierThreshold ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 int usedGeneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 int baseGeneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 // The function to assign gene ids to subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 void SetGeneId( int tag, int strand, struct _subexon *subexons, int seCnt, int id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 void GetGeneBoundary( int tag, int &boundary, int timeStamp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 void UpdateGeneId( struct _subexon *subexons, int seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 public:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 std::vector<struct _subexon> subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 std::vector<struct _geneInterval> geneIntervals ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 ~SubexonGraph()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 int size = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 if ( subexons[i].next )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 delete[] subexons[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 if ( subexons[i].prev )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 delete[] subexons[i].prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 SubexonGraph( double classifierThreshold, Alignments &bam, FILE *fpSubexon )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 // Read in the subexons
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 rewind( fpSubexon ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 char buffer[2048] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 int subexonCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 while ( fgets( buffer, sizeof( buffer ), fpSubexon ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 if ( buffer[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 struct _subexon se ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 InputSubexon( buffer, bam, se, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 // filter.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 if ( ( se.leftType == 0 && se.rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 || ( se.leftType == 0 && se.rightType == 1 ) // overhang
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 || ( se.leftType == 2 && se.rightType == 0 ) // overhang
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 || ( se.leftType == 2 && se.rightType == 1 ) ) // ir
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 if ( ( se.leftType == 0 && se.rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 || ( se.leftType == 2 && se.rightType == 0 ) ) // if the overhang is too small
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 if ( se.end - se.start + 1 <= 7 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 if ( se.next )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 delete[] se.next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 if ( se.prev )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 delete[] se.prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 if ( se.leftClassifier >= classifierThreshold || se.leftClassifier < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 if ( se.next )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 delete[] se.next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 if ( se.prev )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 delete[] se.prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 // Adjust the coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 subexons.push_back( se ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 // Convert the coordinate to index
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 // Note that each coordinate can only associate with one subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 subexonCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 for ( i = 0 ; i < subexonCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 struct _subexon &se = subexons[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 //printf( "hi1 %d: %d %d\n", i, se.prevCnt, se.prev[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 // due to filter, we may not fully match the coordinate and the subexon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 int bound = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 if ( se.prevCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 bound = se.prev[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 for ( j = i - 1, k = 0 ; k < se.prevCnt && j >= 0 && subexons[j].end >= bound ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 //printf( " %d %d: %d %d\n", j, k, se.prev[ se.prevCnt - 1 - k], subexons[j].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 if ( subexons[j].end == se.prev[se.prevCnt - 1 - k] ) // notice the order is reversed
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 se.prev[se.prevCnt - 1 - cnt] = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 else if ( subexons[j].end < se.prev[ se.prevCnt - 1 - k ] ) // the corresponding subexon gets filtered.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 ++j ; // counter the --j in the loop
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 //printf( "hi2 %d : %d\n", i, se.prevCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 // shft the list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 for ( j = 0, k = se.prevCnt - cnt ; j < cnt ; ++j, ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 se.prev[j] = se.prev[k] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 se.prevCnt = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 if ( se.nextCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 bound = se.next[ se.nextCnt - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 for ( j = i + 1, k = 0 ; k < se.nextCnt && j < subexonCnt && subexons[j].start <= bound ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 if ( subexons[j].start == se.next[k] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 se.next[cnt] = j ; // cnt is always less than k, so we don't need to worry about overwrite.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 else if ( subexons[j].start > se.next[k] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 --j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 se.nextCnt = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 // Adjust the coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 int seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 --subexons[i].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 --subexons[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 rewind( fpSubexon ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 // Adjust the classifier for hard boundary, if there is a overhang attached to that region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 if ( subexons[i].leftType == 1 && subexons[i].leftClassifier < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 if ( subexons[j].end < subexons[j + 1].start - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 if ( subexons[j + 1].leftType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 subexons[i].leftClassifier = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 if ( subexons[i].rightType == 2 && subexons[i].rightClassifier < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 for ( j = i + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 if ( subexons[j - 1].rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 subexons[i].rightClassifier = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 // For the region of mixture of plus and minus strand subexons, if there is
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 // no overhang attached to it, we need to let the hard boundary be a candidate terminal sites.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 for ( i = 0 ; i < seCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 // [i,j) is a region
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 int support[2] = {0, 0} ; // the index, 0 is for minus strand, 1 is for plus strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 for ( j = i + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 if ( subexons[k].leftStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 ++support[ ( subexons[k].leftStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 if ( subexons[k].rightStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 ++support[ ( subexons[k].rightStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 if ( support[0] == 0 || support[1] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 // a mixture region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 // We force a terminal site if we have only coming-in and no going-out introns.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 int leftSupport[2] = {0, 0}, rightSupport[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 int cnt = subexons[k].prevCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 if ( subexons[k].leftStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 for ( l = 0 ; l < cnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 if ( subexons[k].prev[l] < i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 ++leftSupport[ ( subexons[k].leftStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 cnt = subexons[k].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 if ( subexons[k].rightStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 for ( l = 0 ; l < cnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 if ( subexons[k].next[l] >= j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 ++rightSupport[ ( subexons[k].rightStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 if ( ( ( leftSupport[0] > 0 && rightSupport[0] == 0 ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 ( leftSupport[1] > 0 && rightSupport[1] == 0 ) ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 subexons[j - 1].rightType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 subexons[j - 1].rightClassifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 if ( ( ( leftSupport[0] == 0 && rightSupport[0] > 0 ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 ( leftSupport[1] == 0 && rightSupport[1] > 0 ) ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 subexons[j - 1].leftType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 subexons[j - 1].leftClassifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 this->classifierThreshold = classifierThreshold ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 usedGeneId = baseGeneId = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 static bool IsSameStrand( int a, int b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 if ( a == 0 || b == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 if ( a != b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 // Parse the input line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 static int InputSubexon( char *in, Alignments &alignments, struct _subexon &se, bool needPrevNext = false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 char chrName[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 char ls[3], rs[3] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 sscanf( in, "%s %d %d %d %d %s %s %lf %lf %lf %lf %lf", chrName, &se.start, &se.end, &se.leftType, &se.rightType, ls, rs,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 &se.avgDepth, &se.leftRatio, &se.rightRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 &se.leftClassifier, &se.rightClassifier ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 se.chrId = alignments.GetChromIdFromName( chrName ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 se.nextCnt = se.prevCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 se.next = se.prev = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 se.lcCnt = se.rcCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 if ( ls[0] == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 se.leftStrand = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 else if ( ls[0] == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 se.leftStrand = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 se.leftStrand = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 if ( rs[0] == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 se.rightStrand = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 else if ( rs[0] == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 se.rightStrand = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 se.rightStrand = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 if ( needPrevNext )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 char *p = in ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 // Locate the offset for prevCnt
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 for ( i = 0 ; i <= 11 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 p = strchr( p, ' ' ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 sscanf( p, "%d", &se.prevCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 p = strchr( p, ' ' ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 se.prev = new int[ se.prevCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 for ( i = 0 ; i < se.prevCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 sscanf( p, "%d", &se.prev[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 p = strchr( p, ' ' ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 sscanf( p, "%d", &se.nextCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 p = strchr( p, ' ' ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 se.next = new int[ se.nextCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 for ( i = 0 ; i < se.nextCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 sscanf( p, "%d", &se.next[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 p = strchr( p, ' ' ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 int GetGeneIntervalIdx( int startIdx, int &endIdx, int timeStamp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 //@return: the number of intervals found
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 int ComputeGeneIntervals() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 // Return a list of subexons in that interval and in retList the id of subexon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 // should be adjusted to start from 0.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 int ExtractSubexons( int startIdx, int endIdx, struct _subexon *retList ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 #endif