annotate PsiCLASS-1.0.2/CombineSubexons.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include <algorithm>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <vector>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include <map>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include <string>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include "alignments.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include "blocks.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include "stats.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #include "SubexonGraph.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 char usage[] = "combineSubexons [options]\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 "Required options:\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 "\t-s STRING: the path to the predicted subexon information. Can use multiple -s to specify multiple subexon prediction files\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 "\t\tor\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 "\t--ls STRING: the path to file of the list of the predicted subexon information.\n" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 struct _overhang
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 int cnt ; // the number of samples support this subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 int validCnt ; // The number of samples that are used for compute probability.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 int length ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 double classifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 struct _intronicInfo
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 int leftSubexonIdx, rightSubexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 double irClassifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 int irCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 int validIrCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 struct _overhang leftOverhang, rightOverhang ; // leftOverhangClassifier is for the overhang subexon at the left side of this intron.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 struct _seInterval
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 int type ; // 0-subexon, 1-intronicInfo
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 int idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 struct _subexonSplit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 int pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 int type ; //1-start of a subexon. 2-end of a subexon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 int splitType ; //0-soft boundary, 1-start of an exon, 2-end of an exon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 int strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 int weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 struct _interval // exon or intron
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 int chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 int strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 int sampleSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 struct _subexonSupplement // supplement the subexon structure defined in SubexonGraph.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 int *nextSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 int *prevSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 char buffer[4096] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 bool CompSubexonSplit( struct _subexonSplit a, struct _subexonSplit b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 if ( a.chrId < b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 else if ( a.chrId > b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 else if ( a.pos != b.pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 return a.pos < b.pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 else if ( a.type != b.type )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 // the split site with no strand information should come first.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 /*if ( a.splitType != b.splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 if ( a.splitType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 else if ( b.splitType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 return a.type < b.type ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 else if ( a.splitType != b.splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 //return a.splitType < b.splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 if ( a.splitType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 else if ( b.splitType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 if ( a.type == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 return a.splitType > b.splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 return a.splitType < b.splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 bool CompInterval( struct _interval a, struct _interval b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 if ( a.chrId < b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 else if ( a.chrId > b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 else if ( a.start != b.start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 return a.start < b.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 else if ( a.end != b.end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 return a.end < b.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 bool CompSeInterval( struct _seInterval a, struct _seInterval b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 if ( a.chrId < b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 else if ( a.chrId > b.chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 else if ( a.start < b.start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 else if ( a.start > b.start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 else if ( a.end < b.end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 // Keep this the same as in SubexonInfo.cpp.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 double TransformCov( double c )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 double ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 //return sqrt( c ) - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 if ( c <= 2 + 1e-6 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 ret = 1e-6 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 ret = c - 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 double GetUpdateMixtureGammaClassifier( double ratio, double cov, double piRatio, double kRatio[2], double thetaRatio[2],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 double piCov, double kCov[2], double thetaCov[2], bool conservative )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 double p1 = 0, p2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 cov = TransformCov( cov ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 if ( cov < ( kCov[0] - 1 ) * thetaCov[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 cov = ( kCov[0] - 1 ) * thetaCov[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 if ( ratio > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 p1 = MixtureGammaAssignment( ratio, piRatio, kRatio, thetaRatio ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 // Make sure cov > 1?
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 p2 = MixtureGammaAssignment( cov, piCov, kCov, thetaCov ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 double ret = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 if ( conservative )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 if ( p1 >= p2 ) // we should use ratio.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 ret = LogGammaDensity( ratio, kRatio[1], thetaRatio[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 - LogGammaDensity( ratio, kRatio[0], thetaRatio[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 ret = LogGammaDensity( cov, kCov[1], thetaCov[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 - LogGammaDensity( cov, kCov[0], thetaCov[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 if ( p1 >= p2 ) // we should use ratio.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 ret = LogGammaDensity( ratio, kRatio[1], thetaRatio[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 - LogGammaDensity( ratio, kRatio[0], thetaRatio[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 ret = LogGammaDensity( cov, kCov[1], thetaCov[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 - LogGammaDensity( cov, kCov[0], thetaCov[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 double GetPValueOfGeneEnd( double cov )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 if ( cov <= 2.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 return 1.0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 double tmp = 2.0 * ( sqrt( cov ) - log( cov ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 if ( tmp <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 return 1.0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 return 2.0 * alnorm( tmp, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 char StrandNumToSymbol( int strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 if ( strand > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 return '+' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 else if ( strand < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 return '-' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 return '.' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 int StrandSymbolToNum( char c )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 if ( c == '+' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 else if ( c == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 int *MergePositions( int *old, int ocnt, int *add, int acnt, int &newCnt ) //, int **support )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 int *ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 if ( acnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 newCnt = ocnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 return old ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 if ( ocnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 newCnt = acnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 ret = new int[acnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 //*support = new int[acnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 for ( i = 0 ; i < acnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 //(*support)[i] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 ret[i] = add[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 newCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 for ( i = 0, j = 0 ; i < ocnt && j < acnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 if ( old[i] < add[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 ++newCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 else if ( old[i] == add[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 ++i ; ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 ++newCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 ++newCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 newCnt = newCnt + ( ocnt - i ) + ( acnt - j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 // no new elements.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 if ( newCnt == ocnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 /*i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 for ( j = 0 ; j < acnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 for ( ; old[i] < add[j] ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 ++(*support)[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 return old ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 //delete []old ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 ret = new int[ newCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 //int *bufferSupport = new int[newCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 for ( i = 0, j = 0 ; i < ocnt && j < acnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 if ( old[i] < add[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 ret[k] = old[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 //bufferSupport[k] = (*support)[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 else if ( old[i] == add[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 ret[k] = old[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 //bufferSupport[k] = (*support)[i] + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 ++i ; ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 ret[k] = add[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 //bufferSupport[k] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 for ( ; i < ocnt ; ++i, ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 ret[k] = old[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 //bufferSupport[k] = (*support)[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 for ( ; j < acnt ; ++j, ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 ret[k] = add[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 //bufferSupport[k] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 delete[] old ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 //delete[] *support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 //*support = bufferSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 void CoalesceSubexonSplits( std::vector<struct _subexonSplit> &splits, int mid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 int cnt = splits.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 //std::sort( splits.begin(), splits.end(), CompSubexonSplit ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 std::vector<struct _subexonSplit> sortedSplits ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 sortedSplits.resize( cnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 for ( i = 0, j = mid ; i < mid && j < cnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 if ( CompSubexonSplit( splits[i], splits[j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 sortedSplits[k] = splits[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 sortedSplits[k] = splits[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 for ( ; i < mid ; ++i, ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 sortedSplits[k] = splits[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 for ( ; j < cnt ; ++j, ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 sortedSplits[k] = splits[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 splits = sortedSplits ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 for ( i = 1 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 if ( splits[i].chrId == splits[k].chrId && splits[i].pos == splits[k].pos && splits[i].type == splits[k].type && splits[i].splitType == splits[k].splitType
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 && splits[i].strand == splits[k].strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 splits[k].weight += splits[i].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 splits[k] = splits[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 splits.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 void CoalesceDifferentStrandSubexonSplits( std::vector<struct _subexonSplit> &splits )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 int i, j, k, l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 int cnt = splits.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 for ( i = 0 ; i < cnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 for ( j = i + 1 ; j < cnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 if ( splits[i].chrId == splits[j].chrId && splits[i].pos == splits[j].pos && splits[i].type == splits[j].type && splits[i].splitType == splits[j].splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 int maxWeight = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 int weightSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 int strand = splits[i].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 for ( l = i ; l < j ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 weightSum += splits[l].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 if ( splits[l].strand != 0 && splits[l].weight > maxWeight )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 strand = splits[l].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 maxWeight = splits[l].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 //printf( "%d: %d %d %d\n", splits[i].pos, i, j, strand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 splits[k] = splits[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 splits[k].strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 splits[k].weight = weightSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 splits.resize( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 void CoalesceIntervals( std::vector<struct _interval> &intervals )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 std::sort( intervals.begin(), intervals.end(), CompInterval ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 int cnt = intervals.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 for ( i = 1 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 if ( intervals[i].chrId == intervals[k].chrId && intervals[i].start == intervals[k].start && intervals[i].end == intervals[k].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 intervals[k].sampleSupport += intervals[i].sampleSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 intervals[k] = intervals[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 intervals.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 void CleanIntervalIrOverhang( std::vector<struct _interval> &irOverhang )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 std::sort( irOverhang.begin(), irOverhang.end(), CompInterval ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 int cnt = irOverhang.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 for ( i = 0 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 if ( irOverhang[i].start == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 // locate the longest interval start at the same coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 int tag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 for ( j = i + 1 ; j < cnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 if ( irOverhang[j].chrId != irOverhang[i].chrId || irOverhang[j].start != irOverhang[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 if ( irOverhang[j].start == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 tag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 for ( k = i ; k < tag ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 irOverhang[k].start = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 for ( k = tag + 1 ; k < cnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 if ( irOverhang[k].chrId != irOverhang[tag].chrId || irOverhang[k].start > irOverhang[tag].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 if ( irOverhang[k].end <= irOverhang[tag].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 irOverhang[k].start = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 for ( i = 0 ; i < cnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 if ( irOverhang[i].start == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 irOverhang[k] = irOverhang[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 irOverhang.resize( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 // Remove the connection that does not match the boundary
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 // of subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 void CleanUpSubexonConnections( std::vector<struct _subexon> &subexons )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 int seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 int i, j, k, m ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 if ( subexons[i].prevCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 for ( k = i ; k >= 0 ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 if ( subexons[k].chrId != subexons[i].chrId || subexons[k].end <= subexons[i].prev[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 if ( subexons[k].chrId != subexons[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 m = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 for ( j = 0 ; j < subexons[i].prevCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 for ( ; k <= i ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 if ( subexons[k].end >= subexons[i].prev[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 if ( subexons[k].end == subexons[i].prev[j]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 && ( SubexonGraph::IsSameStrand( subexons[k].rightStrand, subexons[i].leftStrand ) || subexons[k].end + 1 == subexons[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 subexons[i].prev[m] = subexons[i].prev[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 ++m ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 subexons[i].prevCnt = m ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 m = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 k = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 for ( j = 0 ; j < subexons[i].nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 for ( ; k < seCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 if ( subexons[k].chrId != subexons[i].chrId || subexons[k].start >= subexons[i].next[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 if ( subexons[k].start == subexons[i].next[j]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 && ( SubexonGraph::IsSameStrand( subexons[i].rightStrand, subexons[k].leftStrand ) || subexons[i].end + 1 == subexons[k].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 subexons[i].next[m] = subexons[i].next[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 ++m ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 subexons[i].nextCnt = m ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 int main( int argc, char *argv[] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 FILE *fp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 std::vector<char *> files ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 Blocks regions ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 Alignments alignments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 if ( argc == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 printf( "%s", usage ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 for ( i = 1 ; i < argc ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 if ( !strcmp( argv[i], "-s" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 files.push_back( argv[i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 else if ( !strcmp( argv[i], "--ls" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 FILE *fpLs = fopen( argv[i + 1], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 char buffer[1024] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 while ( fgets( buffer, sizeof( buffer ), fpLs ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 int len = strlen( buffer ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 if ( buffer[len - 1] == '\n' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 buffer[len - 1] = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 --len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 char *fileName = strdup( buffer ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 files.push_back( fileName ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 int fileCnt = files.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 // Obtain the chromosome ids through bam file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 fp = fopen( files[0], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 int len = strlen( buffer ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 buffer[len - 1] = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568 alignments.Open( buffer + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 // Collect the split sites of subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 std::vector<struct _subexonSplit> subexonSplits ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 std::vector<struct _interval> intervalIrOverhang ; // intervals contains ir and overhang.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 std::vector<struct _interval> introns ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 std::vector<struct _interval> exons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 for ( k = 0 ; k < fileCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 fp = fopen( files[k], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 struct _subexon se ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 struct _subexonSplit sp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 char chrName[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 int origSize = subexonSplits.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 while ( fgets( buffer, sizeof( buffer), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 if ( buffer[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 SubexonGraph::InputSubexon( buffer, alignments, se, false) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 // Record all the intron rentention, overhang from the samples
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 if ( ( se.leftType == 2 && se.rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 || ( se.leftType == 2 && se.rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 || ( se.leftType == 0 && se.rightType == 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597 struct _interval si ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 si.chrId = se.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 si.start = se.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 si.end = se.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 intervalIrOverhang.push_back( si ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 // Ignore overhang subexons and ir subexons for now.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 if ( ( se.leftType == 0 && se.rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 || ( se.leftType == 2 && se.rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 || ( se.leftType == 2 && se.rightType == 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 if ( se.leftType == 0 && se.rightType == 0 && ( se.leftClassifier == -1 || se.leftClassifier == 1 ) ) // ignore noisy single-exon island
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 if ( se.leftType == 0 && se.rightType == 0 && ( fileCnt >= 10 && se.leftClassifier > 0.99 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 if ( se.leftType == 1 && se.rightType == 2 ) // a full exon, we allow mixtured strand here.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 struct _interval ni ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 ni.chrId = se.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 ni.start = se.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 ni.end = se.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 ni.strand = se.rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 ni.sampleSupport = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 exons.push_back( ni ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 /*for ( i = 0 ; i < se.nextCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 struct _interval ni ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 ni.chrId = se.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 ni.start = se.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 ni.end = se.next[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 ni.strand = se.rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 ni.sampleSupport = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 if ( ni.start + 1 < ni.end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 introns.push_back( ni ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 sp.chrId = se.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 sp.pos = se.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 sp.type = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 sp.splitType = se.leftType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 sp.strand = se.leftStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 sp.weight = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 subexonSplits.push_back( sp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 sp.chrId = se.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 sp.pos = se.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 sp.type = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 sp.splitType = se.rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 sp.strand = se.rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 sp.weight = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 subexonSplits.push_back( sp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 /*if ( se.prevCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 delete[] se.prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 if ( se.nextCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 delete[] se.next ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 CoalesceIntervals( exons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 CoalesceIntervals( introns ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 CoalesceSubexonSplits( subexonSplits, origSize ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 CleanIntervalIrOverhang( intervalIrOverhang ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 CoalesceDifferentStrandSubexonSplits( subexonSplits ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 // Obtain the split sites from the introns.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 int intronCnt = introns.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 std::vector<struct _subexonSplit> intronSplits ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 for ( i = 0 ; i < intronCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 /*if ( introns[i].sampleSupport < 0.05 * fileCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 struct _interval &it = introns[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 struct _subexonSplit sp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 sp.chrId = it.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 sp.pos = it.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 sp.type = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 sp.splitType = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 sp.strand = it.strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 intronSplits.push_back( sp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 sp.chrId = it.chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 sp.pos = it.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 sp.type = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 sp.splitType = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 sp.strand = it.strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 intronSplits.push_back( sp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697 // Pair up the split sites to get subexons
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698 std::sort( intronSplits.begin(), intronSplits.end(), CompSubexonSplit ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 //std::sort( subexonSplits.begin(), subexonSplits.end(), CompSubexonSplit ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 // Convert the hard boundary to soft boundary if the split sites is filtered from the introns
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 // Seems NO need to do this now.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 int splitCnt = subexonSplits.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704 int intronSplitCnt = intronSplits.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706 //for ( i = 0 ; i < splitCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707 while ( 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 if ( subexonSplits[i].type != subexonSplits[i].splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712 while ( k < intronSplitCnt && ( intronSplits[k].chrId < subexonSplits[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713 || ( intronSplits[k].chrId == subexonSplits[i].chrId && intronSplits[k].pos < subexonSplits[i].pos ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 j = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716 while ( j < intronSplitCnt && intronSplits[j].chrId == subexonSplits[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
717 && intronSplits[j].pos == subexonSplits[i].pos && intronSplits[j].splitType != subexonSplits[i].splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
718 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
719
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
720 // the split site is filtered.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
721 if ( j >= intronSplitCnt || intronSplits[j].chrId != subexonSplits[i].chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
722 intronSplits[j].pos > subexonSplits[i].pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
723 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
724 //printf( "%d %d. %d %d\n", subexonSplits[i].pos, intronSplits[j].pos, intronSplits[j].chrId , subexonSplits[i].chrId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
725 subexonSplits[i].splitType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
726
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
727 // Convert the adjacent subexon split.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
728 for ( int l = i + 1 ; i < splitCnt && subexonSplits[l].chrId == subexonSplits[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
729 && subexonSplits[l].pos == subexonSplits[i].pos + 1 ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
730 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
731 if ( subexonSplits[l].type != subexonSplits[i].type
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
732 && subexonSplits[l].splitType == subexonSplits[i].splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 subexonSplits[l].splitType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 // And the other direction
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 for ( int l = i - 1 ; l >= 0 && subexonSplits[l].chrId == subexonSplits[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 && subexonSplits[l].pos == subexonSplits[i].pos - 1 ; --l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 if ( subexonSplits[l].type != subexonSplits[i].type
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743 && subexonSplits[l].splitType == subexonSplits[i].splitType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745 subexonSplits[l].splitType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 intronSplits.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 std::vector<struct _subexonSplit>().swap( intronSplits ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 // Force the soft boundary that collides with hard boundaries to be hard boundary.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 for ( i = 0 ; i < splitCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756 if ( subexonSplits[i].splitType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758 int newSplitType = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759 int newStrand = subexonSplits[i].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 for ( j = i + 1 ; j < splitCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762 if ( subexonSplits[i].type != subexonSplits[j].type || subexonSplits[i].pos != subexonSplits[j].pos ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763 subexonSplits[i].chrId != subexonSplits[j].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765 if ( subexonSplits[j].splitType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 newSplitType = subexonSplits[j].splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768 newStrand = subexonSplits[j].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
772
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
773 if ( newSplitType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
774 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
775 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
776 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
777 if ( subexonSplits[i].type != subexonSplits[j].type || subexonSplits[i].pos != subexonSplits[j].pos ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
778 subexonSplits[i].chrId != subexonSplits[j].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 if ( subexonSplits[j].splitType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 newSplitType = subexonSplits[j].splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783 newStrand = subexonSplits[j].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789 /*if ( subexonSplits[i].pos == 154464157 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791 printf( "force conversion: %d %d %d. %d %d\n", subexonSplits[i].pos, subexonSplits[i].splitType, subexonSplits[i].weight, subexonSplits[i + 1].pos, subexonSplits[i + 1].splitType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 subexonSplits[i].splitType = newSplitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
794 subexonSplits[i].strand = newStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
795 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
796
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
797 /*for ( i = 0 ; i < splitCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
798 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
799 printf( "%d: type=%d splitType=%d weight=%d\n", subexonSplits[i].pos, subexonSplits[i].type, subexonSplits[i].splitType, subexonSplits[i].weight ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
800 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
801
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
802 // Build subexons from the collected split sites.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
803
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
804 std::vector<struct _subexon> subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
805 int diffCnt = 0 ; // |start of subexon split| - |end of subexon split|
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
806 int seCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
807 for ( i = 0 ; i < splitCnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
808 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
809 struct _subexon se ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
810 /*if ( subexonSplits[i + 1].pos == 144177260 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
811 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
812 printf( "%d %d %d: %d %d %d. %d\n", subexonSplits[i].pos, subexonSplits[i].type, subexonSplits[i].splitType,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
813 subexonSplits[i + 1].pos, subexonSplits[i + 1].type, subexonSplits[i + 1].splitType, diffCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
814 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
815
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
816 if ( subexonSplits[i].type == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
817 diffCnt += subexonSplits[i].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
818 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
819 diffCnt -= subexonSplits[i].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
820
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
821 if ( subexonSplits[i + 1].chrId != subexonSplits[i].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
822 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
823 diffCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
824 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
825 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
826
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
827 if ( diffCnt == 0 ) // the interval between subexon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
828 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
829
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
830 se.chrId = subexonSplits[i].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
831 se.start = subexonSplits[i].pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
832 se.leftType = subexonSplits[i].splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
833 se.leftStrand = subexonSplits[i].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
834 if ( subexonSplits[i].type == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
835 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
836 se.leftStrand = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
837 ++se.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
838 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
839
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
840 se.end = subexonSplits[i + 1].pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
841 se.rightType = subexonSplits[i + 1].splitType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
842 se.rightStrand = subexonSplits[i + 1].strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
843 if ( subexonSplits[i + 1].type == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
844 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
845 se.rightStrand = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
846 --se.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
847 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
848
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
849 /*if ( se.end == 24613649 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
850 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
851 //printf( "%d %d %d: %d %d %d. %d\n", subexonSplits[i].pos, subexonSplits[i].type, subexonSplits[i].splitType,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
852 // subexonSplits[i + 1].pos, subexonSplits[i + 1].type, subexonSplits[i + 1].splitType, diffCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
853 printf( "%d %d %d. %d %d %d\n", se.start, se.leftType, se.leftStrand, se.end, se.rightType, se.rightStrand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
854 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
855
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
856 if ( se.start > se.end ) //Note: this handles the case of repeated subexon split.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
857 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
858 // handle the case in sample 0: [...[..]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
859 // in sample 1: [..]...]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
860 if ( seCnt > 0 && se.end == subexons[seCnt - 1].end && subexons[seCnt - 1].rightType < se.rightType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
861 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
862 subexons[seCnt - 1].rightType = se.rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
863 subexons[seCnt - 1].rightStrand = se.rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
864 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
865 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
866 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
867 se.leftClassifier = se.rightClassifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
868 se.lcCnt = se.rcCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
869
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
870 /*if ( 1 ) //se.chrId == 25 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
871 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
872 printf( "%d: %d-%d: %d %d\n", se.chrId, se.start, se.end, se.leftType, se.rightType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
873 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
874
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
875
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
876 se.next = se.prev = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
877 se.nextCnt = se.prevCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
878 subexons.push_back( se ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
879 ++seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
880 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
881 subexonSplits.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
882 std::vector<struct _subexonSplit>().swap( subexonSplits ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
883
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
884 // Adjust the split type.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
885 seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
886 for ( i = 1 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
887 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
888 if ( subexons[i - 1].end + 1 == subexons[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
889 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
890 if ( subexons[i - 1].rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
891 subexons[i - 1].rightType = subexons[i].leftType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
892 if ( subexons[i].leftType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
893 subexons[i].leftType = subexons[i - 1].rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
894 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
895 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
896
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
897 // Merge the adjacent soft boundaries
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
898 std::vector<struct _subexon> rawSubexons = subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
899 int exonCnt = exons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
900 subexons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
901
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
902 k = 0 ; // hold index for exon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
903 for ( i = 0 ; i < seCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
904 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
905 /*if ( rawSubexons[k].rightType == 0 && rawSubexons[i].leftType == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
906 && rawSubexons[k].end + 1 == rawSubexons[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
907 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
908 rawSubexons[k].end = rawSubexons[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
909 rawSubexons[k].rightType = rawSubexons[i].rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
910 rawSubexons[k].rightStrand = rawSubexons[i].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
911 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
912 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
913 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
914 subexons.push_back( rawSubexons[k] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
915 k = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
916 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
917
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
918 while ( k < exonCnt && ( exons[k].chrId < rawSubexons[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
919 || ( exons[k].chrId == rawSubexons[i].chrId && exons[k].start < rawSubexons[i].start ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
920 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
921
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
922 for ( j = i + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
923 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
924 if ( rawSubexons[j - 1].chrId != rawSubexons[j].chrId || rawSubexons[j - 1].rightType != 0 || rawSubexons[j].leftType != 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
925 || ( fileCnt > 1 && rawSubexons[j - 1].end + 1 != rawSubexons[j].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
926 || ( fileCnt == 1 && rawSubexons[j - 1].end + 50 < rawSubexons[j].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
927 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
928 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
929 // rawsubexons[i...j-1] will be merged.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
930
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
931 /*if ( rawSubexons[i].start == 119625875 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
932 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
933 printf( "merge j-1: %d %d %d %d\n", rawSubexons[j - 1].end, rawSubexons[j - 1].rightType,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
934 rawSubexons[j].start, rawSubexons[j].leftType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
935 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
936 bool merge = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
937 if ( rawSubexons[i].leftType == 1 && rawSubexons[j - 1].rightType == 2 && j - i > 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
938 && rawSubexons[j - 1].end - rawSubexons[i].start >= 1000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
939 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
940 merge = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
941 int sampleSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
942 for ( int l = k ; l < exonCnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
943 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
944 if ( exons[l].chrId != rawSubexons[i].chrId || exons[l].start > rawSubexons[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
945 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
946 if ( exons[l].end == rawSubexons[j - 1].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
947 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
948 merge = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
949 sampleSupport = exons[l].sampleSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
950 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
951 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
952 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
953
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
954 if ( merge == true && rawSubexons[j - 1].end - rawSubexons[i].start >= 1000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
955 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
956 if ( sampleSupport <= 0.2 * fileCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
957 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
958 merge = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
959 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
960 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
961
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
962 if ( merge == false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
963 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
964 if ( j - i >= 3 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
965 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
966 rawSubexons[i].end = rawSubexons[ (i + j - 1) / 2 ].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
967 rawSubexons[j - 1].start = rawSubexons[ (i + j - 1) / 2].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
968 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
969
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
970 if ( rawSubexons[i].end + 1 == rawSubexons[j - 1].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
971 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
972 --rawSubexons[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
973 ++rawSubexons[j - 1].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
974 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
975 subexons.push_back( rawSubexons[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
976 subexons.push_back( rawSubexons[j - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
977 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
978 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
979
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
980 if ( merge )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
981 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
982 rawSubexons[i].end = rawSubexons[j - 1].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
983 rawSubexons[i].rightType = rawSubexons[j - 1].rightType ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
984 rawSubexons[i].rightStrand = rawSubexons[j - 1].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
985
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
986 if ( rawSubexons[i].leftType == 0 && rawSubexons[i].rightType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
987 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
988 rawSubexons[i].start = rawSubexons[ ( i + j - 1 ) / 2 ].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
989 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
990 else if ( rawSubexons[i].rightType == 0 && rawSubexons[i].leftType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
991 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
992 rawSubexons[i].end = rawSubexons[ ( i + j - 1 ) / 2 ].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
993 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
994
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
995 subexons.push_back( rawSubexons[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
996 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
997
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
998 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
999 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1000 exons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1001 std::vector<struct _interval>().swap( exons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1002
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1003 // Remove overhang, ir subexons intron created after putting multiple sample to gether.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1004 // eg: s0: [......)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1005 // s1: [...]--------[....]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1006 // s2: [...]..)-----[....]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1007 // Though the overhang from s2 is filtered in readin, there will a new overhang created combining s0,s1.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1008 // But be careful about how to compute the classifier for the overhang part contributed from s0.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1009 // Furthermore, note that the case of single-exon island showed up in intron retention region after combining is not possible when get here.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1010 // eg: s0:[...]-----[...]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1011 // s1: (.)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1012 // s2:[.............]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1013 // After merge adjacent soft boundaries, the single-exon island will disappear.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1014 rawSubexons = subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1015 seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1016 subexons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1017 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1018 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1019 if ( ( rawSubexons[i].leftType == 2 && rawSubexons[i].rightType == 1 ) // ir
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1020 || ( rawSubexons[i].leftType == 2 && rawSubexons[i].rightType == 0 ) // overhang
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1021 || ( rawSubexons[i].leftType == 0 && rawSubexons[i].rightType == 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1022 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1023 subexons.push_back( rawSubexons[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1024 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1025
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1026 // Remove the single-exon island if it overlaps with intron retentioned or overhang.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1027 rawSubexons = subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1028 seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1029 subexons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1030 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1031 std::sort( intervalIrOverhang.begin(), intervalIrOverhang.end(), CompInterval ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1032 int irOverhangCnt = intervalIrOverhang.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1033
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1034 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1035 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1036 if ( rawSubexons[i].leftType != 0 || rawSubexons[i].rightType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1037 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1038 subexons.push_back( rawSubexons[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1039 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1040 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1041
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1042 while ( k < irOverhangCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1043 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1044 // Locate the interval that before the island
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1045 if ( intervalIrOverhang[k].chrId < rawSubexons[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1046 || ( intervalIrOverhang[k].chrId == rawSubexons[i].chrId && intervalIrOverhang[k].end < rawSubexons[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1047 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1048 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1049 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1050 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1051 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1052 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1053 bool overlap = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1054 for ( j = k ; j < irOverhangCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1055 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1056 if ( intervalIrOverhang[j].chrId > rawSubexons[i].chrId || intervalIrOverhang[j].start > rawSubexons[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1057 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1058 if ( ( intervalIrOverhang[j].start <= rawSubexons[i].start && intervalIrOverhang[j].end >= rawSubexons[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1059 || ( intervalIrOverhang[j].start <= rawSubexons[i].end && intervalIrOverhang[j].end >= rawSubexons[i].end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1060 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1061 overlap = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1062 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1063 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1064 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1065
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1066 if ( !overlap )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1067 subexons.push_back( rawSubexons[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1068 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1069 rawSubexons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1070 std::vector<struct _subexon>().swap( rawSubexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1071
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1072 intervalIrOverhang.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1073 std::vector<struct _interval>().swap( intervalIrOverhang ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1074
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1075 // Create the dummy intervals.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1076 seCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1077 std::vector<struct _intronicInfo> intronicInfos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1078 std::vector<struct _seInterval> seIntervals ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1079 std::vector<struct _subexonSupplement> subexonInfo ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1080
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1081 //subexonInfo.resize( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1082 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1083 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1084 struct _seInterval ni ; // new interval
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1085 ni.start = subexons[i].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1086 ni.end = subexons[i].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1087 ni.type = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1088 ni.idx = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1089 ni.chrId = subexons[i].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1090 seIntervals.push_back( ni ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1091
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1092 /*if ( subexons[i].end == 42671717 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1093 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1094 printf( "%d: %d-%d: %d\n", subexons[i].chrId, subexons[i].start, subexons[i].end, subexons[i].rightType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1095 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1096 //subexonInfo[i].prevSupport = subexonInfo[i].nextSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1097
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1098 /*int nexti ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1099 for ( nexti = i + 1 ; nexti < seCnt ; ++nexti )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1100 if ( subexons[ nexti ].leftType == 0 && subexons[nexti].rightType == 0 )*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1101
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1102 if ( i < seCnt - 1 && subexons[i].chrId == subexons[i + 1].chrId &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1103 subexons[i].end + 1 < subexons[i + 1].start &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1104 subexons[i].rightType + subexons[i + 1].leftType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1105 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1106 // Only consider the intervals like ]..[,]...(, )...[
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1107 // The case like ]...] is actaully things like ][...] in subexon perspective,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1108 // so they won't pass the if-statement
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1109 struct _intronicInfo nii ; // new intronic info
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1110 ni.start = subexons[i].end + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1111 ni.end = subexons[i + 1].start - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1112 ni.type = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1113 ni.idx = intronicInfos.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1114 seIntervals.push_back( ni ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1115
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1116 nii.chrId = subexons[i].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1117 nii.start = ni.start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1118 nii.end = ni.end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1119 nii.leftSubexonIdx = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1120 nii.rightSubexonIdx = i + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1121 nii.irClassifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1122 nii.irCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1123 nii.validIrCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1124 nii.leftOverhang.cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1125 nii.leftOverhang.validCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1126 nii.leftOverhang.length = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1127 nii.leftOverhang.classifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1128 nii.rightOverhang.cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1129 nii.rightOverhang.validCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1130 nii.rightOverhang.length = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1131 nii.rightOverhang.classifier = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1132 intronicInfos.push_back( nii ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1133 /*if ( nii.end == 23667 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1134 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1135 printf( "%d %d. %d (%d %d %d)\n", nii.start, nii.end, subexons[i].rightType, subexons[i+1].start, subexons[i + 1].end, subexons[i + 1].leftType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1136 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1138 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1139
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1140 // Go through all the files to get some statistics number
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1141 double avgIrPiRatio = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1142 double avgIrPiCov = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1143 double irPiRatio, irKRatio[2], irThetaRatio[2] ; // Some statistical results
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1144 double irPiCov, irKCov[2], irThetaCov[2] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1145
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1146 double avgOverhangPiRatio = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1147 double avgOverhangPiCov = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1148 double overhangPiRatio, overhangKRatio[2], overhangThetaRatio[2] ; // Some statistical results
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1149 double overhangPiCov, overhangKCov[2], overhangThetaCov[2] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1150
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1151 for ( k = 0 ; k < fileCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1152 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1153 fp = fopen( files[k], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1154
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1155 while ( fgets( buffer, sizeof( buffer), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1156 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1157 if ( buffer[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1158 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1159 char buffer2[100] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1160 sscanf( buffer, "%s", buffer2 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1161 if ( !strcmp( buffer2, "#fitted_ir_parameter_ratio:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1162 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1163 // TODO: ignore certain samples if the coverage seems wrong.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1164 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1165 buffer2, buffer2, &irPiRatio, buffer2, &irKRatio[0], buffer2, &irThetaRatio[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1166 buffer2, &irKRatio[1], buffer2, &irThetaRatio[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1167 avgIrPiRatio += irPiRatio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1168 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1169 else if ( !strcmp( buffer2, "#fitted_ir_parameter_cov:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1170 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1171 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1172 else if ( !strcmp( buffer2, "#fitted_overhang_parameter_ratio:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1173 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1174 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1175 buffer2, buffer2, &overhangPiRatio, buffer2, &overhangKRatio[0], buffer2, &overhangThetaRatio[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1176 buffer2, &overhangKRatio[1], buffer2, &overhangThetaRatio[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1177 avgOverhangPiRatio += overhangPiRatio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1178 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1179 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1180 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1181 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1182 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1183 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1184 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1185 avgIrPiRatio /= fileCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1186 avgOverhangPiRatio /= fileCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1187
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1188 // Go through all the files to put statistical results into each subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1189 std::vector< struct _subexon > sampleSubexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1190 int subexonCnt = subexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1191 for ( k = 0 ; k < fileCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1192 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1193 //if ( k == 220 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1194 // exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1195 fp = fopen( files[k], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1196 struct _subexon se ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1197 struct _subexonSplit sp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1198 char chrName[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1199
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1200 sampleSubexons.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1201
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1202 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1203 while ( fgets( buffer, sizeof( buffer), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1204 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1205 if ( buffer[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1206 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1207 char buffer2[200] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1208 sscanf( buffer, "%s", buffer2 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1209 if ( !strcmp( buffer2, "#fitted_ir_parameter_ratio:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1210 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1211 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1212 buffer2, buffer2, &irPiRatio, buffer2, &irKRatio[0], buffer2, &irThetaRatio[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1213 buffer2, &irKRatio[1], buffer2, &irThetaRatio[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1214 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1215 else if ( !strcmp( buffer2, "#fitted_ir_parameter_cov:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1216 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1217 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1218 buffer2, buffer2, &irPiCov, buffer2, &irKCov[0], buffer2, &irThetaCov[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1219 buffer2, &irKCov[1], buffer2, &irThetaCov[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1220 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1221 else if ( !strcmp( buffer2, "#fitted_overhang_parameter_ratio:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1222 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1223 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1224 buffer2, buffer2, &overhangPiRatio, buffer2, &overhangKRatio[0], buffer2, &overhangThetaRatio[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1225 buffer2, &overhangKRatio[1], buffer2, &overhangThetaRatio[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1226 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1227 else if ( !strcmp( buffer2, "#fitted_overhang_parameter_cov:" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1228 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1229 sscanf( buffer, "%s %s %lf %s %lf %s %lf %s %lf %s %lf",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1230 buffer2, buffer2, &overhangPiCov, buffer2, &overhangKCov[0], buffer2, &overhangThetaCov[0],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1231 buffer2, &overhangKCov[1], buffer2, &overhangThetaCov[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1233 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1234 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1235 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1236 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1237
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1238 //SubexonGraph::InputSubexon( buffer, alignments, se, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1239 //sampleSubexons.push_back( se ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1240 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1241
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1242 //int sampleSubexonCnt = sampleSubexons.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1243 int intervalCnt = seIntervals.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1244 //for ( i = 0 ; i < sampleSubexonCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1245 int iterCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1246 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1247 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1248 if ( iterCnt > 0 && fgets( buffer, sizeof( buffer), fp ) == NULL)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1249 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1250 ++iterCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1251
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1252 struct _subexon se ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1253 SubexonGraph::InputSubexon( buffer, alignments, se, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1254
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1255 while ( tag < intervalCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1256 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1257 if ( seIntervals[tag].chrId < se.chrId ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1258 ( seIntervals[tag].chrId == se.chrId && seIntervals[tag].end < se.start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1259 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1260 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1261 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1262 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1263 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1264 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1265 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1266
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1267 for ( j = tag ; j < intervalCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1268 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1269 if ( seIntervals[j].start > se.end || seIntervals[j].chrId > se.chrId ) // terminate if no overlap.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1270 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1271 int idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1272
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1273 if ( seIntervals[j].type == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1274 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1275 idx = seIntervals[j].idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1276 if ( subexons[idx].leftType == 1 && se.leftType == 1 && subexons[idx].start == se.start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1277 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1278 double tmp = se.leftClassifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1279 if ( se.leftClassifier == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1280 tmp = 1e-7 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1281 subexons[idx].leftClassifier -= 2.0 * log( tmp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1282 ++subexons[idx].lcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1283 subexons[idx].prev = MergePositions( subexons[idx].prev, subexons[idx].prevCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1284 se.prev, se.prevCnt, subexons[idx].prevCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1285
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1286 if ( se.rightType == 0 ) // a gene end here
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1287 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1288 for ( int l = idx ; l < subexonCnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1289 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1290 if ( l > idx && ( subexons[l].end > subexons[l - 1].start + 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1291 || subexons[l].chrId != subexons[l - 1].chrId ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1292 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1293 if ( subexons[l].rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1295 double adjustAvgDepth = se.avgDepth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1296 if ( se.end - se.start + 1 >= 100 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1297 adjustAvgDepth += se.avgDepth * 100.0 / ( se.end - se.start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1298 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1299 adjustAvgDepth *= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1300 double p = GetPValueOfGeneEnd( adjustAvgDepth ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1301 //if ( se.end - se.start + 1 >= 500 && p > 0.001 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1302 // p = 0.001 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1303
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1304 subexons[l].rightClassifier -= 2.0 * log( p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1305 ++subexons[l].rcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1306 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1307 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1308 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1309 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1310 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1311 //if ( se.chrId == 25 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1312 // printf( "(%d %d %d. %d) (%d %d %d. %d)\n", se.chrId, se.start, se.end, se.rightType, subexons[idx].chrId, subexons[idx].start, subexons[idx].end, subexons[idx].rightType ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1313 if ( subexons[idx].rightType == 2 && se.rightType == 2 && subexons[idx].end == se.end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1314 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1315 double tmp = se.rightClassifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1316 if ( se.rightClassifier == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1317 tmp = 1e-7 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1318 subexons[idx].rightClassifier -= 2.0 * log( tmp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1319 ++subexons[idx].rcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1320
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1321 subexons[idx].next = MergePositions( subexons[idx].next, subexons[idx].nextCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1322 se.next, se.nextCnt, subexons[idx].nextCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1323
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1324 if ( se.leftType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1325 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1326 for ( int l = idx ; l >= 0 ; --l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1327 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1328 if ( l < idx && ( subexons[l].end < subexons[l + 1].start - 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1329 || subexons[l].chrId != subexons[l + 1].chrId ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1330 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1331 if ( subexons[l].leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1332 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1333 double adjustAvgDepth = se.avgDepth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1334 if ( se.end - se.start + 1 >= 100 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1335 adjustAvgDepth += se.avgDepth * 100.0 / ( se.end - se.start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1336 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1337 adjustAvgDepth *= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1338 double p = GetPValueOfGeneEnd( adjustAvgDepth ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1339 //if ( se.end - se.start + 1 >= 500 && p >= 0.001 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1340 // p = 0.001 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1341 subexons[l].leftClassifier -= 2.0 * log( p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1342 ++subexons[l].lcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1343 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1344 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1345 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1346 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1347 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1348
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1349 if ( subexons[idx].leftType == 0 && subexons[idx].rightType == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1350 && se.leftType == 0 && se.rightType == 0 ) // the single-exon island.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1351 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1352 double tmp = se.leftClassifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1353 if ( se.leftClassifier == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1354 tmp = 1e-7 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1355 subexons[idx].leftClassifier -= 2.0 * log( tmp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1356 subexons[idx].rightClassifier = subexons[idx].leftClassifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1357 ++subexons[idx].lcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1358 ++subexons[idx].rcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1360 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1361 else if ( seIntervals[j].type == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1362 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1363 idx = seIntervals[j].idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1364 // Overlap on the left part of intron
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1365 if ( se.start <= intronicInfos[idx].start && se.end < intronicInfos[idx].end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1366 && subexons[ intronicInfos[idx].leftSubexonIdx ].rightType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1367 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1368 int len = se.end - intronicInfos[idx].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1369 intronicInfos[idx].leftOverhang.length += len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1370 ++intronicInfos[idx].leftOverhang.cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1371
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1372 // Note that the sample subexon must have a soft boundary at right hand side,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1373 // otherwise, this part is not an intron and won't show up in intronic Info.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1374 if ( se.leftType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1375 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1376 if ( se.leftRatio > 0 && se.avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1377 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1378 ++intronicInfos[idx].leftOverhang.validCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1379
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1380 double update = GetUpdateMixtureGammaClassifier( se.leftRatio, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1381 overhangPiRatio, overhangKRatio, overhangThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1382 overhangPiCov, overhangKCov, overhangThetaCov, false ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1383 intronicInfos[idx].leftOverhang.classifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1384 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1385 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1386 else if ( se.leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1387 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1388 ++intronicInfos[idx].leftOverhang.validCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1389 double update = GetUpdateMixtureGammaClassifier( 1.0, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1390 overhangPiRatio, overhangKRatio, overhangThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1391 overhangPiCov, overhangKCov, overhangThetaCov, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1392 intronicInfos[idx].leftOverhang.classifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1393
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1394 int seIdx = intronicInfos[idx].leftSubexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1395 subexons[seIdx].rightClassifier -= 2.0 * log( GetPValueOfGeneEnd( se.avgDepth ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1396 ++subexons[ seIdx ].rcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1397 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1398 // ignore the contribution of single-exon island here?
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1399 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1400 // Overlap on the right part of intron
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1401 else if ( se.start > intronicInfos[idx].start && se.end >= intronicInfos[idx].end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1402 && subexons[ intronicInfos[idx].rightSubexonIdx ].leftType != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1403 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1404 int len = intronicInfos[idx].end - se.start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1405 intronicInfos[idx].rightOverhang.length += len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1406 ++intronicInfos[idx].rightOverhang.cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1407
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1408 // Note that the sample subexon must have a soft boundary at left hand side,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1409 // otherwise, this won't show up in intronic Info
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1410 if ( se.rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1411 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1412 if ( se.rightRatio > 0 && se.avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1413 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1414 ++intronicInfos[idx].rightOverhang.validCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1415
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1416 double update = GetUpdateMixtureGammaClassifier( se.rightRatio, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1417 overhangPiRatio, overhangKRatio, overhangThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1418 overhangPiCov, overhangKCov, overhangThetaCov, false ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1419 intronicInfos[idx].rightOverhang.classifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1420 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1421 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1422 else if ( se.rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1423 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1424 ++intronicInfos[idx].rightOverhang.validCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1425
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1426 double update = GetUpdateMixtureGammaClassifier( 1, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1427 overhangPiRatio, overhangKRatio, overhangThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1428 overhangPiCov, overhangKCov, overhangThetaCov, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1429 intronicInfos[idx].rightOverhang.classifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1431 int seIdx = intronicInfos[idx].rightSubexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1432 /*if ( subexons[ seIdx ].start == 6873648 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1433 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1434 printf( "%lf %lf: %lf %lf %lf\n", subexons[seIdx].leftClassifier, GetPValueOfGeneEnd( se.avgDepth ), se.avgDepth, sqrt( se.avgDepth ), log( se.avgDepth ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1435 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1436 subexons[seIdx].leftClassifier -= 2.0 * log( GetPValueOfGeneEnd( se.avgDepth ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1437 ++subexons[ seIdx ].lcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1438 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1439 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1440 // Intron is fully contained in this sample subexon, then it is a ir candidate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1441 else if ( se.start <= intronicInfos[idx].start && se.end >= intronicInfos[idx].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1442 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1443 if ( se.leftType == 2 && se.rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1444 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1445 double ratio = regions.PickLeftAndRightRatio( se.leftRatio, se.rightRatio ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1446 ++intronicInfos[idx].irCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1447 if ( ratio > 0 && se.avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1448 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1449 double update = GetUpdateMixtureGammaClassifier( ratio, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1450 irPiRatio, irKRatio, irThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1451 irPiCov, irKCov, irThetaCov, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1452 //if ( intronicInfos[idx].start == 37617368 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1453 // printf( "hi %lf %d %d: %d %d\n", update, se.start, se.end, intronicInfos[idx].start, intronicInfos[idx].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1454 intronicInfos[idx].irClassifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1455 ++intronicInfos[idx].validIrCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1456 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1457 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1458 else if ( se.leftType == 1 || se.rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1459 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1460 //intronicInfos[idx].irClassifier += LogGammaDensity( 4.0, irKRatio[1], irThetaRatio[1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1461 // - LogGammaDensity( 4.0, irKRatio[0], irThetaRatio[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1462 /*if ( se.start == 37617368 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1463 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1464 printf( "%lf: %lf %lf\n", se.avgDepth, MixtureGammaAssignment( ( irKCov[0] - 1 ) * irThetaCov[0], irPiRatio, irKCov, irThetaCov ),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1465 MixtureGammaAssignment( TransformCov( 4.0 ), irPiRatio, irKCov, irThetaCov ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1466 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1467 if ( se.avgDepth > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1468 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1469 // let the depth be the threshold to determine.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1470 double update = GetUpdateMixtureGammaClassifier( 4.0, se.avgDepth,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1471 irPiRatio, irKRatio, irThetaRatio,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1472 irPiCov, irKCov, irThetaCov, true ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1473 //if ( intronicInfos[idx].start == 36266630 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1474 // printf( "hi %lf %d %d: %d %d\n", update, se.start, se.end, intronicInfos[idx].start, intronicInfos[idx].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1475 intronicInfos[idx].irClassifier += update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1476 ++intronicInfos[idx].irCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1477 ++intronicInfos[idx].validIrCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1478 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1479 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1480 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1481 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1482 // the intron is contained in a overhang subexon from the sample or single-exon island
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1483 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1484 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1485 // sample subexon is contained in the intron.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1486 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1487 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1488 // Do nothing.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1489 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1490 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1491 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1492
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1493 //if ( se.nextCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1494 delete[] se.next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1495 //if ( se.prevCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1496 delete[] se.prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1497 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1498 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1499
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1500 /*for ( i = 0 ; i < sampleSubexonCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1501 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1502 if ( sampleSubexons[i].nextCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1503 delete[] sampleSubexons[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1504 if ( sampleSubexons[i].prevCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1505 delete[] sampleSubexons[i].prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1506 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1507 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1508
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1509 CleanUpSubexonConnections( subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1510
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1511 // Convert the temporary statistics number into formal statistics result.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1512 for ( i = 0 ; i < subexonCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1513 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1514 struct _subexon &se = subexons[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1515 if ( se.leftType == 0 && se.rightType == 0 ) // single-exon txpt.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1516 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1517 se.leftClassifier = se.rightClassifier = 1 - chicdf( se.rightClassifier, 2 * se.rcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1518 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1519 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1520 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1521 if ( se.leftType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1522 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1523 se.leftClassifier = 1 - chicdf( se.leftClassifier, 2 * se.lcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1524 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1525 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1526 se.leftClassifier = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1527
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1528 if ( se.rightType == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1529 se.rightClassifier = 1 - chicdf( se.rightClassifier, 2 * se.rcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1530 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1531 se.rightClassifier = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1532 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1533 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1534
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1535 int iiCnt = intronicInfos.size() ; //intronicInfo count
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1536 for ( i = 0 ; i < iiCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1537 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1538 struct _intronicInfo &ii = intronicInfos[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1539 if ( ii.validIrCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1540 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1541 for ( j = 0 ; j < fileCnt - ii.validIrCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1542 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1543 ii.irClassifier -= log( 10.0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1544 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1545 /*if ( ii.validIrCnt < fileCnt * 0.15 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1546 ii.irClassifier -= log( 1000.0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1547 else if ( ii.validIrCnt < fileCnt * 0.5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1548 ii.irClassifier -= log( 100.0 ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1549 ii.irClassifier = (double)1.0 / ( 1.0 + exp( ii.irClassifier + log( 1 - avgIrPiRatio ) - log( avgIrPiRatio ) ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1550 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1551 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1552 ii.irClassifier = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1553
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1554 if ( ii.leftOverhang.validCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1555 ii.leftOverhang.classifier = (double)1.0 / ( 1.0 + exp( ii.leftOverhang.classifier +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1556 log( 1 - avgOverhangPiRatio ) - log( avgOverhangPiRatio ) ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1557 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1558 ii.leftOverhang.classifier = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1559
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1560 if ( ii.rightOverhang.validCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1561 ii.rightOverhang.classifier = (double)1.0 / ( 1.0 + exp( ii.rightOverhang.classifier +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1562 log( 1 - avgOverhangPiRatio ) - log( avgOverhangPiRatio ) ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1563 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1564 ii.rightOverhang.classifier = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1565 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1566
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1567 // Change the classifier for the hard boundaries if its adjacent intron has intron retention classifier
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1568 // which collide with overhang subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1569 int intervalCnt = seIntervals.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1570 for ( i = 0 ; i < intervalCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1571 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1572 if ( seIntervals[i].type == 1 && intronicInfos[ seIntervals[i].idx ].irCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1573 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1574 int idx = seIntervals[i].idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1575 if ( intronicInfos[idx].leftOverhang.cnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1576 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1577 int k = seIntervals[i - 1].idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1578 // Should aim for more conservative?
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1579 if ( subexons[k].rightClassifier > intronicInfos[idx].leftOverhang.classifier )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1580 subexons[k].rightClassifier = intronicInfos[idx].leftOverhang.classifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1581 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1582
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1583 if ( intronicInfos[idx].rightOverhang.cnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1584 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1585 int k = seIntervals[i + 1].idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1586 if ( subexons[k].leftClassifier > intronicInfos[idx].rightOverhang.classifier )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1587 subexons[k].leftClassifier = intronicInfos[idx].rightOverhang.classifier ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1588 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1589 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1590 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1591
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1592 // Output the result.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1593 for ( i = 0 ; i < intervalCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1594 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1595 if ( seIntervals[i].type == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1596 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1597 struct _subexon &se = subexons[ seIntervals[i].idx ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1598
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1599 char ls, rs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1600
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1601 ls = StrandNumToSymbol( se.leftStrand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1602 rs = StrandNumToSymbol( se.rightStrand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1603
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1604 printf( "%s %d %d %d %d %c %c -1 -1 -1 %lf %lf ", alignments.GetChromName( se.chrId ), se.start, se.end,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1605 se.leftType, se.rightType, ls, rs, se.leftClassifier, se.rightClassifier ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1606 if ( i > 0 && seIntervals[i - 1].chrId == seIntervals[i].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1607 && seIntervals[i - 1].end + 1 == seIntervals[i].start
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1608 && !( seIntervals[i - 1].type == 0 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1609 subexons[ seIntervals[i - 1].idx ].rightType != se.leftType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1610 && !( seIntervals[i - 1].type == 1 && intronicInfos[ seIntervals[i - 1].idx ].irCnt == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1611 && intronicInfos[ seIntervals[i - 1].idx ].rightOverhang.cnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1612 && ( se.prevCnt == 0 || se.start - 1 != se.prev[ se.prevCnt - 1 ] ) ) // The connection showed up in the subexon file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1613 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1614 printf( "%d ", se.prevCnt + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1615 for ( j = 0 ; j < se.prevCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1616 printf( "%d ", se.prev[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1617 printf( "%d ", se.start - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1618 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1619 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1620 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1621 printf( "%d ", se.prevCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1622 for ( j = 0 ; j < se.prevCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1623 printf( "%d ", se.prev[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1624 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1625
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1626 if ( i < intervalCnt - 1 && seIntervals[i].chrId == seIntervals[i + 1].chrId
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1627 && seIntervals[i].end == seIntervals[i + 1].start - 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1628 && !( seIntervals[i + 1].type == 0 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1629 subexons[ seIntervals[i + 1].idx ].leftType != se.rightType )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1630 && !( seIntervals[i + 1].type == 1 && intronicInfos[ seIntervals[i + 1].idx ].irCnt == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1631 && intronicInfos[ seIntervals[i + 1].idx ].leftOverhang.cnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1632 && ( se.nextCnt == 0 || se.end + 1 != se.next[0] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1633 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1634 printf( "%d %d ", se.nextCnt + 1, se.end + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1635 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1636 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1637 printf( "%d ", se.nextCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1638 for ( j = 0 ; j < se.nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1639 printf( "%d ", se.next[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1640 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1641 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1642 else if ( seIntervals[i].type == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1643 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1644 struct _intronicInfo &ii = intronicInfos[ seIntervals[i].idx ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1645 if ( ii.irCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1646 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1647 printf( "%s %d %d 2 1 . . -1 -1 -1 %lf %lf 1 %d 1 %d\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1648 alignments.GetChromName( ii.chrId ), ii.start, ii.end,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1649 ii.irClassifier, ii.irClassifier,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1650 seIntervals[i - 1].end, seIntervals[i + 1].start ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1651 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1652 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1653 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1654 // left overhang.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1655 if ( ii.leftOverhang.cnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1656 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1657 printf( "%s %d %d 2 0 . . -1 -1 -1 %lf %lf 1 %d 0\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1658 alignments.GetChromName( ii.chrId ), ii.start,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1659 ii.start + ( ii.leftOverhang.length / ii.leftOverhang.cnt ) - 1,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1660 ii.leftOverhang.classifier, ii.leftOverhang.classifier,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1661 ii.start - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1662 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1663
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1664 // right overhang.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1665 if ( ii.rightOverhang.cnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1666 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1667 printf( "%s %d %d 0 1 . . -1 -1 -1 %lf %lf 0 1 %d\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1668 alignments.GetChromName( ii.chrId ),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1669 ii.end - ( ii.rightOverhang.length / ii.rightOverhang.cnt ) + 1, ii.end,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1670 ii.rightOverhang.classifier, ii.rightOverhang.classifier,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1671 ii.end + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1672 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1673
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1674 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1675 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1676 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1677
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1678 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1679 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1680