annotate PsiCLASS-1.0.2/grader.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 // Compare the reference and the predictions
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 // Format: ./a.out ref.gtf prediction.gtf
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #define MAX_TXPT 2000000
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #define DEBUG 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 bool flagMultiExonOnly = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 bool flagValidChr = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 int relaxWidth = 20 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 struct _pair
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 int a, b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 struct _transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 char tid[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 char chrom[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 char strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 struct _pair *exons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 int ecnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 struct _info
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 double coverage ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 int byId ; // It gets this coverage by comparing with byId.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 struct _intron
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 char chrom[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 int tindex ; // The index to the corresponding transcripts
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 struct _exon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 char chrom[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 //int tindex ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 int soft ; // left-bit: left side is soft, right-bit: right side is soft
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 bool matched ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 int TranscriptComp( const void *p1, const void *p2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 const struct _transcript *a = (struct _transcript *)p1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 const struct _transcript *b = ( struct _transcript *)p2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 int tmp = strcmp( a->chrom, b->chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 if ( tmp != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 return tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 return a->exons[0].a - b->exons[0].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 int TranscriptComp_ByIntron( const void *p1, const void *p2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 const struct _transcript *a = (struct _transcript *)p1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 const struct _transcript *b = ( struct _transcript *)p2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 if ( a->strand != b->strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 return a->strand - b->strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 int tmp = strcmp( a->chrom, b->chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 if ( tmp != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 return tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 if ( a->ecnt != b->ecnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 return a->ecnt - b->ecnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 for ( i = 0 ; i < a->ecnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 if ( a->exons[i].b != b->exons[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 return a->exons[i].b - b->exons[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 if ( a->exons[i + 1].a != b->exons[i + 1].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 return a->exons[i + 1].a - b->exons[i + 1].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 return 0 ; //strcmp( a->tid, b->tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 bool validChrom( char *chrom )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 if ( !flagValidChr )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 if ( chrom[0] != 'c' || chrom[1] != 'h' || chrom[2] != 'r' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 // only consider chr1-22,x,y,z
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 if ( chrom[3]=='x' || chrom[3]=='X'
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 || chrom[3] == 'y' || chrom[3] == 'Y'
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 || chrom[3] == 'm' || chrom[3] == 'M'
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 || ( chrom[3] >= '3' && chrom[3] <= '9' ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if ( chrom[4] == '\0' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 if ( chrom[3] == '1' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 if ( chrom[4] == '\0' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 if ( chrom[4] >= '0' && chrom[4] <= '9' && chrom[5] == '\0' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 if ( chrom[3] == '2' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 if ( chrom[4] == '\0' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 if ( chrom[4] >= '0' && chrom[4] <= '2' && chrom[5] == '\0' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 void ReverseExonList( struct _pair *exons, int ecnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 if ( ecnt < 2 || ( exons[0].a < exons[1].a ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 struct _pair tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 for ( i = 0, j = ecnt - 1 ; i < j ; ++i, --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 tmp = exons[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 exons[i] = exons[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 exons[j] = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 Merge exons that are next to each other. Showed up in scripture.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 int CleanExonList( struct _pair *exons, int ecnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 for ( i = 0 ; i < ecnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 if ( exons[i + 1].a - exons[i].b - 1 < 20 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 exons[i + 1].a = exons[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 exons[i].a = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 for ( i = 0 ; i < ecnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 if ( exons[i].a == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 exons[k] = exons[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 return k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 Remove the duplicated intron chain in the list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 int RemoveDuplicateIntronChain( struct _transcript *t, int tcnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 qsort( t, tcnt, sizeof( *t ), TranscriptComp_ByIntron ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 for ( i = 1 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 k = i - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 while ( t[k].ecnt == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 --k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 if ( t[i].ecnt != t[k].ecnt || strcmp( t[i].chrom, t[k].chrom ) || t[i].strand != t[k].strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 for ( j = 0 ; j < t[i].ecnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 if ( t[i].exons[j].b != t[k].exons[j].b ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 t[i].exons[j + 1].a != t[k].exons[j + 1].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 if ( j >= t[i].ecnt - 1 && t[i].ecnt != 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 t[i].ecnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 if ( t[i].ecnt == 1 && t[i].exons[0].a == t[k].exons[0].a &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 t[i].exons[0].b == t[k].exons[0].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 t[i].ecnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 /*if ( t[i].ecnt == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 printf( "%s <- %s\n", t[i].tid, t[k].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 if ( t[i].ecnt == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 free( t[i].exons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 t[k] = t[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 tcnt = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 return tcnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 double CompareTranscripts( const struct _transcript &ref, const struct _transcript &pred )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 struct _pair *refExons = ref.exons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 int refECnt = ref.ecnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 struct _pair *predExons = pred.exons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 int predECnt = pred.ecnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 // Prediction must be a "subset" of the reference
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 if ( refECnt < predECnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 // single exon case
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 if ( refECnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 if ( refExons[0].b < predExons[0].a || refExons[0].a > predExons[0].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 if ( predECnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 for ( i = 0 ; i < refECnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 if ( refExons[i].b < predExons[0].a || refExons[i].a > predExons[0].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 /*if ( i == 0 && predExons[0].a >= refExons[0].a - relaxWidth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 else if ( i == refECnt - 1 && predExons[0].b <= refExons[i].b + relaxWidth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 else if ( i > 0 && i < refECnt - 1 && predExons[0].a >= refExons[i].a - relaxWidth &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 predExons[0].b <= refExons[i].b + relaxWidth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 return -1 ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 if ( predECnt > 0 && ref.strand != pred.strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 // Test the intron chain
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 for ( i = 0 ; i < refECnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 if ( refExons[i].b == predExons[0].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 if ( i >= refECnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 //if ( i != 0 && predExons[0].a < refExons[i].a - relaxWidth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 // return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 for ( j = 0 ; i < refECnt - 1 && j < predECnt - 1 ; ++i, ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 if ( refExons[i].b != predExons[j].b || refExons[i + 1].a != predExons[j + 1].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 if ( j >= predECnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 /*if ( i == refECnt - 1 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 predExons[ predECnt - 1 ].b <= refExons[i].b + relaxWidth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 return (double)( predECnt - 1 ) / (double)( refECnt - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 return -1 ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 return (double)( predECnt - 1 ) / (double)( refECnt - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 bool WithinRange( int a, int b, int w = 10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 if ( a - b >= -w && a - b <= w )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 int GetIntrons( struct _transcript *transcripts, int tcnt, struct _intron *introns )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 int i, j, k, tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 for ( j = 0 ; j < transcripts[i].ecnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 bool flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 for ( k = cnt - 1 ; k >= 0 ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 if ( strcmp( introns[k].chrom, transcripts[i].chrom ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 //printf( "hi\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 tag = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 if ( introns[k].start < transcripts[i].exons[j].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 tag = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 else if ( introns[k].start == transcripts[i].exons[j].b &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 introns[k].end == transcripts[i].exons[j + 1].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 if ( !flag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 for ( k = cnt ; k > tag ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 introns[k] = introns[k - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 strcpy( introns[k].chrom, transcripts[i].chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 introns[k].start = transcripts[i].exons[j].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 introns[k].end = transcripts[i].exons[j + 1].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 introns[k].tindex = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 return cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 int GetExons( struct _transcript *transcripts, int tcnt, struct _exon *exons )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 int i, j, k, tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 for ( j = 0 ; j < transcripts[i].ecnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 bool flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 int soft = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 if ( j == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 soft = soft | 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 if ( j == transcripts[i].ecnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 soft = soft | 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 for ( k = cnt - 1 ; k >= 0 ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 if ( strcmp( exons[k].chrom, transcripts[i].chrom ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 //printf( "hi\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 tag = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 if ( exons[k].start < transcripts[i].exons[j].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 tag = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 else if ( exons[k].start == transcripts[i].exons[j].a &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 exons[k].end == transcripts[i].exons[j].b &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 exons[k].soft == soft )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 if ( !flag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 for ( k = cnt ; k > tag ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 exons[k] = exons[k - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 strcpy( exons[k].chrom, transcripts[i].chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 exons[k].start = transcripts[i].exons[j].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 exons[k].end = transcripts[i].exons[j].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 //exons[k].tindex = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 exons[k].soft = soft ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 exons[k].matched = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 return cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 //chr1 HAVANA transcript 320162 324461 . + . gene_id "ENSG00000237094.6"; transcript_id "ENST00000423728.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP4-669L17.10"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP4-669L17.10-002"; level 2; havana_gene "OTTHUMG00000156968.4"; havana_transcript "OTTHUMT00000346879.1";
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 int main( int argc, char *argv[] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 FILE *fp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 struct _transcript *ref, *pred ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 struct _info *refInfo, *predInfo ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 int refCnt, predCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 char line[10000], filename[10000] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 struct _pair tmpExons[10000] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 int tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 char chrom[50], tool[20], type[40], strand[3] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 char tid[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 char buffer[50] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 int i, j, k, tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 if ( argc == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 printf( "Compare the reference transcripts and predicted transcripts.\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 "Format: ./grader ref.gtf prediction.gtf\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 // Parse the arguments
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 for ( i = 3 ; i < argc ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 if ( !strcmp( argv[i], "-M" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 flagMultiExonOnly = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 else if ( !strcmp( argv[i], "-ac" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 flagValidChr = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 printf( "Unknown argument: %s\n", argv[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 /*========================================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 Stage 1: Read the transcripts from the reference and prediction's GTF files.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 ========================================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 fp = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 fp = fopen( argv[1], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 if ( fp == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 printf( "Could not open file %s.\n", argv[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 refCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 ref = ( struct _transcript *)malloc( MAX_TXPT * sizeof( struct _transcript ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 strcpy( ref[0].tid, "tmp_-1" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 while ( fgets( line, sizeof( line ), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 if ( line[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 sscanf( line, "%s %s %s %d %d %s %s", chrom, tool, type, &start, &end, buffer, strand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 if ( strcmp( type, "exon" ) || !validChrom( chrom ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 char *p = strstr( line, "transcript_id" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 for ( ; *p != ' ' ; ++p )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 p += 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 sscanf( p, "%s", tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 //printf( "+%s %d\n", tid, strlen( tid ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 p = tid + strlen( tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 while ( *p != '\"' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 --p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 *p = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 //printf( "%s\n", tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 if ( strcmp( tid, ref[ refCnt ].tid ) && tmpECnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 ReverseExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 if ( tmpECnt > 1 || !flagMultiExonOnly )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 ref[ refCnt ].ecnt = tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 ref[ refCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 memcpy( ref[ refCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 ++refCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 tmpExons[ tmpECnt ].a = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 tmpExons[ tmpECnt ].b = end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 ++tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 ref[ refCnt ].strand = strand[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 strcpy( ref[ refCnt ].chrom, chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 strcpy( ref[ refCnt ].tid, tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 if ( tmpECnt != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 ReverseExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 if ( tmpECnt > 1 || !flagMultiExonOnly )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 ref[ refCnt ].ecnt = tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 ref[ refCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 memcpy( ref[ refCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 ++refCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 fp = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 fp = fopen( argv[2], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 if ( fp == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 printf( "Could not open file %s.\n", argv[2] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 predCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 pred = ( struct _transcript *)malloc( MAX_TXPT * sizeof( struct _transcript ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 strcpy( pred[0].tid, "tmp_-1" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 while ( fgets( line, sizeof( line ), fp ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 if ( line[0] == '#' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 sscanf( line, "%s %s %s %d %d %s %s", chrom, tool, type, &start, &end, buffer, strand ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 if ( strcmp( type, "exon" ) || !validChrom( chrom ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 char *p = strstr( line, "transcript_id" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 //char *p = strstr( line, "gene_name" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 for ( ; *p != ' ' ; ++p )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 p += 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 sscanf( p, "%s", tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 //tid[ strlen( tid ) - 2 ] = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 p = tid + strlen( tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 while ( *p != '\"' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 --p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 *p = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 if ( strcmp( tid, pred[ predCnt ].tid ) && tmpECnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 ReverseExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 if ( tmpECnt > 1 || !flagMultiExonOnly )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 pred[ predCnt ].ecnt = tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 pred[ predCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 memcpy( pred[ predCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 ++predCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 tmpExons[ tmpECnt ].a = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 tmpExons[ tmpECnt ].b = end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 ++tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 pred[ predCnt ].strand = strand[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 strcpy( pred[ predCnt ].chrom, chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 strcpy( pred[ predCnt ].tid, tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 if ( tmpECnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 ReverseExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 if ( tmpECnt > 1 || !flagMultiExonOnly )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 pred[ predCnt ].ecnt = tmpECnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 pred[ predCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 memcpy( pred[ predCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 ++predCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 tmpECnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 refCnt = RemoveDuplicateIntronChain( ref, refCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 //printf( "predCnt = %d\n", predCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 predCnt = RemoveDuplicateIntronChain( pred, predCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 //printf( "predCnt = %d\n", predCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 qsort( ref, refCnt, sizeof( ref[0] ), TranscriptComp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 qsort( pred, predCnt, sizeof( pred[0] ), TranscriptComp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 /*========================================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 Stage 2: Compute the recall and precision.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 ========================================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 refInfo = ( struct _info * )malloc( refCnt * sizeof( *refInfo ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 predInfo = ( struct _info * )malloc( predCnt * sizeof( *predInfo ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 for ( i = 0 ; i < refCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 refInfo[i].coverage = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 refInfo[i].byId = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 #if DEBUG
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 printf( "=%d %s %s %d\n", i, ref[i].chrom, ref[i].tid, ref[i].ecnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 for ( j = 0 ; j < ref[i].ecnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 printf( "%d %d\n", ref[i].exons[j].a, ref[i].exons[j].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 for ( i = 0 ; i < predCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 predInfo[i].coverage = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 predInfo[i].byId = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597 #if DEBUG
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 printf( "#%d %s %s %d\n", i, pred[i].chrom, pred[i].tid, pred[i].ecnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 for ( j = 0 ; j < pred[i].ecnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 printf( "%d %d\n", pred[i].exons[j].a, pred[i].exons[j].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 for ( i = 0 ; i < predCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 while ( tag < refCnt &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 ( ( strcmp( ref[tag].chrom, pred[i].chrom ) < 0 ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 ( !strcmp( ref[tag].chrom, pred[i].chrom ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 ref[tag].exons[ ref[tag].ecnt - 1 ].b < pred[i].exons[0].a - relaxWidth ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 // if ( i == 16474 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 // printf( "%d %d %d", i, tag, refCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 for ( j = tag ; j < refCnt && ref[j].exons[0].a <= pred[i].exons[ pred[i].ecnt - 1 ].b + relaxWidth && !strcmp( ref[j].chrom, pred[i].chrom ); ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 /*if ( !strcmp( pred[i].tid, "CUFF.4256.1" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 printf( "%s ? %s\n", pred[i].tid, ref[j].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 // if ( i == 16474 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 // {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 // printf( "%d %d\n", i, j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 // }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 double coverage = CompareTranscripts( ref[j], pred[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 if ( coverage > refInfo[j].coverage )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 refInfo[j].coverage = coverage ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 refInfo[j].byId = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 if ( coverage > predInfo[i].coverage )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 predInfo[i].coverage = coverage ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 predInfo[i].byId = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 /*========================================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 Stage 3: Dump the information into output files.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 ========================================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 //sprintf( filename, "grader.%s.recall", argv[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 sprintf( filename, "grader.recall" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 fp = fopen( filename, "w" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 for ( i = 0 ; i < refCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 fprintf( fp, "%s\t%d\t%lf\t%s\n", ref[i].tid, ref[i].ecnt, refInfo[i].coverage,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 refInfo[i].byId == -1 ? "-" : pred[ refInfo[i].byId ].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 //sprintf( filename, "grader.%s.precision", argv[2] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 sprintf( filename, "grader.precision" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 fp = fopen( filename, "w" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 for ( i = 0 ; i < predCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 fprintf( fp, "%s\t%d\t%lf\t%s\n", pred[i].tid, pred[i].ecnt, predInfo[i].coverage,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 predInfo[i].byId == -1 ? "-" : ref[predInfo[i].byId].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 // print the summary to the stdout
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 const int binCnt = 20 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 int bins[binCnt + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 memset( bins, 0, sizeof( bins ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 for ( i = 0 ; i < refCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 if ( refInfo[i].coverage == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 ++bins[ (int)( refInfo[i].coverage * binCnt ) ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 for ( i = 0 ; i <= binCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 printf( "recall %lf %lf %d/%d\n", (double)i / binCnt, (double)k / refCnt, k, refCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 k -= bins[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 memset( bins, 0, sizeof( bins ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 for ( i = 0 ; i < predCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 if ( predInfo[i].coverage == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 ++bins[ (int)( predInfo[i].coverage * binCnt ) ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 for ( i = 0 ; i <= binCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 printf( "precision %lf %lf %d/%d\n", (double)i / binCnt, (double)k / predCnt, k, predCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 k -= bins[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 /*========================================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 Stage 4: Evaluations for introns.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696 ========================================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697 struct _intron *refIntrons = ( struct _intron * )malloc( sizeof( struct _intron ) * MAX_TXPT ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698 int riCnt = GetIntrons( ref, refCnt, refIntrons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 struct _intron *predIntrons = ( struct _intron * )malloc( sizeof( struct _intron ) * MAX_TXPT ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700 int piCnt = GetIntrons( pred, predCnt, predIntrons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 int matchedIntron = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 /*for ( i = 0 ; i < riCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704 printf( "%d %s %d %d\n", i, refIntrons[i].chrom, refIntrons[i].start, refIntrons[i].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707 fp = fopen( "grader.intron", "w" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 for ( i = 0 ; i < piCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711 bool flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714 if ( tag >= riCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716 int tmp = strcmp( refIntrons[tag].chrom, predIntrons[i].chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
717 if ( tmp < 0 || ( tmp == 0 && refIntrons[tag].start < predIntrons[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
718 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
719 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
720 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
721 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
722 //printf( "%d (%s %d %d) %d=>", i, predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, tag ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
723 /*if ( predIntrons[i].start == 1613457 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
724 printf( "%d %d\n", tag, riCnt ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
725 for ( j = tag ; j < riCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
726 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
727 if ( strcmp( refIntrons[j].chrom, predIntrons[i].chrom ) > 0 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
728 refIntrons[j].start > predIntrons[i].start /*+ 10*/ )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
729 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
730 if ( refIntrons[j].start == predIntrons[i].start &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
731 refIntrons[j].end == predIntrons[i].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
732 //if ( WithinRange( refIntrons[j].start, predIntrons[i].start ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733 // WithinRange( refIntrons[j].end, predIntrons[i].end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 ++matchedIntron ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 //printf( "%d\n", j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 if ( flag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 fprintf( fp, "Y\t%s\t%d\t%d\t%s\n", predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, pred[ predIntrons[i].tindex ].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744 fprintf( fp, "N\t%s\t%d\t%d\t%s\n", predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, pred[ predIntrons[i].tindex ].tid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 fclose( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 printf( "Intron evaluation\n") ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 //printf( "\tmatched\t%d\n", matchedIntron ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 printf( "\trecall\t%lf\t%d/%d\n", matchedIntron / (double)riCnt, matchedIntron, riCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 printf( "\tprecision\t%lf\t%d/%d\n", matchedIntron / (double)piCnt, matchedIntron, piCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 /*========================================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 Stage 4: Evaluations for exons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755 ========================================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756 struct _exon *refExons = ( struct _exon * )malloc( sizeof( struct _exon ) * MAX_TXPT ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757 int reCnt = GetExons( ref, refCnt, refExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758 struct _exon *predExons = ( struct _exon * )malloc( sizeof( struct _exon ) * MAX_TXPT ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759 int peCnt = GetExons( pred, predCnt, predExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 int matchedExons = 0, matchedInternalExons = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 int refInternalExons = 0, predInternalExons = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763 for ( i = 0 ; i < reCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765 //printf( "%d %d\n", refExons[i].start, refExons[i].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 if ( refExons[i].soft == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 ++refInternalExons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768 //if ( refExons[i].soft == 3 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 // printf( "hi\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771 for ( i = 0 ; i < peCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
772 if ( predExons[i].soft == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
773 ++predInternalExons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
774 tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
775 for ( i = 0 ; i < peCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
776 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
777 bool flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
778 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 if ( tag >= reCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 int tmp = strcmp( refExons[tag].chrom, predExons[i].chrom ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783 if ( tmp < 0 || ( tmp == 0 && refExons[tag].end < predExons[i].start ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788 for ( j = tag ; j < reCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 if ( strcmp( refExons[j].chrom, predExons[i].chrom ) > 0 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791 refExons[j].start > predExons[i].end /*+ 10*/ )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 if ( refExons[j].soft != predExons[i].soft
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
794 || refExons[j].end < predExons[i].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
795 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
796
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
797 if ( refExons[j].start == predExons[i].start &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
798 refExons[j].end == predExons[i].end && predExons[i].soft == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
799 //if ( WithinRange( refIntrons[j].start, predIntrons[i].start ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
800 // WithinRange( refIntrons[j].end, predIntrons[i].end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
801 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
802 refExons[j].matched = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
803 predExons[i].matched = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
804 ++matchedInternalExons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
805 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
806 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
807 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
808 else if ( ( refExons[j].start == predExons[i].start || ( predExons[i].soft & 2 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
809 && ( refExons[j].end == predExons[i].end || (predExons[i].soft & 1 ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
810 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
811 refExons[j].matched = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
812 predExons[i].matched = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
813 flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
814 //break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
815 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
816 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
817 //printf( "%d\n", j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
818 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
819
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
820 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
821 printf( "Exon evaluation\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
822 //printf( "\tmatched\t%d\n", matchedExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
823 matchedExons = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
824 for ( i = 0 ; i < reCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
825 if ( refExons[i].matched )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
826 ++matchedExons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
827 printf( "\trecall\t%lf\t%d/%d\n", (double)matchedExons / reCnt, matchedExons, reCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
828 matchedExons = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
829 for ( i = 0 ; i < peCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
830 if ( predExons[i].matched )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
831 ++matchedExons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
832 printf( "\tprecision\t%lf\t%d/%d\n", (double)matchedExons / peCnt, matchedExons, peCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
833
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
834 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
835 printf( "Internal exon evaluation\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
836 //printf( "\tmatched\t%d\n", matchedInternalExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
837 printf( "\trecall\t%lf\t%d/%d\n", (double)matchedInternalExons / refInternalExons, matchedInternalExons, refInternalExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
838 printf( "\tprecision\t%lf\t%d/%d\n", (double)matchedInternalExons / predInternalExons, matchedInternalExons, predInternalExons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
839 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
840 }