annotate PsiCLASS-1.0.2/TranscriptDecider.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #ifndef _MOURISL_CLASSES_TRANSCRIPTDECIDER_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #define _MOURISL_CLASSES_TRANSCRIPTDECIDER_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include <pthread.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <map>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include <time.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include <stdarg.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include "alignments.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include "SubexonGraph.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include "SubexonCorrelation.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #include "BitTable.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 #include "Constraints.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 #define HASH_MAX 100003 // default HASH_MAX
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 #define USE_DP 200000
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 struct _transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 BitTable seVector ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 double abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 double correlationScore ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 double FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 double *constraintsSupport ; // record the assign ment of constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 int first, last ; // indicate the index of the first and last subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 bool partial ; // wehther this is a partial transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 int id ; // the id for various usage: i.e transcript index in the alltranscripts.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// One transcript of one sample, in the form that is written to the GTF output.
struct _outputTranscript
{
	int chrId ;
	int geneId ;
	int transcriptId ;

	// Array of ecnt exons; each element carries the fields a and b.
	struct _pair32 *exons ;
	int ecnt ;

	char strand ;
	int sampleId ;

	// Expression values printed as the FPKM, TPM and cov attributes.
	double FPKM ;
	double TPM ;
	double cov ;
} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 struct _dp
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 BitTable seVector ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 int first, last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 // The "cnt" is for the hash structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 // the first cnt set bits represent the subexons that are the key of the hash
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 // the remaining set bits are the optimal subtranscript follow the key.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 int cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 double cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 double minAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 int timeStamp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 int strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 struct _dpAttribute
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 struct _dp *f1, **f2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 struct _dp *hash ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 struct _transcript bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 bool forAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 struct _subexon *subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 int seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 std::map<uint64_t, int> uncoveredPair ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 double minAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 int timeStamp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 class MultiThreadOutputTranscript ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 struct _transcriptDeciderThreadArg
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 int tid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 struct _subexon *subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 int seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 int sampleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 int numThreads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 int maxDpConstraintSize ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 double FPKMFraction, classifierThreshold, txptMinReadDepth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 Alignments *alignments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 std::vector<Constraints> constraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 SubexonCorrelation subexonCorrelation ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 MultiThreadOutputTranscript *outputHandler ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 int *freeThreads ; // the stack for free threads
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 int *ftCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 pthread_mutex_t *ftLock ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 pthread_cond_t *fullWorkCond ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 class MultiThreadOutputTranscript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 private:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 std::vector<struct _outputTranscript> outputQueue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 pthread_t *threads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 pthread_mutex_t outputLock ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 int sampleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 int numThreads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 std::vector<FILE *> outputFPs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 Alignments &alignments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 public:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 static int CompTranscripts( const struct _outputTranscript &a, const struct _outputTranscript &b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 if ( a.geneId != b.geneId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 return a.geneId - b.geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 if ( a.ecnt != b.ecnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 return a.ecnt - b.ecnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 for ( i = 0 ; i < a.ecnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 if ( a.exons[i].a != b.exons[i].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 return a.exons[i].a - b.exons[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 if ( a.exons[i].b != b.exons[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 return a.exons[i].b - b.exons[i].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 static bool CompSortTranscripts( const struct _outputTranscript &a, const struct _outputTranscript &b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 int tmp = CompTranscripts( a, b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 if ( tmp < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 else if ( tmp > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 return a.sampleId < b.sampleId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 MultiThreadOutputTranscript( int cnt, Alignments &a ): alignments( a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 sampleCnt = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 pthread_mutex_init( &outputLock, NULL ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 ~MultiThreadOutputTranscript()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 pthread_mutex_destroy( &outputLock ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 fclose( outputFPs[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 void SetThreadsPointer( pthread_t *t, int n )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 threads = t ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 numThreads = n ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 void SetOutputFPs( char *outputPrefix )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 char buffer[1024] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 if ( outputPrefix[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 sprintf( buffer, "%s_sample_%d.gtf", outputPrefix, i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 sprintf( buffer, "sample_%d.gtf", i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 FILE *fp = fopen( buffer, "w" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 outputFPs.push_back( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 void Add( struct _outputTranscript &t )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 pthread_mutex_lock( &outputLock ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 outputQueue.push_back( t ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 pthread_mutex_unlock( &outputLock ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 void Add_SingleThread( struct _outputTranscript &t )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 outputQueue.push_back( t ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 void ComputeFPKMTPM( std::vector<Alignments> &alignmentFiles )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 int qsize = outputQueue.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 double *totalFPK = new double[ sampleCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 memset( totalFPK, 0, sizeof( double ) * sampleCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 for ( i = 0 ; i < qsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 totalFPK[ outputQueue[i].sampleId ] += outputQueue[i].FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 for ( i = 0 ; i < qsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 outputQueue[i].TPM = outputQueue[i].FPKM / ( totalFPK[ outputQueue[i].sampleId ] / 1000000.0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 outputQueue[i].FPKM /= ( alignmentFiles[ outputQueue[i].sampleId ].totalReadCnt / 1000000.0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 delete[] totalFPK ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 void OutputCommandInfo( int argc, char *argv[] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 int j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 fprintf( outputFPs[i], "#PsiCLASS_v1.0.1\n#" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 for ( j = 0 ; j < argc - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 fprintf( outputFPs[i], "%s ", argv[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 fprintf( outputFPs[i], "%s\n", argv[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 void OutputCommentToSampleGTF( int sampleId, char *s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 fprintf( outputFPs[ sampleId ], "#%s\n", s ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 void Flush()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 std::sort( outputQueue.begin(), outputQueue.end(), CompSortTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 int qsize = outputQueue.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 char prefix[10] = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 // Recompute the transcript id
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 int gid = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 int tid = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 for ( i = 0 ; i < qsize ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 for ( j = i + 1 ; j < qsize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 if ( CompTranscripts( outputQueue[i], outputQueue[j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 if ( outputQueue[i].geneId != gid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 gid = outputQueue[i].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 tid = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 ++tid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 for ( l = i ; l < j ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 outputQueue[l].transcriptId = tid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 // output
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 for ( i = 0 ; i < qsize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 struct _outputTranscript &t = outputQueue[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 char *chrom = alignments.GetChromName( t.chrId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 fprintf( outputFPs[t.sampleId], "%s\tPsiCLASS\ttranscript\t%d\t%d\t1000\t%c\t.\tgene_id \"%s%s.%d\"; transcript_id \"%s%s.%d.%d\"; FPKM \"%.6lf\"; TPM \"%.6lf\"; cov \"%.6lf\";\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 chrom, t.exons[0].a, t.exons[t.ecnt - 1].b, t.strand,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 prefix, chrom, t.geneId,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 prefix, chrom, t.geneId, t.transcriptId, t.FPKM, t.TPM, t.cov ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 for ( j = 0 ; j < t.ecnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 fprintf( outputFPs[ t.sampleId ], "%s\tPsiCLASS\texon\t%d\t%d\t1000\t%c\t.\tgene_id \"%s%s.%d\"; "
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 "transcript_id \"%s%s.%d.%d\"; exon_number \"%d\"; FPKM \"%.6lf\"; TPM \"%.6lf\"; cov \"\%.6lf\";\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 chrom, t.exons[j].a, t.exons[j].b, t.strand,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 prefix, chrom, t.geneId,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 prefix, chrom, t.geneId, t.transcriptId,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 j + 1, t.FPKM, t.TPM, t.cov ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 delete []t.exons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 class TranscriptDecider
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 private:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 int sampleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 int numThreads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 double FPKMFraction ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 double txptMinReadDepth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 int hashMax ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 int maxDpConstraintSize ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 Constraints *constraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 //struct _subexon *subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 //int seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 int usedGeneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 int baseGeneId, defaultGeneId[2] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 int *transcriptId ; // the next transcript id for each gene id (we shift the gene id to 0 in this array.)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 Alignments &alignments ; // for obtain the chromosome names.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 std::vector<FILE *> outputFPs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 BitTable compatibleTestVectorT, compatibleTestVectorC ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 double canBeSoftBoundaryThreshold ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 MultiThreadOutputTranscript *outputHandler ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 // Test whether subexon tag is a start subexon in a mixture region that corresponds to the start of a gene on another strand.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 bool IsStartOfMixtureStrandRegion( int tag, struct _subexon *subexons, int seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 // The functions to pick transcripts through dynamic programming
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 struct _dp *dpHash ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 void SearchSubTranscript( int tag, int strand, int parents[], int pcnt, struct _dp &pdp, int visit[], int vcnt, int extends[], int extendCnt, std::vector<struct _constraint> &tc, int tcStartInd, struct _dpAttribute &attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 struct _dp SolveSubTranscript( int visit[], int vcnt, int strand, std::vector<struct _constraint> &tc, int tcStartInd, struct _dpAttribute &attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 void PickTranscriptsByDP( struct _subexon *subexons, int seCnt, int iterBound, Constraints &constraints, SubexonCorrelation &correlation, struct _dpAttribute &attr, std::vector<struct _transcript> &allTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 void SetDpContent( struct _dp &a, struct _dp &b, const struct _dpAttribute &attr )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 a.seVector.Assign( b.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 a.first = b.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 a.last = b.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 a.cnt = b.cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 a.cover = b.cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 a.strand = b.strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 a.minAbundance = attr.minAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 a.timeStamp = attr.timeStamp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 void ResetDpContent( struct _dp &d )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 d.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 d.first = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 d.last = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 d.cnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 d.cover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 d.minAbundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 d.timeStamp = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 void AugmentTranscripts( struct _subexon *subexons, std::vector<struct _transcript> &alltranscripts, int limit, bool extend ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 // Test whether a constraint is compatible with the transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 // Return 0 - incompatible or does not overlap at all. 1 - fully compatible. 2 - the head of the constraint is compatible with the tail of the transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 int IsConstraintInTranscript( struct _transcript transcript, struct _constraint &c ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 int IsConstraintInTranscriptDebug( struct _transcript transcript, struct _constraint &c ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 // Count how many transcripts are possible starting from subexons[tag].
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 int SubTranscriptCount( int tag, struct _subexon *subexons, int f[] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 // The methods when there is no need for DP
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 void EnumerateTranscript( int tag, int strand, int visit[], int vcnt, struct _subexon *subexons, SubexonCorrelation &correlation, double correlationScore, std::vector<struct _transcript> &alltranscripts, int &atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 // For the simpler case, we can pick sample by sample.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 void PickTranscripts( struct _subexon *subexons, std::vector<struct _transcript> &alltranscripts, Constraints &constraints, SubexonCorrelation &seCorrelation, std::vector<struct _transcript> &transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 static bool CompSortTranscripts( const struct _transcript &a, const struct _transcript &b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 if ( a.first < b.first )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 else if ( a.first > b.first )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 int diffPos = a.seVector.GetFirstDifference( b.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 if ( diffPos == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 if ( a.seVector.Test( diffPos ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 static bool CompSortPairs( const struct _pair32 &x, const struct _pair32 &y )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 if ( x.a != y.a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 return x.a < y.a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 return x.b < y.b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 static bool CompSortPairsByB( const struct _pair32 &x, const struct _pair32 &y )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 return x.b < y.b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 static int CompPairsByB( const void *p1, const void *p2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 return ((struct _pair32 *)p1)->b - ((struct _pair32 *)p2)->b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// Combine a weighted count, an abundance ratio and a correlation term into one score.
// The score is ( cnt * weight ) * ( 1 + g( a / A ) ) + correlation, where g is the
// fourth root when a is above 10% of A, and the plain ratio otherwise.
// @param cnt, weight: multiplied together to form the base of the score.
// @param a, A: numerator and denominator of the ratio.
// @param correlation: added to the result unchanged.
double ComputeScore( double cnt, double weight, double a, double A, double correlation )
{
	const double weightedCnt = cnt * weight ;
	const double ratio = a / A ;
	const double boost = ( a > A * 0.1 ) ? pow( ratio, 0.25 ) : ratio ;
	return weightedCnt * ( 1 + boost ) + correlation ;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 int GetFather( int f, int *father ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 void ConvertTranscriptAbundanceToFPKM( struct _subexon *subexons, struct _transcript &t, int readCnt = 1000000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 int txptLen = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 int i, size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 std::vector<int> subexonInd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 t.seVector.GetOnesIndices( subexonInd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 size = subexonInd.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 txptLen += ( subexons[ subexonInd[i] ].end - subexons[ subexonInd[i] ].start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 double factor = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 if ( alignments.matePaired )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 factor = 0.5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 t.FPKM = t.abundance * factor / ( ( readCnt / 1000000.0 ) * ( txptLen / 1000.0 ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 int GetTranscriptLengthFromAbundanceAndFPKM( double abundance, double FPKM, int readCnt = 1000000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 double factor = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 if ( alignments.matePaired )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 factor = 0.5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 return int( abundance * factor / ( FPKM / 1000.0 ) / ( readCnt / 1000000.0 ) + 0.5 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 void CoalesceSameTranscripts( std::vector<struct _transcript> &t ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 // Initialize the structure to store transcript id
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 void InitTranscriptId() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 int GetTranscriptGeneId( std::vector<int> &subexonInd, struct _subexon *subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 int GetTranscriptGeneId( struct _transcript &t, struct _subexon *subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 int RemoveNegativeAbundTranscripts( std::vector<struct _transcript> &transcripts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 int tcnt = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 j = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 if ( transcripts[i].abundance < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 transcripts[i].seVector.Release() ; // Don't forget release the memory.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 transcripts[j] = transcripts[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 transcripts.resize( j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 return j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 void AbundanceEstimation( struct _subexon *subexons, int seCnt, Constraints &constraints, std::vector<struct _transcript> &transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 int RefineTranscripts( struct _subexon *subexons, int seCnt, bool aggressive, std::map<int, int> *subexonChainSupport, int *txptSampleSupport, std::vector<struct _transcript> &transcripts, Constraints &constraints ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 void ComputeTranscriptsScore( struct _subexon *subexons, int seCnt, std::map<int, int> *subexonChainSupport, std::vector<struct _transcript> &transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 void OutputTranscript( int sampleId, struct _subexon *subexons, struct _transcript &transcript ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// Write a printf-style message to stderr as "[<local time>] <message>\n",
// with the time rendered by strftime's "%c" format.
// @param fmt: printf-style format string; the remaining arguments are its values.
// A message that does not fit the internal buffer is truncated instead of
// overflowing it.
void PrintLog( const char *fmt, ... )
{
	char buffer[10021] ;
	va_list args ;
	va_start( args, fmt ) ;
	// Bounded formatting: an unbounded vsprintf writes past the buffer on long messages.
	if ( vsnprintf( buffer, sizeof( buffer ), fmt, args ) < 0 )
		buffer[0] = '\0' ; // formatting failed; do not print unspecified content.
	va_end( args ) ; // every va_start must be paired with va_end.

	time_t mytime = time(NULL) ;
	struct tm *localT = localtime( &mytime ) ;
	char stime[500] ;
	// localtime can return NULL, and strftime leaves the array unspecified when
	// it returns 0; print an empty timestamp in both cases.
	if ( localT == NULL || strftime( stime, sizeof( stime ), "%c", localT ) == 0 )
		stime[0] = '\0' ;
	fprintf( stderr, "[%s] %s\n", stime, buffer ) ;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 public:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 TranscriptDecider( double f, double c, double d, int sampleCnt, Alignments &a ): alignments( a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 FPKMFraction = f ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 canBeSoftBoundaryThreshold = c ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 txptMinReadDepth = d ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 usedGeneId = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 defaultGeneId[0] = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 defaultGeneId[1] = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 maxDpConstraintSize = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 numThreads = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 this->sampleCnt = sampleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 dpHash = new struct _dp[ HASH_MAX ] ; // pre-allocated buffer to hold dp information.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 ~TranscriptDecider()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 if ( numThreads == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 int size = outputFPs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 fclose( outputFPs[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 delete[] dpHash ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 // @return: the number of assembled transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 int Solve( struct _subexon *subexons, int seCnt, std::vector<Constraints> &constraints, SubexonCorrelation &subexonCorrelation ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 void SetOutputFPs( char *outputPrefix )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 char buffer[1024] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 if ( outputPrefix[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 sprintf( buffer, "%s_sample_%d.gtf", outputPrefix, i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 sprintf( buffer, "sample_%d.gtf", i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 FILE *fp = fopen( buffer, "w" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 outputFPs.push_back( fp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 void SetMultiThreadOutputHandler( MultiThreadOutputTranscript *h )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 outputHandler = h ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 void SetNumThreads( int t )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 numThreads = t ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 void SetMaxDpConstraintSize(int size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 maxDpConstraintSize = size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 void *TranscriptDeciderSolve_Wrapper( void *arg ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 #endif