Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/SubexonCorrelation.hpp @ 0:903fc43d6227 draft default tip
Uploaded
| author | lsong10 |
|---|---|
| date | Fri, 26 Mar 2021 16:52:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:903fc43d6227 |
|---|---|
| 1 #ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER | |
| 2 #define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER | |
| 3 | |
| 4 #include "SubexonGraph.hpp" | |
| 5 | |
| 6 #include <stdio.h> | |
| 7 #include <math.h> | |
| 8 | |
| 9 class SubexonCorrelation | |
| 10 { | |
| 11 private: | |
| 12 struct _subexon *lastSubexons ; | |
| 13 std::vector<FILE *> fileList ; | |
| 14 int offset ; | |
| 15 int prevSeCnt ; | |
| 16 int seCnt ; | |
| 17 | |
| 18 double **correlation ; | |
| 19 | |
| 20 int OverlapSize( int s0, int e0, int s1, int e1 ) | |
| 21 { | |
| 22 int s = -1, e = -1 ; | |
| 23 if ( e0 < s1 || s0 > e1 ) | |
| 24 return 0 ; | |
| 25 s = s0 > s1 ? s0 : s1 ; | |
| 26 e = e0 < e1 ? e0 : e1 ; | |
| 27 return e - s + 1 ; | |
| 28 } | |
| 29 public: | |
| 30 SubexonCorrelation() | |
| 31 { | |
| 32 offset = 0 ; | |
| 33 lastSubexons = NULL ; | |
| 34 correlation = NULL ; | |
| 35 prevSeCnt = 0 ; | |
| 36 } | |
| 37 | |
| 38 ~SubexonCorrelation() | |
| 39 { | |
| 40 int cnt = fileList.size() ; | |
| 41 int i ; | |
| 42 for ( i = 0 ; i < cnt ; ++i ) | |
| 43 fclose( fileList[i] ) ; | |
| 44 delete[] lastSubexons ; | |
| 45 } | |
| 46 | |
| 47 void Initialize( char *f ) | |
| 48 { | |
| 49 FILE *fpSl ; | |
| 50 fpSl = fopen( f, "r" ) ; | |
| 51 char buffer[1024] ; | |
| 52 while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL ) | |
| 53 { | |
| 54 int len = strlen( buffer ) ; | |
| 55 if ( buffer[len - 1] == '\n' ) | |
| 56 { | |
| 57 buffer[len - 1] = '\0' ; | |
| 58 --len ; | |
| 59 | |
| 60 } | |
| 61 FILE *fp = fopen( buffer, "r" ) ; | |
| 62 fileList.push_back( fp ) ; | |
| 63 } | |
| 64 | |
| 65 lastSubexons = new struct _subexon[ fileList.size() ] ; | |
| 66 int i, cnt ; | |
| 67 cnt = fileList.size() ; | |
| 68 for ( i = 0 ; i < cnt ; ++i ) | |
| 69 lastSubexons[i].chrId = -1 ; | |
| 70 | |
| 71 } | |
| 72 | |
| 73 // We assume the subexons only contains the subexons we are interested in. | |
| 74 // And we assume that each time we call this function, the subexons we are interested are | |
| 75 // sorted in order. | |
| 76 void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments ) | |
| 77 { | |
| 78 int sampleCnt = fileList.size() ; | |
| 79 if ( sampleCnt <= 1 ) | |
| 80 return ; | |
| 81 int i, j, k ; | |
| 82 seCnt = cnt ; | |
| 83 | |
| 84 // Obtain the depth matrix. | |
| 85 double **depth ; | |
| 86 depth = new double* [sampleCnt] ; | |
| 87 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 88 { | |
| 89 depth[i] = new double[ cnt ] ; | |
| 90 memset( depth[i], 0, sizeof( double ) * cnt ) ; | |
| 91 } | |
| 92 | |
| 93 char buffer[2048] ; | |
| 94 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 95 { | |
| 96 struct _subexon se ; | |
| 97 bool useLastSubexon = false ; | |
| 98 if ( lastSubexons[i].chrId != -1 ) | |
| 99 { | |
| 100 se = lastSubexons[i] ; | |
| 101 useLastSubexon = true ; | |
| 102 } | |
| 103 else | |
| 104 se.chrId = -1 ; | |
| 105 | |
| 106 // Locate the subexon | |
| 107 while ( 1 ) | |
| 108 { | |
| 109 if ( se.chrId < subexons[0].chrId | |
| 110 || ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) ) | |
| 111 { | |
| 112 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) | |
| 113 { | |
| 114 buffer[0] = '\0' ; | |
| 115 break ; | |
| 116 } | |
| 117 if ( buffer[0] == '#' ) | |
| 118 continue ; | |
| 119 | |
| 120 SubexonGraph::InputSubexon( buffer, alignments, se ) ; | |
| 121 --se.start ; --se.end ; | |
| 122 useLastSubexon = false ; | |
| 123 continue ; | |
| 124 } | |
| 125 break ; | |
| 126 } | |
| 127 if ( buffer[0] == '\0' ) | |
| 128 { | |
| 129 lastSubexons[i] = se ; | |
| 130 continue ; | |
| 131 } | |
| 132 | |
| 133 // Process the subexon. | |
| 134 int tag = 0 ; | |
| 135 while ( 1 ) | |
| 136 { | |
| 137 lastSubexons[i] = se ; | |
| 138 | |
| 139 if ( useLastSubexon == false ) | |
| 140 { | |
| 141 SubexonGraph::InputSubexon( buffer, alignments, se ) ; | |
| 142 --se.start ; --se.end ; | |
| 143 } | |
| 144 | |
| 145 if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end ) | |
| 146 break ; | |
| 147 | |
| 148 while ( 1 ) | |
| 149 { | |
| 150 if ( tag > cnt || subexons[tag].end >= se.start ) | |
| 151 break ; | |
| 152 ++tag ; | |
| 153 } | |
| 154 | |
| 155 for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j ) | |
| 156 { | |
| 157 int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ; | |
| 158 depth[i][j] += overlap * se.avgDepth ; | |
| 159 } | |
| 160 | |
| 161 if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) | |
| 162 break ; | |
| 163 } | |
| 164 } | |
| 165 // Normalize the depth | |
| 166 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 167 { | |
| 168 for ( j = 0 ; j < cnt ; ++j ) | |
| 169 depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ; | |
| 170 } | |
| 171 | |
| 172 // Compute the correlation. | |
| 173 double *avg = new double[cnt] ; | |
| 174 double *var = new double[cnt] ; | |
| 175 if ( correlation != NULL ) | |
| 176 { | |
| 177 for ( i = 0 ; i < prevSeCnt ; ++i ) | |
| 178 delete[] correlation[i] ; | |
| 179 delete[] correlation ; | |
| 180 } | |
| 181 | |
| 182 correlation = new double*[cnt] ; | |
| 183 for ( i = 0 ; i < cnt ; ++i ) | |
| 184 correlation[i] = new double[cnt] ; | |
| 185 | |
| 186 memset( avg, 0, sizeof( double ) * cnt ) ; | |
| 187 memset( var, 0, sizeof( double ) * cnt ) ; | |
| 188 for ( i = 0 ; i < cnt ; ++i ) | |
| 189 { | |
| 190 //avg[i] = 0 ; | |
| 191 //var[i] = 0 ; | |
| 192 memset( correlation[i], 0, sizeof( double ) * cnt ) ; | |
| 193 correlation[i][i] = 1 ; | |
| 194 } | |
| 195 | |
| 196 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 197 for ( j = 0 ; j < cnt ; ++j ) | |
| 198 { | |
| 199 avg[j] += depth[i][j] ; | |
| 200 var[j] += depth[i][j] * depth[i][j] ; | |
| 201 } | |
| 202 for ( i = 0 ; i < cnt ; ++i ) | |
| 203 { | |
| 204 avg[i] /= sampleCnt ; | |
| 205 var[i] = var[i] / sampleCnt - avg[i] * avg[i] ; | |
| 206 } | |
| 207 | |
| 208 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 209 for ( j = 0 ; j < cnt ; ++j ) | |
| 210 for ( k = j + 1 ; k < cnt ; ++k ) | |
| 211 { | |
| 212 //if ( subexons[j].geneId == subexons[k].geneId ) | |
| 213 correlation[j][k] += depth[i][j] * depth[i][k] ; | |
| 214 } | |
| 215 for ( j = 0 ; j < cnt ; ++j ) | |
| 216 for ( k = j + 1 ; k < cnt ; ++k ) | |
| 217 { | |
| 218 if ( var[j] > 1e-6 && var[k] > 1e-6 ) | |
| 219 { | |
| 220 correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ; | |
| 221 } | |
| 222 else | |
| 223 correlation[j][k] = 0 ; | |
| 224 //printf( "%d %d %d\n", j, k, cnt ) ; | |
| 225 correlation[k][j] = correlation[j][k] ; | |
| 226 } | |
| 227 //printf( "%lf\n", correlation[0][1] ) ; | |
| 228 // Release the memory. | |
| 229 for ( i = 0 ; i < sampleCnt ; ++i ) | |
| 230 delete[] depth[i] ; | |
| 231 delete[] depth ; | |
| 232 delete[] avg ; | |
| 233 delete[] var ; | |
| 234 | |
| 235 prevSeCnt = cnt ; | |
| 236 } | |
| 237 | |
| 238 double Query( int i, int j ) | |
| 239 { | |
| 240 if ( fileList.size() > 1 ) | |
| 241 return correlation[i][j] ; | |
| 242 else | |
| 243 return 0 ; | |
| 244 } | |
| 245 | |
| 246 void Assign( const SubexonCorrelation &c ) | |
| 247 { | |
| 248 int i ; | |
| 249 fileList = c.fileList ; | |
| 250 if ( fileList.size() <= 1 ) | |
| 251 return ; | |
| 252 | |
| 253 if ( correlation != NULL ) | |
| 254 { | |
| 255 for ( i = 0 ; i < seCnt ; ++i ) | |
| 256 delete[] correlation[i] ; | |
| 257 delete[] correlation ; | |
| 258 } | |
| 259 | |
| 260 seCnt = c.seCnt ; | |
| 261 correlation = new double*[seCnt] ; | |
| 262 for ( i = 0 ; i < seCnt ; ++i ) | |
| 263 { | |
| 264 correlation[i] = new double[seCnt] ; | |
| 265 memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ; | |
| 266 } | |
| 267 } | |
| 268 } ; | |
| 269 | |
| 270 #endif |
