Mercurial > repos > lsong10 > psiclass
diff PsiCLASS-1.0.2/SubexonCorrelation.hpp @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PsiCLASS-1.0.2/SubexonCorrelation.hpp Fri Mar 26 16:52:45 2021 +0000 @@ -0,0 +1,270 @@ +#ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER +#define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER + +#include "SubexonGraph.hpp" + +#include <stdio.h> +#include <math.h> + +class SubexonCorrelation +{ +private: + struct _subexon *lastSubexons ; + std::vector<FILE *> fileList ; + int offset ; + int prevSeCnt ; + int seCnt ; + + double **correlation ; + + int OverlapSize( int s0, int e0, int s1, int e1 ) + { + int s = -1, e = -1 ; + if ( e0 < s1 || s0 > e1 ) + return 0 ; + s = s0 > s1 ? s0 : s1 ; + e = e0 < e1 ? e0 : e1 ; + return e - s + 1 ; + } +public: + SubexonCorrelation() + { + offset = 0 ; + lastSubexons = NULL ; + correlation = NULL ; + prevSeCnt = 0 ; + } + + ~SubexonCorrelation() + { + int cnt = fileList.size() ; + int i ; + for ( i = 0 ; i < cnt ; ++i ) + fclose( fileList[i] ) ; + delete[] lastSubexons ; + } + + void Initialize( char *f ) + { + FILE *fpSl ; + fpSl = fopen( f, "r" ) ; + char buffer[1024] ; + while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL ) + { + int len = strlen( buffer ) ; + if ( buffer[len - 1] == '\n' ) + { + buffer[len - 1] = '\0' ; + --len ; + + } + FILE *fp = fopen( buffer, "r" ) ; + fileList.push_back( fp ) ; + } + + lastSubexons = new struct _subexon[ fileList.size() ] ; + int i, cnt ; + cnt = fileList.size() ; + for ( i = 0 ; i < cnt ; ++i ) + lastSubexons[i].chrId = -1 ; + + } + + // We assume the subexons only contains the subexons we are interested in. + // And we assume that each time we call this function, the subexons we are interested are + // sorted in order. + void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments ) + { + int sampleCnt = fileList.size() ; + if ( sampleCnt <= 1 ) + return ; + int i, j, k ; + seCnt = cnt ; + + // Obtain the depth matrix. + double **depth ; + depth = new double* [sampleCnt] ; + for ( i = 0 ; i < sampleCnt ; ++i ) + { + depth[i] = new double[ cnt ] ; + memset( depth[i], 0, sizeof( double ) * cnt ) ; + } + + char buffer[2048] ; + for ( i = 0 ; i < sampleCnt ; ++i ) + { + struct _subexon se ; + bool useLastSubexon = false ; + if ( lastSubexons[i].chrId != -1 ) + { + se = lastSubexons[i] ; + useLastSubexon = true ; + } + else + se.chrId = -1 ; + + // Locate the subexon + while ( 1 ) + { + if ( se.chrId < subexons[0].chrId + || ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) ) + { + if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) + { + buffer[0] = '\0' ; + break ; + } + if ( buffer[0] == '#' ) + continue ; + + SubexonGraph::InputSubexon( buffer, alignments, se ) ; + --se.start ; --se.end ; + useLastSubexon = false ; + continue ; + } + break ; + } + if ( buffer[0] == '\0' ) + { + lastSubexons[i] = se ; + continue ; + } + + // Process the subexon. + int tag = 0 ; + while ( 1 ) + { + lastSubexons[i] = se ; + + if ( useLastSubexon == false ) + { + SubexonGraph::InputSubexon( buffer, alignments, se ) ; + --se.start ; --se.end ; + } + + if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end ) + break ; + + while ( 1 ) + { + if ( tag > cnt || subexons[tag].end >= se.start ) + break ; + ++tag ; + } + + for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j ) + { + int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ; + depth[i][j] += overlap * se.avgDepth ; + } + + if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL ) + break ; + } + } + // Normalize the depth + for ( i = 0 ; i < sampleCnt ; ++i ) + { + for ( j = 0 ; j < cnt ; ++j ) + depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ; + } + + // Compute the correlation. + double *avg = new double[cnt] ; + double *var = new double[cnt] ; + if ( correlation != NULL ) + { + for ( i = 0 ; i < prevSeCnt ; ++i ) + delete[] correlation[i] ; + delete[] correlation ; + } + + correlation = new double*[cnt] ; + for ( i = 0 ; i < cnt ; ++i ) + correlation[i] = new double[cnt] ; + + memset( avg, 0, sizeof( double ) * cnt ) ; + memset( var, 0, sizeof( double ) * cnt ) ; + for ( i = 0 ; i < cnt ; ++i ) + { + //avg[i] = 0 ; + //var[i] = 0 ; + memset( correlation[i], 0, sizeof( double ) * cnt ) ; + correlation[i][i] = 1 ; + } + + for ( i = 0 ; i < sampleCnt ; ++i ) + for ( j = 0 ; j < cnt ; ++j ) + { + avg[j] += depth[i][j] ; + var[j] += depth[i][j] * depth[i][j] ; + } + for ( i = 0 ; i < cnt ; ++i ) + { + avg[i] /= sampleCnt ; + var[i] = var[i] / sampleCnt - avg[i] * avg[i] ; + } + + for ( i = 0 ; i < sampleCnt ; ++i ) + for ( j = 0 ; j < cnt ; ++j ) + for ( k = j + 1 ; k < cnt ; ++k ) + { + //if ( subexons[j].geneId == subexons[k].geneId ) + correlation[j][k] += depth[i][j] * depth[i][k] ; + } + for ( j = 0 ; j < cnt ; ++j ) + for ( k = j + 1 ; k < cnt ; ++k ) + { + if ( var[j] > 1e-6 && var[k] > 1e-6 ) + { + correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ; + } + else + correlation[j][k] = 0 ; + //printf( "%d %d %d\n", j, k, cnt ) ; + correlation[k][j] = correlation[j][k] ; + } + //printf( "%lf\n", correlation[0][1] ) ; + // Release the memory. + for ( i = 0 ; i < sampleCnt ; ++i ) + delete[] depth[i] ; + delete[] depth ; + delete[] avg ; + delete[] var ; + + prevSeCnt = cnt ; + } + + double Query( int i, int j ) + { + if ( fileList.size() > 1 ) + return correlation[i][j] ; + else + return 0 ; + } + + void Assign( const SubexonCorrelation &c ) + { + int i ; + fileList = c.fileList ; + if ( fileList.size() <= 1 ) + return ; + + if ( correlation != NULL ) + { + for ( i = 0 ; i < seCnt ; ++i ) + delete[] correlation[i] ; + delete[] correlation ; + } + + seCnt = c.seCnt ; + correlation = new double*[seCnt] ; + for ( i = 0 ; i < seCnt ; ++i ) + { + correlation[i] = new double[seCnt] ; + memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ; + } + } +} ; + +#endif