view PsiCLASS-1.0.2/grader.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
line wrap: on
line source

// Compare the reference and the predictions
// Format: ./a.out ref.gtf prediction.gtf
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_TXPT 2000000

#define DEBUG 0

bool flagMultiExonOnly = false ;
bool flagValidChr = true ;
int relaxWidth = 20 ;

struct _pair
{
	int a, b ;
} ;

struct _transcript
{
	char tid[50] ;
	char chrom[50] ;
	char strand ;
	struct _pair *exons ;
	int ecnt ;
} ;

struct _info
{
	double coverage ;
	int byId ; // It gets this coverage by comparing with byId.
} ;

struct _intron
{
	char chrom[50] ;
	int start, end  ;
	int tindex ; // The index to the corresponding transcripts
} ;

struct _exon
{
	char chrom[50] ;
	int start, end ;
	//int tindex ; 
	int soft ; // left-bit: left side is soft, right-bit: right side is soft
	bool matched ;
} ;

int TranscriptComp( const void *p1, const void *p2 )
{
	const struct _transcript *a = (struct _transcript *)p1 ;
	const struct _transcript *b = ( struct _transcript *)p2 ;
	int tmp = strcmp( a->chrom, b->chrom ) ;
	if ( tmp != 0 )
		return tmp ;
	return a->exons[0].a - b->exons[0].a ;
}

int TranscriptComp_ByIntron( const void *p1, const void *p2 )
{
	const struct _transcript *a = (struct _transcript *)p1 ;
	const struct _transcript *b = ( struct _transcript *)p2 ;
	if ( a->strand != b->strand )
		return a->strand - b->strand ;
	int tmp = strcmp( a->chrom, b->chrom ) ;
	if ( tmp != 0 )
		return tmp ;
	if ( a->ecnt != b->ecnt ) 
		return a->ecnt - b->ecnt ;
	int i ;
	for ( i = 0 ; i < a->ecnt - 1 ; ++i )
	{
		if ( a->exons[i].b != b->exons[i].b )
			return a->exons[i].b - b->exons[i].b ;
		if ( a->exons[i + 1].a != b->exons[i + 1].a )
			return a->exons[i + 1].a - b->exons[i + 1].a ;
	}

	return 0 ; //strcmp( a->tid, b->tid ) ;
}

bool validChrom( char *chrom )
{
	if ( !flagValidChr )
		return true ;
	if ( chrom[0] != 'c' || chrom[1] != 'h' || chrom[2] != 'r' )
		return false ;
	// only consider chr1-22,x,y,z
	if ( chrom[3]=='x' || chrom[3]=='X' 
		|| chrom[3] == 'y' || chrom[3] == 'Y' 
		|| chrom[3] == 'm' || chrom[3] == 'M'
		|| ( chrom[3] >= '3' && chrom[3] <= '9' ) )
	{
		if ( chrom[4] == '\0' )
			return true ;
		else
			return false ;
	}
	if ( chrom[3] == '1' )
	{
		if ( chrom[4] == '\0' )
			return true ;
		if ( chrom[4] >= '0' && chrom[4] <= '9' && chrom[5] == '\0' )
			return true ;
		return false ;
	}
	if ( chrom[3] == '2' )
	{
		if ( chrom[4] == '\0' )
			return true ;
		if ( chrom[4] >= '0' && chrom[4] <= '2' && chrom[5] == '\0' )
			return true ;
		return false ;
	}
	return false ;
}

void ReverseExonList( struct _pair *exons, int ecnt )
{
	if ( ecnt < 2 || ( exons[0].a < exons[1].a ) )
		return ;
	int i, j ;
	struct _pair tmp ;

	for ( i = 0, j = ecnt - 1 ; i < j ; ++i, --j )
	{
		tmp = exons[i] ;
		exons[i] = exons[j] ;
		exons[j] = tmp ;
	}
}

/**
  Merge exons that are next to each other. Showed up in scripture.
*/
int CleanExonList( struct _pair *exons, int ecnt )
{
	int i, k ;
	for ( i = 0 ; i < ecnt - 1 ; ++i )
		if ( exons[i + 1].a - exons[i].b - 1 < 20 )
		{
			exons[i + 1].a = exons[i].a ;
			exons[i].a = -1 ;
		}
	k = 0 ;
	for ( i = 0 ; i < ecnt ; ++i )
	{
		if ( exons[i].a == -1 )
			continue ;
		exons[k] = exons[i] ;
		++k ;
	}
	return k ;
}

/**
  Remove the duplicated intron chain in the list
*/
int RemoveDuplicateIntronChain( struct _transcript *t, int tcnt )
{
	int i, j, k ;

	qsort( t, tcnt, sizeof( *t ), TranscriptComp_ByIntron ) ;
	for ( i = 1 ; i < tcnt ; ++i )
	{
		k = i - 1 ;
		while ( t[k].ecnt == -1 )
			--k ;

		if ( t[i].ecnt != t[k].ecnt || strcmp( t[i].chrom, t[k].chrom ) || t[i].strand != t[k].strand )
			continue ;
		for ( j = 0 ; j < t[i].ecnt - 1 ; ++j )
			if ( t[i].exons[j].b != t[k].exons[j].b || 
				t[i].exons[j + 1].a != t[k].exons[j + 1].a )
				break ;
		if ( j >= t[i].ecnt - 1 && t[i].ecnt != 1 )
			t[i].ecnt = -1 ;
		if ( t[i].ecnt == 1 && t[i].exons[0].a == t[k].exons[0].a &&
			t[i].exons[0].b == t[k].exons[0].b )
			t[i].ecnt = -1 ;
		/*if ( t[i].ecnt == -1 )
		{
			printf( "%s <- %s\n", t[i].tid, t[k].tid ) ;
		}*/
	}

	k = 0 ;
	for ( i = 0 ; i < tcnt ; ++i )
	{
		if ( t[i].ecnt == -1 )
		{
			free( t[i].exons ) ;
			continue ;
		}
		t[k] = t[i] ;
		++k ; 
	}
	tcnt = k ;
	return tcnt ;
}


double CompareTranscripts( const struct _transcript &ref, const struct _transcript &pred )
{
	int i, j ;
	struct _pair *refExons = ref.exons ;
	int refECnt = ref.ecnt ;
	struct _pair *predExons = pred.exons  ;
	int predECnt = pred.ecnt ;

	// Prediction must be a "subset" of the reference
	if ( refECnt < predECnt )
		return -1 ;

	// single exon case
	if ( refECnt == 1 )
	{
		if ( refExons[0].b < predExons[0].a || refExons[0].a > predExons[0].b )
			return -1 ;
		else
			return 1 ;
	}
	
	if ( predECnt == 1 )
	{
		for ( i = 0 ; i < refECnt ; ++i )
		{
			if ( refExons[i].b < predExons[0].a || refExons[i].a > predExons[0].b )
				continue ;
			else
			{
				/*if ( i == 0 && predExons[0].a >= refExons[0].a - relaxWidth )
					return 0 ;
				else if ( i == refECnt - 1 && predExons[0].b <= refExons[i].b + relaxWidth )
					return 0 ;
				else if ( i > 0 && i < refECnt - 1 && predExons[0].a >= refExons[i].a - relaxWidth && 
					predExons[0].b <= refExons[i].b + relaxWidth )
					return 0 ;

				return -1 ;*/
				return 0 ;
			}
		}
		return -1 ;
	}

	if ( predECnt > 0 && ref.strand != pred.strand )
		return -1 ;

	// Test the intron chain
	for ( i = 0 ; i < refECnt - 1 ; ++i )
		if ( refExons[i].b == predExons[0].b )
			break ;
	if ( i >= refECnt - 1 )
		return -1 ;
	//if ( i != 0 && predExons[0].a < refExons[i].a - relaxWidth )
	//	return -1 ;

	for ( j = 0 ; i < refECnt - 1 && j < predECnt - 1 ; ++i, ++j )
	{
		if ( refExons[i].b != predExons[j].b || refExons[i + 1].a != predExons[j + 1].a )	
			break ;
	}
	if ( j >= predECnt - 1 )
	{
		/*if ( i == refECnt - 1 ||
			predExons[ predECnt - 1 ].b <= refExons[i].b + relaxWidth )
			return (double)( predECnt - 1 ) / (double)( refECnt - 1 ) ;
		else
			return -1 ;*/
		return (double)( predECnt - 1 ) / (double)( refECnt - 1 ) ;
	}
	else
		return -1 ;
}

bool WithinRange( int a, int b, int w = 10 )
{
	if ( a - b >= -w && a - b <= w )
		return true ;
	return false ;
}

int GetIntrons( struct _transcript *transcripts, int tcnt, struct _intron *introns )
{
	int i, j, k, tag ;
	int cnt = 0 ;
	for ( i = 0 ; i < tcnt ; ++i )
	{
		for ( j = 0 ; j < transcripts[i].ecnt - 1 ; ++j )
		{
			bool flag = false ;

			tag = 0 ;
			for ( k = cnt - 1 ; k >= 0 ; --k )
			{
				if ( strcmp( introns[k].chrom, transcripts[i].chrom ) )
				{
					//printf( "hi\n" ) ;
					tag = k + 1 ;
					break ;
				}
				if ( introns[k].start < transcripts[i].exons[j].b )
				{
					tag = k + 1 ;
					break ;
				}
				else if ( introns[k].start == transcripts[i].exons[j].b &&
					introns[k].end == transcripts[i].exons[j + 1].a )
				{
					flag = true ;
					break ;
				}
			}

			if ( !flag )
			{
				for ( k = cnt ; k > tag ; --k )
					introns[k] = introns[k - 1] ;
				strcpy( introns[k].chrom, transcripts[i].chrom ) ;
				introns[k].start = transcripts[i].exons[j].b ;
				introns[k].end = transcripts[i].exons[j + 1].a ;
				introns[k].tindex = i ;
				++cnt ;
			}
		}
	}
	return cnt ;
}

int GetExons( struct _transcript *transcripts, int tcnt, struct _exon *exons  )
{
	int i, j, k, tag ;
	int cnt = 0 ;
	for ( i = 0 ; i < tcnt ; ++i )
	{
		for ( j = 0 ; j < transcripts[i].ecnt ; ++j )
		{
			bool flag = false ;
			int soft = 0 ;
			if ( j == 0 )
				soft = soft | 2 ;
			if ( j == transcripts[i].ecnt - 1 )
				soft = soft | 1 ;

			tag = 0 ;
			for ( k = cnt - 1 ; k >= 0 ; --k )
			{
				if ( strcmp( exons[k].chrom, transcripts[i].chrom ) )
				{
					//printf( "hi\n" ) ;
					tag = k + 1 ;
					break ;
				}

				if ( exons[k].start < transcripts[i].exons[j].a )
				{
					tag = k + 1 ;
					break ;
				}
				else if ( exons[k].start == transcripts[i].exons[j].a &&
					exons[k].end == transcripts[i].exons[j].b &&
					exons[k].soft == soft )
				{
					flag = true ;
					break ;
				}
			}

			if ( !flag )
			{
				for ( k = cnt ; k > tag ; --k )
					exons[k] = exons[k - 1] ;
				strcpy( exons[k].chrom, transcripts[i].chrom ) ;
				exons[k].start = transcripts[i].exons[j].a ;
				exons[k].end = transcripts[i].exons[j].b ;
				//exons[k].tindex = i ;
				exons[k].soft = soft ;
				exons[k].matched = false ;
				++cnt ;
			}
		}
	}
	return cnt ;
}

//chr1	HAVANA	transcript	320162	324461	.	+	.	gene_id "ENSG00000237094.6"; transcript_id "ENST00000423728.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP4-669L17.10"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP4-669L17.10-002"; level 2; havana_gene "OTTHUMG00000156968.4"; havana_transcript "OTTHUMT00000346879.1";
int main( int argc, char *argv[] )
{
	FILE *fp ;
	struct _transcript *ref, *pred ;		
	struct _info *refInfo, *predInfo ;

	int refCnt, predCnt ;
	char line[10000], filename[10000] ;
	struct _pair tmpExons[10000] ;
	int tmpECnt = 0 ;

	char chrom[50], tool[20], type[40], strand[3] ;
	char tid[50] ;
	char buffer[50] ;
	int start, end ;

	int i, j, k, tag ;

	if ( argc == 1 )
	{
		printf( "Compare the reference transcripts and predicted transcripts.\n"
			"Format: ./grader ref.gtf prediction.gtf\n" ) ;
		exit( 1 ) ;
	}
	// Parse the arguments
	for ( i = 3 ; i < argc ; ++i )
	{
		if ( !strcmp( argv[i], "-M" ) )
			flagMultiExonOnly = true ;
		else if ( !strcmp( argv[i], "-ac" ) )
			flagValidChr = false ;
		else
		{
			printf( "Unknown argument: %s\n", argv[i] ) ;
			exit( 1 ) ;
		}
	}

	/*========================================================================================
	Stage 1: Read the transcripts from the reference and prediction's GTF files.
	========================================================================================*/
	fp = NULL ;
	fp = fopen( argv[1], "r" ) ;
	if ( fp == NULL )
	{
		printf( "Could not open file %s.\n", argv[1] ) ;
		exit( 1 ) ;
	}
	refCnt = 0 ;
	ref = ( struct _transcript *)malloc( MAX_TXPT * sizeof( struct _transcript ) ) ;
	
	strcpy( ref[0].tid, "tmp_-1" ) ;
	while ( fgets( line, sizeof( line ), fp ) != NULL )
	{
		if ( line[0] == '#' )
			continue ;

		sscanf( line, "%s %s %s %d %d %s %s", chrom, tool, type, &start, &end, buffer, strand ) ;
		
		if ( strcmp( type, "exon" ) || !validChrom( chrom ) )
			continue ;

		char *p = strstr( line, "transcript_id" ) ;
		for ( ; *p != ' ' ; ++p )
			;
		p += 2 ;
		sscanf( p, "%s", tid ) ;
		//printf( "+%s %d\n", tid, strlen( tid )  ) ;
		p = tid + strlen( tid ) ;
		while ( *p != '\"' )
			--p ;
		*p = '\0' ;
		//printf( "%s\n", tid ) ;
		if ( strcmp( tid, ref[ refCnt ].tid ) && tmpECnt )
		{
			ReverseExonList( tmpExons, tmpECnt ) ;
			tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
			if ( tmpECnt > 1 || !flagMultiExonOnly )
			{
				ref[ refCnt ].ecnt = tmpECnt ;
				ref[ refCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ; 
				memcpy( ref[ refCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
				++refCnt ;
			}
			tmpECnt = 0 ;
		}

		tmpExons[ tmpECnt ].a = start ;
		tmpExons[ tmpECnt ].b = end ;
		++tmpECnt ;
		
		ref[ refCnt ].strand = strand[0] ;
		strcpy( ref[ refCnt ].chrom, chrom ) ;
		strcpy( ref[ refCnt ].tid, tid ) ;
	}
	if ( tmpECnt != 0 )
	{
		ReverseExonList( tmpExons, tmpECnt ) ;
		tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
		if ( tmpECnt > 1 || !flagMultiExonOnly )
		{
			ref[ refCnt ].ecnt = tmpECnt ;
			ref[ refCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ; 
			memcpy( ref[ refCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
			++refCnt ;
		}
		tmpECnt = 0 ;
	}
	fclose( fp ) ;

	fp = NULL ;
	fp = fopen( argv[2], "r" ) ;
	if ( fp == NULL )
	{
		printf( "Could not open file %s.\n", argv[2] ) ;
		exit( 1 ) ;
	}
	predCnt = 0 ;
	pred = ( struct _transcript *)malloc( MAX_TXPT * sizeof( struct _transcript ) ) ;
	
	strcpy( pred[0].tid, "tmp_-1" ) ;
	tmpECnt = 0 ;
	while ( fgets( line, sizeof( line ), fp ) != NULL )
	{
		if ( line[0] == '#' )
			continue ;
		sscanf( line, "%s %s %s %d %d %s %s", chrom, tool, type, &start, &end, buffer, strand ) ;
		
		if ( strcmp( type, "exon" ) || !validChrom( chrom ) )
			continue ;

		char *p = strstr( line, "transcript_id" ) ;
		//char *p = strstr( line, "gene_name" ) ;
		for ( ; *p != ' ' ; ++p )
			;
		p += 2 ;
		sscanf( p, "%s", tid ) ;
		//tid[ strlen( tid ) - 2 ] = '\0' ;
		p = tid + strlen( tid ) ;
		while ( *p != '\"' )
			--p ;
		*p = '\0' ;

		if ( strcmp( tid, pred[ predCnt ].tid ) && tmpECnt )
		{
			ReverseExonList( tmpExons, tmpECnt ) ;
			tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
			if ( tmpECnt > 1 || !flagMultiExonOnly )
			{
				pred[ predCnt ].ecnt = tmpECnt ;
				pred[ predCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ; 
				memcpy( pred[ predCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
				++predCnt ;
			}
			tmpECnt = 0 ;
		}
		
		tmpExons[ tmpECnt ].a = start ;
		tmpExons[ tmpECnt ].b = end ;
		++tmpECnt ;
		
		pred[ predCnt ].strand = strand[0] ;
		strcpy( pred[ predCnt ].chrom, chrom ) ;
		strcpy( pred[ predCnt ].tid, tid ) ;
	}
	if ( tmpECnt > 0 )
	{
		ReverseExonList( tmpExons, tmpECnt ) ;
		tmpECnt = CleanExonList( tmpExons, tmpECnt ) ;
		if ( tmpECnt > 1 || !flagMultiExonOnly )
		{
			pred[ predCnt ].ecnt = tmpECnt ;
			pred[ predCnt ].exons = ( struct _pair * )malloc( sizeof( struct _pair ) * tmpECnt ) ; 
			memcpy( pred[ predCnt ].exons, tmpExons, sizeof( tmpExons[0] ) * tmpECnt ) ;
			++predCnt ;
		}
		tmpECnt = 0 ;
	}
	
	refCnt = RemoveDuplicateIntronChain( ref, refCnt ) ;
	//printf( "predCnt = %d\n", predCnt ) ;	
	predCnt = RemoveDuplicateIntronChain( pred, predCnt ) ;
	//printf( "predCnt = %d\n", predCnt ) ;	
	qsort( ref, refCnt, sizeof( ref[0] ), TranscriptComp ) ;
	qsort( pred, predCnt, sizeof( pred[0] ), TranscriptComp ) ;

	
	/*========================================================================================
	  Stage 2: Compute the recall and precision.
	  ========================================================================================*/
	refInfo = ( struct _info * )malloc( refCnt * sizeof( *refInfo ) ) ;
	predInfo = ( struct _info * )malloc( predCnt * sizeof( *predInfo ) ) ;
	for ( i = 0 ; i < refCnt ; ++i )
	{
		refInfo[i].coverage = -1 ;
		refInfo[i].byId = -1 ;
#if DEBUG	
		printf( "=%d %s %s %d\n", i, ref[i].chrom, ref[i].tid, ref[i].ecnt ) ;
		for ( j = 0 ; j < ref[i].ecnt ; ++j )
			printf( "%d %d\n", ref[i].exons[j].a, ref[i].exons[j].b ) ;
#endif

	}
	for ( i = 0 ; i < predCnt ; ++i )
	{
		predInfo[i].coverage = -1 ;
		predInfo[i].byId = -1 ;
#if DEBUG	
		printf( "#%d %s %s %d\n", i, pred[i].chrom, pred[i].tid, pred[i].ecnt ) ;
		for ( j = 0 ; j < pred[i].ecnt ; ++j )
			printf( "%d %d\n", pred[i].exons[j].a, pred[i].exons[j].b ) ;
#endif	
	}
	tag = 0 ;
	for ( i = 0 ; i < predCnt ; ++i )
	{
		while ( tag < refCnt && 
				( ( strcmp( ref[tag].chrom, pred[i].chrom ) < 0 )  || 
				  ( !strcmp( ref[tag].chrom, pred[i].chrom  ) && 
				    ref[tag].exons[ ref[tag].ecnt - 1 ].b < pred[i].exons[0].a - relaxWidth ) ) )
			++tag ;

		//	if ( i == 16474 )
		//		printf( "%d %d %d", i, tag, refCnt ) ;
		for ( j = tag ; j < refCnt && ref[j].exons[0].a <= pred[i].exons[ pred[i].ecnt - 1 ].b + relaxWidth && !strcmp( ref[j].chrom, pred[i].chrom ); ++j )
		{
			/*if ( !strcmp( pred[i].tid, "CUFF.4256.1" ) )
			{
				printf( "%s ? %s\n", pred[i].tid, ref[j].tid ) ;
			}*/
			//		if ( i == 16474 )
			//		{
			//			printf( "%d %d\n", i, j ) ;
			//		}
			double coverage = CompareTranscripts( ref[j], pred[i] ) ;
			if ( coverage > refInfo[j].coverage )
			{
				refInfo[j].coverage = coverage ;
				refInfo[j].byId = i ;
			}
			if ( coverage > predInfo[i].coverage )
			{
				predInfo[i].coverage = coverage ;
				predInfo[i].byId = j ;
			}
		}
	}

	/*========================================================================================
	  Stage 3: Dump the information into output files.
	  ========================================================================================*/
	//sprintf( filename, "grader.%s.recall", argv[1] ) ;
	sprintf( filename, "grader.recall" ) ;
	fp = fopen( filename, "w" ) ;
	for ( i = 0 ; i < refCnt ; ++i )
	{
		fprintf( fp, "%s\t%d\t%lf\t%s\n", ref[i].tid, ref[i].ecnt, refInfo[i].coverage, 
			refInfo[i].byId == -1 ? "-" :  pred[ refInfo[i].byId ].tid  ) ;		
	}
	fclose( fp ) ;

	//sprintf( filename, "grader.%s.precision", argv[2] ) ;
	sprintf( filename, "grader.precision" ) ;
	fp = fopen( filename, "w" ) ;
	for ( i = 0 ; i < predCnt ; ++i )
	{
		fprintf( fp, "%s\t%d\t%lf\t%s\n", pred[i].tid, pred[i].ecnt, predInfo[i].coverage,
			predInfo[i].byId == -1 ? "-" : ref[predInfo[i].byId].tid ) ;		
	}
	fclose( fp ) ;

	// print the summary to the stdout
	const int binCnt = 20 ;
	int bins[binCnt + 1] ;
	memset( bins, 0, sizeof( bins ) ) ;
	k = 0 ;
	for ( i = 0 ; i < refCnt ; ++i )
	{
		if ( refInfo[i].coverage == -1 )
			continue ;
		++bins[ (int)( refInfo[i].coverage * binCnt ) ] ;
		++k ;
	}
	for ( i = 0 ; i <= binCnt ; ++i )
	{
		printf( "recall %lf %lf %d/%d\n", (double)i / binCnt, (double)k / refCnt, k, refCnt ) ;
		k -= bins[i] ;
	}

	memset( bins, 0, sizeof( bins ) ) ;
	k = 0 ;
	for ( i = 0 ; i < predCnt ; ++i )
	{
		if ( predInfo[i].coverage == -1 )
			continue ;
		++bins[ (int)( predInfo[i].coverage * binCnt ) ] ;
		++k ;
	}
	for ( i = 0 ; i <= binCnt ; ++i )
	{
		printf( "precision %lf %lf %d/%d\n", (double)i / binCnt, (double)k / predCnt, k, predCnt ) ;
		k -= bins[i] ;
	}

	/*========================================================================================
	Stage 4: Evaluations for introns.
	========================================================================================*/
	struct _intron *refIntrons = ( struct _intron * )malloc( sizeof( struct _intron ) * MAX_TXPT ) ;
	int riCnt = GetIntrons( ref, refCnt, refIntrons ) ;
	struct _intron *predIntrons = ( struct _intron * )malloc( sizeof( struct _intron ) * MAX_TXPT ) ;
	int piCnt = GetIntrons( pred, predCnt, predIntrons )  ;
	int matchedIntron = 0 ;
	/*for ( i = 0 ; i < riCnt ; ++i )
	{
		printf( "%d %s %d %d\n", i, refIntrons[i].chrom, refIntrons[i].start, refIntrons[i].end ) ;
	}*/

	fp = fopen( "grader.intron", "w" ) ;
	tag = 0 ;
	for ( i = 0 ; i < piCnt ; ++i )
	{
		bool flag = false ;
		while ( 1 )
		{
			if ( tag >= riCnt )
				break ;
			int tmp = strcmp( refIntrons[tag].chrom, predIntrons[i].chrom ) ;			
			if ( tmp < 0 || ( tmp == 0 && refIntrons[tag].start < predIntrons[i].start ) )
				++tag ;
			else
				break ;
		}
		//printf( "%d (%s %d %d) %d=>", i, predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, tag ) ;
		/*if ( predIntrons[i].start == 1613457 )
			printf( "%d %d\n", tag, riCnt ) ;*/
		for ( j = tag ; j < riCnt ; ++j )
		{
			if ( strcmp( refIntrons[j].chrom, predIntrons[i].chrom ) > 0 || 
				refIntrons[j].start > predIntrons[i].start /*+ 10*/ )	
				break ;
			if ( refIntrons[j].start == predIntrons[i].start &&
				refIntrons[j].end == predIntrons[i].end )
			//if ( WithinRange( refIntrons[j].start, predIntrons[i].start ) &&
			//	WithinRange( refIntrons[j].end, predIntrons[i].end ) )
			{
				++matchedIntron ;
				flag = true ;
				break ;
			}
		}
		//printf( "%d\n", j ) ;
		if ( flag )
			fprintf( fp, "Y\t%s\t%d\t%d\t%s\n", predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, pred[ predIntrons[i].tindex ].tid ) ;
		else
			fprintf( fp, "N\t%s\t%d\t%d\t%s\n", predIntrons[i].chrom, predIntrons[i].start, predIntrons[i].end, pred[ predIntrons[i].tindex ].tid  ) ;
	}
	fclose( fp ) ;
	printf( "\n" ) ;
	printf( "Intron evaluation\n") ;
	//printf( "\tmatched\t%d\n", matchedIntron ) ;
	printf( "\trecall\t%lf\t%d/%d\n", matchedIntron / (double)riCnt, matchedIntron, riCnt ) ;
	printf( "\tprecision\t%lf\t%d/%d\n", matchedIntron / (double)piCnt, matchedIntron, piCnt ) ;

	/*========================================================================================
	Stage 4: Evaluations for exons.
	========================================================================================*/
	struct _exon *refExons = ( struct _exon * )malloc( sizeof( struct _exon ) * MAX_TXPT ) ;
	int reCnt = GetExons( ref, refCnt, refExons ) ;
	struct _exon *predExons = ( struct _exon * )malloc( sizeof( struct _exon ) * MAX_TXPT ) ;
	int peCnt = GetExons( pred, predCnt, predExons )  ;
	int matchedExons = 0, matchedInternalExons = 0 ;
	int refInternalExons = 0, predInternalExons = 0 ; 

	for ( i = 0 ; i < reCnt ; ++i )
	{
		//printf( "%d %d\n", refExons[i].start, refExons[i].end ) ;
		if ( refExons[i].soft == 0 )
			++refInternalExons ;
		//if ( refExons[i].soft == 3 )
		//	printf( "hi\n" ) ;
	}
	for ( i = 0 ; i < peCnt ; ++i )
		if ( predExons[i].soft == 0 )
			++predInternalExons ;
	tag = 0 ;
	for ( i = 0 ; i < peCnt ; ++i )
	{
		bool flag = false ;
		while ( 1 )
		{
			if ( tag >= reCnt )
				break ;
			int tmp = strcmp( refExons[tag].chrom, predExons[i].chrom ) ;			
			if ( tmp < 0 || ( tmp == 0 && refExons[tag].end < predExons[i].start ) )
				++tag ;
			else
				break ;
		}
		for ( j = tag ; j < reCnt ; ++j )
		{
			if ( strcmp( refExons[j].chrom, predExons[i].chrom ) > 0 || 
				refExons[j].start > predExons[i].end /*+ 10*/ )	
				break ;
			if ( refExons[j].soft != predExons[i].soft 
				|| refExons[j].end < predExons[i].start )
				continue ;

			if ( refExons[j].start == predExons[i].start &&
				refExons[j].end == predExons[i].end && predExons[i].soft == 0 )
			//if ( WithinRange( refIntrons[j].start, predIntrons[i].start ) &&
			//	WithinRange( refIntrons[j].end, predIntrons[i].end ) )
			{
				refExons[j].matched = true ;
				predExons[i].matched = true ;
				++matchedInternalExons ;
				flag = true ;
				break ;
			}
			else if ( ( refExons[j].start == predExons[i].start || ( predExons[i].soft & 2 ) ) 
				&& ( refExons[j].end == predExons[i].end || (predExons[i].soft & 1 ) ) )
			{
				refExons[j].matched = true ;
				predExons[i].matched = true ;
				flag = true ;
				//break ;
			}
		}
		//printf( "%d\n", j ) ;
	}

	printf( "\n" ) ;
	printf( "Exon evaluation\n" ) ;
	//printf( "\tmatched\t%d\n", matchedExons ) ;
	matchedExons = 0 ;
	for ( i = 0 ; i < reCnt ; ++i )
		if ( refExons[i].matched )
			++matchedExons ;
	printf( "\trecall\t%lf\t%d/%d\n", (double)matchedExons / reCnt, matchedExons, reCnt ) ;
	matchedExons = 0 ;
	for ( i = 0 ; i < peCnt ; ++i )
		if ( predExons[i].matched )
			++matchedExons ;
	printf( "\tprecision\t%lf\t%d/%d\n", (double)matchedExons / peCnt, matchedExons, peCnt ) ;

	printf( "\n" ) ;
	printf( "Internal exon evaluation\n" ) ;
	//printf( "\tmatched\t%d\n", matchedInternalExons ) ;
	printf( "\trecall\t%lf\t%d/%d\n", (double)matchedInternalExons / refInternalExons, matchedInternalExons, refInternalExons ) ;
	printf( "\tprecision\t%lf\t%d/%d\n", (double)matchedInternalExons / predInternalExons, matchedInternalExons, predInternalExons ) ;
	return 0 ;
}