| 0 | 1 /* | 
|  | 2  * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University | 
|  | 3  * All rights reserved. | 
|  | 4  * | 
|  | 5  * Redistribution and use in source and binary forms, with or without modification, | 
|  | 6  * are permitted provided that the following conditions are met: | 
|  | 7  * | 
|  | 8  * Redistributions of source code must retain the above copyright notice, this list | 
|  | 9  * of conditions and the following disclaimer. | 
|  | 10  * - Redistributions in binary form must reproduce the above copyright notice, this | 
|  | 11  *   list of conditions and the following disclaimer in the documentation and/or other | 
|  | 12  *   materials provided with the distribution. | 
|  | 13  * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be | 
|  | 14  *   used to endorse or promote products derived from this software without specific | 
|  | 15  *   prior written permission. | 
|  | 16  * | 
|  | 17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|  | 18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|  | 19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
|  | 20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | 
|  | 21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
|  | 22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
|  | 23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
|  | 24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
|  | 25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 
|  | 26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
|  | 27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  | 28  */ | 
|  | 29 | 
|  | 30 /* | 
|  | 31  * Author         : Faraz Hach | 
|  | 32  * Email          : fhach AT cs DOT sfu | 
|  | 33  * Last Update    : 2009-01-29 | 
|  | 34  */ | 
|  | 35 | 
|  | 36 #include <stdio.h> | 
|  | 37 #include <stdlib.h> | 
|  | 38 #include <string.h> | 
|  | 39 #include <math.h> | 
|  | 40 #include "Common.h" | 
|  | 41 #include "Reads.h" | 
|  | 42 #include "HashTable.h" | 
|  | 43 #include "Output.h" | 
|  | 44 #include "MrsFAST.h" | 
|  | 45 #include "RefGenome.h" | 
|  | 46 | 
|  | 47 float calculateScore(int index, char *seq, char *qual, int *err); | 
|  | 48 unsigned char		mrFAST = 0; | 
|  | 49 char				*versionNumberF="0.2"; | 
|  | 50 | 
|  | 51 long long			verificationCnt = 0; | 
|  | 52 long long 			mappingCnt = 0; | 
|  | 53 long long			mappedSeqCnt = 0; | 
|  | 54 long long			completedSeqCnt = 0; | 
|  | 55 char				*mappingOutput; | 
|  | 56 /**********************************************/ | 
|  | 57 char				*_msf_refGen = NULL; | 
|  | 58 int					_msf_refGenLength = 0; | 
|  | 59 int					_msf_refGenOffset = 0; | 
|  | 60 char				*_msf_refGenName = NULL; | 
|  | 61 | 
|  | 62 int					_msf_refGenBeg; | 
|  | 63 int					_msf_refGenEnd; | 
|  | 64 | 
|  | 65 IHashTable			*_msf_hashTable = NULL; | 
|  | 66 | 
|  | 67 int					*_msf_samplingLocs; | 
|  | 68 int					*_msf_samplingLocsEnds; | 
|  | 69 int					_msf_samplingLocsSize; | 
|  | 70 | 
|  | 71 Read				*_msf_seqList; | 
|  | 72 int					_msf_seqListSize; | 
|  | 73 | 
|  | 74 ReadIndexTable		*_msf_rIndex = NULL; | 
|  | 75 int					_msf_rIndexSize; | 
|  | 76 int					_msf_rIndexMax; | 
|  | 77 | 
|  | 78 SAM					_msf_output; | 
|  | 79 | 
|  | 80 OPT_FIELDS			*_msf_optionalFields; | 
|  | 81 | 
|  | 82 char				*_msf_op; | 
|  | 83 | 
|  | 84 char				_msf_numbers[200][3]; | 
|  | 85 char				_msf_cigar[5]; | 
|  | 86 | 
|  | 87 MappingInfo			*_msf_mappingInfo; | 
|  | 88 | 
|  | 89 int					*_msf_seqHits; | 
|  | 90 int					_msf_openFiles = 0; | 
|  | 91 int					_msf_maxLSize=0; | 
|  | 92 int					_msf_maxRSize=0; | 
|  | 93 /**********************************************/ | 
|  | 94 int compare (const void *a, const void *b) | 
|  | 95 { | 
|  | 96 	return ((Pair *)a)->hv - ((Pair *)b)->hv; | 
|  | 97 } | 
|  | 98 /**********************************************/ | 
|  | 99 void preProcessReads() | 
|  | 100 { | 
|  | 101 	int i=0; | 
|  | 102 	int j=0; | 
|  | 103 	int pos = 0; | 
|  | 104 | 
|  | 105 	_msf_rIndexMax = -1; | 
|  | 106 | 
|  | 107 	int tmpSize = _msf_seqListSize*_msf_samplingLocsSize*2; | 
|  | 108 	Pair *tmp = getMem(sizeof(Pair)*tmpSize); | 
|  | 109 | 
|  | 110 	for (i=0; i<_msf_seqListSize; i++) | 
|  | 111 	{ | 
|  | 112 		for (j=0; j< _msf_samplingLocsSize; j++) | 
|  | 113 		{ | 
|  | 114 | 
|  | 115 			tmp[pos].hv = hashVal(_msf_seqList[i].seq+_msf_samplingLocs[j]); | 
|  | 116 			tmp[pos].seqInfo = pos; | 
|  | 117 			pos++; | 
|  | 118 		} | 
|  | 119 		for (j=0; j<_msf_samplingLocsSize; j++) | 
|  | 120 		{ | 
|  | 121 			tmp[pos].hv = hashVal(_msf_seqList[i].rseq+_msf_samplingLocs[j]); | 
|  | 122 			tmp[pos].seqInfo = pos; | 
|  | 123 			pos++; | 
|  | 124 		} | 
|  | 125 	} | 
|  | 126 | 
|  | 127 	qsort(tmp, tmpSize, sizeof(Pair), compare); | 
|  | 128 | 
|  | 129 | 
|  | 130 	int uniq = 0; | 
|  | 131 	int prev = -2; | 
|  | 132 	int beg = -1; | 
|  | 133 	int end = -1; | 
|  | 134 | 
|  | 135 	for (i=0; i<tmpSize; i++) | 
|  | 136 	{ | 
|  | 137 		if (prev != tmp[i].hv) | 
|  | 138 		{ | 
|  | 139 			uniq ++; | 
|  | 140 			prev = tmp[i].hv; | 
|  | 141 		} | 
|  | 142 	} | 
|  | 143 | 
|  | 144 	_msf_rIndexSize = uniq; | 
|  | 145 	_msf_rIndex = getMem(sizeof(ReadIndexTable)*_msf_rIndexSize); | 
|  | 146 	prev = -2; | 
|  | 147 | 
|  | 148 	j=0; | 
|  | 149 	beg =0; | 
|  | 150 	while (beg < tmpSize) | 
|  | 151 	{ | 
|  | 152 		end = beg; | 
|  | 153 		while (end+1<tmpSize && tmp[end+1].hv==tmp[beg].hv) | 
|  | 154 			end++; | 
|  | 155 | 
|  | 156 		_msf_rIndex[j].hv = tmp[beg].hv; | 
|  | 157 		_msf_rIndex[j].seqInfo = getMem(sizeof(int)*(end-beg+2)); | 
|  | 158 		_msf_rIndex[j].seqInfo[0] = end-beg+1; | 
|  | 159 		if ((end-beg+1) > _msf_rIndexMax) | 
|  | 160 			_msf_rIndexMax = end-beg+1; | 
|  | 161 | 
|  | 162 		for (i=1; i<=_msf_rIndex[j].seqInfo[0]; i++) | 
|  | 163 		{ | 
|  | 164 			_msf_rIndex[j].seqInfo[i]=tmp[beg+i-1].seqInfo; | 
|  | 165 		} | 
|  | 166 		j++; | 
|  | 167 		beg = end+1; | 
|  | 168 	} | 
|  | 169 	freeMem(tmp, sizeof(Pair)*tmpSize); | 
|  | 170 } | 
|  | 171 /**********************************************/ | 
|  | 172 void initFAST(Read *seqList, int seqListSize, int *samplingLocs, int samplingLocsSize, char *genFileName) | 
|  | 173 { | 
|  | 174 	if (_msf_optionalFields == NULL) | 
|  | 175 	{ | 
|  | 176 		_msf_op = getMem(SEQ_LENGTH); | 
|  | 177 		if (pairedEndMode) | 
|  | 178 		{ | 
|  | 179 			_msf_optionalFields = getMem(4*sizeof(OPT_FIELDS)); | 
|  | 180 		} | 
|  | 181 		else | 
|  | 182 		{ | 
|  | 183 			_msf_optionalFields = getMem(2*sizeof(OPT_FIELDS)); | 
|  | 184 		} | 
|  | 185 | 
|  | 186 		int i; | 
|  | 187 		for (i=0; i<200;i++) | 
|  | 188 		{ | 
|  | 189 			sprintf(_msf_numbers[i],"%d%c",i, '\0'); | 
|  | 190 		} | 
|  | 191 		sprintf(_msf_cigar, "%dM", SEQ_LENGTH); | 
|  | 192 	} | 
|  | 193 | 
|  | 194 	if (_msf_samplingLocsEnds == NULL) | 
|  | 195 	{ | 
|  | 196 		int i; | 
|  | 197 		_msf_samplingLocs = samplingLocs; | 
|  | 198 		_msf_samplingLocsSize = samplingLocsSize; | 
|  | 199 | 
|  | 200 		_msf_samplingLocsEnds = malloc(sizeof(int)*_msf_samplingLocsSize); | 
|  | 201 		for (i=0; i<_msf_samplingLocsSize; i++) | 
|  | 202 		{ | 
|  | 203 			_msf_samplingLocsEnds[i]=_msf_samplingLocs[i]+WINDOW_SIZE-1; | 
|  | 204 		} | 
|  | 205 | 
|  | 206 		_msf_seqList = seqList; | 
|  | 207 		_msf_seqListSize = seqListSize; | 
|  | 208 | 
|  | 209 		preProcessReads(); | 
|  | 210 	} | 
|  | 211 	if (_msf_refGenName == NULL) | 
|  | 212 	{ | 
|  | 213 		_msf_refGenName = getMem(SEQ_LENGTH); | 
|  | 214 	} | 
|  | 215 	_msf_refGen =  getRefGenome(); | 
|  | 216 	_msf_refGenLength = strlen(_msf_refGen); | 
|  | 217 	_msf_refGenOffset = getRefGenomeOffset(); | 
|  | 218 	sprintf(_msf_refGenName,"%s%c", getRefGenomeName(), '\0'); | 
|  | 219 | 
|  | 220 	if (pairedEndMode && _msf_seqHits == NULL) | 
|  | 221 	{ | 
|  | 222 		_msf_mappingInfo  = getMem(seqListSize * sizeof (MappingInfo)); | 
|  | 223 | 
|  | 224 		int i=0; | 
|  | 225 		for (i=0; i<seqListSize; i++) | 
|  | 226 		{ | 
|  | 227 			//_msf_mappingInfo[i].next = getMem(sizeof(MappingLocations)); | 
|  | 228 			_msf_mappingInfo[i].next = NULL; | 
|  | 229 			_msf_mappingInfo[i].size = 0; | 
|  | 230 		} | 
|  | 231 | 
|  | 232 		_msf_seqHits = getMem((_msf_seqListSize/2) * sizeof(int)); | 
|  | 233 | 
|  | 234 | 
|  | 235 		for (i=0; i<_msf_seqListSize/2; i++) | 
|  | 236 		{ | 
|  | 237 			_msf_seqHits[i] = 0; | 
|  | 238 		} | 
|  | 239 | 
|  | 240 		initLoadingRefGenome(genFileName); | 
|  | 241 	} | 
|  | 242 | 
|  | 243 	if (_msf_refGenOffset == 0) | 
|  | 244 	{ | 
|  | 245 		_msf_refGenBeg = 1; | 
|  | 246 	} | 
|  | 247 	else | 
|  | 248 	{ | 
|  | 249 		_msf_refGenBeg = CONTIG_OVERLAP - SEQ_LENGTH + 2; | 
|  | 250 	} | 
|  | 251 	_msf_refGenEnd = _msf_refGenLength - SEQ_LENGTH + 1; | 
|  | 252 | 
|  | 253 | 
|  | 254 } | 
|  | 255 /**********************************************/ | 
|  | 256 void finalizeFAST() | 
|  | 257 { | 
|  | 258 	freeMem(_msf_seqHits, (_msf_seqListSize/2) * sizeof(int)); | 
|  | 259 	freeMem(_msf_refGenName, SEQ_LENGTH); | 
|  | 260 	int i; | 
|  | 261 	for (i=0; i<_msf_rIndexSize; i++) | 
|  | 262 	{ | 
|  | 263 		freeMem(_msf_rIndex[i].seqInfo, _msf_rIndex[i].seqInfo[0]+1); | 
|  | 264 	} | 
|  | 265 	freeMem(_msf_rIndex, _msf_rIndexSize); | 
|  | 266 } | 
|  | 267 | 
|  | 268 /**********************************************/ | 
|  | 269 int verifySingleEnd(int index, char* seq, int offset) | 
|  | 270 { | 
|  | 271 	int curOff = 0; | 
|  | 272 	int i; | 
|  | 273 | 
|  | 274 	char *ref; | 
|  | 275 | 
|  | 276 	int err; | 
|  | 277 	int errCnt =0; | 
|  | 278 	int errCntOff = 0; | 
|  | 279 	int NCntOff = 0; | 
|  | 280 | 
|  | 281 	int matchCnt = 0; | 
|  | 282 	int pp = 0; | 
|  | 283 | 
|  | 284 	ref = _msf_refGen + index - 1; | 
|  | 285 | 
|  | 286 	verificationCnt++; | 
|  | 287 | 
|  | 288 	for (i = 0; i < SEQ_LENGTH; i++) | 
|  | 289 	{ | 
|  | 290 		err	= *ref != *seq; | 
|  | 291 | 
|  | 292 		errCnt += err; | 
|  | 293 		if (errCnt > errThreshold) | 
|  | 294 		{ | 
|  | 295 | 
|  | 296 			return -1; | 
|  | 297 		} | 
|  | 298 | 
|  | 299 		if (i >= _msf_samplingLocs[curOff] && i <= _msf_samplingLocsEnds[curOff]) | 
|  | 300 		{ | 
|  | 301 			errCntOff +=  err; | 
|  | 302 			NCntOff += (*seq == 'N'); | 
|  | 303 		} | 
|  | 304 		else if (curOff < _msf_samplingLocsSize && i>=_msf_samplingLocs[curOff+1]) | 
|  | 305 		{ | 
|  | 306 | 
|  | 307 			if (errCntOff == 0 && NCntOff == 0 && offset > curOff) | 
|  | 308 			{ | 
|  | 309 				return -1; | 
|  | 310 			} | 
|  | 311 | 
|  | 312 			errCntOff = 0; | 
|  | 313 			NCntOff = 0; | 
|  | 314 			curOff++; | 
|  | 315 | 
|  | 316 			if ( i >= _msf_samplingLocs[curOff]) | 
|  | 317 			{ | 
|  | 318 				errCntOff += err; | 
|  | 319 				NCntOff += (*seq == 'N'); | 
|  | 320 			} | 
|  | 321 		} | 
|  | 322 | 
|  | 323 		ref++; | 
|  | 324 		seq++; | 
|  | 325 	} | 
|  | 326 	return errCnt; | 
|  | 327 } | 
|  | 328 | 
|  | 329 /**********************************************/ | 
|  | 330 int calculateMD(int index, char *seq, int err, char **opSeq) | 
|  | 331 { | 
|  | 332 	int i; | 
|  | 333 	char *ref; | 
|  | 334 	char *ver; | 
|  | 335 	short matchCnt = 0; | 
|  | 336 	char *op = *opSeq; | 
|  | 337 	int pp = 0; | 
|  | 338 | 
|  | 339 	ref = _msf_refGen + index-1; | 
|  | 340 	ver = seq; | 
|  | 341 | 
|  | 342 	if (err>0 || err == -1 ) | 
|  | 343 	{ | 
|  | 344 | 
|  | 345 		err = 0; | 
|  | 346 		for (i=0; i < SEQ_LENGTH; i++) | 
|  | 347 		{ | 
|  | 348 			if (* ref != *ver) | 
|  | 349 			{ | 
|  | 350 				err++; | 
|  | 351 				if (matchCnt) | 
|  | 352 				{ | 
|  | 353 					if (matchCnt < 10) | 
|  | 354 					{ | 
|  | 355 						op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 356 					} | 
|  | 357 					else if (matchCnt < 100) | 
|  | 358 					{ | 
|  | 359 						op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 360 						op[pp++]=_msf_numbers[matchCnt][1]; | 
|  | 361 					} | 
|  | 362 					else | 
|  | 363 					{ | 
|  | 364 						op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 365 						op[pp++]=_msf_numbers[matchCnt][1]; | 
|  | 366 						op[pp++]=_msf_numbers[matchCnt][2]; | 
|  | 367 					} | 
|  | 368 | 
|  | 369 					matchCnt = 0; | 
|  | 370 				} | 
|  | 371 				op[pp++]=*ref; | 
|  | 372 			} | 
|  | 373 			else | 
|  | 374 			{ | 
|  | 375 				matchCnt++; | 
|  | 376 			} | 
|  | 377 			ref++; | 
|  | 378 			ver++; | 
|  | 379 		} | 
|  | 380 | 
|  | 381 	} | 
|  | 382 	if (err == 0) | 
|  | 383 	{ | 
|  | 384 		matchCnt = SEQ_LENGTH; | 
|  | 385 	} | 
|  | 386 | 
|  | 387 	if (matchCnt>0) | 
|  | 388 	{ | 
|  | 389 		if (matchCnt < 10) | 
|  | 390 		{ | 
|  | 391 			op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 392 		} | 
|  | 393 		else if (matchCnt < 100) | 
|  | 394 		{ | 
|  | 395 			op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 396 			op[pp++]=_msf_numbers[matchCnt][1]; | 
|  | 397 		} | 
|  | 398 		else | 
|  | 399 		{ | 
|  | 400 			op[pp++]=_msf_numbers[matchCnt][0]; | 
|  | 401 			op[pp++]=_msf_numbers[matchCnt][1]; | 
|  | 402 			op[pp++]=_msf_numbers[matchCnt][2]; | 
|  | 403 		} | 
|  | 404 	} | 
|  | 405 	op[pp]='\0'; | 
|  | 406 | 
|  | 407 	return err; | 
|  | 408 } | 
|  | 409 | 
|  | 410 /**********************************************/ | 
|  | 411 void mapSingleEndSeqListBal(unsigned int *l1, int s1, unsigned int *l2, int s2, int dir) | 
|  | 412 { | 
|  | 413 | 
|  | 414 	if (s1 == 0 || s2 == 0) | 
|  | 415 	{ | 
|  | 416 		return; | 
|  | 417 	} | 
|  | 418 	else if (s1 == s2 && s1 <= 50) | 
|  | 419 	{ | 
|  | 420 | 
|  | 421 		int j = 0; | 
|  | 422 		int z = 0; | 
|  | 423 		int *locs; | 
|  | 424 		int *seqInfo; | 
|  | 425 		char *_tmpSeq, *_tmpQual; | 
|  | 426 		char rqual[QUAL_LENGTH+1]; | 
|  | 427 		rqual[QUAL_LENGTH]='\0'; | 
|  | 428 | 
|  | 429 		if (dir > 0) | 
|  | 430 		{ | 
|  | 431 			locs		= (int *) l1; | 
|  | 432 			seqInfo		= (int *) l2; | 
|  | 433 		} | 
|  | 434 		else | 
|  | 435 		{ | 
|  | 436 			locs		= (int *) l2; | 
|  | 437 			seqInfo		= (int *) l1; | 
|  | 438 		} | 
|  | 439 | 
|  | 440 | 
|  | 441 		for (j=0; j<s2; j++) | 
|  | 442 		{ | 
|  | 443 			int re = _msf_samplingLocsSize * 2; | 
|  | 444 			int r = seqInfo[j]/re; | 
|  | 445 			if (maxHits!=0 && _msf_seqList[r].hits[0] == maxHits) | 
|  | 446 			{ | 
|  | 447 				continue; | 
|  | 448 			} | 
|  | 449 | 
|  | 450 			int x = seqInfo[j] % re; | 
|  | 451 			int o = x % _msf_samplingLocsSize; | 
|  | 452 			char d = (x/_msf_samplingLocsSize)?1:0; | 
|  | 453 | 
|  | 454 | 
|  | 455 			if (d) | 
|  | 456 			{ | 
|  | 457 				reverse(_msf_seqList[r].qual, rqual, QUAL_LENGTH); | 
|  | 458 				_tmpQual = rqual; | 
|  | 459 				_tmpSeq = _msf_seqList[r].rseq; | 
|  | 460 			} | 
|  | 461 			else | 
|  | 462 			{ | 
|  | 463 				_tmpQual = _msf_seqList[r].qual; | 
|  | 464 				_tmpSeq = _msf_seqList[r].seq; | 
|  | 465 			} | 
|  | 466 | 
|  | 467 | 
|  | 468 			for (z=0; z<s1; z++) | 
|  | 469 			{ | 
|  | 470 				int genLoc = locs[z]-_msf_samplingLocs[o]; | 
|  | 471 | 
|  | 472 | 
|  | 473 				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd ) | 
|  | 474 					continue; | 
|  | 475 | 
|  | 476 				int err = -1; | 
|  | 477 | 
|  | 478 | 
|  | 479 | 
|  | 480 				err = verifySingleEnd(genLoc, _tmpSeq, o); | 
|  | 481 | 
|  | 482 | 
|  | 483 | 
|  | 484 				if (err != -1) | 
|  | 485 				{ | 
|  | 486 					calculateMD(genLoc, _tmpSeq, err, &_msf_op); | 
|  | 487 					mappingCnt++; | 
|  | 488 					_msf_seqList[r].hits[0]++; | 
|  | 489 | 
|  | 490 					_msf_output.QNAME		= _msf_seqList[r].name; | 
|  | 491 					_msf_output.FLAG		= 16 * d; | 
|  | 492 					_msf_output.RNAME		= _msf_refGenName; | 
|  | 493 					_msf_output.POS			= genLoc + _msf_refGenOffset; | 
|  | 494 					_msf_output.MAPQ		= 255; | 
|  | 495 					_msf_output.CIGAR		= _msf_cigar; | 
|  | 496 					_msf_output.MRNAME		= "*"; | 
|  | 497 					_msf_output.MPOS		= 0; | 
|  | 498 					_msf_output.ISIZE		= 0; | 
|  | 499 					_msf_output.SEQ			= _tmpSeq; | 
|  | 500 					_msf_output.QUAL		= _tmpQual; | 
|  | 501 | 
|  | 502 					_msf_output.optSize		= 2; | 
|  | 503 					_msf_output.optFields	= _msf_optionalFields; | 
|  | 504 | 
|  | 505 					_msf_optionalFields[0].tag = "NM"; | 
|  | 506 					_msf_optionalFields[0].type = 'i'; | 
|  | 507 					_msf_optionalFields[0].iVal = err; | 
|  | 508 | 
|  | 509 					_msf_optionalFields[1].tag = "MD"; | 
|  | 510 					_msf_optionalFields[1].type = 'Z'; | 
|  | 511 					_msf_optionalFields[1].sVal = _msf_op; | 
|  | 512 | 
|  | 513 					output(_msf_output); | 
|  | 514 | 
|  | 515 | 
|  | 516 					if (_msf_seqList[r].hits[0] == 1) | 
|  | 517 					{ | 
|  | 518 						mappedSeqCnt++; | 
|  | 519 					} | 
|  | 520 | 
|  | 521 					if ( maxHits == 0 ) | 
|  | 522 					{ | 
|  | 523 						_msf_seqList[r].hits[0] = 2; | 
|  | 524 					} | 
|  | 525 | 
|  | 526 | 
|  | 527 					if ( maxHits!=0 && _msf_seqList[r].hits[0] == maxHits) | 
|  | 528 					{ | 
|  | 529 						completedSeqCnt++; | 
|  | 530 						break; | 
|  | 531 					} | 
|  | 532 				} | 
|  | 533 | 
|  | 534 			} | 
|  | 535 		} | 
|  | 536 	} | 
|  | 537 	else | 
|  | 538 	{ | 
|  | 539 		int tmp1=s1/2, tmp2= s2/2; | 
|  | 540 		if (tmp1 != 0) | 
|  | 541 			mapSingleEndSeqListBal(l1, tmp1, l2+tmp2, s2-tmp2, dir); | 
|  | 542 		mapSingleEndSeqListBal(l2+tmp2, s2-tmp2, l1+tmp1, s1-tmp1, -dir); | 
|  | 543 		if (tmp2 !=0) | 
|  | 544 			mapSingleEndSeqListBal(l1+tmp1, s1-tmp1, l2, tmp2, dir); | 
|  | 545 		if (tmp1 + tmp2 != 0) | 
|  | 546 			mapSingleEndSeqListBal(l2, tmp2, l1, tmp1, -dir); | 
|  | 547 	} | 
|  | 548 } | 
|  | 549 | 
|  | 550 | 
|  | 551 /**********************************************/ | 
|  | 552 void mapSingleEndSeqListTOP(unsigned int *l1, int s1, unsigned int *l2, int s2) | 
|  | 553 { | 
|  | 554 	if (s1 < s2) | 
|  | 555 	{ | 
|  | 556 		mapSingleEndSeqListBal(l1, s1, l2, s1,1); | 
|  | 557 		mapSingleEndSeqListTOP(l1, s1, l2+s1, s2-s1); | 
|  | 558 	} | 
|  | 559 	else if (s1 > s2) | 
|  | 560 	{ | 
|  | 561 		mapSingleEndSeqListBal(l1, s2, l2, s2,1); | 
|  | 562 		mapSingleEndSeqListTOP(l1+s2, s1-s2, l2, s2); | 
|  | 563 	} | 
|  | 564 	else | 
|  | 565 	{ | 
|  | 566 		mapSingleEndSeqListBal(l1, s1, l2, s2,1); | 
|  | 567 	} | 
|  | 568 } | 
|  | 569 | 
|  | 570 | 
|  | 571 /**********************************************/ | 
|  | 572 void mapSingleEndSeqList(unsigned int *l1, int s1, unsigned int *l2, int s2) | 
|  | 573 { | 
|  | 574 	if ( s2/s1 <= 2) | 
|  | 575 	{ | 
|  | 576 		int j = 0; | 
|  | 577 		int z = 0; | 
|  | 578 		int *locs = (int *) l1; | 
|  | 579 		int *seqInfo = (int *) l2; | 
|  | 580 		char *_tmpSeq, *_tmpQual; | 
|  | 581 		char rqual[QUAL_LENGTH+1]; | 
|  | 582 		rqual[QUAL_LENGTH]='\0'; | 
|  | 583 | 
|  | 584 		for (j=0; j<s2; j++) | 
|  | 585 		{ | 
|  | 586 			int re = _msf_samplingLocsSize * 2; | 
|  | 587 			int r = seqInfo[j]/re; | 
|  | 588 			if (maxHits!=0 && _msf_seqList[r].hits[0] == maxHits) | 
|  | 589 			{ | 
|  | 590 				continue; | 
|  | 591 			} | 
|  | 592 | 
|  | 593 			int x = seqInfo[j] % re; | 
|  | 594 			int o = x % _msf_samplingLocsSize; | 
|  | 595 			char d = (x/_msf_samplingLocsSize)?1:0; | 
|  | 596 | 
|  | 597 | 
|  | 598 			if (d) | 
|  | 599 			{ | 
|  | 600 				reverse(_msf_seqList[r].qual, rqual, QUAL_LENGTH); | 
|  | 601 				_tmpQual = rqual; | 
|  | 602 				_tmpSeq = _msf_seqList[r].rseq; | 
|  | 603 			} | 
|  | 604 			else | 
|  | 605 			{ | 
|  | 606 				_tmpQual = _msf_seqList[r].qual; | 
|  | 607 				_tmpSeq = _msf_seqList[r].seq; | 
|  | 608 			} | 
|  | 609 | 
|  | 610 | 
|  | 611 			for (z=0; z<s1; z++) | 
|  | 612 			{ | 
|  | 613 				int genLoc = locs[z]-_msf_samplingLocs[o]; | 
|  | 614 | 
|  | 615 | 
|  | 616 				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd ) | 
|  | 617 					continue; | 
|  | 618 | 
|  | 619 				int err = -1; | 
|  | 620 | 
|  | 621 | 
|  | 622 | 
|  | 623 				err = verifySingleEnd(genLoc, _tmpSeq, o); | 
|  | 624 | 
|  | 625 | 
|  | 626 | 
|  | 627 				if (err != -1) | 
|  | 628 				{ | 
|  | 629 					calculateMD(genLoc, _tmpSeq, err, &_msf_op); | 
|  | 630 					mappingCnt++; | 
|  | 631 					_msf_seqList[r].hits[0]++; | 
|  | 632 | 
|  | 633 					_msf_output.QNAME		= _msf_seqList[r].name; | 
|  | 634 					_msf_output.FLAG		= 16 * d; | 
|  | 635 					_msf_output.RNAME		= _msf_refGenName; | 
|  | 636 					_msf_output.POS			= genLoc + _msf_refGenOffset; | 
|  | 637 					_msf_output.MAPQ		= 255; | 
|  | 638 					_msf_output.CIGAR		= _msf_cigar; | 
|  | 639 					_msf_output.MRNAME		= "*"; | 
|  | 640 					_msf_output.MPOS		= 0; | 
|  | 641 					_msf_output.ISIZE		= 0; | 
|  | 642 					_msf_output.SEQ			= _tmpSeq; | 
|  | 643 					_msf_output.QUAL		= _tmpQual; | 
|  | 644 | 
|  | 645 					_msf_output.optSize		= 2; | 
|  | 646 					_msf_output.optFields	= _msf_optionalFields; | 
|  | 647 | 
|  | 648 					_msf_optionalFields[0].tag = "NM"; | 
|  | 649 					_msf_optionalFields[0].type = 'i'; | 
|  | 650 					_msf_optionalFields[0].iVal = err; | 
|  | 651 | 
|  | 652 					_msf_optionalFields[1].tag = "MD"; | 
|  | 653 					_msf_optionalFields[1].type = 'Z'; | 
|  | 654 					_msf_optionalFields[1].sVal = _msf_op; | 
|  | 655 | 
|  | 656 					output(_msf_output); | 
|  | 657 | 
|  | 658 | 
|  | 659 					if (_msf_seqList[r].hits[0] == 1) | 
|  | 660 					{ | 
|  | 661 						mappedSeqCnt++; | 
|  | 662 					} | 
|  | 663 | 
|  | 664 					if ( maxHits == 0 ) | 
|  | 665 					{ | 
|  | 666 						_msf_seqList[r].hits[0] = 2; | 
|  | 667 					} | 
|  | 668 | 
|  | 669 | 
|  | 670 					if ( maxHits!=0 && _msf_seqList[r].hits[0] == maxHits) | 
|  | 671 					{ | 
|  | 672 						completedSeqCnt++; | 
|  | 673 						break; | 
|  | 674 					} | 
|  | 675 				} | 
|  | 676 | 
|  | 677 			} | 
|  | 678 		} | 
|  | 679 	} | 
|  | 680 	else if (s1 == 1) | 
|  | 681 	{ | 
|  | 682 		//	fprintf(stderr, "1"); | 
|  | 683 		int tmp = s2/2; | 
|  | 684 		mapSingleEndSeqList(l1, s1, l2, tmp); | 
|  | 685 		mapSingleEndSeqList(l1, s1, l2+tmp, s2-tmp); | 
|  | 686 	} | 
|  | 687 	else if (s2 == 1) | 
|  | 688 	{ | 
|  | 689 		//	fprintf(stderr, "2"); | 
|  | 690 		int tmp = s1/2; | 
|  | 691 		mapSingleEndSeqList(l1, tmp, l2, s2); | 
|  | 692 		mapSingleEndSeqList(l1+tmp, s1-tmp, l2, s2); | 
|  | 693 	} | 
|  | 694 	else | 
|  | 695 	{ | 
|  | 696 		//	fprintf(stderr, "3"); | 
|  | 697 		int tmp1=s1/2, tmp2= s2/2; | 
|  | 698 		mapSingleEndSeqList(l1, tmp1, l2, tmp2); | 
|  | 699 		mapSingleEndSeqList(l1+tmp1, s1-tmp1, l2, tmp2); | 
|  | 700 		mapSingleEndSeqList(l1+tmp1, s1-tmp1, l2+tmp2, s2-tmp2); | 
|  | 701 		mapSingleEndSeqList(l1, tmp1, l2+tmp2, s2-tmp2); | 
|  | 702 	} | 
|  | 703 } | 
|  | 704 /**********************************************/ | 
|  | 705 int	 mapSingleEndSeq() | 
|  | 706 { | 
|  | 707 	int i = 0; | 
|  | 708 	unsigned int *locs = NULL; | 
|  | 709 	unsigned int *seqInfo = NULL; | 
|  | 710 	while ( i < _msf_rIndexSize ) | 
|  | 711 	{ | 
|  | 712 | 
|  | 713 		locs = getCandidates (_msf_rIndex[i].hv); | 
|  | 714 		if ( locs != NULL) | 
|  | 715 		{ | 
|  | 716 			seqInfo  = _msf_rIndex[i].seqInfo; | 
|  | 717 			mapSingleEndSeqListTOP (locs+1, locs[0], seqInfo+1, seqInfo[0]); | 
|  | 718 		} | 
|  | 719 		i++; | 
|  | 720 	} | 
|  | 721 	return 1; | 
|  | 722 } | 
|  | 723 | 
|  | 724 | 
|  | 725 /**********************************************/ | 
|  | 726 /**********************************************/ | 
|  | 727 /**********************************************/ | 
|  | 728 /**********************************************/ | 
|  | 729 /**********************************************/ | 
|  | 730 int compareOut (const void *a, const void *b) | 
|  | 731 { | 
|  | 732 	FullMappingInfo *aInfo = (FullMappingInfo *)a; | 
|  | 733 	FullMappingInfo *bInfo = (FullMappingInfo *)b; | 
|  | 734 	return aInfo->loc - bInfo->loc; | 
|  | 735 } | 
|  | 736 | 
|  | 737 /**********************************************/ | 
|  | 738 void mapPairedEndSeqList(unsigned int *l1, int s1, unsigned int *l2, int s2) | 
|  | 739 { | 
|  | 740 	if ( s2/s1 <= 2) | 
|  | 741 	{ | 
|  | 742 		int j = 0; | 
|  | 743 		int z = 0; | 
|  | 744 		int *locs = (int *) l1; | 
|  | 745 		int *seqInfo = (int *) l2; | 
|  | 746 		char *_tmpSeq, *_tmpQual; | 
|  | 747 		char rqual[QUAL_LENGTH+1]; | 
|  | 748 		rqual[QUAL_LENGTH]='\0'; | 
|  | 749 | 
|  | 750 		for (j=0; j<s2; j++) | 
|  | 751 		{ | 
|  | 752 			int re = _msf_samplingLocsSize * 2; | 
|  | 753 			int r = seqInfo[j]/re; | 
|  | 754 | 
|  | 755 			if (pairedEndDiscordantMode && (_msf_seqList[r].hits[0] == 1 || (_msf_seqHits[r/2] > DISCORDANT_CUT_OFF) )) | 
|  | 756 			{ | 
|  | 757 				continue; | 
|  | 758 			} | 
|  | 759 | 
|  | 760 			int x = seqInfo[j] % re; | 
|  | 761 			int o = x % _msf_samplingLocsSize; | 
|  | 762 			char d = (x/_msf_samplingLocsSize)?-1:1; | 
|  | 763 | 
|  | 764 | 
|  | 765 			if (d==-1) | 
|  | 766 			{ | 
|  | 767 				_tmpSeq = _msf_seqList[r].rseq; | 
|  | 768 			} | 
|  | 769 			else | 
|  | 770 			{ | 
|  | 771 				_tmpSeq = _msf_seqList[r].seq; | 
|  | 772 			} | 
|  | 773 | 
|  | 774 | 
|  | 775 			for (z=0; z<s1; z++) | 
|  | 776 			{ | 
|  | 777 				int genLoc = locs[z]-_msf_samplingLocs[o]; | 
|  | 778 | 
|  | 779 | 
|  | 780 				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd ) | 
|  | 781 					continue; | 
|  | 782 | 
|  | 783 				int err = -1; | 
|  | 784 | 
|  | 785 | 
|  | 786 | 
|  | 787 				err = verifySingleEnd(genLoc, _tmpSeq, o); | 
|  | 788 | 
|  | 789 | 
|  | 790 				if (err != -1) | 
|  | 791 				{ | 
|  | 792 					MappingLocations *parent = NULL; | 
|  | 793 					MappingLocations *child = _msf_mappingInfo[r].next; | 
|  | 794 | 
|  | 795 					genLoc+= _msf_refGenOffset; | 
|  | 796 | 
|  | 797 					int i = 0; | 
|  | 798 					for (i=0; i<(_msf_mappingInfo[r].size/MAP_CHUNKS); i++) | 
|  | 799 					{ | 
|  | 800 						parent = child; | 
|  | 801 						child = child->next; | 
|  | 802 					} | 
|  | 803 | 
|  | 804 					if (child==NULL) | 
|  | 805 					{ | 
|  | 806 						MappingLocations *tmp = getMem(sizeof(MappingLocations)); | 
|  | 807 						tmp->next = NULL; | 
|  | 808 						tmp->loc[0]=genLoc * d; | 
|  | 809 						if (parent == NULL) | 
|  | 810 							_msf_mappingInfo[r].next = tmp; | 
|  | 811 						else | 
|  | 812 							parent->next = tmp; | 
|  | 813 					} | 
|  | 814 					else | 
|  | 815 					{ | 
|  | 816 						child->loc[_msf_mappingInfo[r].size % MAP_CHUNKS] = genLoc * d; | 
|  | 817 					} | 
|  | 818 | 
|  | 819 | 
|  | 820 | 
|  | 821 					_msf_mappingInfo[r].size++; | 
|  | 822 | 
|  | 823 				} | 
|  | 824 | 
|  | 825 			} | 
|  | 826 		} | 
|  | 827 	} | 
|  | 828 	else if (s1 == 1) | 
|  | 829 	{ | 
|  | 830 		int tmp = s2/2; | 
|  | 831 		mapPairedEndSeqList(l1, s1, l2, tmp); | 
|  | 832 		mapPairedEndSeqList(l1, s1, l2+tmp, s2-tmp); | 
|  | 833 	} | 
|  | 834 	else if (s2 == 1) | 
|  | 835 	{ | 
|  | 836 		int tmp = s1/2; | 
|  | 837 		mapPairedEndSeqList(l1, tmp, l2, s2); | 
|  | 838 		mapPairedEndSeqList(l1+tmp, s1-tmp, l2, s2); | 
|  | 839 	} | 
|  | 840 	else | 
|  | 841 	{ | 
|  | 842 		int tmp1=s1/2, tmp2= s2/2; | 
|  | 843 		mapPairedEndSeqList(l1, tmp1, l2, tmp2); | 
|  | 844 		mapPairedEndSeqList(l1+tmp1, s1-tmp1, l2, tmp2); | 
|  | 845 		mapPairedEndSeqList(l1+tmp1, s1-tmp1, l2+tmp2, s2-tmp2); | 
|  | 846 		mapPairedEndSeqList(l1, tmp1, l2+tmp2, s2-tmp2); | 
|  | 847 	} | 
|  | 848 } | 
|  | 849 | 
|  | 850 /**********************************************/ | 
|  | 851 int	 mapPairedEndSeq() | 
|  | 852 { | 
|  | 853 	int i = 0; | 
|  | 854 	unsigned int *locs = NULL; | 
|  | 855 	unsigned int *seqInfo = NULL; | 
|  | 856 	while ( i < _msf_rIndexSize ) | 
|  | 857 	{ | 
|  | 858 		locs = getCandidates (_msf_rIndex[i].hv); | 
|  | 859 		if ( locs != NULL) | 
|  | 860 		{ | 
|  | 861 			seqInfo  = _msf_rIndex[i].seqInfo; | 
|  | 862 			mapPairedEndSeqList(locs+1, locs[0], seqInfo+1, seqInfo[0]); | 
|  | 863 | 
|  | 864 		} | 
|  | 865 		i++; | 
|  | 866 	} | 
|  | 867 | 
|  | 868 | 
|  | 869 	char fname1[FILE_NAME_LENGTH]; | 
|  | 870 	char fname2[FILE_NAME_LENGTH]; | 
|  | 871 	MappingLocations *cur, *tmp; | 
|  | 872 	int tmpOut; | 
|  | 873 	int j; | 
|  | 874 	int lmax=0, rmax=0; | 
|  | 875 | 
|  | 876 	sprintf(fname1, "%s__%s__%d__1",mappingOutputPath, mappingOutput, _msf_openFiles); | 
|  | 877 	sprintf(fname2, "%s__%s__%d__2",mappingOutputPath, mappingOutput, _msf_openFiles); | 
|  | 878 | 
|  | 879 	FILE* out; | 
|  | 880 	FILE* out1 = fileOpen(fname1, "w"); | 
|  | 881 	FILE* out2 = fileOpen(fname2, "w"); | 
|  | 882 | 
|  | 883 	_msf_openFiles++; | 
|  | 884 | 
|  | 885 	for (i=0; i<_msf_seqListSize; i++) | 
|  | 886 	{ | 
|  | 887 | 
|  | 888 		if (i%2==0) | 
|  | 889 		{ | 
|  | 890 			out = out1; | 
|  | 891 | 
|  | 892 			if (lmax <  _msf_mappingInfo[i].size) | 
|  | 893 			{ | 
|  | 894 				lmax = _msf_mappingInfo[i].size; | 
|  | 895 			} | 
|  | 896 		} | 
|  | 897 		else | 
|  | 898 		{ | 
|  | 899 			out = out2; | 
|  | 900 			if (rmax < _msf_mappingInfo[i].size) | 
|  | 901 			{ | 
|  | 902 				rmax = _msf_mappingInfo[i].size; | 
|  | 903 			} | 
|  | 904 		} | 
|  | 905 | 
|  | 906 		tmpOut = fwrite(&(_msf_mappingInfo[i].size), sizeof(int), 1, out); | 
|  | 907 		if (_msf_mappingInfo[i].size > 0) | 
|  | 908 		{ | 
|  | 909 			cur = _msf_mappingInfo[i].next; | 
|  | 910 			for (j=0; j < _msf_mappingInfo[i].size; j++) | 
|  | 911 			{ | 
|  | 912 				if ( j>0  && j%MAP_CHUNKS==0) | 
|  | 913 				{ | 
|  | 914 					cur = cur->next; | 
|  | 915 				} | 
|  | 916 				tmpOut = fwrite(&(cur->loc[j % MAP_CHUNKS]), sizeof(int), 1, out); | 
|  | 917 			} | 
|  | 918 			_msf_mappingInfo[i].size = 0; | 
|  | 919 			//	_msf_mappingInfo[i].next = NULL; | 
|  | 920 		} | 
|  | 921 	} | 
|  | 922 | 
|  | 923 	_msf_maxLSize += lmax; | 
|  | 924 	_msf_maxRSize += rmax; | 
|  | 925 | 
|  | 926 	fclose(out1); | 
|  | 927 	fclose(out2); | 
|  | 928 | 
|  | 929 	//fprintf(stdout, "%d %d\n", _msf_maxLSize, _msf_maxRSize); | 
|  | 930 | 
|  | 931 } | 
|  | 932 | 
|  | 933 /**********************************************/ | 
|  | 934 void outputPairedEnd() | 
|  | 935 { | 
|  | 936 | 
|  | 937 	char *curGen; | 
|  | 938 	char *curGenName; | 
|  | 939 	int tmpOut; | 
|  | 940 | 
|  | 941 	loadRefGenome(&_msf_refGen, &_msf_refGenName, &tmpOut); | 
|  | 942 | 
|  | 943 	FILE* in1[_msf_openFiles]; | 
|  | 944 	FILE* in2[_msf_openFiles]; | 
|  | 945 | 
|  | 946 	char fname1[_msf_openFiles][FILE_NAME_LENGTH]; | 
|  | 947 	char fname2[_msf_openFiles][FILE_NAME_LENGTH]; | 
|  | 948 | 
|  | 949 	// discordant | 
|  | 950 	FILE *out, *out1, *out2; | 
|  | 951 | 
|  | 952 	char fname3[FILE_NAME_LENGTH]; | 
|  | 953 	char fname4[FILE_NAME_LENGTH]; | 
|  | 954 	char fname5[FILE_NAME_LENGTH]; | 
|  | 955 | 
|  | 956 	if (pairedEndDiscordantMode) | 
|  | 957 	{ | 
|  | 958 		sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); | 
|  | 959 		sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput); | 
|  | 960 		sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput); | 
|  | 961 		out = fileOpen(fname3, "a"); | 
|  | 962 		out1 = fileOpen(fname4, "a"); | 
|  | 963 		out2 = fileOpen(fname5, "a"); | 
|  | 964 	} | 
|  | 965 | 
|  | 966 | 
|  | 967 | 
|  | 968 	int i; | 
|  | 969 | 
|  | 970 	FullMappingInfo *mi1 = getMem(sizeof(FullMappingInfo) * _msf_maxLSize); | 
|  | 971 	FullMappingInfo *mi2 = getMem(sizeof(FullMappingInfo) * _msf_maxRSize); | 
|  | 972 | 
|  | 973 | 
|  | 974 	for (i=0; i<_msf_openFiles; i++) | 
|  | 975 	{ | 
|  | 976 		sprintf(fname1[i], "%s__%s__%d__1", mappingOutputPath, mappingOutput, i); | 
|  | 977 		sprintf(fname2[i], "%s__%s__%d__2", mappingOutputPath, mappingOutput, i); | 
|  | 978 		in1[i] = fileOpen(fname1[i], "r"); | 
|  | 979 		in2[i] = fileOpen(fname2[i], "r"); | 
|  | 980 	} | 
|  | 981 | 
|  | 982 | 
|  | 983 	int size; | 
|  | 984 	int j, k; | 
|  | 985 	int size1, size2; | 
|  | 986 | 
|  | 987 	for (i=0; i<_msf_seqListSize/2; i++) | 
|  | 988 	{ | 
|  | 989 		size1 = size2 = 0; | 
|  | 990 		for (j=0; j<_msf_openFiles; j++) | 
|  | 991 		{ | 
|  | 992 			tmpOut = fread(&size, sizeof(int), 1, in1[j]); | 
|  | 993 			if ( size > 0 ) | 
|  | 994 			{ | 
|  | 995 				for (k=0; k<size; k++) | 
|  | 996 				{ | 
|  | 997 | 
|  | 998 					mi1[size1+k].dir = 1; | 
|  | 999 					tmpOut = fread (&(mi1[size1+k].loc), sizeof(int), 1, in1[j]); | 
|  | 1000 					if (mi1[size1+k].loc<1) | 
|  | 1001 					{ | 
|  | 1002 						mi1[size1+k].loc *= -1; | 
|  | 1003 						mi1[size1+k].dir = -1; | 
|  | 1004 					} | 
|  | 1005 				} | 
|  | 1006 				qsort(mi1+size1, size, sizeof(FullMappingInfo), compareOut); | 
|  | 1007 				size1+=size; | 
|  | 1008 			} | 
|  | 1009 		} | 
|  | 1010 | 
|  | 1011 		for (j=0; j<_msf_openFiles; j++) | 
|  | 1012 		{ | 
|  | 1013 			tmpOut = fread(&size, sizeof(int), 1, in2[j]); | 
|  | 1014 			if ( size > 0 ) | 
|  | 1015 			{ | 
|  | 1016 				for (k=0; k<size; k++) | 
|  | 1017 				{ | 
|  | 1018 | 
|  | 1019 					mi2[size2+k].dir = 1; | 
|  | 1020 					tmpOut = fread (&(mi2[size2+k].loc), sizeof(int), 1, in2[j]); | 
|  | 1021 | 
|  | 1022 					if (mi2[size2+k].loc<1) | 
|  | 1023 					{ | 
|  | 1024 						mi2[size2+k].loc *= -1; | 
|  | 1025 						mi2[size2+k].dir = -1; | 
|  | 1026 					} | 
|  | 1027 				} | 
|  | 1028 				qsort(mi2+size2, size, sizeof(FullMappingInfo), compareOut); | 
|  | 1029 				size2+=size; | 
|  | 1030 			} | 
|  | 1031 		} | 
|  | 1032 | 
|  | 1033 		//if (i == 6615) | 
|  | 1034 		//	fprintf(stdout, "%d: %s %d %d ",i, _msf_seqList[i*2].name, size1, size2); | 
|  | 1035 | 
|  | 1036 		int lm, ll, rl, rm; | 
|  | 1037 		int pos = 0; | 
|  | 1038 | 
|  | 1039 		if (pairedEndDiscordantMode) | 
|  | 1040 		{ | 
|  | 1041 | 
|  | 1042 			for (j=0; j<size1; j++) | 
|  | 1043 			{ | 
|  | 1044 				lm = mi1[j].loc - maxPairEndedDiscordantDistance + 1; | 
|  | 1045 				ll = mi1[j].loc - minPairEndedDiscordantDistance + 1; | 
|  | 1046 				rl = mi1[j].loc + minPairEndedDiscordantDistance - 1; | 
|  | 1047 				rm = mi1[j].loc + maxPairEndedDiscordantDistance - 1; | 
|  | 1048 | 
|  | 1049 				while (pos<size2 && mi2[pos].loc < lm) | 
|  | 1050 				{ | 
|  | 1051 					pos++; | 
|  | 1052 				} | 
|  | 1053 | 
|  | 1054 				k = pos; | 
|  | 1055 				while (k<size2 && mi2[k].loc<=rm) | 
|  | 1056 				{ | 
|  | 1057 					if ( mi2[k].loc <= ll || mi2[k].loc >= rl) | 
|  | 1058 					{ | 
|  | 1059 						if ( (mi1[j].loc < mi2[k].loc && mi1[j].dir==1 && mi2[k].dir == -1)  || | 
|  | 1060 								(mi1[j].loc > mi2[k].loc && mi1[j].dir==-1 && mi2[k].dir == 1) ) | 
|  | 1061 						{ | 
|  | 1062 							_msf_seqList[i*2].hits[0]=1; | 
|  | 1063 							_msf_seqList[i*2+1].hits[0]=1; | 
|  | 1064 							size1=0; | 
|  | 1065 							size2=0; | 
|  | 1066 							break; | 
|  | 1067 						} | 
|  | 1068 					} | 
|  | 1069 					k++; | 
|  | 1070 				} | 
|  | 1071 			} | 
|  | 1072 | 
|  | 1073 			_msf_seqHits[i]+= size1*size2; | 
|  | 1074 			if (_msf_seqHits[i]> DISCORDANT_CUT_OFF) | 
|  | 1075 			{ | 
|  | 1076 				_msf_seqList[i*2].hits[0]=1; | 
|  | 1077 				_msf_seqList[i*2+1].hits[0]=1; | 
|  | 1078 				size1=0; | 
|  | 1079 				size2=0; | 
|  | 1080 			} | 
|  | 1081 		} | 
|  | 1082 		//if (i == 6615) | 
|  | 1083 		//	fprintf(stdout, "%d %d\n", size1, size2); | 
|  | 1084 | 
|  | 1085 		char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2; | 
|  | 1086 		char rqual1[QUAL_LENGTH+1], rqual2[QUAL_LENGTH+1]; | 
|  | 1087 		rqual1[QUAL_LENGTH] = rqual2[QUAL_LENGTH] = '\0'; | 
|  | 1088 		seq1 = _msf_seqList[i*2].seq; | 
|  | 1089 		rseq1 = _msf_seqList[i*2].rseq; | 
|  | 1090 		qual1 = _msf_seqList[i*2].qual; | 
|  | 1091 		reverse(_msf_seqList[i*2].qual, rqual1, QUAL_LENGTH); | 
|  | 1092 | 
|  | 1093 		seq2 = _msf_seqList[i*2+1].seq; | 
|  | 1094 		rseq2 = _msf_seqList[i*2+1].rseq; | 
|  | 1095 		qual2 = _msf_seqList[i*2+1].qual; | 
|  | 1096 		reverse(_msf_seqList[i*2+1].qual, rqual2, QUAL_LENGTH); | 
|  | 1097 | 
|  | 1098 | 
|  | 1099 		if (pairedEndDiscordantMode) | 
|  | 1100 		{ | 
|  | 1101 			for (k=0; k<size1; k++) | 
|  | 1102 			{ | 
|  | 1103 				int tm = -1; | 
|  | 1104 				mi1[k].score = calculateScore(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, (mi1[k].dir==-1)?rqual1:qual1, &tm); | 
|  | 1105 				mi1[k].err = tm; | 
|  | 1106 			} | 
|  | 1107 | 
|  | 1108 			for (k=0; k<size2; k++) | 
|  | 1109 			{ | 
|  | 1110 				int tm = -1; | 
|  | 1111 				mi2[k].score = calculateScore(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, (mi2[k].dir==-1)?rqual2:qual2, &tm); | 
|  | 1112 				mi2[k].err = tm; | 
|  | 1113 			} | 
|  | 1114 | 
|  | 1115 		} | 
|  | 1116 		else | 
|  | 1117 		{ | 
|  | 1118 			for (k=0; k<size1; k++) | 
|  | 1119 			{ | 
|  | 1120 				mi1[k].err = calculateMD(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, -1, &_msf_op); | 
|  | 1121 				sprintf(mi1[k].md, "%s", _msf_op); | 
|  | 1122 			} | 
|  | 1123 | 
|  | 1124 			for (k=0; k<size2; k++) | 
|  | 1125 			{ | 
|  | 1126 				mi2[k].err = calculateMD(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, -1, &_msf_op); | 
|  | 1127 				sprintf(mi2[k].md, "%s", _msf_op); | 
|  | 1128 			} | 
|  | 1129 		} | 
|  | 1130 		pos = 0; | 
|  | 1131 | 
|  | 1132 		for (j=0; j<size1; j++) | 
|  | 1133 		{ | 
|  | 1134 			lm = mi1[j].loc - maxPairEndedDistance + 1; | 
|  | 1135 			ll = mi1[j].loc - minPairEndedDistance + 1; | 
|  | 1136 			rl = mi1[j].loc + minPairEndedDistance - 1; | 
|  | 1137 			rm = mi1[j].loc + maxPairEndedDistance - 1; | 
|  | 1138 | 
|  | 1139 			//fprintf(stdout, "%d %d %d %d %d\n",lm, ll,mi1[j].loc ,rl, rm); | 
|  | 1140 | 
|  | 1141 			while (pos<size2 && mi2[pos].loc < lm) | 
|  | 1142 			{ | 
|  | 1143 				pos++; | 
|  | 1144 			} | 
|  | 1145 | 
|  | 1146 			//fprintf(stdout, "POS: %d %d \n", pos, mi2[pos].loc); | 
|  | 1147 | 
|  | 1148 			k = pos; | 
|  | 1149 			while (k<size2 && mi2[k].loc <= rm) | 
|  | 1150 			{ | 
|  | 1151 				if (mi2[k].loc <= ll || mi2[k].loc >= rl) | 
|  | 1152 				{ | 
|  | 1153 					if (pairedEndDiscordantMode) | 
|  | 1154 					{ | 
|  | 1155 						int tmp; | 
|  | 1156 						int rNo = i; | 
|  | 1157 						int loc = mi1[j].loc*mi1[j].dir; | 
|  | 1158 						int err = mi1[j].err; | 
|  | 1159 						float sc = mi1[j].score; | 
|  | 1160 | 
|  | 1161 						char l = strlen(_msf_refGenName); | 
|  | 1162 | 
|  | 1163 						tmp = fwrite(&rNo, sizeof(int), 1, out); | 
|  | 1164 | 
|  | 1165 						tmp = fwrite(&l, sizeof(char), 1, out); | 
|  | 1166 						tmp = fwrite(_msf_refGenName, sizeof(char), l, out); | 
|  | 1167 | 
|  | 1168 						tmp = fwrite(&loc, sizeof(int), 1, out); | 
|  | 1169 						tmp = fwrite(&err, sizeof(char), 1, out); | 
|  | 1170 						tmp = fwrite(&sc, sizeof(float), 1, out); | 
|  | 1171 | 
|  | 1172 | 
|  | 1173 						loc = mi2[k].loc*mi2[k].dir; | 
|  | 1174 						err = mi2[k].err; | 
|  | 1175 						sc = mi2[k].score; | 
|  | 1176 | 
|  | 1177 						tmp = fwrite(&loc, sizeof(int), 1, out); | 
|  | 1178 						tmp = fwrite(&err, sizeof(char), 1, out); | 
|  | 1179 						tmp = fwrite(&sc, sizeof(float), 1, out); | 
|  | 1180 					} // end discordant | 
|  | 1181 					else | 
|  | 1182 					{ //start sampe | 
|  | 1183 						char *seq; | 
|  | 1184 						char *qual; | 
|  | 1185 						char d1; | 
|  | 1186 						char d2; | 
|  | 1187 						int isize; | 
|  | 1188 						int proper=0; | 
|  | 1189 						// ISIZE CALCULATION | 
|  | 1190 						// The distance between outer edges | 
|  | 1191 						isize = abs(mi1[j].loc - mi2[k].loc)+SEQ_LENGTH-1; | 
|  | 1192 						if (mi1[j].loc - mi2[k].loc > 0) | 
|  | 1193 						{ | 
|  | 1194 							isize *= -1; | 
|  | 1195 						} | 
|  | 1196 | 
|  | 1197 						d1 = (mi1[j].dir == -1)?1:0; | 
|  | 1198 						d2 = (mi2[k].dir == -1)?1:0; | 
|  | 1199 | 
|  | 1200 						if ( d1 ) | 
|  | 1201 						{ | 
|  | 1202 							seq = rseq1; | 
|  | 1203 							qual = rqual1; | 
|  | 1204 						} | 
|  | 1205 						else | 
|  | 1206 						{ | 
|  | 1207 							seq = seq1; | 
|  | 1208 							qual = qual1; | 
|  | 1209 						} | 
|  | 1210 | 
|  | 1211 						if ( (mi1[j].loc < mi2[k].loc && !d1 && d2) || | 
|  | 1212 							 (mi1[j].loc > mi2[k].loc && d1 && !d2) ) | 
|  | 1213 						{ | 
|  | 1214 							proper = 2; | 
|  | 1215 						} | 
|  | 1216 						else | 
|  | 1217 						{ | 
|  | 1218 							proper = 0; | 
|  | 1219 						} | 
|  | 1220 | 
|  | 1221 | 
|  | 1222 						_msf_output.POS			= mi1[j].loc; | 
|  | 1223 						_msf_output.MPOS		= mi2[k].loc; | 
|  | 1224 						_msf_output.FLAG		= 1+proper+16*d1+32*d2+64; | 
|  | 1225 						_msf_output.ISIZE		= isize; | 
|  | 1226 						_msf_output.SEQ			= seq, | 
|  | 1227 						_msf_output.QUAL		= qual; | 
|  | 1228 						_msf_output.QNAME		= _msf_seqList[i*2].name; | 
|  | 1229 						_msf_output.RNAME		= _msf_refGenName; | 
|  | 1230 						_msf_output.MAPQ		= 255; | 
|  | 1231 						_msf_output.CIGAR		= _msf_cigar; | 
|  | 1232 						_msf_output.MRNAME		= "="; | 
|  | 1233 | 
|  | 1234 						_msf_output.optSize	= 2; | 
|  | 1235 						_msf_output.optFields	= _msf_optionalFields; | 
|  | 1236 | 
|  | 1237 						_msf_optionalFields[0].tag = "NM"; | 
|  | 1238 						_msf_optionalFields[0].type = 'i'; | 
|  | 1239 						_msf_optionalFields[0].iVal = mi1[j].err; | 
|  | 1240 | 
|  | 1241 						_msf_optionalFields[1].tag = "MD"; | 
|  | 1242 						_msf_optionalFields[1].type = 'Z'; | 
|  | 1243 						_msf_optionalFields[1].sVal = mi1[j].md; | 
|  | 1244 | 
|  | 1245 | 
|  | 1246 						output(_msf_output); | 
|  | 1247 | 
|  | 1248 						if ( d2 ) | 
|  | 1249 						{ | 
|  | 1250 							seq = rseq2; | 
|  | 1251 							qual = rqual2; | 
|  | 1252 						} | 
|  | 1253 						else | 
|  | 1254 						{ | 
|  | 1255 							seq = seq2; | 
|  | 1256 							qual = qual2; | 
|  | 1257 						} | 
|  | 1258 | 
|  | 1259 						_msf_output.POS			= mi2[k].loc; | 
|  | 1260 						_msf_output.MPOS		= mi1[j].loc; | 
|  | 1261 						_msf_output.FLAG		= 1+proper+16*d2+32*d1+128; | 
|  | 1262 						_msf_output.ISIZE		= -isize; | 
|  | 1263 						_msf_output.SEQ			= seq, | 
|  | 1264 						_msf_output.QUAL		= qual; | 
|  | 1265 						_msf_output.QNAME		= _msf_seqList[i*2].name; | 
|  | 1266 						_msf_output.RNAME		= _msf_refGenName; | 
|  | 1267 						_msf_output.MAPQ		= 255; | 
|  | 1268 						_msf_output.CIGAR		= _msf_cigar; | 
|  | 1269 						_msf_output.MRNAME		= "="; | 
|  | 1270 | 
|  | 1271 						_msf_output.optSize	= 2; | 
|  | 1272 						_msf_output.optFields	= _msf_optionalFields; | 
|  | 1273 | 
|  | 1274 						_msf_optionalFields[0].tag = "NM"; | 
|  | 1275 						_msf_optionalFields[0].type = 'i'; | 
|  | 1276 						_msf_optionalFields[0].iVal = mi2[k].err;; | 
|  | 1277 | 
|  | 1278 						_msf_optionalFields[1].tag = "MD"; | 
|  | 1279 						_msf_optionalFields[1].type = 'Z'; | 
|  | 1280 						_msf_optionalFields[1].sVal = mi2[k].md; | 
|  | 1281 | 
|  | 1282 						output(_msf_output); | 
|  | 1283 					} //end sampe | 
|  | 1284 				} | 
|  | 1285 				k++; | 
|  | 1286 			} | 
|  | 1287 		} | 
|  | 1288 	} | 
|  | 1289 | 
|  | 1290 	if (pairedEndDiscordantMode) | 
|  | 1291 	{ | 
|  | 1292 		fclose(out); | 
|  | 1293 	} | 
|  | 1294 | 
|  | 1295 | 
|  | 1296 	freeMem(mi1, sizeof(FullMappingInfo)*_msf_maxLSize); | 
|  | 1297 	freeMem(mi2, sizeof(FullMappingInfo)*_msf_maxRSize); | 
|  | 1298 | 
|  | 1299 	for (i=0; i<_msf_openFiles; i++) | 
|  | 1300 	{ | 
|  | 1301 		fclose(in1[i]); | 
|  | 1302 		fclose(in2[i]); | 
|  | 1303 		//fprintf(stdout, "%s %s \n", fname1[i], fname2[i]); | 
|  | 1304 		unlink(fname1[i]); | 
|  | 1305 		unlink(fname2[i]); | 
|  | 1306 	} | 
|  | 1307 	_msf_openFiles = 0; | 
|  | 1308 | 
|  | 1309 } | 
|  | 1310 | 
|  | 1311 /**********************************************/ | 
|  | 1312 /**********************************************/ | 
|  | 1313 /**********************************************/ | 
|  | 1314 /**********************************************/ | 
|  | 1315 float calculateScore(int index, char *seq, char *qual, int *err) | 
|  | 1316 { | 
|  | 1317 	int i; | 
|  | 1318 	char *ref; | 
|  | 1319 	char *ver; | 
|  | 1320 | 
|  | 1321 	ref = _msf_refGen + index-1; | 
|  | 1322 	ver = seq; | 
|  | 1323 	float score = 1; | 
|  | 1324 | 
|  | 1325 	if (*err > 0 || *err == -1) | 
|  | 1326 	{ | 
|  | 1327 		*err = 0; | 
|  | 1328 | 
|  | 1329 		for (i=0; i < SEQ_LENGTH; i++) | 
|  | 1330 		{ | 
|  | 1331 			if (*ref != *ver) | 
|  | 1332 			{ | 
|  | 1333 				//fprintf(stdout, "%c %c %d", *ref, *ver, *err); | 
|  | 1334 				(*err)++; | 
|  | 1335 				score *= 0.001 + 1/pow( 10, ((qual[i]-33)/10.0) ); | 
|  | 1336 			} | 
|  | 1337 			ref++; | 
|  | 1338 			ver++; | 
|  | 1339 		} | 
|  | 1340 | 
|  | 1341 	} | 
|  | 1342 	return score; | 
|  | 1343 } | 
|  | 1344 | 
|  | 1345 /**********************************************/ | 
|  | 1346 void outputPairedEndDiscPP() | 
|  | 1347 { | 
|  | 1348 	char genName[SEQ_LENGTH]; | 
|  | 1349 	char fname1[FILE_NAME_LENGTH]; | 
|  | 1350 	char fname2[FILE_NAME_LENGTH]; | 
|  | 1351 	char fname3[FILE_NAME_LENGTH]; | 
|  | 1352 	char fname4[FILE_NAME_LENGTH]; | 
|  | 1353 	char fname5[FILE_NAME_LENGTH]; | 
|  | 1354 	char fname6[FILE_NAME_LENGTH]; | 
|  | 1355 	char l; | 
|  | 1356 	int loc1, loc2; | 
|  | 1357 	char err1, err2; | 
|  | 1358 	char dir1, dir2; | 
|  | 1359 	float sc1, sc2, lsc=0; | 
|  | 1360 	int flag = 0; | 
|  | 1361 	int rNo,lrNo = -1; | 
|  | 1362 	int tmp; | 
|  | 1363 	FILE *in, *in1, *in2, *out, *out1, *out2; | 
|  | 1364 | 
|  | 1365 	sprintf(fname1, "%s__%s__disc", mappingOutputPath, mappingOutput); | 
|  | 1366 	sprintf(fname2, "%s__%s__oea1", mappingOutputPath, mappingOutput); | 
|  | 1367 	sprintf(fname3, "%s__%s__oea2", mappingOutputPath, mappingOutput); | 
|  | 1368 	sprintf(fname4, "%s%s_DIVET.vh", mappingOutputPath, mappingOutput); | 
|  | 1369 	sprintf(fname5, "%s%s_OEA1.vh", mappingOutputPath, mappingOutput); | 
|  | 1370 	sprintf(fname6, "%s%s_OEA2.vh", mappingOutputPath, mappingOutput); | 
|  | 1371 | 
|  | 1372 	in   = fileOpen(fname1, "r"); | 
|  | 1373 	in1  = fileOpen(fname2, "r"); | 
|  | 1374 	in2  = fileOpen(fname3, "r"); | 
|  | 1375 	out  = fileOpen(fname4, "w"); | 
|  | 1376 	out1 = fileOpen(fname5, "w"); | 
|  | 1377 	out2 = fileOpen(fname6, "w"); | 
|  | 1378 	if (in != NULL) | 
|  | 1379 	{ | 
|  | 1380 		flag = fread(&rNo, sizeof(int), 1, in); | 
|  | 1381 	} | 
|  | 1382 	else | 
|  | 1383 	{ | 
|  | 1384 		flag  = 0; | 
|  | 1385 	} | 
|  | 1386 | 
|  | 1387 | 
|  | 1388 	while (flag) | 
|  | 1389 	{ | 
|  | 1390 | 
|  | 1391 		tmp = fread(&l, sizeof(char), 1, in); | 
|  | 1392 		tmp = fread(genName, sizeof(char), l, in); | 
|  | 1393 		genName[l]='\0'; | 
|  | 1394 		tmp = fread(&loc1, sizeof(int), 1, in); | 
|  | 1395 		tmp = fread(&err1, sizeof(char), 1, in); | 
|  | 1396 		tmp = fread(&sc1, sizeof(float), 1, in); | 
|  | 1397 		tmp = fread(&loc2, sizeof(int), 1, in); | 
|  | 1398 		tmp = fread(&err2, sizeof(char), 1, in); | 
|  | 1399 		tmp = fread(&sc2, sizeof(float), 1, in); | 
|  | 1400 | 
|  | 1401 		//if (rNo ==6615) | 
|  | 1402 		//	fprintf(stdout, "%s %d: %d %0.20f %d %d %0.20f\n", genName, loc1, err1, sc1, loc2, err2, sc2); | 
|  | 1403 | 
|  | 1404 		if (_msf_seqList[rNo*2].hits[0] % 2 == 0 && _msf_seqHits[rNo] < DISCORDANT_CUT_OFF) | 
|  | 1405 		{ | 
|  | 1406 			dir1 = dir2 = 'F'; | 
|  | 1407 | 
|  | 1408 			if (loc1 < 0) | 
|  | 1409 			{ | 
|  | 1410 				dir1 = 'R'; | 
|  | 1411 				loc1 = -loc1; | 
|  | 1412 			} | 
|  | 1413 | 
|  | 1414 			if (loc2 < 0) | 
|  | 1415 			{ | 
|  | 1416 				dir2 = 'R'; | 
|  | 1417 				loc2 = -loc2; | 
|  | 1418 			} | 
|  | 1419 | 
|  | 1420 			if (rNo != lrNo) | 
|  | 1421 			{ | 
|  | 1422 				int j; | 
|  | 1423 				for (j=0; j<SEQ_LENGTH; j++) | 
|  | 1424 				{ | 
|  | 1425 					lsc += _msf_seqList[rNo*2].qual[j]+_msf_seqList[rNo*2+1].qual[j]; | 
|  | 1426 				} | 
|  | 1427 				lsc /= 2*SEQ_LENGTH; | 
|  | 1428 				lsc -= 33; | 
|  | 1429 				lrNo = rNo; | 
|  | 1430 			} | 
|  | 1431 | 
|  | 1432 			int inv = 0; | 
|  | 1433 			int eve = 0; | 
|  | 1434 			int dist = 0; | 
|  | 1435 			char event; | 
|  | 1436 | 
|  | 1437 			//fprintf(stdout, "%c %c ", dir1, dir2); | 
|  | 1438 | 
|  | 1439 			if ( dir1 == dir2 ) | 
|  | 1440 			{ | 
|  | 1441 				event = 'V'; | 
|  | 1442 				//fprintf(stdout, "Inverstion \n"); | 
|  | 1443 			} | 
|  | 1444 			else | 
|  | 1445 			{ | 
|  | 1446 				if (loc1 < loc2) | 
|  | 1447 				{ | 
|  | 1448 | 
|  | 1449 					//fprintf(stdout, "< %d ", loc2-loc1-SEQ_LENGTH); | 
|  | 1450 | 
|  | 1451 					if (dir1 == 'R' && dir2 == 'F') | 
|  | 1452 					{ | 
|  | 1453 						event = 'E'; | 
|  | 1454 | 
|  | 1455 						//fprintf(stdout, "Everted \n"); | 
|  | 1456 					} | 
|  | 1457 					else if ( loc2 - loc1 >= maxPairEndedDiscordantDistance ) | 
|  | 1458 					{ | 
|  | 1459 						event = 'D'; | 
|  | 1460 						//fprintf(stdout, "Deletion \n"); | 
|  | 1461 					} | 
|  | 1462 					else | 
|  | 1463 					{ | 
|  | 1464 						event = 'I'; | 
|  | 1465 						//fprintf(stdout, "Insertion \n"); | 
|  | 1466 					} | 
|  | 1467 				} | 
|  | 1468 				else if (loc2 < loc1) | 
|  | 1469 				{ | 
|  | 1470 					//fprintf(stdout, "> %d ", loc1-loc2-SEQ_LENGTH); | 
|  | 1471 					if (dir2 == 'R' && dir1 == 'F') | 
|  | 1472 					{ | 
|  | 1473 						event = 'E'; | 
|  | 1474 						//fprintf(stdout, "Everted \n"); | 
|  | 1475 					} | 
|  | 1476 					else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance ) | 
|  | 1477 					{ | 
|  | 1478 						event = 'D'; | 
|  | 1479 						//fprintf(stdout, "Deletion \n"); | 
|  | 1480 					} | 
|  | 1481 					else | 
|  | 1482 					{ | 
|  | 1483 						event = 'I'; | 
|  | 1484 						//fprintf(stdout, "Insertion \n"); | 
|  | 1485 					} | 
|  | 1486 				} | 
|  | 1487 			} | 
|  | 1488 			_msf_seqList[rNo*2].hits[0] = 2; | 
|  | 1489 			fprintf(out, "%s\t%s\t%d\t%d\t%c\t%s\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%0.20f\n", | 
|  | 1490 					_msf_seqList[rNo*2].name, genName, loc1, (loc1+SEQ_LENGTH-1), dir1, genName, loc2, (loc2+SEQ_LENGTH-1), dir2, event, (err1+err2), lsc, sc1*sc2); | 
|  | 1491 		} | 
|  | 1492 		flag = fread(&rNo, sizeof(int), 1, in); | 
|  | 1493 | 
|  | 1494 	} | 
|  | 1495 | 
|  | 1496 	/* | 
|  | 1497 	   MappingInfoNode *lr[_msf_seqListSize/2]; | 
|  | 1498 	   MappingInfoNode *rr[_msf_seqListSize/2]; | 
|  | 1499 	   MappingInfoNode *cur, *tmpDel, *cur2; | 
|  | 1500 | 
|  | 1501 | 
|  | 1502 	   int ls[_msf_seqListSize/2]; | 
|  | 1503 	   int rs[_msf_seqListSize/2]; | 
|  | 1504 | 
|  | 1505 | 
|  | 1506 	   int i=0; | 
|  | 1507 | 
|  | 1508 	   for (i = 0; i<_msf_seqListSize/2; i++) | 
|  | 1509 	   { | 
|  | 1510 	   lr[i] = rr[i] = NULL; | 
|  | 1511 	   ls[i] = rs[i] = 0; | 
|  | 1512 	   } | 
|  | 1513 | 
|  | 1514 | 
|  | 1515 | 
|  | 1516 	   if (in1 != NULL) | 
|  | 1517 	   { | 
|  | 1518 	   flag = fread(&rNo, sizeof(int), 1, in1); | 
|  | 1519 	   } | 
|  | 1520 	   else | 
|  | 1521 	   { | 
|  | 1522 	   flag  = 0; | 
|  | 1523 	   } | 
|  | 1524 | 
|  | 1525 | 
|  | 1526 	   while (flag) | 
|  | 1527 	   { | 
|  | 1528 	   tmp = fread(&loc1, sizeof(int), 1, in1); | 
|  | 1529 	   tmp = fread(&err1, sizeof(char), 1, in1); | 
|  | 1530 	   tmp = fread(&sc1, sizeof(float), 1, in1); | 
|  | 1531 	   tmp = fread(&l, sizeof(char), 1, in1); | 
|  | 1532 	   tmp = fread(genName, sizeof(char), l, in1); | 
|  | 1533 	   genName[l]='\0'; | 
|  | 1534 | 
|  | 1535 	   if (_msf_seqList[rNo*2].hits[0] == 0) | 
|  | 1536 	   { | 
|  | 1537 | 
|  | 1538 	   if ( ls[rNo] < DISCORDANT_CUT_OFF ) | 
|  | 1539 	   { | 
|  | 1540 	   ls[rNo]++; | 
|  | 1541 | 
|  | 1542 	   cur = lr[rNo]; | 
|  | 1543 | 
|  | 1544 	   if (cur !=NULL) | 
|  | 1545 	   { | 
|  | 1546 	   if (err1 == cur->err) | 
|  | 1547 	   { | 
|  | 1548 	   MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1549 | 
|  | 1550 	   nr->loc = loc1; | 
|  | 1551 	   nr->err = err1; | 
|  | 1552 	   nr->score = sc1; | 
|  | 1553 	   nr->next = lr[rNo]; | 
|  | 1554 	   sprintf(nr->chr,"%s", genName); | 
|  | 1555 	   lr[rNo] = nr; | 
|  | 1556 	   } | 
|  | 1557 	   else if (err1 < cur->err) | 
|  | 1558 	   { | 
|  | 1559 	   MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1560 | 
|  | 1561 	   nr->loc = loc1; | 
|  | 1562 	   nr->err = err1; | 
|  | 1563 	   nr->score = sc1; | 
|  | 1564 	   sprintf(nr->chr,"%s", genName); | 
|  | 1565 	   nr->next = NULL; | 
|  | 1566 	   lr[rNo] = nr; | 
|  | 1567 	while (cur!=NULL) | 
|  | 1568 	{ | 
|  | 1569 		tmpDel = cur; | 
|  | 1570 		cur = cur->next; | 
|  | 1571 		freeMem(tmpDel, sizeof(MappingInfoNode)); | 
|  | 1572 	} | 
|  | 1573 } | 
|  | 1574 } | 
|  | 1575 else | 
|  | 1576 { | 
|  | 1577 | 
|  | 1578 	MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1579 | 
|  | 1580 	nr->loc = loc1; | 
|  | 1581 	nr->err = err1; | 
|  | 1582 	nr->score = sc1; | 
|  | 1583 	sprintf(nr->chr,"%s", genName); | 
|  | 1584 	nr->next = NULL; | 
|  | 1585 	lr[rNo] = nr; | 
|  | 1586 } | 
|  | 1587 | 
|  | 1588 if (ls[rNo] > DISCORDANT_CUT_OFF) | 
|  | 1589 { | 
|  | 1590 	cur = lr[rNo]; | 
|  | 1591 	while (cur!=NULL) | 
|  | 1592 	{ | 
|  | 1593 		tmpDel = cur; | 
|  | 1594 		cur = cur->next; | 
|  | 1595 		freeMem(tmpDel, sizeof(MappingInfoNode)); | 
|  | 1596 	} | 
|  | 1597 } | 
|  | 1598 } | 
|  | 1599 | 
|  | 1600 } | 
|  | 1601 flag = fread(&rNo, sizeof(int), 1, in1); | 
|  | 1602 | 
|  | 1603 } | 
|  | 1604 | 
|  | 1605 | 
|  | 1606 if (in2 != NULL) | 
|  | 1607 { | 
|  | 1608 	flag = fread(&rNo, sizeof(int), 1, in2); | 
|  | 1609 } | 
|  | 1610 else | 
|  | 1611 { | 
|  | 1612 	flag  = 0; | 
|  | 1613 } | 
|  | 1614 | 
|  | 1615 | 
|  | 1616 while (flag) | 
|  | 1617 { | 
|  | 1618 	tmp = fread(&loc1, sizeof(int), 1, in2); | 
|  | 1619 	tmp = fread(&err1, sizeof(char), 1, in2); | 
|  | 1620 	tmp = fread(&sc1, sizeof(float), 1, in2); | 
|  | 1621 	tmp = fread(&l, sizeof(char), 1, in2); | 
|  | 1622 	tmp = fread(genName, sizeof(char), l, in2); | 
|  | 1623 	genName[l]='\0'; | 
|  | 1624 | 
|  | 1625 	if (_msf_seqList[rNo*2].hits[0] == 0) | 
|  | 1626 	{ | 
|  | 1627 | 
|  | 1628 		if ( rs[rNo] < DISCORDANT_CUT_OFF ) | 
|  | 1629 		{ | 
|  | 1630 			rs[rNo]++; | 
|  | 1631 | 
|  | 1632 			cur = rr[rNo]; | 
|  | 1633 | 
|  | 1634 			if (cur !=NULL) | 
|  | 1635 			{ | 
|  | 1636 				if (err1 == cur->err) | 
|  | 1637 				{ | 
|  | 1638 					MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1639 | 
|  | 1640 					nr->loc = loc1; | 
|  | 1641 					nr->err = err1; | 
|  | 1642 					nr->score = sc1; | 
|  | 1643 					nr->next = rr[rNo]; | 
|  | 1644 					sprintf(nr->chr,"%s", genName); | 
|  | 1645 					rr[rNo] = nr; | 
|  | 1646 				} | 
|  | 1647 				else if (err1 < cur->err) | 
|  | 1648 				{ | 
|  | 1649 					MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1650 | 
|  | 1651 					nr->loc = loc1; | 
|  | 1652 					nr->err = err1; | 
|  | 1653 					nr->score = sc1; | 
|  | 1654 					sprintf(nr->chr,"%s", genName); | 
|  | 1655 					nr->next = NULL; | 
|  | 1656 					rr[rNo] = nr; | 
|  | 1657 					while (cur!=NULL) | 
|  | 1658 					{ | 
|  | 1659 						tmpDel = cur; | 
|  | 1660 						cur = cur->next; | 
|  | 1661 						freeMem(tmpDel, sizeof(MappingInfoNode)); | 
|  | 1662 					} | 
|  | 1663 				} | 
|  | 1664 			} | 
|  | 1665 			else | 
|  | 1666 			{ | 
|  | 1667 | 
|  | 1668 				MappingInfoNode *nr = getMem(sizeof(MappingInfoNode)); | 
|  | 1669 | 
|  | 1670 				nr->loc = loc1; | 
|  | 1671 				nr->err = err1; | 
|  | 1672 				nr->score = sc1; | 
|  | 1673 				sprintf(nr->chr,"%s", genName); | 
|  | 1674 				nr->next = NULL; | 
|  | 1675 				rr[rNo] = nr; | 
|  | 1676 			} | 
|  | 1677 | 
|  | 1678 			if (rs[rNo] > DISCORDANT_CUT_OFF) | 
|  | 1679 			{ | 
|  | 1680 				cur = rr[rNo]; | 
|  | 1681 				while (cur!=NULL) | 
|  | 1682 				{ | 
|  | 1683 					tmpDel = cur; | 
|  | 1684 					cur = cur->next; | 
|  | 1685 					freeMem(tmpDel, sizeof(MappingInfoNode)); | 
|  | 1686 				} | 
|  | 1687 			} | 
|  | 1688 		} | 
|  | 1689 	} | 
|  | 1690 	flag = fread(&rNo, sizeof(int), 1, in2); | 
|  | 1691 | 
|  | 1692 } | 
|  | 1693 | 
|  | 1694 | 
|  | 1695 for (i=0; i<_msf_seqListSize/2; i++) | 
|  | 1696 { | 
|  | 1697 	int j; | 
|  | 1698 	for (j=0; j<SEQ_LENGTH; j++) | 
|  | 1699 	{ | 
|  | 1700 		lsc += _msf_seqList[i*2].qual[j]+_msf_seqList[i*2+1].qual[j]; | 
|  | 1701 	} | 
|  | 1702 	lsc /= 2*SEQ_LENGTH; | 
|  | 1703 	lsc -= 33; | 
|  | 1704 	if (ls[i] * rs[i] < DISCORDANT_CUT_OFF && ls[i] & rs[i] > 0) | 
|  | 1705 	{ | 
|  | 1706 		cur = lr[i]; | 
|  | 1707 		while (cur != NULL) | 
|  | 1708 		{ | 
|  | 1709 			cur2 = rr[i]; | 
|  | 1710 			if (cur->loc < 0) | 
|  | 1711 			{ | 
|  | 1712 				dir1 = 'R'; | 
|  | 1713 				loc1 = -cur->loc; | 
|  | 1714 			} | 
|  | 1715 			else | 
|  | 1716 			{ | 
|  | 1717 				dir1 = 'F'; | 
|  | 1718 				loc1 = cur->loc; | 
|  | 1719 			} | 
|  | 1720 			while (cur2 != NULL) | 
|  | 1721 			{ | 
|  | 1722 | 
|  | 1723 				if (cur2->loc < 0) | 
|  | 1724 				{ | 
|  | 1725 					dir2 = 'R'; | 
|  | 1726 					loc2 = -cur2->loc; | 
|  | 1727 				} | 
|  | 1728 				else | 
|  | 1729 				{ | 
|  | 1730 					dir2 = 'F'; | 
|  | 1731 					loc2 = cur2->loc; | 
|  | 1732 				} | 
|  | 1733 | 
|  | 1734 				fprintf(out, "%s\t%s\t%d\t%d\t%c\t%s\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%0.20f\n", | 
|  | 1735 						_msf_seqList[i*2].name, cur->chr, loc1, (loc1+SEQ_LENGTH-1), dir1, cur2->chr, loc2, (loc2+SEQ_LENGTH-1), dir2, 'T', (cur->err+cur2->err), lsc, cur->score*cur2->score); | 
|  | 1736 				cur2 = cur2->next; | 
|  | 1737 			} | 
|  | 1738 			cur = cur->next; | 
|  | 1739 		} | 
|  | 1740 	} | 
|  | 1741 | 
|  | 1742 }*/ | 
|  | 1743 | 
|  | 1744 | 
|  | 1745 fclose(in); | 
|  | 1746 fclose(in1); | 
|  | 1747 fclose(in2); | 
|  | 1748 fclose(out); | 
|  | 1749 fclose(out1); | 
|  | 1750 fclose(out2); | 
|  | 1751 | 
|  | 1752 unlink(fname1); | 
|  | 1753 unlink(fname2); | 
|  | 1754 unlink(fname3); | 
|  | 1755 unlink(fname5); | 
|  | 1756 unlink(fname6); | 
|  | 1757 } |