comparison mrfast-2.1.0.5/MrFAST.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents
children
comparison
equal deleted inserted replaced
0:7b3dc85dc7fd 1:d4054b05b015
1 /*
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the names of the University of Washington, Simon Fraser University,
14 * nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without specific
16 * prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 Authors:
33 Farhad Hormozdiari
34 Faraz Hach
35 Can Alkan
36 Emails:
37 farhadh AT uw DOT edu
38 fhach AT cs DOT sfu DOT ca
39 calkan AT uw DOT edu
40 */
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <math.h>
46 #include <dirent.h>
47 #include <xmmintrin.h>
48 #include <emmintrin.h>
49 #include <mmintrin.h>
50
51
52 #include "Common.h"
53 #include "Reads.h"
54 #include "HashTable.h"
55 #include "Output.h"
56 #include "MrFAST.h"
57 #include "RefGenome.h"
58
59
/*
 * Helper macros. All of them may evaluate their arguments more than once,
 * so arguments must be free of side effects.
 */

/* Smaller of two values. */
#define min(a,b) ((a)>(b)?(b):(a))
/* Smallest of three values; every use of an argument is parenthesized so
 * that expression arguments (e.g. "x & 1" or "p ? q : r") keep their meaning. */
#define min3(a,b,c) ((a)>(b)?((b)>(c)?(c):(b)):((a)>(c)?(c):(a)))
/* Maps 'A','C','G','T' to 0..3; any other value maps to 4. */
#define CHARCODE(a) ((a)=='A' ? 0 : ((a)=='C' ? 1 : ((a)=='G' ? 2 : ((a)=='T' ? 3 : 4))))

#define MAX_REF_SIZE 18
65
66
/* Forward declaration; the definition is not part of the code shown here. */
67 float calculateScore(int index, char *seq, char *qual, char *md);
68 unsigned char mrFAST = 1;
69 char *versionNumberF="0.5";
70
/* Run-wide counters, all starting at zero. */
71 long long verificationCnt = 0;
72 long long mappingCnt = 0;
73 long long mappedSeqCnt = 0;
74 long long completedSeqCnt = 0;
75 char *mappingOutput;
76 /**********************************************/
/* File-scope state of this module (the _msf_ prefix is used throughout). */
77 char *_msf_refGen = NULL;
78 int _msf_refGenLength = 0;
79 int _msf_refGenOffset = 0;
80 char *_msf_refGenName = NULL;
81
82 int _msf_refGenBeg;
83 int _msf_refGenEnd;
84
85 IHashTable *_msf_hashTable = NULL;
86
87 int *_msf_samplingLocs;
88 int *_msf_samplingLocsEnds;
89 int _msf_samplingLocsSize;
90
91 Read *_msf_seqList;
92 int _msf_seqListSize;
93
94 Pair *_msf_sort_seqList = NULL;
95 int *_msf_map_sort_seqList;
96
97 ReadIndexTable *_msf_rIndex = NULL;
98 int _msf_rIndexSize;
99 int _msf_rIndexMax;
100
101 SAM _msf_output;
102
103 OPT_FIELDS *_msf_optionalFields;
104
105 char *_msf_op;
106
107 int *_msf_verifiedLocs = NULL;
108
109 char _msf_numbers[200][3];
110 char _msf_cigar[5];
111
112 MappingInfo *_msf_mappingInfo;
113
114 int *_msf_seqHits;
115 int _msf_openFiles = 0;
116 int _msf_maxLSize=0;
117 int _msf_maxRSize=0;
118
119 BestFullMappingInfo *bestHitMappingInfo;
120
121 /*************************/
122 int _msf_maxFile=0;
/* NOTE(review): 4000 * 200 * 2 * FILE_NAME_LENGTH bytes of static storage;
 * FILE_NAME_LENGTH is defined outside this file section -- confirm the total
 * footprint is intended. */
123 char _msf_fileName[4000][200][2][FILE_NAME_LENGTH];
124 int _msf_fileCount[4000];
125
126 char *_msf_readHasConcordantMapping; //boolean if a read has concordant mapping :D
127
128 int *_msf_oeaMapping;
129 int *_msf_discordantMapping;
130
131 FILE *bestConcordantFILE;
132 FILE *bestDiscordantFILE;
133
134 int counter = 0;
135
/* scoreF and scoreB get their first row and first column filled by
 * initLookUpTable(). */
136 int scoreF[200][200];
137 int scoreB[200][200];
138
139 int score[200][200];
140 int direction1[200][200];
141 int direction2[200][200];
142
/* Lane mask assigned in initLookUpTable(): 16-bit lanes 0..4 are 1, lanes 5..7
 * are 0. */
143 __m128i MASK;
144
/* 15625 is 5^6, the number of values fastEditDistance() can decode into a
 * 6-letter string -- presumably this table is indexed by two such codes.
 * NOTE(review): 15625 * 15625 ints is roughly 244 million entries of static
 * storage, and none of the code shown here reads or writes the table --
 * confirm it is still needed. */
145 int lookUpTable[15625][15625];
146
147 /**************************************************Methods***************************************************/
148 int smallEditDistanceF(char *a, int lena, char *b, int lenb)
149 {
150 int matrix[20][20];
151 int i = 0;
152 int j = 0;
153
154 for(i = 0; i <= lena; i++)
155 {
156 matrix[0][i] = i;
157 }
158
159 for(i = 0; i <= lenb; i++)
160 {
161 matrix[i][0] = i;
162 }
163
164
165 for(i = 1; i <= lenb; i++)
166 {
167 for(j = 1; j <= lena; j++)
168 {
169 matrix[i][j] = min3(matrix[i-1][j-1]+ (a[j-1] != b[i-1]),matrix[i][j-1]+1 ,matrix[i-1][j]+1);
170 }
171 }
172 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]);
173 }
174
175 int smallEditDistanceB(char *a, int lena, char *b, int lenb)
176 {
177 int matrix[20][20];
178 int i = 0;
179 int j = 0;
180
181 for(i = 0; i <= lena; i++)
182 {
183 matrix[0][i] = i;
184 }
185
186 for(i = 0; i <= lenb; i++)
187 {
188 matrix[i][0] = i;
189 }
190
191
192 for(i = 1; i <= lenb; i++)
193 {
194 for(j = 1; j <= lena; j++)
195 {
196 matrix[i][j] = min3(matrix[i-1][j-1]+ (*(a-j+1) != *(b-i+1)),matrix[i][j-1]+1 ,matrix[i-1][j]+1);
197 }
198 }
199
200 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]);
201 }
202
/*
 * fastEditDistance: decodes per1 and per2 as six base-5 digits each
 * (0,1,2,3 -> 'A','C','G','T'; anything else -> 'N', most significant digit
 * first), fills a 7x7 edit-distance table for the two 6-letter strings and
 * returns the smallest value found in the table's last row or last column.
 */
char fastEditDistance(int per1, int per2)
{
	enum { KMER = 6 };

	char first[KMER];
	char second[KMER];
	int grid[KMER + 1][KMER + 1];
	int rest1 = per1;
	int rest2 = per2;
	int best = 20;
	int pos;
	int r;
	int c;

	/* The least significant digit lands in the last string position. */
	for (pos = KMER - 1; pos >= 0; pos--)
	{
		int d1 = rest1 % 5;
		int d2 = rest2 % 5;

		first[pos] = (d1 >= 0 && d1 <= 3) ? "ACGT"[d1] : 'N';
		second[pos] = (d2 >= 0 && d2 <= 3) ? "ACGT"[d2] : 'N';

		rest1 /= 5;
		rest2 /= 5;
	}

	/* Borders: distance against an empty prefix equals the prefix length. */
	for (r = 0; r <= KMER; r++)
	{
		grid[0][r] = r;
		grid[r][0] = r;
	}

	for (r = 1; r <= KMER; r++)
	{
		for (c = 1; c <= KMER; c++)
		{
			int cell = grid[r - 1][c - 1] + (first[r - 1] != second[c - 1]);

			if (grid[r][c - 1] + 1 < cell)
			{
				cell = grid[r][c - 1] + 1;
			}
			if (grid[r - 1][c] + 1 < cell)
			{
				cell = grid[r - 1][c] + 1;
			}
			grid[r][c] = cell;
		}
	}

	/* Scan the last column and the last row together. */
	for (r = 0; r <= KMER; r++)
	{
		if (grid[r][KMER] < best)
		{
			best = grid[r][KMER];
		}
		if (grid[KMER][r] < best)
		{
			best = grid[KMER][r];
		}
	}

	return (char)best;
}
269
270 void initLookUpTable()
271 {
272 int i = 0;
273
274 MASK = _mm_insert_epi16(MASK,1,0);
275 MASK = _mm_insert_epi16(MASK,1,1);
276 MASK = _mm_insert_epi16(MASK,1,2);
277 MASK = _mm_insert_epi16(MASK,1,3);
278 MASK = _mm_insert_epi16(MASK,1,4);
279 MASK = _mm_insert_epi16(MASK,0,5);
280 MASK = _mm_insert_epi16(MASK,0,6);
281 MASK = _mm_insert_epi16(MASK,0,7);
282
283 for(i = 0 ; i < errThreshold + 1; i++)
284 {
285 scoreF[0][i] = i;
286 scoreF[i][0] = i;
287 }
288
289 for(i = 0 ; i < errThreshold + 1; i++)
290 {
291 scoreB[0][i] = i;
292 scoreB[i][0] = i;
293 }
294
295
296 }
297
/*
 * backwardEditDistanceSSE2Odd: SSE2 edit-distance check that reads both
 * sequences backwards from the given pointers (a[0], a[-1], ... and
 * b[0], b[-1], ...).
 *
 * a, lena - first sequence pointer and its length
 * b, lenb - second sequence pointer and its length
 *
 * Returns 0 when either length is 0; the result of smallEditDistanceB() when
 * lenb <= errThreshold; -1 when the main loop finds no examined lane within
 * errThreshold or when the collected minimum exceeds errThreshold; otherwise
 * the collected minimum.
 *
 * R0 is updated on even values of i and R1 on odd values. Diag holds per-lane
 * mismatch flags (0/1), Side* and Down* hold per-lane step costs, and 2*e is
 * inserted into lanes that apparently must never win the minimum.
 * Error is populated but never read in this function.
 *
 * NOTE(review): sums such as R1+Side1 use the compiler's vector "+" on
 * __m128i instead of an _mm_add_* intrinsic -- confirm the lane width this
 * produces on the target compiler.
 * NOTE(review): "Odd" presumably refers to an odd errThreshold -- confirm
 * against the caller.
 */
298 int backwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb)
299 {
300 if(lenb == 0 || lena == 0)
301 return 0;
302
303 int i = 0;
304 int j = 0;
305 int k = 0;
306
307
308 int e = errThreshold;
309
310 char flag = 0;
311
312 int minError = 2*e;
313
314 __m128i R0, R1;
315 __m128i Diag;
316 __m128i Side1, Side2;
317 __m128i Down1, Down2;
318 __m128i Error;
319 __m128i tmp;
320
321 /* initialize */
322 R0 = _mm_setzero_si128 ();
323 R1 = _mm_setzero_si128 ();
324 Diag = _mm_setzero_si128 ();
325 Side1 = _mm_setzero_si128 ();
326 Side2 = _mm_setzero_si128 ();
327 Down1 = _mm_setzero_si128 ();
328 Down2 = _mm_setzero_si128 ();
329 Error = _mm_setzero_si128 ();
330 tmp = _mm_setzero_si128 ();
331 /* end initialize */
332
/* Inputs no longer than the threshold go to the scalar routine. */
333 if(lenb <= e)
334 {
335 return smallEditDistanceB(a,lena,b,lenb);
336 }
337
338
339 R1 = _mm_xor_si128(R1, R1);
340 R0 = _mm_xor_si128(R0, R0);
341
342 Diag = _mm_xor_si128(Diag, Diag);
343 Side1 = _mm_xor_si128(Side1, Side1);
344 Down1 = _mm_xor_si128(Down1, Down1);
345
346 Diag = _mm_insert_epi16(Diag,2*e,0);
347
348 Side1 = _mm_insert_epi16(Side1,1,0);
349 Side1 = _mm_insert_epi16(Side1,2*e,1);
350
351 Down1 = _mm_insert_epi16(Down1,2*e,0);
352 Down1 = _mm_insert_epi16(Down1,1,1);
353 Down1 = _mm_insert_epi16(Down1,2*e,2);
354
355 R0 = _mm_insert_epi16(R0,0,0);
356
357 R1 = _mm_insert_epi16(R1,1,0);
358 R1 = _mm_insert_epi16(R1,1,1);
359
/* First phase (i = 2..e): every step shifts Side1 and Down1 left by one lane
 * and rebuilds Diag from scratch, framed by 2*e in its first and last lane. */
360 for(i=2; i <= e; i++)
361 {
362 //set side
363 Side1 = _mm_slli_si128(Side1,2);
364 Side1 = _mm_insert_epi16(Side1,1,0);
365
366 Down1 = _mm_insert_epi16(Down1,1,0);
367 Down1 = _mm_slli_si128(Down1,2);
368 Down1 = _mm_insert_epi16(Down1,2*e,0);
369
370 Diag = _mm_xor_si128(Diag, Diag);
371 if( i%2 == 0)
372 {
373 Diag = _mm_insert_epi16(Diag,2*e,0);
374
375 for(j=1;j<=i-1;j++)
376 {
377 Diag = _mm_slli_si128(Diag, 2);
378 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0);
379 }
380 Diag = _mm_slli_si128(Diag, 2);
381 Diag = _mm_insert_epi16(Diag, 2*e,0);
382
383 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
384 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
385 }
386
387 else
388 {
389 Diag = _mm_insert_epi16(Diag,2*e,0);
390 for(j=i/2-1;j>=-i/2;j--)
391 {
392 Diag = _mm_slli_si128(Diag, 2);
393 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0);
394 }
395 Diag = _mm_slli_si128(Diag, 2);
396 Diag = _mm_insert_epi16(Diag, 2*e,0);
397
398 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
399 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
400 }
401 }
/* Rebuild the cost vectors for the main phase. */
402 Error = _mm_xor_si128(Error, Error);
403 Side2 = _mm_xor_si128(Side2, Side2);
404 Down2 = _mm_xor_si128(Down2, Down2);
405 Down1 = _mm_xor_si128(Down1, Down1);
406
407 Error = _mm_insert_epi16(Error,e,0);
/* Side1 is derived from Side2, which was cleared just above, so it restarts as
 * all zero with 2*e in lane 0. */
408 Side1 = _mm_insert_epi16(Side2,2*e,0);
409 Side2 = _mm_insert_epi16(Side2,2*e,0);
410 Down1 = _mm_insert_epi16(Down1,2*e,0);
411
412
413 for(j=0; j < e; j++)
414 {
415 Side2 = _mm_slli_si128(Side2, 2);
416 Side2 = _mm_insert_epi16(Side2,1,0);
417
418 Side1 = _mm_slli_si128(Side1, 2);
419 Side1 = _mm_insert_epi16(Side1,1,0);
420
421 Down1 = _mm_slli_si128(Down1, 2);
422 Down1 = _mm_insert_epi16(Down1,1,0);
423
424 Down2 = _mm_slli_si128(Down2, 2);
425 Down2 = _mm_insert_epi16(Down2,1,0);
426
427 Error = _mm_slli_si128(Error, 2);
428 Error = _mm_insert_epi16(Error, e, 0);
429 }
430
431 Down2= _mm_slli_si128(Down2, 2);
432 Down2 = _mm_insert_epi16(Down2,2*e,0);
433
/* Main phase: continues from the i left by the first loop up to
 * 2*lenb-(e-1), alternating between R0 (even i) and R1 (odd i). */
434 for(; i <= 2*lenb-(e-1);i++)
435 {
436 flag = 0;
437 Diag = _mm_xor_si128(Diag, Diag);
438 if( i%2 == 0)
439 {
440 for(j=e/2;j>=-e/2;j--)
441 {
442 Diag = _mm_slli_si128(Diag, 2);
443 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0);
444 }
445
446 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
447 R0 = _mm_min_epi16(R0, R1+Down1);
448
449
/* Early exit: return -1 unless lane 0 or one of lanes 1..e+1 of R0 is still
 * within e. */
450 if(_mm_extract_epi16(R0,0) <= e)
451 flag = 1;
452 tmp = _mm_srli_si128(R0,2);
453 for(j=0; j <= e;j++)
454 {
455 if(_mm_extract_epi16(tmp,0) <= e)
456 flag = 1;
457 tmp = _mm_srli_si128(tmp,2);
458 }
459
460 if(flag == 0)
461 return -1;
462
/* At i == 2*lenb-e, minError is overwritten with lane e-1 of R0. */
463 if(i == 2*lenb-e)
464 {
465 tmp = _mm_srli_si128(R0,2);
466 for(k=0; k < e-2;k++)
467 tmp = _mm_srli_si128(tmp,2);
468 minError = _mm_extract_epi16(tmp,0);
469 }
470
471 }
472
473 else
474 {
475 for(j=e/2;j>=-e/2-1;j--)
476 {
477 Diag = _mm_slli_si128(Diag, 2);
478 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i)/2-j-1)),0);
479 }
480
481 // printf("@%d %d %d %d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1), _mm_extract_epi16(Diag,2),
482 // _mm_extract_epi16(Diag,3));
483
484 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
485
486 // printf("#~%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
487 // _mm_extract_epi16(R1,3));
488
489 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
490
491 // printf("$%d %d %d %d\n", _mm_extract_epi16(Side2,0), _mm_extract_epi16(Side2,1), _mm_extract_epi16(Side2,2),
492 // _mm_extract_epi16(Side2,3));
493
494 // printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
495 // _mm_extract_epi16(R1,3));
496
497
498
/* From i >= 2*lenb-e on, lane e of R1 is folded into minError. */
499 if(i >= 2*lenb-e)
500 {
501 tmp = _mm_srli_si128(R1,2);
502 for(k=0; k < e-1;k++)
503 tmp = _mm_srli_si128(tmp,2);
504 minError = min(minError, _mm_extract_epi16(tmp,0));
505 }
506 }
507 }
508
/* Tail: five hand-written steps with fixed lane positions, each folding one
 * lane into minError.
 * NOTE(review): the fixed offsets (lenb-3 .. lenb-1, lanes 0..3) look tied to
 * one particular errThreshold -- confirm which values this function is used
 * with. */
509 //first cell
510 Diag = _mm_xor_si128(Diag,Diag);
511 Diag = _mm_insert_epi16(Diag, *(b-(lenb-3)) != *(a-lena), 0);
512 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena-1)), 1);
513 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-2)), 2);
514 Diag = _mm_insert_epi16(Diag, 2*e, 3);
515 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
516 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
517
518 minError = min(minError, _mm_extract_epi16(R1,2));
519
520 //second cell
521 Diag = _mm_xor_si128(Diag,Diag);
522 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena)), 0);
523 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-1)), 1);
524 Diag = _mm_insert_epi16(Diag, 2*e, 2);
525
526 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
527 R0 = _mm_min_epi16(R0, R1+Down1);
528
529 minError = min(minError, _mm_extract_epi16(R0,1));
530
531 //third cell
532 Diag = _mm_xor_si128(Diag,Diag);
533 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena+1)), 0);
534 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena)), 1);
535 Diag = _mm_insert_epi16(Diag, 2*e, 2);
536
537 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
538 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
539
540 minError = min(minError, _mm_extract_epi16(R1,1));
541
542 //forth
543 Diag = _mm_xor_si128(Diag,Diag);
544 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+1)), 0);
545 Diag = _mm_insert_epi16(Diag, 2*e, 1);
546
547 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
548 R0 = _mm_min_epi16(R0, R1+Down1);
549
550 minError = min(minError, _mm_extract_epi16(R0,0));
551
552 //fifth
553 Diag = _mm_xor_si128(Diag,Diag);
554 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+2)), 0);
555 Diag = _mm_insert_epi16(Diag, 2*e, 1);
556
557 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
558 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
559
/* NOTE(review): this step updated R1, yet lane 0 of R0 is read again here (the
 * same value already folded in after the fourth step) -- confirm whether R1
 * was intended. */
560 minError = min(minError, _mm_extract_epi16(R0,0));
561
562 if(minError > e)
563 return -1;
564 return minError;
565 }
566
/*
 * backwardEditDistanceSSE2G: SSE2 edit-distance check that reads both
 * sequences backwards from the given pointers (a[0], a[-1], ... and
 * b[0], b[-1], ...).
 *
 * a, lena - first sequence pointer and its length; lena is only used for the
 *           zero check and when deferring to smallEditDistanceB()
 * b, lenb - second sequence pointer and its length
 *
 * Returns 0 when either length is 0; the result of smallEditDistanceB() when
 * lenb <= errThreshold; -1 when the main loop finds no examined lane within
 * errThreshold or when the collected minimum exceeds errThreshold; otherwise
 * the collected minimum.
 *
 * R0 is updated on even steps and R1 on odd steps. Diag holds per-lane
 * mismatch flags (0/1), Side* and Down* hold per-lane step costs, and 2*e is
 * inserted into lanes that apparently must never win the minimum.
 * Error is populated but never read in this function.
 *
 * NOTE(review): sums such as R1+Side2 use the compiler's vector "+" on
 * __m128i instead of an _mm_add_* intrinsic -- confirm the lane width this
 * produces on the target compiler.
 */
567 int backwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb)
568 {
569 if(lenb == 0 || lena == 0)
570 return 0;
571
572 int i = 0;
573 int j = 0;
574 int k = 0;
575
576
577 int e = errThreshold;
578
579 char flag = 0;
580
581 int minError = 2*e;
582
583 __m128i R0, R1;
584 __m128i Diag;
585 __m128i Side1, Side2;
586 __m128i Down1, Down2;
587 __m128i Error;
588 __m128i tmp;
589
590 /* initialize */
591 R0 = _mm_setzero_si128 ();
592 R1 = _mm_setzero_si128 ();
593 Diag = _mm_setzero_si128 ();
594 Side1 = _mm_setzero_si128 ();
595 Side2 = _mm_setzero_si128 ();
596 Down1 = _mm_setzero_si128 ();
597 Down2 = _mm_setzero_si128 ();
598 Error = _mm_setzero_si128 ();
599 tmp = _mm_setzero_si128 ();
600 /* end initialize */
601
/* Inputs no longer than the threshold go to the scalar routine. */
602 if(lenb <= e)
603 {
604 return smallEditDistanceB(a,lena,b,lenb);
605 }
606
607
608 R1 = _mm_xor_si128(R1, R1);
609 R0 = _mm_xor_si128(R0, R0);
610
611 Diag = _mm_xor_si128(Diag, Diag);
612 Side1 = _mm_xor_si128(Side1, Side1);
613 Down1 = _mm_xor_si128(Down1, Down1);
614
615 Diag = _mm_insert_epi16(Diag,2*e,0);
616
617 Side1 = _mm_insert_epi16(Side1,1,0);
618 Side1 = _mm_insert_epi16(Side1,2*e,1);
619
620 Down1 = _mm_insert_epi16(Down1,2*e,0);
621 Down1 = _mm_insert_epi16(Down1,1,1);
622 Down1 = _mm_insert_epi16(Down1,2*e,2);
623
624 R0 = _mm_insert_epi16(R0,0,0);
625
626 R1 = _mm_insert_epi16(R1,1,0);
627 R1 = _mm_insert_epi16(R1,1,1);
628
/* First phase (i = 2..e): every step shifts Side1 and Down1 left by one lane
 * and rebuilds Diag from scratch, framed by 2*e in its first and last lane. */
629 for(i=2; i <= e; i++)
630 {
631 //set side
632 Side1 = _mm_slli_si128(Side1,2);
633 Side1 = _mm_insert_epi16(Side1,1,0);
634
635 Down1 = _mm_insert_epi16(Down1,1,0);
636 Down1 = _mm_slli_si128(Down1,2);
637 Down1 = _mm_insert_epi16(Down1,2*e,0);
638
639 Diag = _mm_xor_si128(Diag, Diag);
640 if( i%2 == 0)
641 {
642 Diag = _mm_insert_epi16(Diag,2*e,0);
643
644 for(j=1;j<=i-1;j++)
645 {
646 Diag = _mm_slli_si128(Diag, 2);
647 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0);
648 }
649 Diag = _mm_slli_si128(Diag, 2);
650 Diag = _mm_insert_epi16(Diag, 2*e,0);
651
652 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
653 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
654 }
655
656 else
657 {
658 Diag = _mm_insert_epi16(Diag,2*e,0);
659 for(j=i/2-1;j>=-i/2;j--)
660 {
661 Diag = _mm_slli_si128(Diag, 2);
662 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0);
663 }
664 Diag = _mm_slli_si128(Diag, 2);
665 Diag = _mm_insert_epi16(Diag, 2*e,0);
666
667 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
668 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
669 }
670 }
/* Rebuild Error, Side2, Down1 and Down2 for the main phase. Side1 is not
 * reset here and keeps the value built by the first loop. */
671 Error = _mm_xor_si128(Error, Error);
672 Side2 = _mm_xor_si128(Side2, Side2);
673 Down2 = _mm_xor_si128(Down2, Down2);
674 Down1 = _mm_xor_si128(Down1, Down1);
675
676 Error = _mm_insert_epi16(Error,e,0);
677 Side2 = _mm_insert_epi16(Side2,2*e,0);
678 Down1 = _mm_insert_epi16(Down1,2*e,0);
679
680
681 for(j=0; j < e; j++)
682 {
683 Side2 = _mm_slli_si128(Side2, 2);
684 Side2 = _mm_insert_epi16(Side2,1,0);
685
686 Down1 = _mm_slli_si128(Down1, 2);
687 Down1 = _mm_insert_epi16(Down1,1,0);
688
689 Down2 = _mm_slli_si128(Down2, 2);
690 Down2 = _mm_insert_epi16(Down2,1,0);
691
692 Error = _mm_slli_si128(Error, 2);
693 Error = _mm_insert_epi16(Error, e, 0);
694 }
695
696 Down2= _mm_slli_si128(Down2, 2);
697 Down2 = _mm_insert_epi16(Down2,2*e,0);
698
/* Main phase: continues from the i left by the first loop up to
 * 2*lenb-(e-1), alternating between R0 (even i) and R1 (odd i). */
699 for(; i <= 2*lenb-(e-1);i++)
700 {
701 flag = 0;
702 Diag = _mm_xor_si128(Diag, Diag);
703 if( i%2 == 0)
704 {
705 for(j=e/2;j>=-e/2;j--)
706 {
707 Diag = _mm_slli_si128(Diag, 2);
708 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0);
709 }
710
711 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
712 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
713
/* Early exit: return -1 unless lane 0 or one of lanes 1..e+1 of R0 is still
 * within e. */
714 if(_mm_extract_epi16(R0,0) <= e)
715 flag = 1;
716 tmp = _mm_srli_si128(R0,2);
717 for(j=0; j <= e;j++)
718 {
719 if(_mm_extract_epi16(tmp,0) <= e)
720 flag = 1;
721 tmp = _mm_srli_si128(tmp,2);
722 }
723
724 if(flag == 0)
725 return -1;
726
/* At i == 2*lenb-e, minError is overwritten with lane e of R0. */
727 if(i == 2*lenb-e)
728 {
729 tmp = _mm_srli_si128(R0,2);
730 for(k=0; k < e-1;k++)
731 tmp = _mm_srli_si128(tmp,2);
732 minError = _mm_extract_epi16(tmp,0);
733 }
734
735 }
736
737 else
738 {
739 for(j=-e/2+1;j<=e/2;j++)
740 {
741 Diag = _mm_slli_si128(Diag, 2);
742 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2-j-1)) != *(a-((i-1)/2+j-1)),0);
743 }
744
745 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
746 R1 = _mm_min_epi16(R1, R0+Down1);
747
748
/* From i >= 2*lenb-e on, lane e-1 of R1 is folded into minError. */
749 if(i >= 2*lenb-e)
750 {
751 tmp = _mm_srli_si128(R1,2);
752 for(k=0; k < e-2;k++)
753 tmp = _mm_srli_si128(tmp,2);
754 minError = min(minError, _mm_extract_epi16(tmp,0));
755 }
756 }
757 }
758
/* Closing phase: 2*(e-2)+1 further steps. i keeps advancing, and tmpE (the
 * number of Diag lanes filled on later steps) is decremented on step 0 and on
 * every even step. Each step folds one lane of the updated register into
 * minError. */
759 j=0;
760 int tmpE = e;
761 for(;j<2*(e-2)+1;j++)
762 {
763
764 Diag = _mm_xor_si128(Diag, Diag);
765 //set the first element
766 if(j==0)
767 {
768 for( k=0;k<=e-1;k++ )
769 {
770 Diag = _mm_slli_si128(Diag, 2);
771 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
772 }
773
774 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
775 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
776
777
778 tmpE--;
779 tmp = _mm_srli_si128(R0,2);
780 for(k=0; k < e-2;k++)
781 tmp = _mm_srli_si128(tmp,2);
782 minError = min(minError, _mm_extract_epi16(tmp,0));
783 }
784 else if(j%2 == 0)
785 {
786 for(k=0;k<tmpE;k++)
787 {
788 Diag = _mm_slli_si128(Diag, 2);
789 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
790 }
791
792 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
793 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
794
795 tmpE--;
796
797 tmp = _mm_srli_si128(R0,2);
798 for(k=0; k < tmpE-1;k++)
799 tmp = _mm_srli_si128(tmp,2);
800 minError = min(minError, _mm_extract_epi16(tmp,0));
801 }
802
803
804 else
805 {
806 for(k=0;k<tmpE;k++)
807 {
808 Diag = _mm_slli_si128(Diag, 2);
809 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
810 }
811
812 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
813 R1 = _mm_min_epi16(R1, R0+Down1);
814
815 tmp = _mm_srli_si128(R1,2);
816 for(k=0; k < tmpE-2;k++)
817 tmp = _mm_srli_si128(tmp,2);
818 minError = min(minError, _mm_extract_epi16(tmp,0));
819 }
820 i++;
821 }
/* Two final hand-written steps: one updates R1 and folds in its lane 1, the
 * other updates R0 and folds in its lane 0. Both compare b[-(lenb-1)] with
 * a[-(lenb+e-2)] and a[-(lenb+e-1)] respectively. */
822 //Diag
823
824 Diag = _mm_xor_si128(Diag,Diag);
825 Diag = _mm_insert_epi16(Diag, 2*e, 0);
826 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
827
828 Side1 = _mm_insert_epi16(Side1,1,0);
829 Side1 = _mm_insert_epi16(Side1,1,1);
830
831 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
832 Down1 = _mm_insert_epi16(Down1, 1, 1);
833
834 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
835 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
836
837 minError = min(minError, _mm_extract_epi16(R1,1));
838
839 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
840 Down1 = _mm_insert_epi16(Down1, 1, 0);
841
842 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
843 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
844
845 minError = min(minError, _mm_extract_epi16(R0,0));
846
847 if(minError > e)
848 return -1;
849 return minError;
850 }
851
/*
 * backwardEditDistanceSSE2Extention: SSE2 edit-distance check that reads both
 * sequences backwards from the given pointers (a[0], a[-1], ... and
 * b[0], b[-1], ...). The band parameter e is fixed at 4 here, while accept /
 * reject decisions use errThreshold (copied into mismatch).
 *
 * a, lena - first sequence pointer and its length; lena is only used for the
 *           zero check and when deferring to smallEditDistanceB()
 * b, lenb - second sequence pointer and its length
 *
 * Returns 0 when either length is 0; the result of smallEditDistanceB() when
 * lenb <= 4; -1 when all examined lanes of R0 and R1 exceed errThreshold in
 * the main loop or when the collected minimum exceeds errThreshold; otherwise
 * the collected minimum.
 *
 * The main loop compares characters in bulk: SeqA and SeqB hold the most
 * recently shifted-in characters as 16-bit lanes, and the comparison result
 * is reduced to 0/1 flags with the global MASK, which therefore must already
 * have been set (initLookUpTable() assigns it).
 *
 * NOTE(review): sums such as R1+Side2 use the compiler's vector "+" on
 * __m128i instead of an _mm_add_* intrinsic -- confirm the lane width this
 * produces on the target compiler.
 * NOTE(review): declared "inline" without "static" or "extern"; under C99
 * inline rules that alone does not provide an external definition -- confirm
 * the inline mode the build uses.
 */
852 inline int backwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb)
853 {
854 if(lenb == 0 || lena == 0)
855 return 0;
856
857 int i = 0;
858 int j = 0;
859 int k = 0;
860
861 int i0;
862 int i1;
863 int i2;
864 int i4;
865 int i5;
866
867 int e = 4;
868 int mismatch = errThreshold;
869
870 int minError = 2*errThreshold;
871 int index = 0;
872 int tmpValue = 0;
873
/* Short inputs go to the scalar routine. */
874 if(lenb <= e)
875 {
876 return smallEditDistanceB(a,lena,b,lenb);
877 }
878
879
880 __m128i R0, R1;
881 __m128i Diag;
882 __m128i Side1, Side2;
883 __m128i Down1, Down2;
884 __m128i tmp;
885 __m128i SeqA, SeqB;
886 __m128i Result;
887
888 /* initialize */
889 R0 = _mm_setzero_si128 ();
890 R1 = _mm_setzero_si128 ();
891 Diag = _mm_setzero_si128 ();
892 Side1 = _mm_setzero_si128 ();
893 Side2 = _mm_setzero_si128 ();
894 Down1 = _mm_setzero_si128 ();
895 Down2 = _mm_setzero_si128 ();
896 SeqA = _mm_setzero_si128 ();
897 SeqB = _mm_setzero_si128 ();
898 Result = _mm_setzero_si128 ();
899 /* end initialize */
900
901 R1 = _mm_xor_si128(R1, R1);
902 R0 = _mm_xor_si128(R0, R0);
903
904 Diag = _mm_xor_si128(Diag, Diag);
905 Diag = _mm_insert_epi16(Diag,minError,0);
906
/* Seed values computed in scalar code from a[0], a[-1], a[-2] and
 * b[0], b[-1], b[-2]; they are loaded into R1 and R0 below together with the
 * constants 3 and 4 in the outer lanes. */
907 i0 = (a[0] != b[0]);
908 i1 = min(i0, ( *(a-1)!=*b) )+1;
909 i2 = min(i0,( a[0] != *(b-1) ) )+1;
910
911 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1);
912 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1;
913 i5 = min(i2, (a[0] != *(b-2))+1)+1;
914
915 R1 = _mm_insert_epi16(R1, 3, 0);
916 R1 = _mm_insert_epi16(R1, i1, 1);
917 R1 = _mm_insert_epi16(R1, i2, 2);
918 R1 = _mm_insert_epi16(R1, 3, 3);
919
920
921 R0 = _mm_insert_epi16(R0, 4, 0);
922 R0 = _mm_insert_epi16(R0, i4, 1);
923 R0 = _mm_insert_epi16(R0, i0, 2);
924 R0 = _mm_insert_epi16(R0, i5, 3);
925 R0 = _mm_insert_epi16(R0, 4, 4);
926
927
928 Side2 = _mm_xor_si128(Side2, Side2);
929 Down2 = _mm_xor_si128(Down2, Down2);
930 Down1 = _mm_xor_si128(Down1, Down1);
931 Side1 = _mm_xor_si128(Side1, Side1);
932
933 Side2 = _mm_insert_epi16(Side2,minError,0);
934 Down1 = _mm_insert_epi16(Down1,minError,0);
935
936 Side1 = _mm_insert_epi16(Side1,1,0);
937
/* Build the cost vectors and preload SeqA/SeqB with a[0..-3] and b[0..-3]
 * (the newest character always sits in lane 0). */
938 index = 0;
939 for(j=0; j < e; j++)
940 {
941 Side2 = _mm_slli_si128(Side2, 2);
942 Side2 = _mm_insert_epi16(Side2,1,0);
943
944 Down1 = _mm_slli_si128(Down1, 2);
945 Down1 = _mm_insert_epi16(Down1,1,0);
946
947 Down2 = _mm_slli_si128(Down2, 2);
948 Down2 = _mm_insert_epi16(Down2,1,0);
949
950 Side1 = _mm_slli_si128(Side1, 2);
951 Side1 = _mm_insert_epi16(Side1,1,0);
952
953 SeqA = _mm_slli_si128(SeqA, 2);
954 SeqB = _mm_slli_si128(SeqB, 2);
955 SeqA = _mm_insert_epi16(SeqA,*(a-index),0);
956 SeqB = _mm_insert_epi16(SeqB,*(b-index),0);
957 index++;
958 }
959
960 Down2= _mm_slli_si128(Down2, 2);
961 Down2 = _mm_insert_epi16(Down2,minError,0);
962
963 index = 4;
964 i = 5;
965
/* Main phase: i runs from 5 to 2*lenb-(e-1); even i updates R0 after shifting
 * one new character into SeqA and SeqB, odd i updates R1. */
966 int loopEnd = 2*lenb-(e-1);
967 for(; i <= loopEnd ;i++)
968 {
969
970 Diag = _mm_xor_si128(Diag, Diag);
971 if( i%2 == 0)
972 {
973 SeqA = _mm_slli_si128(SeqA, 2);
974 SeqB = _mm_slli_si128(SeqB, 2);
975 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0);
976 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0);
977
978 index++;
979
/* Shuffle control 27 reverses the low four lanes of SeqB; the result is then
 * shifted up one lane and the former lane 4 is copied into lane 0. */
980 tmp = _mm_shufflelo_epi16(SeqB,27);
981 tmp = _mm_slli_si128(tmp, 2);
982 tmpValue = _mm_extract_epi16(tmp, 5);
983 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
984
/* andnot(Result, MASK) keeps MASK's bits only where the lanes compared
 * unequal, giving the per-lane mismatch flags. */
985 Result = _mm_cmpeq_epi16(SeqA, tmp);
986 Diag = _mm_andnot_si128(Result, MASK);
987
988 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
989 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
990
/* Early exit when lanes 0..4 of R0 and lanes 0..3 of R1 all exceed
 * errThreshold. */
991 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold
992 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold && _mm_extract_epi16(R1, 0) > errThreshold
993 && _mm_extract_epi16(R1, 1) > errThreshold && _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold)
994 return -1;
995
/* At i == 2*lenb-e, minError is overwritten with lane e of R0. */
996 if(i == 2*lenb-e)
997 {
998 tmp = _mm_srli_si128(R0,2);
999 for(k=0; k < e-1;k++)
1000 tmp = _mm_srli_si128(tmp,2);
1001 minError = _mm_extract_epi16(tmp,0);
1002 }
1003
1004 }
1005
1006 else
1007 {
1008 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
1009 Diag = _mm_andnot_si128(Result, MASK);
1010
1011 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1012 R1 = _mm_min_epi16(R1, R0+Down1);
1013
1014
/* From i >= 2*lenb-e on, lane e-1 of R1 is folded into minError. */
1015 if(i >= 2*lenb-e)
1016 {
1017 tmp = _mm_srli_si128(R1,2);
1018 for(k=0; k < e-2;k++)
1019 tmp = _mm_srli_si128(tmp,2);
1020 minError = min(minError, _mm_extract_epi16(tmp,0));
1021 }
1022 }
1023
1024
1025 }
1026
/* Closing phase: 2*(e-2)+1 further steps using scalar character comparisons.
 * i keeps advancing, and tmpE (the number of Diag lanes filled on later steps)
 * is decremented on step 0 and on every even step. Each step folds one lane
 * of the updated register into minError. */
1027 j=0;
1028 int tmpE = e;
1029 for(;j<2*(e-2)+1;j++)
1030 {
1031
1032 Diag = _mm_xor_si128(Diag, Diag);
1033 //set the first element
1034 if(j==0)
1035 {
1036 for( k=0;k<=e-1;k++ )
1037 {
1038 Diag = _mm_slli_si128(Diag, 2);
1039 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1040 }
1041
1042 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1043 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1044
1045 tmpE--;
1046
1047 tmp = _mm_srli_si128(R0,2);
1048 for(k=0; k < e-2;k++)
1049 tmp = _mm_srli_si128(tmp,2);
1050 minError = min(minError, _mm_extract_epi16(tmp,0));
1051 }
1052 else if(j%2 == 0)
1053 {
1054 for(k=0;k<tmpE;k++)
1055 {
1056 Diag = _mm_slli_si128(Diag, 2);
1057 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1058 }
1059
1060 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1061 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1062
1063 tmpE--;
1064
1065 tmp = _mm_srli_si128(R0,2);
1066 for(k=0; k < tmpE-1;k++)
1067 tmp = _mm_srli_si128(tmp,2);
1068 minError = min(minError, _mm_extract_epi16(tmp,0));
1069 }
1070
1071
1072 else
1073 {
1074 for(k=0;k<tmpE;k++)
1075 {
1076 Diag = _mm_slli_si128(Diag, 2);
1077 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1078 }
1079
1080 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1081 R1 = _mm_min_epi16(R1, R0+Down1);
1082
1083 tmp = _mm_srli_si128(R1,2);
1084 for(k=0; k < tmpE-2;k++)
1085 tmp = _mm_srli_si128(tmp,2);
1086 minError = min(minError, _mm_extract_epi16(tmp,0));
1087 }
1088 i++;
1089 }
/* Two final hand-written steps: one updates R1 and folds in its lane 1, the
 * other updates R0 and folds in its lane 0. The sentinel here is
 * 2*errThreshold rather than 2*e. */
1090 //Diag
1091
1092 Diag = _mm_xor_si128(Diag,Diag);
1093 Diag = _mm_insert_epi16(Diag, 2*errThreshold, 0);
1094 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
1095
1096 Side1 = _mm_insert_epi16(Side1,1,0);
1097 Side1 = _mm_insert_epi16(Side1,1,1);
1098
1099 Down1 = _mm_insert_epi16(Down1, 2*errThreshold, 0);
1100 Down1 = _mm_insert_epi16(Down1, 1, 1);
1101
1102 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
1103 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
1104
1105 minError = min(minError, _mm_extract_epi16(R1,1));
1106
1107 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
1108 Down1 = _mm_insert_epi16(Down1, 1, 0);
1109
1110 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
1111 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
1112
1113 minError = min(minError, _mm_extract_epi16(R0,0));
1114
1115 if(minError > mismatch)
1116 return -1;
1117 return minError;
1118 }
1119
1120 int backwardEditDistance4SSE2(char *a, int lena, char *b,int lenb)
1121 {
1122 if(lenb == 0 || lena == 0)
1123 return 0;
1124
1125 int i = 0;
1126 int j = 0;
1127 int k = 0;
1128
1129 int i0;
1130 int i1;
1131 int i2;
1132 int i4;
1133 int i5;
1134
1135 int e = errThreshold;
1136
1137 int minError = 2*e;
1138 int index = 0;
1139 int tmpValue = 0;
1140
1141 if(lenb <= e)
1142 {
1143 return smallEditDistanceB(a,lena,b,lenb);
1144 }
1145
1146 __m128i R0, R1;
1147 __m128i Diag;
1148 __m128i Side1, Side2;
1149 __m128i Down1, Down2;
1150 __m128i tmp;
1151 __m128i SeqA, SeqB;
1152 __m128i Result;
1153
1154 /* initialize */
1155 R0 = _mm_setzero_si128 ();
1156 R1 = _mm_setzero_si128 ();
1157 Diag = _mm_setzero_si128 ();
1158 Side1 = _mm_setzero_si128 ();
1159 Side2 = _mm_setzero_si128 ();
1160 Down1 = _mm_setzero_si128 ();
1161 Down2 = _mm_setzero_si128 ();
1162 SeqA = _mm_setzero_si128 ();
1163 SeqB = _mm_setzero_si128 ();
1164 Result = _mm_setzero_si128 ();
1165 /* end initialize */
1166
1167 R1 = _mm_xor_si128(R1, R1);
1168 R0 = _mm_xor_si128(R0, R0);
1169
1170 Diag = _mm_xor_si128(Diag, Diag);
1171 Diag = _mm_insert_epi16(Diag,2*e,0);
1172
1173 i0 = (a[0] != b[0]);
1174 i1 = min(i0, ( *(a-1)!=*b) )+1;
1175 i2 = min(i0,( a[0] != *(b-1) ) )+1;
1176
1177 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1);
1178 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1;
1179 i5 = min(i2, (a[0] != *(b-2))+1)+1;
1180
1181 R1 = _mm_insert_epi16(R1, 3, 0);
1182 R1 = _mm_insert_epi16(R1, i1, 1);
1183 R1 = _mm_insert_epi16(R1, i2, 2);
1184 R1 = _mm_insert_epi16(R1, 3, 3);
1185
1186
1187 R0 = _mm_insert_epi16(R0, 4, 0);
1188 R0 = _mm_insert_epi16(R0, i4, 1);
1189 R0 = _mm_insert_epi16(R0, i0, 2);
1190 R0 = _mm_insert_epi16(R0, i5, 3);
1191 R0 = _mm_insert_epi16(R0, 4, 4);
1192
1193 Side2 = _mm_xor_si128(Side2, Side2);
1194 Down2 = _mm_xor_si128(Down2, Down2);
1195 Down1 = _mm_xor_si128(Down1, Down1);
1196 Side1 = _mm_xor_si128(Side1, Side1);
1197
1198 Side2 = _mm_insert_epi16(Side2,2*e,0);
1199 Down1 = _mm_insert_epi16(Down1,2*e,0);
1200
1201 Side1 = _mm_insert_epi16(Side1,1,0);
1202
1203 index = 0;
1204 for(j=0; j < e; j++)
1205 {
1206 Side2 = _mm_slli_si128(Side2, 2);
1207 Side2 = _mm_insert_epi16(Side2,1,0);
1208
1209 Down1 = _mm_slli_si128(Down1, 2);
1210 Down1 = _mm_insert_epi16(Down1,1,0);
1211
1212 Down2 = _mm_slli_si128(Down2, 2);
1213 Down2 = _mm_insert_epi16(Down2,1,0);
1214
1215 Side1 = _mm_slli_si128(Side1, 2);
1216 Side1 = _mm_insert_epi16(Side1,1,0);
1217
1218 SeqA = _mm_slli_si128(SeqA, 2);
1219 SeqB = _mm_slli_si128(SeqB, 2);
1220 SeqA = _mm_insert_epi16(SeqA,*(a-index),0);
1221 SeqB = _mm_insert_epi16(SeqB,*(b-index),0);
1222 index++;
1223 }
1224
1225 Down2= _mm_slli_si128(Down2, 2);
1226 Down2 = _mm_insert_epi16(Down2,2*e,0);
1227
1228 index = 4;
1229 i = 5;
1230 int loopEnd = 2*lenb-(e-1);
1231 for(; i <= loopEnd ;i++)
1232 {
1233
1234 Diag = _mm_xor_si128(Diag, Diag);
1235 if( i%2 == 0)
1236 {
1237 SeqA = _mm_slli_si128(SeqA, 2);
1238 SeqB = _mm_slli_si128(SeqB, 2);
1239 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0);
1240 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0);
1241
1242 index++;
1243
1244 tmp = _mm_shufflelo_epi16(SeqB,27);
1245 tmp = _mm_slli_si128(tmp, 2);
1246 tmpValue = _mm_extract_epi16(tmp, 5);
1247 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
1248
1249 Result = _mm_cmpeq_epi16(SeqA, tmp);
1250 Diag = _mm_andnot_si128(Result, MASK);
1251
1252 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1253 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1254
1255 //tmp = _mm_sub_epi16(Error, R0);
1256 //i0 = _mm_movemask_epi8(tmp);
1257
1258 if( _mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e
1259 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e &&
1260 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e )
1261 return -1;
1262
1263 if(i == 2*lenb-e)
1264 {
1265 tmp = _mm_srli_si128(R0,2);
1266 for(k=0; k < e-1;k++)
1267 tmp = _mm_srli_si128(tmp,2);
1268 minError = _mm_extract_epi16(tmp,0);
1269 }
1270
1271 }
1272
1273 else
1274 {
1275 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
1276 Diag = _mm_andnot_si128(Result, MASK);
1277
1278 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1279 R1 = _mm_min_epi16(R1, R0+Down1);
1280
1281 if(i >= 2*lenb-e)
1282 {
1283 tmp = _mm_srli_si128(R1,2);
1284 for(k=0; k < e-2;k++)
1285 tmp = _mm_srli_si128(tmp,2);
1286 minError = min(minError, _mm_extract_epi16(tmp,0));
1287 }
1288 }
1289
1290
1291 }
1292
1293 j=0;
1294
1295 int tmpE = e;
1296
1297 for(;j<2*(e-2)+1;j++)
1298 {
1299
1300 Diag = _mm_xor_si128(Diag, Diag);
1301 //set the first element
1302 if(j==0)
1303 {
1304 for( k=0;k<=e-1;k++ )
1305 {
1306 Diag = _mm_slli_si128(Diag, 2);
1307 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1308 }
1309
1310 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1311 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1312
1313 tmpE--;
1314
1315 tmp = _mm_srli_si128(R0,2);
1316 for(k=0; k < e-2;k++)
1317 tmp = _mm_srli_si128(tmp,2);
1318 minError = min(minError, _mm_extract_epi16(tmp,0));
1319 }
1320 else if(j%2 == 0)
1321 {
1322 for(k=0;k<tmpE;k++)
1323 {
1324 Diag = _mm_slli_si128(Diag, 2);
1325 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1326 }
1327
1328 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1329 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1330
1331 tmpE--;
1332
1333 tmp = _mm_srli_si128(R0,2);
1334 for(k=0; k < tmpE-1;k++)
1335 tmp = _mm_srli_si128(tmp,2);
1336 minError = min(minError, _mm_extract_epi16(tmp,0));
1337 }
1338
1339
1340 else
1341 {
1342 for(k=0;k<tmpE;k++)
1343 {
1344 Diag = _mm_slli_si128(Diag, 2);
1345 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
1346 }
1347
1348 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1349 R1 = _mm_min_epi16(R1, R0+Down1);
1350
1351 tmp = _mm_srli_si128(R1,2);
1352 for(k=0; k < tmpE-2;k++)
1353 tmp = _mm_srli_si128(tmp,2);
1354 minError = min(minError, _mm_extract_epi16(tmp,0));
1355 }
1356 i++;
1357 }
1358 //Diag
1359
1360 Diag = _mm_xor_si128(Diag,Diag);
1361 Diag = _mm_insert_epi16(Diag, 2*e, 0);
1362 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
1363
1364 Side1 = _mm_insert_epi16(Side1,1,0);
1365 Side1 = _mm_insert_epi16(Side1,1,1);
1366
1367 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
1368 Down1 = _mm_insert_epi16(Down1, 1, 1);
1369
1370 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
1371 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
1372
1373 minError = min(minError, _mm_extract_epi16(R1,1));
1374
1375 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
1376 Down1 = _mm_insert_epi16(Down1, 1, 0);
1377
1378 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
1379 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
1380
1381 minError = min(minError, _mm_extract_epi16(R0,0));
1382
1383 if(minError > e)
1384 return -1;
1385 return minError;
1386 }
1387
/*
 * forwardEditDistanceSSE2Extention
 *
 * SSE2 routine that walks the sequences a and b forward (indices 0, 1, 2, ...)
 * and returns an error count, or -1 once that count is above errThreshold.
 * Judging by the name and the min-of-three recurrences this is presumably a
 * banded edit-distance computation -- confirm against the callers.
 *
 * The width parameter e is fixed at 4 in this variant, while the acceptance
 * limit (mismatch) is taken from the global errThreshold.
 *
 * a, lena : first sequence and its length.  lena is only compared with 0;
 *           a itself is indexed up to a[lenb+e-1], so the caller presumably
 *           guarantees that much readable data -- TODO confirm.
 * b, lenb : second sequence and its length.
 *
 * Returns 0 when either length is 0, the result of smallEditDistanceF()
 * when lenb <= 4, -1 when the bound is exceeded, and otherwise the smallest
 * value collected in minError.
 */
1388 inline int forwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb)
1389 {
1390 if(lenb == 0 || lena == 0)
1391 return 0;
1392
1393 int i = 0;
1394 int j = 0;
1395 int k = 0;
1396
1397 int i0=0;
1398 int i1=0;
1399 int i2=0;
1400 int i4=0;
1401 int i5=0;
1402
1403 int mismatch = errThreshold;
1404 int e = 4;
1405
// Starts above the acceptance limit and doubles as the "large" sentinel
// written into the border lanes of Diag/Side2/Down1/Down2 below.
1406 int minError = 4*mismatch+1;
1407 int index = 0;
1408 int tmpValue = 0;
1409
// Short b: hand over to the scalar helper.
1410 if(lenb <= e)
1411 {
1412 return smallEditDistanceF(a,lena,b,lenb);
1413 }
1414
1415
1416 register __m128i R0, R1;
1417 __m128i Diag;
1418 __m128i Side1, Side2;
1419 __m128i Down1, Down2;
1420 __m128i tmp;
1421 register __m128i SeqA, SeqB;
1422 __m128i Result;
1423
1424 __m128i tmpSeqA;
1425 __m128i tmpSeqB;
1426
1427 /* initialize */
1428 R0 = _mm_setzero_si128 ();
1429 R1 = _mm_setzero_si128 ();
1430 Diag = _mm_setzero_si128 ();
1431 Side1 = _mm_setzero_si128 ();
1432 Side2 = _mm_setzero_si128 ();
1433 Down1 = _mm_setzero_si128 ();
1434 Down2 = _mm_setzero_si128 ();
1435 SeqA = _mm_setzero_si128 ();
1436 SeqB = _mm_setzero_si128 ();
1437 Result = _mm_setzero_si128 ();
1438 /* end initialize */
1439
1440
1441 R1 = _mm_xor_si128(R1, R1);
1442 R0 = _mm_xor_si128(R0, R0);
1443
1444 Diag = _mm_xor_si128(Diag, Diag);
1445 Diag = _mm_insert_epi16(Diag,minError,0);
1446
// Scalar seed values computed from the first three characters of a and b.
1447 i0 = (a[0] != b[0]);
1448 i1 = min(i0, (a[1]!=b[0]))+1;
1449 i2 = min(i0,(a[0]!=b[1]))+1;
1450
1451 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1);
1452 i4 = min(i1, (a[2]!=b[0])+1)+1;
1453 i5 = min(i2, (a[0]!=b[2])+1)+1;
1454
// Seeds go into 16-bit lanes 0-3 of R1 and lanes 0-4 of R0.
1455 R1 = _mm_insert_epi16(R1, 3, 0);
1456 R1 = _mm_insert_epi16(R1, i1, 1);
1457 R1 = _mm_insert_epi16(R1, i2, 2);
1458 R1 = _mm_insert_epi16(R1, 3, 3);
1459
1460 R0 = _mm_insert_epi16(R0, 4, 0);
1461 R0 = _mm_insert_epi16(R0, i4, 1);
1462 R0 = _mm_insert_epi16(R0, i0, 2);
1463 R0 = _mm_insert_epi16(R0, i5, 3);
1464 R0 = _mm_insert_epi16(R0, 4, 4);
1465
1466 Side2 = _mm_xor_si128(Side2, Side2);
1467 Down2 = _mm_xor_si128(Down2, Down2);
1468 Down1 = _mm_xor_si128(Down1, Down1);
1469 Side1 = _mm_xor_si128(Side1, Side1);
1470
1471 Side2 = _mm_insert_epi16(Side2,minError,0);
1472 Down1 = _mm_insert_epi16(Down1,minError,0);
1473
1474 Side1 = _mm_insert_epi16(Side1,1,0);
1475
// Shift e lanes holding 1 into each cost vector (the sentinel inserted
// above moves up to lane e) and load the first e characters of a and b
// into SeqA/SeqB, newest character in lane 0.
1476 index = 0;
1477 for(j=0; j < e; j++)
1478 {
1479 Side2 = _mm_slli_si128(Side2, 2);
1480 Side2 = _mm_insert_epi16(Side2,1,0);
1481
1482 Down1 = _mm_slli_si128(Down1, 2);
1483 Down1 = _mm_insert_epi16(Down1,1,0);
1484
1485 Down2 = _mm_slli_si128(Down2, 2);
1486 Down2 = _mm_insert_epi16(Down2,1,0);
1487
1488 Side1 = _mm_slli_si128(Side1, 2);
1489 Side1 = _mm_insert_epi16(Side1,1,0);
1490
1491 SeqA = _mm_slli_si128(SeqA, 2);
1492 SeqB = _mm_slli_si128(SeqB, 2);
1493 SeqA = _mm_insert_epi16(SeqA,a[index],0);
1494 SeqB = _mm_insert_epi16(SeqB,b[index],0);
1495 index++;
1496 }
1497
1498 Down2= _mm_slli_si128(Down2, 2);
1499 Down2 = _mm_insert_epi16(Down2,minError,0);
1500
1501 index = 4;
1502 i = 5;
1503
// Main loop: even i updates R0, odd i updates R1.
1504 int loopEnd = 2*lenb-(e-1);
1505 for(; i <= loopEnd ;i++)
1506 {
1507 if( i%2 == 0)
1508 {
// Shift the next character of each sequence into lane 0.
1509 tmpSeqA = _mm_slli_si128(SeqA, 2);
1510 tmpSeqB = _mm_slli_si128(SeqB, 2);
1511 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0);
1512 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0);
1513
1514 index++;
1515
// tmp = SeqB with its five lowest lanes in reverse order: shuffle control
// 27 reverses lanes 0-3, the byte shift moves them up one lane, and the
// old lane 4 (now lane 5) is copied into lane 0.
1516 tmp = _mm_shufflelo_epi16(SeqB,27);
1517 tmp = _mm_slli_si128(tmp, 2);
1518 tmpValue = _mm_extract_epi16(tmp, 5);
1519 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
1520
// Diag lane = MASK lane where the characters differ, 0 where they match.
1521 Result = _mm_cmpeq_epi16(SeqA, tmp);
1522 Diag = _mm_andnot_si128(Result, MASK);
1523
1524 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1525 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1526
// Early exit: every tracked lane of R0 (0-4) and R1 (0-3) is already
// above errThreshold.
1527 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold
1528 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold &&
1529 _mm_extract_epi16(R1, 0) > errThreshold && _mm_extract_epi16(R1, 1) > errThreshold &&
1530 _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold)
1531 return -1;
1532
// First candidate result: lane e of R0 overwrites the initial minError.
1533 if(i == 2*lenb-e)
1534 {
1535 tmp = _mm_srli_si128(R0,2);
1536 for(k=0; k < e-1;k++)
1537 tmp = _mm_srli_si128(tmp,2);
1538 minError = _mm_extract_epi16(tmp,0);
1539 }
1540
1541 }
1542
1543 else
1544 {
// Odd step: compare SeqA with SeqB whose four lowest lanes are reversed.
1545 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
1546 Diag = _mm_andnot_si128(Result, MASK);
1547
1548 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1549 R1 = _mm_min_epi16(R1, R0+Down1);
1550
// Fold lane e-1 of R1 into the running minimum.
1551 if(i >= 2*lenb-e)
1552 {
1553 tmp = _mm_srli_si128(R1,2);
1554 for(k=0; k < e-2;k++)
1555 tmp = _mm_srli_si128(tmp,2);
1556 minError = min(minError, _mm_extract_epi16(tmp,0));
1557 }
1558 }
1559 }
1560
// Tail: 2*(e-2)+1 further steps.  Diag is rebuilt from scalar comparisons
// and the number of compared pairs (tmpE) shrinks after each even step.
1561 j=0;
1562 int tmpE = e;
1563 for(;j<2*(e-2)+1;j++)
1564 {
1565
1566 Diag = _mm_xor_si128(Diag, Diag);
1567 //set the first element
1568 if(j==0)
1569 {
1570 for( k=0;k<=e-1;k++ )
1571 {
1572 Diag = _mm_slli_si128(Diag, 2);
1573 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1574 }
1575
1576 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1577 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1578
1579 tmpE--;
1580
1581 tmp = _mm_srli_si128(R0,2);
1582 for(k=0; k < e-2;k++)
1583 tmp = _mm_srli_si128(tmp,2);
1584 minError = min(minError, _mm_extract_epi16(tmp,0));
1585 }
1586 else if(j%2 == 0)
1587 {
1588 for(k=0;k<tmpE;k++)
1589 {
1590 Diag = _mm_slli_si128(Diag, 2);
1591 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1592 }
1593
1594 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1595 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1596
1597 tmpE--;
1598
1599 tmp = _mm_srli_si128(R0,2);
1600 for(k=0; k < tmpE-1;k++)
1601 tmp = _mm_srli_si128(tmp,2);
1602 minError = min(minError, _mm_extract_epi16(tmp,0));
1603 }
1604
1605
1606 else
1607 {
1608 for(k=0;k<tmpE;k++)
1609 {
1610 Diag = _mm_slli_si128(Diag, 2);
1611 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1612 }
1613
1614 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1615 R1 = _mm_min_epi16(R1, R0+Down1);
1616
1617 tmp = _mm_srli_si128(R1,2);
1618 for(k=0; k < tmpE-2;k++)
1619 tmp = _mm_srli_si128(tmp,2);
1620 minError = min(minError, _mm_extract_epi16(tmp,0));
1621 }
1622 i++;
1623 }
1624 //Diag
1625
// Last two steps, written out individually.  Note that the sentinel
// placed in lane 0 here is the current minError, not its initial value.
1626 Diag = _mm_xor_si128(Diag,Diag);
1627 Diag = _mm_insert_epi16(Diag, minError, 0);
1628 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
1629
1630 Side1 = _mm_insert_epi16(Side1,1,0);
1631 Side1 = _mm_insert_epi16(Side1,1,1);
1632
1633 Down1 = _mm_insert_epi16(Down1, minError, 0);
1634 Down1 = _mm_insert_epi16(Down1, 1, 1);
1635
1636 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
1637 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
1638
1639 minError = min(minError, _mm_extract_epi16(R1,1));
1640
1641 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0);
1642 Down1 = _mm_insert_epi16(Down1, 1, 0);
1643
1644 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
1645 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
1646
1647
1648 minError = min(minError, _mm_extract_epi16(R0,0));
1649
1650
// Report -1 when even the best candidate is above errThreshold.
1651 if(minError > mismatch)
1652 return -1;
1653 return minError;
1654 }
1655
1656
1657
/*
 * forwardEditDistance4SSE2
 *
 * SSE2 routine that walks the sequences a and b forward (indices 0, 1, 2, ...)
 * and returns an error count, or -1 once that count is above errThreshold.
 * Judging by the name and the min-of-three recurrences this is presumably a
 * banded edit-distance computation -- confirm against the callers.
 *
 * Here e is read from the global errThreshold and 2*e serves as the "large"
 * sentinel in the border lanes of the cost vectors.
 * NOTE(review): the seeds fill exactly lanes 0-4 of R0 and 0-3 of R1, the
 * early-exit test reads exactly those lanes and index restarts at 4, so the
 * function looks written for errThreshold == 4 -- confirm with the caller.
 *
 * a, lena : first sequence and its length.  lena is only compared with 0;
 *           a itself is indexed up to a[lenb+e-1], so the caller presumably
 *           guarantees that much readable data -- TODO confirm.
 * b, lenb : second sequence and its length.
 *
 * Returns 0 when either length is 0, the result of smallEditDistanceF()
 * when lenb <= e, -1 when the bound is exceeded, and otherwise the smallest
 * value collected in minError.
 */
1658 int forwardEditDistance4SSE2(char *a, int lena, char *b,int lenb)
1659 {
1660 if(lenb == 0 || lena == 0)
1661 return 0;
1662
1663 int i = 0;
1664 int j = 0;
1665 int k = 0;
1666
1667 int i0=0;
1668 int i1=0;
1669 int i2=0;
1670 int i4=0;
1671 int i5=0;
1672
1673 int e = errThreshold;
1674
1675 int minError = 2*e;
1676 int index = 0;
1677 int tmpValue = 0;
1678
// Short b: hand over to the scalar helper.
1679 if(lenb <= e)
1680 {
1681 return smallEditDistanceF(a,lena,b,lenb);
1682 }
1683
1684
1685 register __m128i R0, R1;
1686 __m128i Diag;
1687 __m128i Side1, Side2;
1688 __m128i Down1, Down2;
1689 __m128i tmp;
1690 register __m128i SeqA, SeqB;
1691 __m128i Result;
1692
1693 __m128i tmpSeqA;
1694 __m128i tmpSeqB;
1695
1696 /* initialize */
1697 R0 = _mm_setzero_si128 ();
1698 R1 = _mm_setzero_si128 ();
1699 Diag = _mm_setzero_si128 ();
1700 Side1 = _mm_setzero_si128 ();
1701 Side2 = _mm_setzero_si128 ();
1702 Down1 = _mm_setzero_si128 ();
1703 Down2 = _mm_setzero_si128 ();
1704 SeqA = _mm_setzero_si128 ();
1705 SeqB = _mm_setzero_si128 ();
1706 Result = _mm_setzero_si128 ();
1707 /* end initialize */
1708
1709 R1 = _mm_xor_si128(R1, R1);
1710 R0 = _mm_xor_si128(R0, R0);
1711
1712 Diag = _mm_xor_si128(Diag, Diag);
1713 Diag = _mm_insert_epi16(Diag,2*e,0);
1714
// Scalar seed values computed from the first three characters of a and b.
1715 i0 = (a[0] != b[0]);
1716 i1 = min(i0, (a[1]!=b[0]))+1;
1717 i2 = min(i0,(a[0]!=b[1]))+1;
1718
1719 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1);
1720 i4 = min(i1, (a[2]!=b[0])+1)+1;
1721 i5 = min(i2, (a[0]!=b[2])+1)+1;
1722
// Seeds go into 16-bit lanes 0-3 of R1 and lanes 0-4 of R0.
1723 R1 = _mm_insert_epi16(R1, 3, 0);
1724 R1 = _mm_insert_epi16(R1, i1, 1);
1725 R1 = _mm_insert_epi16(R1, i2, 2);
1726 R1 = _mm_insert_epi16(R1, 3, 3);
1727
1728 R0 = _mm_insert_epi16(R0, 4, 0);
1729 R0 = _mm_insert_epi16(R0, i4, 1);
1730 R0 = _mm_insert_epi16(R0, i0, 2);
1731 R0 = _mm_insert_epi16(R0, i5, 3);
1732 R0 = _mm_insert_epi16(R0, 4, 4);
1733
1734 Side2 = _mm_xor_si128(Side2, Side2);
1735 Down2 = _mm_xor_si128(Down2, Down2);
1736 Down1 = _mm_xor_si128(Down1, Down1);
1737 Side1 = _mm_xor_si128(Side1, Side1);
1738
1739 Side2 = _mm_insert_epi16(Side2,2*e,0);
1740 Down1 = _mm_insert_epi16(Down1,2*e,0);
1741
1742 Side1 = _mm_insert_epi16(Side1,1,0);
1743
// Shift e lanes holding 1 into each cost vector (the sentinel inserted
// above moves up to lane e) and load the first e characters of a and b
// into SeqA/SeqB, newest character in lane 0.
1744 index = 0;
1745 for(j=0; j < e; j++)
1746 {
1747 Side2 = _mm_slli_si128(Side2, 2);
1748 Side2 = _mm_insert_epi16(Side2,1,0);
1749
1750 Down1 = _mm_slli_si128(Down1, 2);
1751 Down1 = _mm_insert_epi16(Down1,1,0);
1752
1753 Down2 = _mm_slli_si128(Down2, 2);
1754 Down2 = _mm_insert_epi16(Down2,1,0);
1755
1756 Side1 = _mm_slli_si128(Side1, 2);
1757 Side1 = _mm_insert_epi16(Side1,1,0);
1758
1759 SeqA = _mm_slli_si128(SeqA, 2);
1760 SeqB = _mm_slli_si128(SeqB, 2);
1761 SeqA = _mm_insert_epi16(SeqA,a[index],0);
1762 SeqB = _mm_insert_epi16(SeqB,b[index],0);
1763 index++;
1764 }
1765
1766 Down2= _mm_slli_si128(Down2, 2);
1767 Down2 = _mm_insert_epi16(Down2,2*e,0);
1768
1769 index = 4;
1770 i = 5;
1771
// Main loop: even i updates R0, odd i updates R1.
1772 int loopEnd = 2*lenb-(e-1);
1773 for(; i <= loopEnd ;i++)
1774 {
1775 //Diag = _mm_xor_si128(Diag, Diag);
1776 if( i%2 == 0)
1777 {
// Shift the next character of each sequence into lane 0.
1778 tmpSeqA = _mm_slli_si128(SeqA, 2);
1779 tmpSeqB = _mm_slli_si128(SeqB, 2);
1780 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0);
1781 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0);
1782
1783 index++;
1784
// tmp = SeqB with its five lowest lanes in reverse order: shuffle control
// 27 reverses lanes 0-3, the byte shift moves them up one lane, and the
// old lane 4 (now lane 5) is copied into lane 0.
1785 tmp = _mm_shufflelo_epi16(SeqB,27);
1786 tmp = _mm_slli_si128(tmp, 2);
1787 tmpValue = _mm_extract_epi16(tmp, 5);
1788 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
1789
// Diag lane = MASK lane where the characters differ, 0 where they match.
1790 Result = _mm_cmpeq_epi16(SeqA, tmp);
1791 Diag = _mm_andnot_si128(Result, MASK);
1792
1793 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1794 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1795
// Early exit: every tracked lane of R0 (0-4) and R1 (0-3) is already
// above e.
1796 if(_mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e
1797 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e &&
1798 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e)
1799 return -1;
1800
// First candidate result: lane e of R0 overwrites the initial minError.
1801 if(i == 2*lenb-e)
1802 {
1803 tmp = _mm_srli_si128(R0,2);
1804 for(k=0; k < e-1;k++)
1805 tmp = _mm_srli_si128(tmp,2);
1806 minError = _mm_extract_epi16(tmp,0);
1807 }
1808
1809 }
1810
1811 else
1812 {
// Odd step: compare SeqA with SeqB whose four lowest lanes are reversed.
1813 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
1814 Diag = _mm_andnot_si128(Result, MASK);
1815
1816 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1817 R1 = _mm_min_epi16(R1, R0+Down1);
1818
// Fold lane e-1 of R1 into the running minimum.
1819 if(i >= 2*lenb-e)
1820 {
1821 tmp = _mm_srli_si128(R1,2);
1822 for(k=0; k < e-2;k++)
1823 tmp = _mm_srli_si128(tmp,2);
1824 minError = min(minError, _mm_extract_epi16(tmp,0));
1825 }
1826 }
1827
1828
1829 }
// Tail: 2*(e-2)+1 further steps.  Diag is rebuilt from scalar comparisons
// and the number of compared pairs (tmpE) shrinks after each even step.
1830 j=0;
1831 int tmpE = e;
1832 for(;j<2*(e-2)+1;j++)
1833 {
1834
1835 Diag = _mm_xor_si128(Diag, Diag);
1836 //set the first element
1837 if(j==0)
1838 {
1839 for( k=0;k<=e-1;k++ )
1840 {
1841 Diag = _mm_slli_si128(Diag, 2);
1842 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1843 }
1844
1845 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1846 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1847
1848 tmpE--;
1849
1850 tmp = _mm_srli_si128(R0,2);
1851 for(k=0; k < e-2;k++)
1852 tmp = _mm_srli_si128(tmp,2);
1853 minError = min(minError, _mm_extract_epi16(tmp,0));
1854 }
1855 else if(j%2 == 0)
1856 {
1857 for(k=0;k<tmpE;k++)
1858 {
1859 Diag = _mm_slli_si128(Diag, 2);
1860 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1861 }
1862
1863 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
1864 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
1865
1866 tmpE--;
1867
1868 tmp = _mm_srli_si128(R0,2);
1869 for(k=0; k < tmpE-1;k++)
1870 tmp = _mm_srli_si128(tmp,2);
1871 minError = min(minError, _mm_extract_epi16(tmp,0));
1872 }
1873
1874
1875 else
1876 {
1877 for(k=0;k<tmpE;k++)
1878 {
1879 Diag = _mm_slli_si128(Diag, 2);
1880 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
1881 }
1882
1883 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
1884 R1 = _mm_min_epi16(R1, R0+Down1);
1885
1886 tmp = _mm_srli_si128(R1,2);
1887 for(k=0; k < tmpE-2;k++)
1888 tmp = _mm_srli_si128(tmp,2);
1889 minError = min(minError, _mm_extract_epi16(tmp,0));
1890 }
1891 i++;
1892 }
1893 //Diag
1894
// Last two steps, written out individually: first lane 1 of R1, then
// lane 0 of R0 are folded into minError.
1895 Diag = _mm_xor_si128(Diag,Diag);
1896 Diag = _mm_insert_epi16(Diag, 2*e, 0);
1897 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
1898
1899 Side1 = _mm_insert_epi16(Side1,1,0);
1900 Side1 = _mm_insert_epi16(Side1,1,1);
1901
1902 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
1903 Down1 = _mm_insert_epi16(Down1, 1, 1);
1904
1905 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
1906 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
1907
1908 minError = min(minError, _mm_extract_epi16(R1,1));
1909
1910 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0);
1911 Down1 = _mm_insert_epi16(Down1, 1, 0);
1912
1913 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
1914 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
1915
1916 minError = min(minError, _mm_extract_epi16(R0,0));
1917
// Report -1 when even the best candidate is above errThreshold.
1918 if(minError > e)
1919 return -1;
1920 return minError;
1921 }
1922
1923 int forwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb)
1924 {
1925 if(lenb == 0 || lena == 0)
1926 return 0;
1927
1928 int i = 0;
1929 int j = 0;
1930 int k = 0;
1931
1932 int e = errThreshold;
1933
1934 int minError = 2*e;
1935
1936 char flag = 0;
1937
1938 if(lenb <= e)
1939 {
1940 return smallEditDistanceF(a,lena,b,lenb);
1941 }
1942
1943
1944 __m128i R0, R1;
1945 __m128i Diag;
1946 __m128i Side1, Side2;
1947 __m128i Down1, Down2;
1948 __m128i Error;
1949 __m128i tmp;
1950
1951 /* initialize */
1952 R0 = _mm_setzero_si128 ();
1953 R1 = _mm_setzero_si128 ();
1954 Diag = _mm_setzero_si128 ();
1955 Side1 = _mm_setzero_si128 ();
1956 Side2 = _mm_setzero_si128 ();
1957 Down1 = _mm_setzero_si128 ();
1958 Down2 = _mm_setzero_si128 ();
1959 Error = _mm_setzero_si128 ();
1960 tmp = _mm_setzero_si128 ();
1961 /* end initialize */
1962
1963 R1 = _mm_xor_si128(R1, R1);
1964 R0 = _mm_xor_si128(R0, R0);
1965
1966 Diag = _mm_xor_si128(Diag, Diag);
1967 Side1 = _mm_xor_si128(Side1, Side1);
1968 Down1 = _mm_xor_si128(Down1, Down1);
1969
1970 Diag = _mm_insert_epi16(Diag,2*e,0);
1971
1972 Side1 = _mm_insert_epi16(Side1,1,0);
1973 Side1 = _mm_insert_epi16(Side1,2*e,1);
1974
1975 Down1 = _mm_insert_epi16(Down1,2*e,0);
1976 Down1 = _mm_insert_epi16(Down1,1,1);
1977 Down1 = _mm_insert_epi16(Down1,2*e,2);
1978
1979 R0 = _mm_insert_epi16(R0,0,0);
1980
1981 R1 = _mm_insert_epi16(R1,1,0);
1982 R1 = _mm_insert_epi16(R1,1,1);
1983
1984 for(i=2; i <= e; i++)
1985 {
1986 //set side
1987 Side1 = _mm_slli_si128(Side1,2);
1988 Side1 = _mm_insert_epi16(Side1,1,0);
1989
1990 Down1 = _mm_insert_epi16(Down1,1,0);
1991 Down1 = _mm_slli_si128(Down1,2);
1992 Down1 = _mm_insert_epi16(Down1,2*e,0);
1993
1994 Diag = _mm_xor_si128(Diag, Diag);
1995 if( i%2 == 0)
1996 {
1997 Diag = _mm_insert_epi16(Diag,2*e,0);
1998
1999 for(j=1;j<=i-1;j++)
2000 {
2001 Diag = _mm_slli_si128(Diag, 2);
2002 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0);
2003 }
2004 Diag = _mm_slli_si128(Diag, 2);
2005 Diag = _mm_insert_epi16(Diag, 2*e,0);
2006
2007 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
2008 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
2009
2010 }
2011
2012 else
2013 {
2014 Diag = _mm_insert_epi16(Diag,2*e,0);
2015 for(j=i/2-1;j>=-i/2;j--)
2016 {
2017 Diag = _mm_slli_si128(Diag, 2);
2018 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0);
2019 }
2020 Diag = _mm_slli_si128(Diag, 2);
2021 Diag = _mm_insert_epi16(Diag, 2*e,0);
2022
2023 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
2024 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
2025
2026 }
2027 }
2028 Error = _mm_xor_si128(Error, Error);
2029 Side2 = _mm_xor_si128(Side2, Side2);
2030 Side1 = _mm_xor_si128(Side1, Side1);
2031 Down2 = _mm_xor_si128(Down2, Down2);
2032 Down1 = _mm_xor_si128(Down1, Down1);
2033
2034
2035 Error = _mm_insert_epi16(Error,e,0);
2036 Side2 = _mm_insert_epi16(Side2,2*e,0);
2037 Side1 = _mm_insert_epi16(Side2,2*e,0);
2038 Down1 = _mm_insert_epi16(Down1,2*e,0);
2039
2040
2041 for(j=0; j < e; j++)
2042 {
2043 Side2 = _mm_slli_si128(Side2, 2);
2044 Side2 = _mm_insert_epi16(Side2,1,0);
2045
2046 Side1 = _mm_slli_si128(Side1, 2);
2047 Side1 = _mm_insert_epi16(Side1,1,0);
2048
2049 Down1 = _mm_slli_si128(Down1, 2);
2050 Down1 = _mm_insert_epi16(Down1,1,0);
2051
2052 Down2 = _mm_slli_si128(Down2, 2);
2053 Down2 = _mm_insert_epi16(Down2,1,0);
2054
2055 Error = _mm_slli_si128(Error, 2);
2056 Error = _mm_insert_epi16(Error, e, 0);
2057 }
2058
2059 Down2= _mm_slli_si128(Down2, 2);
2060 Down2 = _mm_insert_epi16(Down2,2*e,0);
2061
2062 for(; i <= 2*lenb-(e-1);i++)
2063 {
2064 flag = 0;
2065 Diag = _mm_xor_si128(Diag, Diag);
2066 if( i%2 == 0)
2067 {
2068 for(j=e/2;j>=-e/2;j--)
2069 {
2070 Diag = _mm_slli_si128(Diag, 2);
2071 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0);
2072 }
2073
2074
2075 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
2076 R0 = _mm_min_epi16(R0, R1+Down1);
2077
2078 if(_mm_extract_epi16(R0,0) <= e)
2079 flag = 1;
2080
2081 tmp = _mm_srli_si128(R0,2);
2082 for(j=0; j < e-1;j++)
2083 {
2084 if(_mm_extract_epi16(tmp,0) <= e)
2085 flag = 1;
2086 tmp = _mm_srli_si128(tmp,2);
2087 }
2088 // printf("#%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2));
2089 if(flag == 0)
2090 return -1;
2091
2092 if(i == 2*lenb-(e-1))
2093 {
2094 tmp = _mm_srli_si128(R0,2);
2095 for(k=0; k < e-2;k++)
2096 tmp = _mm_srli_si128(tmp,2);
2097 minError = _mm_extract_epi16(tmp,0);
2098 }
2099
2100 }
2101
2102 else
2103 {
2104 for(j=e/2;j>=-e/2-1;j--)
2105 {
2106 Diag = _mm_slli_si128(Diag, 2);
2107 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i)/2-j-1],0);
2108 }
2109
2110 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
2111 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
2112
2113 //printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
2114 // _mm_extract_epi16(R1,3));
2115
2116 if(i >= 2*lenb-e)
2117 {
2118 tmp = _mm_srli_si128(R1,2);
2119 for(k=0; k < e-1;k++)
2120 tmp = _mm_srli_si128(tmp,2);
2121 minError = min(minError, _mm_extract_epi16(tmp,0));
2122 }
2123 }
2124 }
2125
2126 //first cell
2127 Diag = _mm_xor_si128(Diag,Diag);
2128 Diag = _mm_insert_epi16(Diag, b[lenb-3] != a[lena], 0);
2129 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena-1], 1);
2130 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-2], 2);
2131 Diag = _mm_insert_epi16(Diag, 2*e, 3);
2132 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
2133 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
2134
2135
2136 minError = min(minError, _mm_extract_epi16(R1,2));
2137
2138 //second cell
2139 Diag = _mm_xor_si128(Diag,Diag);
2140 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena], 0);
2141 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-1], 1);
2142 Diag = _mm_insert_epi16(Diag, 2*e, 2);
2143
2144 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
2145 R0 = _mm_min_epi16(R0, R1+Down1);
2146
2147
2148 minError = min(minError, _mm_extract_epi16(R0,1));
2149
2150 //third cell
2151 Diag = _mm_xor_si128(Diag,Diag);
2152 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena+1], 0);
2153 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena], 1);
2154 Diag = _mm_insert_epi16(Diag, 2*e, 2);
2155
2156 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
2157 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
2158
2159
2160 minError = min(minError, _mm_extract_epi16(R1,1));
2161
2162 //forth
2163 Diag = _mm_xor_si128(Diag,Diag);
2164 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+1], 0);
2165 Diag = _mm_insert_epi16(Diag, 2*e, 1);
2166
2167 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
2168 R0 = _mm_min_epi16(R0, R1+Down1);
2169
2170 minError = min(minError, _mm_extract_epi16(R0,0));
2171
2172 //fifth
2173 Diag = _mm_xor_si128(Diag,Diag);
2174 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+2], 0);
2175 Diag = _mm_insert_epi16(Diag, 2*e, 1);
2176
2177 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
2178 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
2179
2180
2181 minError = min(minError, _mm_extract_epi16(R1,0));
2182
2183 if(minError > e)
2184 return -1;
2185 return minError;
2186
2187 }
2188
2189 int forwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb)
2190 {
2191 if(lenb == 0 || lena == 0)
2192 return 0;
2193
2194 int i = 0;
2195 int j = 0;
2196 int k = 0;
2197
2198 int e = errThreshold;
2199
2200 int minError = 2*e;
2201
2202 char flag = 0;
2203
2204 if(lenb <= e)
2205 {
2206 return smallEditDistanceF(a,lena,b,lenb);
2207 }
2208
2209
2210 __m128i R0, R1;
2211 __m128i Diag;
2212 __m128i Side1, Side2;
2213 __m128i Down1, Down2;
2214 __m128i Error;
2215 __m128i tmp;
2216
2217 /* initialize */
2218 R0 = _mm_setzero_si128 ();
2219 R1 = _mm_setzero_si128 ();
2220 Diag = _mm_setzero_si128 ();
2221 Side1 = _mm_setzero_si128 ();
2222 Side2 = _mm_setzero_si128 ();
2223 Down1 = _mm_setzero_si128 ();
2224 Down2 = _mm_setzero_si128 ();
2225 Error = _mm_setzero_si128 ();
2226 tmp = _mm_setzero_si128 ();
2227 /* end initialize */
2228
2229 R1 = _mm_xor_si128(R1, R1);
2230 R0 = _mm_xor_si128(R0, R0);
2231
2232 Diag = _mm_xor_si128(Diag, Diag);
2233 Side1 = _mm_xor_si128(Side1, Side1);
2234 Down1 = _mm_xor_si128(Down1, Down1);
2235
2236 Diag = _mm_insert_epi16(Diag,2*e,0);
2237
2238 Side1 = _mm_insert_epi16(Side1,1,0);
2239 Side1 = _mm_insert_epi16(Side1,2*e,1);
2240
2241 Down1 = _mm_insert_epi16(Down1,2*e,0);
2242 Down1 = _mm_insert_epi16(Down1,1,1);
2243 Down1 = _mm_insert_epi16(Down1,2*e,2);
2244
2245 R0 = _mm_insert_epi16(R0,0,0);
2246
2247 R1 = _mm_insert_epi16(R1,1,0);
2248 R1 = _mm_insert_epi16(R1,1,1);
2249
2250 for(i=2; i <= e; i++)
2251 {
2252 //set side
2253 Side1 = _mm_slli_si128(Side1,2);
2254 Side1 = _mm_insert_epi16(Side1,1,0);
2255
2256 Down1 = _mm_insert_epi16(Down1,1,0);
2257 Down1 = _mm_slli_si128(Down1,2);
2258 Down1 = _mm_insert_epi16(Down1,2*e,0);
2259
2260 Diag = _mm_xor_si128(Diag, Diag);
2261 if( i%2 == 0)
2262 {
2263 Diag = _mm_insert_epi16(Diag,2*e,0);
2264
2265 for(j=1;j<=i-1;j++)
2266 {
2267 Diag = _mm_slli_si128(Diag, 2);
2268 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0);
2269 }
2270 Diag = _mm_slli_si128(Diag, 2);
2271 Diag = _mm_insert_epi16(Diag, 2*e,0);
2272
2273 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
2274 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
2275 }
2276
2277 else
2278 {
2279 Diag = _mm_insert_epi16(Diag,2*e,0);
2280 for(j=i/2-1;j>=-i/2;j--)
2281 {
2282 Diag = _mm_slli_si128(Diag, 2);
2283 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0);
2284 }
2285 Diag = _mm_slli_si128(Diag, 2);
2286 Diag = _mm_insert_epi16(Diag, 2*e,0);
2287
2288 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
2289 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
2290 }
2291 }
2292 Error = _mm_xor_si128(Error, Error);
2293 Side2 = _mm_xor_si128(Side2, Side2);
2294 Down2 = _mm_xor_si128(Down2, Down2);
2295 Down1 = _mm_xor_si128(Down1, Down1);
2296
2297 Error = _mm_insert_epi16(Error,e,0);
2298 Side2 = _mm_insert_epi16(Side2,2*e,0);
2299 Down1 = _mm_insert_epi16(Down1,2*e,0);
2300
2301
2302 for(j=0; j < e; j++)
2303 {
2304 Side2 = _mm_slli_si128(Side2, 2);
2305 Side2 = _mm_insert_epi16(Side2,1,0);
2306
2307 Down1 = _mm_slli_si128(Down1, 2);
2308 Down1 = _mm_insert_epi16(Down1,1,0);
2309
2310 Down2 = _mm_slli_si128(Down2, 2);
2311 Down2 = _mm_insert_epi16(Down2,1,0);
2312
2313 Error = _mm_slli_si128(Error, 2);
2314 Error = _mm_insert_epi16(Error, e, 0);
2315 }
2316
2317 Down2= _mm_slli_si128(Down2, 2);
2318 Down2 = _mm_insert_epi16(Down2,2*e,0);
2319
2320 for(; i <= 2*lenb-(e-1);i++)
2321 {
2322 flag = 0;
2323 Diag = _mm_xor_si128(Diag, Diag);
2324 if( i%2 == 0)
2325 {
2326 for(j=e/2;j>=-e/2;j--)
2327 {
2328 Diag = _mm_slli_si128(Diag, 2);
2329 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0);
2330 }
2331
2332 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
2333 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
2334
2335
2336 if(_mm_extract_epi16(R0,0) <= e)
2337 flag = 1;
2338
2339 tmp = _mm_srli_si128(R0,2);
2340 for(j=0; j < e-1;j++)
2341 {
2342 if(_mm_extract_epi16(tmp,0) <= e)
2343 flag = 1;
2344 tmp = _mm_srli_si128(tmp,2);
2345 }
2346
2347
2348 if(flag == 0)
2349 return -1;
2350
2351 if(i == 2*lenb-e)
2352 {
2353 tmp = _mm_srli_si128(R0,2);
2354 for(k=0; k < e-1;k++)
2355 tmp = _mm_srli_si128(tmp,2);
2356 minError = _mm_extract_epi16(tmp,0);
2357 }
2358
2359 }
2360
2361 else
2362 {
2363 for(j=-e/2+1;j<=e/2;j++)
2364 {
2365 Diag = _mm_slli_si128(Diag, 2);
2366 Diag = _mm_insert_epi16(Diag, b[(i+1)/2-j-1] != a[(i-1)/2+j-1],0);
2367 }
2368
2369 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
2370 R1 = _mm_min_epi16(R1, R0+Down1);
2371
2372 if(i >= 2*lenb-e)
2373 {
2374 tmp = _mm_srli_si128(R1,2);
2375 for(k=0; k < e-2;k++)
2376 tmp = _mm_srli_si128(tmp,2);
2377 minError = min(minError, _mm_extract_epi16(tmp,0));
2378 }
2379 }
2380 }
2381
2382 j=0;
2383 int tmpE = e;
2384 for(;j<2*(e-2)+1;j++)
2385 {
2386
2387 Diag = _mm_xor_si128(Diag, Diag);
2388 //set the first element
2389 if(j==0)
2390 {
2391 for( k=0;k<=e-1;k++ )
2392 {
2393 Diag = _mm_slli_si128(Diag, 2);
2394 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
2395 }
2396
2397 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
2398 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
2399
2400 tmpE--;
2401
2402 tmp = _mm_srli_si128(R0,2);
2403 for(k=0; k < e-2;k++)
2404 tmp = _mm_srli_si128(tmp,2);
2405 minError = min(minError, _mm_extract_epi16(tmp,0));
2406 }
2407 else if(j%2 == 0)
2408 {
2409 for(k=0;k<tmpE;k++)
2410 {
2411 Diag = _mm_slli_si128(Diag, 2);
2412 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
2413 }
2414
2415 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
2416 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
2417
2418 tmpE--;
2419
2420 tmp = _mm_srli_si128(R0,2);
2421 for(k=0; k < tmpE-1;k++)
2422 tmp = _mm_srli_si128(tmp,2);
2423 minError = min(minError, _mm_extract_epi16(tmp,0));
2424 }
2425
2426
2427 else
2428 {
2429 for(k=0;k<tmpE;k++)
2430 {
2431 Diag = _mm_slli_si128(Diag, 2);
2432 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
2433 }
2434
2435 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
2436 R1 = _mm_min_epi16(R1, R0+Down1);
2437
2438 tmp = _mm_srli_si128(R1,2);
2439 for(k=0; k < tmpE-1;k++)
2440 tmp = _mm_srli_si128(tmp,2);
2441 minError = min(minError, _mm_extract_epi16(tmp,0));
2442 }
2443 i++;
2444 }
2445 //Diag
2446
2447 Diag = _mm_xor_si128(Diag,Diag);
2448 Diag = _mm_insert_epi16(Diag, 2*e, 0);
2449 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
2450
2451 Side1 = _mm_insert_epi16(Side1,1,0);
2452 Side1 = _mm_insert_epi16(Side1,1,1);
2453
2454 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
2455 Down1 = _mm_insert_epi16(Down1, 1, 1);
2456
2457 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
2458 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
2459
2460 minError = min(minError, _mm_extract_epi16(R1,1));
2461
2462 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 1);
2463 Down1 = _mm_insert_epi16(Down1, 1, 0);
2464
2465 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
2466 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
2467
2468 minError = min(minError, _mm_extract_epi16(R0,0));
2469
2470 if(minError > e)
2471 return -1;
2472 return minError;
2473 }
2474
2475
/*
 * forwardEditDistance2SSE2 -- SSE2 edit-distance test of a[] against b[]
 * with a hard-coded error bound e = 2.
 *
 * a, lena : first sequence and its length. The tail steps after the main
 *           loop read a[i/2] with i up to 2*lena+2, i.e. up to a[lena+1],
 *           so the caller must make those bytes readable.
 * b, lenb : second sequence and its length.
 *
 * Returns 0 when either length is 0, the result of smallEditDistanceF()
 * when lenb <= e, -1 when the in-loop early exit fires or the final
 * minimum is greater than e, and otherwise that minimum (totalError).
 *
 * NOTE(review): the DP cells appear to be processed one anti-diagonal per
 * iteration, R1 holding the two cells of an odd step and R0 the three
 * cells of an even step -- confirm against the scalar implementation.
 * NOTE(review): `x + y` on __m128i operands is a compiler vector
 * extension, not an SSE2 intrinsic.
 */
2476 int forwardEditDistance2SSE2(char *a, int lena, char *b,int lenb)
2477 {
2478 if(lenb == 0 || lena == 0)
2479 return 0;
2480
2481
2482
2483 int i0 = 0;
2484 int i1 = 0;
2485
2486
2487 int error; //0: if the two character are equal 1: if not
2488
2489 int i = 0; //loop index
2490
2491 int e = 2; //error bound
2492
2493 int totalError = 0;
2494
2495 __m128i R0;
2496 __m128i R1;
2497
2498 __m128i Side1, Side2,Side; //side matrix
2499 __m128i Down1, Down2,Down; //down matrix
2500 __m128i Diag;
2501
2502 __m128i tmp;
2503 __m128i ERROR_REACH;
2504
2505 /* initialize */
2506 R0 = _mm_setzero_si128 ();
2507 R1 = _mm_setzero_si128 ();
2508 Diag = _mm_setzero_si128 ();
2509 Side1 = _mm_setzero_si128 ();
2510 Side2 = _mm_setzero_si128 ();
2511 Down1 = _mm_setzero_si128 ();
2512 Down2 = _mm_setzero_si128 ();
2513 Side = _mm_setzero_si128 ();
2514 Down = _mm_setzero_si128 ();
2515 tmp = _mm_setzero_si128 ();
2516 ERROR_REACH = _mm_setzero_si128 ();
2517 /* end initialize */
2518
2519
/* inputs no longer than the error bound are delegated to smallEditDistanceF() */
2520 if(lenb <= e)
2521 {
2522 return smallEditDistanceF(a,lena,b,lenb);
2523 }
2524
/* lanes 0..2 hold e, the remaining lanes 0; used by the early-exit test in the loop */
2525 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e);
2526
2527 R0 = _mm_insert_epi16(R0,0,0);
2528
2529 R1 = _mm_insert_epi16(R1,1,0);
2530 R1 = _mm_insert_epi16(R1,1,1);
2531
2532 // error = ((a[0]) != (b[0]));
2533
/* _mm_set_epi16 lists lanes from 7 down to 0, so the last argument is lane 0.
   The value 2*e is placed in lanes that must never win the min below. */
2534 Diag = _mm_set_epi16(0,0,0,0,0,2*e,((a[0]) != (b[0])),2*e);
2535 Side1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1);
2536 Side2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e);
2537 Down1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1);
2538 Down2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e);
2539
2540 tmp = _mm_slli_si128(R1,2);
2541
/* first three-lane step, computed before the loop starts at i = 3 */
2542 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2543 R0 = _mm_min_epi16(R0,tmp+Down2);
2544
2545 for (i = 3; i < 2*lena; i++)
2546 {
/* odd i: refresh the two low lanes of R1 */
2547 if(i % 2 ==1)
2548 {
2549
2550 Diag = _mm_xor_si128(Diag, Diag);
2551 error = ((a[(i+1)/2-1]) != (b[(i-1)/2-1]));
2552 Diag = _mm_insert_epi16(Diag,error,0);
2553 error = ((a[(i-1)/2-1]) != (b[(i+1)/2-1]));
2554 Diag = _mm_insert_epi16(Diag,error,1);
2555 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0, ((a[(i-1)/2-1]) != (b[(i+1)/2-1])) ,((a[(i+1)/2-1]) != (b[(i-1)/2-1])));
2556
2557
2558 tmp = _mm_srli_si128(R0,2);
2559
2560 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
2561 R1 = _mm_min_epi16(R1,R0+Down1);
2562
/* past step 2*lenb-2, lane 1 of R1 is a candidate for the result */
2563 if(i > 2 * lenb - 2)
2564 {
2565 i1 = _mm_extract_epi16(R1, 1);
2566 totalError = min(totalError, i1);
2567 }
2568 }
2569
/* even i: refresh the three low lanes of R0 */
2570 else if(i % 2 == 0)
2571 {
2572 error = ((a[i/2]) != (b[i/2-2]));
2573 Diag = _mm_insert_epi16(Diag,error,0);
2574 error = ((a[i/2-1]) != (b[i/2-1]));
2575 Diag = _mm_insert_epi16(Diag,error,1);
2576 error = ((a[i/2-2]) != (b[i/2]));
2577 Diag = _mm_insert_epi16(Diag,error,2);
2578
2579 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, ((a[i/2-2]) != (b[i/2])) , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) );
2580
2581 tmp = _mm_slli_si128(R1,2);
2582
2583 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2584 R0 = _mm_min_epi16(R0,tmp+Down2);
2585
/* i0 == 63 means exactly the sign bits of bytes 0..5 of (ERROR_REACH - R0)
   are set, i.e. lanes 0..2 are negative: every one of those R0 lanes exceeds e */
2586 tmp = _mm_sub_epi16(ERROR_REACH, R0);
2587 i0 = _mm_movemask_epi8(tmp);
2588
/* early exit: all three R0 lanes above e and both R1 lanes above the global errThreshold */
2589 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2)
2590 return -1;
/* first candidate result: lane 2 of R0 at step 2*lenb-2 */
2591 if(i == 2 * lenb - 2) {
2592 totalError = _mm_extract_epi16(R0, 2);
2593 }
2594 }
2595 }
2596
/* tail: three further steps after the loop (i == 2*lena here), each
   contributing one more candidate to totalError */
2597 Down1 = _mm_insert_epi16(Down1,2*e,0);
2598
2599 //fill the first part of the error
2600 error = ((a[i/2]) != (b[i/2-2]));
2601 Diag = _mm_insert_epi16(Diag,error,0);
2602 error = ((a[i/2-1]) != (b[i/2-1]));
2603 Diag = _mm_insert_epi16(Diag,error,1);
2604 Diag = _mm_insert_epi16(Diag,2*e,2);
2605 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) );
2606
2607 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2608 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
2609
2610 // i0 = _mm_extract_epi16(R0, 0);
2611 i1 = _mm_extract_epi16(R0, 1);
2612
2613 totalError = min(totalError, i1);
2614
2615 //fill the second part of the error
2616 i++;
2617
2618 Diag = _mm_xor_si128(Diag, Diag);
2619 Diag = _mm_insert_epi16(Diag,2*e,0);
2620 error = ((a[i/2]) != (b[lenb-1]));
2621 Diag = _mm_insert_epi16(Diag,error,1);
2622 Diag = _mm_insert_epi16(Diag,2*e,2);
2623 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2]) != (b[lenb-1])) , 2*e );
2624
2625
2626 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
2627 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
2628
2629 // i0 = _mm_extract_epi16(R1, 0);
2630 i1 = _mm_extract_epi16(R1, 1);
2631
2632 totalError = min(totalError, i1);
2633 //fill the last the last element of the matrix
2634 i++;
2635
2636 Diag = _mm_xor_si128(Diag, Diag);
2637 error = ((a[i/2]) != (b[lenb-1]));
2638 Diag = _mm_insert_epi16(Diag,error,0);
2639
2640 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0 , 0 , ((a[i/2]) != (b[lenb-1])) );
2641
2642
2643 Down = _mm_insert_epi16(Down,1,0);
2644
2645 Side = _mm_insert_epi16(Side,1,0);
2646
2647 tmp = _mm_srli_si128(R1,2);
2648
2649 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag);
2650 R0 = _mm_min_epi16(R0,tmp+Side);
2651
2652 i0 = _mm_extract_epi16(R0, 0);
2653
2654 totalError = min(totalError, i0);
2655
/* anything above the bound is reported as "no match" */
2656 if(totalError > e)
2657 return -1;
2658
2659 return totalError;
2660
2661 }
2662
/*
 * backwardEditDistance2SSE2 -- SSE2 edit-distance test with a hard-coded
 * error bound e = 2, walking both sequences backwards in memory.
 *
 * a, lena : pointer to the first character to compare and the number of
 *           characters; characters are read as *(a-k). The tail steps
 *           read *(a-(i/2)) with i up to 2*lena+2, i.e. down to a[-(lena+1)].
 * b, lenb : same for the second sequence, read as *(b-k) down to b[-(lenb-1)].
 *
 * Returns 0 when either length is 0, the result of smallEditDistanceB()
 * when lenb <= e, and -1 when the in-loop early exit fires, when the
 * final minimum exceeds e, or when the final minimum is exactly 0.
 * Otherwise returns the final minimum (1..e).
 *
 * NOTE(review): the recurrence has the same shape as
 * forwardEditDistance2SSE2 (odd steps update two lanes of R1, even steps
 * three lanes of R0); `x + y` on __m128i is a compiler vector extension.
 */
2663 int backwardEditDistance2SSE2(char *a, int lena, char *b,int lenb)
2664 {
2665 if(lenb == 0 || lena == 0)
2666 return 0;
2667
2668 int i0 = 0;
2669 int i1 = 0;
2670
2671 int error; //0: if the two character are equal 1: if not
2672
2673 int i = 0; //loop index
2674
2675 int e = 2; //error bound
2676
2677 int totalError = 0;
2678
2679 __m128i R0;
2680 __m128i R1;
2681
2682 __m128i Side1, Side2,Side; //side matrix
2683 __m128i Down1, Down2,Down; //down matrix
2684 __m128i Diag; //diag matrix
2685
2686 __m128i tmp;
2687 __m128i ERROR_REACH;
2688
2689 /* initialize */
2690 R0 = _mm_setzero_si128 ();
2691 R1 = _mm_setzero_si128 ();
2692 Diag = _mm_setzero_si128 ();
2693 Side1 = _mm_setzero_si128 ();
2694 Side2 = _mm_setzero_si128 ();
2695 Side = _mm_setzero_si128 ();
2696 Down1 = _mm_setzero_si128 ();
2697 Down2 = _mm_setzero_si128 ();
2698 Down = _mm_setzero_si128 ();
2699 ERROR_REACH = _mm_setzero_si128 ();
2700 tmp = _mm_setzero_si128 ();
2701 /* end initialize */
2702
/* inputs no longer than the error bound are delegated to smallEditDistanceB() */
2703 if(lenb <= e)
2704 {
2705 return smallEditDistanceB(a,lena,b,lenb);
2706 }
2707
2708
/* lanes 0..2 hold e, the remaining lanes 0; used by the early-exit test in the loop */
2709 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e);
2710
2711 R0 = _mm_insert_epi16(R0,0,0);
2712
2713 R1 = _mm_insert_epi16(R1,1,0);
2714 R1 = _mm_insert_epi16(R1,1,1);
2715
2716 error = ((a[0]) != (b[0]));
2717
/* 2*e is inserted into lanes that must never win the min below */
2718 Diag = _mm_insert_epi16(Diag,2*e,0);
2719 Diag = _mm_insert_epi16(Diag,error,1);
2720 Diag = _mm_insert_epi16(Diag,2*e,2);
2721
2722 Side1 = _mm_insert_epi16(Side1,1,0);
2723 Side1 = _mm_insert_epi16(Side1,1,1);
2724 Side1 = _mm_insert_epi16(Side1,2*e,2);
2725
2726 Side2 = _mm_insert_epi16(Side2,2*e,0);
2727 Side2 = _mm_insert_epi16(Side2,1,1);
2728 Side2 = _mm_insert_epi16(Side2,1,2);
2729
2730 Down1 = _mm_insert_epi16(Down1,1,0);
2731 Down1 = _mm_insert_epi16(Down1,1,1);
2732 Down1 = _mm_insert_epi16(Down1,2*e,2);
2733
2734 Down2 = _mm_insert_epi16(Down2,2*e,0);
2735 Down2 = _mm_insert_epi16(Down2,1,1);
2736 Down2 = _mm_insert_epi16(Down2,1,2);
2737
2738 tmp = _mm_slli_si128(R1,2);
2739
/* first three-lane step, computed before the loop starts at i = 3 */
2740 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2741 R0 = _mm_min_epi16(R0,tmp+Down2);
2742
2743 // printf("%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2));
2744 for (i = 3; i < 2*lena; i++)
2745 {
/* odd i: refresh the two low lanes of R1 */
2746 if(i % 2 ==1)
2747 {
/* x - x clears every lane of Diag */
2748 Diag = _mm_sub_epi8(Diag, Diag);
2749 error = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) );
2750 Diag = _mm_insert_epi16(Diag,error,0);
2751 error = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) );
2752 Diag = _mm_insert_epi16(Diag,error,1);
2753 //printf("#%d #%d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1));
2754 tmp = _mm_srli_si128(R0,2);
2755
2756 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
2757 R1 = _mm_min_epi16(R1,R0+Down1);
2758
/* past step 2*lenb-2, lane 1 of R1 is a candidate for the result */
2759 if(i > 2 * lenb - 2) {
2760 i1 = _mm_extract_epi16(R1, 1);
2761 totalError = min(totalError, i1);
2762 }
2763 // printf("%d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1));
2764 }
2765
/* even i: refresh the three low lanes of R0 */
2766 else if(i % 2 == 0)
2767 {
2768 error = ( *(a-(i/2)) != *(b-(i/2-2)) );
2769 Diag = _mm_insert_epi16(Diag,error,0);
2770 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
2771 Diag = _mm_insert_epi16(Diag,error,1);
2772 error = ( *(a-(i/2-2)) != *(b-(i/2)));
2773 Diag = _mm_insert_epi16(Diag,error,2);
2774
2775 tmp = _mm_slli_si128(R1,2);
2776
2777 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2778 R0 = _mm_min_epi16(R0,tmp+Down2);
2779
/* i0 == 63 means exactly the sign bits of bytes 0..5 of (ERROR_REACH - R0)
   are set, i.e. lanes 0..2 are negative: every one of those R0 lanes exceeds e */
2780 tmp = _mm_sub_epi16(ERROR_REACH, R0);
2781 i0 = _mm_movemask_epi8(tmp);
2782
/* early exit: all three R0 lanes above e and both R1 lanes above the global errThreshold */
2783 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2)
2784 return -1;
2785
/* first candidate result: lane 2 of R0 at step 2*lenb-2 */
2786 if(i == 2 * lenb - 2) {
2787 totalError = _mm_extract_epi16(R0, 2);
2788 }
2789 }
2790 }
/* tail: three further steps after the loop (i == 2*lena here), each
   contributing one more candidate to totalError */
2791 Down1 = _mm_insert_epi16(Down1,2*e,0);
2792
2793 //fill the first part of the error
2794 error = ( *(a-(i/2)) != *(b-(i/2-2)) );
2795 Diag = _mm_insert_epi16(Diag,error,0);
2796 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
2797 Diag = _mm_insert_epi16(Diag,error,1);
2798 Diag = _mm_insert_epi16(Diag,2*e,2);
2799
2800 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
2801 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
2802
/* i0 is extracted here but not read again before it is overwritten */
2803 i0 = _mm_extract_epi16(R0, 0);
2804 i1 = _mm_extract_epi16(R0, 1);
2805
2806 totalError = min(totalError, i1);
2807
2808 //fill the second part of the error
2809 i++;
2810 Diag = _mm_sub_epi8(Diag, Diag);
2811 Diag = _mm_insert_epi16(Diag,2*e,0);
2812 error = ( *(a-(i/2)) != *(b-(lenb-1)) );
2813 Diag = _mm_insert_epi16(Diag,error,1);
2814 Diag = _mm_insert_epi16(Diag,2*e,2);
2815
2816 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
2817 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
2818
2819 i0 = _mm_extract_epi16(R1, 0);
2820 i1 = _mm_extract_epi16(R1, 1);
2821
2822 totalError = min(totalError, i1);
2823
2824 //fill the last the last element of the matrix
2825 i++;
2826 Diag = _mm_sub_epi8(Diag, Diag);
2827 error = ( *(a-(i/2)) != *(b-(lenb-1)) );
2828 Diag = _mm_insert_epi16(Diag,error,0);
2829
2830 Down = _mm_insert_epi16(Down,1,0);
2831
2832 Side = _mm_insert_epi16(Side,1,0);
2833
2834 tmp = _mm_srli_si128(R1,2);
2835
2836 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag);
2837 R0 = _mm_min_epi16(R0,tmp+Side);
2838
2839 i0 = _mm_extract_epi16(R0, 0);
2840
2841 totalError = min(totalError, i0);
2842
/* unlike the forward variant, a result of exactly 0 is also reported as -1 */
2843 if(totalError > e || totalError == 0)
2844 return -1;
2845 return totalError;
2846 }
2847
2848 void initBestMapping(int totalReadNumber)
2849 {
2850 int i = 0;
2851 bestHitMappingInfo = getMem(totalReadNumber * sizeof(BestFullMappingInfo));
2852 for(i = 0; i < totalReadNumber; i++) {
2853 bestHitMappingInfo[i].loc = -1;
2854 }
2855 }
2856
2857
2858 void finalizeBestSingleMapping()
2859 {
2860 int i = 0;
2861 char *_tmpQual, *_tmpSeq;
2862 char rqual[SEQ_LENGTH + 1];
2863 rqual[SEQ_LENGTH]='\0';
2864
2865 for(i = 0; i < _msf_seqListSize; i++)
2866 {
2867 if(_msf_seqList[i].hits[0] != 0)
2868 {
2869 if (bestHitMappingInfo[i].dir)
2870 {
2871 reverse(_msf_seqList[i].qual, rqual, SEQ_LENGTH);
2872 _tmpQual = rqual;
2873 _tmpSeq = _msf_seqList[i].rseq;
2874 }
2875 else
2876 {
2877 _tmpQual = _msf_seqList[i].qual;
2878 _tmpSeq = _msf_seqList[i].seq;
2879 }
2880
2881
2882 _msf_output.QNAME = _msf_seqList[i].name;
2883 _msf_output.FLAG = 16 * bestHitMappingInfo[i].dir;
2884 _msf_output.RNAME = bestHitMappingInfo[i].chr;
2885
2886 _msf_output.POS = bestHitMappingInfo[i].loc;
2887 _msf_output.MAPQ = 255;
2888 _msf_output.CIGAR = bestHitMappingInfo[i].cigar ;
2889 _msf_output.MRNAME = "*";
2890 _msf_output.MPOS = 0;
2891 _msf_output.ISIZE = 0;
2892
2893
2894 _msf_output.SEQ = _tmpSeq;
2895 _msf_output.QUAL = _tmpQual;
2896
2897 _msf_output.optSize = 2;
2898 _msf_output.optFields = _msf_optionalFields;
2899
2900 _msf_optionalFields[0].tag = "NM";
2901 _msf_optionalFields[0].type = 'i';
2902 _msf_optionalFields[0].iVal = bestHitMappingInfo[i].err;
2903
2904 _msf_optionalFields[1].tag = "MD";
2905 _msf_optionalFields[1].type = 'Z';
2906 _msf_optionalFields[1].sVal = bestHitMappingInfo[i].md;
2907
2908 output(_msf_output);
2909 }
2910 }
2911 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo));
2912 }
2913 /**********************************************/
2914 int compare (const void *a, const void *b)
2915 {
2916 return ((Pair *)a)->hv - ((Pair *)b)->hv;
2917 /*char *s1 = ((Pair *)a)->hv;
2918 char *s2 = ((Pair *)b)->hv;
2919 int i = 0;
2920
2921 int diff = 0;
2922 int sign = 0;
2923
2924 for(i = 0; i < SEQ_LENGTH; i++)
2925 {
2926 diff += (s1[i] != s2[i]);
2927 if(s1[i] > s2[i])
2928 sign++;
2929 else if(s1[i] < s2[i])
2930 sign--;
2931 }
2932
2933 return diff*sign;*/
2934 // return strncmp(s1, s2,SEQ_LENGTH);
2935
2936 }
2937 /**********************************************/
2938 void preProcessReads()
2939 {
2940 int i = 0;
2941
2942 _msf_sort_seqList = getMem(_msf_seqListSize * sizeof(Pair));
2943 for(i = 0; i < _msf_seqListSize; i++)
2944 {
2945 _msf_sort_seqList[i].hv = hashVal(_msf_seqList[i].seq);
2946
2947 _msf_sort_seqList[i].readNumber = i;
2948 }
2949
2950 qsort(_msf_sort_seqList, _msf_seqListSize, sizeof(Pair), compare);
2951
2952 /*
2953 for(i = 0; i < _msf_seqListSize; i++)
2954 {
2955 //printf("%s\n", _msf_sort_seqList[i].hv);
2956 }
2957 */
2958
2959 _msf_map_sort_seqList = getMem(_msf_seqListSize * sizeof(int));
2960
2961 for(i = 0; i < _msf_seqListSize; i++)
2962 _msf_map_sort_seqList[_msf_seqList[i].readNumber] = i;
2963
2964 }
2965 /**********************************************/
2966
2967 int verifySingleEnd(int index, char* seq, int offset)
2968 {
2969 int curOff = 0;
2970 int i;
2971
2972 char *ref;
2973
2974 int err;
2975 int errCnt =0;
2976 int errCntOff = 0;
2977 int NCntOff = 0;
2978
2979 ref = _msf_refGen + index - 1;
2980
2981 verificationCnt++;
2982
2983 for (i = 0; i < SEQ_LENGTH; i++)
2984 {
2985 err = *ref != *seq;
2986 errCnt += err;
2987 if (errCnt > errThreshold)
2988 {
2989
2990 return -1;
2991 }
2992
2993 if (i >= _msf_samplingLocs[curOff] && i <= _msf_samplingLocsEnds[curOff])
2994 {
2995 errCntOff += err;
2996 NCntOff += (*seq == 'N');
2997 }
2998 else if (curOff < _msf_samplingLocsSize && i>=_msf_samplingLocs[curOff+1])
2999 {
3000
3001 if (errCntOff == 0 && NCntOff == 0 && offset > curOff)
3002 {
3003 return -1;
3004 }
3005
3006 errCntOff = 0;
3007 NCntOff = 0;
3008 curOff++;
3009
3010 if ( i >= _msf_samplingLocs[curOff])
3011 {
3012 errCntOff += err;
3013 NCntOff += (*seq == 'N');
3014 }
3015 }
3016
3017 ref++;
3018 seq++;
3019 }
3020 return errCnt;
3021 }
3022
3023 /*********************************************/
3024 void initFAST(Read *seqList, int seqListSize, int *samplingLocs, int samplingLocsSize, char *genFileName)
3025 {
3026 int i;
3027
3028 if (_msf_optionalFields == NULL)
3029 {
3030 _msf_op = getMem(SEQ_LENGTH);
3031 if (pairedEndMode)
3032 {
3033 _msf_optionalFields = getMem(8*sizeof(OPT_FIELDS));
3034 }
3035 else
3036 {
3037 _msf_optionalFields = getMem(2*sizeof(OPT_FIELDS));
3038 }
3039
3040 for (i=0; i<200;i++)
3041 {
3042 sprintf(_msf_numbers[i],"%d%c",i, '\0');
3043 }
3044 sprintf(_msf_cigar, "%dM", SEQ_LENGTH);
3045 }
3046
3047 if (_msf_samplingLocsEnds == NULL)
3048 {
3049 _msf_samplingLocs = samplingLocs;
3050 _msf_samplingLocsSize = samplingLocsSize;
3051
3052 _msf_samplingLocsEnds = getMem(sizeof(int)*_msf_samplingLocsSize);
3053 for (i=0; i<_msf_samplingLocsSize; i++)
3054 {
3055 _msf_samplingLocsEnds[i]=_msf_samplingLocs[i]+WINDOW_SIZE-1;
3056 }
3057
3058 _msf_seqList = seqList;
3059 _msf_seqListSize = seqListSize;
3060
3061 preProcessReads();
3062
3063 _msf_oeaMapping = getMem(_msf_seqListSize * sizeof(int));
3064 for(i = 0; i < _msf_seqListSize; i++)
3065 {
3066 _msf_oeaMapping[i] = 0;
3067 }
3068
3069 _msf_discordantMapping = getMem(_msf_seqListSize * sizeof(int));
3070 for(i = 0; i < _msf_seqListSize; i++)
3071 {
3072 _msf_discordantMapping[i] = 0;
3073 }
3074
3075 }
3076
3077 if (_msf_refGenName == NULL)
3078 {
3079 _msf_refGenName = getMem(4*SEQ_LENGTH);
3080 }
3081 _msf_refGen = getRefGenome();
3082 _msf_refGenLength = strlen(_msf_refGen);
3083
3084 _msf_refGenOffset = getRefGenomeOffset();
3085 snprintf(_msf_refGenName, 4*SEQ_LENGTH,"%s%c", getRefGenomeName(), '\0');
3086 _msf_refGenName[strlen(getRefGenomeName())] = '\0';
3087
3088
3089 if (_msf_verifiedLocs != NULL){
3090 freeMem(_msf_verifiedLocs, sizeof(int) * (_msf_refGenLength+1));
3091 }
3092
3093 _msf_verifiedLocs = (int *) getMem(sizeof(int)*(_msf_refGenLength+1));
3094
3095 for (i=0; i<=_msf_refGenLength; i++)
3096 _msf_verifiedLocs[i] = _msf_seqListSize*10+1;
3097
3098
3099
3100 if (pairedEndMode && _msf_seqHits == NULL)
3101 {
3102
3103 _msf_mappingInfo = getMem(seqListSize * sizeof (MappingInfo));
3104
3105 for (i=0; i<seqListSize; i++)
3106 {
3107 //_msf_mappingInfo[i].next = getMem(sizeof(MappingLocations));
3108 _msf_mappingInfo[i].next = NULL;
3109 _msf_mappingInfo[i].size = 0;
3110 }
3111
3112 _msf_seqHits = getMem((_msf_seqListSize) * sizeof(int));
3113
3114
3115 for (i=0; i<_msf_seqListSize; i++)
3116 {
3117 _msf_seqHits[i] = 0;
3118 }
3119
3120 _msf_readHasConcordantMapping = getMem(_msf_seqListSize / 2 * sizeof(char));
3121 for(i = 0; i < _msf_seqListSize/2; i++)
3122 {
3123 _msf_readHasConcordantMapping[i] = 0;
3124 }
3125
3126 initLoadingRefGenome(genFileName);
3127 }
3128
3129 if (_msf_refGenOffset == 0)
3130 {
3131 _msf_refGenBeg = 1;
3132 }
3133 else
3134 {
3135 _msf_refGenBeg = CONTIG_OVERLAP - SEQ_LENGTH + 2;
3136 }
3137 _msf_refGenEnd = _msf_refGenLength - SEQ_LENGTH + 1;
3138
3139
3140 }
3141 /**********************************************/
3142 void finalizeFAST()
3143 {
3144 freeMem(_msf_seqHits, (_msf_seqListSize) * sizeof(int));
3145 freeMem(_msf_refGenName, 4*SEQ_LENGTH);
3146
3147
3148 /*
3149 int i;
3150 for (i=0; i<_msf_rIndexSize; i++)
3151 {
3152 freeMem(_msf_rIndex[i].seqInfo, _msf_rIndex[i].seqInfo[0]+1);
3153 }
3154 freeMem(_msf_rIndex, _msf_rIndexSize);*/
3155
3156
3157 freeMem(_msf_map_sort_seqList, sizeof(Pair) * _msf_seqListSize);
3158 freeMem(_msf_sort_seqList, sizeof(int) * _msf_seqListSize);
3159
3160 }
3161
3162 /*
3163 Will apply the Levenshtein Dynamic programming.
3164 Different from verifySingleEndEditDistance function
3165 as in this function only one dynamic table is made while
3166 in verifySingleEndEditDistance two dynamic tables are made
3167 for each right and left string
3168 */
/*
 * refIndex  : 1-based start position of the alignment in _msf_refGen.
 * seq       : read sequence.
 * seqLength : number of characters of seq to align.
 * matrix    : output buffer; receives the NUL-terminated edit string.
 *
 * Fills the global score[][] table inside a band of errThreshold cells
 * around the main diagonal (rows index the read, columns the reference),
 * picks the best cell of the last row, then walks back to the origin
 * writing one entry per step into matrix:
 *   'M'                      match
 *   reference base           mismatch
 *   read base  + 'I'         step that consumes only a read character
 *   reference base + 'D'     step that consumes only a reference character
 * The string is produced back-to-front and reversed at the end through a
 * 200-byte local buffer.
 *
 * Returns the score of the chosen last-row cell.
 */
3169 int editDistance(int refIndex, char *seq, int seqLength, char *matrix)
3170 {
3171 int i = 0;
3172 int size = 0;
3173 int error = 0;
3174 int rIndex = 0;
3175 int directionIndex = 0;
3176
3177 int min = 0;
3178 int minIndex =0;
3179
3180 int tempUp = 0;
3181 int tempDown = 0;
3182
3183 char *ref;
3184
3185 int errorString = 0;
3186 /*
3187 1: Up
3188 2: Side
3189 3: Diagonal Match
3190 4: Diagonal Mismatch
3191 */
3192
3193 int upValue;
3194 int diagValue;
3195 int sideValue;
3196
3197 ref = _msf_refGen + refIndex - 1;
3198
3199 rIndex = 1;
3200
/* first row and first column of the band: distance from the empty prefix */
3201 for(i=0; i <= errThreshold; i++)
3202 {
3203 score[0][i] = i;
3204 score[i][0] = i;
3205 }
3206
/* fill the table one reference column (rIndex) at a time */
3207 while(rIndex <= seqLength +errThreshold)
3208 {
/* rows tempUp .. tempDown-1 are the cells of this column that lie inside the band */
3209 tempUp = ((rIndex - errThreshold) > 0 ? ((rIndex > seqLength) ? seqLength - errThreshold :rIndex - errThreshold) : 1 );
3210 tempDown = ((rIndex >= seqLength-errThreshold ) ? seqLength+1 :rIndex + errThreshold + 1);
3211 for(i = tempUp ; i < tempDown ; i++)
3212 {
/* note: despite its name, errorString is 1 when the two characters are EQUAL */
3213 errorString = (*(ref+rIndex-1) == *(seq+i-1));
3214
3215 upValue = score[i-1][rIndex]+1;
3216 diagValue = score[i-1][rIndex-1]+ !errorString;
3217 sideValue = score[i][rIndex-1]+1;
3218
/* interior cells use all three neighbours; cells on a band edge skip
   the neighbour that lies outside the band */
3219 if(i != tempUp && i != tempDown-1)
3220 score[i][rIndex] = min3(sideValue, diagValue , upValue);
3221
3222 else if( (i == ((rIndex - errThreshold) > 0 ? rIndex - errThreshold : 1)) && rIndex <= seqLength )
3223 score[i][rIndex] = min(sideValue, diagValue);
3224 else if(rIndex > seqLength && (i == seqLength - errThreshold) )
3225 score[i][rIndex] = sideValue;
3226 else
3227 score[i][rIndex] = min(diagValue , upValue);
3228
/* error tracks the column minimum here, but it is overwritten after the loop */
3229 if(i == tempUp)
3230 error = score[i][rIndex];
3231 else if(error > score[i][rIndex])
3232 error = score[i][rIndex];
3233 }
3234 rIndex++;
3235 }
3236
3237 min = score[seqLength][seqLength+errThreshold];
3238 minIndex = seqLength + errThreshold;
3239
3240 // Find the Best error for all the possible ways.
/* scans the last row leftwards; >= means ties go to the smaller column */
3241 for(i = 1; i <= 2*errThreshold; i++)
3242 {
3243 if(min >= score[seqLength][seqLength+errThreshold-i] && seqLength+errThreshold-i > 0)
3244 {
3245 min = score[seqLength][seqLength+errThreshold-i];
3246 minIndex = seqLength+errThreshold-i;
3247 }
3248 }
3249
3250 error = score[seqLength][minIndex];
3251
/* traceback from (seqLength, minIndex) to (0, 0);
   directionIndex is the read row, rIndex the reference column */
3252 directionIndex = seqLength;
3253 rIndex = minIndex;
3254 while(directionIndex != 0 || rIndex != 0)
3255 {
3256
/* first column: only a step up is possible */
3257 if(rIndex == 0)
3258 {
3259 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1)
3260 {
3261 matrix[size] = *(seq+directionIndex-1);
3262 size++;
3263 matrix[size] = 'I';
3264 directionIndex--;
3265 }
3266 }
/* first row: only a step sideways is possible */
3267 else if(directionIndex == 0)
3268 {
3269 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1)
3270 {
3271 matrix[size] = *(ref+rIndex-1);
3272 size++;
3273 matrix[size] = 'D';
3274 rIndex--;
3275 }
3276 }
/* lower band edge: the side neighbour is outside the band */
3277 else if(directionIndex-rIndex == errThreshold)
3278 {
3279 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1)
3280 {
3281 matrix[size] = *(seq+directionIndex-1);
3282 size++;
3283 matrix[size] = 'I';
3284 directionIndex--;
3285 }
3286 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
3287 {
3288 matrix[size] = *(ref+rIndex-1);
3289 rIndex--;
3290 directionIndex--;
3291 }
3292 else
3293 {
3294 matrix[size] = 'M';
3295 rIndex--;
3296 directionIndex--;
3297 }
3298
3299 }
/* upper band edge: the up neighbour is outside the band */
3300 else if(rIndex - directionIndex == errThreshold)
3301 {
3302 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1)
3303 {
3304 matrix[size] = *(ref+rIndex-1);
3305 size++;
3306 matrix[size] = 'D';
3307 rIndex--;
3308 }
3309 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
3310 {
3311 matrix[size] = *(ref+rIndex-1);
3312 rIndex--;
3313 directionIndex--;
3314 }
3315 else
3316 {
3317 matrix[size] = 'M';
3318 rIndex--;
3319 directionIndex--;
3320 }
3321 }
/* interior cell: try up, then side, then diagonal mismatch, else match */
3322 else
3323 {
3324 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1 && directionIndex != 0)
3325 {
3326 matrix[size] = *(seq+directionIndex-1);
3327 size++;
3328 matrix[size] = 'I';
3329 directionIndex--;
3330 }
3331 else if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1 && rIndex != 0)
3332 {
3333 matrix[size] = *(ref+rIndex-1);
3334 size++;
3335 matrix[size] = 'D';
3336 rIndex--;
3337 }
3338 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
3339 {
3340 matrix[size] = *(ref+rIndex-1);
3341 rIndex--;
3342 directionIndex--;
3343 }
3344 else
3345 {
3346 matrix[size] = 'M';
3347 rIndex--;
3348 directionIndex--;
3349 }
3350 }
3351 size++;
3352 }
3353
3354 matrix[size] = '\0';
3355
/* NOTE(review): fixed 200-byte buffer; assumes size < 200 and that
   reverse() NUL-terminates its output -- confirm against reverse() */
3356 char returnString[200];
3357
3358 returnString[0] = '\0';
3359 reverse(matrix, returnString, size);
3360 sprintf(matrix, "%s", returnString);
3361
3362 return error;
3363 }
3364
3365 /*
3366 Applies Levenshtein dynamic programming
3367 in both the right and left directions until the
3368 error threshold is reached or the end of the string is hit.
3369
3370 */
/*
 * Packs the first six characters of seq into an integer, two bits per
 * character (A=0, C=1, G=2, T=3), first character in the most
 * significant position.
 *
 * Returns the packed value (0..4095), or -1 as soon as a character other
 * than upper-case A, C, G or T is met.
 */
int msfHashVal(char *seq)
{
	int packed = 0;
	int pos;

	for (pos = 0; pos < 6; pos++)
	{
		const char base = seq[pos];
		int code;

		if (base == 'A')
			code = 0;
		else if (base == 'C')
			code = 1;
		else if (base == 'G')
			code = 2;
		else if (base == 'T')
			code = 3;
		else
			return -1;

		packed = (packed << 2) | code;
	}

	return packed;
}
3401
3402
3403
3404 int verifySingleEndEditDistance2(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
3405 {
3406 int i = 0;
3407
3408 char * ref;
3409 char * tempref;
3410
3411 int rIndex = 0; //reference Index
3412
3413 int e = errThreshold;
3414 int error = 0;
3415 int error1 = 0;
3416 int totalError = 0;
3417
3418
3419 /*
3420 1: Up
3421 2: Side
3422 3: Diagnoal Match
3423 4: Diagnoal Mismatch
3424 */
3425
3426
3427 int minIndex1 = 0;
3428 int minIndex2 = 0;
3429
3430
3431 int directionIndex = 0;
3432
3433 int size = 0;
3434
3435 int startIndex1 = 0;
3436
3437 rIndex = 1;
3438
3439
3440 char matrixR[200];
3441 char matrixL[200];
3442
3443 ref = _msf_refGen + refIndex - 1;
3444 tempref = _msf_refGen + refIndex - 1;
3445
3446 int jumpIndex = 0;
3447
3448 if(rSeqLength != 0)
3449 {
3450 error1 = forwardEditDistance2SSE2(ref+segLength+jumpIndex, rSeqLength-jumpIndex, rSeq+jumpIndex, rSeqLength-jumpIndex);
3451 if(error1 == -1)
3452 return -1;
3453 }
3454
3455
3456 if(lSeqLength != 0)
3457 {
3458 error = backwardEditDistance2SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
3459 if(error == -1)
3460 {
3461 return -1;
3462 }
3463 }
3464
3465 matrixL[0] = '\0';
3466 matrixR[0] = '\0';
3467
3468
3469 ref = _msf_refGen + refIndex - 1;
3470
3471 if(error1+error > errThreshold)
3472 return -1;
3473
3474 ref = _msf_refGen + refIndex - 1;
3475
3476 rIndex = startIndex1+1;
3477
3478 int i0 = 0;
3479 int i1 = 0;
3480 int i2 = 0;
3481
3482 __m128i R0;
3483 __m128i R1;
3484
3485 __m128i Side1, Side2,Side; //side matrix
3486 __m128i Down1, Down2,Down; //down matrix
3487 __m128i Diag; //
3488
3489 __m128i tmp;
3490
3491 /* initialize */
3492 R0 = _mm_setzero_si128 ();
3493 R1 = _mm_setzero_si128 ();
3494 Diag = _mm_setzero_si128 ();
3495 Side1 = _mm_setzero_si128 ();
3496 Side2 = _mm_setzero_si128 ();
3497 Down1 = _mm_setzero_si128 ();
3498 Down2 = _mm_setzero_si128 ();
3499 Down = _mm_setzero_si128 ();
3500 Side = _mm_setzero_si128 ();
3501 tmp = _mm_setzero_si128 ();
3502 /* end initialize */
3503
3504 int mismatch[3] = {0,0,0};
3505
3506 if(lSeqLength != 0)
3507 {
3508 char *a;
3509 char *b;
3510
3511 a = ref-1;
3512 b = lSeq+lSeqLength-1;
3513
3514 R0 = _mm_insert_epi16(R0,0,0);
3515
3516 score[0][0] = 0;
3517
3518 R1 = _mm_insert_epi16(R1,1,0);
3519 R1 = _mm_insert_epi16(R1,1,1);
3520
3521 score[1][0] = 1;
3522 direction1[1][0] = 1;
3523 score[0][1] = 1;
3524 direction1[0][1] = 2;
3525
3526 mismatch[0] = ((a[0]) != (b[0]));
3527
3528 Diag = _mm_insert_epi16(Diag,2*e,0);
3529 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
3530 Diag = _mm_insert_epi16(Diag,2*e,2);
3531
3532 Side1 = _mm_insert_epi16(Side1,1,0);
3533 Side1 = _mm_insert_epi16(Side1,1,1);
3534 Side1 = _mm_insert_epi16(Side1,2*e,2);
3535
3536 Side2 = _mm_insert_epi16(Side2,2*e,0);
3537 Side2 = _mm_insert_epi16(Side2,1,1);
3538 Side2 = _mm_insert_epi16(Side2,1,2);
3539
3540 Down1 = _mm_insert_epi16(Down1,1,0);
3541 Down1 = _mm_insert_epi16(Down1,1,1);
3542 Down1 = _mm_insert_epi16(Down1,2*e,2);
3543
3544 Down2 = _mm_insert_epi16(Down2,2*e,0);
3545 Down2 = _mm_insert_epi16(Down2,1,1);
3546 Down2 = _mm_insert_epi16(Down2,1,2);
3547
3548 tmp = _mm_slli_si128(R1,2);
3549
3550 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3551 R0 = _mm_min_epi16(R0,tmp+Down2);
3552
3553 i0 = _mm_extract_epi16(R0, 0);
3554 i1 = _mm_extract_epi16(R0, 1);
3555 i2 = _mm_extract_epi16(R0, 2);
3556
3557 score[0][2] = i0;
3558 score[1][1] = i1;
3559 score[2][0] = i2;
3560
3561 direction1[0][2] = 2;
3562 direction1[1][1] = ((mismatch[0] == 0)? 3 : 4);
3563 direction1[2][0] = 1;
3564
3565 for (i = 3; i < 2*lSeqLength; i++)
3566 {
3567 if(i % 2 ==1)
3568 {
3569 Diag = _mm_sub_epi8(Diag, Diag);
3570 mismatch[0] = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) );
3571 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3572 mismatch[1] = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) );
3573 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3574
3575 tmp = _mm_srli_si128(R0,2);
3576
3577 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
3578 R1 = _mm_min_epi16(R1,R0+Down1);
3579
3580 i0 = _mm_extract_epi16(R1, 0);
3581 i1 = _mm_extract_epi16(R1, 1);
3582
3583 score[i/2][i/2+1] = i0;
3584 score[i/2+1][i/2] = i1;
3585
3586 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
3587 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
3588 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4;
3589
3590 direction1[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 :
3591 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 :
3592 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4;
3593
3594 if(i > 2 * lSeqLength - 2)
3595 {
3596 error = min(error, i1);
3597 if(error == i1)
3598 minIndex1 = i-lSeqLength;
3599 }
3600 }
3601
3602 else if(i % 2 == 0)
3603 {
3604 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) );
3605 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3606 mismatch[1] = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
3607 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3608 mismatch[2] = ( *(a-(i/2-2)) != *(b-(i/2)) );
3609 Diag = _mm_insert_epi16(Diag,mismatch[2],2);
3610
3611 tmp = _mm_slli_si128(R1,2);
3612
3613 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3614 R0 = _mm_min_epi16(R0,tmp+Down2);
3615
3616 i0 = _mm_extract_epi16(R0, 0);
3617 i1 = _mm_extract_epi16(R0, 1);
3618 i2 = _mm_extract_epi16(R0, 2);
3619
3620 score[i/2-1][i/2+1] = i0;
3621 score[i/2][i/2] = i1;
3622 score[i/2+1][i/2-1] = i2;
3623
3624 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
3625
3626 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
3627 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
3628 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4;
3629
3630 direction1[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2] == 0) ? 3 :
3631 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4;
3632
3633 if( (i/2) % segLength == 0 && i1 == 0) // the segment has been processed no need to process it again
3634 {
3635 return -1;
3636 }
3637
3638 if(i == 2 * lSeqLength - 2)
3639 {
3640 error = i2;
3641 minIndex1 = i-lSeqLength;
3642 }
3643 }
3644 }
3645
3646 Down1 = _mm_insert_epi16(Down1,2*e,0);
3647
3648 //fill the first part of the error
3649 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) );
3650 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3651 mismatch[1] = ( *(a-(i/2-1)) !=*(b-(i/2-1)) );
3652 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3653 Diag = _mm_insert_epi16(Diag,2*e,2);
3654
3655 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3656 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
3657
3658 i0 = _mm_extract_epi16(R0, 0);
3659 i1 = _mm_extract_epi16(R0, 1);
3660
3661 error = min(error, i1);
3662 if(error == i1)
3663 minIndex1 = i-lSeqLength;
3664
3665 score[i/2-1][i/2+1] = i0;
3666 score[i/2][i/2] = i1;
3667
3668 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
3669 (score[i/2-1][i/2+1]-score[i/2-1][i/2]) ? 2 : 4;
3670
3671 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
3672 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
3673 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4;
3674
3675 //fill the second part of the error
3676 i++;
3677 Diag = _mm_sub_epi8(Diag, Diag);
3678 Diag = _mm_insert_epi16(Diag,2*e,0);
3679 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) );
3680 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
3681 Diag = _mm_insert_epi16(Diag,2*e,2);
3682
3683 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
3684 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
3685
3686 i0 = _mm_extract_epi16(R1, 0);
3687 i1 = _mm_extract_epi16(R1, 1);
3688
3689 error = min(error, i1);
3690 if(error == i1)
3691 minIndex1 = i-lSeqLength;
3692
3693 score[i/2-1][i/2+2] = i0;
3694 score[i/2][i/2+1] = i1;
3695
3696 direction1[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 :
3697 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 4;
3698
3699 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2]) ? 3 :
3700 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
3701 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4;
3702
3703 //fill the last the last element of the matrix
3704 i++;
3705 Diag = _mm_sub_epi8(Diag, Diag);
3706 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) );
3707 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3708
3709 Down = _mm_insert_epi16(Down,1,0);
3710
3711 Side = _mm_insert_epi16(Side,1,0);
3712
3713 tmp = _mm_srli_si128(R1,2);
3714
3715 R0 = _mm_min_epi16(R1+Down, R0+Diag);
3716 R0 = _mm_min_epi16(R0,tmp+Side);
3717
3718 i0 = _mm_extract_epi16(R0, 0);
3719
3720 error = min(error, i0);
3721 if(error == 0)
3722 return -1;
3723 if(error == i0)
3724 minIndex1 = i-lSeqLength;
3725 if(mismatch[0] == 0)
3726 direction1[lSeqLength][lSeqLength+errThreshold] = 3;
3727 else
3728 {
3729 if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength][lSeqLength+errThreshold-1] == 1)
3730 direction1[lSeqLength][lSeqLength+errThreshold] = 2;
3731 else if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength-1][lSeqLength+errThreshold] == 1)
3732 direction1[lSeqLength][lSeqLength+errThreshold] = 1;
3733 else
3734 direction1[lSeqLength][lSeqLength+errThreshold] = 4;
3735 }
3736 }
3737 error1 = error;
3738 error = 0;
3739
3740 directionIndex = lSeqLength;
3741 rIndex = minIndex1;
3742
3743
3744 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
3745
3746 ref = ref + segLength;
3747
3748 if(rSeqLength <= e)
3749 {
3750 char *a;
3751 char *b;
3752
3753 int tmp_index = 0;
3754
3755 a = ref;
3756 b = rSeq;
3757
3758 for(tmp_index = 0; tmp_index < rSeqLength; tmp_index++)
3759 {
3760 matrixR[tmp_index] = (a[tmp_index]==b[tmp_index]) ? 'M' : a[tmp_index] ;
3761 }
3762 matrixR[tmp_index] = '\0';
3763 }
3764 else if(rSeqLength != 0 && rSeqLength >= e)
3765 {
3766 char *a;
3767 char *b;
3768
3769 a = ref;
3770 b = rSeq;
3771
3772 R0 = _mm_sub_epi8(R0, R0);
3773 R1 = _mm_sub_epi8(R1, R1);
3774
3775 R0 = _mm_insert_epi16(R0,0,0);
3776
3777 score[0][0] = 0;
3778
3779 R1 = _mm_insert_epi16(R1,1,0);
3780 R1 = _mm_insert_epi16(R1,1,1);
3781
3782 score[1][0] = 1;
3783 direction2[1][0] = 1;
3784 score[0][1] = 1;
3785 direction2[0][1] = 2;
3786
3787 mismatch[0] = ((a[0]) != (b[0]));
3788
3789 Diag = _mm_insert_epi16(Diag,2*e,0);
3790 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
3791 Diag = _mm_insert_epi16(Diag,2*e,2);
3792
3793 Side1 = _mm_insert_epi16(Side1,1,0);
3794 Side1 = _mm_insert_epi16(Side1,1,1);
3795 Side1 = _mm_insert_epi16(Side1,2*e,2);
3796
3797 Side2 = _mm_insert_epi16(Side2,2*e,0);
3798 Side2 = _mm_insert_epi16(Side2,1,1);
3799 Side2 = _mm_insert_epi16(Side2,1,2);
3800
3801 Down1 = _mm_insert_epi16(Down1,1,0);
3802 Down1 = _mm_insert_epi16(Down1,1,1);
3803 Down1 = _mm_insert_epi16(Down1,2*e,2);
3804
3805 Down2 = _mm_insert_epi16(Down2,2*e,0);
3806 Down2 = _mm_insert_epi16(Down2,1,1);
3807 Down2 = _mm_insert_epi16(Down2,1,2);
3808
3809 tmp = _mm_slli_si128(R1,2);
3810
3811 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3812 R0 = _mm_min_epi16(R0,tmp+Down2);
3813
3814 i0 = _mm_extract_epi16(R0, 0);
3815 i1 = _mm_extract_epi16(R0, 1);
3816 i2 = _mm_extract_epi16(R0, 2);
3817
3818 score[0][2] = i0;
3819 score[1][1] = i1;
3820 score[2][0] = i2;
3821
3822 direction2[0][2] = 2;
3823 direction2[1][1] = ((mismatch[0] == 0)? 3 : 4);
3824 direction2[2][0] = 1;
3825
3826
3827 for (i = 3; i < 2*rSeqLength; i++)
3828 {
3829 if(i % 2 ==1)
3830 {
3831 Diag = _mm_sub_epi8(Diag, Diag);
3832 mismatch[0] = ((a[(i+1)/2-1]) != (b[(i-1)/2-1]));
3833 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3834 mismatch[1] = ((a[(i-1)/2-1]) != (b[(i+1)/2-1]));
3835 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3836
3837 tmp = _mm_srli_si128(R0,2);
3838
3839 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
3840 R1 = _mm_min_epi16(R1,R0+Down1);
3841
3842 i0 = _mm_extract_epi16(R1, 0);
3843 i1 = _mm_extract_epi16(R1, 1);
3844
3845 score[i/2][i/2+1] = i0;
3846 score[i/2+1][i/2] = i1;
3847
3848 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
3849 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
3850 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4;
3851
3852 direction2[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 :
3853 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 :
3854 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4;
3855
3856
3857 if(i > 2 * rSeqLength - 2)
3858 {
3859 error = min(error, i1);
3860 if(error == i1)
3861 minIndex2 = i-rSeqLength;
3862 }
3863 }
3864
3865 else if(i % 2 == 0)
3866 {
3867 mismatch[0] = ((a[i/2]) != (b[i/2-2]));
3868 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3869 mismatch[1] = ((a[i/2-1]) != (b[i/2-1]));
3870 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3871 mismatch[2] = ((a[i/2-2]) != (b[i/2]));
3872 Diag = _mm_insert_epi16(Diag,mismatch[2],2);
3873
3874 tmp = _mm_slli_si128(R1,2);
3875
3876 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3877 R0 = _mm_min_epi16(R0,tmp+Down2);
3878
3879 i0 = _mm_extract_epi16(R0, 0);
3880 i1 = _mm_extract_epi16(R0, 1);
3881 i2 = _mm_extract_epi16(R0, 2);
3882
3883 score[i/2-1][i/2+1] = i0;
3884 score[i/2][i/2] = i1;
3885 score[i/2+1][i/2-1] = i2;
3886
3887 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
3888 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
3889
3890 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
3891 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
3892 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4;
3893
3894 direction2[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2]==0) ? 3 :
3895 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4;
3896
3897
3898 if(i == 2 * rSeqLength - 2)
3899 {
3900 error = i2;
3901 minIndex2 = i-rSeqLength;
3902 }
3903 }
3904 }
3905
3906 Down1 = _mm_insert_epi16(Down1,2*e,0);
3907
3908 //fill the first part of the error
3909 mismatch[0] = ((a[i/2]) != (b[i/2-2]));
3910 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3911 mismatch[1] = ((a[i/2-1]) != (b[i/2-1]));
3912 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
3913 Diag = _mm_insert_epi16(Diag,2*e,2);
3914
3915 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
3916 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
3917
3918 i0 = _mm_extract_epi16(R0, 0);
3919 i1 = _mm_extract_epi16(R0, 1);
3920
3921 error = min(error, i1);
3922 if(error == i1)
3923 minIndex2 = i-rSeqLength;
3924
3925 score[i/2-1][i/2+1] = i0;
3926 score[i/2][i/2] = i1;
3927
3928 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
3929 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
3930
3931 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
3932 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
3933 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4;
3934
3935
3936 //fill the second part of the error
3937 i++;
3938 Diag = _mm_sub_epi8(Diag, Diag);
3939 Diag = _mm_insert_epi16(Diag,2*e,0);
3940 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1]));
3941 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
3942 Diag = _mm_insert_epi16(Diag,2*e,2);
3943
3944 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
3945 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
3946
3947 i0 = _mm_extract_epi16(R1, 0);
3948 i1 = _mm_extract_epi16(R1, 1);
3949
3950 error = min(error, i1);
3951 if(error == i1)
3952 minIndex2 = i-rSeqLength;
3953
3954 score[i/2-1][i/2+2] = i0;
3955 score[i/2][i/2+1] = i1;
3956
3957 direction2[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 :
3958 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 3;
3959
3960 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
3961 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
3962 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4;
3963
3964
3965 //fill the last the last element of the matrix
3966 i++;
3967 Diag = _mm_sub_epi8(Diag, Diag);
3968 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1]));
3969 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
3970
3971 Down = _mm_sub_epi8(Down, Down);
3972 Down = _mm_insert_epi16(Down,1,0);
3973
3974 Side = _mm_sub_epi8(Side, Side);
3975 Side = _mm_insert_epi16(Side,1,0);
3976
3977 tmp = _mm_srli_si128(R1,2);
3978
3979 R0 = _mm_min_epi16(R1+Down, R0+Diag);
3980 R0 = _mm_min_epi16(R0,tmp+Side);
3981
3982 i0 = _mm_extract_epi16(R0, 0);
3983
3984 error = min(error, i0);
3985 if(error == i0)
3986 minIndex2 = i-rSeqLength;
3987
3988 if(mismatch[0] == 0)
3989 direction2[rSeqLength][rSeqLength+errThreshold] = 3;
3990 else
3991 {
3992 if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength][rSeqLength+errThreshold-1] == 1)
3993 direction2[lSeqLength][lSeqLength+errThreshold] = 2;
3994 else if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength-1][rSeqLength+errThreshold] == 1)
3995 direction2[rSeqLength][rSeqLength+errThreshold] = 1;
3996 else
3997 direction2[rSeqLength][rSeqLength+errThreshold] = 4;
3998 }
3999
4000 }
4001
4002 totalError = error1 + error;
4003
4004 size = 0;
4005 directionIndex = rSeqLength;
4006 rIndex = minIndex2;
4007
4008
4009 if(rSeqLength > e)
4010 {
4011 while(directionIndex != 0 || rIndex != 0)
4012 {
4013
4014 if(direction2[directionIndex][rIndex] == 3)
4015 {
4016 matrixR[size] = 'M';
4017 rIndex--;
4018 directionIndex--;
4019 }
4020 else if(direction2[directionIndex][rIndex] == 4)
4021 {
4022 matrixR[size] = *(ref+rIndex-1);
4023 rIndex--;
4024 directionIndex--;
4025 }
4026 else if(direction2[directionIndex][rIndex] == 2)
4027 {
4028 matrixR[size] = *(ref+rIndex-1);
4029 size++;
4030 matrixR[size] = 'D';
4031 rIndex--;
4032 }
4033 else
4034 {
4035 matrixR[size] = *(rSeq+directionIndex-1);
4036 size++;
4037 matrixR[size] = 'I';
4038 directionIndex--;
4039 }
4040 size++;
4041 }
4042 matrixR[size] = '\0';
4043 }
4044 size = 0;
4045 directionIndex = lSeqLength;
4046 rIndex = minIndex1;
4047
4048 while(directionIndex != 0 || rIndex != 0)
4049 {
4050
4051 if(direction1[directionIndex][rIndex] == 3)
4052 {
4053 matrixL[size] = 'M';
4054 rIndex--;
4055 directionIndex--;
4056 }
4057 else if(direction1[directionIndex][rIndex] == 4)
4058 {
4059 matrixL[size] = *(tempref-rIndex);
4060 rIndex--;
4061 directionIndex--;
4062 }
4063 else if(direction1[directionIndex][rIndex] == 2)
4064 {
4065 matrixL[size] = 'D';
4066 size++;
4067 matrixL[size] = *(tempref-rIndex);
4068 rIndex--;
4069 }
4070 else
4071 {
4072 matrixL[size] = 'I';
4073 size++;
4074 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
4075 directionIndex--;
4076 }
4077
4078 size++;
4079 }
4080
4081 matrixL[size] = '\0';
4082
4083 char middle[200];
4084 middle[0] = '\0';
4085
4086 for(i = 0; i < segLength; i++)
4087 middle[i] = 'M';
4088 middle[segLength] = '\0';
4089
4090 char rmatrixR[200];
4091
4092 reverse(matrixR, rmatrixR, strlen(matrixR));
4093
4094 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
4095
4096 return totalError;
4097 }
4098
4099 int verifySingleEndEditDistance4(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
4100 {
4101
4102 int i = 0;
4103
4104 char * ref;
4105 char * tempref;
4106
4107 int rIndex = 0; //reference Index
4108
4109 int error = 0;
4110 int error1 = 0;
4111
4112 int error2 = 0;
4113 int error3 = 0;
4114 int totalError = 0;
4115 int errorSegment = 0;
4116
4117 int ERROR_BOUND = errThreshold;
4118
4119
4120 /*
4121 1: Up
4122 2: Side
4123 3: Diagnoal Match
4124 4: Diagnoal Mismatch
4125 */
4126
4127 int min = 0;
4128 int minIndex1 = 0;
4129 int minIndex2 = 0;
4130
4131 int directionIndex = 0;
4132
4133
4134 int size = 0;
4135
4136 ref = _msf_refGen + refIndex - 1;
4137 tempref = _msf_refGen + refIndex - 1;
4138
4139
4140 if(lSeqLength != 0)
4141 {
4142 error3 = backwardEditDistance4SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
4143 if(error3 == -1 || error3 == 0){
4144 return -1;
4145 }
4146 }
4147
4148 if(rSeqLength != 0)
4149 {
4150 error2 = forwardEditDistance4SSE2(ref+segLength, rSeqLength, rSeq, rSeqLength);
4151 if(error2 == -1)
4152 return -1;
4153 }
4154
4155 if(error2 + error3 > errThreshold)
4156 return -1;
4157
4158 rIndex = 1;
4159
4160 int prevError = 0;
4161
4162 int tempUp = 0;
4163 int tempDown = 0;
4164
4165 int errorString = 0;
4166
4167 int upValue;
4168 int diagValue;
4169 int sideValue;
4170
4171 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0)
4172 {
4173 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
4174 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
4175 for(i = tempUp ; i < tempDown ; i++)
4176 {
4177 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
4178
4179 upValue = scoreB[i-1][rIndex]+1;
4180 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
4181 sideValue = scoreB[i][rIndex-1]+1;
4182
4183 if(i != tempUp && i != tempDown-1)
4184 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
4185
4186 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
4187 scoreB[i][rIndex] = min(sideValue, diagValue);
4188 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
4189 scoreB[i][rIndex] = sideValue;
4190 else
4191 scoreB[i][rIndex] = min(diagValue , upValue);
4192
4193 if(i == tempUp)
4194 error = scoreB[i][rIndex];
4195 else if(error > scoreB[i][rIndex])
4196 error = scoreB[i][rIndex];
4197 }
4198 if(rIndex <= lSeqLength)
4199 {
4200 errorSegment = error-prevError;
4201 }
4202 rIndex++;
4203 }
4204
4205 if(lSeqLength != 0)
4206 {
4207 min = scoreB[lSeqLength][lSeqLength+errThreshold];
4208 minIndex1 = lSeqLength + errThreshold;
4209
4210 // Find the Best error for all the possible ways.
4211 for(i = 1; i <= 2*errThreshold; i++)
4212 {
4213 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0)
4214 {
4215 min = scoreB[lSeqLength][lSeqLength+errThreshold-i];
4216 minIndex1 = lSeqLength+errThreshold-i;
4217 }
4218 }
4219 error = scoreB[lSeqLength][minIndex1];
4220 }
4221
4222 error1 = error;
4223
4224 error = 0;
4225 errorSegment = 0;
4226
4227 directionIndex = lSeqLength;
4228 rIndex = minIndex1;
4229
4230
4231 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
4232
4233 ref = ref + segLength;
4234
4235 if(rSeqLength != 0)
4236 {
4237 rIndex = 1;
4238 while(rIndex <= rSeqLength+errThreshold-error1)
4239 {
4240 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
4241 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
4242 for(i = tempUp; i < tempDown ; i++)
4243 {
4244 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
4245
4246 upValue = scoreF[i-1][rIndex]+1;
4247 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
4248 sideValue = scoreF[i][rIndex-1]+1;
4249
4250 if(i != tempUp && i != tempDown-1)
4251 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
4252 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
4253 scoreF[i][rIndex] = min(sideValue, diagValue);
4254 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
4255 scoreF[i][rIndex] = sideValue;
4256 else
4257 scoreF[i][rIndex] = min(diagValue , upValue);
4258
4259 if(i == tempUp)
4260 error = scoreF[i][rIndex];
4261 if(error > scoreF[i][rIndex])
4262 error = scoreF[i][rIndex];
4263 }
4264 if(rIndex <= rSeqLength)
4265 {
4266 errorSegment = error;
4267 }
4268
4269 rIndex++;
4270 }
4271
4272 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1];
4273 minIndex2 = rSeqLength + errThreshold-error1;
4274
4275 // Find the Best error for all the possible ways.
4276 for(i = 1; i <= 2*(errThreshold-error1); i++)
4277 {
4278 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0)
4279 {
4280 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i];
4281 minIndex2 = rSeqLength+errThreshold-error1-i;
4282 }
4283 }
4284 error = scoreF[rSeqLength][minIndex2];
4285 }
4286
4287 totalError = error + error1;
4288
4289 if(errThreshold > 4)
4290 printf("ERROR in errorThreshold.\n");
4291
4292
4293 if(totalError != error2 + error3 && totalError > errThreshold)
4294 {
4295 printf("ErrorF=%d, ErrorB=%d Error=%d Error=%d\n", error2,error3,error1,error);
4296
4297 scanf("%d", &i);
4298 }
4299
4300 char matrixR[200];
4301 char matrixL[200];
4302
4303 matrixR[0] = '\0';
4304 matrixL[0] = '\0';
4305
4306 size = 0;
4307 directionIndex = rSeqLength;
4308 rIndex = minIndex2;
4309
4310 while(directionIndex != 0 || rIndex != 0)
4311 {
4312 if(directionIndex-rIndex == errThreshold)
4313 {
4314 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
4315 {
4316 matrixR[size] = *(rSeq+directionIndex-1);
4317 size++;
4318 matrixR[size] = 'I';
4319 directionIndex--;
4320 }
4321 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4322 {
4323 matrixR[size] = *(ref+rIndex-1);
4324 rIndex--;
4325 directionIndex--;
4326 }
4327 else
4328 {
4329 matrixR[size] = 'M';
4330 rIndex--;
4331 directionIndex--;
4332 }
4333
4334 }
4335 else if(rIndex - directionIndex == errThreshold)
4336 {
4337 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
4338 {
4339 matrixR[size] = *(ref+rIndex-1);
4340 size++;
4341 matrixR[size] = 'D';
4342 rIndex--;
4343 }
4344 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4345 {
4346 matrixR[size] = *(ref+rIndex-1);
4347 rIndex--;
4348 directionIndex--;
4349 }
4350 else
4351 {
4352 matrixR[size] = 'M';
4353 rIndex--;
4354 directionIndex--;
4355 }
4356 }
4357 else
4358 {
4359 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
4360 {
4361 matrixR[size] = *(rSeq+directionIndex-1);
4362 size++;
4363 matrixR[size] = 'I';
4364 directionIndex--;
4365 }
4366 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
4367 {
4368 matrixR[size] = *(ref+rIndex-1);
4369 size++;
4370 matrixR[size] = 'D';
4371 rIndex--;
4372 }
4373 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4374 {
4375 matrixR[size] = *(ref+rIndex-1);
4376 rIndex--;
4377 directionIndex--;
4378 }
4379 else
4380 {
4381 matrixR[size] = 'M';
4382 rIndex--;
4383 directionIndex--;
4384 }
4385 }
4386 size++;
4387 }
4388 matrixR[size] = '\0';
4389
4390 size = 0;
4391 directionIndex = lSeqLength;
4392 rIndex = minIndex1;
4393
4394
4395 while(directionIndex != 0 || rIndex != 0)
4396 {
4397 if(directionIndex-rIndex == errThreshold)
4398 {
4399 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
4400 {
4401 matrixL[size] = 'I';
4402 size++;
4403 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
4404 directionIndex--;
4405 }
4406 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4407 {
4408 matrixL[size] = *(tempref-rIndex);
4409 rIndex--;
4410 directionIndex--;
4411 }
4412 else
4413 {
4414 matrixL[size] = 'M';
4415 rIndex--;
4416 directionIndex--;
4417 }
4418
4419 }
4420 else if(rIndex - directionIndex == errThreshold)
4421 {
4422 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
4423 {
4424 matrixL[size] = 'D';
4425 size++;
4426 matrixL[size] = *(tempref-rIndex);
4427 rIndex--;
4428 }
4429 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4430 {
4431 matrixL[size] = *(tempref-rIndex);
4432 rIndex--;
4433 directionIndex--;
4434 }
4435 else
4436 {
4437 matrixL[size] = 'M';
4438 rIndex--;
4439 directionIndex--;
4440 }
4441 }
4442 else
4443 {
4444 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
4445 {
4446 matrixL[size] = 'I';
4447 size++;
4448 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
4449 directionIndex--;
4450 }
4451 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
4452 {
4453 matrixL[size] = 'D';
4454 size++;
4455 matrixL[size] = *(tempref-rIndex);
4456 rIndex--;
4457 }
4458 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4459 {
4460 matrixL[size] = *(tempref-rIndex);
4461 rIndex--;
4462 directionIndex--;
4463 }
4464 else
4465 {
4466 matrixL[size] = 'M';
4467 rIndex--;
4468 directionIndex--;
4469 }
4470 }
4471
4472 size++;
4473 }
4474
4475 matrixL[size] = '\0';
4476 char middle[200];
4477 middle[0] = '\0';
4478
4479 for(i = 0; i < segLength; i++)
4480 middle[i] = 'M';
4481 middle[segLength] = '\0';
4482
4483 char rmatrixR[200];
4484
4485 reverse(matrixR, rmatrixR, strlen(matrixR));
4486
4487 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
4488
4489 return totalError;
4490
4491 }
4492
4493 int verifySingleEndEditDistanceExtention(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
4494 char *matrix, int *map_location, short *seqHashValue)
4495 {
4496 int i = 0;
4497
4498 char * ref;
4499 char * tempref;
4500
4501 int rIndex = 0; //reference Index
4502
4503 int error = 0;
4504 int error1 = 0;
4505
4506 int error2 = 0;
4507 int error3 = 0;
4508 int totalError = 0;
4509 int errorSegment = 0;
4510
4511 int ERROR_BOUND = min(4, errThreshold);
4512
4513
4514 /*
4515 1: Up
4516 2: Side
4517 3: Diagnoal Match
4518 4: Diagnoal Mismatch
4519 */
4520
4521 int min = 0;
4522 int minIndex1 = 0;
4523 int minIndex2 = 0;
4524
4525 int directionIndex = 0;
4526
4527
4528 int size = 0;
4529
4530 ref = _msf_refGen + refIndex - 1;
4531 tempref = _msf_refGen + refIndex - 1;
4532
4533
4534 if(lSeqLength != 0)
4535 {
4536 error3 = backwardEditDistanceSSE2Extention(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
4537 if(error3 == -1){
4538 return -1;
4539 }
4540 }
4541
4542 if(rSeqLength != 0)
4543 {
4544 error2 = forwardEditDistanceSSE2Extention(ref+segLength, rSeqLength, rSeq, rSeqLength);
4545 if(error2 == -1)
4546 return -1;
4547 }
4548
4549 if(error2 + error3 > errThreshold)
4550 return -1;
4551
4552 rIndex = 1;
4553
4554 int prevError = 0;
4555
4556 int tempUp = 0;
4557 int tempDown = 0;
4558
4559 int errorString = 0;
4560
4561 int upValue;
4562 int diagValue;
4563 int sideValue;
4564 if(lSeqLength > ERROR_BOUND)
4565 {
4566 while(rIndex <= lSeqLength+ERROR_BOUND && lSeqLength != 0)
4567 {
4568 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
4569 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
4570 for(i = tempUp ; i < tempDown ; i++)
4571 {
4572 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
4573
4574 upValue = scoreB[i-1][rIndex]+1;
4575 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
4576 sideValue = scoreB[i][rIndex-1]+1;
4577
4578 if(i != tempUp && i != tempDown-1)
4579 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
4580
4581 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
4582 scoreB[i][rIndex] = min(sideValue, diagValue);
4583 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
4584 scoreB[i][rIndex] = sideValue;
4585 else
4586 scoreB[i][rIndex] = min(diagValue , upValue);
4587
4588 if(i == tempUp)
4589 error = scoreB[i][rIndex];
4590 else if(error > scoreB[i][rIndex])
4591 error = scoreB[i][rIndex];
4592 }
4593 if(rIndex <= lSeqLength)
4594 {
4595 errorSegment = error-prevError;
4596 }
4597 rIndex++;
4598 }
4599
4600 if(lSeqLength != 0)
4601 {
4602 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND];
4603 minIndex1 = lSeqLength + ERROR_BOUND;
4604
4605 // Find the Best error for all the possible ways.
4606 for(i = 1; i <= 2*ERROR_BOUND; i++)
4607 {
4608 if(min >= scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i] && lSeqLength+ERROR_BOUND-i > 0)
4609 {
4610 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i];
4611 minIndex1 = lSeqLength+ERROR_BOUND-i;
4612 }
4613 }
4614 error = scoreB[lSeqLength][minIndex1];
4615 }
4616 }
4617 else
4618 {
4619 int j = 0;
4620 for(i = 1; i <= lSeqLength; i++)
4621 {
4622 for(j = 1; j <= lSeqLength; j++)
4623 {
4624 scoreB[i][j] = min3(scoreB[i-1][j-1]+ (*(ref-j) != *(lSeq+lSeqLength-i) ),scoreB[i][j-1]+1 ,scoreB[i-1][j]+1);
4625 }
4626 }
4627 error = scoreB[lSeqLength][lSeqLength];
4628 minIndex1 = lSeqLength;
4629
4630 }
4631 error1 = error;
4632
4633 error = 0;
4634 errorSegment = 0;
4635
4636 directionIndex = lSeqLength;
4637 rIndex = minIndex1;
4638
4639 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
4640
4641 ref = ref + segLength;
4642
4643 if(rSeqLength != 0 && rSeqLength > ERROR_BOUND)
4644 {
4645 ERROR_BOUND = min(ERROR_BOUND, rSeqLength);
4646
4647 if(rSeqLength == ERROR_BOUND)
4648 {
4649 for(i=0; i < 2*ERROR_BOUND; i++)
4650 scoreF[0][i] = i;
4651 }
4652
4653 rIndex = 1;
4654 while(rIndex <= rSeqLength+ERROR_BOUND)
4655 {
4656 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
4657 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
4658 for(i = tempUp; i < tempDown ; i++)
4659 {
4660 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
4661 upValue = scoreF[i-1][rIndex]+1;
4662 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
4663 sideValue = scoreF[i][rIndex-1]+1;
4664
4665 if(i != tempUp && i != tempDown-1)
4666 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
4667 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
4668 scoreF[i][rIndex] = min(sideValue, diagValue);
4669 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
4670 scoreF[i][rIndex] = sideValue;
4671 else
4672 scoreF[i][rIndex] = min(diagValue , upValue);
4673
4674 if(i == tempUp)
4675 error = scoreF[i][rIndex];
4676 if(error > scoreF[i][rIndex])
4677 error = scoreF[i][rIndex];
4678 }
4679 if(rIndex <= rSeqLength)
4680 {
4681 errorSegment = error;
4682 }
4683 rIndex++;
4684 }
4685 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND];
4686 minIndex2 = rSeqLength + ERROR_BOUND;
4687
4688 // Find the Best error for all the possible ways.
4689 for(i = 1; i <= 2*ERROR_BOUND; i++)
4690 {
4691 if(min > scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i] && rSeqLength+ERROR_BOUND-i > 0)
4692 {
4693 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i];
4694 minIndex2 = rSeqLength+ERROR_BOUND-i;
4695 }
4696 }
4697 error = scoreF[rSeqLength][minIndex2];
4698 }
4699 else
4700 {
4701 int j = 0;
4702 for(i = 1; i <= rSeqLength; i++)
4703 {
4704 for(j = 1; j <= rSeqLength; j++)
4705 {
4706 scoreF[i][j] = min3(scoreF[i-1][j-1]+ (*(ref+j-1) != *(rSeq+i-1) ),scoreF[i][j-1]+1 ,scoreF[i-1][j]+1);
4707 }
4708 }
4709 error = scoreF[rSeqLength][rSeqLength];
4710 minIndex2 = rSeqLength;
4711 }
4712
4713 totalError = error + error1;
4714
4715 if(totalError != error2+error3)
4716 {
4717 for(i = 0; i < lSeqLength; i++)
4718 printf("%c", *(tempref-1-i));
4719 printf("\n");
4720 for(i = 0; i < lSeqLength; i++)
4721 printf("%c", *(lSeq+i));
4722 printf("\n");
4723
4724 for(i = 0; i < rSeqLength; i++)
4725 printf("%c", *(tempref+segLength+i));
4726 printf("\n");
4727
4728 for(i = 0; i < rSeqLength; i++)
4729 printf("%c", *(rSeq+i));
4730 printf("\n");
4731
4732 printf("ERROR=%d\n", totalError);
4733 printf("ERROR_SSE=%d\n", error3+error2);
4734
4735 printf("ERROR_SSE_back=%d E_SSE_forw=%d\n", error3, error2);
4736 printf("ERROR_back=%d E_forw=%d\n", error1, error);
4737
4738 }
4739
4740 char matrixR[200];
4741 char matrixL[200];
4742
4743 matrixR[0] = '\0';
4744 matrixL[0] = '\0';
4745
4746 size = 0;
4747 directionIndex = rSeqLength;
4748 rIndex = minIndex2;
4749
4750
4751 while(directionIndex != 0 || rIndex != 0)
4752 {
4753 if(directionIndex-rIndex == errThreshold)
4754 {
4755 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
4756 {
4757 matrixR[size] = *(rSeq+directionIndex-1);
4758 size++;
4759 matrixR[size] = 'I';
4760 directionIndex--;
4761 }
4762 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4763 {
4764 matrixR[size] = *(ref+rIndex-1);
4765 rIndex--;
4766 directionIndex--;
4767 }
4768 else
4769 {
4770 matrixR[size] = 'M';
4771 rIndex--;
4772 directionIndex--;
4773 }
4774
4775 }
4776 else if(rIndex - directionIndex == errThreshold)
4777 {
4778 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
4779 {
4780 matrixR[size] = *(ref+rIndex-1);
4781 size++;
4782 matrixR[size] = 'D';
4783 rIndex--;
4784 }
4785 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4786 {
4787 matrixR[size] = *(ref+rIndex-1);
4788 rIndex--;
4789 directionIndex--;
4790 }
4791 else
4792 {
4793 matrixR[size] = 'M';
4794 rIndex--;
4795 directionIndex--;
4796 }
4797 }
4798 else
4799 {
4800 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
4801 {
4802 matrixR[size] = *(rSeq+directionIndex-1);
4803 size++;
4804 matrixR[size] = 'I';
4805 directionIndex--;
4806 }
4807 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
4808 {
4809 matrixR[size] = *(ref+rIndex-1);
4810 size++;
4811 matrixR[size] = 'D';
4812 rIndex--;
4813 }
4814 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
4815 {
4816 matrixR[size] = *(ref+rIndex-1);
4817 rIndex--;
4818 directionIndex--;
4819 }
4820 else
4821 {
4822 matrixR[size] = 'M';
4823 rIndex--;
4824 directionIndex--;
4825 }
4826 }
4827 size++;
4828 }
4829 matrixR[size] = '\0';
4830
4831 size = 0;
4832 directionIndex = lSeqLength;
4833 rIndex = minIndex1;
4834
4835
4836 while(directionIndex != 0 || rIndex != 0)
4837 {
4838 if(directionIndex-rIndex == errThreshold)
4839 {
4840 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
4841 {
4842 matrixL[size] = 'I';
4843 size++;
4844 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
4845 directionIndex--;
4846 }
4847 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4848 {
4849 matrixL[size] = *(tempref-rIndex);
4850 rIndex--;
4851 directionIndex--;
4852 }
4853 else
4854 {
4855 matrixL[size] = 'M';
4856 rIndex--;
4857 directionIndex--;
4858 }
4859
4860 }
4861 else if(rIndex - directionIndex == errThreshold)
4862 {
4863 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
4864 {
4865 matrixL[size] = 'D';
4866 size++;
4867 matrixL[size] = *(tempref-rIndex);
4868 rIndex--;
4869 }
4870 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4871 {
4872 matrixL[size] = *(tempref-rIndex);
4873 rIndex--;
4874 directionIndex--;
4875 }
4876 else
4877 {
4878 matrixL[size] = 'M';
4879 rIndex--;
4880 directionIndex--;
4881 }
4882 }
4883 else
4884 {
4885 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
4886 {
4887 matrixL[size] = 'I';
4888 size++;
4889 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
4890 directionIndex--;
4891 }
4892 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
4893 {
4894 matrixL[size] = 'D';
4895 size++;
4896 matrixL[size] = *(tempref-rIndex);
4897 rIndex--;
4898 }
4899 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
4900 {
4901 matrixL[size] = *(tempref-rIndex);
4902 rIndex--;
4903 directionIndex--;
4904 }
4905 else
4906 {
4907 matrixL[size] = 'M';
4908 rIndex--;
4909 directionIndex--;
4910 }
4911 }
4912 size++;
4913 }
4914 matrixL[size] = '\0';
4915
4916 char middle[200];
4917 middle[0] = '\0';
4918 for(i = 0; i < segLength; i++)
4919 middle[i] = 'M';
4920 middle[segLength] = '\0';
4921
4922 char rmatrixR[200];
4923
4924 reverse(matrixR, rmatrixR, strlen(matrixR));
4925
4926 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
4927
4928
4929 return totalError;
4930
4931 }
4932
4933
/*
  Banded edit-distance verification of one candidate placement of a read,
  followed by a traceback that produces the alignment string.

  The read is passed in three pieces: a left part (lSeq, lSeqLength
  characters), a middle segment of segLength characters that is not aligned
  here and is reported as all 'M', and a right part (rSeq, rSeqLength
  characters). The left part is aligned backwards starting at the reference
  character just before _msf_refGen + refIndex - 1 (table scoreB); the right
  part is aligned forwards starting segLength characters after that position
  (table scoreF).

  Returns -1 when a pre-filter rejects the candidate, otherwise the sum of the
  left and right distances found by the dynamic program.

  Outputs on success:
    *map_location  refIndex when there is no left part, otherwise refIndex
                   minus the reference column at which the left alignment ends.
    matrix         left traceback + segLength 'M' + right traceback, where
                   'M' is a match, a bare base is a mismatch (the reference
                   base is stored), 'I'+base is a read character consumed
                   without a reference character, and 'D'+base is a reference
                   character consumed without a read character.

  seqHashValue is not used in this function.
  Reads file-scope state: _msf_refGen, errThreshold; writes scoreB and scoreF.
  matrixR, matrixL, middle and rmatrixR are 200-byte locals and nothing in this
  function checks the produced lengths against that size.
*/
4934 int verifySingleEndEditDistance(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
4935 {
4936
4937 int i = 0;
4938
4939 char * ref;
4940 char * tempref;
4941
4942 int rIndex = 0; //reference Index
4943
/* error1: left-part distance from the DP; error2 / error3: right / left
   distances reported by the SSE2 pre-filters. */
4944 int error = 0;
4945 int error1 = 0;
4946
4947 int error2 = 0;
4948 int error3 = 0;
4949
4950 int totalError = 0;
4951 int errorSegment = 0;
4952
4953 int ERROR_BOUND = errThreshold;
4954
4955 /*
4956 1: Up
4957 2: Side
4958 3: Diagonal Match
4959 4: Diagonal Mismatch
4960 */
4961
/* minIndex1 / minIndex2: reference column in the last row of scoreB / scoreF
   where the left / right alignment ends. */
4962 int min = 0;
4963 int minIndex1 = 0;
4964 int minIndex2 = 0;
4965
4966 int directionIndex = 0;
4967
4968
4969 int size = 0;
4970
/* ref is advanced by segLength further down; tempref keeps this starting
   position for the left traceback and the debug dump. */
4971 ref = _msf_refGen + refIndex - 1;
4972 tempref = _msf_refGen + refIndex - 1;
4973
4974
/* Pre-filter the right part with the SSE2 routine chosen by the parity of
   errThreshold; a result of -1 rejects the candidate. */
4975 if(rSeqLength != 0)
4976 {
4977 if(errThreshold %2 == 1)
4978 error2 = forwardEditDistanceSSE2Odd(ref+segLength, rSeqLength, rSeq, rSeqLength);
4979 else
4980 error2 = forwardEditDistanceSSE2G(ref+segLength, rSeqLength, rSeq, rSeqLength);
4981 if(error2 == -1)
4982 return -1;
4983 }
4984
/* Same pre-filter for the left part, walking backwards from its last
   character. NOTE(review): a result of 0 is rejected together with -1; the
   reason is not visible in this function -- confirm it is intentional. */
4985 if(lSeqLength != 0)
4986 {
4987 if(errThreshold % 2 == 1)
4988 error3 = backwardEditDistanceSSE2Odd(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
4989 else
4990 error3 = backwardEditDistanceSSE2G(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
4991 if(error3 == -1 || error3 == 0){
4992 return -1;
4993 }
4994 }
4995
/* Reject when the two pre-filter distances together exceed errThreshold. */
4996 if(error3 + error2 > errThreshold)
4997 return -1;
4998
/* Seed row 0 and column 0 of scoreB with 0..errThreshold. */
4999 for(i = 0 ; i < errThreshold + 1; i++)
5000 {
5001 scoreB[0][i] = i;
5002 scoreB[i][0] = i;
5003 }
5004
5005 rIndex = 1;
5006 int prevError = 0;
5007
5008 int tempUp = 0;
5009 int tempDown = 0;
5010
5011 int errorString = 0;
5012
5013 int upValue;
5014 int diagValue;
5015 int sideValue;
5016
/* Left part: fill scoreB one column at a time. Column rIndex is the reference
   character at ref-rIndex, row i is the read character lSeq[lSeqLength-i];
   only rows tempUp..tempDown-1 of each column are computed. */
5017 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0)
5018 {
5019 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
5020 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
5021 for(i = tempUp ; i < tempDown ; i++)
5022 {
/* errorString is 1 when the two characters are equal, so !errorString is the
   substitution cost. */
5023 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
5024
5025 upValue = scoreB[i-1][rIndex]+1;
5026 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
5027 sideValue = scoreB[i][rIndex-1]+1;
5028
/* Cells strictly between the first and last computed row use all three
   neighbours; the first/last computed rows use only the neighbours picked by
   the branches below. */
5029 if(i != tempUp && i != tempDown-1)
5030 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
5031
5032 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
5033 scoreB[i][rIndex] = min(sideValue, diagValue);
5034 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
5035 scoreB[i][rIndex] = sideValue;
5036 else
5037 scoreB[i][rIndex] = min(diagValue , upValue);
5038
/* error ends up as the smallest score computed in this column. */
5039 if(i == tempUp)
5040 error = scoreB[i][rIndex];
5041 else if(error > scoreB[i][rIndex])
5042 error = scoreB[i][rIndex];
5043 }
5044 if(rIndex <= lSeqLength)
5045 {
5046 errorSegment = error-prevError;
5047 }
5048 rIndex++;
5049 }
/* Pick the end column in row lSeqLength among columns lSeqLength+errThreshold
   down to lSeqLength-errThreshold (columns <= 0 are skipped); because the
   test is '>=', a later, smaller column replaces an equal score. */
5050 if(lSeqLength != 0)
5051 {
5052 min = scoreB[lSeqLength][lSeqLength+errThreshold];
5053 minIndex1 = lSeqLength + errThreshold;
5054
5055 // Find the Best error for all the possible ways.
5056 for(i = 1; i <= 2*errThreshold; i++)
5057 {
5058 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0)
5059 {
5060 min = scoreB[lSeqLength][lSeqLength+errThreshold-i];
5061 minIndex1 = lSeqLength+errThreshold-i;
5062 }
5063 }
5064 error = scoreB[lSeqLength][minIndex1];
5065 }
5066
5067 error1 = error;
5068
5069 error = 0;
5070 errorSegment = 0;
5071
5072 directionIndex = lSeqLength;
5073 rIndex = minIndex1;
5074
/* Reported location: refIndex itself when there is no left part, otherwise
   refIndex moved back by the chosen left end column. */
5075 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
5076
/* From here on ref points segLength characters further, where the right part
   is aligned. */
5077 ref = ref + segLength;
5078
/* Right part: same column-by-column fill in scoreF, but only out to column
   rSeqLength+errThreshold-error1, i.e. with the left distance subtracted. */
5079 if(rSeqLength != 0)
5080 {
5081 for(i = 0 ; i < errThreshold + 1; i++)
5082 {
5083 scoreF[0][i] = i;
5084 scoreF[i][0] = i;
5085 }
5086
5087
5088 rIndex = 1;
5089 while(rIndex <= rSeqLength+errThreshold-error1)
5090 {
5091 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
5092 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
5093 for(i = tempUp; i < tempDown ; i++)
5094 {
5095 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
5096
5097 upValue = scoreF[i-1][rIndex]+1;
5098 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
5099 sideValue = scoreF[i][rIndex-1]+1;
5100
5101 if(i != tempUp && i != tempDown-1)
5102 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
5103 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
5104 scoreF[i][rIndex] = min(sideValue, diagValue);
5105 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
5106 scoreF[i][rIndex] = sideValue;
5107 else
5108 scoreF[i][rIndex] = min(diagValue , upValue);
5109
5110 if(i == tempUp)
5111 error = scoreF[i][rIndex];
5112 if(error > scoreF[i][rIndex])
5113 error = scoreF[i][rIndex];
5114 }
5115 if(rIndex <= rSeqLength)
5116 {
5117 errorSegment = error;
5118 }
5119 rIndex++;
5120 }
5121
/* Pick the end column in row rSeqLength; the strict '>' keeps the first
   (largest) column when scores tie, unlike the '>=' used for the left part. */
5122 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1];
5123 minIndex2 = rSeqLength + errThreshold-error1;
5124
5125 // Find the Best error for all the possible ways.
5126 for(i = 1; i <= 2*(errThreshold-error1); i++)
5127 {
5128 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0)
5129 {
5130 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i];
5131 minIndex2 = rSeqLength+errThreshold-error1-i;
5132 }
5133 }
5134 error = scoreF[rSeqLength][minIndex2];
5135 }
5136
/* error is 0 here when there is no right part. */
5137 totalError = error + error1;
5138
5139
/* Debug dump when the DP total differs from the SSE2 total and also exceeds
   errThreshold: prints the reference and read characters of both parts and
   the four distances. The scanf then blocks until an integer is read from
   stdin. */
5140 if(totalError != error2 + error3 && totalError > errThreshold)
5141 {
5142 for(i = 0; i < lSeqLength; i++)
5143 printf("%c", *(tempref-1-i));
5144 printf("\n");
5145 for(i = 0; i < lSeqLength; i++)
5146 printf("%c", *(lSeq+i));
5147 printf("\n");
5148
5149 for(i = 0; i < rSeqLength; i++)
5150 printf("%c", *(tempref+segLength+i));
5151 printf("\n");
5152
5153 for(i = 0; i < rSeqLength; i++)
5154 printf("%c", *(rSeq+i));
5155 printf("\n");
5156
5157
5158 printf("SSEF=%d SSEB%d\n", error2, error3);
5159 printf("F=%d B=%d\n", error, error1);
5160 scanf("%d", &i);
5161 }
5162
/* matrixR is written from the end of the right alignment back to its start
   and is reversed before use; matrixL is written in its final order. */
5163 char matrixR[200];
5164 char matrixL[200];
5165
5166 matrixR[0] = '\0';
5167 matrixL[0] = '\0';
5168
5169 size = 0;
5170 directionIndex = rSeqLength;
5171 rIndex = minIndex2;
5172
/* Traceback of the right part from (rSeqLength, minIndex2) to (0, 0).
   Two-character operations are stored base first so that the reversed string
   reads 'I'+base / 'D'+base. */
5173 while(directionIndex != 0 || rIndex != 0)
5174 {
/* directionIndex is errThreshold ahead of rIndex: only the up move (I) and
   the diagonal are examined. */
5175 if(directionIndex-rIndex == errThreshold)
5176 {
5177 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
5178 {
5179 matrixR[size] = *(rSeq+directionIndex-1);
5180 size++;
5181 matrixR[size] = 'I';
5182 directionIndex--;
5183 }
5184 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
5185 {
5186 matrixR[size] = *(ref+rIndex-1);
5187 rIndex--;
5188 directionIndex--;
5189 }
5190 else
5191 {
5192 matrixR[size] = 'M';
5193 rIndex--;
5194 directionIndex--;
5195 }
5196
5197 }
/* rIndex is errThreshold ahead of directionIndex: only the side move (D) and
   the diagonal are examined. */
5198 else if(rIndex - directionIndex == errThreshold)
5199 {
5200 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
5201 {
5202 matrixR[size] = *(ref+rIndex-1);
5203 size++;
5204 matrixR[size] = 'D';
5205 rIndex--;
5206 }
5207 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
5208 {
5209 matrixR[size] = *(ref+rIndex-1);
5210 rIndex--;
5211 directionIndex--;
5212 }
5213 else
5214 {
5215 matrixR[size] = 'M';
5216 rIndex--;
5217 directionIndex--;
5218 }
5219 }
/* Otherwise: try up (I), then side (D), then diagonal mismatch, else match.
   Note the table is indexed before the "!= 0" guard in each condition is
   evaluated. */
5220 else
5221 {
5222 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
5223 {
5224 matrixR[size] = *(rSeq+directionIndex-1);
5225 size++;
5226 matrixR[size] = 'I';
5227 directionIndex--;
5228 }
5229 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
5230 {
5231 matrixR[size] = *(ref+rIndex-1);
5232 size++;
5233 matrixR[size] = 'D';
5234 rIndex--;
5235 }
5236 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
5237 {
5238 matrixR[size] = *(ref+rIndex-1);
5239 rIndex--;
5240 directionIndex--;
5241 }
5242 else
5243 {
5244 matrixR[size] = 'M';
5245 rIndex--;
5246 directionIndex--;
5247 }
5248 }
5249 size++;
5250 }
5251 matrixR[size] = '\0';
5252
5253 size = 0;
5254 directionIndex = lSeqLength;
5255 rIndex = minIndex1;
5256
5257
/* Traceback of the left part from (lSeqLength, minIndex1) to (0, 0), using
   scoreB and tempref. Same three cases as above; here the operation letter is
   stored before its base because matrixL is not reversed. */
5258 while(directionIndex != 0 || rIndex != 0)
5259 {
5260 if(directionIndex-rIndex == errThreshold)
5261 {
5262 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
5263 {
5264 matrixL[size] = 'I';
5265 size++;
5266 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
5267 directionIndex--;
5268 }
5269 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
5270 {
5271 matrixL[size] = *(tempref-rIndex);
5272 rIndex--;
5273 directionIndex--;
5274 }
5275 else
5276 {
5277 matrixL[size] = 'M';
5278 rIndex--;
5279 directionIndex--;
5280 }
5281
5282 }
5283 else if(rIndex - directionIndex == errThreshold)
5284 {
5285 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
5286 {
5287 matrixL[size] = 'D';
5288 size++;
5289 matrixL[size] = *(tempref-rIndex);
5290 rIndex--;
5291 }
5292 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
5293 {
5294 matrixL[size] = *(tempref-rIndex);
5295 rIndex--;
5296 directionIndex--;
5297 }
5298 else
5299 {
5300 matrixL[size] = 'M';
5301 rIndex--;
5302 directionIndex--;
5303 }
5304 }
5305 else
5306 {
5307 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
5308 {
5309 matrixL[size] = 'I';
5310 size++;
5311 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
5312 directionIndex--;
5313 }
5314 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
5315 {
5316 matrixL[size] = 'D';
5317 size++;
5318 matrixL[size] = *(tempref-rIndex);
5319 rIndex--;
5320 }
5321 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
5322 {
5323 matrixL[size] = *(tempref-rIndex);
5324 rIndex--;
5325 directionIndex--;
5326 }
5327 else
5328 {
5329 matrixL[size] = 'M';
5330 rIndex--;
5331 directionIndex--;
5332 }
5333 }
5334 size++;
5335 }
5336 matrixL[size] = '\0';
/* The middle segment is reported as segLength matches. */
5337 char middle[200];
5338 middle[0] = '\0';
5339 for(i = 0; i < segLength; i++)
5340 middle[i] = 'M';
5341 middle[segLength] = '\0';
5342
5343 char rmatrixR[200];
5344
5345 reverse(matrixR, rmatrixR, strlen(matrixR));
5346
/* Final layout: left traceback, middle segment, reversed right traceback. */
5347 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
5348
5349 return totalError;
5350 }
5351
5352
/*
  Returns how many characters "%d" needs to print the run length cnt.

  cnt : run length; values below 10 (including 0 and negative values) yield 1.

  The result is the number of decimal digits of cnt. Counts of 1000 and above
  are handled too, so callers that advance a write offset by this value stay
  in step with what sprintf actually wrote.
*/
int addCigarSize(int cnt)
{
    int digits = 1;

    while (cnt >= 10)
    {
        cnt /= 10;
        digits++;
    }
    return digits;
}
5358
/*
  Generate Cigar from the back tracking matrix

  matrix       : alignment string in which 'M' is a match, 'I' and 'D' are each
                 followed by one base character, and any other character is a
                 mismatch
  matrixLength : number of characters of matrix to read
  cigar        : output buffer; receives the NUL-terminated run-length string
                 (for example "2M1I2M"). The caller must provide enough room;
                 no size is passed in, so none can be checked here.

  Matches and mismatches are both counted as 'M'. The base that follows an
  'I' or a 'D' is skipped. Consecutive equal operations are merged into one
  "<count><op>" token.

  Each token is written at the current end of the output. The buffer is never
  passed to sprintf as both destination and "%s" argument, because the
  behaviour of sprintf is undefined when source and destination overlap.
*/
void generateCigar(char *matrix, int matrixLength, char *cigar)
{
    char runOp = '\0';   /* operation of the run currently being counted */
    int runLength = 0;   /* length of that run; 0 means no run is open */
    int cigarSize = 0;   /* characters written to cigar so far */
    int written = 0;
    int i = 0;

    cigar[0] = '\0';

    while (i < matrixLength)
    {
        char op = 'M';

        if (matrix[i] == 'I' || matrix[i] == 'D')
        {
            op = matrix[i];
            i++;         /* skip the base that follows the operation letter */
        }

        if (op != runOp)
        {
            /* The operation changed: emit the finished run, start a new one. */
            if (runLength != 0)
            {
                written = sprintf(cigar + cigarSize, "%d%c", runLength, runOp);
                if (written > 0)
                    cigarSize += written;
            }
            runOp = op;
            runLength = 0;
        }

        runLength++;
        i++;
    }

    if (runLength != 0)
    {
        written = sprintf(cigar + cigarSize, "%d%c", runLength, runOp);
        if (written > 0)
            cigarSize += written;
    }

    cigar[cigarSize] = '\0';
}
5480
/*
  Creates the Cigar output from the mismatching positions format [0-9]+(([ACTGN]|\^[ACTGN]+)[0-9]+)*

  mismatch       : input string
  mismatchLength : number of characters of mismatch to scan
  cigar          : output buffer, one operation letter per position,
                   NUL-terminated; the caller must provide enough room

  What the scan does, character by character:
    - a run of digits is converted with atoi and written as that many 'M'
      (a value of 0 or 1 still writes a single 'M');
    - '^' writes 'I' and a single quote writes 'D', each consuming one extra
      input character;
    - any other character writes 'M' and then advances the output by one
      additional position that is left unwritten.
  After every step the input advances by one more character, so the character
  directly following a digit run is skipped.

  NOTE(review): the unwritten output position and the skipped character look
  unintended, but the expected result is not visible from this function, so
  that behaviour is kept as it was. The only change is that the digit scan now
  tests the index against mismatchLength before reading mismatch[i].
*/
void generateCigarFromMD(char *mismatch, int mismatchLength, char *cigar)
{
    int i = 0;
    int cigarSize = 0;

    cigar[0] = '\0';

    while (i < mismatchLength)
    {
        if (mismatch[i] >= '0' && mismatch[i] <= '9')
        {
            int start = i;
            int value;
            int j;

            /* Bounds check first, so mismatch[mismatchLength] is never read. */
            while (i < mismatchLength && mismatch[i] >= '0' && mismatch[i] <= '9')
                i++;

            value = atoi(mismatch + start);
            for (j = 0; j < value - 1; j++)
            {
                cigar[cigarSize] = 'M';
                cigarSize++;
            }
            /* The last 'M' of the run; the shared increment below steps past it. */
            cigar[cigarSize] = 'M';
        }
        else if (mismatch[i] == '^')
        {
            cigar[cigarSize] = 'I';
            i++;
        }
        else if (mismatch[i] == '\'')
        {
            cigar[cigarSize] = 'D';
            i++;
        }
        else
        {
            cigar[cigarSize] = 'M';
            cigarSize++;
        }
        cigarSize++;
        i++;
    }
    cigar[cigarSize] = '\0';
}
5531
/*
  Builds the mismatch string (the value stored in the "MD" field by the
  callers in this file) from the back tracking matrix.

  matrix       : alignment string in which 'M' is a match, 'I' and 'D' are each
                 followed by one base character, and any other character is a
                 mismatch
  matrixLength : number of characters of matrix to read
  outputSNP    : output buffer, NUL-terminated on return; the caller must
                 provide enough room, since no size is passed in

  Output rules:
    - a run of matches is written as its decimal length when it is ended by a
      mismatch, by a deletion, or by the end of the matrix;
    - a mismatch is written as the character found in the matrix;
    - a run of consecutive 'D' entries is written as '^' followed by their
      bases; a match, a mismatch or an 'I' closes the run, so a later 'D'
      starts a new '^';
    - an 'I' entry and its base write nothing and do not interrupt the count
      of matches.

  Characters are appended directly at the write position. The buffer is never
  passed to sprintf as both destination and "%s" argument (undefined
  behaviour when they overlap), and deleted bases are no longer staged in a
  fixed 100-byte array.
*/
void generateSNPSAM(char *matrix, int matrixLength, char *outputSNP)
{
    char *out = outputSNP;  /* next position to write */
    int matchRun = 0;       /* matches counted since the last written token */
    int deletionOpen = 0;   /* nonzero while a "^..." run is being extended */
    int written = 0;
    int i = 0;

    outputSNP[0] = '\0';

    while (i < matrixLength)
    {
        char op = matrix[i];

        if (op == 'M')
        {
            matchRun++;
            deletionOpen = 0;
        }
        else if (op == 'D')
        {
            if (matchRun != 0)
            {
                written = sprintf(out, "%d", matchRun);
                if (written > 0)
                    out += written;
                matchRun = 0;
            }
            if (!deletionOpen)
            {
                *out++ = '^';
                deletionOpen = 1;
            }
            /* The deleted base follows the 'D'; a trailing 'D' has none. */
            if (i + 1 < matrixLength)
                *out++ = matrix[i + 1];
            i++;
        }
        else if (op == 'I')
        {
            /* Nothing is written; only an open deletion run is closed. */
            deletionOpen = 0;
            i++;            /* skip the inserted base */
        }
        else
        {
            if (matchRun != 0)
            {
                written = sprintf(out, "%d", matchRun);
                if (written > 0)
                    out += written;
                matchRun = 0;
            }
            deletionOpen = 0;
            *out++ = op;
        }
        i++;
    }

    if (matchRun != 0)
    {
        written = sprintf(out, "%d", matchRun);
        if (written > 0)
            out += written;
    }

    *out = '\0';
}
5650 /**********************************************/
5651
5652 /*
5653 direction = 0 forward
5654 1 backward
5655
5656 */
5657
5658 void mapSingleEndSeq(unsigned int *l1, int s1, int readNumber, int readSegment, int direction)
5659 {
5660 int j = 0;
5661 int z = 0;
5662 int *locs = (int *) l1;
5663 char *_tmpSeq, *_tmpQual;
5664 char rqual[SEQ_LENGTH+1];
5665 rqual[SEQ_LENGTH]='\0';
5666
5667 int genLoc = 0;
5668 int leftSeqLength = 0;
5669 int rightSeqLength = 0;
5670 int middleSeqLength = 0;
5671
5672 char matrix[200];
5673 char editString[200];
5674 char cigar[MAX_CIGAR_SIZE];
5675
5676 short *_tmpHashValue;
5677
5678 if (direction)
5679 {
5680 reverse(_msf_seqList[readNumber].qual, rqual, SEQ_LENGTH);
5681 _tmpQual = rqual;
5682 _tmpSeq = _msf_seqList[readNumber].rseq;
5683 _tmpHashValue = _msf_seqList[readNumber].rhashValue;
5684 }
5685 else
5686 {
5687 _tmpQual = _msf_seqList[readNumber].qual;
5688 _tmpSeq = _msf_seqList[readNumber].seq;
5689 _tmpHashValue = _msf_seqList[readNumber].hashValue;
5690 }
5691
5692 int readId = 2*readNumber+direction;
5693 for (z=0; z<s1; z++)
5694 {
5695
5696
5697 int map_location = 0;
5698 int a = 0;
5699 int o = readSegment;
5700
5701 genLoc = locs[z];//-_msf_samplingLocs[o];
5702
5703
5704 if ( genLoc-_msf_samplingLocs[o] < _msf_refGenBeg ||
5705 genLoc-_msf_samplingLocs[o] > _msf_refGenEnd ||
5706 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId ||
5707 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId
5708 )
5709 continue;
5710 int err = -1;
5711
5712
5713 map_location = 0;
5714
5715 leftSeqLength = _msf_samplingLocs[o];
5716 middleSeqLength = WINDOW_SIZE;
5717 a = leftSeqLength + middleSeqLength;
5718 rightSeqLength = SEQ_LENGTH - a;
5719
5720 if(errThreshold == 2)
5721 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength,
5722 _tmpSeq + a, rightSeqLength,
5723 middleSeqLength, matrix, &map_location, _tmpHashValue);
5724 else if(errThreshold == 4)
5725 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength,
5726 _tmpSeq + a, rightSeqLength,
5727 middleSeqLength, matrix, &map_location, _tmpHashValue);
5728 else if(errThreshold ==3)
5729 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
5730 _tmpSeq + a, rightSeqLength,
5731 middleSeqLength, matrix, &map_location, _tmpHashValue);
5732 /*else if(errThreshold == 6)
5733 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
5734 _tmpSeq + a, rightSeqLength,
5735 middleSeqLength, matrix, &map_location, _tmpHashValue);
5736 */
5737 else
5738 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength,
5739 _tmpSeq + a, rightSeqLength,
5740 middleSeqLength, matrix, &map_location, _tmpHashValue);
5741
5742 if(err != -1)
5743 {
5744 generateSNPSAM(matrix, strlen(matrix), editString);
5745 generateCigar(matrix, strlen(matrix), cigar);
5746 }
5747
5748 if(err != -1 && !bestMode)
5749 {
5750
5751 mappingCnt++;
5752
5753 int j = 0;
5754 int k = 0;
5755 for(k = 0; k < readSegment+1; k++)
5756 {
5757 for(j = -errThreshold ; j <= errThreshold; j++)
5758 {
5759 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg &&
5760 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd)
5761 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId;
5762 }
5763 }
5764 _msf_seqList[readNumber].hits[0]++;
5765
5766 _msf_output.QNAME = _msf_seqList[readNumber].name;
5767 _msf_output.FLAG = 16 * direction;
5768 _msf_output.RNAME = _msf_refGenName;
5769 _msf_output.POS = map_location + _msf_refGenOffset;
5770 _msf_output.MAPQ = 255;
5771 _msf_output.CIGAR = cigar;
5772 _msf_output.MRNAME = "*";
5773 _msf_output.MPOS = 0;
5774 _msf_output.ISIZE = 0;
5775 _msf_output.SEQ = _tmpSeq;
5776 _msf_output.QUAL = _tmpQual;
5777
5778 _msf_output.optSize = 2;
5779 _msf_output.optFields = _msf_optionalFields;
5780
5781 _msf_optionalFields[0].tag = "NM";
5782 _msf_optionalFields[0].type = 'i';
5783 _msf_optionalFields[0].iVal = err;
5784
5785 _msf_optionalFields[1].tag = "MD";
5786 _msf_optionalFields[1].type = 'Z';
5787 _msf_optionalFields[1].sVal = editString;
5788
5789 output(_msf_output);
5790
5791
5792 if (_msf_seqList[readNumber].hits[0] == 1)
5793 {
5794 mappedSeqCnt++;
5795 }
5796
5797 if ( maxHits == 0 )
5798 {
5799 _msf_seqList[readNumber].hits[0] = 2;
5800 }
5801
5802
5803 if ( maxHits!=0 && _msf_seqList[readNumber].hits[0] == maxHits)
5804 {
5805 completedSeqCnt++;
5806 break;
5807 }
5808
5809 }
5810 else if(err != -1 && bestMode)
5811 {
5812 mappingCnt++;
5813 _msf_seqList[readNumber].hits[0]++;
5814
5815 if (_msf_seqList[readNumber].hits[0] == 1)
5816 {
5817 mappedSeqCnt++;
5818 }
5819
5820 if ( maxHits == 0 )
5821 {
5822 _msf_seqList[readNumber].hits[0] = 2;
5823 }
5824
5825 if(err < bestHitMappingInfo[readNumber].err || bestHitMappingInfo[readNumber].loc == -1)
5826 {
5827 setFullMappingInfo(readNumber, map_location + _msf_refGenOffset, direction, err, 0, editString, _msf_refGenName, cigar );
5828 }
5829 }
5830 else
5831 {
5832 for(j = -errThreshold ; j <= errThreshold; j++)
5833 {
5834 if(genLoc+j > _msf_refGenBeg &&
5835 genLoc+j < _msf_refGenEnd)
5836 _msf_verifiedLocs[genLoc+j] = -readId;
5837 }
5838 }
5839 }
5840 }
5841
5842
5843 int mapAllSingleEndSeq()
5844 {
5845 int i = 0;
5846 int j = 0;
5847 int k = 0;
5848
5849
5850 unsigned int *locs = NULL;
5851
5852
5853 int prev_hash = 0;
5854
5855 for(i = 0; i < _msf_seqListSize; i++)
5856 {
5857 for(j = 0; j < _msf_samplingLocsSize; j++)
5858 {
5859 k = _msf_sort_seqList[i].readNumber;
5860 // if(j != 0)
5861 // if(strncmp(_msf_seqList[k].seq+_msf_samplingLocs[j], _msf_seqList[k].seq+_msf_samplingLocs[j-1], segSize) == 0)
5862 // continue;
5863 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]))
5864 // continue;
5865 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]));
5866 if ( locs != NULL)
5867 {
5868 mapSingleEndSeq(locs+1, locs[0],k ,j, 0);
5869 }
5870 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]);
5871 }
5872 }
5873 i = 0;
5874
5875 for(i = 0; i < _msf_seqListSize; i++)
5876 {
5877 for(j = 0; j < _msf_samplingLocsSize; j++)
5878 {
5879 k = _msf_sort_seqList[i].readNumber;
5880
5881 // if(j != 0)
5882 // if(strncmp(_msf_seqList[k].rseq+_msf_samplingLocs[j], _msf_seqList[k].rseq+_msf_samplingLocs[j-1], segSize) == 0)
5883 // continue;
5884 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]))
5885 // continue;
5886 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j]));
5887 if ( locs != NULL)
5888 {
5889 mapSingleEndSeq(locs+1, locs[0],k ,j, 1);
5890 }
5891 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]);
5892 }
5893 }
5894 return 1;
5895 }
5896
5897
5898 /**********************************************/
5899 /**********************************************/
5900 /**********************************************/
5901 /**********************************************/
5902 /**********************************************/
5903 int compareOut (const void *a, const void *b)
5904 {
5905 FullMappingInfo *aInfo = (FullMappingInfo *)a;
5906 FullMappingInfo *bInfo = (FullMappingInfo *)b;
5907 return aInfo->loc - bInfo->loc;
5908 }
5909
5910
5911
5912 /**********************************************/
5913
5914 /*
5915 direction 0: Forward
5916 1: Reverse
5917 */
5918
5919 void mapPairEndSeqList(unsigned int *l1, int s1, int readNumber, int readSegment, int direction)
5920 {
5921 int z = 0;
5922 int *locs = (int *) l1;
5923 char *_tmpSeq;
5924
5925 char rqual[SEQ_LENGTH+1];
5926
5927 char matrix[200];
5928 char editString[200];
5929 char cigar[MAX_CIGAR_SIZE];
5930
5931 short *_tmpHashValue;
5932
5933 int leftSeqLength = 0;
5934 int middleSeqLength = 0;
5935 int rightSeqLength =0;
5936 int a = 0;
5937
5938 rqual[SEQ_LENGTH]='\0';
5939
5940
5941 int r = readNumber;
5942
5943 char d = (direction==1)?-1:1;
5944
5945 if (d==-1)
5946 {
5947 _tmpSeq = _msf_seqList[readNumber].rseq;
5948 _tmpHashValue = _msf_seqList[r].rhashValue;
5949 }
5950 else
5951 {
5952 _tmpSeq = _msf_seqList[readNumber].seq;
5953 _tmpHashValue = _msf_seqList[r].hashValue;
5954 }
5955
5956 int readId = 2*readNumber+direction;
5957 for (z=0; z<s1; z++)
5958 {
5959 int genLoc = locs[z];//-_msf_samplingLocs[o];
5960 int err = -1;
5961 int map_location = 0;
5962 int o = readSegment;
5963
5964 leftSeqLength = _msf_samplingLocs[o];
5965 middleSeqLength = WINDOW_SIZE;
5966 a = leftSeqLength + middleSeqLength;
5967 rightSeqLength = SEQ_LENGTH - a;
5968
5969 if(genLoc - leftSeqLength < _msf_refGenBeg || genLoc + rightSeqLength + middleSeqLength > _msf_refGenEnd ||
5970 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId || _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId)
5971 continue;
5972
5973 if(errThreshold == 2)
5974 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength,
5975 _tmpSeq + a, rightSeqLength,
5976 middleSeqLength, matrix, &map_location, _tmpHashValue);
5977 else if(errThreshold == 4)
5978 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength,
5979 _tmpSeq + a, rightSeqLength,
5980 middleSeqLength, matrix, &map_location, _tmpHashValue);
5981 else if(errThreshold ==3)
5982 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
5983 _tmpSeq + a, rightSeqLength,
5984 middleSeqLength, matrix, &map_location, _tmpHashValue);
5985 /*else if(errThreshold == 6)
5986 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
5987 _tmpSeq + a, rightSeqLength,
5988 middleSeqLength, matrix, &map_location, _tmpHashValue);*/
5989 else
5990 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength,
5991 _tmpSeq + a, rightSeqLength,
5992 middleSeqLength, matrix, &map_location, _tmpHashValue);
5993
5994
5995 if (err != -1)
5996 {
5997 int j = 0;
5998 int k = 0;
5999
6000 for(k = 0; k < readSegment+1; k++)
6001 {
6002 for(j = -errThreshold ; j <= errThreshold; j++)
6003 {
6004 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg &&
6005 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd)
6006 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId;
6007 }
6008 }
6009
6010
6011 generateSNPSAM(matrix, strlen(matrix), editString);
6012 generateCigar(matrix, strlen(matrix), cigar);
6013
6014 MappingLocations *parent = NULL;
6015 MappingLocations *child = _msf_mappingInfo[r].next;
6016
6017 genLoc = map_location + _msf_refGenOffset;
6018 int i = 0;
6019 for (i=0; i<(_msf_mappingInfo[r].size/MAP_CHUNKS); i++)
6020 {
6021 parent = child;
6022 child = child->next;
6023 }
6024
6025 if (child==NULL)
6026 {
6027 MappingLocations *tmp = getMem(sizeof(MappingLocations));
6028
6029 tmp->next = NULL;
6030 tmp->loc[0]=genLoc * d;
6031 tmp->err[0]=err;
6032
6033 tmp->cigarSize[0] = strlen(cigar);
6034 sprintf(tmp->cigar[0],"%s", cigar);
6035
6036 tmp->mdSize[0] = strlen(editString);
6037 sprintf(tmp->md[0],"%s", editString);
6038
6039 if (parent == NULL)
6040 _msf_mappingInfo[r].next = tmp;
6041 else
6042 parent->next = tmp;
6043 }
6044 else
6045 {
6046 if(strlen(cigar) > SEQ_LENGTH || strlen(editString) > SEQ_LENGTH)
6047 {
6048 printf("ERROR in %d read size(After mapping) exceedes cigar=%d md =%d cigar=%s md =%s\n", r, (int)strlen(cigar), (int)strlen(editString), cigar, editString);
6049 }
6050
6051 child->loc[_msf_mappingInfo[r].size % MAP_CHUNKS] = genLoc * d;
6052 child->err[_msf_mappingInfo[r].size % MAP_CHUNKS] = err;
6053
6054 child->cigarSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(cigar);
6055 sprintf(child->cigar[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",cigar);
6056
6057 child->mdSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(editString);
6058 sprintf(child->md[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",editString);
6059 }
6060 _msf_mappingInfo[r].size++;
6061
6062 }
6063 else
6064 {
6065 _msf_verifiedLocs[genLoc] = -readId;
6066 }
6067
6068 }
6069 }
6070
6071 /**********************************************/
6072 void mapPairedEndSeq()
6073 {
6074 int i = 0;
6075 int j = 0;
6076 int k = 0;
6077
6078 unsigned int *locs = NULL;
6079 while ( i < _msf_seqListSize )
6080 {
6081 for(j = 0; j < _msf_samplingLocsSize; j++)
6082 {
6083 k = _msf_sort_seqList[i].readNumber;
6084 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]));
6085 if ( locs != NULL)
6086 {
6087 mapPairEndSeqList(locs+1, locs[0],k ,j, 0);
6088 }
6089 }
6090 i++;
6091 }
6092 i = 0;
6093
6094 while ( i < _msf_seqListSize )
6095 {
6096 for(j = 0; j < _msf_samplingLocsSize; j++)
6097 {
6098 k = _msf_sort_seqList[i].readNumber;
6099 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j]));
6100 if ( locs != NULL)
6101 {
6102 mapPairEndSeqList(locs+1, locs[0],k ,j, 1);
6103 }
6104 }
6105
6106 i++;
6107 }
6108 char fname1[FILE_NAME_LENGTH];
6109 char fname2[FILE_NAME_LENGTH];
6110 MappingLocations *cur;
6111 int tmpOut;
6112 int lmax=0, rmax=0;
6113
6114 sprintf(fname1, "%s__%s__%s__%d__1.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles);
6115 sprintf(fname2, "%s__%s__%s__%d__2.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles);
6116
6117 FILE* out;
6118 FILE* out1 = fileOpen(fname1, "w");
6119 FILE* out2 = fileOpen(fname2, "w");
6120
6121 _msf_openFiles++;
6122
6123 for (i=0; i<_msf_seqListSize; i++)
6124 {
6125
6126 if (i%2==0)
6127 {
6128 out = out1;
6129
6130 if (lmax < _msf_mappingInfo[i].size)
6131 {
6132 lmax = _msf_mappingInfo[i].size;
6133 }
6134 }
6135 else
6136 {
6137 out = out2;
6138 if (rmax < _msf_mappingInfo[i].size)
6139 {
6140 rmax = _msf_mappingInfo[i].size;
6141 }
6142 }
6143
6144 tmpOut = fwrite(&(_msf_mappingInfo[i].size), sizeof(int), 1, out);
6145 if (_msf_mappingInfo[i].size > 0)
6146 {
6147 cur = _msf_mappingInfo[i].next;
6148 for (j=0; j < _msf_mappingInfo[i].size; j++)
6149 {
6150 if ( j>0 && j%MAP_CHUNKS==0)
6151 {
6152 cur = cur->next;
6153 }
6154 if(cur->cigarSize[j % MAP_CHUNKS] > SEQ_LENGTH || cur->mdSize[j % MAP_CHUNKS] > SEQ_LENGTH)
6155 {
6156 printf("ERROR in %d read size exceeds cigar=%d md =%d cigar=%s md =%s\n", i, cur->cigarSize[j % MAP_CHUNKS], cur->mdSize[j % MAP_CHUNKS], cur->cigar[j % MAP_CHUNKS], cur->md[j % MAP_CHUNKS]);
6157 }
6158
6159 tmpOut = fwrite(&(cur->loc[j % MAP_CHUNKS]), sizeof(int), 1, out);
6160
6161 tmpOut = fwrite(&(cur->err[j % MAP_CHUNKS]), sizeof(int), 1, out);
6162
6163 tmpOut = fwrite(&(cur->cigarSize[j % MAP_CHUNKS]), sizeof(int), 1, out);
6164 tmpOut = fwrite((cur->cigar[j % MAP_CHUNKS]), sizeof(char), (cur->cigarSize[j % MAP_CHUNKS]), out);
6165
6166 tmpOut = fwrite(&(cur->mdSize[j % MAP_CHUNKS]), sizeof(int), 1, out);
6167 tmpOut = fwrite((cur->md[j % MAP_CHUNKS]), sizeof(char), (cur->mdSize[j % MAP_CHUNKS]), out);
6168
6169 }
6170 _msf_mappingInfo[i].size = 0;
6171 //_msf_mappingInfo[i].next = NULL;
6172 }
6173 }
6174
6175 _msf_maxLSize += lmax;
6176 _msf_maxRSize += rmax;
6177
6178 fclose(out1);
6179 fclose(out2);
6180
6181 }
6182
6183 void outputPairFullMappingInfo(FILE *fp, int readNumber)
6184 {
6185
6186 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
6187 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1];
6188
6189 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0';
6190
6191 seq1 = _msf_seqList[readNumber*2].seq;
6192 rseq1 = _msf_seqList[readNumber*2].rseq;
6193 qual1 = _msf_seqList[readNumber*2].qual;
6194
6195 reverse(_msf_seqList[readNumber*2].qual, rqual1, SEQ_LENGTH);
6196
6197 seq2 = _msf_seqList[readNumber*2+1].seq;
6198 rseq2 = _msf_seqList[readNumber*2+1].rseq;
6199 qual2 = _msf_seqList[readNumber*2+1].qual;
6200
6201 reverse(_msf_seqList[readNumber*2+1].qual, rqual2, SEQ_LENGTH);
6202
6203
6204 if(bestHitMappingInfo[readNumber*2].loc == -1 && bestHitMappingInfo[readNumber*2+1].loc == -1)
6205 return;
6206 else
6207 {
6208
6209 char *seq;
6210 char *qual;
6211 char d1;
6212 char d2;
6213 int isize;
6214 int proper=0;
6215 // ISIZE CALCULATION
6216 // The distance between outer edges
6217 isize = abs(bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc)+SEQ_LENGTH - 2;
6218
6219 if (bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc > 0)
6220 {
6221 isize *= -1;
6222 }
6223 d1 = (bestHitMappingInfo[readNumber*2].dir == -1)?1:0;
6224 d2 = (bestHitMappingInfo[readNumber*2+1].dir == -1)?1:0;
6225
6226 if ( d1 )
6227 {
6228 seq = rseq1;
6229 qual = rqual1;
6230 }
6231 else
6232 {
6233 seq = seq1;
6234 qual = qual1;
6235 }
6236 if ( (bestHitMappingInfo[readNumber*2].loc < bestHitMappingInfo[readNumber*2+1].loc && !d1 && d2) ||
6237 (bestHitMappingInfo[readNumber*2].loc > bestHitMappingInfo[readNumber*2+1].loc && d1 && !d2) )
6238 {
6239 proper = 2;
6240 }
6241 else
6242 {
6243 proper = 0;
6244 }
6245
6246 _msf_output.POS = bestHitMappingInfo[readNumber*2].loc;
6247 _msf_output.MPOS = bestHitMappingInfo[readNumber*2+1].loc;
6248 _msf_output.FLAG = 1+proper+16*d1+32*d2+64;
6249 _msf_output.ISIZE = isize;
6250 _msf_output.SEQ = seq,
6251 _msf_output.QUAL = qual;
6252 _msf_output.QNAME = _msf_seqList[readNumber*2].name;
6253 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr;
6254 _msf_output.MAPQ = 255;
6255 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2].cigar;
6256 _msf_output.MRNAME = "=";
6257
6258 _msf_output.optSize = 2;
6259 _msf_output.optFields = _msf_optionalFields;
6260
6261 _msf_optionalFields[0].tag = "NM";
6262 _msf_optionalFields[0].type = 'i';
6263 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2].err;
6264
6265 _msf_optionalFields[1].tag = "MD";
6266 _msf_optionalFields[1].type = 'Z';
6267 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2].md;
6268
6269 outputSAM(fp, _msf_output);
6270 output(_msf_output);
6271
6272 if ( d2 )
6273 {
6274 seq = rseq2;
6275 qual = rqual2;
6276 }
6277 else
6278 {
6279 seq = seq2;
6280 qual = qual2;
6281 }
6282
6283 _msf_output.POS = bestHitMappingInfo[readNumber*2+1].loc;
6284 _msf_output.MPOS = bestHitMappingInfo[readNumber*2].loc;
6285 _msf_output.FLAG = 1+proper+16*d2+32*d1+128;
6286 _msf_output.ISIZE = -isize;
6287 _msf_output.SEQ = seq,
6288 _msf_output.QUAL = qual;
6289 _msf_output.QNAME = _msf_seqList[readNumber*2].name;
6290 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr;
6291 _msf_output.MAPQ = 255;
6292 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2+1].cigar;
6293 _msf_output.MRNAME = "=";
6294
6295 _msf_output.optSize = 2;
6296 _msf_output.optFields = _msf_optionalFields;
6297
6298 _msf_optionalFields[0].tag = "NM";
6299 _msf_optionalFields[0].type = 'i';
6300 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2+1].err;
6301
6302 _msf_optionalFields[1].tag = "MD";
6303 _msf_optionalFields[1].type = 'Z';
6304 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2+1].md;
6305
6306 outputSAM(fp, _msf_output);
6307 output(_msf_output);
6308 }
6309 }
6310
6311
/*
  Find which of x1 and x2 is closest to c.
  @return 0: if x1 is closer to c
          1: if x2 is closer to c
          2: if both distances are equal

  The previous implementation returned 0 when x1 was FARTHER from c, which
  contradicted this contract and made the callers (which keep the current best
  pair when the result is 0) prefer the pair farthest from the expected insert
  size. The distances are computed in long long so that x - c cannot overflow.
 */
int findNearest(int x1, int x2, int c)
{
	const long long d1 = llabs((long long)x1 - (long long)c);
	const long long d2 = llabs((long long)x2 - (long long)c);

	if (d1 < d2)
		return 0;
	if (d1 > d2)
		return 1;
	return 2;
}
6331
6332 void initBestConcordantDiscordant(int readNumber)
6333 {
6334 char bestConcordantFileName[FILE_NAME_LENGTH];
6335 //char bestDiscordantFileName[FILE_NAME_LENGTH];
6336
6337 //OPEN THE BEST CONCORDANT FILE
6338 //BEGIN{Farhad Hormozdiari}
6339 /* begin {calkan} */
6340 //sprintf(bestConcordantFileName, "%s%s__BEST.CONCORDANT", mappingOutputPath, mappingOutput);
6341 sprintf(bestConcordantFileName, "%s%s_BEST.sam", mappingOutputPath, mappingOutput);
6342
6343 bestConcordantFILE = fileOpen(bestConcordantFileName, "w");
6344 bestDiscordantFILE = bestConcordantFILE;
6345 /* end {calkan} */
6346 //END{Farhad Hormozdiari}
6347
6348
6349 //OPEN THE BEST DISCORDANT FILE
6350 //BEGIN{Farhad Hormozdiari}
6351 /* begin {calkan}
6352 sprintf(bestDiscordantFileName, "%s%s__BEST.DISCORDANT", mappingOutputPath, mappingOutput);
6353 bestDiscordantFILE = fileOpen(bestDiscordantFileName, "w");
6354 end {calkan} */
6355
6356 //END{Farhad Hormozdiari}
6357
6358 initBestMapping(readNumber);
6359 }
6360
6361 void finalizeBestConcordantDiscordant()
6362 {
6363 int i = 0;
6364
6365 for(i = 0; i<_msf_seqListSize/2; i++)
6366 {
6367 if(_msf_readHasConcordantMapping[i]==1)
6368 outputPairFullMappingInfo(bestConcordantFILE, i);
6369 else
6370 outputPairFullMappingInfo(bestDiscordantFILE, i);
6371 }
6372
6373 fclose(bestConcordantFILE);
6374 // fclose(bestDiscordantFILE);
6375
6376 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo));
6377 }
6378
6379 void setFullMappingInfo(int readNumber, int loc, int dir, int err, int score, char *md, char * refName, char *cigar)
6380 {
6381 bestHitMappingInfo[readNumber].loc = loc;
6382 bestHitMappingInfo[readNumber].dir = dir;
6383 bestHitMappingInfo[readNumber].err = err;
6384 bestHitMappingInfo[readNumber].score = score;
6385
6386 strncpy(bestHitMappingInfo[readNumber].md, md, strlen(md)+1);
6387 strncpy(bestHitMappingInfo[readNumber].chr, refName, strlen(refName)+1);
6388 strncpy(bestHitMappingInfo[readNumber].cigar, cigar, strlen(cigar)+1);
6389 }
6390
6391
6392 void setPairFullMappingInfo(int readNumber, FullMappingInfo mi1, FullMappingInfo mi2)
6393 {
6394
6395 bestHitMappingInfo[readNumber*2].loc = mi1.loc;
6396 bestHitMappingInfo[readNumber*2].dir = mi1.dir;
6397 bestHitMappingInfo[readNumber*2].err = mi1.err;
6398 bestHitMappingInfo[readNumber*2].score = mi1.score;
6399 snprintf(bestHitMappingInfo[readNumber*2].chr, MAX_REF_SIZE, "%s", _msf_refGenName);
6400
6401
6402 strncpy(bestHitMappingInfo[readNumber*2].md, mi1.md, strlen(mi1.md)+1);
6403 strncpy(bestHitMappingInfo[readNumber*2].cigar, mi1.cigar, strlen(mi1.cigar)+1);
6404
6405
6406 /*
6407 sprintf(bestHitMappingInfo[readNumber*2].md, "%s\0", mi1.md);
6408 sprintf(bestHitMappingInfo[readNumber*2].cigar, "%s\0", mi1.cigar);
6409 */
6410
6411
6412 bestHitMappingInfo[readNumber*2+1].loc = mi2.loc;
6413 bestHitMappingInfo[readNumber*2+1].dir = mi2.dir;
6414 bestHitMappingInfo[readNumber*2+1].err = mi2.err;
6415 bestHitMappingInfo[readNumber*2+1].score = mi2.score;
6416
6417 snprintf(bestHitMappingInfo[readNumber*2+1].chr, MAX_REF_SIZE, "%s", _msf_refGenName);
6418
6419 /*
6420 sprintf(bestHitMappingInfo[readNumber*2+1].md, "%s\0", mi2.md);
6421 sprintf(bestHitMappingInfo[readNumber*2+1].cigar, "%s\0", mi2.cigar);
6422 */
6423
6424 strncpy(bestHitMappingInfo[readNumber*2+1].md, mi2.md, strlen(mi2.md)+1);
6425 strncpy(bestHitMappingInfo[readNumber*2+1].cigar, mi2.cigar, strlen(mi2.cigar)+1);
6426
6427 }
6428
6429 /**********************************************/
6430 void outputPairedEnd()
6431 {
6432 int i = 0;
6433
6434 char cigar[MAX_CIGAR_SIZE];
6435
6436 int tmpOut;
6437
6438 loadRefGenome(&_msf_refGen, &_msf_refGenName, &tmpOut);
6439
6440 FILE* in1[_msf_openFiles];
6441 FILE* in2[_msf_openFiles];
6442
6443 char fname1[_msf_openFiles][FILE_NAME_LENGTH];
6444 char fname2[_msf_openFiles][FILE_NAME_LENGTH];
6445
6446 // discordant
6447 FILE *out=NULL, *out1=NULL;
6448
6449 char fname3[FILE_NAME_LENGTH];
6450 char fname4[FILE_NAME_LENGTH];
6451
6452 int meanDistanceMapping = 0;
6453
6454 char *rqual1;
6455 char *rqual2;
6456
6457 rqual1 = getMem((SEQ_LENGTH+1)*sizeof(char));
6458 rqual2 = getMem((SEQ_LENGTH+1)*sizeof(char));
6459
6460 if (pairedEndDiscordantMode)
6461 {
6462 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput);
6463 sprintf(fname4, "%s__%s__oea", mappingOutputPath, mappingOutput);
6464 out = fileOpen(fname3, "a");
6465 out1 = fileOpen(fname4, "a");
6466 }
6467
6468 FullMappingInfo *mi1 = getMem(sizeof(FullMappingInfo) * _msf_maxLSize);
6469 FullMappingInfo *mi2 = getMem(sizeof(FullMappingInfo) * _msf_maxRSize);
6470
6471 _msf_fileCount[_msf_maxFile] = 0;
6472 for (i=0; i<_msf_openFiles; i++)
6473 {
6474 sprintf(fname1[i], "%s__%s__%s__%d__1.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i);
6475 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][0], "%s", fname1[i]);
6476
6477 sprintf(fname2[i], "%s__%s__%s__%d__2.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i);
6478 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][1], "%s", fname2[i]);
6479
6480 in1[i] = fileOpen(fname1[i], "r");
6481 in2[i] = fileOpen(fname2[i], "r");
6482 _msf_fileCount[_msf_maxFile]++;
6483 }
6484 _msf_maxFile++;
6485
6486 int size;
6487 int j, k;
6488 int size1, size2;
6489
6490 meanDistanceMapping = (pairedEndDiscordantMode==1)? (minPairEndedDiscordantDistance+maxPairEndedDiscordantDistance)/2 + SEQ_LENGTH
6491 : (minPairEndedDistance + maxPairEndedDistance) / 2 + SEQ_LENGTH;
6492
6493 for (i=0; i<_msf_seqListSize/2; i++)
6494 {
6495 size1 = size2 = 0;
6496 for (j=0; j<_msf_openFiles; j++)
6497 {
6498 tmpOut = fread(&size, sizeof(int), 1, in1[j]);
6499 if ( size > 0 )
6500 {
6501 for (k=0; k<size; k++)
6502 {
6503 mi1[size1+k].dir = 1;
6504 tmpOut = fread (&(mi1[size1+k].loc), sizeof(int), 1, in1[j]);
6505 tmpOut = fread (&(mi1[size1+k].err), sizeof(int), 1, in1[j]);
6506
6507 tmpOut = fread (&(mi1[size1+k].cigarSize), sizeof(int), 1, in1[j]);
6508 tmpOut = fread ((mi1[size1+k].cigar), sizeof(char), mi1[size1+k].cigarSize, in1[j]);
6509 mi1[size1+k].cigar[mi1[size1+k].cigarSize] = '\0';
6510
6511 tmpOut = fread (&(mi1[size1+k].mdSize), sizeof(int), 1, in1[j]);
6512 tmpOut = fread ((mi1[size1+k].md), sizeof(char), (mi1[size1+k].mdSize), in1[j]);
6513 mi1[size1+k].md[mi1[size1+k].mdSize] = '\0';
6514
6515 if (mi1[size1+k].loc<1)
6516 {
6517 mi1[size1+k].loc *= -1;
6518 mi1[size1+k].dir = -1;
6519 }
6520 }
6521 qsort(mi1+size1, size, sizeof(FullMappingInfo), compareOut);
6522 size1+=size;
6523 }
6524 }
6525
6526 for (j=0; j<_msf_openFiles; j++)
6527 {
6528 tmpOut = fread(&size, sizeof(int), 1, in2[j]);
6529 if ( size > 0 )
6530 {
6531 for (k=0; k<size; k++)
6532 {
6533 mi2[size2+k].dir = 1;
6534 tmpOut = fread (&(mi2[size2+k].loc), sizeof(int), 1, in2[j]);
6535 tmpOut = fread (&(mi2[size2+k].err), sizeof(int), 1, in2[j]);
6536
6537 tmpOut = fread (&(mi2[size2+k].cigarSize), sizeof(int), 1, in2[j]);
6538 tmpOut = fread ((mi2[size2+k].cigar), sizeof(char), mi2[size2+k].cigarSize, in2[j]);
6539 mi2[size2+k].cigar[mi2[size2+k].cigarSize] = '\0';
6540
6541 tmpOut = fread (&(mi2[size2+k].mdSize), sizeof(int), 1, in2[j]);
6542 tmpOut = fread ((mi2[size2+k].md), sizeof(char), mi2[size2+k].mdSize, in2[j]);
6543 mi2[size2+k].md[mi2[size2+k].mdSize] = '\0';
6544
6545 if (mi2[size2+k].loc<1)
6546 {
6547 mi2[size2+k].loc *= -1;
6548 mi2[size2+k].dir = -1;
6549 }
6550 }
6551 qsort(mi2+size2, size, sizeof(FullMappingInfo), compareOut);
6552 size2+=size;
6553 }
6554 }
6555
6556 int lm, ll, rl, rm;
6557 int pos = 0;
6558
6559 if (pairedEndDiscordantMode)
6560 {
6561
6562 for (j=0; j<size1; j++)
6563 {
6564 lm = mi1[j].loc - maxPairEndedDiscordantDistance + 1;
6565 ll = mi1[j].loc - minPairEndedDiscordantDistance + 1;
6566 rl = mi1[j].loc + minPairEndedDiscordantDistance - 1;
6567 rm = mi1[j].loc + maxPairEndedDiscordantDistance - 1;
6568
6569 while (pos<size2 && mi2[pos].loc < lm)
6570 {
6571 pos++;
6572 }
6573
6574 k = pos;
6575 while (k<size2 && mi2[k].loc<=rm)
6576 {
6577 if ( mi2[k].loc <= ll || mi2[k].loc >= rl)
6578 {
6579 if ( (mi1[j].loc < mi2[k].loc && mi1[j].dir==1 && mi2[k].dir == -1) ||
6580 (mi1[j].loc > mi2[k].loc && mi1[j].dir==-1 && mi2[k].dir == 1) )
6581 {
6582 _msf_seqList[i*2].hits[0]=1;
6583 _msf_seqList[i*2+1].hits[0]=1;
6584
6585 if(nosamMode != 0)
6586 {
6587 size1=0;
6588 size2=0;
6589 }
6590
6591 break;
6592 }
6593 }
6594 k++;
6595 }
6596 }
6597
6598 _msf_seqHits[i*2] += size1;
6599 _msf_seqHits[i*2+1] += size2;
6600
6601
6602 if (_msf_seqHits[i*2+1] * _msf_seqHits[i*2] > DISCORDANT_CUT_OFF && nosamMode != 0)
6603 {
6604 _msf_seqList[i*2].hits[0]=1;
6605 _msf_seqList[i*2+1].hits[0]=1;
6606 size1=0;
6607 size2=0;
6608 }
6609
6610
6611
6612
6613 int tmp = 0;
6614 int rNo = 0;
6615 int loc = 0;
6616 int err = 0;
6617 float sc = 0;
6618 char l = 0;
6619
6620 //write the OEA data
6621 if(_msf_seqHits[i*2] == 0 )
6622 {
6623 for(k = 0;k < size2 && _msf_oeaMapping[i*2+1] < maxOEAOutput ;k++)
6624 {
6625 rNo = i*2+1;
6626 loc = mi2[k].loc*mi2[k].dir;
6627 err = mi2[k].err;
6628 sc = mi2[k].score;
6629
6630 l = strlen(_msf_refGenName);
6631
6632 tmp = fwrite(&rNo, sizeof(int), 1, out1);
6633
6634 tmp = fwrite(&l, sizeof(char), 1, out1);
6635 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1);
6636
6637 tmp = fwrite(&loc, sizeof(int), 1, out1);
6638 tmp = fwrite(&err, sizeof(int), 1, out1);
6639 tmp = fwrite(&sc, sizeof(float), 1, out1);
6640
6641 if(mi2[k].cigarSize > SEQ_LENGTH || mi2[k].cigarSize <= 0)
6642 printf("ERROR CIGAR size=%d %s\n", mi2[k].cigarSize, _msf_seqList[i*2+1].seq);
6643
6644 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out1);
6645 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out1);
6646
6647 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out1);
6648 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out1);
6649
6650 _msf_oeaMapping[i*2+1]++;
6651 }
6652 }
6653 if(_msf_seqHits[i*2+1] == 0)
6654 {
6655 for(j = 0;j < size1 && _msf_oeaMapping[i*2] < maxOEAOutput;j++)
6656 {
6657 rNo = i*2;
6658 loc = mi1[j].loc*mi1[j].dir;
6659 err = mi1[j].err;
6660 sc = mi1[j].score;
6661
6662 l = strlen(_msf_refGenName);
6663
6664 tmp = fwrite(&rNo, sizeof(int), 1, out1);
6665
6666 tmp = fwrite(&l, sizeof(char), 1, out1);
6667 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1);
6668
6669 tmp = fwrite(&loc, sizeof(int), 1, out1);
6670 tmp = fwrite(&err, sizeof(int), 1, out1);
6671 tmp = fwrite(&sc, sizeof(float), 1, out1);
6672
6673 if(mi1[j].cigarSize > SEQ_LENGTH || mi1[j].cigarSize <= 0 )
6674 printf("ERROR %d %s\n", mi1[j].cigarSize, _msf_seqList[i*2+1].seq);
6675
6676 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out1);
6677 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out1);
6678
6679 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out1);
6680 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out1);
6681
6682 _msf_oeaMapping[i*2]++;
6683 }
6684 }
6685 }
6686
6687 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
6688
6689
6690
6691
6692 rqual1[SEQ_LENGTH] = '\0';
6693 rqual2[SEQ_LENGTH] = '\0';
6694 rqual1[0] = '\0';
6695 rqual2[0] = '\0';
6696
6697
6698
6699 seq1 = _msf_seqList[i*2].seq;
6700 rseq1 = _msf_seqList[i*2].rseq;
6701 qual1 = _msf_seqList[i*2].qual;
6702
6703
6704
6705 strncpy(rqual1, _msf_seqList[i*2].qual, SEQ_LENGTH);
6706
6707 seq2 = _msf_seqList[i*2+1].seq;
6708 rseq2 = _msf_seqList[i*2+1].rseq;
6709 qual2 = _msf_seqList[i*2+1].qual;
6710
6711
6712 strncpy(rqual2, _msf_seqList[i*2+1].qual, SEQ_LENGTH);
6713
6714 if (pairedEndDiscordantMode)
6715 {
6716 for (k=0; k<size1; k++)
6717 {
6718 mi1[k].score = calculateScore(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, (mi1[k].dir==-1)?rqual1:qual1, mi1[k].cigar);
6719 }
6720
6721 for (k=0; k<size2; k++)
6722 {
6723 mi2[k].score = calculateScore(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, (mi2[k].dir==-1)?rqual2:qual2, mi2[k].cigar);
6724 }
6725
6726 }
6727
6728
6729 if (pairedEndDiscordantMode)
6730 {
6731 for (j=0; j<size1; j++)
6732 {
6733 for(k = 0; k < size2; k++)
6734 {
6735 if(
6736 (mi2[k].loc-mi1[j].loc >= minPairEndedDiscordantDistance &&
6737 mi2[k].loc-mi1[j].loc <= maxPairEndedDiscordantDistance &&
6738 mi1[j].dir > 0 && mi2[k].dir < 0 )
6739
6740 ||
6741
6742 (mi1[j].loc-mi2[k].loc >= minPairEndedDiscordantDistance &&
6743 mi1[j].loc-mi2[k].loc <= maxPairEndedDiscordantDistance &&
6744 mi1[j].dir < 0 && mi2[k].dir > 0)
6745 )
6746 {
6747 //POSSIBLE CONCORDANT
6748 if(_msf_readHasConcordantMapping[i] == 0)
6749 {
6750 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6751 _msf_readHasConcordantMapping[i] = 1;
6752 _msf_seqList[i*2].hits[0] = 1;
6753 _msf_seqList[i*2+1].hits[0] = 1;
6754 }
6755 else
6756 {
6757 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err)
6758 {
6759
6760 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err ==
6761 mi1[j].err + mi2[k].err &&
6762 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
6763 abs(mi2[k].loc - mi1[j].loc),
6764 meanDistanceMapping
6765 ) == 0 )
6766 {
6767 continue;
6768 }
6769 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6770 }
6771 }
6772 }
6773 //DISCORDANT TO TEMP FILE FOR POST PROCESSIING
6774 else if(_msf_readHasConcordantMapping[i] == 0 &&
6775 _msf_seqHits[i*2] != 0 &&
6776 _msf_seqHits[i*2+1] != 0)
6777 {
6778
6779 int tmp;
6780 int rNo = i;
6781 int loc = mi1[j].loc*mi1[j].dir;
6782 int err = mi1[j].err;
6783 float sc = mi1[j].score;
6784
6785 char l = strlen(_msf_refGenName);
6786
6787 if(_msf_discordantMapping[i*2] < maxDiscordantOutput)
6788 {
6789
6790 tmp = fwrite(&rNo, sizeof(int), 1, out);
6791
6792 tmp = fwrite(&l, sizeof(char), 1, out);
6793 tmp = fwrite(_msf_refGenName, sizeof(char), l, out);
6794
6795 tmp = fwrite(&loc, sizeof(int), 1, out);
6796 tmp = fwrite(&err, sizeof(int), 1, out);
6797 tmp = fwrite(&sc, sizeof(float), 1, out);
6798
6799 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out);
6800 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out);
6801
6802 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out);
6803 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out);
6804
6805
6806 loc = mi2[k].loc*mi2[k].dir;
6807 err = mi2[k].err;
6808 sc = mi2[k].score;
6809
6810 tmp = fwrite(&loc, sizeof(int), 1, out);
6811 tmp = fwrite(&err, sizeof(int), 1, out);
6812 tmp = fwrite(&sc, sizeof(float), 1, out);
6813
6814 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out);
6815 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out);
6816
6817 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out);
6818 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out);
6819
6820
6821 _msf_discordantMapping[i*2]++;
6822 }
6823 //SET THE BEST DISCORDANT
6824 //BEGIN {Farhad Hormozdiari}
6825 if( bestHitMappingInfo[i*2].loc == -1 &&
6826 bestHitMappingInfo[i*2+1].loc == -1 &&
6827 _msf_readHasConcordantMapping[i] == 0)
6828 {
6829 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6830 _msf_seqList[i*2].hits[0] = 1;
6831 _msf_seqList[i*2+1].hits[0] = 1;
6832 }
6833 else if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err
6834 && _msf_readHasConcordantMapping[i] == 0)
6835 {
6836 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err &&
6837 findNearest( abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
6838 abs(mi1[j].loc - mi2[k].loc),
6839 meanDistanceMapping
6840 ) == 0
6841 )
6842 {
6843 continue;
6844 }
6845 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6846 }
6847 //END {Farhad Hormozdiari}
6848 }
6849 }
6850 }
6851 }
6852 else
6853 {
6854 for (j=0; j<size1; j++)
6855 {
6856 for(k = 0; k < size2; k++)
6857 {
6858 if((mi2[k].loc-mi1[j].loc >= minPairEndedDistance &&
6859 mi2[k].loc-mi1[j].loc <= maxPairEndedDistance &&
6860 mi1[j].dir > 0 && mi2[k].dir < 0)
6861 ||
6862 (mi1[j].loc-mi2[k].loc >= minPairEndedDistance &&
6863 mi1[j].loc-mi2[k].loc <= maxPairEndedDistance &&
6864 mi1[j].dir < 0 && mi2[k].dir > 0)
6865 )
6866 {
6867 char *seq;
6868 char *qual;
6869 char d1;
6870 char d2;
6871 int isize;
6872 int proper=0;
6873 // ISIZE CALCULATION
6874 // The distance between outer edges
6875 isize = abs(mi1[j].loc - mi2[k].loc)+SEQ_LENGTH-2;
6876 if (mi1[j].loc - mi2[k].loc > 0)
6877 {
6878 isize *= -1;
6879 }
6880
6881 d1 = (mi1[j].dir == -1)?1:0;
6882 d2 = (mi2[k].dir == -1)?1:0;
6883
6884 //SET THE READ HAS CONCORDANT MAPPING
6885 _msf_readHasConcordantMapping[i] = 1;
6886
6887 if ( d1 )
6888 {
6889 seq = rseq1;
6890 qual = rqual1;
6891 }
6892 else
6893 {
6894 seq = seq1;
6895 qual = qual1;
6896 }
6897
6898 if ((mi1[j].loc < mi2[k].loc && !d1 && d2) ||
6899 (mi1[j].loc > mi2[k].loc && d1 && !d2) )
6900 {
6901 proper = 2;
6902 }
6903 else
6904 {
6905 proper = 0;
6906 }
6907
6908
6909 _msf_output.POS = mi1[j].loc;
6910 _msf_output.MPOS = mi2[k].loc;
6911 _msf_output.FLAG = 1+proper+16*d1+32*d2+64;
6912 _msf_output.ISIZE = isize;
6913 _msf_output.SEQ = seq,
6914 _msf_output.QUAL = qual;
6915 _msf_output.QNAME = _msf_seqList[i*2].name;
6916 _msf_output.RNAME = _msf_refGenName;
6917 _msf_output.MAPQ = 255;
6918 _msf_output.CIGAR = cigar;
6919 _msf_output.MRNAME = "=";
6920
6921 _msf_output.optSize = 2;
6922 _msf_output.optFields = _msf_optionalFields;
6923
6924 _msf_optionalFields[0].tag = "NM";
6925 _msf_optionalFields[0].type = 'i';
6926 _msf_optionalFields[0].iVal = mi1[j].err;
6927
6928 _msf_optionalFields[1].tag = "MD";
6929 _msf_optionalFields[1].type = 'Z';
6930 _msf_optionalFields[1].sVal = mi1[j].md;
6931
6932 if(!bestMode)
6933 output(_msf_output);
6934
6935 if ( d2 )
6936 {
6937 seq = rseq2;
6938 qual = rqual2;
6939 }
6940 else
6941 {
6942 seq = seq2;
6943 qual = qual2;
6944 }
6945
6946 _msf_output.POS = mi2[k].loc;
6947 _msf_output.MPOS = mi1[j].loc;
6948 _msf_output.FLAG = 1+proper+16*d2+32*d1+128;
6949 _msf_output.ISIZE = -isize;
6950 _msf_output.SEQ = seq,
6951 _msf_output.QUAL = qual;
6952 _msf_output.QNAME = _msf_seqList[i*2].name;
6953 _msf_output.RNAME = _msf_refGenName;
6954 _msf_output.MAPQ = 255;
6955 _msf_output.CIGAR = cigar;
6956 _msf_output.MRNAME = "=";
6957
6958 _msf_output.optSize = 2;
6959 _msf_output.optFields = _msf_optionalFields;
6960
6961 _msf_optionalFields[0].tag = "NM";
6962 _msf_optionalFields[0].type = 'i';
6963 _msf_optionalFields[0].iVal = mi2[k].err;;
6964
6965 _msf_optionalFields[1].tag = "MD";
6966 _msf_optionalFields[1].type = 'Z';
6967 _msf_optionalFields[1].sVal = mi2[k].md;
6968
6969 if(!bestMode)
6970 output(_msf_output);
6971 //SET THE BEST CONCORDANT
6972 //BEGIN {Farhad Hormozdiari}
6973 if(bestHitMappingInfo[i*2].loc == -1 && bestHitMappingInfo[i*2+1].loc == -1)
6974 {
6975 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6976 }
6977 else
6978 {
6979 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err)
6980 {
6981
6982 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err &&
6983 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
6984 abs(mi2[k].loc - mi1[j].loc),
6985 meanDistanceMapping
6986 ) == 0 )
6987 {
6988 continue;
6989 }
6990 setPairFullMappingInfo(i, mi1[j], mi2[k]);
6991 }
6992 }
6993 //END {Farhad Hormozdiari}
6994 }
6995 }
6996 }
6997
6998 }
6999 }
7000
7001 freeMem(rqual1, 0);
7002 freeMem(rqual2, 0);
7003
7004 if (pairedEndDiscordantMode)
7005 {
7006 fclose(out);
7007 fclose(out1);
7008 }
7009
7010 for (i=0; i<_msf_openFiles; i++)
7011 {
7012 fclose(in1[i]);
7013 fclose(in2[i]);
7014
7015 unlink(fname1[i]);
7016 unlink(fname2[i]);
7017 }
7018
7019 freeMem(mi1, sizeof(FullMappingInfo)*_msf_maxLSize);
7020 freeMem(mi2, sizeof(FullMappingInfo)*_msf_maxRSize);
7021
7022 _msf_openFiles = 0;
7023 }
7024
7025 /**********************************************/
7026 /**********************************************/
7027 /**********************************************/
7028 /**********************************************/
/*
 * Parses the decimal number held in str[index1 .. index2-1] and returns it
 * as a float (the conversion itself is done with atol()).
 *
 * An empty or inverted range yields 0. The range is clamped to the size of the
 * internal scratch buffer, so an over-long range can no longer overflow it.
 */
float str2int(char *str, int index1, int index2)
{
	char tmp[200];
	int len = index2 - index1;

	if (len < 0)
	{
		len = 0;
	}
	if (len > (int)sizeof(tmp) - 1)
	{
		len = (int)sizeof(tmp) - 1;
	}

	strncpy(tmp, &str[index1], len);
	tmp[len] = '\0';

	return atol(tmp);
}
7036
7037 float calculateScore(int index, char *seq, char *qual,char *md)
7038 {
7039 int i;
7040 int j;
7041 char *ref;
7042 char *ver;
7043
7044 ref = _msf_refGen + index-1;
7045 ver = seq;
7046 float score = 1;
7047
7048 char tmp[200];
7049 int value = 0;
7050 int end = 0;
7051 int index1 = 0;
7052 int index2 = 0;
7053
7054 i=0;
7055 while(1)
7056 {
7057
7058 if(i>=strlen(md))
7059 break;
7060
7061 index1 = i;
7062
7063 while(md[i] >='0' && md[i]<='9')
7064 {
7065 i++;
7066 }
7067
7068 index2 = i;
7069
7070 value = str2int(md, index1,index2);
7071
7072 if(md[i]=='M')
7073 {
7074 for(j=0;j<value;j++)
7075 {
7076 tmp[end]='M';
7077 end++;
7078 }
7079 }
7080 else if(md[i]=='I')
7081 {
7082 for(j=0;j<value;j++)
7083 {
7084 tmp[end]='I';
7085 end++;
7086 }
7087
7088 }
7089 else if(md[i] == 'D')
7090 {
7091 for(j=0;j<value;j++)
7092 {
7093 tmp[end]='D';
7094 end++;
7095 }
7096 }
7097 i++;
7098 }
7099
7100 tmp[end] = '\0';
7101
7102 j = 0;
7103
7104 for (i = 0; i < end; i++)
7105 {
7106 if(tmp[i] == 'M')
7107 {
7108 if (*ref != *ver)
7109 {
7110 score *= 0.001 + 1/pow( 10, ((qual[j]-33)/10.0) );
7111 }
7112
7113 ref++;
7114 ver++;
7115 j++;
7116 }
7117 else if(tmp[i] == 'I')
7118 {
7119 ver++;
7120 j++;
7121 }
7122 else if(tmp[i] == 'D')
7123 {
7124 ref++;
7125 }
7126 }
7127
7128 return score;
7129 }
7130
/*
 * Converts the characters str[start .. end-1] to an int with atoi().
 *
 * An empty or inverted range yields 0. At most sizeof(tmp)-1 characters are
 * copied, so an over-long range can no longer overflow the scratch buffer.
 */
int matoi(char *str, int start, int end)
{
	char tmp[200];
	int len = end - start;
	int i;

	if (len > (int)sizeof(tmp) - 1)
	{
		len = (int)sizeof(tmp) - 1;
	}

	for (i = 0; i < len; i++)
	{
		tmp[i] = str[start+i];
	}
	tmp[i] = '\0';

	return atoi(tmp);
}
7142
/*
 * Expands a CIGAR string into one character per position: each <count><op>
 * item with op in {M, D, I} becomes `count` copies of op in `matrix`, which is
 * NUL-terminated on return.
 *
 *   cigar       CIGAR text; only the first cigar_size characters are read
 *   cigar_size  number of characters of `cigar` to process
 *   matrix      output buffer; the caller must provide room for the expansion
 *
 * Fixes over the previous version: the index is bounds-checked before the
 * character is read, and items with an op code other than M/D/I (or with no op
 * code at all) are skipped instead of leaving uninitialised bytes in `matrix`.
 */
void convertCigarToMatrix(char *cigar, int cigar_size, char * matrix)
{
	int i = 0;
	int size = 0;

	matrix[0] = '\0';

	while (i < cigar_size)
	{
		if (cigar[i] >= '0' && cigar[i] <= '9')
		{
			const int start = i;
			int value;
			int j;
			char op;

			while (i < cigar_size && cigar[i] >= '0' && cigar[i] <= '9')
			{
				i++;
			}

			value = matoi(cigar, start, i);
			op = (i < cigar_size) ? cigar[i] : '\0';

			if (op == 'M' || op == 'D' || op == 'I')
			{
				for (j = 0; j < value; j++)
				{
					matrix[size] = op;
					size++;
				}
			}
		}
		/* Step over the op code (or over a stray non-digit character). */
		i++;
	}
	matrix[size] = '\0';
}
7178
7179
7180
/*
 * Expands an MD string into one character per item, NUL-terminated:
 *   - a decimal number n becomes n copies of 'M',
 *   - '^' becomes 'D',
 *   - every other character is copied unchanged.
 *
 *   md       MD text; only the first md_size characters are read
 *   md_size  number of characters of `md` to process
 *   matrix   output buffer; the caller must provide room for the expansion
 *
 * The index is now bounds-checked before the character is read while
 * scanning a number.
 */
void convertMDToMatrix(char *md, int md_size, char * matrix)
{
	int i = 0;
	int size = 0;

	matrix[0] = '\0';

	while (i < md_size)
	{
		const char c = md[i];

		if (c >= '0' && c <= '9')
		{
			const int start = i;
			int value;
			int j;

			while (i < md_size && md[i] >= '0' && md[i] <= '9')
			{
				i++;
			}

			value = matoi(md, start, i);
			for (j = 0; j < value; j++)
			{
				matrix[size] = 'M';
				size++;
			}
			/* i already points at the first character after the number. */
			continue;
		}

		matrix[size] = (c == '^') ? 'D' : c;
		size++;
		i++;
	}
	matrix[size] = '\0';
}
7223
7224
/*
 * Merges a CIGAR string and an MD string into a single per-position edit
 * string in `matrix` (NUL-terminated). Both inputs are first expanded with
 * convertCigarToMatrix() and convertMDToMatrix(); then, walking the expanded
 * CIGAR:
 *   - 'M' emits the current expanded-MD character ('M' for a match, otherwise
 *     whatever the MD expansion holds), or 'M' once the MD expansion is
 *     exhausted;
 *   - 'D' emits 'D' followed by the next expanded-MD character;
 *   - 'I' emits 'I'.
 * The MD cursor advances by one after every CIGAR position while characters
 * remain.
 *
 * Fixes over the previous version: `matrix` is always terminated (a stray
 * `if (strlen(tmp1))` used to make the terminator conditional, leaving the
 * output unterminated for an empty CIGAR), and the MD cursor can no longer
 * move past the terminator of the expanded MD string in the 'D' case.
 *
 * NOTE(review): the two 200-byte scratch buffers are filled by the expansion
 * helpers without a capacity check - confirm that inputs are short enough.
 */
void convertMDCigarToMatrix(char *cigar, int cigar_size, char *md, int md_size, char *matrix)
{
	char cigarOps[200];
	char mdOps[200];

	int i;
	int j = 0;
	int size = 0;
	int cigarLen;
	int mdLen;

	convertMDToMatrix(md, md_size, mdOps);
	convertCigarToMatrix(cigar, cigar_size, cigarOps);

	cigarLen = (int)strlen(cigarOps);
	mdLen = (int)strlen(mdOps);

	for (i = 0; i < cigarLen; i++)
	{
		switch (cigarOps[i])
		{
		case 'M':
			matrix[size] = (j < mdLen) ? mdOps[j] : 'M';
			size++;
			break;

		case 'D':
			matrix[size] = 'D';
			size++;
			if (j < mdLen)
			{
				j++;
			}
			/* At worst this copies the terminator of mdOps. */
			matrix[size] = mdOps[j];
			size++;
			break;

		case 'I':
			matrix[size] = 'I';
			size++;
			break;

		default:
			break;
		}

		if (j < mdLen)
		{
			j++;
		}
	}

	matrix[size] = '\0';
}
7288
/*
 * Rewrites the edit string `in_matrix` into `out_matrix` (NUL-terminated),
 * attaching read bases from `seq`:
 *   - 'D' emits 'D', skips the character that follows it in in_matrix,
 *     advances the read cursor by one and then emits the base found there;
 *   - 'I' emits 'I' followed by the current read base;
 *   - any other character (including 'M') is copied as is and consumes one
 *     read base.
 * The caller must provide an output buffer large enough for the result, and
 * out_matrix must not overlap in_matrix.
 */
void convertInsertion(char * in_matrix, char * seq, char *out_matrix)
{
	const int inLen = (int)strlen(in_matrix);
	int src;		/* cursor in in_matrix */
	int base = 0;		/* cursor in seq */
	int dst = 0;		/* cursor in out_matrix */

	for (src = 0; src < inLen; src++)
	{
		const char op = in_matrix[src];

		switch (op)
		{
		case 'D':
			out_matrix[dst++] = 'D';
			src++;
			base++;
			out_matrix[dst++] = seq[base];
			base++;
			break;

		case 'I':
			out_matrix[dst++] = 'I';
			out_matrix[dst++] = seq[base];
			base++;
			break;

		default:
			out_matrix[dst++] = op;
			base++;
			break;
		}
	}

	out_matrix[dst] = '\0';
}
7333
7334 /**********************************************/
7335 void outputPairedEndDiscPP()
7336 {
7337 char tmp_matrix1[200];
7338 char tmp_matrix2[200];
7339
7340 char matrix1[200];
7341 char matrix2[200];
7342
7343 char cigar1[200];
7344 char editString1[200];
7345
7346 char cigar2[200];
7347 char editString2[200];
7348
7349 char seq1[SEQ_LENGTH+1];
7350 char qual1[SEQ_LENGTH+1];
7351
7352 char seq2[SEQ_LENGTH+1];
7353 char qual2[SEQ_LENGTH+1];
7354
7355 char genName[SEQ_LENGTH];
7356 char fname1[FILE_NAME_LENGTH];
7357 char fname2[FILE_NAME_LENGTH];
7358 char l;
7359 int l_size;
7360 int loc1, loc2;
7361 int err1, err2;
7362 char dir1, dir2;
7363 float sc1, sc2, lsc=0;
7364 int flag = 0;
7365 int rNo,lrNo = -1;
7366 int tmp;
7367 FILE *in, *out;
7368
7369 sprintf(fname1, "%s__%s__disc", mappingOutputPath, mappingOutput);
7370 sprintf(fname2, "%s%s_DIVET.vh", mappingOutputPath, mappingOutput);
7371
7372 in = fileOpen(fname1, "r");
7373 out = fileOpen(fname2, "w");
7374
7375 if (in != NULL)
7376 {
7377 flag = fread(&rNo, sizeof(int), 1, in);
7378 }
7379 else
7380 {
7381 flag = 0;
7382 }
7383
7384 seq1[SEQ_LENGTH] = '\0';
7385 qual1[SEQ_LENGTH] = '\0';
7386
7387 seq2[SEQ_LENGTH] = '\0';
7388 qual2[SEQ_LENGTH] = '\0';
7389
7390 while (flag)
7391 {
7392 tmp = fread(&l, sizeof(char), 1, in);
7393 tmp = fread(genName, sizeof(char), l, in);
7394 genName[(int)l]='\0';
7395 tmp = fread(&loc1, sizeof(int), 1, in);
7396 tmp = fread(&err1, sizeof(int), 1, in);
7397 tmp = fread(&sc1, sizeof(float), 1, in);
7398
7399 //tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out);
7400
7401 tmp = fread(&l_size, sizeof(int), 1, in);
7402 tmp = fread(cigar1, sizeof(char), l_size, in);
7403 cigar1[(int)l_size]='\0';
7404 //tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out);
7405
7406 //tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out);
7407 tmp = fread(&l_size, sizeof(int), 1, in);
7408 tmp = fread(editString1, sizeof(char), l_size, in);
7409 editString1[(int)l_size]='\0';
7410 //tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out);
7411
7412 tmp = fread(&loc2, sizeof(int), 1, in);
7413 tmp = fread(&err2, sizeof(int), 1, in);
7414 tmp = fread(&sc2, sizeof(float), 1, in);
7415
7416 tmp = fread(&l_size, sizeof(int), 1, in);
7417 tmp = fread(cigar2, sizeof(char), l_size, in);
7418 cigar2[(int)l_size]='\0';
7419
7420 tmp = fread(&l_size, sizeof(int), 1, in);
7421 tmp = fread(editString2, sizeof(char), l_size, in);
7422 editString2[(int)l_size]='\0';
7423
7424 convertMDCigarToMatrix(cigar1, strlen(cigar1), editString1, strlen(editString1), tmp_matrix1);
7425 convertMDCigarToMatrix(cigar2, strlen(cigar2), editString2, strlen(editString2), tmp_matrix2);
7426
7427
7428 if(_msf_readHasConcordantMapping[rNo] == 0)
7429 {
7430
7431 dir1 = dir2 = 'F';
7432
7433 strncpy(seq1, _msf_seqList[rNo*2].seq, SEQ_LENGTH);
7434 strncpy(seq2, _msf_seqList[rNo*2+1].seq, SEQ_LENGTH);
7435
7436 if (loc1 < 0)
7437 {
7438 dir1 = 'R';
7439 loc1 = -loc1;
7440
7441 strncpy(seq1, _msf_seqList[rNo*2].rseq, SEQ_LENGTH);
7442 }
7443
7444 if (loc2 < 0)
7445 {
7446 dir2 = 'R';
7447 loc2 = -loc2;
7448
7449 strncpy(seq2, _msf_seqList[rNo*2+1].rseq, SEQ_LENGTH);
7450 }
7451
7452 convertInsertion(tmp_matrix1, seq1, matrix1);
7453 convertInsertion(tmp_matrix2, seq2, matrix2);
7454
7455
7456 if (rNo != lrNo)
7457 {
7458 int j;
7459 for (j=0; j<SEQ_LENGTH; j++)
7460 {
7461 lsc += _msf_seqList[rNo*2].qual[j]+_msf_seqList[rNo*2+1].qual[j];
7462 }
7463 lsc /= 2*SEQ_LENGTH;
7464 lsc -= 33;
7465 lrNo = rNo;
7466 }
7467
7468 char event = '\0';
7469
7470
7471 if ( dir1 == dir2 )
7472 {
7473 event = 'V';
7474 }
7475 else
7476 {
7477 if (loc1 < loc2)
7478 {
7479
7480 if (dir1 == 'R' && dir2 == 'F')
7481 {
7482 event = 'E';
7483
7484 }
7485 else if ( loc2 - loc1 >= maxPairEndedDiscordantDistance )
7486 {
7487 event = 'D';
7488 }
7489 else
7490 {
7491 event = 'I';
7492 }
7493 }
7494 else if (loc2 < loc1)
7495 {
7496 if (dir2 == 'R' && dir1 == 'F')
7497 {
7498 event = 'E';
7499 }
7500 else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance )
7501 {
7502 event = 'D';
7503 }
7504 else
7505 {
7506 event = 'I';
7507 }
7508 }
7509 }
7510 _msf_seqList[rNo*2].hits[0] = 2;
7511 if(event != 'E')
7512 fprintf(out, "%s\t%s\t%d\t%d\t%c\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%e\n",
7513 _msf_seqList[rNo*2].name, genName, loc1, (loc1+SEQ_LENGTH-1), dir1,
7514 loc2, (loc2+SEQ_LENGTH-1), dir2, event, (err1+err2), lsc, sc1*sc2);
7515
7516 }
7517 flag = fread(&rNo, sizeof(int), 1, in);
7518 }
7519
7520 fclose(in);
7521 fclose(out);
7522
7523 unlink(fname1);
7524 }
7525
7526 void finalizeOEAReads(char *fileName)
7527 {
7528 FILE *fp_out1;
7529 FILE * in;
7530
7531 char genName[SEQ_LENGTH];
7532
7533 char fname1[FILE_NAME_LENGTH];
7534 char fname2[FILE_NAME_LENGTH];
7535
7536 char l=0;
7537 int loc1=0;
7538
7539 int err1;
7540
7541 char d;
7542
7543 float sc1=0;
7544 int flag = 0;
7545 int rNo=-1;
7546 int tmp=0;
7547
7548 int cigarSize = 0;
7549 int mdSize = 0;
7550
7551 char cigar[SEQ_LENGTH+1];
7552 char md[SEQ_LENGTH+1];
7553
7554 char *seq1, *seq2, *qual1, *qual2;
7555 char *rqual1, *rqual2;
7556
7557 seq1=NULL; seq2=NULL; qual1=NULL; qual2=NULL;
7558
7559 rqual1 = getMem(200*sizeof(char));
7560 rqual2 = getMem(200*sizeof(char));
7561
7562 rqual1[0] = '\0';
7563 rqual2[0] = '\0';
7564
7565 /*
7566 char mappingOutput2[2 * SEQ_LENGTH];
7567 int mo_len;
7568 mo_len = strlen(mappingOutput);
7569 strcpy(mappingOutput2, mappingOutput);
7570
7571 if (mappingOutput[mo_len-1]=='m' && mappingOutput[mo_len-2]=='a' && mappingOutput[mo_len-3]=='s' && mappingOutput[mo_len-4]=='.')
7572 mappingOutput2[mo_len-4] = 0;
7573 */
7574
7575 sprintf(fname1, "%s%s_OEA.sam", mappingOutputPath, mappingOutput);
7576
7577 fp_out1 = fileOpen(fname1, "w");
7578
7579 in = NULL;
7580 if (pairedEndDiscordantMode){
7581 sprintf(fname2, "%s__%s__oea", mappingOutputPath, mappingOutput);
7582
7583 in = fileOpen(fname2, "r");
7584 }
7585
7586
7587 if (in != NULL)
7588 {
7589 flag = fread(&rNo, sizeof(int), 1, in);
7590 }
7591 else
7592 {
7593 flag = 0;
7594 }
7595
7596 while (flag)
7597 {
7598 cigar[0] = '\0';
7599 md[0] = '\0';
7600
7601 tmp = fread(&l, sizeof(char), 1, in);
7602 tmp = fread(genName, sizeof(char), l, in);
7603
7604 genName[(int)l]='\0';
7605
7606
7607 tmp = fread(&loc1, sizeof(int), 1, in);
7608 tmp = fread(&err1, sizeof(int), 1, in);
7609 tmp = fread(&sc1, sizeof(float), 1, in);
7610
7611 tmp = fread (&cigarSize, sizeof(int), 1, in);
7612 tmp = fread (cigar, sizeof(char), cigarSize, in);
7613
7614 cigar[cigarSize] = '\0';
7615
7616 tmp = fread (&mdSize, sizeof(int), 1, in);
7617 tmp = fread (md, sizeof(char), mdSize, in);
7618 md[mdSize] = '\0';
7619
7620 d = 1;
7621
7622 if(loc1 < 0)
7623 {
7624 d = -1;
7625 loc1 *= -1;
7626
7627 seq1 = _msf_seqList[rNo].rseq;
7628 reverse(_msf_seqList[rNo].qual, rqual1, SEQ_LENGTH);
7629 rqual1[SEQ_LENGTH] = '\0';
7630 }
7631 else
7632 {
7633 seq1 = _msf_seqList[rNo].seq;
7634 qual1 = _msf_seqList[rNo].qual;
7635 }
7636
7637 if(rNo % 2 == 0)
7638 {
7639 seq2 = _msf_seqList[rNo+1].seq;
7640 qual2 = _msf_seqList[rNo+1].qual;
7641 }
7642 else
7643 {
7644 seq2 = _msf_seqList[rNo-1].seq;
7645 qual2 = _msf_seqList[rNo-1].qual;
7646 }
7647
7648 if(_msf_seqHits[rNo] != 0 && _msf_seqHits[(rNo%2==0)?rNo+1:rNo-1] == 0)
7649 {
7650 _msf_output.POS = loc1;
7651 _msf_output.MPOS = 0;
7652 _msf_output.FLAG = (rNo % 2 ==0)? 1+4+32*d+128 : 1+8+16*d+64 ;
7653 _msf_output.ISIZE = 0;
7654 _msf_output.SEQ = seq1;
7655 _msf_output.QUAL = qual1;
7656 _msf_output.QNAME = _msf_seqList[rNo].name;
7657 _msf_output.RNAME = genName;
7658 _msf_output.MAPQ = 255;
7659 _msf_output.CIGAR = cigar;
7660 _msf_output.MRNAME = "=";
7661
7662
7663 _msf_output.optSize = 4;
7664 _msf_output.optFields = _msf_optionalFields;
7665
7666 _msf_optionalFields[0].tag = "NM";
7667 _msf_optionalFields[0].type = 'i';
7668 _msf_optionalFields[0].iVal = err1;
7669
7670 _msf_optionalFields[1].tag = "MD";
7671 _msf_optionalFields[1].type = 'Z';
7672 _msf_optionalFields[1].sVal = md;
7673
7674
7675
7676 //for the OEA reads
7677 _msf_optionalFields[2].tag = "NS";
7678 _msf_optionalFields[2].type = 'Z';
7679 _msf_optionalFields[2].sVal = seq2;
7680
7681
7682 _msf_optionalFields[3].tag = "NQ";
7683 _msf_optionalFields[3].type = 'Z';
7684 _msf_optionalFields[3].sVal = qual2;
7685
7686 outputSAM(fp_out1, _msf_output);
7687
7688 _msf_seqList[rNo].hits[0] = -1;
7689 _msf_seqList[(rNo%2==0)?rNo+1:rNo-1].hits[0] = -1;
7690 }
7691 flag = fread(&rNo, sizeof(int), 1, in);
7692 }
7693
7694 freeMem(rqual1, 0);
7695 freeMem(rqual2, 0);
7696
7697 unlink(fname2);
7698
7699 fclose(fp_out1);
7700 }
7701
7702
7703 void outputTransChromosal(char *fileName1, char *fileName2, FILE * fp_out)
7704 {
7705 int i = 0;
7706 int j = 0;
7707 int k = 0;
7708
7709 char *index;
7710
7711 int size1 = 0;
7712 int size2 = 0;
7713
7714 FILE *fp1 = NULL;
7715 FILE *fp2 = NULL;
7716
7717 char geneFileName1[FILE_NAME_LENGTH];
7718 char geneFileName2[FILE_NAME_LENGTH];
7719
7720 FullMappingInfoLink *miL = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink));
7721 FullMappingInfoLink *miR = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink));
7722
7723
7724 if(fileName1 != NULL && fileName2 != NULL)
7725 {
7726
7727 fp1 = fileOpen(fileName1, "r");
7728 fp2 = fileOpen(fileName2, "r");
7729
7730 index = strstr(fileName1, "__");
7731 strncpy(geneFileName1, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2);
7732 geneFileName1[strstr(index + 2, "__") - index - 2] = '\0';
7733
7734 index = strstr(fileName2, "__");
7735 strncpy(geneFileName2, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2);
7736 geneFileName2[strstr(index + 2, "__") - index - 2] = '\0';
7737
7738
7739 for(i = 0; i < _msf_seqListSize / 2; i++)
7740 {
7741 fread(&size1, sizeof(int), 1, fp1);
7742 fread(&size2, sizeof(int), 1, fp2);
7743
7744 miL[i].mi = getMem(size1 * sizeof(FullMappingInfo) );
7745 miR[i].mi = getMem(size2 * sizeof(FullMappingInfo) );
7746
7747 miL[i].size = size1;
7748 miR[i].size = size2;
7749
7750 for(j = 0; j < size1; j++)
7751 {
7752 fread(&(miL[i].mi[j].loc), sizeof(int), 1, fp1);
7753
7754 fread (&(miL[i].mi[j].err), sizeof(int), 1, fp1);
7755
7756 fread (&(miL[i].mi[j].cigarSize), sizeof(int), 1, fp1);
7757 fread ((miL[i].mi[j].cigar), sizeof(char), miL[i].mi[j].cigarSize+1, fp1);
7758
7759 fread (&(miL[i].mi[j].mdSize), sizeof(int), 1, fp1);
7760 fread ((miL[i].mi[j].md), sizeof(char), miL[i].mi[j].mdSize+1, fp1);
7761
7762 miL[i].mi[j].dir = 1;
7763 if(miL[i].mi[j].loc < 1)
7764 {
7765 miL[i].mi[j].loc *= -1;
7766 miL[i].mi[j].dir = -1;
7767 }
7768 }
7769 for(k = 0; k < size2; k++)
7770 {
7771 fread(&(miR[i].mi[k].loc), sizeof(int), 1, fp2);
7772
7773 fread (&(miR[i].mi[k].err), sizeof(int), 1, fp2);
7774
7775 fread (&(miR[i].mi[k].cigarSize), sizeof(int), 1, fp2);
7776 fread ((miR[i].mi[k].cigar), sizeof(char), miR[i].mi[k].cigarSize+1, fp2);
7777
7778 fread (&(miR[i].mi[k].mdSize), sizeof(int), 1, fp2);
7779 fread ((miR[i].mi[k].md), sizeof(char), miR[i].mi[k].mdSize+1, fp2);
7780
7781 miR[i].mi[k].dir = 1;
7782 if(miR[i].mi[k].loc < 1)
7783 {
7784 miR[i].mi[k].loc *= -1;
7785 miR[i].mi[k].dir = -1;
7786 }
7787 }
7788 if(_msf_readHasConcordantMapping[i] == 0 && size1 != 0 && size2 != 0 && (size1 * size2 < MAX_TRANS_CHROMOSAL_OUTPUT))
7789 {
7790 int d1 = 0;
7791 int d2 = 0;
7792 char *seq, *qual;
7793 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
7794 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1];
7795 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0';
7796 seq1 = _msf_seqList[i*2].seq;
7797 rseq1 = _msf_seqList[i*2].rseq;
7798 qual1 = _msf_seqList[i*2].qual;
7799 reverse(_msf_seqList[i*2].qual, rqual1, SEQ_LENGTH);
7800
7801 seq2 = _msf_seqList[i*2+1].seq;
7802 rseq2 = _msf_seqList[i*2+1].rseq;
7803 qual2 = _msf_seqList[i*2+1].qual;
7804 reverse(_msf_seqList[i*2+1].qual, rqual2, SEQ_LENGTH);
7805
7806 for(j = 0; j < size1; j++)
7807 {
7808 d1 = (miL[i].mi[j].dir == -1)?1:0;
7809
7810 if ( d1 )
7811 {
7812 seq = rseq1;
7813 qual = rqual1;
7814 }
7815 else
7816 {
7817 seq = seq1;
7818 qual = qual1;
7819 }
7820
7821 for(k = 0; k < size2; k++)
7822 {
7823
7824 d2 = (miR[i].mi[k].dir == -1)?1:0;
7825
7826 _msf_output.POS = miL[i].mi[j].loc;
7827 _msf_output.MPOS = miR[i].mi[k].loc;
7828 _msf_output.FLAG = 0;
7829 _msf_output.ISIZE = 0;
7830 _msf_output.SEQ = seq,
7831 _msf_output.QUAL = qual;
7832 _msf_output.QNAME = _msf_seqList[i*2].name;
7833 _msf_output.RNAME = geneFileName1;
7834 _msf_output.MAPQ = 255;
7835 _msf_output.CIGAR = miL[i].mi[j].cigar;
7836 _msf_output.MRNAME = "=";
7837
7838 _msf_output.optSize = 2;
7839 _msf_output.optFields = _msf_optionalFields;
7840
7841 _msf_optionalFields[0].tag = "NM";
7842 _msf_optionalFields[0].type = 'i';
7843 _msf_optionalFields[0].iVal = miL[i].mi[j].err;
7844
7845 _msf_optionalFields[1].tag = "MD";
7846 _msf_optionalFields[1].type = 'Z';
7847 _msf_optionalFields[1].sVal = miL[i].mi[j].md;
7848
7849
7850 if ( d2 )
7851 {
7852 seq = rseq2;
7853 qual = rqual2;
7854 }
7855 else
7856 {
7857 seq = seq2;
7858 qual = qual2;
7859 }
7860
7861 outputSAM(fp_out, _msf_output);
7862
7863
7864 _msf_output.POS = miR[i].mi[k].loc;
7865 _msf_output.MPOS = miL[i].mi[j].loc;
7866 _msf_output.FLAG = 0;
7867 _msf_output.ISIZE = 0;
7868 _msf_output.SEQ = seq,
7869 _msf_output.QUAL = qual;
7870 _msf_output.QNAME = _msf_seqList[i*2+1].name;
7871 _msf_output.RNAME = geneFileName2;
7872 _msf_output.MAPQ = 255;
7873 _msf_output.CIGAR = miR[i].mi[k].cigar;
7874 _msf_output.MRNAME = "=";
7875
7876 _msf_output.optSize = 2;
7877 _msf_output.optFields = _msf_optionalFields;
7878
7879 _msf_optionalFields[0].tag = "NM";
7880 _msf_optionalFields[0].type = 'i';
7881 _msf_optionalFields[0].iVal = miR[i].mi[k].err;
7882
7883 _msf_optionalFields[1].tag = "MD";
7884 _msf_optionalFields[1].type = 'Z';
7885 _msf_optionalFields[1].sVal = miR[i].mi[k].md;
7886
7887 outputSAM(fp_out, _msf_output);
7888
7889 }
7890 }
7891 }
7892 }
7893
7894 }
7895
7896 for(i = 0; i < _msf_seqListSize / 2; i++)
7897 {
7898 freeMem(miL[i].mi, miL[i].size * sizeof(FullMappingInfo));
7899 freeMem(miR[i].mi, miR[i].size * sizeof(FullMappingInfo));
7900 }
7901
7902 freeMem(miL, _msf_seqListSize * sizeof(FullMappingInfoLink));
7903 freeMem(miR, _msf_seqListSize * sizeof(FullMappingInfoLink));
7904
7905 fclose(fp1);
7906 fclose(fp2);
7907 }
7908
7909 /*
7910 if flag is 1 it will output all the possible trans chromsal mapping
7911 otherwise only tmp file will be delete
7912
7913 */
7914
7915 void outputAllTransChromosal(int flag)
7916 {
7917
7918 int i = 0;
7919 int j = 0;
7920 int k = 0;
7921 int l = 0;
7922
7923 FILE *fp_out = NULL;
7924 char fname1[200];
7925
7926 if(flag)
7927 {
7928 fp_out = fileOpen(fname1, "w");
7929
7930 sprintf(fname1, "%s%s_TRANSCHROMOSOMAL", mappingOutputPath, mappingOutput);
7931
7932 // for(i = 0; i < _msf_maxFile; i++)
7933 // {
7934 i = 0;
7935 for(j = i+1; j < _msf_maxFile; j++)
7936 {
7937 if(i != j)
7938 {
7939 for(k = 0; k < _msf_fileCount[i]; k++)
7940 {
7941 for(l = 0; l < _msf_fileCount[j]; l++)
7942 {
7943 outputTransChromosal(_msf_fileName[i][k][0], _msf_fileName[j][l][1], fp_out);
7944 }// for l
7945 }// for k
7946 }// if
7947 }// for j
7948 // } //for i
7949 }
7950
7951 for(i = 0; i < _msf_maxFile; i++)
7952 {
7953 for(j = 0; j < _msf_fileCount[i]; j++)
7954 {
7955 unlink(_msf_fileName[i][j][0]);
7956 unlink(_msf_fileName[i][j][1]);
7957 }
7958 }
7959 if(flag)
7960 fclose(fp_out);
7961 }