Mercurial > repos > calkan > mrfast
comparison mrfast-2.1.0.5/MrFAST.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7b3dc85dc7fd | 1:d4054b05b015 |
---|---|
1 /* | |
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without modification, | |
6 * are permitted provided that the following conditions are met: | |
7 * | |
8 * Redistributions of source code must retain the above copyright notice, this list | |
9 * of conditions and the following disclaimer. | |
10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
11 * list of conditions and the following disclaimer in the documentation and/or other | |
12 * materials provided with the distribution. | |
13 * - Neither the names of the University of Washington, Simon Fraser University, | |
14 * nor the names of its contributors may be | |
15 * used to endorse or promote products derived from this software without specific | |
16 * prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 /* | |
32 Authors: | |
33 Farhad Hormozdiari | |
34 Faraz Hach | |
35 Can Alkan | |
36 Emails: | |
37 farhadh AT uw DOT edu | |
38 fhach AT cs DOT sfu DOT ca | |
39 calkan AT uw DOT edu | |
40 */ | |
41 | |
42 #include <stdio.h> | |
43 #include <stdlib.h> | |
44 #include <string.h> | |
45 #include <math.h> | |
46 #include <dirent.h> | |
47 #include <xmmintrin.h> | |
48 #include <emmintrin.h> | |
49 #include <mmintrin.h> | |
50 | |
51 | |
52 #include "Common.h" | |
53 #include "Reads.h" | |
54 #include "HashTable.h" | |
55 #include "Output.h" | |
56 #include "MrFAST.h" | |
57 #include "RefGenome.h" | |
58 | |
59 | |
/*
 * Small expression macros used by the edit-distance routines.
 *
 * Every parameter is parenthesized at each use so that arguments containing
 * low-precedence operators (e.g. `x & y`, `c ? p : q`) are compared as a
 * whole.  Arguments are still evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define min(a,b) ((a)>(b)?(b):(a))
#define min3(a,b,c) ((a)>(b)?((b)>(c)?(c):(b)):((a)>(c)?(c):(a)))

/* Maps a nucleotide character to 0..3 for 'A','C','G','T'; anything else maps to 4. */
#define CHARCODE(a) ((a)=='A' ? 0 : ((a)=='C' ? 1 : ((a)=='G' ? 2 : ((a)=='T' ? 3 : 4))))

#define MAX_REF_SIZE 18
65 | |
66 | |
/* Prototype only; the definition is not in the part of the file shown here. */
67 float calculateScore(int index, char *seq, char *qual, char *md); | |
/* NOTE(review): versionNumberF is presumably the trailing part of the release
   number (the changeset message says 2.1.0.5) -- confirm where it is printed. */
68 unsigned char mrFAST = 1; | |
69 char *versionNumberF="0.5"; | |
70 | |
/* Run-wide counters, all starting at zero.  They are not updated in the
   part of the file shown here. */
71 long long verificationCnt = 0; | |
72 long long mappingCnt = 0; | |
73 long long mappedSeqCnt = 0; | |
74 long long completedSeqCnt = 0; | |
75 char *mappingOutput; | |
76 /**********************************************/ | |
/* Reference-genome state.  NOTE(review): judging by the names these hold the
   current reference chunk, its length/offset and its name -- confirm against
   the code that fills them. */
77 char *_msf_refGen = NULL; | |
78 int _msf_refGenLength = 0; | |
79 int _msf_refGenOffset = 0; | |
80 char *_msf_refGenName = NULL; | |
81 | |
82 int _msf_refGenBeg; | |
83 int _msf_refGenEnd; | |
84 | |
85 IHashTable *_msf_hashTable = NULL; | |
86 | |
87 int *_msf_samplingLocs; | |
88 int *_msf_samplingLocsEnds; | |
89 int _msf_samplingLocsSize; | |
90 | |
91 Read *_msf_seqList; | |
92 int _msf_seqListSize; | |
93 | |
94 Pair *_msf_sort_seqList = NULL; | |
95 int *_msf_map_sort_seqList; | |
96 | |
97 ReadIndexTable *_msf_rIndex = NULL; | |
98 int _msf_rIndexSize; | |
99 int _msf_rIndexMax; | |
100 | |
101 SAM _msf_output; | |
102 | |
103 OPT_FIELDS *_msf_optionalFields; | |
104 | |
105 char *_msf_op; | |
106 | |
107 int *_msf_verifiedLocs = NULL; | |
108 | |
109 char _msf_numbers[200][3]; | |
110 char _msf_cigar[5]; | |
111 | |
112 MappingInfo *_msf_mappingInfo; | |
113 | |
114 int *_msf_seqHits; | |
115 int _msf_openFiles = 0; | |
116 int _msf_maxLSize=0; | |
117 int _msf_maxRSize=0; | |
118 | |
119 BestFullMappingInfo *bestHitMappingInfo; | |
120 | |
121 /*************************/ | |
/* NOTE(review): _msf_fileName is a 4000 x 200 x 2 table of fixed-size name
   buffers held in static storage; FILE_NAME_LENGTH is defined elsewhere. */
122 int _msf_maxFile=0; | |
123 char _msf_fileName[4000][200][2][FILE_NAME_LENGTH]; | |
124 int _msf_fileCount[4000]; | |
125 | |
126 char *_msf_readHasConcordantMapping; //boolean if a read has concordant mapping :D | |
127 | |
128 int *_msf_oeaMapping; | |
129 int *_msf_discordantMapping; | |
130 | |
131 FILE *bestConcordantFILE; | |
132 FILE *bestDiscordantFILE; | |
133 | |
134 int counter = 0; | |
135 | |
/* Score matrices.  initLookUpTable() seeds row 0 and column 0 of scoreF and
   scoreB with 0..errThreshold; the other three are not touched in the part
   of the file shown here. */
136 int scoreF[200][200]; | |
137 int scoreB[200][200]; | |
138 | |
139 int score[200][200]; | |
140 int direction1[200][200]; | |
141 int direction2[200][200]; | |
142 | |
/* Vector of eight 16-bit lanes set by initLookUpTable(): lanes 0-4 hold 1,
   lanes 5-7 hold 0.  backwardEditDistanceSSE2Extention() ANDs it with an
   inverted compare result to obtain 0/1 mismatch flags. */
143 __m128i MASK; | |
144 | |
/* 15625 == 5^6, the number of distinct six-digit base-5 codes that
   fastEditDistance() decodes.  NOTE(review): presumably a cache of
   fastEditDistance() results indexed by two such codes -- confirm.  With a
   4-byte int this array occupies roughly 931 MiB of static storage. */
145 int lookUpTable[15625][15625]; | |
146 | |
147 /**************************************************Methods***************************************************/ | |
148 int smallEditDistanceF(char *a, int lena, char *b, int lenb) | |
149 { | |
150 int matrix[20][20]; | |
151 int i = 0; | |
152 int j = 0; | |
153 | |
154 for(i = 0; i <= lena; i++) | |
155 { | |
156 matrix[0][i] = i; | |
157 } | |
158 | |
159 for(i = 0; i <= lenb; i++) | |
160 { | |
161 matrix[i][0] = i; | |
162 } | |
163 | |
164 | |
165 for(i = 1; i <= lenb; i++) | |
166 { | |
167 for(j = 1; j <= lena; j++) | |
168 { | |
169 matrix[i][j] = min3(matrix[i-1][j-1]+ (a[j-1] != b[i-1]),matrix[i][j-1]+1 ,matrix[i-1][j]+1); | |
170 } | |
171 } | |
172 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]); | |
173 } | |
174 | |
175 int smallEditDistanceB(char *a, int lena, char *b, int lenb) | |
176 { | |
177 int matrix[20][20]; | |
178 int i = 0; | |
179 int j = 0; | |
180 | |
181 for(i = 0; i <= lena; i++) | |
182 { | |
183 matrix[0][i] = i; | |
184 } | |
185 | |
186 for(i = 0; i <= lenb; i++) | |
187 { | |
188 matrix[i][0] = i; | |
189 } | |
190 | |
191 | |
192 for(i = 1; i <= lenb; i++) | |
193 { | |
194 for(j = 1; j <= lena; j++) | |
195 { | |
196 matrix[i][j] = min3(matrix[i-1][j-1]+ (*(a-j+1) != *(b-i+1)),matrix[i][j-1]+1 ,matrix[i-1][j]+1); | |
197 } | |
198 } | |
199 | |
200 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]); | |
201 } | |
202 | |
/*
 * Expands `code` into six characters, most significant base-5 digit first.
 * Digits 0..3 become 'A','C','G','T'; any other remainder (including the
 * negative remainders of a negative code) becomes 'N'.  out must hold 7 bytes.
 */
static void fed_decodeKmer(int code, char *out)
{
	int pos;

	for (pos = 5; pos >= 0; pos--)
	{
		switch (code % 5)
		{
			case 0:  out[pos] = 'A'; break;
			case 1:  out[pos] = 'C'; break;
			case 2:  out[pos] = 'G'; break;
			case 3:  out[pos] = 'T'; break;
			default: out[pos] = 'N'; break;
		}
		code /= 5;
	}
	out[6] = '\0';
}

/*
 * Decodes per1 and per2 into two 6-character strings and fills the full 7x7
 * edit-distance table between them.
 *
 * Returns the smallest value found in the last row or the last column of
 * that table, i.e. the cheapest alignment in which at least one of the two
 * strings is consumed completely.
 */
char fastEditDistance(int per1, int per2)
{
	char first[7];
	char second[7];
	int cost[7][7];
	int r;
	int c;
	int best = 20;

	fed_decodeKmer(per1, first);
	fed_decodeKmer(per2, second);

	for (r = 0; r < 7; r++)
	{
		cost[0][r] = r;
		cost[r][0] = r;
	}

	for (r = 1; r < 7; r++)
	{
		for (c = 1; c < 7; c++)
		{
			int viaDiag = cost[r-1][c-1] + (first[r-1] != second[c-1]);
			int viaLeft = cost[r][c-1] + 1;
			int viaUp = cost[r-1][c] + 1;
			int cell = viaDiag;

			if (viaLeft < cell)
				cell = viaLeft;
			if (viaUp < cell)
				cell = viaUp;
			cost[r][c] = cell;
		}
	}

	/* Scan the last column and the last row together. */
	for (r = 0; r < 7; r++)
	{
		if (cost[r][6] < best)
			best = cost[r][6];
		if (cost[6][r] < best)
			best = cost[6][r];
	}

	return best;
}
269 | |
270 void initLookUpTable() | |
271 { | |
272 int i = 0; | |
273 | |
274 MASK = _mm_insert_epi16(MASK,1,0); | |
275 MASK = _mm_insert_epi16(MASK,1,1); | |
276 MASK = _mm_insert_epi16(MASK,1,2); | |
277 MASK = _mm_insert_epi16(MASK,1,3); | |
278 MASK = _mm_insert_epi16(MASK,1,4); | |
279 MASK = _mm_insert_epi16(MASK,0,5); | |
280 MASK = _mm_insert_epi16(MASK,0,6); | |
281 MASK = _mm_insert_epi16(MASK,0,7); | |
282 | |
283 for(i = 0 ; i < errThreshold + 1; i++) | |
284 { | |
285 scoreF[0][i] = i; | |
286 scoreF[i][0] = i; | |
287 } | |
288 | |
289 for(i = 0 ; i < errThreshold + 1; i++) | |
290 { | |
291 scoreB[0][i] = i; | |
292 scoreB[i][0] = i; | |
293 } | |
294 | |
295 | |
296 } | |
297 | |
298 int backwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb) | |
299 { | |
300 if(lenb == 0 || lena == 0) | |
301 return 0; | |
302 | |
303 int i = 0; | |
304 int j = 0; | |
305 int k = 0; | |
306 | |
307 | |
308 int e = errThreshold; | |
309 | |
310 char flag = 0; | |
311 | |
312 int minError = 2*e; | |
313 | |
314 __m128i R0, R1; | |
315 __m128i Diag; | |
316 __m128i Side1, Side2; | |
317 __m128i Down1, Down2; | |
318 __m128i Error; | |
319 __m128i tmp; | |
320 | |
321 /* initialize */ | |
322 R0 = _mm_setzero_si128 (); | |
323 R1 = _mm_setzero_si128 (); | |
324 Diag = _mm_setzero_si128 (); | |
325 Side1 = _mm_setzero_si128 (); | |
326 Side2 = _mm_setzero_si128 (); | |
327 Down1 = _mm_setzero_si128 (); | |
328 Down2 = _mm_setzero_si128 (); | |
329 Error = _mm_setzero_si128 (); | |
330 tmp = _mm_setzero_si128 (); | |
331 /* end initialize */ | |
332 | |
333 if(lenb <= e) | |
334 { | |
335 return smallEditDistanceB(a,lena,b,lenb); | |
336 } | |
337 | |
338 | |
339 R1 = _mm_xor_si128(R1, R1); | |
340 R0 = _mm_xor_si128(R0, R0); | |
341 | |
342 Diag = _mm_xor_si128(Diag, Diag); | |
343 Side1 = _mm_xor_si128(Side1, Side1); | |
344 Down1 = _mm_xor_si128(Down1, Down1); | |
345 | |
346 Diag = _mm_insert_epi16(Diag,2*e,0); | |
347 | |
348 Side1 = _mm_insert_epi16(Side1,1,0); | |
349 Side1 = _mm_insert_epi16(Side1,2*e,1); | |
350 | |
351 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
352 Down1 = _mm_insert_epi16(Down1,1,1); | |
353 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
354 | |
355 R0 = _mm_insert_epi16(R0,0,0); | |
356 | |
357 R1 = _mm_insert_epi16(R1,1,0); | |
358 R1 = _mm_insert_epi16(R1,1,1); | |
359 | |
360 for(i=2; i <= e; i++) | |
361 { | |
362 //set side | |
363 Side1 = _mm_slli_si128(Side1,2); | |
364 Side1 = _mm_insert_epi16(Side1,1,0); | |
365 | |
366 Down1 = _mm_insert_epi16(Down1,1,0); | |
367 Down1 = _mm_slli_si128(Down1,2); | |
368 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
369 | |
370 Diag = _mm_xor_si128(Diag, Diag); | |
371 if( i%2 == 0) | |
372 { | |
373 Diag = _mm_insert_epi16(Diag,2*e,0); | |
374 | |
375 for(j=1;j<=i-1;j++) | |
376 { | |
377 Diag = _mm_slli_si128(Diag, 2); | |
378 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0); | |
379 } | |
380 Diag = _mm_slli_si128(Diag, 2); | |
381 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
382 | |
383 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag); | |
384 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1); | |
385 } | |
386 | |
387 else | |
388 { | |
389 Diag = _mm_insert_epi16(Diag,2*e,0); | |
390 for(j=i/2-1;j>=-i/2;j--) | |
391 { | |
392 Diag = _mm_slli_si128(Diag, 2); | |
393 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0); | |
394 } | |
395 Diag = _mm_slli_si128(Diag, 2); | |
396 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
397 | |
398 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
399 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
400 } | |
401 } | |
402 Error = _mm_xor_si128(Error, Error); | |
403 Side2 = _mm_xor_si128(Side2, Side2); | |
404 Down2 = _mm_xor_si128(Down2, Down2); | |
405 Down1 = _mm_xor_si128(Down1, Down1); | |
406 | |
407 Error = _mm_insert_epi16(Error,e,0); | |
408 Side1 = _mm_insert_epi16(Side2,2*e,0); | |
409 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
410 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
411 | |
412 | |
413 for(j=0; j < e; j++) | |
414 { | |
415 Side2 = _mm_slli_si128(Side2, 2); | |
416 Side2 = _mm_insert_epi16(Side2,1,0); | |
417 | |
418 Side1 = _mm_slli_si128(Side1, 2); | |
419 Side1 = _mm_insert_epi16(Side1,1,0); | |
420 | |
421 Down1 = _mm_slli_si128(Down1, 2); | |
422 Down1 = _mm_insert_epi16(Down1,1,0); | |
423 | |
424 Down2 = _mm_slli_si128(Down2, 2); | |
425 Down2 = _mm_insert_epi16(Down2,1,0); | |
426 | |
427 Error = _mm_slli_si128(Error, 2); | |
428 Error = _mm_insert_epi16(Error, e, 0); | |
429 } | |
430 | |
431 Down2= _mm_slli_si128(Down2, 2); | |
432 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
433 | |
434 for(; i <= 2*lenb-(e-1);i++) | |
435 { | |
436 flag = 0; | |
437 Diag = _mm_xor_si128(Diag, Diag); | |
438 if( i%2 == 0) | |
439 { | |
440 for(j=e/2;j>=-e/2;j--) | |
441 { | |
442 Diag = _mm_slli_si128(Diag, 2); | |
443 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0); | |
444 } | |
445 | |
446 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
447 R0 = _mm_min_epi16(R0, R1+Down1); | |
448 | |
449 | |
450 if(_mm_extract_epi16(R0,0) <= e) | |
451 flag = 1; | |
452 tmp = _mm_srli_si128(R0,2); | |
453 for(j=0; j <= e;j++) | |
454 { | |
455 if(_mm_extract_epi16(tmp,0) <= e) | |
456 flag = 1; | |
457 tmp = _mm_srli_si128(tmp,2); | |
458 } | |
459 | |
460 if(flag == 0) | |
461 return -1; | |
462 | |
463 if(i == 2*lenb-e) | |
464 { | |
465 tmp = _mm_srli_si128(R0,2); | |
466 for(k=0; k < e-2;k++) | |
467 tmp = _mm_srli_si128(tmp,2); | |
468 minError = _mm_extract_epi16(tmp,0); | |
469 } | |
470 | |
471 } | |
472 | |
473 else | |
474 { | |
475 for(j=e/2;j>=-e/2-1;j--) | |
476 { | |
477 Diag = _mm_slli_si128(Diag, 2); | |
478 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i)/2-j-1)),0); | |
479 } | |
480 | |
481 // printf("@%d %d %d %d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1), _mm_extract_epi16(Diag,2), | |
482 // _mm_extract_epi16(Diag,3)); | |
483 | |
484 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
485 | |
486 // printf("#~%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2), | |
487 // _mm_extract_epi16(R1,3)); | |
488 | |
489 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
490 | |
491 // printf("$%d %d %d %d\n", _mm_extract_epi16(Side2,0), _mm_extract_epi16(Side2,1), _mm_extract_epi16(Side2,2), | |
492 // _mm_extract_epi16(Side2,3)); | |
493 | |
494 // printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2), | |
495 // _mm_extract_epi16(R1,3)); | |
496 | |
497 | |
498 | |
499 if(i >= 2*lenb-e) | |
500 { | |
501 tmp = _mm_srli_si128(R1,2); | |
502 for(k=0; k < e-1;k++) | |
503 tmp = _mm_srli_si128(tmp,2); | |
504 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
505 } | |
506 } | |
507 } | |
508 | |
509 //first cell | |
510 Diag = _mm_xor_si128(Diag,Diag); | |
511 Diag = _mm_insert_epi16(Diag, *(b-(lenb-3)) != *(a-lena), 0); | |
512 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena-1)), 1); | |
513 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-2)), 2); | |
514 Diag = _mm_insert_epi16(Diag, 2*e, 3); | |
515 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
516 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
517 | |
518 minError = min(minError, _mm_extract_epi16(R1,2)); | |
519 | |
520 //second cell | |
521 Diag = _mm_xor_si128(Diag,Diag); | |
522 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena)), 0); | |
523 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-1)), 1); | |
524 Diag = _mm_insert_epi16(Diag, 2*e, 2); | |
525 | |
526 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
527 R0 = _mm_min_epi16(R0, R1+Down1); | |
528 | |
529 minError = min(minError, _mm_extract_epi16(R0,1)); | |
530 | |
531 //third cell | |
532 Diag = _mm_xor_si128(Diag,Diag); | |
533 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena+1)), 0); | |
534 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena)), 1); | |
535 Diag = _mm_insert_epi16(Diag, 2*e, 2); | |
536 | |
537 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
538 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
539 | |
540 minError = min(minError, _mm_extract_epi16(R1,1)); | |
541 | |
542 //forth | |
543 Diag = _mm_xor_si128(Diag,Diag); | |
544 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+1)), 0); | |
545 Diag = _mm_insert_epi16(Diag, 2*e, 1); | |
546 | |
547 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
548 R0 = _mm_min_epi16(R0, R1+Down1); | |
549 | |
550 minError = min(minError, _mm_extract_epi16(R0,0)); | |
551 | |
552 //fifth | |
553 Diag = _mm_xor_si128(Diag,Diag); | |
554 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+2)), 0); | |
555 Diag = _mm_insert_epi16(Diag, 2*e, 1); | |
556 | |
557 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
558 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
559 | |
560 minError = min(minError, _mm_extract_epi16(R0,0)); | |
561 | |
562 if(minError > e) | |
563 return -1; | |
564 return minError; | |
565 } | |
566 | |
567 int backwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb) | |
568 { | |
569 if(lenb == 0 || lena == 0) | |
570 return 0; | |
571 | |
572 int i = 0; | |
573 int j = 0; | |
574 int k = 0; | |
575 | |
576 | |
577 int e = errThreshold; | |
578 | |
579 char flag = 0; | |
580 | |
581 int minError = 2*e; | |
582 | |
583 __m128i R0, R1; | |
584 __m128i Diag; | |
585 __m128i Side1, Side2; | |
586 __m128i Down1, Down2; | |
587 __m128i Error; | |
588 __m128i tmp; | |
589 | |
590 /* initialize */ | |
591 R0 = _mm_setzero_si128 (); | |
592 R1 = _mm_setzero_si128 (); | |
593 Diag = _mm_setzero_si128 (); | |
594 Side1 = _mm_setzero_si128 (); | |
595 Side2 = _mm_setzero_si128 (); | |
596 Down1 = _mm_setzero_si128 (); | |
597 Down2 = _mm_setzero_si128 (); | |
598 Error = _mm_setzero_si128 (); | |
599 tmp = _mm_setzero_si128 (); | |
600 /* end initialize */ | |
601 | |
602 if(lenb <= e) | |
603 { | |
604 return smallEditDistanceB(a,lena,b,lenb); | |
605 } | |
606 | |
607 | |
608 R1 = _mm_xor_si128(R1, R1); | |
609 R0 = _mm_xor_si128(R0, R0); | |
610 | |
611 Diag = _mm_xor_si128(Diag, Diag); | |
612 Side1 = _mm_xor_si128(Side1, Side1); | |
613 Down1 = _mm_xor_si128(Down1, Down1); | |
614 | |
615 Diag = _mm_insert_epi16(Diag,2*e,0); | |
616 | |
617 Side1 = _mm_insert_epi16(Side1,1,0); | |
618 Side1 = _mm_insert_epi16(Side1,2*e,1); | |
619 | |
620 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
621 Down1 = _mm_insert_epi16(Down1,1,1); | |
622 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
623 | |
624 R0 = _mm_insert_epi16(R0,0,0); | |
625 | |
626 R1 = _mm_insert_epi16(R1,1,0); | |
627 R1 = _mm_insert_epi16(R1,1,1); | |
628 | |
629 for(i=2; i <= e; i++) | |
630 { | |
631 //set side | |
632 Side1 = _mm_slli_si128(Side1,2); | |
633 Side1 = _mm_insert_epi16(Side1,1,0); | |
634 | |
635 Down1 = _mm_insert_epi16(Down1,1,0); | |
636 Down1 = _mm_slli_si128(Down1,2); | |
637 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
638 | |
639 Diag = _mm_xor_si128(Diag, Diag); | |
640 if( i%2 == 0) | |
641 { | |
642 Diag = _mm_insert_epi16(Diag,2*e,0); | |
643 | |
644 for(j=1;j<=i-1;j++) | |
645 { | |
646 Diag = _mm_slli_si128(Diag, 2); | |
647 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0); | |
648 } | |
649 Diag = _mm_slli_si128(Diag, 2); | |
650 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
651 | |
652 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag); | |
653 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1); | |
654 } | |
655 | |
656 else | |
657 { | |
658 Diag = _mm_insert_epi16(Diag,2*e,0); | |
659 for(j=i/2-1;j>=-i/2;j--) | |
660 { | |
661 Diag = _mm_slli_si128(Diag, 2); | |
662 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0); | |
663 } | |
664 Diag = _mm_slli_si128(Diag, 2); | |
665 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
666 | |
667 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
668 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
669 } | |
670 } | |
671 Error = _mm_xor_si128(Error, Error); | |
672 Side2 = _mm_xor_si128(Side2, Side2); | |
673 Down2 = _mm_xor_si128(Down2, Down2); | |
674 Down1 = _mm_xor_si128(Down1, Down1); | |
675 | |
676 Error = _mm_insert_epi16(Error,e,0); | |
677 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
678 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
679 | |
680 | |
681 for(j=0; j < e; j++) | |
682 { | |
683 Side2 = _mm_slli_si128(Side2, 2); | |
684 Side2 = _mm_insert_epi16(Side2,1,0); | |
685 | |
686 Down1 = _mm_slli_si128(Down1, 2); | |
687 Down1 = _mm_insert_epi16(Down1,1,0); | |
688 | |
689 Down2 = _mm_slli_si128(Down2, 2); | |
690 Down2 = _mm_insert_epi16(Down2,1,0); | |
691 | |
692 Error = _mm_slli_si128(Error, 2); | |
693 Error = _mm_insert_epi16(Error, e, 0); | |
694 } | |
695 | |
696 Down2= _mm_slli_si128(Down2, 2); | |
697 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
698 | |
699 for(; i <= 2*lenb-(e-1);i++) | |
700 { | |
701 flag = 0; | |
702 Diag = _mm_xor_si128(Diag, Diag); | |
703 if( i%2 == 0) | |
704 { | |
705 for(j=e/2;j>=-e/2;j--) | |
706 { | |
707 Diag = _mm_slli_si128(Diag, 2); | |
708 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0); | |
709 } | |
710 | |
711 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
712 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
713 | |
714 if(_mm_extract_epi16(R0,0) <= e) | |
715 flag = 1; | |
716 tmp = _mm_srli_si128(R0,2); | |
717 for(j=0; j <= e;j++) | |
718 { | |
719 if(_mm_extract_epi16(tmp,0) <= e) | |
720 flag = 1; | |
721 tmp = _mm_srli_si128(tmp,2); | |
722 } | |
723 | |
724 if(flag == 0) | |
725 return -1; | |
726 | |
727 if(i == 2*lenb-e) | |
728 { | |
729 tmp = _mm_srli_si128(R0,2); | |
730 for(k=0; k < e-1;k++) | |
731 tmp = _mm_srli_si128(tmp,2); | |
732 minError = _mm_extract_epi16(tmp,0); | |
733 } | |
734 | |
735 } | |
736 | |
737 else | |
738 { | |
739 for(j=-e/2+1;j<=e/2;j++) | |
740 { | |
741 Diag = _mm_slli_si128(Diag, 2); | |
742 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2-j-1)) != *(a-((i-1)/2+j-1)),0); | |
743 } | |
744 | |
745 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
746 R1 = _mm_min_epi16(R1, R0+Down1); | |
747 | |
748 | |
749 if(i >= 2*lenb-e) | |
750 { | |
751 tmp = _mm_srli_si128(R1,2); | |
752 for(k=0; k < e-2;k++) | |
753 tmp = _mm_srli_si128(tmp,2); | |
754 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
755 } | |
756 } | |
757 } | |
758 | |
759 j=0; | |
760 int tmpE = e; | |
761 for(;j<2*(e-2)+1;j++) | |
762 { | |
763 | |
764 Diag = _mm_xor_si128(Diag, Diag); | |
765 //set the first element | |
766 if(j==0) | |
767 { | |
768 for( k=0;k<=e-1;k++ ) | |
769 { | |
770 Diag = _mm_slli_si128(Diag, 2); | |
771 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
772 } | |
773 | |
774 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
775 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
776 | |
777 | |
778 tmpE--; | |
779 tmp = _mm_srli_si128(R0,2); | |
780 for(k=0; k < e-2;k++) | |
781 tmp = _mm_srli_si128(tmp,2); | |
782 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
783 } | |
784 else if(j%2 == 0) | |
785 { | |
786 for(k=0;k<tmpE;k++) | |
787 { | |
788 Diag = _mm_slli_si128(Diag, 2); | |
789 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
790 } | |
791 | |
792 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
793 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
794 | |
795 tmpE--; | |
796 | |
797 tmp = _mm_srli_si128(R0,2); | |
798 for(k=0; k < tmpE-1;k++) | |
799 tmp = _mm_srli_si128(tmp,2); | |
800 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
801 } | |
802 | |
803 | |
804 else | |
805 { | |
806 for(k=0;k<tmpE;k++) | |
807 { | |
808 Diag = _mm_slli_si128(Diag, 2); | |
809 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
810 } | |
811 | |
812 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
813 R1 = _mm_min_epi16(R1, R0+Down1); | |
814 | |
815 tmp = _mm_srli_si128(R1,2); | |
816 for(k=0; k < tmpE-2;k++) | |
817 tmp = _mm_srli_si128(tmp,2); | |
818 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
819 } | |
820 i++; | |
821 } | |
822 //Diag | |
823 | |
824 Diag = _mm_xor_si128(Diag,Diag); | |
825 Diag = _mm_insert_epi16(Diag, 2*e, 0); | |
826 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1); | |
827 | |
828 Side1 = _mm_insert_epi16(Side1,1,0); | |
829 Side1 = _mm_insert_epi16(Side1,1,1); | |
830 | |
831 Down1 = _mm_insert_epi16(Down1, 2*e, 0); | |
832 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
833 | |
834 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
835 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
836 | |
837 minError = min(minError, _mm_extract_epi16(R1,1)); | |
838 | |
839 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0); | |
840 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
841 | |
842 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
843 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
844 | |
845 minError = min(minError, _mm_extract_epi16(R0,0)); | |
846 | |
847 if(minError > e) | |
848 return -1; | |
849 return minError; | |
850 } | |
851 | |
852 inline int backwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb) | |
853 { | |
854 if(lenb == 0 || lena == 0) | |
855 return 0; | |
856 | |
857 int i = 0; | |
858 int j = 0; | |
859 int k = 0; | |
860 | |
861 int i0; | |
862 int i1; | |
863 int i2; | |
864 int i4; | |
865 int i5; | |
866 | |
867 int e = 4; | |
868 int mismatch = errThreshold; | |
869 | |
870 int minError = 2*errThreshold; | |
871 int index = 0; | |
872 int tmpValue = 0; | |
873 | |
874 if(lenb <= e) | |
875 { | |
876 return smallEditDistanceB(a,lena,b,lenb); | |
877 } | |
878 | |
879 | |
880 __m128i R0, R1; | |
881 __m128i Diag; | |
882 __m128i Side1, Side2; | |
883 __m128i Down1, Down2; | |
884 __m128i tmp; | |
885 __m128i SeqA, SeqB; | |
886 __m128i Result; | |
887 | |
888 /* initialize */ | |
889 R0 = _mm_setzero_si128 (); | |
890 R1 = _mm_setzero_si128 (); | |
891 Diag = _mm_setzero_si128 (); | |
892 Side1 = _mm_setzero_si128 (); | |
893 Side2 = _mm_setzero_si128 (); | |
894 Down1 = _mm_setzero_si128 (); | |
895 Down2 = _mm_setzero_si128 (); | |
896 SeqA = _mm_setzero_si128 (); | |
897 SeqB = _mm_setzero_si128 (); | |
898 Result = _mm_setzero_si128 (); | |
899 /* end initialize */ | |
900 | |
901 R1 = _mm_xor_si128(R1, R1); | |
902 R0 = _mm_xor_si128(R0, R0); | |
903 | |
904 Diag = _mm_xor_si128(Diag, Diag); | |
905 Diag = _mm_insert_epi16(Diag,minError,0); | |
906 | |
907 i0 = (a[0] != b[0]); | |
908 i1 = min(i0, ( *(a-1)!=*b) )+1; | |
909 i2 = min(i0,( a[0] != *(b-1) ) )+1; | |
910 | |
911 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1); | |
912 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1; | |
913 i5 = min(i2, (a[0] != *(b-2))+1)+1; | |
914 | |
915 R1 = _mm_insert_epi16(R1, 3, 0); | |
916 R1 = _mm_insert_epi16(R1, i1, 1); | |
917 R1 = _mm_insert_epi16(R1, i2, 2); | |
918 R1 = _mm_insert_epi16(R1, 3, 3); | |
919 | |
920 | |
921 R0 = _mm_insert_epi16(R0, 4, 0); | |
922 R0 = _mm_insert_epi16(R0, i4, 1); | |
923 R0 = _mm_insert_epi16(R0, i0, 2); | |
924 R0 = _mm_insert_epi16(R0, i5, 3); | |
925 R0 = _mm_insert_epi16(R0, 4, 4); | |
926 | |
927 | |
928 Side2 = _mm_xor_si128(Side2, Side2); | |
929 Down2 = _mm_xor_si128(Down2, Down2); | |
930 Down1 = _mm_xor_si128(Down1, Down1); | |
931 Side1 = _mm_xor_si128(Side1, Side1); | |
932 | |
933 Side2 = _mm_insert_epi16(Side2,minError,0); | |
934 Down1 = _mm_insert_epi16(Down1,minError,0); | |
935 | |
936 Side1 = _mm_insert_epi16(Side1,1,0); | |
937 | |
938 index = 0; | |
939 for(j=0; j < e; j++) | |
940 { | |
941 Side2 = _mm_slli_si128(Side2, 2); | |
942 Side2 = _mm_insert_epi16(Side2,1,0); | |
943 | |
944 Down1 = _mm_slli_si128(Down1, 2); | |
945 Down1 = _mm_insert_epi16(Down1,1,0); | |
946 | |
947 Down2 = _mm_slli_si128(Down2, 2); | |
948 Down2 = _mm_insert_epi16(Down2,1,0); | |
949 | |
950 Side1 = _mm_slli_si128(Side1, 2); | |
951 Side1 = _mm_insert_epi16(Side1,1,0); | |
952 | |
953 SeqA = _mm_slli_si128(SeqA, 2); | |
954 SeqB = _mm_slli_si128(SeqB, 2); | |
955 SeqA = _mm_insert_epi16(SeqA,*(a-index),0); | |
956 SeqB = _mm_insert_epi16(SeqB,*(b-index),0); | |
957 index++; | |
958 } | |
959 | |
960 Down2= _mm_slli_si128(Down2, 2); | |
961 Down2 = _mm_insert_epi16(Down2,minError,0); | |
962 | |
963 index = 4; | |
964 i = 5; | |
965 | |
966 int loopEnd = 2*lenb-(e-1); | |
967 for(; i <= loopEnd ;i++) | |
968 { | |
969 | |
970 Diag = _mm_xor_si128(Diag, Diag); | |
971 if( i%2 == 0) | |
972 { | |
973 SeqA = _mm_slli_si128(SeqA, 2); | |
974 SeqB = _mm_slli_si128(SeqB, 2); | |
975 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0); | |
976 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0); | |
977 | |
978 index++; | |
979 | |
980 tmp = _mm_shufflelo_epi16(SeqB,27); | |
981 tmp = _mm_slli_si128(tmp, 2); | |
982 tmpValue = _mm_extract_epi16(tmp, 5); | |
983 tmp = _mm_insert_epi16(tmp, tmpValue, 0); | |
984 | |
985 Result = _mm_cmpeq_epi16(SeqA, tmp); | |
986 Diag = _mm_andnot_si128(Result, MASK); | |
987 | |
988 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
989 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
990 | |
991 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold | |
992 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold && _mm_extract_epi16(R1, 0) > errThreshold | |
993 && _mm_extract_epi16(R1, 1) > errThreshold && _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold) | |
994 return -1; | |
995 | |
996 if(i == 2*lenb-e) | |
997 { | |
998 tmp = _mm_srli_si128(R0,2); | |
999 for(k=0; k < e-1;k++) | |
1000 tmp = _mm_srli_si128(tmp,2); | |
1001 minError = _mm_extract_epi16(tmp,0); | |
1002 } | |
1003 | |
1004 } | |
1005 | |
1006 else | |
1007 { | |
1008 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27)); | |
1009 Diag = _mm_andnot_si128(Result, MASK); | |
1010 | |
1011 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1012 R1 = _mm_min_epi16(R1, R0+Down1); | |
1013 | |
1014 | |
1015 if(i >= 2*lenb-e) | |
1016 { | |
1017 tmp = _mm_srli_si128(R1,2); | |
1018 for(k=0; k < e-2;k++) | |
1019 tmp = _mm_srli_si128(tmp,2); | |
1020 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1021 } | |
1022 } | |
1023 | |
1024 | |
1025 } | |
1026 | |
1027 j=0; | |
1028 int tmpE = e; | |
1029 for(;j<2*(e-2)+1;j++) | |
1030 { | |
1031 | |
1032 Diag = _mm_xor_si128(Diag, Diag); | |
1033 //set the first element | |
1034 if(j==0) | |
1035 { | |
1036 for( k=0;k<=e-1;k++ ) | |
1037 { | |
1038 Diag = _mm_slli_si128(Diag, 2); | |
1039 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1040 } | |
1041 | |
1042 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1043 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1044 | |
1045 tmpE--; | |
1046 | |
1047 tmp = _mm_srli_si128(R0,2); | |
1048 for(k=0; k < e-2;k++) | |
1049 tmp = _mm_srli_si128(tmp,2); | |
1050 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1051 } | |
1052 else if(j%2 == 0) | |
1053 { | |
1054 for(k=0;k<tmpE;k++) | |
1055 { | |
1056 Diag = _mm_slli_si128(Diag, 2); | |
1057 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1058 } | |
1059 | |
1060 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1061 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1062 | |
1063 tmpE--; | |
1064 | |
1065 tmp = _mm_srli_si128(R0,2); | |
1066 for(k=0; k < tmpE-1;k++) | |
1067 tmp = _mm_srli_si128(tmp,2); | |
1068 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1069 } | |
1070 | |
1071 | |
1072 else | |
1073 { | |
1074 for(k=0;k<tmpE;k++) | |
1075 { | |
1076 Diag = _mm_slli_si128(Diag, 2); | |
1077 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1078 } | |
1079 | |
1080 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1081 R1 = _mm_min_epi16(R1, R0+Down1); | |
1082 | |
1083 tmp = _mm_srli_si128(R1,2); | |
1084 for(k=0; k < tmpE-2;k++) | |
1085 tmp = _mm_srli_si128(tmp,2); | |
1086 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1087 } | |
1088 i++; | |
1089 } | |
1090 //Diag | |
1091 | |
1092 Diag = _mm_xor_si128(Diag,Diag); | |
1093 Diag = _mm_insert_epi16(Diag, 2*errThreshold, 0); | |
1094 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1); | |
1095 | |
1096 Side1 = _mm_insert_epi16(Side1,1,0); | |
1097 Side1 = _mm_insert_epi16(Side1,1,1); | |
1098 | |
1099 Down1 = _mm_insert_epi16(Down1, 2*errThreshold, 0); | |
1100 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
1101 | |
1102 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
1103 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
1104 | |
1105 minError = min(minError, _mm_extract_epi16(R1,1)); | |
1106 | |
1107 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0); | |
1108 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
1109 | |
1110 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
1111 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
1112 | |
1113 minError = min(minError, _mm_extract_epi16(R0,0)); | |
1114 | |
1115 if(minError > mismatch) | |
1116 return -1; | |
1117 return minError; | |
1118 } | |
1119 | |
1120 int backwardEditDistance4SSE2(char *a, int lena, char *b,int lenb) | |
1121 { | |
1122 if(lenb == 0 || lena == 0) | |
1123 return 0; | |
1124 | |
1125 int i = 0; | |
1126 int j = 0; | |
1127 int k = 0; | |
1128 | |
1129 int i0; | |
1130 int i1; | |
1131 int i2; | |
1132 int i4; | |
1133 int i5; | |
1134 | |
1135 int e = errThreshold; | |
1136 | |
1137 int minError = 2*e; | |
1138 int index = 0; | |
1139 int tmpValue = 0; | |
1140 | |
1141 if(lenb <= e) | |
1142 { | |
1143 return smallEditDistanceB(a,lena,b,lenb); | |
1144 } | |
1145 | |
1146 __m128i R0, R1; | |
1147 __m128i Diag; | |
1148 __m128i Side1, Side2; | |
1149 __m128i Down1, Down2; | |
1150 __m128i tmp; | |
1151 __m128i SeqA, SeqB; | |
1152 __m128i Result; | |
1153 | |
1154 /* initialize */ | |
1155 R0 = _mm_setzero_si128 (); | |
1156 R1 = _mm_setzero_si128 (); | |
1157 Diag = _mm_setzero_si128 (); | |
1158 Side1 = _mm_setzero_si128 (); | |
1159 Side2 = _mm_setzero_si128 (); | |
1160 Down1 = _mm_setzero_si128 (); | |
1161 Down2 = _mm_setzero_si128 (); | |
1162 SeqA = _mm_setzero_si128 (); | |
1163 SeqB = _mm_setzero_si128 (); | |
1164 Result = _mm_setzero_si128 (); | |
1165 /* end initialize */ | |
1166 | |
1167 R1 = _mm_xor_si128(R1, R1); | |
1168 R0 = _mm_xor_si128(R0, R0); | |
1169 | |
1170 Diag = _mm_xor_si128(Diag, Diag); | |
1171 Diag = _mm_insert_epi16(Diag,2*e,0); | |
1172 | |
1173 i0 = (a[0] != b[0]); | |
1174 i1 = min(i0, ( *(a-1)!=*b) )+1; | |
1175 i2 = min(i0,( a[0] != *(b-1) ) )+1; | |
1176 | |
1177 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1); | |
1178 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1; | |
1179 i5 = min(i2, (a[0] != *(b-2))+1)+1; | |
1180 | |
1181 R1 = _mm_insert_epi16(R1, 3, 0); | |
1182 R1 = _mm_insert_epi16(R1, i1, 1); | |
1183 R1 = _mm_insert_epi16(R1, i2, 2); | |
1184 R1 = _mm_insert_epi16(R1, 3, 3); | |
1185 | |
1186 | |
1187 R0 = _mm_insert_epi16(R0, 4, 0); | |
1188 R0 = _mm_insert_epi16(R0, i4, 1); | |
1189 R0 = _mm_insert_epi16(R0, i0, 2); | |
1190 R0 = _mm_insert_epi16(R0, i5, 3); | |
1191 R0 = _mm_insert_epi16(R0, 4, 4); | |
1192 | |
1193 Side2 = _mm_xor_si128(Side2, Side2); | |
1194 Down2 = _mm_xor_si128(Down2, Down2); | |
1195 Down1 = _mm_xor_si128(Down1, Down1); | |
1196 Side1 = _mm_xor_si128(Side1, Side1); | |
1197 | |
1198 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
1199 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
1200 | |
1201 Side1 = _mm_insert_epi16(Side1,1,0); | |
1202 | |
1203 index = 0; | |
1204 for(j=0; j < e; j++) | |
1205 { | |
1206 Side2 = _mm_slli_si128(Side2, 2); | |
1207 Side2 = _mm_insert_epi16(Side2,1,0); | |
1208 | |
1209 Down1 = _mm_slli_si128(Down1, 2); | |
1210 Down1 = _mm_insert_epi16(Down1,1,0); | |
1211 | |
1212 Down2 = _mm_slli_si128(Down2, 2); | |
1213 Down2 = _mm_insert_epi16(Down2,1,0); | |
1214 | |
1215 Side1 = _mm_slli_si128(Side1, 2); | |
1216 Side1 = _mm_insert_epi16(Side1,1,0); | |
1217 | |
1218 SeqA = _mm_slli_si128(SeqA, 2); | |
1219 SeqB = _mm_slli_si128(SeqB, 2); | |
1220 SeqA = _mm_insert_epi16(SeqA,*(a-index),0); | |
1221 SeqB = _mm_insert_epi16(SeqB,*(b-index),0); | |
1222 index++; | |
1223 } | |
1224 | |
1225 Down2= _mm_slli_si128(Down2, 2); | |
1226 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
1227 | |
1228 index = 4; | |
1229 i = 5; | |
1230 int loopEnd = 2*lenb-(e-1); | |
1231 for(; i <= loopEnd ;i++) | |
1232 { | |
1233 | |
1234 Diag = _mm_xor_si128(Diag, Diag); | |
1235 if( i%2 == 0) | |
1236 { | |
1237 SeqA = _mm_slli_si128(SeqA, 2); | |
1238 SeqB = _mm_slli_si128(SeqB, 2); | |
1239 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0); | |
1240 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0); | |
1241 | |
1242 index++; | |
1243 | |
1244 tmp = _mm_shufflelo_epi16(SeqB,27); | |
1245 tmp = _mm_slli_si128(tmp, 2); | |
1246 tmpValue = _mm_extract_epi16(tmp, 5); | |
1247 tmp = _mm_insert_epi16(tmp, tmpValue, 0); | |
1248 | |
1249 Result = _mm_cmpeq_epi16(SeqA, tmp); | |
1250 Diag = _mm_andnot_si128(Result, MASK); | |
1251 | |
1252 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1253 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1254 | |
1255 //tmp = _mm_sub_epi16(Error, R0); | |
1256 //i0 = _mm_movemask_epi8(tmp); | |
1257 | |
1258 if( _mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e | |
1259 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e && | |
1260 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e ) | |
1261 return -1; | |
1262 | |
1263 if(i == 2*lenb-e) | |
1264 { | |
1265 tmp = _mm_srli_si128(R0,2); | |
1266 for(k=0; k < e-1;k++) | |
1267 tmp = _mm_srli_si128(tmp,2); | |
1268 minError = _mm_extract_epi16(tmp,0); | |
1269 } | |
1270 | |
1271 } | |
1272 | |
1273 else | |
1274 { | |
1275 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27)); | |
1276 Diag = _mm_andnot_si128(Result, MASK); | |
1277 | |
1278 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1279 R1 = _mm_min_epi16(R1, R0+Down1); | |
1280 | |
1281 if(i >= 2*lenb-e) | |
1282 { | |
1283 tmp = _mm_srli_si128(R1,2); | |
1284 for(k=0; k < e-2;k++) | |
1285 tmp = _mm_srli_si128(tmp,2); | |
1286 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1287 } | |
1288 } | |
1289 | |
1290 | |
1291 } | |
1292 | |
1293 j=0; | |
1294 | |
1295 int tmpE = e; | |
1296 | |
1297 for(;j<2*(e-2)+1;j++) | |
1298 { | |
1299 | |
1300 Diag = _mm_xor_si128(Diag, Diag); | |
1301 //set the first element | |
1302 if(j==0) | |
1303 { | |
1304 for( k=0;k<=e-1;k++ ) | |
1305 { | |
1306 Diag = _mm_slli_si128(Diag, 2); | |
1307 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1308 } | |
1309 | |
1310 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1311 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1312 | |
1313 tmpE--; | |
1314 | |
1315 tmp = _mm_srli_si128(R0,2); | |
1316 for(k=0; k < e-2;k++) | |
1317 tmp = _mm_srli_si128(tmp,2); | |
1318 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1319 } | |
1320 else if(j%2 == 0) | |
1321 { | |
1322 for(k=0;k<tmpE;k++) | |
1323 { | |
1324 Diag = _mm_slli_si128(Diag, 2); | |
1325 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1326 } | |
1327 | |
1328 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1329 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1330 | |
1331 tmpE--; | |
1332 | |
1333 tmp = _mm_srli_si128(R0,2); | |
1334 for(k=0; k < tmpE-1;k++) | |
1335 tmp = _mm_srli_si128(tmp,2); | |
1336 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1337 } | |
1338 | |
1339 | |
1340 else | |
1341 { | |
1342 for(k=0;k<tmpE;k++) | |
1343 { | |
1344 Diag = _mm_slli_si128(Diag, 2); | |
1345 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0); | |
1346 } | |
1347 | |
1348 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1349 R1 = _mm_min_epi16(R1, R0+Down1); | |
1350 | |
1351 tmp = _mm_srli_si128(R1,2); | |
1352 for(k=0; k < tmpE-2;k++) | |
1353 tmp = _mm_srli_si128(tmp,2); | |
1354 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1355 } | |
1356 i++; | |
1357 } | |
1358 //Diag | |
1359 | |
1360 Diag = _mm_xor_si128(Diag,Diag); | |
1361 Diag = _mm_insert_epi16(Diag, 2*e, 0); | |
1362 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1); | |
1363 | |
1364 Side1 = _mm_insert_epi16(Side1,1,0); | |
1365 Side1 = _mm_insert_epi16(Side1,1,1); | |
1366 | |
1367 Down1 = _mm_insert_epi16(Down1, 2*e, 0); | |
1368 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
1369 | |
1370 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
1371 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
1372 | |
1373 minError = min(minError, _mm_extract_epi16(R1,1)); | |
1374 | |
1375 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0); | |
1376 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
1377 | |
1378 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
1379 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
1380 | |
1381 minError = min(minError, _mm_extract_epi16(R0,0)); | |
1382 | |
1383 if(minError > e) | |
1384 return -1; | |
1385 return minError; | |
1386 } | |
1387 | |
1388 inline int forwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb) | |
1389 { | |
1390 if(lenb == 0 || lena == 0) | |
1391 return 0; | |
1392 | |
1393 int i = 0; | |
1394 int j = 0; | |
1395 int k = 0; | |
1396 | |
1397 int i0=0; | |
1398 int i1=0; | |
1399 int i2=0; | |
1400 int i4=0; | |
1401 int i5=0; | |
1402 | |
1403 int mismatch = errThreshold; | |
1404 int e = 4; | |
1405 | |
1406 int minError = 4*mismatch+1; | |
1407 int index = 0; | |
1408 int tmpValue = 0; | |
1409 | |
1410 if(lenb <= e) | |
1411 { | |
1412 return smallEditDistanceF(a,lena,b,lenb); | |
1413 } | |
1414 | |
1415 | |
1416 register __m128i R0, R1; | |
1417 __m128i Diag; | |
1418 __m128i Side1, Side2; | |
1419 __m128i Down1, Down2; | |
1420 __m128i tmp; | |
1421 register __m128i SeqA, SeqB; | |
1422 __m128i Result; | |
1423 | |
1424 __m128i tmpSeqA; | |
1425 __m128i tmpSeqB; | |
1426 | |
1427 /* initialize */ | |
1428 R0 = _mm_setzero_si128 (); | |
1429 R1 = _mm_setzero_si128 (); | |
1430 Diag = _mm_setzero_si128 (); | |
1431 Side1 = _mm_setzero_si128 (); | |
1432 Side2 = _mm_setzero_si128 (); | |
1433 Down1 = _mm_setzero_si128 (); | |
1434 Down2 = _mm_setzero_si128 (); | |
1435 SeqA = _mm_setzero_si128 (); | |
1436 SeqB = _mm_setzero_si128 (); | |
1437 Result = _mm_setzero_si128 (); | |
1438 /* end initialize */ | |
1439 | |
1440 | |
1441 R1 = _mm_xor_si128(R1, R1); | |
1442 R0 = _mm_xor_si128(R0, R0); | |
1443 | |
1444 Diag = _mm_xor_si128(Diag, Diag); | |
1445 Diag = _mm_insert_epi16(Diag,minError,0); | |
1446 | |
1447 i0 = (a[0] != b[0]); | |
1448 i1 = min(i0, (a[1]!=b[0]))+1; | |
1449 i2 = min(i0,(a[0]!=b[1]))+1; | |
1450 | |
1451 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1); | |
1452 i4 = min(i1, (a[2]!=b[0])+1)+1; | |
1453 i5 = min(i2, (a[0]!=b[2])+1)+1; | |
1454 | |
1455 R1 = _mm_insert_epi16(R1, 3, 0); | |
1456 R1 = _mm_insert_epi16(R1, i1, 1); | |
1457 R1 = _mm_insert_epi16(R1, i2, 2); | |
1458 R1 = _mm_insert_epi16(R1, 3, 3); | |
1459 | |
1460 R0 = _mm_insert_epi16(R0, 4, 0); | |
1461 R0 = _mm_insert_epi16(R0, i4, 1); | |
1462 R0 = _mm_insert_epi16(R0, i0, 2); | |
1463 R0 = _mm_insert_epi16(R0, i5, 3); | |
1464 R0 = _mm_insert_epi16(R0, 4, 4); | |
1465 | |
1466 Side2 = _mm_xor_si128(Side2, Side2); | |
1467 Down2 = _mm_xor_si128(Down2, Down2); | |
1468 Down1 = _mm_xor_si128(Down1, Down1); | |
1469 Side1 = _mm_xor_si128(Side1, Side1); | |
1470 | |
1471 Side2 = _mm_insert_epi16(Side2,minError,0); | |
1472 Down1 = _mm_insert_epi16(Down1,minError,0); | |
1473 | |
1474 Side1 = _mm_insert_epi16(Side1,1,0); | |
1475 | |
1476 index = 0; | |
1477 for(j=0; j < e; j++) | |
1478 { | |
1479 Side2 = _mm_slli_si128(Side2, 2); | |
1480 Side2 = _mm_insert_epi16(Side2,1,0); | |
1481 | |
1482 Down1 = _mm_slli_si128(Down1, 2); | |
1483 Down1 = _mm_insert_epi16(Down1,1,0); | |
1484 | |
1485 Down2 = _mm_slli_si128(Down2, 2); | |
1486 Down2 = _mm_insert_epi16(Down2,1,0); | |
1487 | |
1488 Side1 = _mm_slli_si128(Side1, 2); | |
1489 Side1 = _mm_insert_epi16(Side1,1,0); | |
1490 | |
1491 SeqA = _mm_slli_si128(SeqA, 2); | |
1492 SeqB = _mm_slli_si128(SeqB, 2); | |
1493 SeqA = _mm_insert_epi16(SeqA,a[index],0); | |
1494 SeqB = _mm_insert_epi16(SeqB,b[index],0); | |
1495 index++; | |
1496 } | |
1497 | |
1498 Down2= _mm_slli_si128(Down2, 2); | |
1499 Down2 = _mm_insert_epi16(Down2,minError,0); | |
1500 | |
1501 index = 4; | |
1502 i = 5; | |
1503 | |
1504 int loopEnd = 2*lenb-(e-1); | |
1505 for(; i <= loopEnd ;i++) | |
1506 { | |
1507 if( i%2 == 0) | |
1508 { | |
1509 tmpSeqA = _mm_slli_si128(SeqA, 2); | |
1510 tmpSeqB = _mm_slli_si128(SeqB, 2); | |
1511 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0); | |
1512 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0); | |
1513 | |
1514 index++; | |
1515 | |
1516 tmp = _mm_shufflelo_epi16(SeqB,27); | |
1517 tmp = _mm_slli_si128(tmp, 2); | |
1518 tmpValue = _mm_extract_epi16(tmp, 5); | |
1519 tmp = _mm_insert_epi16(tmp, tmpValue, 0); | |
1520 | |
1521 Result = _mm_cmpeq_epi16(SeqA, tmp); | |
1522 Diag = _mm_andnot_si128(Result, MASK); | |
1523 | |
1524 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1525 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1526 | |
1527 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold | |
1528 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold && | |
1529 _mm_extract_epi16(R1, 0) > errThreshold && _mm_extract_epi16(R1, 1) > errThreshold && | |
1530 _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold) | |
1531 return -1; | |
1532 | |
1533 if(i == 2*lenb-e) | |
1534 { | |
1535 tmp = _mm_srli_si128(R0,2); | |
1536 for(k=0; k < e-1;k++) | |
1537 tmp = _mm_srli_si128(tmp,2); | |
1538 minError = _mm_extract_epi16(tmp,0); | |
1539 } | |
1540 | |
1541 } | |
1542 | |
1543 else | |
1544 { | |
1545 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27)); | |
1546 Diag = _mm_andnot_si128(Result, MASK); | |
1547 | |
1548 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1549 R1 = _mm_min_epi16(R1, R0+Down1); | |
1550 | |
1551 if(i >= 2*lenb-e) | |
1552 { | |
1553 tmp = _mm_srli_si128(R1,2); | |
1554 for(k=0; k < e-2;k++) | |
1555 tmp = _mm_srli_si128(tmp,2); | |
1556 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1557 } | |
1558 } | |
1559 } | |
1560 | |
1561 j=0; | |
1562 int tmpE = e; | |
1563 for(;j<2*(e-2)+1;j++) | |
1564 { | |
1565 | |
1566 Diag = _mm_xor_si128(Diag, Diag); | |
1567 //set the first element | |
1568 if(j==0) | |
1569 { | |
1570 for( k=0;k<=e-1;k++ ) | |
1571 { | |
1572 Diag = _mm_slli_si128(Diag, 2); | |
1573 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1574 } | |
1575 | |
1576 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1577 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1578 | |
1579 tmpE--; | |
1580 | |
1581 tmp = _mm_srli_si128(R0,2); | |
1582 for(k=0; k < e-2;k++) | |
1583 tmp = _mm_srli_si128(tmp,2); | |
1584 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1585 } | |
1586 else if(j%2 == 0) | |
1587 { | |
1588 for(k=0;k<tmpE;k++) | |
1589 { | |
1590 Diag = _mm_slli_si128(Diag, 2); | |
1591 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1592 } | |
1593 | |
1594 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1595 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1596 | |
1597 tmpE--; | |
1598 | |
1599 tmp = _mm_srli_si128(R0,2); | |
1600 for(k=0; k < tmpE-1;k++) | |
1601 tmp = _mm_srli_si128(tmp,2); | |
1602 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1603 } | |
1604 | |
1605 | |
1606 else | |
1607 { | |
1608 for(k=0;k<tmpE;k++) | |
1609 { | |
1610 Diag = _mm_slli_si128(Diag, 2); | |
1611 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1612 } | |
1613 | |
1614 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1615 R1 = _mm_min_epi16(R1, R0+Down1); | |
1616 | |
1617 tmp = _mm_srli_si128(R1,2); | |
1618 for(k=0; k < tmpE-2;k++) | |
1619 tmp = _mm_srli_si128(tmp,2); | |
1620 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1621 } | |
1622 i++; | |
1623 } | |
1624 //Diag | |
1625 | |
1626 Diag = _mm_xor_si128(Diag,Diag); | |
1627 Diag = _mm_insert_epi16(Diag, minError, 0); | |
1628 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1); | |
1629 | |
1630 Side1 = _mm_insert_epi16(Side1,1,0); | |
1631 Side1 = _mm_insert_epi16(Side1,1,1); | |
1632 | |
1633 Down1 = _mm_insert_epi16(Down1, minError, 0); | |
1634 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
1635 | |
1636 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
1637 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
1638 | |
1639 minError = min(minError, _mm_extract_epi16(R1,1)); | |
1640 | |
1641 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0); | |
1642 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
1643 | |
1644 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
1645 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
1646 | |
1647 | |
1648 minError = min(minError, _mm_extract_epi16(R0,0)); | |
1649 | |
1650 | |
1651 if(minError > mismatch) | |
1652 return -1; | |
1653 return minError; | |
1654 } | |
1655 | |
1656 | |
1657 | |
1658 int forwardEditDistance4SSE2(char *a, int lena, char *b,int lenb) | |
1659 { | |
1660 if(lenb == 0 || lena == 0) | |
1661 return 0; | |
1662 | |
1663 int i = 0; | |
1664 int j = 0; | |
1665 int k = 0; | |
1666 | |
1667 int i0=0; | |
1668 int i1=0; | |
1669 int i2=0; | |
1670 int i4=0; | |
1671 int i5=0; | |
1672 | |
1673 int e = errThreshold; | |
1674 | |
1675 int minError = 2*e; | |
1676 int index = 0; | |
1677 int tmpValue = 0; | |
1678 | |
1679 if(lenb <= e) | |
1680 { | |
1681 return smallEditDistanceF(a,lena,b,lenb); | |
1682 } | |
1683 | |
1684 | |
1685 register __m128i R0, R1; | |
1686 __m128i Diag; | |
1687 __m128i Side1, Side2; | |
1688 __m128i Down1, Down2; | |
1689 __m128i tmp; | |
1690 register __m128i SeqA, SeqB; | |
1691 __m128i Result; | |
1692 | |
1693 __m128i tmpSeqA; | |
1694 __m128i tmpSeqB; | |
1695 | |
1696 /* initialize */ | |
1697 R0 = _mm_setzero_si128 (); | |
1698 R1 = _mm_setzero_si128 (); | |
1699 Diag = _mm_setzero_si128 (); | |
1700 Side1 = _mm_setzero_si128 (); | |
1701 Side2 = _mm_setzero_si128 (); | |
1702 Down1 = _mm_setzero_si128 (); | |
1703 Down2 = _mm_setzero_si128 (); | |
1704 SeqA = _mm_setzero_si128 (); | |
1705 SeqB = _mm_setzero_si128 (); | |
1706 Result = _mm_setzero_si128 (); | |
1707 /* end initialize */ | |
1708 | |
1709 R1 = _mm_xor_si128(R1, R1); | |
1710 R0 = _mm_xor_si128(R0, R0); | |
1711 | |
1712 Diag = _mm_xor_si128(Diag, Diag); | |
1713 Diag = _mm_insert_epi16(Diag,2*e,0); | |
1714 | |
1715 i0 = (a[0] != b[0]); | |
1716 i1 = min(i0, (a[1]!=b[0]))+1; | |
1717 i2 = min(i0,(a[0]!=b[1]))+1; | |
1718 | |
1719 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1); | |
1720 i4 = min(i1, (a[2]!=b[0])+1)+1; | |
1721 i5 = min(i2, (a[0]!=b[2])+1)+1; | |
1722 | |
1723 R1 = _mm_insert_epi16(R1, 3, 0); | |
1724 R1 = _mm_insert_epi16(R1, i1, 1); | |
1725 R1 = _mm_insert_epi16(R1, i2, 2); | |
1726 R1 = _mm_insert_epi16(R1, 3, 3); | |
1727 | |
1728 R0 = _mm_insert_epi16(R0, 4, 0); | |
1729 R0 = _mm_insert_epi16(R0, i4, 1); | |
1730 R0 = _mm_insert_epi16(R0, i0, 2); | |
1731 R0 = _mm_insert_epi16(R0, i5, 3); | |
1732 R0 = _mm_insert_epi16(R0, 4, 4); | |
1733 | |
1734 Side2 = _mm_xor_si128(Side2, Side2); | |
1735 Down2 = _mm_xor_si128(Down2, Down2); | |
1736 Down1 = _mm_xor_si128(Down1, Down1); | |
1737 Side1 = _mm_xor_si128(Side1, Side1); | |
1738 | |
1739 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
1740 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
1741 | |
1742 Side1 = _mm_insert_epi16(Side1,1,0); | |
1743 | |
1744 index = 0; | |
1745 for(j=0; j < e; j++) | |
1746 { | |
1747 Side2 = _mm_slli_si128(Side2, 2); | |
1748 Side2 = _mm_insert_epi16(Side2,1,0); | |
1749 | |
1750 Down1 = _mm_slli_si128(Down1, 2); | |
1751 Down1 = _mm_insert_epi16(Down1,1,0); | |
1752 | |
1753 Down2 = _mm_slli_si128(Down2, 2); | |
1754 Down2 = _mm_insert_epi16(Down2,1,0); | |
1755 | |
1756 Side1 = _mm_slli_si128(Side1, 2); | |
1757 Side1 = _mm_insert_epi16(Side1,1,0); | |
1758 | |
1759 SeqA = _mm_slli_si128(SeqA, 2); | |
1760 SeqB = _mm_slli_si128(SeqB, 2); | |
1761 SeqA = _mm_insert_epi16(SeqA,a[index],0); | |
1762 SeqB = _mm_insert_epi16(SeqB,b[index],0); | |
1763 index++; | |
1764 } | |
1765 | |
1766 Down2= _mm_slli_si128(Down2, 2); | |
1767 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
1768 | |
1769 index = 4; | |
1770 i = 5; | |
1771 | |
1772 int loopEnd = 2*lenb-(e-1); | |
1773 for(; i <= loopEnd ;i++) | |
1774 { | |
1775 //Diag = _mm_xor_si128(Diag, Diag); | |
1776 if( i%2 == 0) | |
1777 { | |
1778 tmpSeqA = _mm_slli_si128(SeqA, 2); | |
1779 tmpSeqB = _mm_slli_si128(SeqB, 2); | |
1780 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0); | |
1781 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0); | |
1782 | |
1783 index++; | |
1784 | |
1785 tmp = _mm_shufflelo_epi16(SeqB,27); | |
1786 tmp = _mm_slli_si128(tmp, 2); | |
1787 tmpValue = _mm_extract_epi16(tmp, 5); | |
1788 tmp = _mm_insert_epi16(tmp, tmpValue, 0); | |
1789 | |
1790 Result = _mm_cmpeq_epi16(SeqA, tmp); | |
1791 Diag = _mm_andnot_si128(Result, MASK); | |
1792 | |
1793 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1794 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1795 | |
1796 if(_mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e | |
1797 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e && | |
1798 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e) | |
1799 return -1; | |
1800 | |
1801 if(i == 2*lenb-e) | |
1802 { | |
1803 tmp = _mm_srli_si128(R0,2); | |
1804 for(k=0; k < e-1;k++) | |
1805 tmp = _mm_srli_si128(tmp,2); | |
1806 minError = _mm_extract_epi16(tmp,0); | |
1807 } | |
1808 | |
1809 } | |
1810 | |
1811 else | |
1812 { | |
1813 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27)); | |
1814 Diag = _mm_andnot_si128(Result, MASK); | |
1815 | |
1816 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1817 R1 = _mm_min_epi16(R1, R0+Down1); | |
1818 | |
1819 if(i >= 2*lenb-e) | |
1820 { | |
1821 tmp = _mm_srli_si128(R1,2); | |
1822 for(k=0; k < e-2;k++) | |
1823 tmp = _mm_srli_si128(tmp,2); | |
1824 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1825 } | |
1826 } | |
1827 | |
1828 | |
1829 } | |
1830 j=0; | |
1831 int tmpE = e; | |
1832 for(;j<2*(e-2)+1;j++) | |
1833 { | |
1834 | |
1835 Diag = _mm_xor_si128(Diag, Diag); | |
1836 //set the first element | |
1837 if(j==0) | |
1838 { | |
1839 for( k=0;k<=e-1;k++ ) | |
1840 { | |
1841 Diag = _mm_slli_si128(Diag, 2); | |
1842 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1843 } | |
1844 | |
1845 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1846 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1847 | |
1848 tmpE--; | |
1849 | |
1850 tmp = _mm_srli_si128(R0,2); | |
1851 for(k=0; k < e-2;k++) | |
1852 tmp = _mm_srli_si128(tmp,2); | |
1853 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1854 } | |
1855 else if(j%2 == 0) | |
1856 { | |
1857 for(k=0;k<tmpE;k++) | |
1858 { | |
1859 Diag = _mm_slli_si128(Diag, 2); | |
1860 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1861 } | |
1862 | |
1863 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
1864 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
1865 | |
1866 tmpE--; | |
1867 | |
1868 tmp = _mm_srli_si128(R0,2); | |
1869 for(k=0; k < tmpE-1;k++) | |
1870 tmp = _mm_srli_si128(tmp,2); | |
1871 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1872 } | |
1873 | |
1874 | |
1875 else | |
1876 { | |
1877 for(k=0;k<tmpE;k++) | |
1878 { | |
1879 Diag = _mm_slli_si128(Diag, 2); | |
1880 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
1881 } | |
1882 | |
1883 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
1884 R1 = _mm_min_epi16(R1, R0+Down1); | |
1885 | |
1886 tmp = _mm_srli_si128(R1,2); | |
1887 for(k=0; k < tmpE-2;k++) | |
1888 tmp = _mm_srli_si128(tmp,2); | |
1889 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
1890 } | |
1891 i++; | |
1892 } | |
1893 //Diag | |
1894 | |
1895 Diag = _mm_xor_si128(Diag,Diag); | |
1896 Diag = _mm_insert_epi16(Diag, 2*e, 0); | |
1897 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1); | |
1898 | |
1899 Side1 = _mm_insert_epi16(Side1,1,0); | |
1900 Side1 = _mm_insert_epi16(Side1,1,1); | |
1901 | |
1902 Down1 = _mm_insert_epi16(Down1, 2*e, 0); | |
1903 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
1904 | |
1905 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
1906 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
1907 | |
1908 minError = min(minError, _mm_extract_epi16(R1,1)); | |
1909 | |
1910 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0); | |
1911 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
1912 | |
1913 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
1914 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
1915 | |
1916 minError = min(minError, _mm_extract_epi16(R0,0)); | |
1917 | |
1918 if(minError > e) | |
1919 return -1; | |
1920 return minError; | |
1921 } | |
1922 | |
1923 int forwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb) | |
1924 { | |
1925 if(lenb == 0 || lena == 0) | |
1926 return 0; | |
1927 | |
1928 int i = 0; | |
1929 int j = 0; | |
1930 int k = 0; | |
1931 | |
1932 int e = errThreshold; | |
1933 | |
1934 int minError = 2*e; | |
1935 | |
1936 char flag = 0; | |
1937 | |
1938 if(lenb <= e) | |
1939 { | |
1940 return smallEditDistanceF(a,lena,b,lenb); | |
1941 } | |
1942 | |
1943 | |
1944 __m128i R0, R1; | |
1945 __m128i Diag; | |
1946 __m128i Side1, Side2; | |
1947 __m128i Down1, Down2; | |
1948 __m128i Error; | |
1949 __m128i tmp; | |
1950 | |
1951 /* initialize */ | |
1952 R0 = _mm_setzero_si128 (); | |
1953 R1 = _mm_setzero_si128 (); | |
1954 Diag = _mm_setzero_si128 (); | |
1955 Side1 = _mm_setzero_si128 (); | |
1956 Side2 = _mm_setzero_si128 (); | |
1957 Down1 = _mm_setzero_si128 (); | |
1958 Down2 = _mm_setzero_si128 (); | |
1959 Error = _mm_setzero_si128 (); | |
1960 tmp = _mm_setzero_si128 (); | |
1961 /* end initialize */ | |
1962 | |
1963 R1 = _mm_xor_si128(R1, R1); | |
1964 R0 = _mm_xor_si128(R0, R0); | |
1965 | |
1966 Diag = _mm_xor_si128(Diag, Diag); | |
1967 Side1 = _mm_xor_si128(Side1, Side1); | |
1968 Down1 = _mm_xor_si128(Down1, Down1); | |
1969 | |
1970 Diag = _mm_insert_epi16(Diag,2*e,0); | |
1971 | |
1972 Side1 = _mm_insert_epi16(Side1,1,0); | |
1973 Side1 = _mm_insert_epi16(Side1,2*e,1); | |
1974 | |
1975 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
1976 Down1 = _mm_insert_epi16(Down1,1,1); | |
1977 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
1978 | |
1979 R0 = _mm_insert_epi16(R0,0,0); | |
1980 | |
1981 R1 = _mm_insert_epi16(R1,1,0); | |
1982 R1 = _mm_insert_epi16(R1,1,1); | |
1983 | |
1984 for(i=2; i <= e; i++) | |
1985 { | |
1986 //set side | |
1987 Side1 = _mm_slli_si128(Side1,2); | |
1988 Side1 = _mm_insert_epi16(Side1,1,0); | |
1989 | |
1990 Down1 = _mm_insert_epi16(Down1,1,0); | |
1991 Down1 = _mm_slli_si128(Down1,2); | |
1992 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
1993 | |
1994 Diag = _mm_xor_si128(Diag, Diag); | |
1995 if( i%2 == 0) | |
1996 { | |
1997 Diag = _mm_insert_epi16(Diag,2*e,0); | |
1998 | |
1999 for(j=1;j<=i-1;j++) | |
2000 { | |
2001 Diag = _mm_slli_si128(Diag, 2); | |
2002 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0); | |
2003 } | |
2004 Diag = _mm_slli_si128(Diag, 2); | |
2005 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
2006 | |
2007 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag); | |
2008 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1); | |
2009 | |
2010 } | |
2011 | |
2012 else | |
2013 { | |
2014 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2015 for(j=i/2-1;j>=-i/2;j--) | |
2016 { | |
2017 Diag = _mm_slli_si128(Diag, 2); | |
2018 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0); | |
2019 } | |
2020 Diag = _mm_slli_si128(Diag, 2); | |
2021 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
2022 | |
2023 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
2024 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
2025 | |
2026 } | |
2027 } | |
2028 Error = _mm_xor_si128(Error, Error); | |
2029 Side2 = _mm_xor_si128(Side2, Side2); | |
2030 Side1 = _mm_xor_si128(Side1, Side1); | |
2031 Down2 = _mm_xor_si128(Down2, Down2); | |
2032 Down1 = _mm_xor_si128(Down1, Down1); | |
2033 | |
2034 | |
2035 Error = _mm_insert_epi16(Error,e,0); | |
2036 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
2037 Side1 = _mm_insert_epi16(Side2,2*e,0); | |
2038 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2039 | |
2040 | |
2041 for(j=0; j < e; j++) | |
2042 { | |
2043 Side2 = _mm_slli_si128(Side2, 2); | |
2044 Side2 = _mm_insert_epi16(Side2,1,0); | |
2045 | |
2046 Side1 = _mm_slli_si128(Side1, 2); | |
2047 Side1 = _mm_insert_epi16(Side1,1,0); | |
2048 | |
2049 Down1 = _mm_slli_si128(Down1, 2); | |
2050 Down1 = _mm_insert_epi16(Down1,1,0); | |
2051 | |
2052 Down2 = _mm_slli_si128(Down2, 2); | |
2053 Down2 = _mm_insert_epi16(Down2,1,0); | |
2054 | |
2055 Error = _mm_slli_si128(Error, 2); | |
2056 Error = _mm_insert_epi16(Error, e, 0); | |
2057 } | |
2058 | |
2059 Down2= _mm_slli_si128(Down2, 2); | |
2060 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
2061 | |
2062 for(; i <= 2*lenb-(e-1);i++) | |
2063 { | |
2064 flag = 0; | |
2065 Diag = _mm_xor_si128(Diag, Diag); | |
2066 if( i%2 == 0) | |
2067 { | |
2068 for(j=e/2;j>=-e/2;j--) | |
2069 { | |
2070 Diag = _mm_slli_si128(Diag, 2); | |
2071 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0); | |
2072 } | |
2073 | |
2074 | |
2075 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
2076 R0 = _mm_min_epi16(R0, R1+Down1); | |
2077 | |
2078 if(_mm_extract_epi16(R0,0) <= e) | |
2079 flag = 1; | |
2080 | |
2081 tmp = _mm_srli_si128(R0,2); | |
2082 for(j=0; j < e-1;j++) | |
2083 { | |
2084 if(_mm_extract_epi16(tmp,0) <= e) | |
2085 flag = 1; | |
2086 tmp = _mm_srli_si128(tmp,2); | |
2087 } | |
2088 // printf("#%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2)); | |
2089 if(flag == 0) | |
2090 return -1; | |
2091 | |
2092 if(i == 2*lenb-(e-1)) | |
2093 { | |
2094 tmp = _mm_srli_si128(R0,2); | |
2095 for(k=0; k < e-2;k++) | |
2096 tmp = _mm_srli_si128(tmp,2); | |
2097 minError = _mm_extract_epi16(tmp,0); | |
2098 } | |
2099 | |
2100 } | |
2101 | |
2102 else | |
2103 { | |
2104 for(j=e/2;j>=-e/2-1;j--) | |
2105 { | |
2106 Diag = _mm_slli_si128(Diag, 2); | |
2107 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i)/2-j-1],0); | |
2108 } | |
2109 | |
2110 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
2111 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
2112 | |
2113 //printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2), | |
2114 // _mm_extract_epi16(R1,3)); | |
2115 | |
2116 if(i >= 2*lenb-e) | |
2117 { | |
2118 tmp = _mm_srli_si128(R1,2); | |
2119 for(k=0; k < e-1;k++) | |
2120 tmp = _mm_srli_si128(tmp,2); | |
2121 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
2122 } | |
2123 } | |
2124 } | |
2125 | |
2126 //first cell | |
2127 Diag = _mm_xor_si128(Diag,Diag); | |
2128 Diag = _mm_insert_epi16(Diag, b[lenb-3] != a[lena], 0); | |
2129 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena-1], 1); | |
2130 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-2], 2); | |
2131 Diag = _mm_insert_epi16(Diag, 2*e, 3); | |
2132 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
2133 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
2134 | |
2135 | |
2136 minError = min(minError, _mm_extract_epi16(R1,2)); | |
2137 | |
2138 //second cell | |
2139 Diag = _mm_xor_si128(Diag,Diag); | |
2140 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena], 0); | |
2141 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-1], 1); | |
2142 Diag = _mm_insert_epi16(Diag, 2*e, 2); | |
2143 | |
2144 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
2145 R0 = _mm_min_epi16(R0, R1+Down1); | |
2146 | |
2147 | |
2148 minError = min(minError, _mm_extract_epi16(R0,1)); | |
2149 | |
2150 //third cell | |
2151 Diag = _mm_xor_si128(Diag,Diag); | |
2152 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena+1], 0); | |
2153 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena], 1); | |
2154 Diag = _mm_insert_epi16(Diag, 2*e, 2); | |
2155 | |
2156 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
2157 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
2158 | |
2159 | |
2160 minError = min(minError, _mm_extract_epi16(R1,1)); | |
2161 | |
2162 //forth | |
2163 Diag = _mm_xor_si128(Diag,Diag); | |
2164 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+1], 0); | |
2165 Diag = _mm_insert_epi16(Diag, 2*e, 1); | |
2166 | |
2167 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag); | |
2168 R0 = _mm_min_epi16(R0, R1+Down1); | |
2169 | |
2170 minError = min(minError, _mm_extract_epi16(R0,0)); | |
2171 | |
2172 //fifth | |
2173 Diag = _mm_xor_si128(Diag,Diag); | |
2174 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+2], 0); | |
2175 Diag = _mm_insert_epi16(Diag, 2*e, 1); | |
2176 | |
2177 R1 = _mm_min_epi16(R0+Side2, R1+Diag); | |
2178 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2); | |
2179 | |
2180 | |
2181 minError = min(minError, _mm_extract_epi16(R1,0)); | |
2182 | |
2183 if(minError > e) | |
2184 return -1; | |
2185 return minError; | |
2186 | |
2187 } | |
2188 | |
/*
 * forwardEditDistanceSSE2G
 *
 * Bounded edit-distance check between a[0..lena) and b[0..lenb) using SSE2
 * registers as vectors of 16-bit lanes, with the error bound taken from the
 * global errThreshold.
 *
 * Returns:
 *   0                        if lena == 0 or lenb == 0;
 *   smallEditDistanceF(...)  if lenb <= errThreshold;
 *  -1                        if no inspected lane stays <= errThreshold during
 *                            the main loop, or if the final minimum exceeds
 *                            errThreshold;
 *   otherwise the minimum error collected in minError.
 *
 * R0 and R1 are updated alternately (R0 when i is even, R1 when i is odd).
 * Each update is the lane-wise minimum of three candidates, built from the
 * two rows plus the Side*, Diag and Down* cost vectors.  Diag lanes hold 0/1
 * character-mismatch flags; the value 2*e is used as a "too expensive"
 * filler in lanes that must not win the minimum.
 * NOTE(review): the a/b index pairs used for Diag have a constant index sum
 * for a given i, so this looks like an anti-diagonal sweep of a banded DP
 * matrix -- confirm against the algorithm description.
 * NOTE(review): `+` applied to __m128i operands is a compiler vector
 * extension, not an SSE2 intrinsic; confirm the lane width it adds with is
 * compatible with the 16-bit lanes used by _mm_min_epi16/_mm_insert_epi16.
 */
2189 int forwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb) | |
2190 { | |
2191 if(lenb == 0 || lena == 0) | |
2192 return 0; | |
2193 | |
2194 int i = 0; | |
2195 int j = 0; | |
2196 int k = 0; | |
2197 | |
2198 int e = errThreshold; | |
2199 | |
/* 2*e is the starting value for the running minimum; anything > e is rejected at the end. */
2200 int minError = 2*e; | |
2201 | |
2202 char flag = 0; | |
2203 | |
/* Short b is delegated to smallEditDistanceF instead of the vector code. */
2204 if(lenb <= e) | |
2205 { | |
2206 return smallEditDistanceF(a,lena,b,lenb); | |
2207 } | |
2208 | |
2209 | |
2210 __m128i R0, R1; | |
2211 __m128i Diag; | |
2212 __m128i Side1, Side2; | |
2213 __m128i Down1, Down2; | |
2214 __m128i Error; | |
2215 __m128i tmp; | |
2216 | |
2217 /* initialize */ | |
2218 R0 = _mm_setzero_si128 (); | |
2219 R1 = _mm_setzero_si128 (); | |
2220 Diag = _mm_setzero_si128 (); | |
2221 Side1 = _mm_setzero_si128 (); | |
2222 Side2 = _mm_setzero_si128 (); | |
2223 Down1 = _mm_setzero_si128 (); | |
2224 Down2 = _mm_setzero_si128 (); | |
2225 Error = _mm_setzero_si128 (); | |
2226 tmp = _mm_setzero_si128 (); | |
2227 /* end initialize */ | |
2228 | |
/* x ^ x yields zero; these registers are already zero from the block above. */
2229 R1 = _mm_xor_si128(R1, R1); | |
2230 R0 = _mm_xor_si128(R0, R0); | |
2231 | |
2232 Diag = _mm_xor_si128(Diag, Diag); | |
2233 Side1 = _mm_xor_si128(Side1, Side1); | |
2234 Down1 = _mm_xor_si128(Down1, Down1); | |
2235 | |
/* Seed the cost vectors: 1 is a unit step cost, 2*e is the filler value. */
2236 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2237 | |
2238 Side1 = _mm_insert_epi16(Side1,1,0); | |
2239 Side1 = _mm_insert_epi16(Side1,2*e,1); | |
2240 | |
2241 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2242 Down1 = _mm_insert_epi16(Down1,1,1); | |
2243 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
2244 | |
/* Starting rows: R0 lane 0 = 0, R1 lanes 0 and 1 = 1. */
2245 R0 = _mm_insert_epi16(R0,0,0); | |
2246 | |
2247 R1 = _mm_insert_epi16(R1,1,0); | |
2248 R1 = _mm_insert_epi16(R1,1,1); | |
2249 | |
/*
 * Phase 1 (i = 2 .. e): each step shifts one more lane into Side1, Down1 and
 * Diag (a 2-byte shift moves every 16-bit lane up by one) before updating
 * R0 (even i) or R1 (odd i).
 */
2250 for(i=2; i <= e; i++) | |
2251 { | |
2252 //set side | |
2253 Side1 = _mm_slli_si128(Side1,2); | |
2254 Side1 = _mm_insert_epi16(Side1,1,0); | |
2255 | |
2256 Down1 = _mm_insert_epi16(Down1,1,0); | |
2257 Down1 = _mm_slli_si128(Down1,2); | |
2258 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2259 | |
2260 Diag = _mm_xor_si128(Diag, Diag); | |
2261 if( i%2 == 0) | |
2262 { | |
/* Diag = [2*e, mismatch flags for j = i-1 .. 1, 2*e] from lane 0 upward. */
2263 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2264 | |
2265 for(j=1;j<=i-1;j++) | |
2266 { | |
2267 Diag = _mm_slli_si128(Diag, 2); | |
2268 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0); | |
2269 } | |
2270 Diag = _mm_slli_si128(Diag, 2); | |
2271 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
2272 | |
2273 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag); | |
2274 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1); | |
2275 } | |
2276 | |
2277 else | |
2278 { | |
2279 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2280 for(j=i/2-1;j>=-i/2;j--) | |
2281 { | |
2282 Diag = _mm_slli_si128(Diag, 2); | |
2283 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0); | |
2284 } | |
2285 Diag = _mm_slli_si128(Diag, 2); | |
2286 Diag = _mm_insert_epi16(Diag, 2*e,0); | |
2287 | |
2288 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
2289 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
2290 } | |
2291 } | |
/*
 * Rebuild the cost vectors used by the main loop below.  After the loop over
 * j, Side2/Down1/Down2/Error each hold e lanes of their repeated value
 * shifted in above their seed lane.
 * Error is filled here but is not read again in this function.
 */
2292 Error = _mm_xor_si128(Error, Error); | |
2293 Side2 = _mm_xor_si128(Side2, Side2); | |
2294 Down2 = _mm_xor_si128(Down2, Down2); | |
2295 Down1 = _mm_xor_si128(Down1, Down1); | |
2296 | |
2297 Error = _mm_insert_epi16(Error,e,0); | |
2298 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
2299 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2300 | |
2301 | |
2302 for(j=0; j < e; j++) | |
2303 { | |
2304 Side2 = _mm_slli_si128(Side2, 2); | |
2305 Side2 = _mm_insert_epi16(Side2,1,0); | |
2306 | |
2307 Down1 = _mm_slli_si128(Down1, 2); | |
2308 Down1 = _mm_insert_epi16(Down1,1,0); | |
2309 | |
2310 Down2 = _mm_slli_si128(Down2, 2); | |
2311 Down2 = _mm_insert_epi16(Down2,1,0); | |
2312 | |
2313 Error = _mm_slli_si128(Error, 2); | |
2314 Error = _mm_insert_epi16(Error, e, 0); | |
2315 } | |
2316 | |
2317 Down2= _mm_slli_si128(Down2, 2); | |
2318 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
2319 | |
/*
 * Phase 2: main loop, continuing from the i left by phase 1 up to
 * 2*lenb-(e-1) inclusive.
 */
2320 for(; i <= 2*lenb-(e-1);i++) | |
2321 { | |
2322 flag = 0; | |
2323 Diag = _mm_xor_si128(Diag, Diag); | |
2324 if( i%2 == 0) | |
2325 { | |
2326 for(j=e/2;j>=-e/2;j--) | |
2327 { | |
2328 Diag = _mm_slli_si128(Diag, 2); | |
2329 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0); | |
2330 } | |
2331 | |
2332 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
2333 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
2334 | |
2335 | |
/* Early exit test: flag is set if lane 0 or any of the next e-1 lanes of R0 is <= e. */
2336 if(_mm_extract_epi16(R0,0) <= e) | |
2337 flag = 1; | |
2338 | |
2339 tmp = _mm_srli_si128(R0,2); | |
2340 for(j=0; j < e-1;j++) | |
2341 { | |
2342 if(_mm_extract_epi16(tmp,0) <= e) | |
2343 flag = 1; | |
2344 tmp = _mm_srli_si128(tmp,2); | |
2345 } | |
2346 | |
2347 | |
2348 if(flag == 0) | |
2349 return -1; | |
2350 | |
/* At i == 2*lenb-e, minError is overwritten with the R0 lane reached after 1 + (e-1) one-lane right shifts. */
2351 if(i == 2*lenb-e) | |
2352 { | |
2353 tmp = _mm_srli_si128(R0,2); | |
2354 for(k=0; k < e-1;k++) | |
2355 tmp = _mm_srli_si128(tmp,2); | |
2356 minError = _mm_extract_epi16(tmp,0); | |
2357 } | |
2358 | |
2359 } | |
2360 | |
2361 else | |
2362 { | |
2363 for(j=-e/2+1;j<=e/2;j++) | |
2364 { | |
2365 Diag = _mm_slli_si128(Diag, 2); | |
2366 Diag = _mm_insert_epi16(Diag, b[(i+1)/2-j-1] != a[(i-1)/2+j-1],0); | |
2367 } | |
2368 | |
2369 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
2370 R1 = _mm_min_epi16(R1, R0+Down1); | |
2371 | |
/* From i >= 2*lenb-e on, fold the R1 lane reached after 1 + (e-2) one-lane right shifts into minError. */
2372 if(i >= 2*lenb-e) | |
2373 { | |
2374 tmp = _mm_srli_si128(R1,2); | |
2375 for(k=0; k < e-2;k++) | |
2376 tmp = _mm_srli_si128(tmp,2); | |
2377 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
2378 } | |
2379 } | |
2380 } | |
2381 | |
/*
 * Phase 3: 2*(e-2)+1 further steps.  Diag is built from the tail of b
 * (b[lenb-1-k]) against a[(i-lenb)-1+k]; tmpE is decremented on the j == 0
 * and even-j steps and sets both the Diag length and the lane folded into
 * minError.  i is advanced manually at the bottom of the loop body.
 */
2382 j=0; | |
2383 int tmpE = e; | |
2384 for(;j<2*(e-2)+1;j++) | |
2385 { | |
2386 | |
2387 Diag = _mm_xor_si128(Diag, Diag); | |
2388 //set the first element | |
2389 if(j==0) | |
2390 { | |
2391 for( k=0;k<=e-1;k++ ) | |
2392 { | |
2393 Diag = _mm_slli_si128(Diag, 2); | |
2394 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
2395 } | |
2396 | |
2397 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
2398 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
2399 | |
2400 tmpE--; | |
2401 | |
2402 tmp = _mm_srli_si128(R0,2); | |
2403 for(k=0; k < e-2;k++) | |
2404 tmp = _mm_srli_si128(tmp,2); | |
2405 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
2406 } | |
2407 else if(j%2 == 0) | |
2408 { | |
2409 for(k=0;k<tmpE;k++) | |
2410 { | |
2411 Diag = _mm_slli_si128(Diag, 2); | |
2412 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
2413 } | |
2414 | |
2415 R0 = _mm_min_epi16(R1+Side2, R0+Diag); | |
2416 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2); | |
2417 | |
2418 tmpE--; | |
2419 | |
2420 tmp = _mm_srli_si128(R0,2); | |
2421 for(k=0; k < tmpE-1;k++) | |
2422 tmp = _mm_srli_si128(tmp,2); | |
2423 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
2424 } | |
2425 | |
2426 | |
2427 else | |
2428 { | |
2429 for(k=0;k<tmpE;k++) | |
2430 { | |
2431 Diag = _mm_slli_si128(Diag, 2); | |
2432 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0); | |
2433 } | |
2434 | |
2435 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag); | |
2436 R1 = _mm_min_epi16(R1, R0+Down1); | |
2437 | |
2438 tmp = _mm_srli_si128(R1,2); | |
2439 for(k=0; k < tmpE-1;k++) | |
2440 tmp = _mm_srli_si128(tmp,2); | |
2441 minError = min(minError, _mm_extract_epi16(tmp,0)); | |
2442 } | |
2443 i++; | |
2444 } | |
2445 //Diag | |
2446 | |
/*
 * Final two updates: compare a[lenb+e-2] and then a[lenb+e-1] with the last
 * character of b, folding R1 lane 1 and then R0 lane 0 into minError.
 */
2447 Diag = _mm_xor_si128(Diag,Diag); | |
2448 Diag = _mm_insert_epi16(Diag, 2*e, 0); | |
2449 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1); | |
2450 | |
2451 Side1 = _mm_insert_epi16(Side1,1,0); | |
2452 Side1 = _mm_insert_epi16(Side1,1,1); | |
2453 | |
2454 Down1 = _mm_insert_epi16(Down1, 2*e, 0); | |
2455 Down1 = _mm_insert_epi16(Down1, 1, 1); | |
2456 | |
2457 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
2458 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1); | |
2459 | |
2460 minError = min(minError, _mm_extract_epi16(R1,1)); | |
2461 | |
2462 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 1); | |
2463 Down1 = _mm_insert_epi16(Down1, 1, 0); | |
2464 | |
2465 R0 = _mm_min_epi16(R1+Down1,R0+Diag); | |
2466 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1); | |
2467 | |
2468 minError = min(minError, _mm_extract_epi16(R0,0)); | |
2469 | |
/* Anything above the threshold is reported as "no alignment" (-1). */
2470 if(minError > e) | |
2471 return -1; | |
2472 return minError; | |
2473 } | |
2474 | |
2475 | |
/*
 * forwardEditDistance2SSE2
 *
 * SSE2 edit-distance check between a[0..lena) and b[0..lenb) with the error
 * bound hard-coded to e = 2 (three 16-bit lanes are used per row).
 *
 * Returns:
 *   0                        if lena == 0 or lenb == 0;
 *   smallEditDistanceF(...)  if lenb <= 2;
 *  -1                        on the early-exit test inside the loop, or when
 *                            the final totalError exceeds 2;
 *   otherwise totalError.
 *
 * R0 is updated on even i and R1 on odd i; each update is the lane-wise
 * minimum of three sums built from the rows and the Side1/Diag/Down* cost
 * vectors.  Diag lanes carry 0/1 mismatch flags and 2*e is used as filler.
 * NOTE(review): totalError starts at 0 and is only overwritten at
 * i == 2*lenb-2 inside the loop (which runs while i < 2*lena); if that
 * iteration is not reached, every later min() keeps it at 0 -- confirm
 * callers guarantee lenb <= lena.
 * NOTE(review): `+` on __m128i operands is a compiler vector extension, not
 * an SSE2 intrinsic; confirm its lane width is compatible with the 16-bit
 * lanes used here.
 */
2476 int forwardEditDistance2SSE2(char *a, int lena, char *b,int lenb) | |
2477 { | |
2478 if(lenb == 0 || lena == 0) | |
2479 return 0; | |
2480 | |
2481 | |
2482 | |
2483 int i0 = 0; | |
2484 int i1 = 0; | |
2485 | |
2486 | |
2487 int error; //0: if the two character are equal 1: if not | |
2488 | |
2489 int i = 0; //loop index | |
2490 | |
2491 int e = 2; //error bound | |
2492 | |
2493 int totalError = 0; | |
2494 | |
2495 __m128i R0; | |
2496 __m128i R1; | |
2497 | |
2498 __m128i Side1, Side2,Side; //side matrix | |
2499 __m128i Down1, Down2,Down; //down matrix | |
2500 __m128i Diag; | |
2501 | |
2502 __m128i tmp; | |
2503 __m128i ERROR_REACH; | |
2504 | |
2505 /* initialize */ | |
2506 R0 = _mm_setzero_si128 (); | |
2507 R1 = _mm_setzero_si128 (); | |
2508 Diag = _mm_setzero_si128 (); | |
2509 Side1 = _mm_setzero_si128 (); | |
2510 Side2 = _mm_setzero_si128 (); | |
2511 Down1 = _mm_setzero_si128 (); | |
2512 Down2 = _mm_setzero_si128 (); | |
2513 Side = _mm_setzero_si128 (); | |
2514 Down = _mm_setzero_si128 (); | |
2515 tmp = _mm_setzero_si128 (); | |
2516 ERROR_REACH = _mm_setzero_si128 (); | |
2517 /* end initialize */ | |
2518 | |
2519 | |
/* Short b is delegated to smallEditDistanceF instead of the vector code. */
2520 if(lenb <= e) | |
2521 { | |
2522 return smallEditDistanceF(a,lena,b,lenb); | |
2523 } | |
2524 | |
/* _mm_set_epi16 lists lanes from highest to lowest, so lanes 0..2 hold e. */
2525 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e); | |
2526 | |
2527 R0 = _mm_insert_epi16(R0,0,0); | |
2528 | |
2529 R1 = _mm_insert_epi16(R1,1,0); | |
2530 R1 = _mm_insert_epi16(R1,1,1); | |
2531 | |
2532 // error = ((a[0]) != (b[0])); | |
2533 | |
/* Diag lanes 0..2 = [2*e, a[0] != b[0], 2*e]; the other vectors mix unit costs (1) with 2*e filler. */
2534 Diag = _mm_set_epi16(0,0,0,0,0,2*e,((a[0]) != (b[0])),2*e); | |
2535 Side1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1); | |
2536 Side2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e); | |
2537 Down1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1); | |
2538 Down2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e); | |
2539 | |
2540 tmp = _mm_slli_si128(R1,2); | |
2541 | |
2542 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2543 R0 = _mm_min_epi16(R0,tmp+Down2); | |
2544 | |
/* Main loop: odd i updates R1 (two Diag lanes), even i updates R0 (three Diag lanes). */
2545 for (i = 3; i < 2*lena; i++) | |
2546 { | |
2547 if(i % 2 ==1) | |
2548 { | |
2549 | |
2550 Diag = _mm_xor_si128(Diag, Diag); | |
2551 error = ((a[(i+1)/2-1]) != (b[(i-1)/2-1])); | |
2552 Diag = _mm_insert_epi16(Diag,error,0); | |
2553 error = ((a[(i-1)/2-1]) != (b[(i+1)/2-1])); | |
2554 Diag = _mm_insert_epi16(Diag,error,1); | |
2555 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0, ((a[(i-1)/2-1]) != (b[(i+1)/2-1])) ,((a[(i+1)/2-1]) != (b[(i-1)/2-1]))); | |
2556 | |
2557 | |
2558 tmp = _mm_srli_si128(R0,2); | |
2559 | |
2560 R1 = _mm_min_epi16(tmp+Side1, R1+Diag); | |
2561 R1 = _mm_min_epi16(R1,R0+Down1); | |
2562 | |
/* Once past i == 2*lenb-2, fold R1 lane 1 into the running minimum. */
2563 if(i > 2 * lenb - 2) | |
2564 { | |
2565 i1 = _mm_extract_epi16(R1, 1); | |
2566 totalError = min(totalError, i1); | |
2567 } | |
2568 } | |
2569 | |
2570 else if(i % 2 == 0) | |
2571 { | |
2572 error = ((a[i/2]) != (b[i/2-2])); | |
2573 Diag = _mm_insert_epi16(Diag,error,0); | |
2574 error = ((a[i/2-1]) != (b[i/2-1])); | |
2575 Diag = _mm_insert_epi16(Diag,error,1); | |
2576 error = ((a[i/2-2]) != (b[i/2])); | |
2577 Diag = _mm_insert_epi16(Diag,error,2); | |
2578 | |
2579 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, ((a[i/2-2]) != (b[i/2])) , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) ); | |
2580 | |
2581 tmp = _mm_slli_si128(R1,2); | |
2582 | |
2583 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2584 R0 = _mm_min_epi16(R0,tmp+Down2); | |
2585 | |
/*
 * Early exit: i0 == 63 means the top bit of each of the low six bytes of
 * (ERROR_REACH - R0) is set.  Note that R1 is compared against the global
 * errThreshold here, not against the local e.
 */
2586 tmp = _mm_sub_epi16(ERROR_REACH, R0); | |
2587 i0 = _mm_movemask_epi8(tmp); | |
2588 | |
2589 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2) | |
2590 return -1; | |
2591 if(i == 2 * lenb - 2) { | |
2592 totalError = _mm_extract_epi16(R0, 2); | |
2593 } | |
2594 } | |
2595 } | |
2596 | |
/* After the loop, three closing updates follow with progressively fewer live lanes. */
2597 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2598 | |
2599 //fill the first part of the error | |
2600 error = ((a[i/2]) != (b[i/2-2])); | |
2601 Diag = _mm_insert_epi16(Diag,error,0); | |
2602 error = ((a[i/2-1]) != (b[i/2-1])); | |
2603 Diag = _mm_insert_epi16(Diag,error,1); | |
2604 Diag = _mm_insert_epi16(Diag,2*e,2); | |
2605 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) ); | |
2606 | |
2607 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2608 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1); | |
2609 | |
2610 // i0 = _mm_extract_epi16(R0, 0); | |
2611 i1 = _mm_extract_epi16(R0, 1); | |
2612 | |
2613 totalError = min(totalError, i1); | |
2614 | |
2615 //fill the second part of the error | |
2616 i++; | |
2617 | |
2618 Diag = _mm_xor_si128(Diag, Diag); | |
2619 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2620 error = ((a[i/2]) != (b[lenb-1])); | |
2621 Diag = _mm_insert_epi16(Diag,error,1); | |
2622 Diag = _mm_insert_epi16(Diag,2*e,2); | |
2623 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2]) != (b[lenb-1])) , 2*e ); | |
2624 | |
2625 | |
2626 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
2627 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1); | |
2628 | |
2629 // i0 = _mm_extract_epi16(R1, 0); | |
2630 i1 = _mm_extract_epi16(R1, 1); | |
2631 | |
2632 totalError = min(totalError, i1); | |
2633 //fill the last the last element of the matrix | |
2634 i++; | |
2635 | |
2636 Diag = _mm_xor_si128(Diag, Diag); | |
2637 error = ((a[i/2]) != (b[lenb-1])); | |
2638 Diag = _mm_insert_epi16(Diag,error,0); | |
2639 | |
2640 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0 , 0 , ((a[i/2]) != (b[lenb-1])) ); | |
2641 | |
2642 | |
2643 Down = _mm_insert_epi16(Down,1,0); | |
2644 | |
2645 Side = _mm_insert_epi16(Side,1,0); | |
2646 | |
2647 tmp = _mm_srli_si128(R1,2); | |
2648 | |
2649 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag); | |
2650 R0 = _mm_min_epi16(R0,tmp+Side); | |
2651 | |
2652 i0 = _mm_extract_epi16(R0, 0); | |
2653 | |
2654 totalError = min(totalError, i0); | |
2655 | |
/* Anything above the bound is reported as "no alignment" (-1). */
2656 if(totalError > e) | |
2657 return -1; | |
2658 | |
2659 return totalError; | |
2660 | |
2661 } | |
2662 | |
/*
 * backwardEditDistance2SSE2
 *
 * SSE2 edit-distance check with the error bound hard-coded to e = 2, reading
 * both strings backwards: characters are fetched as *(a - k) and *(b - k),
 * so a and b are dereferenced at and below the addresses passed in.
 *
 * Returns:
 *   0                        if lena == 0 or lenb == 0;
 *   smallEditDistanceB(...)  if lenb <= 2;
 *  -1                        on the early-exit test inside the loop, when the
 *                            final totalError exceeds 2, or when the final
 *                            totalError is 0;
 *   otherwise totalError.
 *
 * R0 is updated on even i and R1 on odd i; each update is the lane-wise
 * minimum of three sums built from the rows and the Side1/Diag/Down* cost
 * vectors.  Diag lanes carry 0/1 mismatch flags and 2*e is used as filler.
 * NOTE(review): unlike forwardEditDistance2SSE2, a final totalError of 0 is
 * mapped to -1 here -- confirm this difference is intended.
 * NOTE(review): `+` on __m128i operands is a compiler vector extension, not
 * an SSE2 intrinsic; confirm its lane width is compatible with the 16-bit
 * lanes used here.
 */
2663 int backwardEditDistance2SSE2(char *a, int lena, char *b,int lenb) | |
2664 { | |
2665 if(lenb == 0 || lena == 0) | |
2666 return 0; | |
2667 | |
2668 int i0 = 0; | |
2669 int i1 = 0; | |
2670 | |
2671 int error; //0: if the two character are equal 1: if not | |
2672 | |
2673 int i = 0; //loop index | |
2674 | |
2675 int e = 2; //error bound | |
2676 | |
2677 int totalError = 0; | |
2678 | |
2679 __m128i R0; | |
2680 __m128i R1; | |
2681 | |
2682 __m128i Side1, Side2,Side; //side matrix | |
2683 __m128i Down1, Down2,Down; //down matrix | |
2684 __m128i Diag; //diag matrix | |
2685 | |
2686 __m128i tmp; | |
2687 __m128i ERROR_REACH; | |
2688 | |
2689 /* initialize */ | |
2690 R0 = _mm_setzero_si128 (); | |
2691 R1 = _mm_setzero_si128 (); | |
2692 Diag = _mm_setzero_si128 (); | |
2693 Side1 = _mm_setzero_si128 (); | |
2694 Side2 = _mm_setzero_si128 (); | |
2695 Side = _mm_setzero_si128 (); | |
2696 Down1 = _mm_setzero_si128 (); | |
2697 Down2 = _mm_setzero_si128 (); | |
2698 Down = _mm_setzero_si128 (); | |
2699 ERROR_REACH = _mm_setzero_si128 (); | |
2700 tmp = _mm_setzero_si128 (); | |
2701 /* end initialize */ | |
2702 | |
/* Short b is delegated to smallEditDistanceB instead of the vector code. */
2703 if(lenb <= e) | |
2704 { | |
2705 return smallEditDistanceB(a,lena,b,lenb); | |
2706 } | |
2707 | |
2708 | |
/* _mm_set_epi16 lists lanes from highest to lowest, so lanes 0..2 hold e. */
2709 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e); | |
2710 | |
2711 R0 = _mm_insert_epi16(R0,0,0); | |
2712 | |
2713 R1 = _mm_insert_epi16(R1,1,0); | |
2714 R1 = _mm_insert_epi16(R1,1,1); | |
2715 | |
2716 error = ((a[0]) != (b[0])); | |
2717 | |
/* Diag lanes 0..2 = [2*e, a[0] != b[0], 2*e]; the other vectors mix unit costs (1) with 2*e filler. */
2718 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2719 Diag = _mm_insert_epi16(Diag,error,1); | |
2720 Diag = _mm_insert_epi16(Diag,2*e,2); | |
2721 | |
2722 Side1 = _mm_insert_epi16(Side1,1,0); | |
2723 Side1 = _mm_insert_epi16(Side1,1,1); | |
2724 Side1 = _mm_insert_epi16(Side1,2*e,2); | |
2725 | |
2726 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
2727 Side2 = _mm_insert_epi16(Side2,1,1); | |
2728 Side2 = _mm_insert_epi16(Side2,1,2); | |
2729 | |
2730 Down1 = _mm_insert_epi16(Down1,1,0); | |
2731 Down1 = _mm_insert_epi16(Down1,1,1); | |
2732 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
2733 | |
2734 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
2735 Down2 = _mm_insert_epi16(Down2,1,1); | |
2736 Down2 = _mm_insert_epi16(Down2,1,2); | |
2737 | |
2738 tmp = _mm_slli_si128(R1,2); | |
2739 | |
2740 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2741 R0 = _mm_min_epi16(R0,tmp+Down2); | |
2742 | |
2743 // printf("%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2)); | |
/* Main loop: odd i updates R1 (two Diag lanes), even i updates R0 (three Diag lanes). */
2744 for (i = 3; i < 2*lena; i++) | |
2745 { | |
2746 if(i % 2 ==1) | |
2747 { | |
/* x - x zeroes Diag before the two mismatch lanes are written. */
2748 Diag = _mm_sub_epi8(Diag, Diag); | |
2749 error = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) ); | |
2750 Diag = _mm_insert_epi16(Diag,error,0); | |
2751 error = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) ); | |
2752 Diag = _mm_insert_epi16(Diag,error,1); | |
2753 //printf("#%d #%d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1)); | |
2754 tmp = _mm_srli_si128(R0,2); | |
2755 | |
2756 R1 = _mm_min_epi16(tmp+Side1, R1+Diag); | |
2757 R1 = _mm_min_epi16(R1,R0+Down1); | |
2758 | |
/* Once past i == 2*lenb-2, fold R1 lane 1 into the running minimum. */
2759 if(i > 2 * lenb - 2) { | |
2760 i1 = _mm_extract_epi16(R1, 1); | |
2761 totalError = min(totalError, i1); | |
2762 } | |
2763 // printf("%d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1)); | |
2764 } | |
2765 | |
2766 else if(i % 2 == 0) | |
2767 { | |
2768 error = ( *(a-(i/2)) != *(b-(i/2-2)) ); | |
2769 Diag = _mm_insert_epi16(Diag,error,0); | |
2770 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) ); | |
2771 Diag = _mm_insert_epi16(Diag,error,1); | |
2772 error = ( *(a-(i/2-2)) != *(b-(i/2))); | |
2773 Diag = _mm_insert_epi16(Diag,error,2); | |
2774 | |
2775 tmp = _mm_slli_si128(R1,2); | |
2776 | |
2777 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2778 R0 = _mm_min_epi16(R0,tmp+Down2); | |
2779 | |
/*
 * Early exit: i0 == 63 means the top bit of each of the low six bytes of
 * (ERROR_REACH - R0) is set.  Note that R1 is compared against the global
 * errThreshold here, not against the local e.
 */
2780 tmp = _mm_sub_epi16(ERROR_REACH, R0); | |
2781 i0 = _mm_movemask_epi8(tmp); | |
2782 | |
2783 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2) | |
2784 return -1; | |
2785 | |
2786 if(i == 2 * lenb - 2) { | |
2787 totalError = _mm_extract_epi16(R0, 2); | |
2788 } | |
2789 } | |
2790 } | |
/* After the loop, three closing updates follow with progressively fewer live lanes. */
2791 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
2792 | |
2793 //fill the first part of the error | |
2794 error = ( *(a-(i/2)) != *(b-(i/2-2)) ); | |
2795 Diag = _mm_insert_epi16(Diag,error,0); | |
2796 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) ); | |
2797 Diag = _mm_insert_epi16(Diag,error,1); | |
2798 Diag = _mm_insert_epi16(Diag,2*e,2); | |
2799 | |
2800 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
2801 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1); | |
2802 | |
2803 i0 = _mm_extract_epi16(R0, 0); | |
2804 i1 = _mm_extract_epi16(R0, 1); | |
2805 | |
2806 totalError = min(totalError, i1); | |
2807 | |
2808 //fill the second part of the error | |
2809 i++; | |
2810 Diag = _mm_sub_epi8(Diag, Diag); | |
2811 Diag = _mm_insert_epi16(Diag,2*e,0); | |
2812 error = ( *(a-(i/2)) != *(b-(lenb-1)) ); | |
2813 Diag = _mm_insert_epi16(Diag,error,1); | |
2814 Diag = _mm_insert_epi16(Diag,2*e,2); | |
2815 | |
2816 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
2817 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1); | |
2818 | |
2819 i0 = _mm_extract_epi16(R1, 0); | |
2820 i1 = _mm_extract_epi16(R1, 1); | |
2821 | |
2822 totalError = min(totalError, i1); | |
2823 | |
2824 //fill the last the last element of the matrix | |
2825 i++; | |
2826 Diag = _mm_sub_epi8(Diag, Diag); | |
2827 error = ( *(a-(i/2)) != *(b-(lenb-1)) ); | |
2828 Diag = _mm_insert_epi16(Diag,error,0); | |
2829 | |
2830 Down = _mm_insert_epi16(Down,1,0); | |
2831 | |
2832 Side = _mm_insert_epi16(Side,1,0); | |
2833 | |
2834 tmp = _mm_srli_si128(R1,2); | |
2835 | |
2836 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag); | |
2837 R0 = _mm_min_epi16(R0,tmp+Side); | |
2838 | |
2839 i0 = _mm_extract_epi16(R0, 0); | |
2840 | |
2841 totalError = min(totalError, i0); | |
2842 | |
/* Both "above the bound" and "exactly zero" are reported as -1 here. */
2843 if(totalError > e || totalError == 0) | |
2844 return -1; | |
2845 return totalError; | |
2846 } | |
2847 | |
2848 void initBestMapping(int totalReadNumber) | |
2849 { | |
2850 int i = 0; | |
2851 bestHitMappingInfo = getMem(totalReadNumber * sizeof(BestFullMappingInfo)); | |
2852 for(i = 0; i < totalReadNumber; i++) { | |
2853 bestHitMappingInfo[i].loc = -1; | |
2854 } | |
2855 } | |
2856 | |
2857 | |
2858 void finalizeBestSingleMapping() | |
2859 { | |
2860 int i = 0; | |
2861 char *_tmpQual, *_tmpSeq; | |
2862 char rqual[SEQ_LENGTH + 1]; | |
2863 rqual[SEQ_LENGTH]='\0'; | |
2864 | |
2865 for(i = 0; i < _msf_seqListSize; i++) | |
2866 { | |
2867 if(_msf_seqList[i].hits[0] != 0) | |
2868 { | |
2869 if (bestHitMappingInfo[i].dir) | |
2870 { | |
2871 reverse(_msf_seqList[i].qual, rqual, SEQ_LENGTH); | |
2872 _tmpQual = rqual; | |
2873 _tmpSeq = _msf_seqList[i].rseq; | |
2874 } | |
2875 else | |
2876 { | |
2877 _tmpQual = _msf_seqList[i].qual; | |
2878 _tmpSeq = _msf_seqList[i].seq; | |
2879 } | |
2880 | |
2881 | |
2882 _msf_output.QNAME = _msf_seqList[i].name; | |
2883 _msf_output.FLAG = 16 * bestHitMappingInfo[i].dir; | |
2884 _msf_output.RNAME = bestHitMappingInfo[i].chr; | |
2885 | |
2886 _msf_output.POS = bestHitMappingInfo[i].loc; | |
2887 _msf_output.MAPQ = 255; | |
2888 _msf_output.CIGAR = bestHitMappingInfo[i].cigar ; | |
2889 _msf_output.MRNAME = "*"; | |
2890 _msf_output.MPOS = 0; | |
2891 _msf_output.ISIZE = 0; | |
2892 | |
2893 | |
2894 _msf_output.SEQ = _tmpSeq; | |
2895 _msf_output.QUAL = _tmpQual; | |
2896 | |
2897 _msf_output.optSize = 2; | |
2898 _msf_output.optFields = _msf_optionalFields; | |
2899 | |
2900 _msf_optionalFields[0].tag = "NM"; | |
2901 _msf_optionalFields[0].type = 'i'; | |
2902 _msf_optionalFields[0].iVal = bestHitMappingInfo[i].err; | |
2903 | |
2904 _msf_optionalFields[1].tag = "MD"; | |
2905 _msf_optionalFields[1].type = 'Z'; | |
2906 _msf_optionalFields[1].sVal = bestHitMappingInfo[i].md; | |
2907 | |
2908 output(_msf_output); | |
2909 } | |
2910 } | |
2911 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo)); | |
2912 } | |
2913 /**********************************************/ | |
2914 int compare (const void *a, const void *b) | |
2915 { | |
2916 return ((Pair *)a)->hv - ((Pair *)b)->hv; | |
2917 /*char *s1 = ((Pair *)a)->hv; | |
2918 char *s2 = ((Pair *)b)->hv; | |
2919 int i = 0; | |
2920 | |
2921 int diff = 0; | |
2922 int sign = 0; | |
2923 | |
2924 for(i = 0; i < SEQ_LENGTH; i++) | |
2925 { | |
2926 diff += (s1[i] != s2[i]); | |
2927 if(s1[i] > s2[i]) | |
2928 sign++; | |
2929 else if(s1[i] < s2[i]) | |
2930 sign--; | |
2931 } | |
2932 | |
2933 return diff*sign;*/ | |
2934 // return strncmp(s1, s2,SEQ_LENGTH); | |
2935 | |
2936 } | |
2937 /**********************************************/ | |
2938 void preProcessReads() | |
2939 { | |
2940 int i = 0; | |
2941 | |
2942 _msf_sort_seqList = getMem(_msf_seqListSize * sizeof(Pair)); | |
2943 for(i = 0; i < _msf_seqListSize; i++) | |
2944 { | |
2945 _msf_sort_seqList[i].hv = hashVal(_msf_seqList[i].seq); | |
2946 | |
2947 _msf_sort_seqList[i].readNumber = i; | |
2948 } | |
2949 | |
2950 qsort(_msf_sort_seqList, _msf_seqListSize, sizeof(Pair), compare); | |
2951 | |
2952 /* | |
2953 for(i = 0; i < _msf_seqListSize; i++) | |
2954 { | |
2955 //printf("%s\n", _msf_sort_seqList[i].hv); | |
2956 } | |
2957 */ | |
2958 | |
2959 _msf_map_sort_seqList = getMem(_msf_seqListSize * sizeof(int)); | |
2960 | |
2961 for(i = 0; i < _msf_seqListSize; i++) | |
2962 _msf_map_sort_seqList[_msf_seqList[i].readNumber] = i; | |
2963 | |
2964 } | |
2965 /**********************************************/ | |
2966 | |
2967 int verifySingleEnd(int index, char* seq, int offset) | |
2968 { | |
2969 int curOff = 0; | |
2970 int i; | |
2971 | |
2972 char *ref; | |
2973 | |
2974 int err; | |
2975 int errCnt =0; | |
2976 int errCntOff = 0; | |
2977 int NCntOff = 0; | |
2978 | |
2979 ref = _msf_refGen + index - 1; | |
2980 | |
2981 verificationCnt++; | |
2982 | |
2983 for (i = 0; i < SEQ_LENGTH; i++) | |
2984 { | |
2985 err = *ref != *seq; | |
2986 errCnt += err; | |
2987 if (errCnt > errThreshold) | |
2988 { | |
2989 | |
2990 return -1; | |
2991 } | |
2992 | |
2993 if (i >= _msf_samplingLocs[curOff] && i <= _msf_samplingLocsEnds[curOff]) | |
2994 { | |
2995 errCntOff += err; | |
2996 NCntOff += (*seq == 'N'); | |
2997 } | |
2998 else if (curOff < _msf_samplingLocsSize && i>=_msf_samplingLocs[curOff+1]) | |
2999 { | |
3000 | |
3001 if (errCntOff == 0 && NCntOff == 0 && offset > curOff) | |
3002 { | |
3003 return -1; | |
3004 } | |
3005 | |
3006 errCntOff = 0; | |
3007 NCntOff = 0; | |
3008 curOff++; | |
3009 | |
3010 if ( i >= _msf_samplingLocs[curOff]) | |
3011 { | |
3012 errCntOff += err; | |
3013 NCntOff += (*seq == 'N'); | |
3014 } | |
3015 } | |
3016 | |
3017 ref++; | |
3018 seq++; | |
3019 } | |
3020 return errCnt; | |
3021 } | |
3022 | |
3023 /*********************************************/ | |
3024 void initFAST(Read *seqList, int seqListSize, int *samplingLocs, int samplingLocsSize, char *genFileName) | |
3025 { | |
3026 int i; | |
3027 | |
3028 if (_msf_optionalFields == NULL) | |
3029 { | |
3030 _msf_op = getMem(SEQ_LENGTH); | |
3031 if (pairedEndMode) | |
3032 { | |
3033 _msf_optionalFields = getMem(8*sizeof(OPT_FIELDS)); | |
3034 } | |
3035 else | |
3036 { | |
3037 _msf_optionalFields = getMem(2*sizeof(OPT_FIELDS)); | |
3038 } | |
3039 | |
3040 for (i=0; i<200;i++) | |
3041 { | |
3042 sprintf(_msf_numbers[i],"%d%c",i, '\0'); | |
3043 } | |
3044 sprintf(_msf_cigar, "%dM", SEQ_LENGTH); | |
3045 } | |
3046 | |
3047 if (_msf_samplingLocsEnds == NULL) | |
3048 { | |
3049 _msf_samplingLocs = samplingLocs; | |
3050 _msf_samplingLocsSize = samplingLocsSize; | |
3051 | |
3052 _msf_samplingLocsEnds = getMem(sizeof(int)*_msf_samplingLocsSize); | |
3053 for (i=0; i<_msf_samplingLocsSize; i++) | |
3054 { | |
3055 _msf_samplingLocsEnds[i]=_msf_samplingLocs[i]+WINDOW_SIZE-1; | |
3056 } | |
3057 | |
3058 _msf_seqList = seqList; | |
3059 _msf_seqListSize = seqListSize; | |
3060 | |
3061 preProcessReads(); | |
3062 | |
3063 _msf_oeaMapping = getMem(_msf_seqListSize * sizeof(int)); | |
3064 for(i = 0; i < _msf_seqListSize; i++) | |
3065 { | |
3066 _msf_oeaMapping[i] = 0; | |
3067 } | |
3068 | |
3069 _msf_discordantMapping = getMem(_msf_seqListSize * sizeof(int)); | |
3070 for(i = 0; i < _msf_seqListSize; i++) | |
3071 { | |
3072 _msf_discordantMapping[i] = 0; | |
3073 } | |
3074 | |
3075 } | |
3076 | |
3077 if (_msf_refGenName == NULL) | |
3078 { | |
3079 _msf_refGenName = getMem(4*SEQ_LENGTH); | |
3080 } | |
3081 _msf_refGen = getRefGenome(); | |
3082 _msf_refGenLength = strlen(_msf_refGen); | |
3083 | |
3084 _msf_refGenOffset = getRefGenomeOffset(); | |
3085 snprintf(_msf_refGenName, 4*SEQ_LENGTH,"%s%c", getRefGenomeName(), '\0'); | |
3086 _msf_refGenName[strlen(getRefGenomeName())] = '\0'; | |
3087 | |
3088 | |
3089 if (_msf_verifiedLocs != NULL){ | |
3090 freeMem(_msf_verifiedLocs, sizeof(int) * (_msf_refGenLength+1)); | |
3091 } | |
3092 | |
3093 _msf_verifiedLocs = (int *) getMem(sizeof(int)*(_msf_refGenLength+1)); | |
3094 | |
3095 for (i=0; i<=_msf_refGenLength; i++) | |
3096 _msf_verifiedLocs[i] = _msf_seqListSize*10+1; | |
3097 | |
3098 | |
3099 | |
3100 if (pairedEndMode && _msf_seqHits == NULL) | |
3101 { | |
3102 | |
3103 _msf_mappingInfo = getMem(seqListSize * sizeof (MappingInfo)); | |
3104 | |
3105 for (i=0; i<seqListSize; i++) | |
3106 { | |
3107 //_msf_mappingInfo[i].next = getMem(sizeof(MappingLocations)); | |
3108 _msf_mappingInfo[i].next = NULL; | |
3109 _msf_mappingInfo[i].size = 0; | |
3110 } | |
3111 | |
3112 _msf_seqHits = getMem((_msf_seqListSize) * sizeof(int)); | |
3113 | |
3114 | |
3115 for (i=0; i<_msf_seqListSize; i++) | |
3116 { | |
3117 _msf_seqHits[i] = 0; | |
3118 } | |
3119 | |
3120 _msf_readHasConcordantMapping = getMem(_msf_seqListSize / 2 * sizeof(char)); | |
3121 for(i = 0; i < _msf_seqListSize/2; i++) | |
3122 { | |
3123 _msf_readHasConcordantMapping[i] = 0; | |
3124 } | |
3125 | |
3126 initLoadingRefGenome(genFileName); | |
3127 } | |
3128 | |
3129 if (_msf_refGenOffset == 0) | |
3130 { | |
3131 _msf_refGenBeg = 1; | |
3132 } | |
3133 else | |
3134 { | |
3135 _msf_refGenBeg = CONTIG_OVERLAP - SEQ_LENGTH + 2; | |
3136 } | |
3137 _msf_refGenEnd = _msf_refGenLength - SEQ_LENGTH + 1; | |
3138 | |
3139 | |
3140 } | |
3141 /**********************************************/ | |
3142 void finalizeFAST() | |
3143 { | |
3144 freeMem(_msf_seqHits, (_msf_seqListSize) * sizeof(int)); | |
3145 freeMem(_msf_refGenName, 4*SEQ_LENGTH); | |
3146 | |
3147 | |
3148 /* | |
3149 int i; | |
3150 for (i=0; i<_msf_rIndexSize; i++) | |
3151 { | |
3152 freeMem(_msf_rIndex[i].seqInfo, _msf_rIndex[i].seqInfo[0]+1); | |
3153 } | |
3154 freeMem(_msf_rIndex, _msf_rIndexSize);*/ | |
3155 | |
3156 | |
3157 freeMem(_msf_map_sort_seqList, sizeof(Pair) * _msf_seqListSize); | |
3158 freeMem(_msf_sort_seqList, sizeof(int) * _msf_seqListSize); | |
3159 | |
3160 } | |
3161 | |
3162 /* | |
3163 Applies Levenshtein dynamic programming. | |
3164 Differs from the verifySingleEndEditDistance function: | |
3165 this function builds a single dynamic-programming table, while | |
3166 verifySingleEndEditDistance builds two tables, | |
3167 one each for the right and left strings. | |
3168 */ | |
3169 int editDistance(int refIndex, char *seq, int seqLength, char *matrix) | |
3170 { | |
3171 int i = 0; | |
3172 int size = 0; | |
3173 int error = 0; | |
3174 int rIndex = 0; | |
3175 int directionIndex = 0; | |
3176 | |
3177 int min = 0; | |
3178 int minIndex =0; | |
3179 | |
3180 int tempUp = 0; | |
3181 int tempDown = 0; | |
3182 | |
3183 char *ref; | |
3184 | |
3185 int errorString = 0; | |
3186 /* | |
3187 1: Up | |
3188 2: Side | |
3189 3: Diagnoal Match | |
3190 4: Diagnoal Mismatch | |
3191 */ | |
3192 | |
3193 int upValue; | |
3194 int diagValue; | |
3195 int sideValue; | |
3196 | |
3197 ref = _msf_refGen + refIndex - 1; | |
3198 | |
3199 rIndex = 1; | |
3200 | |
3201 for(i=0; i <= errThreshold; i++) | |
3202 { | |
3203 score[0][i] = i; | |
3204 score[i][0] = i; | |
3205 } | |
3206 | |
3207 while(rIndex <= seqLength +errThreshold) | |
3208 { | |
3209 tempUp = ((rIndex - errThreshold) > 0 ? ((rIndex > seqLength) ? seqLength - errThreshold :rIndex - errThreshold) : 1 ); | |
3210 tempDown = ((rIndex >= seqLength-errThreshold ) ? seqLength+1 :rIndex + errThreshold + 1); | |
3211 for(i = tempUp ; i < tempDown ; i++) | |
3212 { | |
3213 errorString = (*(ref+rIndex-1) == *(seq+i-1)); | |
3214 | |
3215 upValue = score[i-1][rIndex]+1; | |
3216 diagValue = score[i-1][rIndex-1]+ !errorString; | |
3217 sideValue = score[i][rIndex-1]+1; | |
3218 | |
3219 if(i != tempUp && i != tempDown-1) | |
3220 score[i][rIndex] = min3(sideValue, diagValue , upValue); | |
3221 | |
3222 else if( (i == ((rIndex - errThreshold) > 0 ? rIndex - errThreshold : 1)) && rIndex <= seqLength ) | |
3223 score[i][rIndex] = min(sideValue, diagValue); | |
3224 else if(rIndex > seqLength && (i == seqLength - errThreshold) ) | |
3225 score[i][rIndex] = sideValue; | |
3226 else | |
3227 score[i][rIndex] = min(diagValue , upValue); | |
3228 | |
3229 if(i == tempUp) | |
3230 error = score[i][rIndex]; | |
3231 else if(error > score[i][rIndex]) | |
3232 error = score[i][rIndex]; | |
3233 } | |
3234 rIndex++; | |
3235 } | |
3236 | |
3237 min = score[seqLength][seqLength+errThreshold]; | |
3238 minIndex = seqLength + errThreshold; | |
3239 | |
3240 // Find the Best error for all the possible ways. | |
3241 for(i = 1; i <= 2*errThreshold; i++) | |
3242 { | |
3243 if(min >= score[seqLength][seqLength+errThreshold-i] && seqLength+errThreshold-i > 0) | |
3244 { | |
3245 min = score[seqLength][seqLength+errThreshold-i]; | |
3246 minIndex = seqLength+errThreshold-i; | |
3247 } | |
3248 } | |
3249 | |
3250 error = score[seqLength][minIndex]; | |
3251 | |
3252 directionIndex = seqLength; | |
3253 rIndex = minIndex; | |
3254 while(directionIndex != 0 || rIndex != 0) | |
3255 { | |
3256 | |
3257 if(rIndex == 0) | |
3258 { | |
3259 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1) | |
3260 { | |
3261 matrix[size] = *(seq+directionIndex-1); | |
3262 size++; | |
3263 matrix[size] = 'I'; | |
3264 directionIndex--; | |
3265 } | |
3266 } | |
3267 else if(directionIndex == 0) | |
3268 { | |
3269 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1) | |
3270 { | |
3271 matrix[size] = *(ref+rIndex-1); | |
3272 size++; | |
3273 matrix[size] = 'D'; | |
3274 rIndex--; | |
3275 } | |
3276 } | |
3277 else if(directionIndex-rIndex == errThreshold) | |
3278 { | |
3279 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1) | |
3280 { | |
3281 matrix[size] = *(seq+directionIndex-1); | |
3282 size++; | |
3283 matrix[size] = 'I'; | |
3284 directionIndex--; | |
3285 } | |
3286 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 ) | |
3287 { | |
3288 matrix[size] = *(ref+rIndex-1); | |
3289 rIndex--; | |
3290 directionIndex--; | |
3291 } | |
3292 else | |
3293 { | |
3294 matrix[size] = 'M'; | |
3295 rIndex--; | |
3296 directionIndex--; | |
3297 } | |
3298 | |
3299 } | |
3300 else if(rIndex - directionIndex == errThreshold) | |
3301 { | |
3302 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1) | |
3303 { | |
3304 matrix[size] = *(ref+rIndex-1); | |
3305 size++; | |
3306 matrix[size] = 'D'; | |
3307 rIndex--; | |
3308 } | |
3309 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 ) | |
3310 { | |
3311 matrix[size] = *(ref+rIndex-1); | |
3312 rIndex--; | |
3313 directionIndex--; | |
3314 } | |
3315 else | |
3316 { | |
3317 matrix[size] = 'M'; | |
3318 rIndex--; | |
3319 directionIndex--; | |
3320 } | |
3321 } | |
3322 else | |
3323 { | |
3324 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
3325 { | |
3326 matrix[size] = *(seq+directionIndex-1); | |
3327 size++; | |
3328 matrix[size] = 'I'; | |
3329 directionIndex--; | |
3330 } | |
3331 else if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
3332 { | |
3333 matrix[size] = *(ref+rIndex-1); | |
3334 size++; | |
3335 matrix[size] = 'D'; | |
3336 rIndex--; | |
3337 } | |
3338 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 ) | |
3339 { | |
3340 matrix[size] = *(ref+rIndex-1); | |
3341 rIndex--; | |
3342 directionIndex--; | |
3343 } | |
3344 else | |
3345 { | |
3346 matrix[size] = 'M'; | |
3347 rIndex--; | |
3348 directionIndex--; | |
3349 } | |
3350 } | |
3351 size++; | |
3352 } | |
3353 | |
3354 matrix[size] = '\0'; | |
3355 | |
3356 char returnString[200]; | |
3357 | |
3358 returnString[0] = '\0'; | |
3359 reverse(matrix, returnString, size); | |
3360 sprintf(matrix, "%s", returnString); | |
3361 | |
3362 return error; | |
3363 } | |
3364 | |
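/*
  Applies the Levenshtein dynamic programming in both the right and
  the left direction until either the error threshold is reached or
  the end of the string is hit.

  NOTE(review): this text appears to describe the
  verifySingleEndEditDistance functions rather than msfHashVal, which
  directly follows it -- confirm and move if so.
 */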
/*
 * Packs the first six characters of seq into an integer, two bits per
 * character (A=0, C=1, G=2, T=3), with the first character ending up in
 * the most significant position.
 *
 * Returns the packed value (0..4095), or -1 as soon as a character other
 * than upper-case A, C, G or T is met (this includes the terminating NUL
 * of a string shorter than six characters).
 */
int msfHashVal(char *seq)
{
	int packed = 0;
	int pos;

	for (pos = 0; pos < 6; pos++)
	{
		int code;
		char base = seq[pos];

		if (base == 'A')
			code = 0;
		else if (base == 'C')
			code = 1;
		else if (base == 'G')
			code = 2;
		else if (base == 'T')
			code = 3;
		else
			return -1;

		packed = (packed << 2) | code;
	}

	return packed;
}
3401 | |
3402 | |
3403 | |
3404 int verifySingleEndEditDistance2(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue) | |
3405 { | |
3406 int i = 0; | |
3407 | |
3408 char * ref; | |
3409 char * tempref; | |
3410 | |
3411 int rIndex = 0; //reference Index | |
3412 | |
3413 int e = errThreshold; | |
3414 int error = 0; | |
3415 int error1 = 0; | |
3416 int totalError = 0; | |
3417 | |
3418 | |
3419 /* | |
3420 1: Up | |
3421 2: Side | |
3422 3: Diagnoal Match | |
3423 4: Diagnoal Mismatch | |
3424 */ | |
3425 | |
3426 | |
3427 int minIndex1 = 0; | |
3428 int minIndex2 = 0; | |
3429 | |
3430 | |
3431 int directionIndex = 0; | |
3432 | |
3433 int size = 0; | |
3434 | |
3435 int startIndex1 = 0; | |
3436 | |
3437 rIndex = 1; | |
3438 | |
3439 | |
3440 char matrixR[200]; | |
3441 char matrixL[200]; | |
3442 | |
3443 ref = _msf_refGen + refIndex - 1; | |
3444 tempref = _msf_refGen + refIndex - 1; | |
3445 | |
3446 int jumpIndex = 0; | |
3447 | |
3448 if(rSeqLength != 0) | |
3449 { | |
3450 error1 = forwardEditDistance2SSE2(ref+segLength+jumpIndex, rSeqLength-jumpIndex, rSeq+jumpIndex, rSeqLength-jumpIndex); | |
3451 if(error1 == -1) | |
3452 return -1; | |
3453 } | |
3454 | |
3455 | |
3456 if(lSeqLength != 0) | |
3457 { | |
3458 error = backwardEditDistance2SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength); | |
3459 if(error == -1) | |
3460 { | |
3461 return -1; | |
3462 } | |
3463 } | |
3464 | |
3465 matrixL[0] = '\0'; | |
3466 matrixR[0] = '\0'; | |
3467 | |
3468 | |
3469 ref = _msf_refGen + refIndex - 1; | |
3470 | |
3471 if(error1+error > errThreshold) | |
3472 return -1; | |
3473 | |
3474 ref = _msf_refGen + refIndex - 1; | |
3475 | |
3476 rIndex = startIndex1+1; | |
3477 | |
3478 int i0 = 0; | |
3479 int i1 = 0; | |
3480 int i2 = 0; | |
3481 | |
3482 __m128i R0; | |
3483 __m128i R1; | |
3484 | |
3485 __m128i Side1, Side2,Side; //side matrix | |
3486 __m128i Down1, Down2,Down; //down matrix | |
3487 __m128i Diag; // | |
3488 | |
3489 __m128i tmp; | |
3490 | |
3491 /* initialize */ | |
3492 R0 = _mm_setzero_si128 (); | |
3493 R1 = _mm_setzero_si128 (); | |
3494 Diag = _mm_setzero_si128 (); | |
3495 Side1 = _mm_setzero_si128 (); | |
3496 Side2 = _mm_setzero_si128 (); | |
3497 Down1 = _mm_setzero_si128 (); | |
3498 Down2 = _mm_setzero_si128 (); | |
3499 Down = _mm_setzero_si128 (); | |
3500 Side = _mm_setzero_si128 (); | |
3501 tmp = _mm_setzero_si128 (); | |
3502 /* end initialize */ | |
3503 | |
3504 int mismatch[3] = {0,0,0}; | |
3505 | |
3506 if(lSeqLength != 0) | |
3507 { | |
3508 char *a; | |
3509 char *b; | |
3510 | |
3511 a = ref-1; | |
3512 b = lSeq+lSeqLength-1; | |
3513 | |
3514 R0 = _mm_insert_epi16(R0,0,0); | |
3515 | |
3516 score[0][0] = 0; | |
3517 | |
3518 R1 = _mm_insert_epi16(R1,1,0); | |
3519 R1 = _mm_insert_epi16(R1,1,1); | |
3520 | |
3521 score[1][0] = 1; | |
3522 direction1[1][0] = 1; | |
3523 score[0][1] = 1; | |
3524 direction1[0][1] = 2; | |
3525 | |
3526 mismatch[0] = ((a[0]) != (b[0])); | |
3527 | |
3528 Diag = _mm_insert_epi16(Diag,2*e,0); | |
3529 Diag = _mm_insert_epi16(Diag,mismatch[0],1); | |
3530 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3531 | |
3532 Side1 = _mm_insert_epi16(Side1,1,0); | |
3533 Side1 = _mm_insert_epi16(Side1,1,1); | |
3534 Side1 = _mm_insert_epi16(Side1,2*e,2); | |
3535 | |
3536 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
3537 Side2 = _mm_insert_epi16(Side2,1,1); | |
3538 Side2 = _mm_insert_epi16(Side2,1,2); | |
3539 | |
3540 Down1 = _mm_insert_epi16(Down1,1,0); | |
3541 Down1 = _mm_insert_epi16(Down1,1,1); | |
3542 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
3543 | |
3544 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
3545 Down2 = _mm_insert_epi16(Down2,1,1); | |
3546 Down2 = _mm_insert_epi16(Down2,1,2); | |
3547 | |
3548 tmp = _mm_slli_si128(R1,2); | |
3549 | |
3550 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3551 R0 = _mm_min_epi16(R0,tmp+Down2); | |
3552 | |
3553 i0 = _mm_extract_epi16(R0, 0); | |
3554 i1 = _mm_extract_epi16(R0, 1); | |
3555 i2 = _mm_extract_epi16(R0, 2); | |
3556 | |
3557 score[0][2] = i0; | |
3558 score[1][1] = i1; | |
3559 score[2][0] = i2; | |
3560 | |
3561 direction1[0][2] = 2; | |
3562 direction1[1][1] = ((mismatch[0] == 0)? 3 : 4); | |
3563 direction1[2][0] = 1; | |
3564 | |
3565 for (i = 3; i < 2*lSeqLength; i++) | |
3566 { | |
3567 if(i % 2 ==1) | |
3568 { | |
3569 Diag = _mm_sub_epi8(Diag, Diag); | |
3570 mismatch[0] = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) ); | |
3571 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3572 mismatch[1] = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) ); | |
3573 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3574 | |
3575 tmp = _mm_srli_si128(R0,2); | |
3576 | |
3577 R1 = _mm_min_epi16(tmp+Side1, R1+Diag); | |
3578 R1 = _mm_min_epi16(R1,R0+Down1); | |
3579 | |
3580 i0 = _mm_extract_epi16(R1, 0); | |
3581 i1 = _mm_extract_epi16(R1, 1); | |
3582 | |
3583 score[i/2][i/2+1] = i0; | |
3584 score[i/2+1][i/2] = i1; | |
3585 | |
3586 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 : | |
3587 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 : | |
3588 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4; | |
3589 | |
3590 direction1[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 : | |
3591 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 : | |
3592 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4; | |
3593 | |
3594 if(i > 2 * lSeqLength - 2) | |
3595 { | |
3596 error = min(error, i1); | |
3597 if(error == i1) | |
3598 minIndex1 = i-lSeqLength; | |
3599 } | |
3600 } | |
3601 | |
3602 else if(i % 2 == 0) | |
3603 { | |
3604 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) ); | |
3605 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3606 mismatch[1] = ( *(a-(i/2-1)) != *(b-(i/2-1)) ); | |
3607 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3608 mismatch[2] = ( *(a-(i/2-2)) != *(b-(i/2)) ); | |
3609 Diag = _mm_insert_epi16(Diag,mismatch[2],2); | |
3610 | |
3611 tmp = _mm_slli_si128(R1,2); | |
3612 | |
3613 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3614 R0 = _mm_min_epi16(R0,tmp+Down2); | |
3615 | |
3616 i0 = _mm_extract_epi16(R0, 0); | |
3617 i1 = _mm_extract_epi16(R0, 1); | |
3618 i2 = _mm_extract_epi16(R0, 2); | |
3619 | |
3620 score[i/2-1][i/2+1] = i0; | |
3621 score[i/2][i/2] = i1; | |
3622 score[i/2+1][i/2-1] = i2; | |
3623 | |
3624 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4; | |
3625 | |
3626 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 : | |
3627 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 : | |
3628 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4; | |
3629 | |
3630 direction1[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2] == 0) ? 3 : | |
3631 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4; | |
3632 | |
3633 if( (i/2) % segLength == 0 && i1 == 0) // the segment has been processed no need to process it again | |
3634 { | |
3635 return -1; | |
3636 } | |
3637 | |
3638 if(i == 2 * lSeqLength - 2) | |
3639 { | |
3640 error = i2; | |
3641 minIndex1 = i-lSeqLength; | |
3642 } | |
3643 } | |
3644 } | |
3645 | |
3646 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
3647 | |
3648 //fill the first part of the error | |
3649 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) ); | |
3650 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3651 mismatch[1] = ( *(a-(i/2-1)) !=*(b-(i/2-1)) ); | |
3652 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3653 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3654 | |
3655 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3656 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1); | |
3657 | |
3658 i0 = _mm_extract_epi16(R0, 0); | |
3659 i1 = _mm_extract_epi16(R0, 1); | |
3660 | |
3661 error = min(error, i1); | |
3662 if(error == i1) | |
3663 minIndex1 = i-lSeqLength; | |
3664 | |
3665 score[i/2-1][i/2+1] = i0; | |
3666 score[i/2][i/2] = i1; | |
3667 | |
3668 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : | |
3669 (score[i/2-1][i/2+1]-score[i/2-1][i/2]) ? 2 : 4; | |
3670 | |
3671 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 : | |
3672 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 : | |
3673 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4; | |
3674 | |
3675 //fill the second part of the error | |
3676 i++; | |
3677 Diag = _mm_sub_epi8(Diag, Diag); | |
3678 Diag = _mm_insert_epi16(Diag,2*e,0); | |
3679 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) ); | |
3680 Diag = _mm_insert_epi16(Diag,mismatch[0],1); | |
3681 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3682 | |
3683 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
3684 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1); | |
3685 | |
3686 i0 = _mm_extract_epi16(R1, 0); | |
3687 i1 = _mm_extract_epi16(R1, 1); | |
3688 | |
3689 error = min(error, i1); | |
3690 if(error == i1) | |
3691 minIndex1 = i-lSeqLength; | |
3692 | |
3693 score[i/2-1][i/2+2] = i0; | |
3694 score[i/2][i/2+1] = i1; | |
3695 | |
3696 direction1[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 : | |
3697 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 4; | |
3698 | |
3699 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2]) ? 3 : | |
3700 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 : | |
3701 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4; | |
3702 | |
3703 //fill the last the last element of the matrix | |
3704 i++; | |
3705 Diag = _mm_sub_epi8(Diag, Diag); | |
3706 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) ); | |
3707 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3708 | |
3709 Down = _mm_insert_epi16(Down,1,0); | |
3710 | |
3711 Side = _mm_insert_epi16(Side,1,0); | |
3712 | |
3713 tmp = _mm_srli_si128(R1,2); | |
3714 | |
3715 R0 = _mm_min_epi16(R1+Down, R0+Diag); | |
3716 R0 = _mm_min_epi16(R0,tmp+Side); | |
3717 | |
3718 i0 = _mm_extract_epi16(R0, 0); | |
3719 | |
3720 error = min(error, i0); | |
3721 if(error == 0) | |
3722 return -1; | |
3723 if(error == i0) | |
3724 minIndex1 = i-lSeqLength; | |
3725 if(mismatch[0] == 0) | |
3726 direction1[lSeqLength][lSeqLength+errThreshold] = 3; | |
3727 else | |
3728 { | |
3729 if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength][lSeqLength+errThreshold-1] == 1) | |
3730 direction1[lSeqLength][lSeqLength+errThreshold] = 2; | |
3731 else if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength-1][lSeqLength+errThreshold] == 1) | |
3732 direction1[lSeqLength][lSeqLength+errThreshold] = 1; | |
3733 else | |
3734 direction1[lSeqLength][lSeqLength+errThreshold] = 4; | |
3735 } | |
3736 } | |
3737 error1 = error; | |
3738 error = 0; | |
3739 | |
3740 directionIndex = lSeqLength; | |
3741 rIndex = minIndex1; | |
3742 | |
3743 | |
3744 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ; | |
3745 | |
3746 ref = ref + segLength; | |
3747 | |
3748 if(rSeqLength <= e) | |
3749 { | |
3750 char *a; | |
3751 char *b; | |
3752 | |
3753 int tmp_index = 0; | |
3754 | |
3755 a = ref; | |
3756 b = rSeq; | |
3757 | |
3758 for(tmp_index = 0; tmp_index < rSeqLength; tmp_index++) | |
3759 { | |
3760 matrixR[tmp_index] = (a[tmp_index]==b[tmp_index]) ? 'M' : a[tmp_index] ; | |
3761 } | |
3762 matrixR[tmp_index] = '\0'; | |
3763 } | |
3764 else if(rSeqLength != 0 && rSeqLength >= e) | |
3765 { | |
3766 char *a; | |
3767 char *b; | |
3768 | |
3769 a = ref; | |
3770 b = rSeq; | |
3771 | |
3772 R0 = _mm_sub_epi8(R0, R0); | |
3773 R1 = _mm_sub_epi8(R1, R1); | |
3774 | |
3775 R0 = _mm_insert_epi16(R0,0,0); | |
3776 | |
3777 score[0][0] = 0; | |
3778 | |
3779 R1 = _mm_insert_epi16(R1,1,0); | |
3780 R1 = _mm_insert_epi16(R1,1,1); | |
3781 | |
3782 score[1][0] = 1; | |
3783 direction2[1][0] = 1; | |
3784 score[0][1] = 1; | |
3785 direction2[0][1] = 2; | |
3786 | |
3787 mismatch[0] = ((a[0]) != (b[0])); | |
3788 | |
3789 Diag = _mm_insert_epi16(Diag,2*e,0); | |
3790 Diag = _mm_insert_epi16(Diag,mismatch[0],1); | |
3791 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3792 | |
3793 Side1 = _mm_insert_epi16(Side1,1,0); | |
3794 Side1 = _mm_insert_epi16(Side1,1,1); | |
3795 Side1 = _mm_insert_epi16(Side1,2*e,2); | |
3796 | |
3797 Side2 = _mm_insert_epi16(Side2,2*e,0); | |
3798 Side2 = _mm_insert_epi16(Side2,1,1); | |
3799 Side2 = _mm_insert_epi16(Side2,1,2); | |
3800 | |
3801 Down1 = _mm_insert_epi16(Down1,1,0); | |
3802 Down1 = _mm_insert_epi16(Down1,1,1); | |
3803 Down1 = _mm_insert_epi16(Down1,2*e,2); | |
3804 | |
3805 Down2 = _mm_insert_epi16(Down2,2*e,0); | |
3806 Down2 = _mm_insert_epi16(Down2,1,1); | |
3807 Down2 = _mm_insert_epi16(Down2,1,2); | |
3808 | |
3809 tmp = _mm_slli_si128(R1,2); | |
3810 | |
3811 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3812 R0 = _mm_min_epi16(R0,tmp+Down2); | |
3813 | |
3814 i0 = _mm_extract_epi16(R0, 0); | |
3815 i1 = _mm_extract_epi16(R0, 1); | |
3816 i2 = _mm_extract_epi16(R0, 2); | |
3817 | |
3818 score[0][2] = i0; | |
3819 score[1][1] = i1; | |
3820 score[2][0] = i2; | |
3821 | |
3822 direction2[0][2] = 2; | |
3823 direction2[1][1] = ((mismatch[0] == 0)? 3 : 4); | |
3824 direction2[2][0] = 1; | |
3825 | |
3826 | |
3827 for (i = 3; i < 2*rSeqLength; i++) | |
3828 { | |
3829 if(i % 2 ==1) | |
3830 { | |
3831 Diag = _mm_sub_epi8(Diag, Diag); | |
3832 mismatch[0] = ((a[(i+1)/2-1]) != (b[(i-1)/2-1])); | |
3833 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3834 mismatch[1] = ((a[(i-1)/2-1]) != (b[(i+1)/2-1])); | |
3835 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3836 | |
3837 tmp = _mm_srli_si128(R0,2); | |
3838 | |
3839 R1 = _mm_min_epi16(tmp+Side1, R1+Diag); | |
3840 R1 = _mm_min_epi16(R1,R0+Down1); | |
3841 | |
3842 i0 = _mm_extract_epi16(R1, 0); | |
3843 i1 = _mm_extract_epi16(R1, 1); | |
3844 | |
3845 score[i/2][i/2+1] = i0; | |
3846 score[i/2+1][i/2] = i1; | |
3847 | |
3848 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 : | |
3849 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 : | |
3850 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4; | |
3851 | |
3852 direction2[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 : | |
3853 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 : | |
3854 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4; | |
3855 | |
3856 | |
3857 if(i > 2 * rSeqLength - 2) | |
3858 { | |
3859 error = min(error, i1); | |
3860 if(error == i1) | |
3861 minIndex2 = i-rSeqLength; | |
3862 } | |
3863 } | |
3864 | |
3865 else if(i % 2 == 0) | |
3866 { | |
3867 mismatch[0] = ((a[i/2]) != (b[i/2-2])); | |
3868 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3869 mismatch[1] = ((a[i/2-1]) != (b[i/2-1])); | |
3870 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3871 mismatch[2] = ((a[i/2-2]) != (b[i/2])); | |
3872 Diag = _mm_insert_epi16(Diag,mismatch[2],2); | |
3873 | |
3874 tmp = _mm_slli_si128(R1,2); | |
3875 | |
3876 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3877 R0 = _mm_min_epi16(R0,tmp+Down2); | |
3878 | |
3879 i0 = _mm_extract_epi16(R0, 0); | |
3880 i1 = _mm_extract_epi16(R0, 1); | |
3881 i2 = _mm_extract_epi16(R0, 2); | |
3882 | |
3883 score[i/2-1][i/2+1] = i0; | |
3884 score[i/2][i/2] = i1; | |
3885 score[i/2+1][i/2-1] = i2; | |
3886 | |
3887 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : | |
3888 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4; | |
3889 | |
3890 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 : | |
3891 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 : | |
3892 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4; | |
3893 | |
3894 direction2[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2]==0) ? 3 : | |
3895 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4; | |
3896 | |
3897 | |
3898 if(i == 2 * rSeqLength - 2) | |
3899 { | |
3900 error = i2; | |
3901 minIndex2 = i-rSeqLength; | |
3902 } | |
3903 } | |
3904 } | |
3905 | |
3906 Down1 = _mm_insert_epi16(Down1,2*e,0); | |
3907 | |
3908 //fill the first part of the error | |
3909 mismatch[0] = ((a[i/2]) != (b[i/2-2])); | |
3910 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3911 mismatch[1] = ((a[i/2-1]) != (b[i/2-1])); | |
3912 Diag = _mm_insert_epi16(Diag,mismatch[1],1); | |
3913 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3914 | |
3915 R0 = _mm_min_epi16(R1+Side1, R0+Diag); | |
3916 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1); | |
3917 | |
3918 i0 = _mm_extract_epi16(R0, 0); | |
3919 i1 = _mm_extract_epi16(R0, 1); | |
3920 | |
3921 error = min(error, i1); | |
3922 if(error == i1) | |
3923 minIndex2 = i-rSeqLength; | |
3924 | |
3925 score[i/2-1][i/2+1] = i0; | |
3926 score[i/2][i/2] = i1; | |
3927 | |
3928 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : | |
3929 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4; | |
3930 | |
3931 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 : | |
3932 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 : | |
3933 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4; | |
3934 | |
3935 | |
3936 //fill the second part of the error | |
3937 i++; | |
3938 Diag = _mm_sub_epi8(Diag, Diag); | |
3939 Diag = _mm_insert_epi16(Diag,2*e,0); | |
3940 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1])); | |
3941 Diag = _mm_insert_epi16(Diag,mismatch[0],1); | |
3942 Diag = _mm_insert_epi16(Diag,2*e,2); | |
3943 | |
3944 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag); | |
3945 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1); | |
3946 | |
3947 i0 = _mm_extract_epi16(R1, 0); | |
3948 i1 = _mm_extract_epi16(R1, 1); | |
3949 | |
3950 error = min(error, i1); | |
3951 if(error == i1) | |
3952 minIndex2 = i-rSeqLength; | |
3953 | |
3954 score[i/2-1][i/2+2] = i0; | |
3955 score[i/2][i/2+1] = i1; | |
3956 | |
3957 direction2[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 : | |
3958 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 3; | |
3959 | |
3960 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 : | |
3961 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 : | |
3962 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4; | |
3963 | |
3964 | |
3965 //fill the last the last element of the matrix | |
3966 i++; | |
3967 Diag = _mm_sub_epi8(Diag, Diag); | |
3968 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1])); | |
3969 Diag = _mm_insert_epi16(Diag,mismatch[0],0); | |
3970 | |
3971 Down = _mm_sub_epi8(Down, Down); | |
3972 Down = _mm_insert_epi16(Down,1,0); | |
3973 | |
3974 Side = _mm_sub_epi8(Side, Side); | |
3975 Side = _mm_insert_epi16(Side,1,0); | |
3976 | |
3977 tmp = _mm_srli_si128(R1,2); | |
3978 | |
3979 R0 = _mm_min_epi16(R1+Down, R0+Diag); | |
3980 R0 = _mm_min_epi16(R0,tmp+Side); | |
3981 | |
3982 i0 = _mm_extract_epi16(R0, 0); | |
3983 | |
3984 error = min(error, i0); | |
3985 if(error == i0) | |
3986 minIndex2 = i-rSeqLength; | |
3987 | |
3988 if(mismatch[0] == 0) | |
3989 direction2[rSeqLength][rSeqLength+errThreshold] = 3; | |
3990 else | |
3991 { | |
3992 if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength][rSeqLength+errThreshold-1] == 1) | |
3993 direction2[lSeqLength][lSeqLength+errThreshold] = 2; | |
3994 else if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength-1][rSeqLength+errThreshold] == 1) | |
3995 direction2[rSeqLength][rSeqLength+errThreshold] = 1; | |
3996 else | |
3997 direction2[rSeqLength][rSeqLength+errThreshold] = 4; | |
3998 } | |
3999 | |
4000 } | |
4001 | |
4002 totalError = error1 + error; | |
4003 | |
4004 size = 0; | |
4005 directionIndex = rSeqLength; | |
4006 rIndex = minIndex2; | |
4007 | |
4008 | |
4009 if(rSeqLength > e) | |
4010 { | |
4011 while(directionIndex != 0 || rIndex != 0) | |
4012 { | |
4013 | |
4014 if(direction2[directionIndex][rIndex] == 3) | |
4015 { | |
4016 matrixR[size] = 'M'; | |
4017 rIndex--; | |
4018 directionIndex--; | |
4019 } | |
4020 else if(direction2[directionIndex][rIndex] == 4) | |
4021 { | |
4022 matrixR[size] = *(ref+rIndex-1); | |
4023 rIndex--; | |
4024 directionIndex--; | |
4025 } | |
4026 else if(direction2[directionIndex][rIndex] == 2) | |
4027 { | |
4028 matrixR[size] = *(ref+rIndex-1); | |
4029 size++; | |
4030 matrixR[size] = 'D'; | |
4031 rIndex--; | |
4032 } | |
4033 else | |
4034 { | |
4035 matrixR[size] = *(rSeq+directionIndex-1); | |
4036 size++; | |
4037 matrixR[size] = 'I'; | |
4038 directionIndex--; | |
4039 } | |
4040 size++; | |
4041 } | |
4042 matrixR[size] = '\0'; | |
4043 } | |
4044 size = 0; | |
4045 directionIndex = lSeqLength; | |
4046 rIndex = minIndex1; | |
4047 | |
4048 while(directionIndex != 0 || rIndex != 0) | |
4049 { | |
4050 | |
4051 if(direction1[directionIndex][rIndex] == 3) | |
4052 { | |
4053 matrixL[size] = 'M'; | |
4054 rIndex--; | |
4055 directionIndex--; | |
4056 } | |
4057 else if(direction1[directionIndex][rIndex] == 4) | |
4058 { | |
4059 matrixL[size] = *(tempref-rIndex); | |
4060 rIndex--; | |
4061 directionIndex--; | |
4062 } | |
4063 else if(direction1[directionIndex][rIndex] == 2) | |
4064 { | |
4065 matrixL[size] = 'D'; | |
4066 size++; | |
4067 matrixL[size] = *(tempref-rIndex); | |
4068 rIndex--; | |
4069 } | |
4070 else | |
4071 { | |
4072 matrixL[size] = 'I'; | |
4073 size++; | |
4074 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
4075 directionIndex--; | |
4076 } | |
4077 | |
4078 size++; | |
4079 } | |
4080 | |
4081 matrixL[size] = '\0'; | |
4082 | |
4083 char middle[200]; | |
4084 middle[0] = '\0'; | |
4085 | |
4086 for(i = 0; i < segLength; i++) | |
4087 middle[i] = 'M'; | |
4088 middle[segLength] = '\0'; | |
4089 | |
4090 char rmatrixR[200]; | |
4091 | |
4092 reverse(matrixR, rmatrixR, strlen(matrixR)); | |
4093 | |
4094 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR); | |
4095 | |
4096 return totalError; | |
4097 } | |
4098 | |
4099 int verifySingleEndEditDistance4(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue) | |
4100 { | |
4101 | |
4102 int i = 0; | |
4103 | |
4104 char * ref; | |
4105 char * tempref; | |
4106 | |
4107 int rIndex = 0; //reference Index | |
4108 | |
4109 int error = 0; | |
4110 int error1 = 0; | |
4111 | |
4112 int error2 = 0; | |
4113 int error3 = 0; | |
4114 int totalError = 0; | |
4115 int errorSegment = 0; | |
4116 | |
4117 int ERROR_BOUND = errThreshold; | |
4118 | |
4119 | |
4120 /* | |
4121 1: Up | |
4122 2: Side | |
4123 3: Diagnoal Match | |
4124 4: Diagnoal Mismatch | |
4125 */ | |
4126 | |
4127 int min = 0; | |
4128 int minIndex1 = 0; | |
4129 int minIndex2 = 0; | |
4130 | |
4131 int directionIndex = 0; | |
4132 | |
4133 | |
4134 int size = 0; | |
4135 | |
4136 ref = _msf_refGen + refIndex - 1; | |
4137 tempref = _msf_refGen + refIndex - 1; | |
4138 | |
4139 | |
4140 if(lSeqLength != 0) | |
4141 { | |
4142 error3 = backwardEditDistance4SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength); | |
4143 if(error3 == -1 || error3 == 0){ | |
4144 return -1; | |
4145 } | |
4146 } | |
4147 | |
4148 if(rSeqLength != 0) | |
4149 { | |
4150 error2 = forwardEditDistance4SSE2(ref+segLength, rSeqLength, rSeq, rSeqLength); | |
4151 if(error2 == -1) | |
4152 return -1; | |
4153 } | |
4154 | |
4155 if(error2 + error3 > errThreshold) | |
4156 return -1; | |
4157 | |
4158 rIndex = 1; | |
4159 | |
4160 int prevError = 0; | |
4161 | |
4162 int tempUp = 0; | |
4163 int tempDown = 0; | |
4164 | |
4165 int errorString = 0; | |
4166 | |
4167 int upValue; | |
4168 int diagValue; | |
4169 int sideValue; | |
4170 | |
4171 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0) | |
4172 { | |
4173 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 ); | |
4174 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
4175 for(i = tempUp ; i < tempDown ; i++) | |
4176 { | |
4177 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i)); | |
4178 | |
4179 upValue = scoreB[i-1][rIndex]+1; | |
4180 diagValue = scoreB[i-1][rIndex-1]+ !errorString; | |
4181 sideValue = scoreB[i][rIndex-1]+1; | |
4182 | |
4183 if(i != tempUp && i != tempDown-1) | |
4184 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue); | |
4185 | |
4186 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength ) | |
4187 scoreB[i][rIndex] = min(sideValue, diagValue); | |
4188 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) ) | |
4189 scoreB[i][rIndex] = sideValue; | |
4190 else | |
4191 scoreB[i][rIndex] = min(diagValue , upValue); | |
4192 | |
4193 if(i == tempUp) | |
4194 error = scoreB[i][rIndex]; | |
4195 else if(error > scoreB[i][rIndex]) | |
4196 error = scoreB[i][rIndex]; | |
4197 } | |
4198 if(rIndex <= lSeqLength) | |
4199 { | |
4200 errorSegment = error-prevError; | |
4201 } | |
4202 rIndex++; | |
4203 } | |
4204 | |
4205 if(lSeqLength != 0) | |
4206 { | |
4207 min = scoreB[lSeqLength][lSeqLength+errThreshold]; | |
4208 minIndex1 = lSeqLength + errThreshold; | |
4209 | |
4210 // Find the Best error for all the possible ways. | |
4211 for(i = 1; i <= 2*errThreshold; i++) | |
4212 { | |
4213 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0) | |
4214 { | |
4215 min = scoreB[lSeqLength][lSeqLength+errThreshold-i]; | |
4216 minIndex1 = lSeqLength+errThreshold-i; | |
4217 } | |
4218 } | |
4219 error = scoreB[lSeqLength][minIndex1]; | |
4220 } | |
4221 | |
4222 error1 = error; | |
4223 | |
4224 error = 0; | |
4225 errorSegment = 0; | |
4226 | |
4227 directionIndex = lSeqLength; | |
4228 rIndex = minIndex1; | |
4229 | |
4230 | |
4231 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ; | |
4232 | |
4233 ref = ref + segLength; | |
4234 | |
4235 if(rSeqLength != 0) | |
4236 { | |
4237 rIndex = 1; | |
4238 while(rIndex <= rSeqLength+errThreshold-error1) | |
4239 { | |
4240 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1; | |
4241 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
4242 for(i = tempUp; i < tempDown ; i++) | |
4243 { | |
4244 errorString = (*(ref+rIndex-1) == *(rSeq+i-1)); | |
4245 | |
4246 upValue = scoreF[i-1][rIndex]+1; | |
4247 diagValue = scoreF[i-1][rIndex-1]+ !errorString; | |
4248 sideValue = scoreF[i][rIndex-1]+1; | |
4249 | |
4250 if(i != tempUp && i != tempDown-1) | |
4251 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue); | |
4252 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength ) | |
4253 scoreF[i][rIndex] = min(sideValue, diagValue); | |
4254 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) ) | |
4255 scoreF[i][rIndex] = sideValue; | |
4256 else | |
4257 scoreF[i][rIndex] = min(diagValue , upValue); | |
4258 | |
4259 if(i == tempUp) | |
4260 error = scoreF[i][rIndex]; | |
4261 if(error > scoreF[i][rIndex]) | |
4262 error = scoreF[i][rIndex]; | |
4263 } | |
4264 if(rIndex <= rSeqLength) | |
4265 { | |
4266 errorSegment = error; | |
4267 } | |
4268 | |
4269 rIndex++; | |
4270 } | |
4271 | |
4272 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1]; | |
4273 minIndex2 = rSeqLength + errThreshold-error1; | |
4274 | |
4275 // Find the Best error for all the possible ways. | |
4276 for(i = 1; i <= 2*(errThreshold-error1); i++) | |
4277 { | |
4278 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0) | |
4279 { | |
4280 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i]; | |
4281 minIndex2 = rSeqLength+errThreshold-error1-i; | |
4282 } | |
4283 } | |
4284 error = scoreF[rSeqLength][minIndex2]; | |
4285 } | |
4286 | |
4287 totalError = error + error1; | |
4288 | |
4289 if(errThreshold > 4) | |
4290 printf("ERROR in errorThreshold.\n"); | |
4291 | |
4292 | |
4293 if(totalError != error2 + error3 && totalError > errThreshold) | |
4294 { | |
4295 printf("ErrorF=%d, ErrorB=%d Error=%d Error=%d\n", error2,error3,error1,error); | |
4296 | |
4297 scanf("%d", &i); | |
4298 } | |
4299 | |
4300 char matrixR[200]; | |
4301 char matrixL[200]; | |
4302 | |
4303 matrixR[0] = '\0'; | |
4304 matrixL[0] = '\0'; | |
4305 | |
4306 size = 0; | |
4307 directionIndex = rSeqLength; | |
4308 rIndex = minIndex2; | |
4309 | |
4310 while(directionIndex != 0 || rIndex != 0) | |
4311 { | |
4312 if(directionIndex-rIndex == errThreshold) | |
4313 { | |
4314 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1) | |
4315 { | |
4316 matrixR[size] = *(rSeq+directionIndex-1); | |
4317 size++; | |
4318 matrixR[size] = 'I'; | |
4319 directionIndex--; | |
4320 } | |
4321 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4322 { | |
4323 matrixR[size] = *(ref+rIndex-1); | |
4324 rIndex--; | |
4325 directionIndex--; | |
4326 } | |
4327 else | |
4328 { | |
4329 matrixR[size] = 'M'; | |
4330 rIndex--; | |
4331 directionIndex--; | |
4332 } | |
4333 | |
4334 } | |
4335 else if(rIndex - directionIndex == errThreshold) | |
4336 { | |
4337 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1) | |
4338 { | |
4339 matrixR[size] = *(ref+rIndex-1); | |
4340 size++; | |
4341 matrixR[size] = 'D'; | |
4342 rIndex--; | |
4343 } | |
4344 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4345 { | |
4346 matrixR[size] = *(ref+rIndex-1); | |
4347 rIndex--; | |
4348 directionIndex--; | |
4349 } | |
4350 else | |
4351 { | |
4352 matrixR[size] = 'M'; | |
4353 rIndex--; | |
4354 directionIndex--; | |
4355 } | |
4356 } | |
4357 else | |
4358 { | |
4359 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
4360 { | |
4361 matrixR[size] = *(rSeq+directionIndex-1); | |
4362 size++; | |
4363 matrixR[size] = 'I'; | |
4364 directionIndex--; | |
4365 } | |
4366 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
4367 { | |
4368 matrixR[size] = *(ref+rIndex-1); | |
4369 size++; | |
4370 matrixR[size] = 'D'; | |
4371 rIndex--; | |
4372 } | |
4373 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4374 { | |
4375 matrixR[size] = *(ref+rIndex-1); | |
4376 rIndex--; | |
4377 directionIndex--; | |
4378 } | |
4379 else | |
4380 { | |
4381 matrixR[size] = 'M'; | |
4382 rIndex--; | |
4383 directionIndex--; | |
4384 } | |
4385 } | |
4386 size++; | |
4387 } | |
4388 matrixR[size] = '\0'; | |
4389 | |
4390 size = 0; | |
4391 directionIndex = lSeqLength; | |
4392 rIndex = minIndex1; | |
4393 | |
4394 | |
4395 while(directionIndex != 0 || rIndex != 0) | |
4396 { | |
4397 if(directionIndex-rIndex == errThreshold) | |
4398 { | |
4399 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1) | |
4400 { | |
4401 matrixL[size] = 'I'; | |
4402 size++; | |
4403 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
4404 directionIndex--; | |
4405 } | |
4406 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4407 { | |
4408 matrixL[size] = *(tempref-rIndex); | |
4409 rIndex--; | |
4410 directionIndex--; | |
4411 } | |
4412 else | |
4413 { | |
4414 matrixL[size] = 'M'; | |
4415 rIndex--; | |
4416 directionIndex--; | |
4417 } | |
4418 | |
4419 } | |
4420 else if(rIndex - directionIndex == errThreshold) | |
4421 { | |
4422 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1) | |
4423 { | |
4424 matrixL[size] = 'D'; | |
4425 size++; | |
4426 matrixL[size] = *(tempref-rIndex); | |
4427 rIndex--; | |
4428 } | |
4429 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4430 { | |
4431 matrixL[size] = *(tempref-rIndex); | |
4432 rIndex--; | |
4433 directionIndex--; | |
4434 } | |
4435 else | |
4436 { | |
4437 matrixL[size] = 'M'; | |
4438 rIndex--; | |
4439 directionIndex--; | |
4440 } | |
4441 } | |
4442 else | |
4443 { | |
4444 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
4445 { | |
4446 matrixL[size] = 'I'; | |
4447 size++; | |
4448 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
4449 directionIndex--; | |
4450 } | |
4451 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
4452 { | |
4453 matrixL[size] = 'D'; | |
4454 size++; | |
4455 matrixL[size] = *(tempref-rIndex); | |
4456 rIndex--; | |
4457 } | |
4458 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4459 { | |
4460 matrixL[size] = *(tempref-rIndex); | |
4461 rIndex--; | |
4462 directionIndex--; | |
4463 } | |
4464 else | |
4465 { | |
4466 matrixL[size] = 'M'; | |
4467 rIndex--; | |
4468 directionIndex--; | |
4469 } | |
4470 } | |
4471 | |
4472 size++; | |
4473 } | |
4474 | |
4475 matrixL[size] = '\0'; | |
4476 char middle[200]; | |
4477 middle[0] = '\0'; | |
4478 | |
4479 for(i = 0; i < segLength; i++) | |
4480 middle[i] = 'M'; | |
4481 middle[segLength] = '\0'; | |
4482 | |
4483 char rmatrixR[200]; | |
4484 | |
4485 reverse(matrixR, rmatrixR, strlen(matrixR)); | |
4486 | |
4487 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR); | |
4488 | |
4489 return totalError; | |
4490 | |
4491 } | |
4492 | |
4493 int verifySingleEndEditDistanceExtention(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, | |
4494 char *matrix, int *map_location, short *seqHashValue) | |
4495 { | |
4496 int i = 0; | |
4497 | |
4498 char * ref; | |
4499 char * tempref; | |
4500 | |
4501 int rIndex = 0; //reference Index | |
4502 | |
4503 int error = 0; | |
4504 int error1 = 0; | |
4505 | |
4506 int error2 = 0; | |
4507 int error3 = 0; | |
4508 int totalError = 0; | |
4509 int errorSegment = 0; | |
4510 | |
4511 int ERROR_BOUND = min(4, errThreshold); | |
4512 | |
4513 | |
4514 /* | |
4515 1: Up | |
4516 2: Side | |
4517 3: Diagnoal Match | |
4518 4: Diagnoal Mismatch | |
4519 */ | |
4520 | |
4521 int min = 0; | |
4522 int minIndex1 = 0; | |
4523 int minIndex2 = 0; | |
4524 | |
4525 int directionIndex = 0; | |
4526 | |
4527 | |
4528 int size = 0; | |
4529 | |
4530 ref = _msf_refGen + refIndex - 1; | |
4531 tempref = _msf_refGen + refIndex - 1; | |
4532 | |
4533 | |
4534 if(lSeqLength != 0) | |
4535 { | |
4536 error3 = backwardEditDistanceSSE2Extention(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength); | |
4537 if(error3 == -1){ | |
4538 return -1; | |
4539 } | |
4540 } | |
4541 | |
4542 if(rSeqLength != 0) | |
4543 { | |
4544 error2 = forwardEditDistanceSSE2Extention(ref+segLength, rSeqLength, rSeq, rSeqLength); | |
4545 if(error2 == -1) | |
4546 return -1; | |
4547 } | |
4548 | |
4549 if(error2 + error3 > errThreshold) | |
4550 return -1; | |
4551 | |
4552 rIndex = 1; | |
4553 | |
4554 int prevError = 0; | |
4555 | |
4556 int tempUp = 0; | |
4557 int tempDown = 0; | |
4558 | |
4559 int errorString = 0; | |
4560 | |
4561 int upValue; | |
4562 int diagValue; | |
4563 int sideValue; | |
4564 if(lSeqLength > ERROR_BOUND) | |
4565 { | |
4566 while(rIndex <= lSeqLength+ERROR_BOUND && lSeqLength != 0) | |
4567 { | |
4568 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 ); | |
4569 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
4570 for(i = tempUp ; i < tempDown ; i++) | |
4571 { | |
4572 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i)); | |
4573 | |
4574 upValue = scoreB[i-1][rIndex]+1; | |
4575 diagValue = scoreB[i-1][rIndex-1]+ !errorString; | |
4576 sideValue = scoreB[i][rIndex-1]+1; | |
4577 | |
4578 if(i != tempUp && i != tempDown-1) | |
4579 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue); | |
4580 | |
4581 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength ) | |
4582 scoreB[i][rIndex] = min(sideValue, diagValue); | |
4583 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) ) | |
4584 scoreB[i][rIndex] = sideValue; | |
4585 else | |
4586 scoreB[i][rIndex] = min(diagValue , upValue); | |
4587 | |
4588 if(i == tempUp) | |
4589 error = scoreB[i][rIndex]; | |
4590 else if(error > scoreB[i][rIndex]) | |
4591 error = scoreB[i][rIndex]; | |
4592 } | |
4593 if(rIndex <= lSeqLength) | |
4594 { | |
4595 errorSegment = error-prevError; | |
4596 } | |
4597 rIndex++; | |
4598 } | |
4599 | |
4600 if(lSeqLength != 0) | |
4601 { | |
4602 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND]; | |
4603 minIndex1 = lSeqLength + ERROR_BOUND; | |
4604 | |
4605 // Find the Best error for all the possible ways. | |
4606 for(i = 1; i <= 2*ERROR_BOUND; i++) | |
4607 { | |
4608 if(min >= scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i] && lSeqLength+ERROR_BOUND-i > 0) | |
4609 { | |
4610 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i]; | |
4611 minIndex1 = lSeqLength+ERROR_BOUND-i; | |
4612 } | |
4613 } | |
4614 error = scoreB[lSeqLength][minIndex1]; | |
4615 } | |
4616 } | |
4617 else | |
4618 { | |
4619 int j = 0; | |
4620 for(i = 1; i <= lSeqLength; i++) | |
4621 { | |
4622 for(j = 1; j <= lSeqLength; j++) | |
4623 { | |
4624 scoreB[i][j] = min3(scoreB[i-1][j-1]+ (*(ref-j) != *(lSeq+lSeqLength-i) ),scoreB[i][j-1]+1 ,scoreB[i-1][j]+1); | |
4625 } | |
4626 } | |
4627 error = scoreB[lSeqLength][lSeqLength]; | |
4628 minIndex1 = lSeqLength; | |
4629 | |
4630 } | |
4631 error1 = error; | |
4632 | |
4633 error = 0; | |
4634 errorSegment = 0; | |
4635 | |
4636 directionIndex = lSeqLength; | |
4637 rIndex = minIndex1; | |
4638 | |
4639 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ; | |
4640 | |
4641 ref = ref + segLength; | |
4642 | |
4643 if(rSeqLength != 0 && rSeqLength > ERROR_BOUND) | |
4644 { | |
4645 ERROR_BOUND = min(ERROR_BOUND, rSeqLength); | |
4646 | |
4647 if(rSeqLength == ERROR_BOUND) | |
4648 { | |
4649 for(i=0; i < 2*ERROR_BOUND; i++) | |
4650 scoreF[0][i] = i; | |
4651 } | |
4652 | |
4653 rIndex = 1; | |
4654 while(rIndex <= rSeqLength+ERROR_BOUND) | |
4655 { | |
4656 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1; | |
4657 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
4658 for(i = tempUp; i < tempDown ; i++) | |
4659 { | |
4660 errorString = (*(ref+rIndex-1) == *(rSeq+i-1)); | |
4661 upValue = scoreF[i-1][rIndex]+1; | |
4662 diagValue = scoreF[i-1][rIndex-1]+ !errorString; | |
4663 sideValue = scoreF[i][rIndex-1]+1; | |
4664 | |
4665 if(i != tempUp && i != tempDown-1) | |
4666 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue); | |
4667 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength ) | |
4668 scoreF[i][rIndex] = min(sideValue, diagValue); | |
4669 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) ) | |
4670 scoreF[i][rIndex] = sideValue; | |
4671 else | |
4672 scoreF[i][rIndex] = min(diagValue , upValue); | |
4673 | |
4674 if(i == tempUp) | |
4675 error = scoreF[i][rIndex]; | |
4676 if(error > scoreF[i][rIndex]) | |
4677 error = scoreF[i][rIndex]; | |
4678 } | |
4679 if(rIndex <= rSeqLength) | |
4680 { | |
4681 errorSegment = error; | |
4682 } | |
4683 rIndex++; | |
4684 } | |
4685 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND]; | |
4686 minIndex2 = rSeqLength + ERROR_BOUND; | |
4687 | |
4688 // Find the Best error for all the possible ways. | |
4689 for(i = 1; i <= 2*ERROR_BOUND; i++) | |
4690 { | |
4691 if(min > scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i] && rSeqLength+ERROR_BOUND-i > 0) | |
4692 { | |
4693 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i]; | |
4694 minIndex2 = rSeqLength+ERROR_BOUND-i; | |
4695 } | |
4696 } | |
4697 error = scoreF[rSeqLength][minIndex2]; | |
4698 } | |
4699 else | |
4700 { | |
4701 int j = 0; | |
4702 for(i = 1; i <= rSeqLength; i++) | |
4703 { | |
4704 for(j = 1; j <= rSeqLength; j++) | |
4705 { | |
4706 scoreF[i][j] = min3(scoreF[i-1][j-1]+ (*(ref+j-1) != *(rSeq+i-1) ),scoreF[i][j-1]+1 ,scoreF[i-1][j]+1); | |
4707 } | |
4708 } | |
4709 error = scoreF[rSeqLength][rSeqLength]; | |
4710 minIndex2 = rSeqLength; | |
4711 } | |
4712 | |
4713 totalError = error + error1; | |
4714 | |
4715 if(totalError != error2+error3) | |
4716 { | |
4717 for(i = 0; i < lSeqLength; i++) | |
4718 printf("%c", *(tempref-1-i)); | |
4719 printf("\n"); | |
4720 for(i = 0; i < lSeqLength; i++) | |
4721 printf("%c", *(lSeq+i)); | |
4722 printf("\n"); | |
4723 | |
4724 for(i = 0; i < rSeqLength; i++) | |
4725 printf("%c", *(tempref+segLength+i)); | |
4726 printf("\n"); | |
4727 | |
4728 for(i = 0; i < rSeqLength; i++) | |
4729 printf("%c", *(rSeq+i)); | |
4730 printf("\n"); | |
4731 | |
4732 printf("ERROR=%d\n", totalError); | |
4733 printf("ERROR_SSE=%d\n", error3+error2); | |
4734 | |
4735 printf("ERROR_SSE_back=%d E_SSE_forw=%d\n", error3, error2); | |
4736 printf("ERROR_back=%d E_forw=%d\n", error1, error); | |
4737 | |
4738 } | |
4739 | |
4740 char matrixR[200]; | |
4741 char matrixL[200]; | |
4742 | |
4743 matrixR[0] = '\0'; | |
4744 matrixL[0] = '\0'; | |
4745 | |
4746 size = 0; | |
4747 directionIndex = rSeqLength; | |
4748 rIndex = minIndex2; | |
4749 | |
4750 | |
4751 while(directionIndex != 0 || rIndex != 0) | |
4752 { | |
4753 if(directionIndex-rIndex == errThreshold) | |
4754 { | |
4755 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1) | |
4756 { | |
4757 matrixR[size] = *(rSeq+directionIndex-1); | |
4758 size++; | |
4759 matrixR[size] = 'I'; | |
4760 directionIndex--; | |
4761 } | |
4762 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4763 { | |
4764 matrixR[size] = *(ref+rIndex-1); | |
4765 rIndex--; | |
4766 directionIndex--; | |
4767 } | |
4768 else | |
4769 { | |
4770 matrixR[size] = 'M'; | |
4771 rIndex--; | |
4772 directionIndex--; | |
4773 } | |
4774 | |
4775 } | |
4776 else if(rIndex - directionIndex == errThreshold) | |
4777 { | |
4778 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1) | |
4779 { | |
4780 matrixR[size] = *(ref+rIndex-1); | |
4781 size++; | |
4782 matrixR[size] = 'D'; | |
4783 rIndex--; | |
4784 } | |
4785 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4786 { | |
4787 matrixR[size] = *(ref+rIndex-1); | |
4788 rIndex--; | |
4789 directionIndex--; | |
4790 } | |
4791 else | |
4792 { | |
4793 matrixR[size] = 'M'; | |
4794 rIndex--; | |
4795 directionIndex--; | |
4796 } | |
4797 } | |
4798 else | |
4799 { | |
4800 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
4801 { | |
4802 matrixR[size] = *(rSeq+directionIndex-1); | |
4803 size++; | |
4804 matrixR[size] = 'I'; | |
4805 directionIndex--; | |
4806 } | |
4807 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
4808 { | |
4809 matrixR[size] = *(ref+rIndex-1); | |
4810 size++; | |
4811 matrixR[size] = 'D'; | |
4812 rIndex--; | |
4813 } | |
4814 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
4815 { | |
4816 matrixR[size] = *(ref+rIndex-1); | |
4817 rIndex--; | |
4818 directionIndex--; | |
4819 } | |
4820 else | |
4821 { | |
4822 matrixR[size] = 'M'; | |
4823 rIndex--; | |
4824 directionIndex--; | |
4825 } | |
4826 } | |
4827 size++; | |
4828 } | |
4829 matrixR[size] = '\0'; | |
4830 | |
4831 size = 0; | |
4832 directionIndex = lSeqLength; | |
4833 rIndex = minIndex1; | |
4834 | |
4835 | |
4836 while(directionIndex != 0 || rIndex != 0) | |
4837 { | |
4838 if(directionIndex-rIndex == errThreshold) | |
4839 { | |
4840 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1) | |
4841 { | |
4842 matrixL[size] = 'I'; | |
4843 size++; | |
4844 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
4845 directionIndex--; | |
4846 } | |
4847 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4848 { | |
4849 matrixL[size] = *(tempref-rIndex); | |
4850 rIndex--; | |
4851 directionIndex--; | |
4852 } | |
4853 else | |
4854 { | |
4855 matrixL[size] = 'M'; | |
4856 rIndex--; | |
4857 directionIndex--; | |
4858 } | |
4859 | |
4860 } | |
4861 else if(rIndex - directionIndex == errThreshold) | |
4862 { | |
4863 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1) | |
4864 { | |
4865 matrixL[size] = 'D'; | |
4866 size++; | |
4867 matrixL[size] = *(tempref-rIndex); | |
4868 rIndex--; | |
4869 } | |
4870 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4871 { | |
4872 matrixL[size] = *(tempref-rIndex); | |
4873 rIndex--; | |
4874 directionIndex--; | |
4875 } | |
4876 else | |
4877 { | |
4878 matrixL[size] = 'M'; | |
4879 rIndex--; | |
4880 directionIndex--; | |
4881 } | |
4882 } | |
4883 else | |
4884 { | |
4885 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
4886 { | |
4887 matrixL[size] = 'I'; | |
4888 size++; | |
4889 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
4890 directionIndex--; | |
4891 } | |
4892 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
4893 { | |
4894 matrixL[size] = 'D'; | |
4895 size++; | |
4896 matrixL[size] = *(tempref-rIndex); | |
4897 rIndex--; | |
4898 } | |
4899 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
4900 { | |
4901 matrixL[size] = *(tempref-rIndex); | |
4902 rIndex--; | |
4903 directionIndex--; | |
4904 } | |
4905 else | |
4906 { | |
4907 matrixL[size] = 'M'; | |
4908 rIndex--; | |
4909 directionIndex--; | |
4910 } | |
4911 } | |
4912 size++; | |
4913 } | |
4914 matrixL[size] = '\0'; | |
4915 | |
4916 char middle[200]; | |
4917 middle[0] = '\0'; | |
4918 for(i = 0; i < segLength; i++) | |
4919 middle[i] = 'M'; | |
4920 middle[segLength] = '\0'; | |
4921 | |
4922 char rmatrixR[200]; | |
4923 | |
4924 reverse(matrixR, rmatrixR, strlen(matrixR)); | |
4925 | |
4926 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR); | |
4927 | |
4928 | |
4929 return totalError; | |
4930 | |
4931 } | |
4932 | |
4933 | |
4934 int verifySingleEndEditDistance(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue) | |
4935 { | |
4936 | |
4937 int i = 0; | |
4938 | |
4939 char * ref; | |
4940 char * tempref; | |
4941 | |
4942 int rIndex = 0; //reference Index | |
4943 | |
4944 int error = 0; | |
4945 int error1 = 0; | |
4946 | |
4947 int error2 = 0; | |
4948 int error3 = 0; | |
4949 | |
4950 int totalError = 0; | |
4951 int errorSegment = 0; | |
4952 | |
4953 int ERROR_BOUND = errThreshold; | |
4954 | |
4955 /* | |
4956 1: Up | |
4957 2: Side | |
4958 3: Diagnoal Match | |
4959 4: Diagnoal Mismatch | |
4960 */ | |
4961 | |
4962 int min = 0; | |
4963 int minIndex1 = 0; | |
4964 int minIndex2 = 0; | |
4965 | |
4966 int directionIndex = 0; | |
4967 | |
4968 | |
4969 int size = 0; | |
4970 | |
4971 ref = _msf_refGen + refIndex - 1; | |
4972 tempref = _msf_refGen + refIndex - 1; | |
4973 | |
4974 | |
4975 if(rSeqLength != 0) | |
4976 { | |
4977 if(errThreshold %2 == 1) | |
4978 error2 = forwardEditDistanceSSE2Odd(ref+segLength, rSeqLength, rSeq, rSeqLength); | |
4979 else | |
4980 error2 = forwardEditDistanceSSE2G(ref+segLength, rSeqLength, rSeq, rSeqLength); | |
4981 if(error2 == -1) | |
4982 return -1; | |
4983 } | |
4984 | |
4985 if(lSeqLength != 0) | |
4986 { | |
4987 if(errThreshold % 2 == 1) | |
4988 error3 = backwardEditDistanceSSE2Odd(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength); | |
4989 else | |
4990 error3 = backwardEditDistanceSSE2G(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength); | |
4991 if(error3 == -1 || error3 == 0){ | |
4992 return -1; | |
4993 } | |
4994 } | |
4995 | |
4996 if(error3 + error2 > errThreshold) | |
4997 return -1; | |
4998 | |
4999 for(i = 0 ; i < errThreshold + 1; i++) | |
5000 { | |
5001 scoreB[0][i] = i; | |
5002 scoreB[i][0] = i; | |
5003 } | |
5004 | |
5005 rIndex = 1; | |
5006 int prevError = 0; | |
5007 | |
5008 int tempUp = 0; | |
5009 int tempDown = 0; | |
5010 | |
5011 int errorString = 0; | |
5012 | |
5013 int upValue; | |
5014 int diagValue; | |
5015 int sideValue; | |
5016 | |
5017 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0) | |
5018 { | |
5019 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 ); | |
5020 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
5021 for(i = tempUp ; i < tempDown ; i++) | |
5022 { | |
5023 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i)); | |
5024 | |
5025 upValue = scoreB[i-1][rIndex]+1; | |
5026 diagValue = scoreB[i-1][rIndex-1]+ !errorString; | |
5027 sideValue = scoreB[i][rIndex-1]+1; | |
5028 | |
5029 if(i != tempUp && i != tempDown-1) | |
5030 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue); | |
5031 | |
5032 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength ) | |
5033 scoreB[i][rIndex] = min(sideValue, diagValue); | |
5034 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) ) | |
5035 scoreB[i][rIndex] = sideValue; | |
5036 else | |
5037 scoreB[i][rIndex] = min(diagValue , upValue); | |
5038 | |
5039 if(i == tempUp) | |
5040 error = scoreB[i][rIndex]; | |
5041 else if(error > scoreB[i][rIndex]) | |
5042 error = scoreB[i][rIndex]; | |
5043 } | |
5044 if(rIndex <= lSeqLength) | |
5045 { | |
5046 errorSegment = error-prevError; | |
5047 } | |
5048 rIndex++; | |
5049 } | |
5050 if(lSeqLength != 0) | |
5051 { | |
5052 min = scoreB[lSeqLength][lSeqLength+errThreshold]; | |
5053 minIndex1 = lSeqLength + errThreshold; | |
5054 | |
5055 // Find the Best error for all the possible ways. | |
5056 for(i = 1; i <= 2*errThreshold; i++) | |
5057 { | |
5058 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0) | |
5059 { | |
5060 min = scoreB[lSeqLength][lSeqLength+errThreshold-i]; | |
5061 minIndex1 = lSeqLength+errThreshold-i; | |
5062 } | |
5063 } | |
5064 error = scoreB[lSeqLength][minIndex1]; | |
5065 } | |
5066 | |
5067 error1 = error; | |
5068 | |
5069 error = 0; | |
5070 errorSegment = 0; | |
5071 | |
5072 directionIndex = lSeqLength; | |
5073 rIndex = minIndex1; | |
5074 | |
5075 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ; | |
5076 | |
5077 ref = ref + segLength; | |
5078 | |
5079 if(rSeqLength != 0) | |
5080 { | |
5081 for(i = 0 ; i < errThreshold + 1; i++) | |
5082 { | |
5083 scoreF[0][i] = i; | |
5084 scoreF[i][0] = i; | |
5085 } | |
5086 | |
5087 | |
5088 rIndex = 1; | |
5089 while(rIndex <= rSeqLength+errThreshold-error1) | |
5090 { | |
5091 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1; | |
5092 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1); | |
5093 for(i = tempUp; i < tempDown ; i++) | |
5094 { | |
5095 errorString = (*(ref+rIndex-1) == *(rSeq+i-1)); | |
5096 | |
5097 upValue = scoreF[i-1][rIndex]+1; | |
5098 diagValue = scoreF[i-1][rIndex-1]+ !errorString; | |
5099 sideValue = scoreF[i][rIndex-1]+1; | |
5100 | |
5101 if(i != tempUp && i != tempDown-1) | |
5102 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue); | |
5103 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength ) | |
5104 scoreF[i][rIndex] = min(sideValue, diagValue); | |
5105 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) ) | |
5106 scoreF[i][rIndex] = sideValue; | |
5107 else | |
5108 scoreF[i][rIndex] = min(diagValue , upValue); | |
5109 | |
5110 if(i == tempUp) | |
5111 error = scoreF[i][rIndex]; | |
5112 if(error > scoreF[i][rIndex]) | |
5113 error = scoreF[i][rIndex]; | |
5114 } | |
5115 if(rIndex <= rSeqLength) | |
5116 { | |
5117 errorSegment = error; | |
5118 } | |
5119 rIndex++; | |
5120 } | |
5121 | |
5122 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1]; | |
5123 minIndex2 = rSeqLength + errThreshold-error1; | |
5124 | |
5125 // Find the Best error for all the possible ways. | |
5126 for(i = 1; i <= 2*(errThreshold-error1); i++) | |
5127 { | |
5128 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0) | |
5129 { | |
5130 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i]; | |
5131 minIndex2 = rSeqLength+errThreshold-error1-i; | |
5132 } | |
5133 } | |
5134 error = scoreF[rSeqLength][minIndex2]; | |
5135 } | |
5136 | |
5137 totalError = error + error1; | |
5138 | |
5139 | |
5140 if(totalError != error2 + error3 && totalError > errThreshold) | |
5141 { | |
5142 for(i = 0; i < lSeqLength; i++) | |
5143 printf("%c", *(tempref-1-i)); | |
5144 printf("\n"); | |
5145 for(i = 0; i < lSeqLength; i++) | |
5146 printf("%c", *(lSeq+i)); | |
5147 printf("\n"); | |
5148 | |
5149 for(i = 0; i < rSeqLength; i++) | |
5150 printf("%c", *(tempref+segLength+i)); | |
5151 printf("\n"); | |
5152 | |
5153 for(i = 0; i < rSeqLength; i++) | |
5154 printf("%c", *(rSeq+i)); | |
5155 printf("\n"); | |
5156 | |
5157 | |
5158 printf("SSEF=%d SSEB%d\n", error2, error3); | |
5159 printf("F=%d B=%d\n", error, error1); | |
5160 scanf("%d", &i); | |
5161 } | |
5162 | |
5163 char matrixR[200]; | |
5164 char matrixL[200]; | |
5165 | |
5166 matrixR[0] = '\0'; | |
5167 matrixL[0] = '\0'; | |
5168 | |
5169 size = 0; | |
5170 directionIndex = rSeqLength; | |
5171 rIndex = minIndex2; | |
5172 | |
5173 while(directionIndex != 0 || rIndex != 0) | |
5174 { | |
5175 if(directionIndex-rIndex == errThreshold) | |
5176 { | |
5177 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1) | |
5178 { | |
5179 matrixR[size] = *(rSeq+directionIndex-1); | |
5180 size++; | |
5181 matrixR[size] = 'I'; | |
5182 directionIndex--; | |
5183 } | |
5184 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
5185 { | |
5186 matrixR[size] = *(ref+rIndex-1); | |
5187 rIndex--; | |
5188 directionIndex--; | |
5189 } | |
5190 else | |
5191 { | |
5192 matrixR[size] = 'M'; | |
5193 rIndex--; | |
5194 directionIndex--; | |
5195 } | |
5196 | |
5197 } | |
5198 else if(rIndex - directionIndex == errThreshold) | |
5199 { | |
5200 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1) | |
5201 { | |
5202 matrixR[size] = *(ref+rIndex-1); | |
5203 size++; | |
5204 matrixR[size] = 'D'; | |
5205 rIndex--; | |
5206 } | |
5207 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
5208 { | |
5209 matrixR[size] = *(ref+rIndex-1); | |
5210 rIndex--; | |
5211 directionIndex--; | |
5212 } | |
5213 else | |
5214 { | |
5215 matrixR[size] = 'M'; | |
5216 rIndex--; | |
5217 directionIndex--; | |
5218 } | |
5219 } | |
5220 else | |
5221 { | |
5222 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
5223 { | |
5224 matrixR[size] = *(rSeq+directionIndex-1); | |
5225 size++; | |
5226 matrixR[size] = 'I'; | |
5227 directionIndex--; | |
5228 } | |
5229 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
5230 { | |
5231 matrixR[size] = *(ref+rIndex-1); | |
5232 size++; | |
5233 matrixR[size] = 'D'; | |
5234 rIndex--; | |
5235 } | |
5236 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 ) | |
5237 { | |
5238 matrixR[size] = *(ref+rIndex-1); | |
5239 rIndex--; | |
5240 directionIndex--; | |
5241 } | |
5242 else | |
5243 { | |
5244 matrixR[size] = 'M'; | |
5245 rIndex--; | |
5246 directionIndex--; | |
5247 } | |
5248 } | |
5249 size++; | |
5250 } | |
5251 matrixR[size] = '\0'; | |
5252 | |
5253 size = 0; | |
5254 directionIndex = lSeqLength; | |
5255 rIndex = minIndex1; | |
5256 | |
5257 | |
5258 while(directionIndex != 0 || rIndex != 0) | |
5259 { | |
5260 if(directionIndex-rIndex == errThreshold) | |
5261 { | |
5262 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1) | |
5263 { | |
5264 matrixL[size] = 'I'; | |
5265 size++; | |
5266 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
5267 directionIndex--; | |
5268 } | |
5269 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
5270 { | |
5271 matrixL[size] = *(tempref-rIndex); | |
5272 rIndex--; | |
5273 directionIndex--; | |
5274 } | |
5275 else | |
5276 { | |
5277 matrixL[size] = 'M'; | |
5278 rIndex--; | |
5279 directionIndex--; | |
5280 } | |
5281 | |
5282 } | |
5283 else if(rIndex - directionIndex == errThreshold) | |
5284 { | |
5285 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1) | |
5286 { | |
5287 matrixL[size] = 'D'; | |
5288 size++; | |
5289 matrixL[size] = *(tempref-rIndex); | |
5290 rIndex--; | |
5291 } | |
5292 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
5293 { | |
5294 matrixL[size] = *(tempref-rIndex); | |
5295 rIndex--; | |
5296 directionIndex--; | |
5297 } | |
5298 else | |
5299 { | |
5300 matrixL[size] = 'M'; | |
5301 rIndex--; | |
5302 directionIndex--; | |
5303 } | |
5304 } | |
5305 else | |
5306 { | |
5307 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0) | |
5308 { | |
5309 matrixL[size] = 'I'; | |
5310 size++; | |
5311 matrixL[size] = *(lSeq+lSeqLength-directionIndex); | |
5312 directionIndex--; | |
5313 } | |
5314 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0) | |
5315 { | |
5316 matrixL[size] = 'D'; | |
5317 size++; | |
5318 matrixL[size] = *(tempref-rIndex); | |
5319 rIndex--; | |
5320 } | |
5321 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 ) | |
5322 { | |
5323 matrixL[size] = *(tempref-rIndex); | |
5324 rIndex--; | |
5325 directionIndex--; | |
5326 } | |
5327 else | |
5328 { | |
5329 matrixL[size] = 'M'; | |
5330 rIndex--; | |
5331 directionIndex--; | |
5332 } | |
5333 } | |
5334 size++; | |
5335 } | |
5336 matrixL[size] = '\0'; | |
5337 char middle[200]; | |
5338 middle[0] = '\0'; | |
5339 for(i = 0; i < segLength; i++) | |
5340 middle[i] = 'M'; | |
5341 middle[segLength] = '\0'; | |
5342 | |
5343 char rmatrixR[200]; | |
5344 | |
5345 reverse(matrixR, rmatrixR, strlen(matrixR)); | |
5346 | |
5347 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR); | |
5348 | |
5349 return totalError; | |
5350 } | |
5351 | |
5352 | |
/*
 * Number of characters needed to print cnt with "%d".
 *
 * generateCigar adds this value (plus one for the operation letter) to its
 * running cigar length after each sprintf, so the result must equal the real
 * digit count or the string gets terminated in the wrong place.  Counts of
 * 1000 and above are now sized correctly instead of being capped at 3.
 *
 * cnt is expected to be non-negative; any value below 10 (including
 * negatives) yields 1.
 */
int addCigarSize(int cnt){
	int digits = 1;

	while (cnt >= 10)
	{
		cnt /= 10;
		digits++;
	}
	return digits;
}
5358 | |
/*
  Generate Cigar from the back tracking matrix

  matrix is read as a sequence of operations:
    'M'             one matching base
    'I' + one char  one inserted base (the following character is skipped)
    'D' + one char  one deleted base  (the following character is skipped)
    anything else   a mismatching base, counted as 'M'

  Consecutive operations of the same kind are merged into "<count><op>".
  cigar receives the NUL-terminated result; the caller must provide a buffer
  large enough for it.

  Each run is written directly at the current end of cigar.  The previous
  implementation used sprintf(cigar, "%s...", cigar, ...), which copies
  between overlapping objects and is undefined behaviour.
*/
void generateCigar(char *matrix, int matrixLength, char *cigar)
{
    int i = 0;
    int runLength = 0;      /* length of the run currently being collected */
    char runOp = 'M';       /* operation of that run ('M', 'I' or 'D')     */
    int cigarSize = 0;      /* number of characters written so far         */

    cigar[0] = '\0';

    while (i < matrixLength)
    {
        char op = 'M';

        if (matrix[i] == 'I' || matrix[i] == 'D')
        {
            op = matrix[i];
            i++;            /* skip the base that follows the indel marker */
        }

        /* the operation changed: emit the finished run */
        if (runLength != 0 && op != runOp)
        {
            cigarSize += sprintf(cigar + cigarSize, "%d%c", runLength, runOp);
            runLength = 0;
        }

        runOp = op;
        runLength++;
        i++;
    }

    if (runLength != 0)
    {
        cigarSize += sprintf(cigar + cigarSize, "%d%c", runLength, runOp);
    }

    cigar[cigarSize] = '\0';
}
5480 | |
/*
  Creates the Cigar output from the mismatching positions format [0-9]+(([ACTGN]|\^[ACTGN]+)[0-9]+)*

  The result written to cigar is an expanded operation string (one 'M', 'I'
  or 'D' per position), NUL-terminated.  Only the first mismatchLength
  characters of mismatch are examined: the digit scan now checks the bound
  before reading, and the number is accumulated while scanning instead of
  calling atoi(), which could parse past mismatchLength.

  NOTE(review): apart from the bound handling the original behaviour is kept
  unchanged, including these points that look suspicious and should be
  confirmed against the callers:
    - the character that follows a run of digits is skipped;
    - a value of 0 still produces one 'M';
    - the mismatch branch advances the output position by two, leaving one
      byte of cigar unwritten.
*/
void generateCigarFromMD(char *mismatch, int mismatchLength, char *cigar)
{
    int i = 0;
    int j = 0;
    int cigarSize = 0;

    cigar[0] = '\0';

    while (i < mismatchLength)
    {
        char c = mismatch[i];

        if (c >= '0' && c <= '9')
        {
            int value = 0;

            /* bound check first, then the read */
            while (i < mismatchLength && mismatch[i] >= '0' && mismatch[i] <= '9')
            {
                value = value * 10 + (mismatch[i] - '0');
                i++;
            }

            for (j = 0; j < value - 1; j++)
            {
                cigar[cigarSize] = 'M';
                cigarSize++;
            }
            cigar[cigarSize] = 'M';
        }
        else if (c == '^')
        {
            cigar[cigarSize] = 'I';
            i++;
        }
        else if (c == '\'')
        {
            cigar[cigarSize] = 'D';
            i++;
        }
        else
        {
            cigar[cigarSize] = 'M';
            cigarSize++;
        }
        cigarSize++;
        i++;
    }
    cigar[cigarSize] = '\0';
}
5531 | |
/*
  Build the MD-style edit string from the back tracking matrix.

  matrix is read as a sequence of operations:
    'M'             matching base; runs are written as a decimal count
    'D' + one char  deleted base; runs are written as '^' followed by the bases
    'I' + one char  inserted base; produces no output and does not interrupt
                    a run of matches
    anything else   mismatching base, copied to the output as is

  outputSNP receives the NUL-terminated result; the caller must provide a
  buffer large enough for it.

  Every piece is written directly at the current end of outputSNP.  The
  previous implementation used sprintf(outputSNP, "%s...", outputSNP, ...),
  which copies between overlapping objects and is undefined behaviour.
  Deleted bases beyond the capacity of the local buffer are dropped instead
  of overflowing it, and a 'D' in the last position no longer reads past
  matrixLength (it records a NUL, which is what a NUL-terminated matrix
  yielded before).
*/
void generateSNPSAM(char *matrix, int matrixLength, char *outputSNP)
{
    int i = 0;

    int counterM = 0;           /* pending run of matches        */
    int counterD = 0;           /* pending run of deleted bases  */

    char deletedBases[100];

    int snpSize = 0;            /* characters written so far     */

    outputSNP[0] = '\0';
    deletedBases[0] = '\0';

    while (i < matrixLength)
    {
        char c = matrix[i];

        if (c == 'M')
        {
            counterM++;
            if (counterD != 0)
            {
                deletedBases[counterD] = '\0';
                counterD = 0;
                snpSize += sprintf(outputSNP + snpSize, "^%s", deletedBases);
                deletedBases[0] = '\0';
            }
        }
        else if (c == 'D')
        {
            if (counterM != 0)
            {
                snpSize += sprintf(outputSNP + snpSize, "%d", counterM);
                counterM = 0;
            }
            if (counterD < (int) sizeof(deletedBases) - 1)
            {
                deletedBases[counterD] = (i + 1 < matrixLength) ? matrix[i + 1] : '\0';
                counterD++;
            }
            i++;                /* skip the deleted base */
        }
        else if (c == 'I')
        {
            /* a pending match run is deliberately left open across an insertion */
            if (counterM == 0 && counterD != 0)
            {
                deletedBases[counterD] = '\0';
                snpSize += sprintf(outputSNP + snpSize, "^%s", deletedBases);
                counterD = 0;
                deletedBases[0] = '\0';
            }
            i++;                /* skip the inserted base */
        }
        else
        {
            if (counterM != 0)
            {
                snpSize += sprintf(outputSNP + snpSize, "%d", counterM);
                counterM = 0;
            }
            if (counterD != 0)
            {
                deletedBases[counterD] = '\0';
                counterD = 0;
                snpSize += sprintf(outputSNP + snpSize, "^%s", deletedBases);
                deletedBases[0] = '\0';
            }
            outputSNP[snpSize] = c;
            snpSize += 1;
            outputSNP[snpSize] = '\0';
        }
        i++;
    }

    if (counterM != 0)
    {
        snpSize += sprintf(outputSNP + snpSize, "%d", counterM);
        counterM = 0;
    }
    else if (counterD != 0)
    {
        deletedBases[counterD] = '\0';
        snpSize += sprintf(outputSNP + snpSize, "^%s", deletedBases);
        counterD = 0;
    }

    outputSNP[snpSize] = '\0';
}
5650 /**********************************************/ | |
5651 | |
5652 /* | |
5653 direction = 0 forward | |
5654 1 backward | |
5655 | |
5656 */ | |
5657 | |
5658 void mapSingleEndSeq(unsigned int *l1, int s1, int readNumber, int readSegment, int direction) | |
5659 { | |
5660 int j = 0; | |
5661 int z = 0; | |
5662 int *locs = (int *) l1; | |
5663 char *_tmpSeq, *_tmpQual; | |
5664 char rqual[SEQ_LENGTH+1]; | |
5665 rqual[SEQ_LENGTH]='\0'; | |
5666 | |
5667 int genLoc = 0; | |
5668 int leftSeqLength = 0; | |
5669 int rightSeqLength = 0; | |
5670 int middleSeqLength = 0; | |
5671 | |
5672 char matrix[200]; | |
5673 char editString[200]; | |
5674 char cigar[MAX_CIGAR_SIZE]; | |
5675 | |
5676 short *_tmpHashValue; | |
5677 | |
5678 if (direction) | |
5679 { | |
5680 reverse(_msf_seqList[readNumber].qual, rqual, SEQ_LENGTH); | |
5681 _tmpQual = rqual; | |
5682 _tmpSeq = _msf_seqList[readNumber].rseq; | |
5683 _tmpHashValue = _msf_seqList[readNumber].rhashValue; | |
5684 } | |
5685 else | |
5686 { | |
5687 _tmpQual = _msf_seqList[readNumber].qual; | |
5688 _tmpSeq = _msf_seqList[readNumber].seq; | |
5689 _tmpHashValue = _msf_seqList[readNumber].hashValue; | |
5690 } | |
5691 | |
5692 int readId = 2*readNumber+direction; | |
5693 for (z=0; z<s1; z++) | |
5694 { | |
5695 | |
5696 | |
5697 int map_location = 0; | |
5698 int a = 0; | |
5699 int o = readSegment; | |
5700 | |
5701 genLoc = locs[z];//-_msf_samplingLocs[o]; | |
5702 | |
5703 | |
5704 if ( genLoc-_msf_samplingLocs[o] < _msf_refGenBeg || | |
5705 genLoc-_msf_samplingLocs[o] > _msf_refGenEnd || | |
5706 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId || | |
5707 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId | |
5708 ) | |
5709 continue; | |
5710 int err = -1; | |
5711 | |
5712 | |
5713 map_location = 0; | |
5714 | |
5715 leftSeqLength = _msf_samplingLocs[o]; | |
5716 middleSeqLength = WINDOW_SIZE; | |
5717 a = leftSeqLength + middleSeqLength; | |
5718 rightSeqLength = SEQ_LENGTH - a; | |
5719 | |
5720 if(errThreshold == 2) | |
5721 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength, | |
5722 _tmpSeq + a, rightSeqLength, | |
5723 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5724 else if(errThreshold == 4) | |
5725 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength, | |
5726 _tmpSeq + a, rightSeqLength, | |
5727 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5728 else if(errThreshold ==3) | |
5729 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength, | |
5730 _tmpSeq + a, rightSeqLength, | |
5731 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5732 /*else if(errThreshold == 6) | |
5733 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength, | |
5734 _tmpSeq + a, rightSeqLength, | |
5735 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5736 */ | |
5737 else | |
5738 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength, | |
5739 _tmpSeq + a, rightSeqLength, | |
5740 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5741 | |
5742 if(err != -1) | |
5743 { | |
5744 generateSNPSAM(matrix, strlen(matrix), editString); | |
5745 generateCigar(matrix, strlen(matrix), cigar); | |
5746 } | |
5747 | |
5748 if(err != -1 && !bestMode) | |
5749 { | |
5750 | |
5751 mappingCnt++; | |
5752 | |
5753 int j = 0; | |
5754 int k = 0; | |
5755 for(k = 0; k < readSegment+1; k++) | |
5756 { | |
5757 for(j = -errThreshold ; j <= errThreshold; j++) | |
5758 { | |
5759 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg && | |
5760 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd) | |
5761 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId; | |
5762 } | |
5763 } | |
5764 _msf_seqList[readNumber].hits[0]++; | |
5765 | |
5766 _msf_output.QNAME = _msf_seqList[readNumber].name; | |
5767 _msf_output.FLAG = 16 * direction; | |
5768 _msf_output.RNAME = _msf_refGenName; | |
5769 _msf_output.POS = map_location + _msf_refGenOffset; | |
5770 _msf_output.MAPQ = 255; | |
5771 _msf_output.CIGAR = cigar; | |
5772 _msf_output.MRNAME = "*"; | |
5773 _msf_output.MPOS = 0; | |
5774 _msf_output.ISIZE = 0; | |
5775 _msf_output.SEQ = _tmpSeq; | |
5776 _msf_output.QUAL = _tmpQual; | |
5777 | |
5778 _msf_output.optSize = 2; | |
5779 _msf_output.optFields = _msf_optionalFields; | |
5780 | |
5781 _msf_optionalFields[0].tag = "NM"; | |
5782 _msf_optionalFields[0].type = 'i'; | |
5783 _msf_optionalFields[0].iVal = err; | |
5784 | |
5785 _msf_optionalFields[1].tag = "MD"; | |
5786 _msf_optionalFields[1].type = 'Z'; | |
5787 _msf_optionalFields[1].sVal = editString; | |
5788 | |
5789 output(_msf_output); | |
5790 | |
5791 | |
5792 if (_msf_seqList[readNumber].hits[0] == 1) | |
5793 { | |
5794 mappedSeqCnt++; | |
5795 } | |
5796 | |
5797 if ( maxHits == 0 ) | |
5798 { | |
5799 _msf_seqList[readNumber].hits[0] = 2; | |
5800 } | |
5801 | |
5802 | |
5803 if ( maxHits!=0 && _msf_seqList[readNumber].hits[0] == maxHits) | |
5804 { | |
5805 completedSeqCnt++; | |
5806 break; | |
5807 } | |
5808 | |
5809 } | |
5810 else if(err != -1 && bestMode) | |
5811 { | |
5812 mappingCnt++; | |
5813 _msf_seqList[readNumber].hits[0]++; | |
5814 | |
5815 if (_msf_seqList[readNumber].hits[0] == 1) | |
5816 { | |
5817 mappedSeqCnt++; | |
5818 } | |
5819 | |
5820 if ( maxHits == 0 ) | |
5821 { | |
5822 _msf_seqList[readNumber].hits[0] = 2; | |
5823 } | |
5824 | |
5825 if(err < bestHitMappingInfo[readNumber].err || bestHitMappingInfo[readNumber].loc == -1) | |
5826 { | |
5827 setFullMappingInfo(readNumber, map_location + _msf_refGenOffset, direction, err, 0, editString, _msf_refGenName, cigar ); | |
5828 } | |
5829 } | |
5830 else | |
5831 { | |
5832 for(j = -errThreshold ; j <= errThreshold; j++) | |
5833 { | |
5834 if(genLoc+j > _msf_refGenBeg && | |
5835 genLoc+j < _msf_refGenEnd) | |
5836 _msf_verifiedLocs[genLoc+j] = -readId; | |
5837 } | |
5838 } | |
5839 } | |
5840 } | |
5841 | |
5842 | |
5843 int mapAllSingleEndSeq() | |
5844 { | |
5845 int i = 0; | |
5846 int j = 0; | |
5847 int k = 0; | |
5848 | |
5849 | |
5850 unsigned int *locs = NULL; | |
5851 | |
5852 | |
5853 int prev_hash = 0; | |
5854 | |
5855 for(i = 0; i < _msf_seqListSize; i++) | |
5856 { | |
5857 for(j = 0; j < _msf_samplingLocsSize; j++) | |
5858 { | |
5859 k = _msf_sort_seqList[i].readNumber; | |
5860 // if(j != 0) | |
5861 // if(strncmp(_msf_seqList[k].seq+_msf_samplingLocs[j], _msf_seqList[k].seq+_msf_samplingLocs[j-1], segSize) == 0) | |
5862 // continue; | |
5863 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j])) | |
5864 // continue; | |
5865 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j])); | |
5866 if ( locs != NULL) | |
5867 { | |
5868 mapSingleEndSeq(locs+1, locs[0],k ,j, 0); | |
5869 } | |
5870 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]); | |
5871 } | |
5872 } | |
5873 i = 0; | |
5874 | |
5875 for(i = 0; i < _msf_seqListSize; i++) | |
5876 { | |
5877 for(j = 0; j < _msf_samplingLocsSize; j++) | |
5878 { | |
5879 k = _msf_sort_seqList[i].readNumber; | |
5880 | |
5881 // if(j != 0) | |
5882 // if(strncmp(_msf_seqList[k].rseq+_msf_samplingLocs[j], _msf_seqList[k].rseq+_msf_samplingLocs[j-1], segSize) == 0) | |
5883 // continue; | |
5884 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j])) | |
5885 // continue; | |
5886 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j])); | |
5887 if ( locs != NULL) | |
5888 { | |
5889 mapSingleEndSeq(locs+1, locs[0],k ,j, 1); | |
5890 } | |
5891 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]); | |
5892 } | |
5893 } | |
5894 return 1; | |
5895 } | |
5896 | |
5897 | |
5898 /**********************************************/ | |
5899 /**********************************************/ | |
5900 /**********************************************/ | |
5901 /**********************************************/ | |
5902 /**********************************************/ | |
5903 int compareOut (const void *a, const void *b) | |
5904 { | |
5905 FullMappingInfo *aInfo = (FullMappingInfo *)a; | |
5906 FullMappingInfo *bInfo = (FullMappingInfo *)b; | |
5907 return aInfo->loc - bInfo->loc; | |
5908 } | |
5909 | |
5910 | |
5911 | |
5912 /**********************************************/ | |
5913 | |
5914 /* | |
5915 direction 0: Forward | |
5916 1: Reverse | |
5917 */ | |
5918 | |
5919 void mapPairEndSeqList(unsigned int *l1, int s1, int readNumber, int readSegment, int direction) | |
5920 { | |
5921 int z = 0; | |
5922 int *locs = (int *) l1; | |
5923 char *_tmpSeq; | |
5924 | |
5925 char rqual[SEQ_LENGTH+1]; | |
5926 | |
5927 char matrix[200]; | |
5928 char editString[200]; | |
5929 char cigar[MAX_CIGAR_SIZE]; | |
5930 | |
5931 short *_tmpHashValue; | |
5932 | |
5933 int leftSeqLength = 0; | |
5934 int middleSeqLength = 0; | |
5935 int rightSeqLength =0; | |
5936 int a = 0; | |
5937 | |
5938 rqual[SEQ_LENGTH]='\0'; | |
5939 | |
5940 | |
5941 int r = readNumber; | |
5942 | |
5943 char d = (direction==1)?-1:1; | |
5944 | |
5945 if (d==-1) | |
5946 { | |
5947 _tmpSeq = _msf_seqList[readNumber].rseq; | |
5948 _tmpHashValue = _msf_seqList[r].rhashValue; | |
5949 } | |
5950 else | |
5951 { | |
5952 _tmpSeq = _msf_seqList[readNumber].seq; | |
5953 _tmpHashValue = _msf_seqList[r].hashValue; | |
5954 } | |
5955 | |
5956 int readId = 2*readNumber+direction; | |
5957 for (z=0; z<s1; z++) | |
5958 { | |
5959 int genLoc = locs[z];//-_msf_samplingLocs[o]; | |
5960 int err = -1; | |
5961 int map_location = 0; | |
5962 int o = readSegment; | |
5963 | |
5964 leftSeqLength = _msf_samplingLocs[o]; | |
5965 middleSeqLength = WINDOW_SIZE; | |
5966 a = leftSeqLength + middleSeqLength; | |
5967 rightSeqLength = SEQ_LENGTH - a; | |
5968 | |
5969 if(genLoc - leftSeqLength < _msf_refGenBeg || genLoc + rightSeqLength + middleSeqLength > _msf_refGenEnd || | |
5970 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId || _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId) | |
5971 continue; | |
5972 | |
5973 if(errThreshold == 2) | |
5974 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength, | |
5975 _tmpSeq + a, rightSeqLength, | |
5976 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5977 else if(errThreshold == 4) | |
5978 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength, | |
5979 _tmpSeq + a, rightSeqLength, | |
5980 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5981 else if(errThreshold ==3) | |
5982 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength, | |
5983 _tmpSeq + a, rightSeqLength, | |
5984 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5985 /*else if(errThreshold == 6) | |
5986 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength, | |
5987 _tmpSeq + a, rightSeqLength, | |
5988 middleSeqLength, matrix, &map_location, _tmpHashValue);*/ | |
5989 else | |
5990 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength, | |
5991 _tmpSeq + a, rightSeqLength, | |
5992 middleSeqLength, matrix, &map_location, _tmpHashValue); | |
5993 | |
5994 | |
5995 if (err != -1) | |
5996 { | |
5997 int j = 0; | |
5998 int k = 0; | |
5999 | |
6000 for(k = 0; k < readSegment+1; k++) | |
6001 { | |
6002 for(j = -errThreshold ; j <= errThreshold; j++) | |
6003 { | |
6004 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg && | |
6005 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd) | |
6006 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId; | |
6007 } | |
6008 } | |
6009 | |
6010 | |
6011 generateSNPSAM(matrix, strlen(matrix), editString); | |
6012 generateCigar(matrix, strlen(matrix), cigar); | |
6013 | |
6014 MappingLocations *parent = NULL; | |
6015 MappingLocations *child = _msf_mappingInfo[r].next; | |
6016 | |
6017 genLoc = map_location + _msf_refGenOffset; | |
6018 int i = 0; | |
6019 for (i=0; i<(_msf_mappingInfo[r].size/MAP_CHUNKS); i++) | |
6020 { | |
6021 parent = child; | |
6022 child = child->next; | |
6023 } | |
6024 | |
6025 if (child==NULL) | |
6026 { | |
6027 MappingLocations *tmp = getMem(sizeof(MappingLocations)); | |
6028 | |
6029 tmp->next = NULL; | |
6030 tmp->loc[0]=genLoc * d; | |
6031 tmp->err[0]=err; | |
6032 | |
6033 tmp->cigarSize[0] = strlen(cigar); | |
6034 sprintf(tmp->cigar[0],"%s", cigar); | |
6035 | |
6036 tmp->mdSize[0] = strlen(editString); | |
6037 sprintf(tmp->md[0],"%s", editString); | |
6038 | |
6039 if (parent == NULL) | |
6040 _msf_mappingInfo[r].next = tmp; | |
6041 else | |
6042 parent->next = tmp; | |
6043 } | |
6044 else | |
6045 { | |
6046 if(strlen(cigar) > SEQ_LENGTH || strlen(editString) > SEQ_LENGTH) | |
6047 { | |
6048 printf("ERROR in %d read size(After mapping) exceedes cigar=%d md =%d cigar=%s md =%s\n", r, (int)strlen(cigar), (int)strlen(editString), cigar, editString); | |
6049 } | |
6050 | |
6051 child->loc[_msf_mappingInfo[r].size % MAP_CHUNKS] = genLoc * d; | |
6052 child->err[_msf_mappingInfo[r].size % MAP_CHUNKS] = err; | |
6053 | |
6054 child->cigarSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(cigar); | |
6055 sprintf(child->cigar[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",cigar); | |
6056 | |
6057 child->mdSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(editString); | |
6058 sprintf(child->md[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",editString); | |
6059 } | |
6060 _msf_mappingInfo[r].size++; | |
6061 | |
6062 } | |
6063 else | |
6064 { | |
6065 _msf_verifiedLocs[genLoc] = -readId; | |
6066 } | |
6067 | |
6068 } | |
6069 } | |
6070 | |
6071 /**********************************************/ | |
6072 void mapPairedEndSeq() | |
6073 { | |
6074 int i = 0; | |
6075 int j = 0; | |
6076 int k = 0; | |
6077 | |
6078 unsigned int *locs = NULL; | |
6079 while ( i < _msf_seqListSize ) | |
6080 { | |
6081 for(j = 0; j < _msf_samplingLocsSize; j++) | |
6082 { | |
6083 k = _msf_sort_seqList[i].readNumber; | |
6084 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j])); | |
6085 if ( locs != NULL) | |
6086 { | |
6087 mapPairEndSeqList(locs+1, locs[0],k ,j, 0); | |
6088 } | |
6089 } | |
6090 i++; | |
6091 } | |
6092 i = 0; | |
6093 | |
6094 while ( i < _msf_seqListSize ) | |
6095 { | |
6096 for(j = 0; j < _msf_samplingLocsSize; j++) | |
6097 { | |
6098 k = _msf_sort_seqList[i].readNumber; | |
6099 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j])); | |
6100 if ( locs != NULL) | |
6101 { | |
6102 mapPairEndSeqList(locs+1, locs[0],k ,j, 1); | |
6103 } | |
6104 } | |
6105 | |
6106 i++; | |
6107 } | |
6108 char fname1[FILE_NAME_LENGTH]; | |
6109 char fname2[FILE_NAME_LENGTH]; | |
6110 MappingLocations *cur; | |
6111 int tmpOut; | |
6112 int lmax=0, rmax=0; | |
6113 | |
6114 sprintf(fname1, "%s__%s__%s__%d__1.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles); | |
6115 sprintf(fname2, "%s__%s__%s__%d__2.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles); | |
6116 | |
6117 FILE* out; | |
6118 FILE* out1 = fileOpen(fname1, "w"); | |
6119 FILE* out2 = fileOpen(fname2, "w"); | |
6120 | |
6121 _msf_openFiles++; | |
6122 | |
6123 for (i=0; i<_msf_seqListSize; i++) | |
6124 { | |
6125 | |
6126 if (i%2==0) | |
6127 { | |
6128 out = out1; | |
6129 | |
6130 if (lmax < _msf_mappingInfo[i].size) | |
6131 { | |
6132 lmax = _msf_mappingInfo[i].size; | |
6133 } | |
6134 } | |
6135 else | |
6136 { | |
6137 out = out2; | |
6138 if (rmax < _msf_mappingInfo[i].size) | |
6139 { | |
6140 rmax = _msf_mappingInfo[i].size; | |
6141 } | |
6142 } | |
6143 | |
6144 tmpOut = fwrite(&(_msf_mappingInfo[i].size), sizeof(int), 1, out); | |
6145 if (_msf_mappingInfo[i].size > 0) | |
6146 { | |
6147 cur = _msf_mappingInfo[i].next; | |
6148 for (j=0; j < _msf_mappingInfo[i].size; j++) | |
6149 { | |
6150 if ( j>0 && j%MAP_CHUNKS==0) | |
6151 { | |
6152 cur = cur->next; | |
6153 } | |
6154 if(cur->cigarSize[j % MAP_CHUNKS] > SEQ_LENGTH || cur->mdSize[j % MAP_CHUNKS] > SEQ_LENGTH) | |
6155 { | |
6156 printf("ERROR in %d read size exceeds cigar=%d md =%d cigar=%s md =%s\n", i, cur->cigarSize[j % MAP_CHUNKS], cur->mdSize[j % MAP_CHUNKS], cur->cigar[j % MAP_CHUNKS], cur->md[j % MAP_CHUNKS]); | |
6157 } | |
6158 | |
6159 tmpOut = fwrite(&(cur->loc[j % MAP_CHUNKS]), sizeof(int), 1, out); | |
6160 | |
6161 tmpOut = fwrite(&(cur->err[j % MAP_CHUNKS]), sizeof(int), 1, out); | |
6162 | |
6163 tmpOut = fwrite(&(cur->cigarSize[j % MAP_CHUNKS]), sizeof(int), 1, out); | |
6164 tmpOut = fwrite((cur->cigar[j % MAP_CHUNKS]), sizeof(char), (cur->cigarSize[j % MAP_CHUNKS]), out); | |
6165 | |
6166 tmpOut = fwrite(&(cur->mdSize[j % MAP_CHUNKS]), sizeof(int), 1, out); | |
6167 tmpOut = fwrite((cur->md[j % MAP_CHUNKS]), sizeof(char), (cur->mdSize[j % MAP_CHUNKS]), out); | |
6168 | |
6169 } | |
6170 _msf_mappingInfo[i].size = 0; | |
6171 //_msf_mappingInfo[i].next = NULL; | |
6172 } | |
6173 } | |
6174 | |
6175 _msf_maxLSize += lmax; | |
6176 _msf_maxRSize += rmax; | |
6177 | |
6178 fclose(out1); | |
6179 fclose(out2); | |
6180 | |
6181 } | |
6182 | |
6183 void outputPairFullMappingInfo(FILE *fp, int readNumber) | |
6184 { | |
6185 | |
6186 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2; | |
6187 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1]; | |
6188 | |
6189 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0'; | |
6190 | |
6191 seq1 = _msf_seqList[readNumber*2].seq; | |
6192 rseq1 = _msf_seqList[readNumber*2].rseq; | |
6193 qual1 = _msf_seqList[readNumber*2].qual; | |
6194 | |
6195 reverse(_msf_seqList[readNumber*2].qual, rqual1, SEQ_LENGTH); | |
6196 | |
6197 seq2 = _msf_seqList[readNumber*2+1].seq; | |
6198 rseq2 = _msf_seqList[readNumber*2+1].rseq; | |
6199 qual2 = _msf_seqList[readNumber*2+1].qual; | |
6200 | |
6201 reverse(_msf_seqList[readNumber*2+1].qual, rqual2, SEQ_LENGTH); | |
6202 | |
6203 | |
6204 if(bestHitMappingInfo[readNumber*2].loc == -1 && bestHitMappingInfo[readNumber*2+1].loc == -1) | |
6205 return; | |
6206 else | |
6207 { | |
6208 | |
6209 char *seq; | |
6210 char *qual; | |
6211 char d1; | |
6212 char d2; | |
6213 int isize; | |
6214 int proper=0; | |
6215 // ISIZE CALCULATION | |
6216 // The distance between outer edges | |
6217 isize = abs(bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc)+SEQ_LENGTH - 2; | |
6218 | |
6219 if (bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc > 0) | |
6220 { | |
6221 isize *= -1; | |
6222 } | |
6223 d1 = (bestHitMappingInfo[readNumber*2].dir == -1)?1:0; | |
6224 d2 = (bestHitMappingInfo[readNumber*2+1].dir == -1)?1:0; | |
6225 | |
6226 if ( d1 ) | |
6227 { | |
6228 seq = rseq1; | |
6229 qual = rqual1; | |
6230 } | |
6231 else | |
6232 { | |
6233 seq = seq1; | |
6234 qual = qual1; | |
6235 } | |
6236 if ( (bestHitMappingInfo[readNumber*2].loc < bestHitMappingInfo[readNumber*2+1].loc && !d1 && d2) || | |
6237 (bestHitMappingInfo[readNumber*2].loc > bestHitMappingInfo[readNumber*2+1].loc && d1 && !d2) ) | |
6238 { | |
6239 proper = 2; | |
6240 } | |
6241 else | |
6242 { | |
6243 proper = 0; | |
6244 } | |
6245 | |
6246 _msf_output.POS = bestHitMappingInfo[readNumber*2].loc; | |
6247 _msf_output.MPOS = bestHitMappingInfo[readNumber*2+1].loc; | |
6248 _msf_output.FLAG = 1+proper+16*d1+32*d2+64; | |
6249 _msf_output.ISIZE = isize; | |
6250 _msf_output.SEQ = seq, | |
6251 _msf_output.QUAL = qual; | |
6252 _msf_output.QNAME = _msf_seqList[readNumber*2].name; | |
6253 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr; | |
6254 _msf_output.MAPQ = 255; | |
6255 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2].cigar; | |
6256 _msf_output.MRNAME = "="; | |
6257 | |
6258 _msf_output.optSize = 2; | |
6259 _msf_output.optFields = _msf_optionalFields; | |
6260 | |
6261 _msf_optionalFields[0].tag = "NM"; | |
6262 _msf_optionalFields[0].type = 'i'; | |
6263 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2].err; | |
6264 | |
6265 _msf_optionalFields[1].tag = "MD"; | |
6266 _msf_optionalFields[1].type = 'Z'; | |
6267 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2].md; | |
6268 | |
6269 outputSAM(fp, _msf_output); | |
6270 output(_msf_output); | |
6271 | |
6272 if ( d2 ) | |
6273 { | |
6274 seq = rseq2; | |
6275 qual = rqual2; | |
6276 } | |
6277 else | |
6278 { | |
6279 seq = seq2; | |
6280 qual = qual2; | |
6281 } | |
6282 | |
6283 _msf_output.POS = bestHitMappingInfo[readNumber*2+1].loc; | |
6284 _msf_output.MPOS = bestHitMappingInfo[readNumber*2].loc; | |
6285 _msf_output.FLAG = 1+proper+16*d2+32*d1+128; | |
6286 _msf_output.ISIZE = -isize; | |
6287 _msf_output.SEQ = seq, | |
6288 _msf_output.QUAL = qual; | |
6289 _msf_output.QNAME = _msf_seqList[readNumber*2].name; | |
6290 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr; | |
6291 _msf_output.MAPQ = 255; | |
6292 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2+1].cigar; | |
6293 _msf_output.MRNAME = "="; | |
6294 | |
6295 _msf_output.optSize = 2; | |
6296 _msf_output.optFields = _msf_optionalFields; | |
6297 | |
6298 _msf_optionalFields[0].tag = "NM"; | |
6299 _msf_optionalFields[0].type = 'i'; | |
6300 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2+1].err; | |
6301 | |
6302 _msf_optionalFields[1].tag = "MD"; | |
6303 _msf_optionalFields[1].type = 'Z'; | |
6304 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2+1].md; | |
6305 | |
6306 outputSAM(fp, _msf_output); | |
6307 output(_msf_output); | |
6308 } | |
6309 } | |
6310 | |
6311 | |
/*
  Compares the distances of x1 and x2 from c.
  @return 0: if x1 is farther from c than x2
          1: if x1 is closer to c than x2
          2: if both distances are equal

  NOTE(review): the previous comment described 0 as "x1 is closer" and
  1 as "x2 is closer", which is the opposite of what the code returns.
  The code is left as it is; confirm against the callers which meaning
  is intended.
*/
int findNearest(int x1, int x2, int c)
{
    int dist1 = abs(x1 - c);
    int dist2 = abs(x2 - c);

    if (dist1 > dist2)
    {
        return 0;
    }
    if (dist1 < dist2)
    {
        return 1;
    }
    return 2;
}
6331 | |
6332 void initBestConcordantDiscordant(int readNumber) | |
6333 { | |
6334 char bestConcordantFileName[FILE_NAME_LENGTH]; | |
6335 //char bestDiscordantFileName[FILE_NAME_LENGTH]; | |
6336 | |
6337 //OPEN THE BEST CONCORDANT FILE | |
6338 //BEGIN{Farhad Hormozdiari} | |
6339 /* begin {calkan} */ | |
6340 //sprintf(bestConcordantFileName, "%s%s__BEST.CONCORDANT", mappingOutputPath, mappingOutput); | |
6341 sprintf(bestConcordantFileName, "%s%s_BEST.sam", mappingOutputPath, mappingOutput); | |
6342 | |
6343 bestConcordantFILE = fileOpen(bestConcordantFileName, "w"); | |
6344 bestDiscordantFILE = bestConcordantFILE; | |
6345 /* end {calkan} */ | |
6346 //END{Farhad Hormozdiari} | |
6347 | |
6348 | |
6349 //OPEN THE BEST DISCORDANT FILE | |
6350 //BEGIN{Farhad Hormozdiari} | |
6351 /* begin {calkan} | |
6352 sprintf(bestDiscordantFileName, "%s%s__BEST.DISCORDANT", mappingOutputPath, mappingOutput); | |
6353 bestDiscordantFILE = fileOpen(bestDiscordantFileName, "w"); | |
6354 end {calkan} */ | |
6355 | |
6356 //END{Farhad Hormozdiari} | |
6357 | |
6358 initBestMapping(readNumber); | |
6359 } | |
6360 | |
6361 void finalizeBestConcordantDiscordant() | |
6362 { | |
6363 int i = 0; | |
6364 | |
6365 for(i = 0; i<_msf_seqListSize/2; i++) | |
6366 { | |
6367 if(_msf_readHasConcordantMapping[i]==1) | |
6368 outputPairFullMappingInfo(bestConcordantFILE, i); | |
6369 else | |
6370 outputPairFullMappingInfo(bestDiscordantFILE, i); | |
6371 } | |
6372 | |
6373 fclose(bestConcordantFILE); | |
6374 // fclose(bestDiscordantFILE); | |
6375 | |
6376 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo)); | |
6377 } | |
6378 | |
6379 void setFullMappingInfo(int readNumber, int loc, int dir, int err, int score, char *md, char * refName, char *cigar) | |
6380 { | |
6381 bestHitMappingInfo[readNumber].loc = loc; | |
6382 bestHitMappingInfo[readNumber].dir = dir; | |
6383 bestHitMappingInfo[readNumber].err = err; | |
6384 bestHitMappingInfo[readNumber].score = score; | |
6385 | |
6386 strncpy(bestHitMappingInfo[readNumber].md, md, strlen(md)+1); | |
6387 strncpy(bestHitMappingInfo[readNumber].chr, refName, strlen(refName)+1); | |
6388 strncpy(bestHitMappingInfo[readNumber].cigar, cigar, strlen(cigar)+1); | |
6389 } | |
6390 | |
6391 | |
6392 void setPairFullMappingInfo(int readNumber, FullMappingInfo mi1, FullMappingInfo mi2) | |
6393 { | |
6394 | |
6395 bestHitMappingInfo[readNumber*2].loc = mi1.loc; | |
6396 bestHitMappingInfo[readNumber*2].dir = mi1.dir; | |
6397 bestHitMappingInfo[readNumber*2].err = mi1.err; | |
6398 bestHitMappingInfo[readNumber*2].score = mi1.score; | |
6399 snprintf(bestHitMappingInfo[readNumber*2].chr, MAX_REF_SIZE, "%s", _msf_refGenName); | |
6400 | |
6401 | |
6402 strncpy(bestHitMappingInfo[readNumber*2].md, mi1.md, strlen(mi1.md)+1); | |
6403 strncpy(bestHitMappingInfo[readNumber*2].cigar, mi1.cigar, strlen(mi1.cigar)+1); | |
6404 | |
6405 | |
6406 /* | |
6407 sprintf(bestHitMappingInfo[readNumber*2].md, "%s\0", mi1.md); | |
6408 sprintf(bestHitMappingInfo[readNumber*2].cigar, "%s\0", mi1.cigar); | |
6409 */ | |
6410 | |
6411 | |
6412 bestHitMappingInfo[readNumber*2+1].loc = mi2.loc; | |
6413 bestHitMappingInfo[readNumber*2+1].dir = mi2.dir; | |
6414 bestHitMappingInfo[readNumber*2+1].err = mi2.err; | |
6415 bestHitMappingInfo[readNumber*2+1].score = mi2.score; | |
6416 | |
6417 snprintf(bestHitMappingInfo[readNumber*2+1].chr, MAX_REF_SIZE, "%s", _msf_refGenName); | |
6418 | |
6419 /* | |
6420 sprintf(bestHitMappingInfo[readNumber*2+1].md, "%s\0", mi2.md); | |
6421 sprintf(bestHitMappingInfo[readNumber*2+1].cigar, "%s\0", mi2.cigar); | |
6422 */ | |
6423 | |
6424 strncpy(bestHitMappingInfo[readNumber*2+1].md, mi2.md, strlen(mi2.md)+1); | |
6425 strncpy(bestHitMappingInfo[readNumber*2+1].cigar, mi2.cigar, strlen(mi2.cigar)+1); | |
6426 | |
6427 } | |
6428 | |
6429 /**********************************************/ | |
6430 void outputPairedEnd() | |
6431 { | |
6432 int i = 0; | |
6433 | |
6434 char cigar[MAX_CIGAR_SIZE]; | |
6435 | |
6436 int tmpOut; | |
6437 | |
6438 loadRefGenome(&_msf_refGen, &_msf_refGenName, &tmpOut); | |
6439 | |
6440 FILE* in1[_msf_openFiles]; | |
6441 FILE* in2[_msf_openFiles]; | |
6442 | |
6443 char fname1[_msf_openFiles][FILE_NAME_LENGTH]; | |
6444 char fname2[_msf_openFiles][FILE_NAME_LENGTH]; | |
6445 | |
6446 // discordant | |
6447 FILE *out=NULL, *out1=NULL; | |
6448 | |
6449 char fname3[FILE_NAME_LENGTH]; | |
6450 char fname4[FILE_NAME_LENGTH]; | |
6451 | |
6452 int meanDistanceMapping = 0; | |
6453 | |
6454 char *rqual1; | |
6455 char *rqual2; | |
6456 | |
6457 rqual1 = getMem((SEQ_LENGTH+1)*sizeof(char)); | |
6458 rqual2 = getMem((SEQ_LENGTH+1)*sizeof(char)); | |
6459 | |
6460 if (pairedEndDiscordantMode) | |
6461 { | |
6462 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); | |
6463 sprintf(fname4, "%s__%s__oea", mappingOutputPath, mappingOutput); | |
6464 out = fileOpen(fname3, "a"); | |
6465 out1 = fileOpen(fname4, "a"); | |
6466 } | |
6467 | |
6468 FullMappingInfo *mi1 = getMem(sizeof(FullMappingInfo) * _msf_maxLSize); | |
6469 FullMappingInfo *mi2 = getMem(sizeof(FullMappingInfo) * _msf_maxRSize); | |
6470 | |
6471 _msf_fileCount[_msf_maxFile] = 0; | |
6472 for (i=0; i<_msf_openFiles; i++) | |
6473 { | |
6474 sprintf(fname1[i], "%s__%s__%s__%d__1.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i); | |
6475 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][0], "%s", fname1[i]); | |
6476 | |
6477 sprintf(fname2[i], "%s__%s__%s__%d__2.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i); | |
6478 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][1], "%s", fname2[i]); | |
6479 | |
6480 in1[i] = fileOpen(fname1[i], "r"); | |
6481 in2[i] = fileOpen(fname2[i], "r"); | |
6482 _msf_fileCount[_msf_maxFile]++; | |
6483 } | |
6484 _msf_maxFile++; | |
6485 | |
6486 int size; | |
6487 int j, k; | |
6488 int size1, size2; | |
6489 | |
6490 meanDistanceMapping = (pairedEndDiscordantMode==1)? (minPairEndedDiscordantDistance+maxPairEndedDiscordantDistance)/2 + SEQ_LENGTH | |
6491 : (minPairEndedDistance + maxPairEndedDistance) / 2 + SEQ_LENGTH; | |
6492 | |
6493 for (i=0; i<_msf_seqListSize/2; i++) | |
6494 { | |
6495 size1 = size2 = 0; | |
6496 for (j=0; j<_msf_openFiles; j++) | |
6497 { | |
6498 tmpOut = fread(&size, sizeof(int), 1, in1[j]); | |
6499 if ( size > 0 ) | |
6500 { | |
6501 for (k=0; k<size; k++) | |
6502 { | |
6503 mi1[size1+k].dir = 1; | |
6504 tmpOut = fread (&(mi1[size1+k].loc), sizeof(int), 1, in1[j]); | |
6505 tmpOut = fread (&(mi1[size1+k].err), sizeof(int), 1, in1[j]); | |
6506 | |
6507 tmpOut = fread (&(mi1[size1+k].cigarSize), sizeof(int), 1, in1[j]); | |
6508 tmpOut = fread ((mi1[size1+k].cigar), sizeof(char), mi1[size1+k].cigarSize, in1[j]); | |
6509 mi1[size1+k].cigar[mi1[size1+k].cigarSize] = '\0'; | |
6510 | |
6511 tmpOut = fread (&(mi1[size1+k].mdSize), sizeof(int), 1, in1[j]); | |
6512 tmpOut = fread ((mi1[size1+k].md), sizeof(char), (mi1[size1+k].mdSize), in1[j]); | |
6513 mi1[size1+k].md[mi1[size1+k].mdSize] = '\0'; | |
6514 | |
6515 if (mi1[size1+k].loc<1) | |
6516 { | |
6517 mi1[size1+k].loc *= -1; | |
6518 mi1[size1+k].dir = -1; | |
6519 } | |
6520 } | |
6521 qsort(mi1+size1, size, sizeof(FullMappingInfo), compareOut); | |
6522 size1+=size; | |
6523 } | |
6524 } | |
6525 | |
6526 for (j=0; j<_msf_openFiles; j++) | |
6527 { | |
6528 tmpOut = fread(&size, sizeof(int), 1, in2[j]); | |
6529 if ( size > 0 ) | |
6530 { | |
6531 for (k=0; k<size; k++) | |
6532 { | |
6533 mi2[size2+k].dir = 1; | |
6534 tmpOut = fread (&(mi2[size2+k].loc), sizeof(int), 1, in2[j]); | |
6535 tmpOut = fread (&(mi2[size2+k].err), sizeof(int), 1, in2[j]); | |
6536 | |
6537 tmpOut = fread (&(mi2[size2+k].cigarSize), sizeof(int), 1, in2[j]); | |
6538 tmpOut = fread ((mi2[size2+k].cigar), sizeof(char), mi2[size2+k].cigarSize, in2[j]); | |
6539 mi2[size2+k].cigar[mi2[size2+k].cigarSize] = '\0'; | |
6540 | |
6541 tmpOut = fread (&(mi2[size2+k].mdSize), sizeof(int), 1, in2[j]); | |
6542 tmpOut = fread ((mi2[size2+k].md), sizeof(char), mi2[size2+k].mdSize, in2[j]); | |
6543 mi2[size2+k].md[mi2[size2+k].mdSize] = '\0'; | |
6544 | |
6545 if (mi2[size2+k].loc<1) | |
6546 { | |
6547 mi2[size2+k].loc *= -1; | |
6548 mi2[size2+k].dir = -1; | |
6549 } | |
6550 } | |
6551 qsort(mi2+size2, size, sizeof(FullMappingInfo), compareOut); | |
6552 size2+=size; | |
6553 } | |
6554 } | |
6555 | |
6556 int lm, ll, rl, rm; | |
6557 int pos = 0; | |
6558 | |
6559 if (pairedEndDiscordantMode) | |
6560 { | |
6561 | |
6562 for (j=0; j<size1; j++) | |
6563 { | |
6564 lm = mi1[j].loc - maxPairEndedDiscordantDistance + 1; | |
6565 ll = mi1[j].loc - minPairEndedDiscordantDistance + 1; | |
6566 rl = mi1[j].loc + minPairEndedDiscordantDistance - 1; | |
6567 rm = mi1[j].loc + maxPairEndedDiscordantDistance - 1; | |
6568 | |
6569 while (pos<size2 && mi2[pos].loc < lm) | |
6570 { | |
6571 pos++; | |
6572 } | |
6573 | |
6574 k = pos; | |
6575 while (k<size2 && mi2[k].loc<=rm) | |
6576 { | |
6577 if ( mi2[k].loc <= ll || mi2[k].loc >= rl) | |
6578 { | |
6579 if ( (mi1[j].loc < mi2[k].loc && mi1[j].dir==1 && mi2[k].dir == -1) || | |
6580 (mi1[j].loc > mi2[k].loc && mi1[j].dir==-1 && mi2[k].dir == 1) ) | |
6581 { | |
6582 _msf_seqList[i*2].hits[0]=1; | |
6583 _msf_seqList[i*2+1].hits[0]=1; | |
6584 | |
6585 if(nosamMode != 0) | |
6586 { | |
6587 size1=0; | |
6588 size2=0; | |
6589 } | |
6590 | |
6591 break; | |
6592 } | |
6593 } | |
6594 k++; | |
6595 } | |
6596 } | |
6597 | |
6598 _msf_seqHits[i*2] += size1; | |
6599 _msf_seqHits[i*2+1] += size2; | |
6600 | |
6601 | |
6602 if (_msf_seqHits[i*2+1] * _msf_seqHits[i*2] > DISCORDANT_CUT_OFF && nosamMode != 0) | |
6603 { | |
6604 _msf_seqList[i*2].hits[0]=1; | |
6605 _msf_seqList[i*2+1].hits[0]=1; | |
6606 size1=0; | |
6607 size2=0; | |
6608 } | |
6609 | |
6610 | |
6611 | |
6612 | |
6613 int tmp = 0; | |
6614 int rNo = 0; | |
6615 int loc = 0; | |
6616 int err = 0; | |
6617 float sc = 0; | |
6618 char l = 0; | |
6619 | |
6620 //write the OEA data | |
6621 if(_msf_seqHits[i*2] == 0 ) | |
6622 { | |
6623 for(k = 0;k < size2 && _msf_oeaMapping[i*2+1] < maxOEAOutput ;k++) | |
6624 { | |
6625 rNo = i*2+1; | |
6626 loc = mi2[k].loc*mi2[k].dir; | |
6627 err = mi2[k].err; | |
6628 sc = mi2[k].score; | |
6629 | |
6630 l = strlen(_msf_refGenName); | |
6631 | |
6632 tmp = fwrite(&rNo, sizeof(int), 1, out1); | |
6633 | |
6634 tmp = fwrite(&l, sizeof(char), 1, out1); | |
6635 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1); | |
6636 | |
6637 tmp = fwrite(&loc, sizeof(int), 1, out1); | |
6638 tmp = fwrite(&err, sizeof(int), 1, out1); | |
6639 tmp = fwrite(&sc, sizeof(float), 1, out1); | |
6640 | |
6641 if(mi2[k].cigarSize > SEQ_LENGTH || mi2[k].cigarSize <= 0) | |
6642 printf("ERROR CIGAR size=%d %s\n", mi2[k].cigarSize, _msf_seqList[i*2+1].seq); | |
6643 | |
6644 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out1); | |
6645 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out1); | |
6646 | |
6647 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out1); | |
6648 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out1); | |
6649 | |
6650 _msf_oeaMapping[i*2+1]++; | |
6651 } | |
6652 } | |
6653 if(_msf_seqHits[i*2+1] == 0) | |
6654 { | |
6655 for(j = 0;j < size1 && _msf_oeaMapping[i*2] < maxOEAOutput;j++) | |
6656 { | |
6657 rNo = i*2; | |
6658 loc = mi1[j].loc*mi1[j].dir; | |
6659 err = mi1[j].err; | |
6660 sc = mi1[j].score; | |
6661 | |
6662 l = strlen(_msf_refGenName); | |
6663 | |
6664 tmp = fwrite(&rNo, sizeof(int), 1, out1); | |
6665 | |
6666 tmp = fwrite(&l, sizeof(char), 1, out1); | |
6667 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1); | |
6668 | |
6669 tmp = fwrite(&loc, sizeof(int), 1, out1); | |
6670 tmp = fwrite(&err, sizeof(int), 1, out1); | |
6671 tmp = fwrite(&sc, sizeof(float), 1, out1); | |
6672 | |
6673 if(mi1[j].cigarSize > SEQ_LENGTH || mi1[j].cigarSize <= 0 ) | |
6674 printf("ERROR %d %s\n", mi1[j].cigarSize, _msf_seqList[i*2+1].seq); | |
6675 | |
6676 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out1); | |
6677 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out1); | |
6678 | |
6679 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out1); | |
6680 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out1); | |
6681 | |
6682 _msf_oeaMapping[i*2]++; | |
6683 } | |
6684 } | |
6685 } | |
6686 | |
6687 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2; | |
6688 | |
6689 | |
6690 | |
6691 | |
6692 rqual1[SEQ_LENGTH] = '\0'; | |
6693 rqual2[SEQ_LENGTH] = '\0'; | |
6694 rqual1[0] = '\0'; | |
6695 rqual2[0] = '\0'; | |
6696 | |
6697 | |
6698 | |
6699 seq1 = _msf_seqList[i*2].seq; | |
6700 rseq1 = _msf_seqList[i*2].rseq; | |
6701 qual1 = _msf_seqList[i*2].qual; | |
6702 | |
6703 | |
6704 | |
6705 strncpy(rqual1, _msf_seqList[i*2].qual, SEQ_LENGTH); | |
6706 | |
6707 seq2 = _msf_seqList[i*2+1].seq; | |
6708 rseq2 = _msf_seqList[i*2+1].rseq; | |
6709 qual2 = _msf_seqList[i*2+1].qual; | |
6710 | |
6711 | |
6712 strncpy(rqual2, _msf_seqList[i*2+1].qual, SEQ_LENGTH); | |
6713 | |
6714 if (pairedEndDiscordantMode) | |
6715 { | |
6716 for (k=0; k<size1; k++) | |
6717 { | |
6718 mi1[k].score = calculateScore(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, (mi1[k].dir==-1)?rqual1:qual1, mi1[k].cigar); | |
6719 } | |
6720 | |
6721 for (k=0; k<size2; k++) | |
6722 { | |
6723 mi2[k].score = calculateScore(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, (mi2[k].dir==-1)?rqual2:qual2, mi2[k].cigar); | |
6724 } | |
6725 | |
6726 } | |
6727 | |
6728 | |
6729 if (pairedEndDiscordantMode) | |
6730 { | |
6731 for (j=0; j<size1; j++) | |
6732 { | |
6733 for(k = 0; k < size2; k++) | |
6734 { | |
6735 if( | |
6736 (mi2[k].loc-mi1[j].loc >= minPairEndedDiscordantDistance && | |
6737 mi2[k].loc-mi1[j].loc <= maxPairEndedDiscordantDistance && | |
6738 mi1[j].dir > 0 && mi2[k].dir < 0 ) | |
6739 | |
6740 || | |
6741 | |
6742 (mi1[j].loc-mi2[k].loc >= minPairEndedDiscordantDistance && | |
6743 mi1[j].loc-mi2[k].loc <= maxPairEndedDiscordantDistance && | |
6744 mi1[j].dir < 0 && mi2[k].dir > 0) | |
6745 ) | |
6746 { | |
6747 //POSSIBLE CONCORDANT | |
6748 if(_msf_readHasConcordantMapping[i] == 0) | |
6749 { | |
6750 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6751 _msf_readHasConcordantMapping[i] = 1; | |
6752 _msf_seqList[i*2].hits[0] = 1; | |
6753 _msf_seqList[i*2+1].hits[0] = 1; | |
6754 } | |
6755 else | |
6756 { | |
6757 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err) | |
6758 { | |
6759 | |
6760 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == | |
6761 mi1[j].err + mi2[k].err && | |
6762 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc), | |
6763 abs(mi2[k].loc - mi1[j].loc), | |
6764 meanDistanceMapping | |
6765 ) == 0 ) | |
6766 { | |
6767 continue; | |
6768 } | |
6769 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6770 } | |
6771 } | |
6772 } | |
6773 //DISCORDANT TO TEMP FILE FOR POST PROCESSIING | |
6774 else if(_msf_readHasConcordantMapping[i] == 0 && | |
6775 _msf_seqHits[i*2] != 0 && | |
6776 _msf_seqHits[i*2+1] != 0) | |
6777 { | |
6778 | |
6779 int tmp; | |
6780 int rNo = i; | |
6781 int loc = mi1[j].loc*mi1[j].dir; | |
6782 int err = mi1[j].err; | |
6783 float sc = mi1[j].score; | |
6784 | |
6785 char l = strlen(_msf_refGenName); | |
6786 | |
6787 if(_msf_discordantMapping[i*2] < maxDiscordantOutput) | |
6788 { | |
6789 | |
6790 tmp = fwrite(&rNo, sizeof(int), 1, out); | |
6791 | |
6792 tmp = fwrite(&l, sizeof(char), 1, out); | |
6793 tmp = fwrite(_msf_refGenName, sizeof(char), l, out); | |
6794 | |
6795 tmp = fwrite(&loc, sizeof(int), 1, out); | |
6796 tmp = fwrite(&err, sizeof(int), 1, out); | |
6797 tmp = fwrite(&sc, sizeof(float), 1, out); | |
6798 | |
6799 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out); | |
6800 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out); | |
6801 | |
6802 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out); | |
6803 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out); | |
6804 | |
6805 | |
6806 loc = mi2[k].loc*mi2[k].dir; | |
6807 err = mi2[k].err; | |
6808 sc = mi2[k].score; | |
6809 | |
6810 tmp = fwrite(&loc, sizeof(int), 1, out); | |
6811 tmp = fwrite(&err, sizeof(int), 1, out); | |
6812 tmp = fwrite(&sc, sizeof(float), 1, out); | |
6813 | |
6814 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out); | |
6815 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out); | |
6816 | |
6817 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out); | |
6818 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out); | |
6819 | |
6820 | |
6821 _msf_discordantMapping[i*2]++; | |
6822 } | |
6823 //SET THE BEST DISCORDANT | |
6824 //BEGIN {Farhad Hormozdiari} | |
6825 if( bestHitMappingInfo[i*2].loc == -1 && | |
6826 bestHitMappingInfo[i*2+1].loc == -1 && | |
6827 _msf_readHasConcordantMapping[i] == 0) | |
6828 { | |
6829 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6830 _msf_seqList[i*2].hits[0] = 1; | |
6831 _msf_seqList[i*2+1].hits[0] = 1; | |
6832 } | |
6833 else if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err | |
6834 && _msf_readHasConcordantMapping[i] == 0) | |
6835 { | |
6836 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err && | |
6837 findNearest( abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc), | |
6838 abs(mi1[j].loc - mi2[k].loc), | |
6839 meanDistanceMapping | |
6840 ) == 0 | |
6841 ) | |
6842 { | |
6843 continue; | |
6844 } | |
6845 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6846 } | |
6847 //END {Farhad Hormozdiari} | |
6848 } | |
6849 } | |
6850 } | |
6851 } | |
6852 else | |
6853 { | |
6854 for (j=0; j<size1; j++) | |
6855 { | |
6856 for(k = 0; k < size2; k++) | |
6857 { | |
6858 if((mi2[k].loc-mi1[j].loc >= minPairEndedDistance && | |
6859 mi2[k].loc-mi1[j].loc <= maxPairEndedDistance && | |
6860 mi1[j].dir > 0 && mi2[k].dir < 0) | |
6861 || | |
6862 (mi1[j].loc-mi2[k].loc >= minPairEndedDistance && | |
6863 mi1[j].loc-mi2[k].loc <= maxPairEndedDistance && | |
6864 mi1[j].dir < 0 && mi2[k].dir > 0) | |
6865 ) | |
6866 { | |
6867 char *seq; | |
6868 char *qual; | |
6869 char d1; | |
6870 char d2; | |
6871 int isize; | |
6872 int proper=0; | |
6873 // ISIZE CALCULATION | |
6874 // The distance between outer edges | |
6875 isize = abs(mi1[j].loc - mi2[k].loc)+SEQ_LENGTH-2; | |
6876 if (mi1[j].loc - mi2[k].loc > 0) | |
6877 { | |
6878 isize *= -1; | |
6879 } | |
6880 | |
6881 d1 = (mi1[j].dir == -1)?1:0; | |
6882 d2 = (mi2[k].dir == -1)?1:0; | |
6883 | |
6884 //SET THE READ HAS CONCORDANT MAPPING | |
6885 _msf_readHasConcordantMapping[i] = 1; | |
6886 | |
6887 if ( d1 ) | |
6888 { | |
6889 seq = rseq1; | |
6890 qual = rqual1; | |
6891 } | |
6892 else | |
6893 { | |
6894 seq = seq1; | |
6895 qual = qual1; | |
6896 } | |
6897 | |
6898 if ((mi1[j].loc < mi2[k].loc && !d1 && d2) || | |
6899 (mi1[j].loc > mi2[k].loc && d1 && !d2) ) | |
6900 { | |
6901 proper = 2; | |
6902 } | |
6903 else | |
6904 { | |
6905 proper = 0; | |
6906 } | |
6907 | |
6908 | |
6909 _msf_output.POS = mi1[j].loc; | |
6910 _msf_output.MPOS = mi2[k].loc; | |
6911 _msf_output.FLAG = 1+proper+16*d1+32*d2+64; | |
6912 _msf_output.ISIZE = isize; | |
6913 _msf_output.SEQ = seq, | |
6914 _msf_output.QUAL = qual; | |
6915 _msf_output.QNAME = _msf_seqList[i*2].name; | |
6916 _msf_output.RNAME = _msf_refGenName; | |
6917 _msf_output.MAPQ = 255; | |
6918 _msf_output.CIGAR = cigar; | |
6919 _msf_output.MRNAME = "="; | |
6920 | |
6921 _msf_output.optSize = 2; | |
6922 _msf_output.optFields = _msf_optionalFields; | |
6923 | |
6924 _msf_optionalFields[0].tag = "NM"; | |
6925 _msf_optionalFields[0].type = 'i'; | |
6926 _msf_optionalFields[0].iVal = mi1[j].err; | |
6927 | |
6928 _msf_optionalFields[1].tag = "MD"; | |
6929 _msf_optionalFields[1].type = 'Z'; | |
6930 _msf_optionalFields[1].sVal = mi1[j].md; | |
6931 | |
6932 if(!bestMode) | |
6933 output(_msf_output); | |
6934 | |
6935 if ( d2 ) | |
6936 { | |
6937 seq = rseq2; | |
6938 qual = rqual2; | |
6939 } | |
6940 else | |
6941 { | |
6942 seq = seq2; | |
6943 qual = qual2; | |
6944 } | |
6945 | |
6946 _msf_output.POS = mi2[k].loc; | |
6947 _msf_output.MPOS = mi1[j].loc; | |
6948 _msf_output.FLAG = 1+proper+16*d2+32*d1+128; | |
6949 _msf_output.ISIZE = -isize; | |
6950 _msf_output.SEQ = seq, | |
6951 _msf_output.QUAL = qual; | |
6952 _msf_output.QNAME = _msf_seqList[i*2].name; | |
6953 _msf_output.RNAME = _msf_refGenName; | |
6954 _msf_output.MAPQ = 255; | |
6955 _msf_output.CIGAR = cigar; | |
6956 _msf_output.MRNAME = "="; | |
6957 | |
6958 _msf_output.optSize = 2; | |
6959 _msf_output.optFields = _msf_optionalFields; | |
6960 | |
6961 _msf_optionalFields[0].tag = "NM"; | |
6962 _msf_optionalFields[0].type = 'i'; | |
6963 _msf_optionalFields[0].iVal = mi2[k].err;; | |
6964 | |
6965 _msf_optionalFields[1].tag = "MD"; | |
6966 _msf_optionalFields[1].type = 'Z'; | |
6967 _msf_optionalFields[1].sVal = mi2[k].md; | |
6968 | |
6969 if(!bestMode) | |
6970 output(_msf_output); | |
6971 //SET THE BEST CONCORDANT | |
6972 //BEGIN {Farhad Hormozdiari} | |
6973 if(bestHitMappingInfo[i*2].loc == -1 && bestHitMappingInfo[i*2+1].loc == -1) | |
6974 { | |
6975 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6976 } | |
6977 else | |
6978 { | |
6979 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err) | |
6980 { | |
6981 | |
6982 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err && | |
6983 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc), | |
6984 abs(mi2[k].loc - mi1[j].loc), | |
6985 meanDistanceMapping | |
6986 ) == 0 ) | |
6987 { | |
6988 continue; | |
6989 } | |
6990 setPairFullMappingInfo(i, mi1[j], mi2[k]); | |
6991 } | |
6992 } | |
6993 //END {Farhad Hormozdiari} | |
6994 } | |
6995 } | |
6996 } | |
6997 | |
6998 } | |
6999 } | |
7000 | |
7001 freeMem(rqual1, 0); | |
7002 freeMem(rqual2, 0); | |
7003 | |
7004 if (pairedEndDiscordantMode) | |
7005 { | |
7006 fclose(out); | |
7007 fclose(out1); | |
7008 } | |
7009 | |
7010 for (i=0; i<_msf_openFiles; i++) | |
7011 { | |
7012 fclose(in1[i]); | |
7013 fclose(in2[i]); | |
7014 | |
7015 unlink(fname1[i]); | |
7016 unlink(fname2[i]); | |
7017 } | |
7018 | |
7019 freeMem(mi1, sizeof(FullMappingInfo)*_msf_maxLSize); | |
7020 freeMem(mi2, sizeof(FullMappingInfo)*_msf_maxRSize); | |
7021 | |
7022 _msf_openFiles = 0; | |
7023 } | |
7024 | |
7025 /**********************************************/ | |
7026 /**********************************************/ | |
7027 /**********************************************/ | |
7028 /**********************************************/ | |
/*
 * Parses the decimal number found in str[index1 .. index2-1] and returns it
 * as a float.
 *
 * The slice length is clamped to the scratch buffer, so an empty or
 * reversed range yields 0 and an over-long range is truncated instead of
 * overrunning the stack.
 */
float str2int(char *str, int index1, int index2)
{
	char tmp[200];
	int len = index2 - index1;

	if (len < 0)
		len = 0;
	if (len > (int)sizeof(tmp) - 1)
		len = (int)sizeof(tmp) - 1;

	/* strncpy stops at a NUL inside the slice; the terminator is set explicitly. */
	strncpy(tmp, &str[index1], (size_t)len);
	tmp[len] = '\0';

	return (float)strtol(tmp, NULL, 10);
}
7036 | |
7037 float calculateScore(int index, char *seq, char *qual,char *md) | |
7038 { | |
7039 int i; | |
7040 int j; | |
7041 char *ref; | |
7042 char *ver; | |
7043 | |
7044 ref = _msf_refGen + index-1; | |
7045 ver = seq; | |
7046 float score = 1; | |
7047 | |
7048 char tmp[200]; | |
7049 int value = 0; | |
7050 int end = 0; | |
7051 int index1 = 0; | |
7052 int index2 = 0; | |
7053 | |
7054 i=0; | |
7055 while(1) | |
7056 { | |
7057 | |
7058 if(i>=strlen(md)) | |
7059 break; | |
7060 | |
7061 index1 = i; | |
7062 | |
7063 while(md[i] >='0' && md[i]<='9') | |
7064 { | |
7065 i++; | |
7066 } | |
7067 | |
7068 index2 = i; | |
7069 | |
7070 value = str2int(md, index1,index2); | |
7071 | |
7072 if(md[i]=='M') | |
7073 { | |
7074 for(j=0;j<value;j++) | |
7075 { | |
7076 tmp[end]='M'; | |
7077 end++; | |
7078 } | |
7079 } | |
7080 else if(md[i]=='I') | |
7081 { | |
7082 for(j=0;j<value;j++) | |
7083 { | |
7084 tmp[end]='I'; | |
7085 end++; | |
7086 } | |
7087 | |
7088 } | |
7089 else if(md[i] == 'D') | |
7090 { | |
7091 for(j=0;j<value;j++) | |
7092 { | |
7093 tmp[end]='D'; | |
7094 end++; | |
7095 } | |
7096 } | |
7097 i++; | |
7098 } | |
7099 | |
7100 tmp[end] = '\0'; | |
7101 | |
7102 j = 0; | |
7103 | |
7104 for (i = 0; i < end; i++) | |
7105 { | |
7106 if(tmp[i] == 'M') | |
7107 { | |
7108 if (*ref != *ver) | |
7109 { | |
7110 score *= 0.001 + 1/pow( 10, ((qual[j]-33)/10.0) ); | |
7111 } | |
7112 | |
7113 ref++; | |
7114 ver++; | |
7115 j++; | |
7116 } | |
7117 else if(tmp[i] == 'I') | |
7118 { | |
7119 ver++; | |
7120 j++; | |
7121 } | |
7122 else if(tmp[i] == 'D') | |
7123 { | |
7124 ref++; | |
7125 } | |
7126 } | |
7127 | |
7128 return score; | |
7129 } | |
7130 | |
/*
 * Converts the characters str[start .. end-1] to an int.
 *
 * At most sizeof(tmp)-1 characters are copied into the scratch buffer, so a
 * long range is truncated instead of overrunning the stack; an empty or
 * reversed range yields 0.
 */
int matoi(char *str, int start, int end)
{
	char tmp[200];
	int len = end - start;
	int i;

	if (len > (int)sizeof(tmp) - 1)
		len = (int)sizeof(tmp) - 1;

	for (i = 0; i < len; i++)
		tmp[i] = str[start+i];
	tmp[i] = '\0';

	return (int)strtol(tmp, NULL, 10);
}
7142 | |
/*
 * Expands a run-length CIGAR string into one character per column.
 *
 * cigar      : CIGAR text, e.g. "3M1D2I"
 * cigar_size : number of characters of `cigar` to consider
 * matrix     : output, receives e.g. "MMMDII" plus a terminating NUL; the
 *              caller must provide room for the expanded length + 1
 *
 * Only the ops 'M', 'D' and 'I' are expanded.  A run with any other op, or
 * a count that is not followed by an op inside cigar_size, produces no
 * output (it used to advance the output position without writing, leaving
 * uninitialized bytes in the result).
 */
void convertCigarToMatrix(char *cigar, int cigar_size, char * matrix)
{
	int i = 0;
	int size = 0;

	matrix[0] = '\0';

	while(i < cigar_size)
	{
		if(cigar[i] >= '0' && cigar[i] <= '9')
		{
			int value = 0;
			int j;
			char op;

			/* bounds are checked before the character is read */
			while(i < cigar_size && cigar[i] >= '0' && cigar[i] <= '9')
			{
				if (value < 100000000)
					value = value*10 + (cigar[i]-'0');
				i++;
			}

			op = (i < cigar_size) ? cigar[i] : '\0';

			if (op == 'M' || op == 'D' || op == 'I')
			{
				for(j = 0; j < value; j++)
				{
					matrix[size] = op;
					size++;
				}
			}
		}
		i++;	/* step over the op (or any stray character) */
	}
	matrix[size] = '\0';
}
7178 | |
7179 | |
7180 | |
/*
 * Expands an MD string into one character per column.
 *
 * md      : MD text, e.g. "3A2^CG1"
 * md_size : number of characters of `md` to consider
 * matrix  : output plus terminating NUL; the caller must provide room for
 *           the expanded length + 1
 *
 * A number n becomes n 'M' characters, '^' becomes 'D', and every other
 * character is copied unchanged ("3A2^CG1" -> "MMMAMMDCGM").
 *
 * The index is checked against md_size before md is read, so the buffer
 * does not have to be NUL-terminated.
 */
void convertMDToMatrix(char *md, int md_size, char * matrix)
{
	int i = 0;
	int size = 0;

	matrix[0] = '\0';

	while(i < md_size)
	{
		if(md[i] >= '0' && md[i] <= '9')
		{
			int value = 0;
			int j;

			while(i < md_size && md[i] >= '0' && md[i] <= '9')
			{
				if (value < 100000000)
					value = value*10 + (md[i]-'0');
				i++;
			}

			for(j = 0; j < value; j++)
			{
				matrix[size] = 'M';
				size++;
			}

			/* i already points at the character after the number */
			continue;
		}

		if(md[i] == '^')
			matrix[size] = 'D';
		else
			matrix[size] = md[i];
		size++;
		i++;
	}
	matrix[size] = '\0';
}
7223 | |
7224 | |
/*
 * Merges a CIGAR string and an MD string into a single column string.
 *
 * Both inputs are first expanded (convertCigarToMatrix, convertMDToMatrix)
 * into 200-byte scratch buffers, so the expanded alignments must stay below
 * 200 columns.  The CIGAR columns are then walked with a cursor `j` into
 * the MD columns:
 *   'M' -> the MD column at j ('M' or a mismatch base), or 'M' once the MD
 *          columns are exhausted
 *   'D' -> 'D' followed by the MD column after j
 *   'I' -> 'I'
 * and j advances by one after every CIGAR column while MD columns remain.
 *
 * matrix is always NUL-terminated, also when the CIGAR expands to nothing.
 */
void convertMDCigarToMatrix(char *cigar, int cigar_size, char *md, int md_size, char *matrix)
{
	char tmp1[200];		/* expanded CIGAR */
	char tmp2[200];		/* expanded MD */
	size_t len1, len2;
	size_t i;
	size_t j = 0;
	int size = 0;

	convertMDToMatrix(md,md_size, tmp2);
	convertCigarToMatrix(cigar, cigar_size,tmp1);

	len1 = strlen(tmp1);
	len2 = strlen(tmp2);

	for (i = 0; i < len1; i++)
	{
		if(tmp1[i]=='M')
		{
			matrix[size] = (j < len2) ? tmp2[j] : 'M';
			size++;
		}
		else if(tmp1[i] == 'D')
		{
			matrix[size]='D';
			size++;

			/* never step past the terminator of the MD columns */
			if (j < len2)
				j++;
			matrix[size]=tmp2[j];
			size++;
		}
		else if(tmp1[i] == 'I')
		{
			matrix[size]='I';
			size++;
		}

		if(j < len2)
			j++;
	}

	matrix[size] = '\0';
}
7288 | |
/*
 * Rewrites a column string, attaching read bases to indel columns.
 *
 * in_matrix  : column string ('M', 'D', 'I' or any other character)
 * seq        : read bases, indexed by a cursor that starts at 0
 * out_matrix : result, NUL-terminated
 *
 * Per input column:
 *   'M'   -> 'M'; the read cursor advances by one
 *   'D'   -> 'D', the following input column is skipped, the read cursor
 *            advances by one, the base now under the cursor is emitted and
 *            the cursor advances again
 *   'I'   -> 'I' followed by the base under the cursor; cursor advances
 *   other -> copied unchanged; the read cursor advances by one
 */
void convertInsertion(char * in_matrix, char * seq, char *out_matrix)
{
	int col;
	int readPos = 0;
	char *dst = out_matrix;

	for (col = 0; col < strlen(in_matrix); col++)
	{
		switch (in_matrix[col])
		{
		case 'M':
			*dst++ = 'M';
			readPos++;
			break;

		case 'D':
			*dst++ = 'D';
			col++;			/* skip the column that follows the 'D' */
			readPos++;
			*dst++ = seq[readPos];
			readPos++;
			break;

		case 'I':
			*dst++ = 'I';
			*dst++ = seq[readPos];
			readPos++;
			break;

		default:
			*dst++ = in_matrix[col];
			readPos++;
			break;
		}
	}

	*dst = '\0';
}
7333 | |
7334 /**********************************************/ | |
7335 void outputPairedEndDiscPP() | |
7336 { | |
7337 char tmp_matrix1[200]; | |
7338 char tmp_matrix2[200]; | |
7339 | |
7340 char matrix1[200]; | |
7341 char matrix2[200]; | |
7342 | |
7343 char cigar1[200]; | |
7344 char editString1[200]; | |
7345 | |
7346 char cigar2[200]; | |
7347 char editString2[200]; | |
7348 | |
7349 char seq1[SEQ_LENGTH+1]; | |
7350 char qual1[SEQ_LENGTH+1]; | |
7351 | |
7352 char seq2[SEQ_LENGTH+1]; | |
7353 char qual2[SEQ_LENGTH+1]; | |
7354 | |
7355 char genName[SEQ_LENGTH]; | |
7356 char fname1[FILE_NAME_LENGTH]; | |
7357 char fname2[FILE_NAME_LENGTH]; | |
7358 char l; | |
7359 int l_size; | |
7360 int loc1, loc2; | |
7361 int err1, err2; | |
7362 char dir1, dir2; | |
7363 float sc1, sc2, lsc=0; | |
7364 int flag = 0; | |
7365 int rNo,lrNo = -1; | |
7366 int tmp; | |
7367 FILE *in, *out; | |
7368 | |
7369 sprintf(fname1, "%s__%s__disc", mappingOutputPath, mappingOutput); | |
7370 sprintf(fname2, "%s%s_DIVET.vh", mappingOutputPath, mappingOutput); | |
7371 | |
7372 in = fileOpen(fname1, "r"); | |
7373 out = fileOpen(fname2, "w"); | |
7374 | |
7375 if (in != NULL) | |
7376 { | |
7377 flag = fread(&rNo, sizeof(int), 1, in); | |
7378 } | |
7379 else | |
7380 { | |
7381 flag = 0; | |
7382 } | |
7383 | |
7384 seq1[SEQ_LENGTH] = '\0'; | |
7385 qual1[SEQ_LENGTH] = '\0'; | |
7386 | |
7387 seq2[SEQ_LENGTH] = '\0'; | |
7388 qual2[SEQ_LENGTH] = '\0'; | |
7389 | |
7390 while (flag) | |
7391 { | |
7392 tmp = fread(&l, sizeof(char), 1, in); | |
7393 tmp = fread(genName, sizeof(char), l, in); | |
7394 genName[(int)l]='\0'; | |
7395 tmp = fread(&loc1, sizeof(int), 1, in); | |
7396 tmp = fread(&err1, sizeof(int), 1, in); | |
7397 tmp = fread(&sc1, sizeof(float), 1, in); | |
7398 | |
7399 //tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out); | |
7400 | |
7401 tmp = fread(&l_size, sizeof(int), 1, in); | |
7402 tmp = fread(cigar1, sizeof(char), l_size, in); | |
7403 cigar1[(int)l_size]='\0'; | |
7404 //tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out); | |
7405 | |
7406 //tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out); | |
7407 tmp = fread(&l_size, sizeof(int), 1, in); | |
7408 tmp = fread(editString1, sizeof(char), l_size, in); | |
7409 editString1[(int)l_size]='\0'; | |
7410 //tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out); | |
7411 | |
7412 tmp = fread(&loc2, sizeof(int), 1, in); | |
7413 tmp = fread(&err2, sizeof(int), 1, in); | |
7414 tmp = fread(&sc2, sizeof(float), 1, in); | |
7415 | |
7416 tmp = fread(&l_size, sizeof(int), 1, in); | |
7417 tmp = fread(cigar2, sizeof(char), l_size, in); | |
7418 cigar2[(int)l_size]='\0'; | |
7419 | |
7420 tmp = fread(&l_size, sizeof(int), 1, in); | |
7421 tmp = fread(editString2, sizeof(char), l_size, in); | |
7422 editString2[(int)l_size]='\0'; | |
7423 | |
7424 convertMDCigarToMatrix(cigar1, strlen(cigar1), editString1, strlen(editString1), tmp_matrix1); | |
7425 convertMDCigarToMatrix(cigar2, strlen(cigar2), editString2, strlen(editString2), tmp_matrix2); | |
7426 | |
7427 | |
7428 if(_msf_readHasConcordantMapping[rNo] == 0) | |
7429 { | |
7430 | |
7431 dir1 = dir2 = 'F'; | |
7432 | |
7433 strncpy(seq1, _msf_seqList[rNo*2].seq, SEQ_LENGTH); | |
7434 strncpy(seq2, _msf_seqList[rNo*2+1].seq, SEQ_LENGTH); | |
7435 | |
7436 if (loc1 < 0) | |
7437 { | |
7438 dir1 = 'R'; | |
7439 loc1 = -loc1; | |
7440 | |
7441 strncpy(seq1, _msf_seqList[rNo*2].rseq, SEQ_LENGTH); | |
7442 } | |
7443 | |
7444 if (loc2 < 0) | |
7445 { | |
7446 dir2 = 'R'; | |
7447 loc2 = -loc2; | |
7448 | |
7449 strncpy(seq2, _msf_seqList[rNo*2+1].rseq, SEQ_LENGTH); | |
7450 } | |
7451 | |
7452 convertInsertion(tmp_matrix1, seq1, matrix1); | |
7453 convertInsertion(tmp_matrix2, seq2, matrix2); | |
7454 | |
7455 | |
7456 if (rNo != lrNo) | |
7457 { | |
7458 int j; | |
7459 for (j=0; j<SEQ_LENGTH; j++) | |
7460 { | |
7461 lsc += _msf_seqList[rNo*2].qual[j]+_msf_seqList[rNo*2+1].qual[j]; | |
7462 } | |
7463 lsc /= 2*SEQ_LENGTH; | |
7464 lsc -= 33; | |
7465 lrNo = rNo; | |
7466 } | |
7467 | |
7468 char event = '\0'; | |
7469 | |
7470 | |
7471 if ( dir1 == dir2 ) | |
7472 { | |
7473 event = 'V'; | |
7474 } | |
7475 else | |
7476 { | |
7477 if (loc1 < loc2) | |
7478 { | |
7479 | |
7480 if (dir1 == 'R' && dir2 == 'F') | |
7481 { | |
7482 event = 'E'; | |
7483 | |
7484 } | |
7485 else if ( loc2 - loc1 >= maxPairEndedDiscordantDistance ) | |
7486 { | |
7487 event = 'D'; | |
7488 } | |
7489 else | |
7490 { | |
7491 event = 'I'; | |
7492 } | |
7493 } | |
7494 else if (loc2 < loc1) | |
7495 { | |
7496 if (dir2 == 'R' && dir1 == 'F') | |
7497 { | |
7498 event = 'E'; | |
7499 } | |
7500 else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance ) | |
7501 { | |
7502 event = 'D'; | |
7503 } | |
7504 else | |
7505 { | |
7506 event = 'I'; | |
7507 } | |
7508 } | |
7509 } | |
7510 _msf_seqList[rNo*2].hits[0] = 2; | |
7511 if(event != 'E') | |
7512 fprintf(out, "%s\t%s\t%d\t%d\t%c\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%e\n", | |
7513 _msf_seqList[rNo*2].name, genName, loc1, (loc1+SEQ_LENGTH-1), dir1, | |
7514 loc2, (loc2+SEQ_LENGTH-1), dir2, event, (err1+err2), lsc, sc1*sc2); | |
7515 | |
7516 } | |
7517 flag = fread(&rNo, sizeof(int), 1, in); | |
7518 } | |
7519 | |
7520 fclose(in); | |
7521 fclose(out); | |
7522 | |
7523 unlink(fname1); | |
7524 } | |
7525 | |
7526 void finalizeOEAReads(char *fileName) | |
7527 { | |
7528 FILE *fp_out1; | |
7529 FILE * in; | |
7530 | |
7531 char genName[SEQ_LENGTH]; | |
7532 | |
7533 char fname1[FILE_NAME_LENGTH]; | |
7534 char fname2[FILE_NAME_LENGTH]; | |
7535 | |
7536 char l=0; | |
7537 int loc1=0; | |
7538 | |
7539 int err1; | |
7540 | |
7541 char d; | |
7542 | |
7543 float sc1=0; | |
7544 int flag = 0; | |
7545 int rNo=-1; | |
7546 int tmp=0; | |
7547 | |
7548 int cigarSize = 0; | |
7549 int mdSize = 0; | |
7550 | |
7551 char cigar[SEQ_LENGTH+1]; | |
7552 char md[SEQ_LENGTH+1]; | |
7553 | |
7554 char *seq1, *seq2, *qual1, *qual2; | |
7555 char *rqual1, *rqual2; | |
7556 | |
7557 seq1=NULL; seq2=NULL; qual1=NULL; qual2=NULL; | |
7558 | |
7559 rqual1 = getMem(200*sizeof(char)); | |
7560 rqual2 = getMem(200*sizeof(char)); | |
7561 | |
7562 rqual1[0] = '\0'; | |
7563 rqual2[0] = '\0'; | |
7564 | |
7565 /* | |
7566 char mappingOutput2[2 * SEQ_LENGTH]; | |
7567 int mo_len; | |
7568 mo_len = strlen(mappingOutput); | |
7569 strcpy(mappingOutput2, mappingOutput); | |
7570 | |
7571 if (mappingOutput[mo_len-1]=='m' && mappingOutput[mo_len-2]=='a' && mappingOutput[mo_len-3]=='s' && mappingOutput[mo_len-4]=='.') | |
7572 mappingOutput2[mo_len-4] = 0; | |
7573 */ | |
7574 | |
7575 sprintf(fname1, "%s%s_OEA.sam", mappingOutputPath, mappingOutput); | |
7576 | |
7577 fp_out1 = fileOpen(fname1, "w"); | |
7578 | |
7579 in = NULL; | |
7580 if (pairedEndDiscordantMode){ | |
7581 sprintf(fname2, "%s__%s__oea", mappingOutputPath, mappingOutput); | |
7582 | |
7583 in = fileOpen(fname2, "r"); | |
7584 } | |
7585 | |
7586 | |
7587 if (in != NULL) | |
7588 { | |
7589 flag = fread(&rNo, sizeof(int), 1, in); | |
7590 } | |
7591 else | |
7592 { | |
7593 flag = 0; | |
7594 } | |
7595 | |
7596 while (flag) | |
7597 { | |
7598 cigar[0] = '\0'; | |
7599 md[0] = '\0'; | |
7600 | |
7601 tmp = fread(&l, sizeof(char), 1, in); | |
7602 tmp = fread(genName, sizeof(char), l, in); | |
7603 | |
7604 genName[(int)l]='\0'; | |
7605 | |
7606 | |
7607 tmp = fread(&loc1, sizeof(int), 1, in); | |
7608 tmp = fread(&err1, sizeof(int), 1, in); | |
7609 tmp = fread(&sc1, sizeof(float), 1, in); | |
7610 | |
7611 tmp = fread (&cigarSize, sizeof(int), 1, in); | |
7612 tmp = fread (cigar, sizeof(char), cigarSize, in); | |
7613 | |
7614 cigar[cigarSize] = '\0'; | |
7615 | |
7616 tmp = fread (&mdSize, sizeof(int), 1, in); | |
7617 tmp = fread (md, sizeof(char), mdSize, in); | |
7618 md[mdSize] = '\0'; | |
7619 | |
7620 d = 1; | |
7621 | |
7622 if(loc1 < 0) | |
7623 { | |
7624 d = -1; | |
7625 loc1 *= -1; | |
7626 | |
7627 seq1 = _msf_seqList[rNo].rseq; | |
7628 reverse(_msf_seqList[rNo].qual, rqual1, SEQ_LENGTH); | |
7629 rqual1[SEQ_LENGTH] = '\0'; | |
7630 } | |
7631 else | |
7632 { | |
7633 seq1 = _msf_seqList[rNo].seq; | |
7634 qual1 = _msf_seqList[rNo].qual; | |
7635 } | |
7636 | |
7637 if(rNo % 2 == 0) | |
7638 { | |
7639 seq2 = _msf_seqList[rNo+1].seq; | |
7640 qual2 = _msf_seqList[rNo+1].qual; | |
7641 } | |
7642 else | |
7643 { | |
7644 seq2 = _msf_seqList[rNo-1].seq; | |
7645 qual2 = _msf_seqList[rNo-1].qual; | |
7646 } | |
7647 | |
7648 if(_msf_seqHits[rNo] != 0 && _msf_seqHits[(rNo%2==0)?rNo+1:rNo-1] == 0) | |
7649 { | |
7650 _msf_output.POS = loc1; | |
7651 _msf_output.MPOS = 0; | |
7652 _msf_output.FLAG = (rNo % 2 ==0)? 1+4+32*d+128 : 1+8+16*d+64 ; | |
7653 _msf_output.ISIZE = 0; | |
7654 _msf_output.SEQ = seq1; | |
7655 _msf_output.QUAL = qual1; | |
7656 _msf_output.QNAME = _msf_seqList[rNo].name; | |
7657 _msf_output.RNAME = genName; | |
7658 _msf_output.MAPQ = 255; | |
7659 _msf_output.CIGAR = cigar; | |
7660 _msf_output.MRNAME = "="; | |
7661 | |
7662 | |
7663 _msf_output.optSize = 4; | |
7664 _msf_output.optFields = _msf_optionalFields; | |
7665 | |
7666 _msf_optionalFields[0].tag = "NM"; | |
7667 _msf_optionalFields[0].type = 'i'; | |
7668 _msf_optionalFields[0].iVal = err1; | |
7669 | |
7670 _msf_optionalFields[1].tag = "MD"; | |
7671 _msf_optionalFields[1].type = 'Z'; | |
7672 _msf_optionalFields[1].sVal = md; | |
7673 | |
7674 | |
7675 | |
7676 //for the OEA reads | |
7677 _msf_optionalFields[2].tag = "NS"; | |
7678 _msf_optionalFields[2].type = 'Z'; | |
7679 _msf_optionalFields[2].sVal = seq2; | |
7680 | |
7681 | |
7682 _msf_optionalFields[3].tag = "NQ"; | |
7683 _msf_optionalFields[3].type = 'Z'; | |
7684 _msf_optionalFields[3].sVal = qual2; | |
7685 | |
7686 outputSAM(fp_out1, _msf_output); | |
7687 | |
7688 _msf_seqList[rNo].hits[0] = -1; | |
7689 _msf_seqList[(rNo%2==0)?rNo+1:rNo-1].hits[0] = -1; | |
7690 } | |
7691 flag = fread(&rNo, sizeof(int), 1, in); | |
7692 } | |
7693 | |
7694 freeMem(rqual1, 0); | |
7695 freeMem(rqual2, 0); | |
7696 | |
7697 unlink(fname2); | |
7698 | |
7699 fclose(fp_out1); | |
7700 } | |
7701 | |
7702 | |
7703 void outputTransChromosal(char *fileName1, char *fileName2, FILE * fp_out) | |
7704 { | |
7705 int i = 0; | |
7706 int j = 0; | |
7707 int k = 0; | |
7708 | |
7709 char *index; | |
7710 | |
7711 int size1 = 0; | |
7712 int size2 = 0; | |
7713 | |
7714 FILE *fp1 = NULL; | |
7715 FILE *fp2 = NULL; | |
7716 | |
7717 char geneFileName1[FILE_NAME_LENGTH]; | |
7718 char geneFileName2[FILE_NAME_LENGTH]; | |
7719 | |
7720 FullMappingInfoLink *miL = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink)); | |
7721 FullMappingInfoLink *miR = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink)); | |
7722 | |
7723 | |
7724 if(fileName1 != NULL && fileName2 != NULL) | |
7725 { | |
7726 | |
7727 fp1 = fileOpen(fileName1, "r"); | |
7728 fp2 = fileOpen(fileName2, "r"); | |
7729 | |
7730 index = strstr(fileName1, "__"); | |
7731 strncpy(geneFileName1, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2); | |
7732 geneFileName1[strstr(index + 2, "__") - index - 2] = '\0'; | |
7733 | |
7734 index = strstr(fileName2, "__"); | |
7735 strncpy(geneFileName2, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2); | |
7736 geneFileName2[strstr(index + 2, "__") - index - 2] = '\0'; | |
7737 | |
7738 | |
7739 for(i = 0; i < _msf_seqListSize / 2; i++) | |
7740 { | |
7741 fread(&size1, sizeof(int), 1, fp1); | |
7742 fread(&size2, sizeof(int), 1, fp2); | |
7743 | |
7744 miL[i].mi = getMem(size1 * sizeof(FullMappingInfo) ); | |
7745 miR[i].mi = getMem(size2 * sizeof(FullMappingInfo) ); | |
7746 | |
7747 miL[i].size = size1; | |
7748 miR[i].size = size2; | |
7749 | |
7750 for(j = 0; j < size1; j++) | |
7751 { | |
7752 fread(&(miL[i].mi[j].loc), sizeof(int), 1, fp1); | |
7753 | |
7754 fread (&(miL[i].mi[j].err), sizeof(int), 1, fp1); | |
7755 | |
7756 fread (&(miL[i].mi[j].cigarSize), sizeof(int), 1, fp1); | |
7757 fread ((miL[i].mi[j].cigar), sizeof(char), miL[i].mi[j].cigarSize+1, fp1); | |
7758 | |
7759 fread (&(miL[i].mi[j].mdSize), sizeof(int), 1, fp1); | |
7760 fread ((miL[i].mi[j].md), sizeof(char), miL[i].mi[j].mdSize+1, fp1); | |
7761 | |
7762 miL[i].mi[j].dir = 1; | |
7763 if(miL[i].mi[j].loc < 1) | |
7764 { | |
7765 miL[i].mi[j].loc *= -1; | |
7766 miL[i].mi[j].dir = -1; | |
7767 } | |
7768 } | |
7769 for(k = 0; k < size2; k++) | |
7770 { | |
7771 fread(&(miR[i].mi[k].loc), sizeof(int), 1, fp2); | |
7772 | |
7773 fread (&(miR[i].mi[k].err), sizeof(int), 1, fp2); | |
7774 | |
7775 fread (&(miR[i].mi[k].cigarSize), sizeof(int), 1, fp2); | |
7776 fread ((miR[i].mi[k].cigar), sizeof(char), miR[i].mi[k].cigarSize+1, fp2); | |
7777 | |
7778 fread (&(miR[i].mi[k].mdSize), sizeof(int), 1, fp2); | |
7779 fread ((miR[i].mi[k].md), sizeof(char), miR[i].mi[k].mdSize+1, fp2); | |
7780 | |
7781 miR[i].mi[k].dir = 1; | |
7782 if(miR[i].mi[k].loc < 1) | |
7783 { | |
7784 miR[i].mi[k].loc *= -1; | |
7785 miR[i].mi[k].dir = -1; | |
7786 } | |
7787 } | |
7788 if(_msf_readHasConcordantMapping[i] == 0 && size1 != 0 && size2 != 0 && (size1 * size2 < MAX_TRANS_CHROMOSAL_OUTPUT)) | |
7789 { | |
7790 int d1 = 0; | |
7791 int d2 = 0; | |
7792 char *seq, *qual; | |
7793 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2; | |
7794 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1]; | |
7795 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0'; | |
7796 seq1 = _msf_seqList[i*2].seq; | |
7797 rseq1 = _msf_seqList[i*2].rseq; | |
7798 qual1 = _msf_seqList[i*2].qual; | |
7799 reverse(_msf_seqList[i*2].qual, rqual1, SEQ_LENGTH); | |
7800 | |
7801 seq2 = _msf_seqList[i*2+1].seq; | |
7802 rseq2 = _msf_seqList[i*2+1].rseq; | |
7803 qual2 = _msf_seqList[i*2+1].qual; | |
7804 reverse(_msf_seqList[i*2+1].qual, rqual2, SEQ_LENGTH); | |
7805 | |
7806 for(j = 0; j < size1; j++) | |
7807 { | |
7808 d1 = (miL[i].mi[j].dir == -1)?1:0; | |
7809 | |
7810 if ( d1 ) | |
7811 { | |
7812 seq = rseq1; | |
7813 qual = rqual1; | |
7814 } | |
7815 else | |
7816 { | |
7817 seq = seq1; | |
7818 qual = qual1; | |
7819 } | |
7820 | |
7821 for(k = 0; k < size2; k++) | |
7822 { | |
7823 | |
7824 d2 = (miR[i].mi[k].dir == -1)?1:0; | |
7825 | |
7826 _msf_output.POS = miL[i].mi[j].loc; | |
7827 _msf_output.MPOS = miR[i].mi[k].loc; | |
7828 _msf_output.FLAG = 0; | |
7829 _msf_output.ISIZE = 0; | |
7830 _msf_output.SEQ = seq, | |
7831 _msf_output.QUAL = qual; | |
7832 _msf_output.QNAME = _msf_seqList[i*2].name; | |
7833 _msf_output.RNAME = geneFileName1; | |
7834 _msf_output.MAPQ = 255; | |
7835 _msf_output.CIGAR = miL[i].mi[j].cigar; | |
7836 _msf_output.MRNAME = "="; | |
7837 | |
7838 _msf_output.optSize = 2; | |
7839 _msf_output.optFields = _msf_optionalFields; | |
7840 | |
7841 _msf_optionalFields[0].tag = "NM"; | |
7842 _msf_optionalFields[0].type = 'i'; | |
7843 _msf_optionalFields[0].iVal = miL[i].mi[j].err; | |
7844 | |
7845 _msf_optionalFields[1].tag = "MD"; | |
7846 _msf_optionalFields[1].type = 'Z'; | |
7847 _msf_optionalFields[1].sVal = miL[i].mi[j].md; | |
7848 | |
7849 | |
7850 if ( d2 ) | |
7851 { | |
7852 seq = rseq2; | |
7853 qual = rqual2; | |
7854 } | |
7855 else | |
7856 { | |
7857 seq = seq2; | |
7858 qual = qual2; | |
7859 } | |
7860 | |
7861 outputSAM(fp_out, _msf_output); | |
7862 | |
7863 | |
7864 _msf_output.POS = miR[i].mi[k].loc; | |
7865 _msf_output.MPOS = miL[i].mi[j].loc; | |
7866 _msf_output.FLAG = 0; | |
7867 _msf_output.ISIZE = 0; | |
7868 _msf_output.SEQ = seq, | |
7869 _msf_output.QUAL = qual; | |
7870 _msf_output.QNAME = _msf_seqList[i*2+1].name; | |
7871 _msf_output.RNAME = geneFileName2; | |
7872 _msf_output.MAPQ = 255; | |
7873 _msf_output.CIGAR = miR[i].mi[k].cigar; | |
7874 _msf_output.MRNAME = "="; | |
7875 | |
7876 _msf_output.optSize = 2; | |
7877 _msf_output.optFields = _msf_optionalFields; | |
7878 | |
7879 _msf_optionalFields[0].tag = "NM"; | |
7880 _msf_optionalFields[0].type = 'i'; | |
7881 _msf_optionalFields[0].iVal = miR[i].mi[k].err; | |
7882 | |
7883 _msf_optionalFields[1].tag = "MD"; | |
7884 _msf_optionalFields[1].type = 'Z'; | |
7885 _msf_optionalFields[1].sVal = miR[i].mi[k].md; | |
7886 | |
7887 outputSAM(fp_out, _msf_output); | |
7888 | |
7889 } | |
7890 } | |
7891 } | |
7892 } | |
7893 | |
7894 } | |
7895 | |
7896 for(i = 0; i < _msf_seqListSize / 2; i++) | |
7897 { | |
7898 freeMem(miL[i].mi, miL[i].size * sizeof(FullMappingInfo)); | |
7899 freeMem(miR[i].mi, miR[i].size * sizeof(FullMappingInfo)); | |
7900 } | |
7901 | |
7902 freeMem(miL, _msf_seqListSize * sizeof(FullMappingInfoLink)); | |
7903 freeMem(miR, _msf_seqListSize * sizeof(FullMappingInfoLink)); | |
7904 | |
7905 fclose(fp1); | |
7906 fclose(fp2); | |
7907 } | |
7908 | |
7909 /* | |
7910 If flag is nonzero, output all possible trans-chromosomal mappings; | |
7911 otherwise only the temporary files are deleted. | |
7912 | |
7913 */ | |
7914 | |
7915 void outputAllTransChromosal(int flag) | |
7916 { | |
7917 | |
7918 int i = 0; | |
7919 int j = 0; | |
7920 int k = 0; | |
7921 int l = 0; | |
7922 | |
7923 FILE *fp_out = NULL; | |
7924 char fname1[200]; | |
7925 | |
7926 if(flag) | |
7927 { | |
7928 fp_out = fileOpen(fname1, "w"); | |
7929 | |
7930 sprintf(fname1, "%s%s_TRANSCHROMOSOMAL", mappingOutputPath, mappingOutput); | |
7931 | |
7932 // for(i = 0; i < _msf_maxFile; i++) | |
7933 // { | |
7934 i = 0; | |
7935 for(j = i+1; j < _msf_maxFile; j++) | |
7936 { | |
7937 if(i != j) | |
7938 { | |
7939 for(k = 0; k < _msf_fileCount[i]; k++) | |
7940 { | |
7941 for(l = 0; l < _msf_fileCount[j]; l++) | |
7942 { | |
7943 outputTransChromosal(_msf_fileName[i][k][0], _msf_fileName[j][l][1], fp_out); | |
7944 }// for l | |
7945 }// for k | |
7946 }// if | |
7947 }// for j | |
7948 // } //for i | |
7949 } | |
7950 | |
7951 for(i = 0; i < _msf_maxFile; i++) | |
7952 { | |
7953 for(j = 0; j < _msf_fileCount[i]; j++) | |
7954 { | |
7955 unlink(_msf_fileName[i][j][0]); | |
7956 unlink(_msf_fileName[i][j][1]); | |
7957 } | |
7958 } | |
7959 if(flag) | |
7960 fclose(fp_out); | |
7961 } |