1
|
1 /*
|
|
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
|
|
3 * All rights reserved.
|
|
4 *
|
|
5 * Redistribution and use in source and binary forms, with or without modification,
|
|
6 * are permitted provided that the following conditions are met:
|
|
7 *
|
|
8 * Redistributions of source code must retain the above copyright notice, this list
|
|
9 * of conditions and the following disclaimer.
|
|
10 * - Redistributions in binary form must reproduce the above copyright notice, this
|
|
11 * list of conditions and the following disclaimer in the documentation and/or other
|
|
12 * materials provided with the distribution.
|
|
13 * - Neither the names of the University of Washington, Simon Fraser University,
|
|
14 * nor the names of its contributors may be
|
|
15 * used to endorse or promote products derived from this software without specific
|
|
16 * prior written permission.
|
|
17 *
|
|
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 /*
|
|
32 Authors:
|
|
33 Farhad Hormozdiari
|
|
34 Faraz Hach
|
|
35 Can Alkan
|
|
36 Emails:
|
|
37 farhadh AT uw DOT edu
|
|
38 fhach AT cs DOT sfu DOT ca
|
|
39 calkan AT uw DOT edu
|
|
40 */
|
|
41
|
|
42 #include <stdio.h>
|
|
43 #include <stdlib.h>
|
|
44 #include <string.h>
|
|
45 #include <math.h>
|
|
46 #include <dirent.h>
|
|
47 #include <xmmintrin.h>
|
|
48 #include <emmintrin.h>
|
|
49 #include <mmintrin.h>
|
|
50
|
|
51
|
|
52 #include "Common.h"
|
|
53 #include "Reads.h"
|
|
54 #include "HashTable.h"
|
|
55 #include "Output.h"
|
|
56 #include "MrFAST.h"
|
|
57 #include "RefGenome.h"
|
|
58
|
|
59
|
|
/*
 * Helper macros. All of them evaluate their arguments more than once, so do
 * not pass expressions with side effects (e.g. i++).
 */

/* Smaller of two values. */
#define min(a,b) ((a)>(b)?(b):(a))

/* Smallest of three values. Every argument is parenthesized so that
 * expression arguments (e.g. `x & 3`) keep their intended grouping. */
#define min3(a,b,c) ((a)>(b) ? ((b)>(c)?(c):(b)) : ((a)>(c)?(c):(a)))

/* Maps 'A','C','G','T' to 0..3 and every other character to 4. */
#define CHARCODE(a) ((a)=='A' ? 0 : ((a)=='C' ? 1 : ((a)=='G' ? 2 : ((a)=='T' ? 3 : 4))))

#define MAX_REF_SIZE 18
|
|
65
|
|
66
|
|
67 float calculateScore(int index, char *seq, char *qual, char *md);
|
|
68 unsigned char mrFAST = 1;
|
|
69 char *versionNumberF="0.5";
|
|
70
|
|
71 long long verificationCnt = 0;
|
|
72 long long mappingCnt = 0;
|
|
73 long long mappedSeqCnt = 0;
|
|
74 long long completedSeqCnt = 0;
|
|
75 char *mappingOutput;
|
|
76 /**********************************************/
|
|
77 char *_msf_refGen = NULL;
|
|
78 int _msf_refGenLength = 0;
|
|
79 int _msf_refGenOffset = 0;
|
|
80 char *_msf_refGenName = NULL;
|
|
81
|
|
82 int _msf_refGenBeg;
|
|
83 int _msf_refGenEnd;
|
|
84
|
|
85 IHashTable *_msf_hashTable = NULL;
|
|
86
|
|
87 int *_msf_samplingLocs;
|
|
88 int *_msf_samplingLocsEnds;
|
|
89 int _msf_samplingLocsSize;
|
|
90
|
|
91 Read *_msf_seqList;
|
|
92 int _msf_seqListSize;
|
|
93
|
|
94 Pair *_msf_sort_seqList = NULL;
|
|
95 int *_msf_map_sort_seqList;
|
|
96
|
|
97 ReadIndexTable *_msf_rIndex = NULL;
|
|
98 int _msf_rIndexSize;
|
|
99 int _msf_rIndexMax;
|
|
100
|
|
101 SAM _msf_output;
|
|
102
|
|
103 OPT_FIELDS *_msf_optionalFields;
|
|
104
|
|
105 char *_msf_op;
|
|
106
|
|
107 int *_msf_verifiedLocs = NULL;
|
|
108
|
|
109 char _msf_numbers[200][3];
|
|
110 char _msf_cigar[5];
|
|
111
|
|
112 MappingInfo *_msf_mappingInfo;
|
|
113
|
|
114 int *_msf_seqHits;
|
|
115 int _msf_openFiles = 0;
|
|
116 int _msf_maxLSize=0;
|
|
117 int _msf_maxRSize=0;
|
|
118
|
|
119 BestFullMappingInfo *bestHitMappingInfo;
|
|
120
|
|
121 /*************************/
|
|
122 int _msf_maxFile=0;
|
|
123 char _msf_fileName[4000][200][2][FILE_NAME_LENGTH];
|
|
124 int _msf_fileCount[4000];
|
|
125
|
|
126 char *_msf_readHasConcordantMapping; //boolean if a read has concordant mapping :D
|
|
127
|
|
128 int *_msf_oeaMapping;
|
|
129 int *_msf_discordantMapping;
|
|
130
|
|
131 FILE *bestConcordantFILE;
|
|
132 FILE *bestDiscordantFILE;
|
|
133
|
|
134 int counter = 0;
|
|
135
|
|
136 int scoreF[200][200];
|
|
137 int scoreB[200][200];
|
|
138
|
|
139 int score[200][200];
|
|
140 int direction1[200][200];
|
|
141 int direction2[200][200];
|
|
142
|
|
143 __m128i MASK;
|
|
144
|
|
145 int lookUpTable[15625][15625];
|
|
146
|
|
147 /**************************************************Methods***************************************************/
|
|
148 int smallEditDistanceF(char *a, int lena, char *b, int lenb)
|
|
149 {
|
|
150 int matrix[20][20];
|
|
151 int i = 0;
|
|
152 int j = 0;
|
|
153
|
|
154 for(i = 0; i <= lena; i++)
|
|
155 {
|
|
156 matrix[0][i] = i;
|
|
157 }
|
|
158
|
|
159 for(i = 0; i <= lenb; i++)
|
|
160 {
|
|
161 matrix[i][0] = i;
|
|
162 }
|
|
163
|
|
164
|
|
165 for(i = 1; i <= lenb; i++)
|
|
166 {
|
|
167 for(j = 1; j <= lena; j++)
|
|
168 {
|
|
169 matrix[i][j] = min3(matrix[i-1][j-1]+ (a[j-1] != b[i-1]),matrix[i][j-1]+1 ,matrix[i-1][j]+1);
|
|
170 }
|
|
171 }
|
|
172 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]);
|
|
173 }
|
|
174
|
|
175 int smallEditDistanceB(char *a, int lena, char *b, int lenb)
|
|
176 {
|
|
177 int matrix[20][20];
|
|
178 int i = 0;
|
|
179 int j = 0;
|
|
180
|
|
181 for(i = 0; i <= lena; i++)
|
|
182 {
|
|
183 matrix[0][i] = i;
|
|
184 }
|
|
185
|
|
186 for(i = 0; i <= lenb; i++)
|
|
187 {
|
|
188 matrix[i][0] = i;
|
|
189 }
|
|
190
|
|
191
|
|
192 for(i = 1; i <= lenb; i++)
|
|
193 {
|
|
194 for(j = 1; j <= lena; j++)
|
|
195 {
|
|
196 matrix[i][j] = min3(matrix[i-1][j-1]+ (*(a-j+1) != *(b-i+1)),matrix[i][j-1]+1 ,matrix[i-1][j]+1);
|
|
197 }
|
|
198 }
|
|
199
|
|
200 return (matrix[lenb][lena]>errThreshold?-1:matrix[lenb][lena]);
|
|
201 }
|
|
202
|
|
/*
 * Writes the six least-significant base-5 digits of `value` into out[0..5],
 * most significant digit first, using 0->'A', 1->'C', 2->'G', 3->'T' and 'N'
 * for anything else (digit 4, or the negative remainder of a negative value).
 * out[6] is set to '\0'.
 */
static void fed_decodeHexamer(int value, char out[7])
{
	static const char alphabet[] = "ACGT";
	int pos;

	for (pos = 5; pos >= 0; pos--)
	{
		int digit = value % 5;

		out[pos] = (digit >= 0 && digit <= 3) ? alphabet[digit] : 'N';
		value /= 5;
	}
	out[6] = '\0';
}

/*
 * Edit distance between two 6-mers given as base-5 codes (see
 * fed_decodeHexamer for the digit-to-letter mapping).
 *
 * The full 7x7 Levenshtein matrix is filled, and the result is the smallest
 * value found in its last row or last column -- i.e. the best alignment of
 * one complete 6-mer against any prefix (including the empty and the full
 * one) of the other. The result is between 0 and 6.
 */
char fastEditDistance(int per1, int per2)
{
	char str1[7];
	char str2[7];
	int matrix[7][7];
	int best = 20;		/* larger than any value the matrix can hold */
	int i = 0;
	int j = 0;

	fed_decodeHexamer(per1, str1);
	fed_decodeHexamer(per2, str2);

	for (i = 0; i < 7; i++)
	{
		matrix[0][i] = i;
		matrix[i][0] = i;
	}

	for (i = 1; i < 7; i++)
	{
		for (j = 1; j < 7; j++)
		{
			int cell = matrix[i-1][j-1] + (str1[i-1] != str2[j-1]);

			if (matrix[i][j-1] + 1 < cell)
				cell = matrix[i][j-1] + 1;
			if (matrix[i-1][j] + 1 < cell)
				cell = matrix[i-1][j] + 1;
			matrix[i][j] = cell;
		}
	}

	/* Scan the last column and the last row together. */
	for (i = 0; i < 7; i++)
	{
		if (matrix[i][6] < best)
			best = matrix[i][6];
		if (matrix[6][i] < best)
			best = matrix[6][i];
	}

	return (char)best;
}
|
|
269
|
|
270 void initLookUpTable()
|
|
271 {
|
|
272 int i = 0;
|
|
273
|
|
274 MASK = _mm_insert_epi16(MASK,1,0);
|
|
275 MASK = _mm_insert_epi16(MASK,1,1);
|
|
276 MASK = _mm_insert_epi16(MASK,1,2);
|
|
277 MASK = _mm_insert_epi16(MASK,1,3);
|
|
278 MASK = _mm_insert_epi16(MASK,1,4);
|
|
279 MASK = _mm_insert_epi16(MASK,0,5);
|
|
280 MASK = _mm_insert_epi16(MASK,0,6);
|
|
281 MASK = _mm_insert_epi16(MASK,0,7);
|
|
282
|
|
283 for(i = 0 ; i < errThreshold + 1; i++)
|
|
284 {
|
|
285 scoreF[0][i] = i;
|
|
286 scoreF[i][0] = i;
|
|
287 }
|
|
288
|
|
289 for(i = 0 ; i < errThreshold + 1; i++)
|
|
290 {
|
|
291 scoreB[0][i] = i;
|
|
292 scoreB[i][0] = i;
|
|
293 }
|
|
294
|
|
295
|
|
296 }
|
|
297
|
|
298 int backwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb)
|
|
299 {
|
|
300 if(lenb == 0 || lena == 0)
|
|
301 return 0;
|
|
302
|
|
303 int i = 0;
|
|
304 int j = 0;
|
|
305 int k = 0;
|
|
306
|
|
307
|
|
308 int e = errThreshold;
|
|
309
|
|
310 char flag = 0;
|
|
311
|
|
312 int minError = 2*e;
|
|
313
|
|
314 __m128i R0, R1;
|
|
315 __m128i Diag;
|
|
316 __m128i Side1, Side2;
|
|
317 __m128i Down1, Down2;
|
|
318 __m128i Error;
|
|
319 __m128i tmp;
|
|
320
|
|
321 /* initialize */
|
|
322 R0 = _mm_setzero_si128 ();
|
|
323 R1 = _mm_setzero_si128 ();
|
|
324 Diag = _mm_setzero_si128 ();
|
|
325 Side1 = _mm_setzero_si128 ();
|
|
326 Side2 = _mm_setzero_si128 ();
|
|
327 Down1 = _mm_setzero_si128 ();
|
|
328 Down2 = _mm_setzero_si128 ();
|
|
329 Error = _mm_setzero_si128 ();
|
|
330 tmp = _mm_setzero_si128 ();
|
|
331 /* end initialize */
|
|
332
|
|
333 if(lenb <= e)
|
|
334 {
|
|
335 return smallEditDistanceB(a,lena,b,lenb);
|
|
336 }
|
|
337
|
|
338
|
|
339 R1 = _mm_xor_si128(R1, R1);
|
|
340 R0 = _mm_xor_si128(R0, R0);
|
|
341
|
|
342 Diag = _mm_xor_si128(Diag, Diag);
|
|
343 Side1 = _mm_xor_si128(Side1, Side1);
|
|
344 Down1 = _mm_xor_si128(Down1, Down1);
|
|
345
|
|
346 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
347
|
|
348 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
349 Side1 = _mm_insert_epi16(Side1,2*e,1);
|
|
350
|
|
351 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
352 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
353 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
354
|
|
355 R0 = _mm_insert_epi16(R0,0,0);
|
|
356
|
|
357 R1 = _mm_insert_epi16(R1,1,0);
|
|
358 R1 = _mm_insert_epi16(R1,1,1);
|
|
359
|
|
360 for(i=2; i <= e; i++)
|
|
361 {
|
|
362 //set side
|
|
363 Side1 = _mm_slli_si128(Side1,2);
|
|
364 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
365
|
|
366 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
367 Down1 = _mm_slli_si128(Down1,2);
|
|
368 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
369
|
|
370 Diag = _mm_xor_si128(Diag, Diag);
|
|
371 if( i%2 == 0)
|
|
372 {
|
|
373 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
374
|
|
375 for(j=1;j<=i-1;j++)
|
|
376 {
|
|
377 Diag = _mm_slli_si128(Diag, 2);
|
|
378 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0);
|
|
379 }
|
|
380 Diag = _mm_slli_si128(Diag, 2);
|
|
381 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
382
|
|
383 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
|
|
384 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
|
|
385 }
|
|
386
|
|
387 else
|
|
388 {
|
|
389 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
390 for(j=i/2-1;j>=-i/2;j--)
|
|
391 {
|
|
392 Diag = _mm_slli_si128(Diag, 2);
|
|
393 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0);
|
|
394 }
|
|
395 Diag = _mm_slli_si128(Diag, 2);
|
|
396 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
397
|
|
398 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
399 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
400 }
|
|
401 }
|
|
402 Error = _mm_xor_si128(Error, Error);
|
|
403 Side2 = _mm_xor_si128(Side2, Side2);
|
|
404 Down2 = _mm_xor_si128(Down2, Down2);
|
|
405 Down1 = _mm_xor_si128(Down1, Down1);
|
|
406
|
|
407 Error = _mm_insert_epi16(Error,e,0);
|
|
408 Side1 = _mm_insert_epi16(Side2,2*e,0);
|
|
409 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
410 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
411
|
|
412
|
|
413 for(j=0; j < e; j++)
|
|
414 {
|
|
415 Side2 = _mm_slli_si128(Side2, 2);
|
|
416 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
417
|
|
418 Side1 = _mm_slli_si128(Side1, 2);
|
|
419 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
420
|
|
421 Down1 = _mm_slli_si128(Down1, 2);
|
|
422 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
423
|
|
424 Down2 = _mm_slli_si128(Down2, 2);
|
|
425 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
426
|
|
427 Error = _mm_slli_si128(Error, 2);
|
|
428 Error = _mm_insert_epi16(Error, e, 0);
|
|
429 }
|
|
430
|
|
431 Down2= _mm_slli_si128(Down2, 2);
|
|
432 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
433
|
|
434 for(; i <= 2*lenb-(e-1);i++)
|
|
435 {
|
|
436 flag = 0;
|
|
437 Diag = _mm_xor_si128(Diag, Diag);
|
|
438 if( i%2 == 0)
|
|
439 {
|
|
440 for(j=e/2;j>=-e/2;j--)
|
|
441 {
|
|
442 Diag = _mm_slli_si128(Diag, 2);
|
|
443 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0);
|
|
444 }
|
|
445
|
|
446 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
447 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
448
|
|
449
|
|
450 if(_mm_extract_epi16(R0,0) <= e)
|
|
451 flag = 1;
|
|
452 tmp = _mm_srli_si128(R0,2);
|
|
453 for(j=0; j <= e;j++)
|
|
454 {
|
|
455 if(_mm_extract_epi16(tmp,0) <= e)
|
|
456 flag = 1;
|
|
457 tmp = _mm_srli_si128(tmp,2);
|
|
458 }
|
|
459
|
|
460 if(flag == 0)
|
|
461 return -1;
|
|
462
|
|
463 if(i == 2*lenb-e)
|
|
464 {
|
|
465 tmp = _mm_srli_si128(R0,2);
|
|
466 for(k=0; k < e-2;k++)
|
|
467 tmp = _mm_srli_si128(tmp,2);
|
|
468 minError = _mm_extract_epi16(tmp,0);
|
|
469 }
|
|
470
|
|
471 }
|
|
472
|
|
473 else
|
|
474 {
|
|
475 for(j=e/2;j>=-e/2-1;j--)
|
|
476 {
|
|
477 Diag = _mm_slli_si128(Diag, 2);
|
|
478 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i)/2-j-1)),0);
|
|
479 }
|
|
480
|
|
481 // printf("@%d %d %d %d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1), _mm_extract_epi16(Diag,2),
|
|
482 // _mm_extract_epi16(Diag,3));
|
|
483
|
|
484 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
485
|
|
486 // printf("#~%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
|
|
487 // _mm_extract_epi16(R1,3));
|
|
488
|
|
489 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
490
|
|
491 // printf("$%d %d %d %d\n", _mm_extract_epi16(Side2,0), _mm_extract_epi16(Side2,1), _mm_extract_epi16(Side2,2),
|
|
492 // _mm_extract_epi16(Side2,3));
|
|
493
|
|
494 // printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
|
|
495 // _mm_extract_epi16(R1,3));
|
|
496
|
|
497
|
|
498
|
|
499 if(i >= 2*lenb-e)
|
|
500 {
|
|
501 tmp = _mm_srli_si128(R1,2);
|
|
502 for(k=0; k < e-1;k++)
|
|
503 tmp = _mm_srli_si128(tmp,2);
|
|
504 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
505 }
|
|
506 }
|
|
507 }
|
|
508
|
|
509 //first cell
|
|
510 Diag = _mm_xor_si128(Diag,Diag);
|
|
511 Diag = _mm_insert_epi16(Diag, *(b-(lenb-3)) != *(a-lena), 0);
|
|
512 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena-1)), 1);
|
|
513 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-2)), 2);
|
|
514 Diag = _mm_insert_epi16(Diag, 2*e, 3);
|
|
515 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
516 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
517
|
|
518 minError = min(minError, _mm_extract_epi16(R1,2));
|
|
519
|
|
520 //second cell
|
|
521 Diag = _mm_xor_si128(Diag,Diag);
|
|
522 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena)), 0);
|
|
523 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena-1)), 1);
|
|
524 Diag = _mm_insert_epi16(Diag, 2*e, 2);
|
|
525
|
|
526 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
527 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
528
|
|
529 minError = min(minError, _mm_extract_epi16(R0,1));
|
|
530
|
|
531 //third cell
|
|
532 Diag = _mm_xor_si128(Diag,Diag);
|
|
533 Diag = _mm_insert_epi16(Diag, *(b-(lenb-2)) != *(a-(lena+1)), 0);
|
|
534 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena)), 1);
|
|
535 Diag = _mm_insert_epi16(Diag, 2*e, 2);
|
|
536
|
|
537 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
538 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
539
|
|
540 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
541
|
|
542 //forth
|
|
543 Diag = _mm_xor_si128(Diag,Diag);
|
|
544 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+1)), 0);
|
|
545 Diag = _mm_insert_epi16(Diag, 2*e, 1);
|
|
546
|
|
547 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
548 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
549
|
|
550 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
551
|
|
552 //fifth
|
|
553 Diag = _mm_xor_si128(Diag,Diag);
|
|
554 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1)) != *(a-(lena+2)), 0);
|
|
555 Diag = _mm_insert_epi16(Diag, 2*e, 1);
|
|
556
|
|
557 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
558 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
559
|
|
560 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
561
|
|
562 if(minError > e)
|
|
563 return -1;
|
|
564 return minError;
|
|
565 }
|
|
566
|
|
567 int backwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb)
|
|
568 {
|
|
569 if(lenb == 0 || lena == 0)
|
|
570 return 0;
|
|
571
|
|
572 int i = 0;
|
|
573 int j = 0;
|
|
574 int k = 0;
|
|
575
|
|
576
|
|
577 int e = errThreshold;
|
|
578
|
|
579 char flag = 0;
|
|
580
|
|
581 int minError = 2*e;
|
|
582
|
|
583 __m128i R0, R1;
|
|
584 __m128i Diag;
|
|
585 __m128i Side1, Side2;
|
|
586 __m128i Down1, Down2;
|
|
587 __m128i Error;
|
|
588 __m128i tmp;
|
|
589
|
|
590 /* initialize */
|
|
591 R0 = _mm_setzero_si128 ();
|
|
592 R1 = _mm_setzero_si128 ();
|
|
593 Diag = _mm_setzero_si128 ();
|
|
594 Side1 = _mm_setzero_si128 ();
|
|
595 Side2 = _mm_setzero_si128 ();
|
|
596 Down1 = _mm_setzero_si128 ();
|
|
597 Down2 = _mm_setzero_si128 ();
|
|
598 Error = _mm_setzero_si128 ();
|
|
599 tmp = _mm_setzero_si128 ();
|
|
600 /* end initialize */
|
|
601
|
|
602 if(lenb <= e)
|
|
603 {
|
|
604 return smallEditDistanceB(a,lena,b,lenb);
|
|
605 }
|
|
606
|
|
607
|
|
608 R1 = _mm_xor_si128(R1, R1);
|
|
609 R0 = _mm_xor_si128(R0, R0);
|
|
610
|
|
611 Diag = _mm_xor_si128(Diag, Diag);
|
|
612 Side1 = _mm_xor_si128(Side1, Side1);
|
|
613 Down1 = _mm_xor_si128(Down1, Down1);
|
|
614
|
|
615 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
616
|
|
617 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
618 Side1 = _mm_insert_epi16(Side1,2*e,1);
|
|
619
|
|
620 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
621 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
622 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
623
|
|
624 R0 = _mm_insert_epi16(R0,0,0);
|
|
625
|
|
626 R1 = _mm_insert_epi16(R1,1,0);
|
|
627 R1 = _mm_insert_epi16(R1,1,1);
|
|
628
|
|
629 for(i=2; i <= e; i++)
|
|
630 {
|
|
631 //set side
|
|
632 Side1 = _mm_slli_si128(Side1,2);
|
|
633 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
634
|
|
635 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
636 Down1 = _mm_slli_si128(Down1,2);
|
|
637 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
638
|
|
639 Diag = _mm_xor_si128(Diag, Diag);
|
|
640 if( i%2 == 0)
|
|
641 {
|
|
642 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
643
|
|
644 for(j=1;j<=i-1;j++)
|
|
645 {
|
|
646 Diag = _mm_slli_si128(Diag, 2);
|
|
647 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+(i/2-j))) != *(a-(i/2-1-(i/2-j))),0);
|
|
648 }
|
|
649 Diag = _mm_slli_si128(Diag, 2);
|
|
650 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
651
|
|
652 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
|
|
653 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
|
|
654 }
|
|
655
|
|
656 else
|
|
657 {
|
|
658 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
659 for(j=i/2-1;j>=-i/2;j--)
|
|
660 {
|
|
661 Diag = _mm_slli_si128(Diag, 2);
|
|
662 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2+j-1)) != *(a-((i-1)/2-j-1)),0);
|
|
663 }
|
|
664 Diag = _mm_slli_si128(Diag, 2);
|
|
665 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
666
|
|
667 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
668 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
669 }
|
|
670 }
|
|
671 Error = _mm_xor_si128(Error, Error);
|
|
672 Side2 = _mm_xor_si128(Side2, Side2);
|
|
673 Down2 = _mm_xor_si128(Down2, Down2);
|
|
674 Down1 = _mm_xor_si128(Down1, Down1);
|
|
675
|
|
676 Error = _mm_insert_epi16(Error,e,0);
|
|
677 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
678 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
679
|
|
680
|
|
681 for(j=0; j < e; j++)
|
|
682 {
|
|
683 Side2 = _mm_slli_si128(Side2, 2);
|
|
684 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
685
|
|
686 Down1 = _mm_slli_si128(Down1, 2);
|
|
687 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
688
|
|
689 Down2 = _mm_slli_si128(Down2, 2);
|
|
690 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
691
|
|
692 Error = _mm_slli_si128(Error, 2);
|
|
693 Error = _mm_insert_epi16(Error, e, 0);
|
|
694 }
|
|
695
|
|
696 Down2= _mm_slli_si128(Down2, 2);
|
|
697 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
698
|
|
699 for(; i <= 2*lenb-(e-1);i++)
|
|
700 {
|
|
701 flag = 0;
|
|
702 Diag = _mm_xor_si128(Diag, Diag);
|
|
703 if( i%2 == 0)
|
|
704 {
|
|
705 for(j=e/2;j>=-e/2;j--)
|
|
706 {
|
|
707 Diag = _mm_slli_si128(Diag, 2);
|
|
708 Diag = _mm_insert_epi16(Diag, *(b-(i/2-1+j)) != *(a-(i/2-1-j)),0);
|
|
709 }
|
|
710
|
|
711 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
712 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
713
|
|
714 if(_mm_extract_epi16(R0,0) <= e)
|
|
715 flag = 1;
|
|
716 tmp = _mm_srli_si128(R0,2);
|
|
717 for(j=0; j <= e;j++)
|
|
718 {
|
|
719 if(_mm_extract_epi16(tmp,0) <= e)
|
|
720 flag = 1;
|
|
721 tmp = _mm_srli_si128(tmp,2);
|
|
722 }
|
|
723
|
|
724 if(flag == 0)
|
|
725 return -1;
|
|
726
|
|
727 if(i == 2*lenb-e)
|
|
728 {
|
|
729 tmp = _mm_srli_si128(R0,2);
|
|
730 for(k=0; k < e-1;k++)
|
|
731 tmp = _mm_srli_si128(tmp,2);
|
|
732 minError = _mm_extract_epi16(tmp,0);
|
|
733 }
|
|
734
|
|
735 }
|
|
736
|
|
737 else
|
|
738 {
|
|
739 for(j=-e/2+1;j<=e/2;j++)
|
|
740 {
|
|
741 Diag = _mm_slli_si128(Diag, 2);
|
|
742 Diag = _mm_insert_epi16(Diag, *(b-((i+1)/2-j-1)) != *(a-((i-1)/2+j-1)),0);
|
|
743 }
|
|
744
|
|
745 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
746 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
747
|
|
748
|
|
749 if(i >= 2*lenb-e)
|
|
750 {
|
|
751 tmp = _mm_srli_si128(R1,2);
|
|
752 for(k=0; k < e-2;k++)
|
|
753 tmp = _mm_srli_si128(tmp,2);
|
|
754 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
755 }
|
|
756 }
|
|
757 }
|
|
758
|
|
759 j=0;
|
|
760 int tmpE = e;
|
|
761 for(;j<2*(e-2)+1;j++)
|
|
762 {
|
|
763
|
|
764 Diag = _mm_xor_si128(Diag, Diag);
|
|
765 //set the first element
|
|
766 if(j==0)
|
|
767 {
|
|
768 for( k=0;k<=e-1;k++ )
|
|
769 {
|
|
770 Diag = _mm_slli_si128(Diag, 2);
|
|
771 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
772 }
|
|
773
|
|
774 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
775 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
776
|
|
777
|
|
778 tmpE--;
|
|
779 tmp = _mm_srli_si128(R0,2);
|
|
780 for(k=0; k < e-2;k++)
|
|
781 tmp = _mm_srli_si128(tmp,2);
|
|
782 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
783 }
|
|
784 else if(j%2 == 0)
|
|
785 {
|
|
786 for(k=0;k<tmpE;k++)
|
|
787 {
|
|
788 Diag = _mm_slli_si128(Diag, 2);
|
|
789 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
790 }
|
|
791
|
|
792 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
793 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
794
|
|
795 tmpE--;
|
|
796
|
|
797 tmp = _mm_srli_si128(R0,2);
|
|
798 for(k=0; k < tmpE-1;k++)
|
|
799 tmp = _mm_srli_si128(tmp,2);
|
|
800 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
801 }
|
|
802
|
|
803
|
|
804 else
|
|
805 {
|
|
806 for(k=0;k<tmpE;k++)
|
|
807 {
|
|
808 Diag = _mm_slli_si128(Diag, 2);
|
|
809 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
810 }
|
|
811
|
|
812 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
813 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
814
|
|
815 tmp = _mm_srli_si128(R1,2);
|
|
816 for(k=0; k < tmpE-2;k++)
|
|
817 tmp = _mm_srli_si128(tmp,2);
|
|
818 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
819 }
|
|
820 i++;
|
|
821 }
|
|
822 //Diag
|
|
823
|
|
824 Diag = _mm_xor_si128(Diag,Diag);
|
|
825 Diag = _mm_insert_epi16(Diag, 2*e, 0);
|
|
826 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
|
|
827
|
|
828 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
829 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
830
|
|
831 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
|
|
832 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
833
|
|
834 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
835 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
836
|
|
837 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
838
|
|
839 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
|
|
840 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
841
|
|
842 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
843 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
844
|
|
845 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
846
|
|
847 if(minError > e)
|
|
848 return -1;
|
|
849 return minError;
|
|
850 }
|
|
851
|
|
852 inline int backwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb)
|
|
853 {
|
|
854 if(lenb == 0 || lena == 0)
|
|
855 return 0;
|
|
856
|
|
857 int i = 0;
|
|
858 int j = 0;
|
|
859 int k = 0;
|
|
860
|
|
861 int i0;
|
|
862 int i1;
|
|
863 int i2;
|
|
864 int i4;
|
|
865 int i5;
|
|
866
|
|
867 int e = 4;
|
|
868 int mismatch = errThreshold;
|
|
869
|
|
870 int minError = 2*errThreshold;
|
|
871 int index = 0;
|
|
872 int tmpValue = 0;
|
|
873
|
|
874 if(lenb <= e)
|
|
875 {
|
|
876 return smallEditDistanceB(a,lena,b,lenb);
|
|
877 }
|
|
878
|
|
879
|
|
880 __m128i R0, R1;
|
|
881 __m128i Diag;
|
|
882 __m128i Side1, Side2;
|
|
883 __m128i Down1, Down2;
|
|
884 __m128i tmp;
|
|
885 __m128i SeqA, SeqB;
|
|
886 __m128i Result;
|
|
887
|
|
888 /* initialize */
|
|
889 R0 = _mm_setzero_si128 ();
|
|
890 R1 = _mm_setzero_si128 ();
|
|
891 Diag = _mm_setzero_si128 ();
|
|
892 Side1 = _mm_setzero_si128 ();
|
|
893 Side2 = _mm_setzero_si128 ();
|
|
894 Down1 = _mm_setzero_si128 ();
|
|
895 Down2 = _mm_setzero_si128 ();
|
|
896 SeqA = _mm_setzero_si128 ();
|
|
897 SeqB = _mm_setzero_si128 ();
|
|
898 Result = _mm_setzero_si128 ();
|
|
899 /* end initialize */
|
|
900
|
|
901 R1 = _mm_xor_si128(R1, R1);
|
|
902 R0 = _mm_xor_si128(R0, R0);
|
|
903
|
|
904 Diag = _mm_xor_si128(Diag, Diag);
|
|
905 Diag = _mm_insert_epi16(Diag,minError,0);
|
|
906
|
|
907 i0 = (a[0] != b[0]);
|
|
908 i1 = min(i0, ( *(a-1)!=*b) )+1;
|
|
909 i2 = min(i0,( a[0] != *(b-1) ) )+1;
|
|
910
|
|
911 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1);
|
|
912 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1;
|
|
913 i5 = min(i2, (a[0] != *(b-2))+1)+1;
|
|
914
|
|
915 R1 = _mm_insert_epi16(R1, 3, 0);
|
|
916 R1 = _mm_insert_epi16(R1, i1, 1);
|
|
917 R1 = _mm_insert_epi16(R1, i2, 2);
|
|
918 R1 = _mm_insert_epi16(R1, 3, 3);
|
|
919
|
|
920
|
|
921 R0 = _mm_insert_epi16(R0, 4, 0);
|
|
922 R0 = _mm_insert_epi16(R0, i4, 1);
|
|
923 R0 = _mm_insert_epi16(R0, i0, 2);
|
|
924 R0 = _mm_insert_epi16(R0, i5, 3);
|
|
925 R0 = _mm_insert_epi16(R0, 4, 4);
|
|
926
|
|
927
|
|
928 Side2 = _mm_xor_si128(Side2, Side2);
|
|
929 Down2 = _mm_xor_si128(Down2, Down2);
|
|
930 Down1 = _mm_xor_si128(Down1, Down1);
|
|
931 Side1 = _mm_xor_si128(Side1, Side1);
|
|
932
|
|
933 Side2 = _mm_insert_epi16(Side2,minError,0);
|
|
934 Down1 = _mm_insert_epi16(Down1,minError,0);
|
|
935
|
|
936 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
937
|
|
938 index = 0;
|
|
939 for(j=0; j < e; j++)
|
|
940 {
|
|
941 Side2 = _mm_slli_si128(Side2, 2);
|
|
942 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
943
|
|
944 Down1 = _mm_slli_si128(Down1, 2);
|
|
945 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
946
|
|
947 Down2 = _mm_slli_si128(Down2, 2);
|
|
948 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
949
|
|
950 Side1 = _mm_slli_si128(Side1, 2);
|
|
951 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
952
|
|
953 SeqA = _mm_slli_si128(SeqA, 2);
|
|
954 SeqB = _mm_slli_si128(SeqB, 2);
|
|
955 SeqA = _mm_insert_epi16(SeqA,*(a-index),0);
|
|
956 SeqB = _mm_insert_epi16(SeqB,*(b-index),0);
|
|
957 index++;
|
|
958 }
|
|
959
|
|
960 Down2= _mm_slli_si128(Down2, 2);
|
|
961 Down2 = _mm_insert_epi16(Down2,minError,0);
|
|
962
|
|
963 index = 4;
|
|
964 i = 5;
|
|
965
|
|
966 int loopEnd = 2*lenb-(e-1);
|
|
967 for(; i <= loopEnd ;i++)
|
|
968 {
|
|
969
|
|
970 Diag = _mm_xor_si128(Diag, Diag);
|
|
971 if( i%2 == 0)
|
|
972 {
|
|
973 SeqA = _mm_slli_si128(SeqA, 2);
|
|
974 SeqB = _mm_slli_si128(SeqB, 2);
|
|
975 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0);
|
|
976 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0);
|
|
977
|
|
978 index++;
|
|
979
|
|
980 tmp = _mm_shufflelo_epi16(SeqB,27);
|
|
981 tmp = _mm_slli_si128(tmp, 2);
|
|
982 tmpValue = _mm_extract_epi16(tmp, 5);
|
|
983 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
|
|
984
|
|
985 Result = _mm_cmpeq_epi16(SeqA, tmp);
|
|
986 Diag = _mm_andnot_si128(Result, MASK);
|
|
987
|
|
988 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
989 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
990
|
|
991 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold
|
|
992 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold && _mm_extract_epi16(R1, 0) > errThreshold
|
|
993 && _mm_extract_epi16(R1, 1) > errThreshold && _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold)
|
|
994 return -1;
|
|
995
|
|
996 if(i == 2*lenb-e)
|
|
997 {
|
|
998 tmp = _mm_srli_si128(R0,2);
|
|
999 for(k=0; k < e-1;k++)
|
|
1000 tmp = _mm_srli_si128(tmp,2);
|
|
1001 minError = _mm_extract_epi16(tmp,0);
|
|
1002 }
|
|
1003
|
|
1004 }
|
|
1005
|
|
1006 else
|
|
1007 {
|
|
1008 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
|
|
1009 Diag = _mm_andnot_si128(Result, MASK);
|
|
1010
|
|
1011 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1012 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1013
|
|
1014
|
|
1015 if(i >= 2*lenb-e)
|
|
1016 {
|
|
1017 tmp = _mm_srli_si128(R1,2);
|
|
1018 for(k=0; k < e-2;k++)
|
|
1019 tmp = _mm_srli_si128(tmp,2);
|
|
1020 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1021 }
|
|
1022 }
|
|
1023
|
|
1024
|
|
1025 }
|
|
1026
|
|
1027 j=0;
|
|
1028 int tmpE = e;
|
|
1029 for(;j<2*(e-2)+1;j++)
|
|
1030 {
|
|
1031
|
|
1032 Diag = _mm_xor_si128(Diag, Diag);
|
|
1033 //set the first element
|
|
1034 if(j==0)
|
|
1035 {
|
|
1036 for( k=0;k<=e-1;k++ )
|
|
1037 {
|
|
1038 Diag = _mm_slli_si128(Diag, 2);
|
|
1039 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1040 }
|
|
1041
|
|
1042 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1043 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1044
|
|
1045 tmpE--;
|
|
1046
|
|
1047 tmp = _mm_srli_si128(R0,2);
|
|
1048 for(k=0; k < e-2;k++)
|
|
1049 tmp = _mm_srli_si128(tmp,2);
|
|
1050 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1051 }
|
|
1052 else if(j%2 == 0)
|
|
1053 {
|
|
1054 for(k=0;k<tmpE;k++)
|
|
1055 {
|
|
1056 Diag = _mm_slli_si128(Diag, 2);
|
|
1057 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1058 }
|
|
1059
|
|
1060 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1061 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1062
|
|
1063 tmpE--;
|
|
1064
|
|
1065 tmp = _mm_srli_si128(R0,2);
|
|
1066 for(k=0; k < tmpE-1;k++)
|
|
1067 tmp = _mm_srli_si128(tmp,2);
|
|
1068 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1069 }
|
|
1070
|
|
1071
|
|
1072 else
|
|
1073 {
|
|
1074 for(k=0;k<tmpE;k++)
|
|
1075 {
|
|
1076 Diag = _mm_slli_si128(Diag, 2);
|
|
1077 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1078 }
|
|
1079
|
|
1080 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1081 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1082
|
|
1083 tmp = _mm_srli_si128(R1,2);
|
|
1084 for(k=0; k < tmpE-2;k++)
|
|
1085 tmp = _mm_srli_si128(tmp,2);
|
|
1086 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1087 }
|
|
1088 i++;
|
|
1089 }
|
|
1090 //Diag
|
|
1091
|
|
1092 Diag = _mm_xor_si128(Diag,Diag);
|
|
1093 Diag = _mm_insert_epi16(Diag, 2*errThreshold, 0);
|
|
1094 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
|
|
1095
|
|
1096 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1097 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
1098
|
|
1099 Down1 = _mm_insert_epi16(Down1, 2*errThreshold, 0);
|
|
1100 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
1101
|
|
1102 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
1103 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
1104
|
|
1105 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
1106
|
|
1107 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
|
|
1108 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
1109
|
|
1110 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
1111 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
1112
|
|
1113 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
1114
|
|
1115 if(minError > mismatch)
|
|
1116 return -1;
|
|
1117 return minError;
|
|
1118 }
|
|
1119
|
|
1120 int backwardEditDistance4SSE2(char *a, int lena, char *b,int lenb)
|
|
1121 {
|
|
1122 if(lenb == 0 || lena == 0)
|
|
1123 return 0;
|
|
1124
|
|
1125 int i = 0;
|
|
1126 int j = 0;
|
|
1127 int k = 0;
|
|
1128
|
|
1129 int i0;
|
|
1130 int i1;
|
|
1131 int i2;
|
|
1132 int i4;
|
|
1133 int i5;
|
|
1134
|
|
1135 int e = errThreshold;
|
|
1136
|
|
1137 int minError = 2*e;
|
|
1138 int index = 0;
|
|
1139 int tmpValue = 0;
|
|
1140
|
|
1141 if(lenb <= e)
|
|
1142 {
|
|
1143 return smallEditDistanceB(a,lena,b,lenb);
|
|
1144 }
|
|
1145
|
|
1146 __m128i R0, R1;
|
|
1147 __m128i Diag;
|
|
1148 __m128i Side1, Side2;
|
|
1149 __m128i Down1, Down2;
|
|
1150 __m128i tmp;
|
|
1151 __m128i SeqA, SeqB;
|
|
1152 __m128i Result;
|
|
1153
|
|
1154 /* initialize */
|
|
1155 R0 = _mm_setzero_si128 ();
|
|
1156 R1 = _mm_setzero_si128 ();
|
|
1157 Diag = _mm_setzero_si128 ();
|
|
1158 Side1 = _mm_setzero_si128 ();
|
|
1159 Side2 = _mm_setzero_si128 ();
|
|
1160 Down1 = _mm_setzero_si128 ();
|
|
1161 Down2 = _mm_setzero_si128 ();
|
|
1162 SeqA = _mm_setzero_si128 ();
|
|
1163 SeqB = _mm_setzero_si128 ();
|
|
1164 Result = _mm_setzero_si128 ();
|
|
1165 /* end initialize */
|
|
1166
|
|
1167 R1 = _mm_xor_si128(R1, R1);
|
|
1168 R0 = _mm_xor_si128(R0, R0);
|
|
1169
|
|
1170 Diag = _mm_xor_si128(Diag, Diag);
|
|
1171 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
1172
|
|
1173 i0 = (a[0] != b[0]);
|
|
1174 i1 = min(i0, ( *(a-1)!=*b) )+1;
|
|
1175 i2 = min(i0,( a[0] != *(b-1) ) )+1;
|
|
1176
|
|
1177 i0 = min3( i0+ ( *(a-1)!=*(b-1) ),i1+1,i2+1);
|
|
1178 i4 = min(i1, ( *(a-2)!=b[0] )+1)+1;
|
|
1179 i5 = min(i2, (a[0] != *(b-2))+1)+1;
|
|
1180
|
|
1181 R1 = _mm_insert_epi16(R1, 3, 0);
|
|
1182 R1 = _mm_insert_epi16(R1, i1, 1);
|
|
1183 R1 = _mm_insert_epi16(R1, i2, 2);
|
|
1184 R1 = _mm_insert_epi16(R1, 3, 3);
|
|
1185
|
|
1186
|
|
1187 R0 = _mm_insert_epi16(R0, 4, 0);
|
|
1188 R0 = _mm_insert_epi16(R0, i4, 1);
|
|
1189 R0 = _mm_insert_epi16(R0, i0, 2);
|
|
1190 R0 = _mm_insert_epi16(R0, i5, 3);
|
|
1191 R0 = _mm_insert_epi16(R0, 4, 4);
|
|
1192
|
|
1193 Side2 = _mm_xor_si128(Side2, Side2);
|
|
1194 Down2 = _mm_xor_si128(Down2, Down2);
|
|
1195 Down1 = _mm_xor_si128(Down1, Down1);
|
|
1196 Side1 = _mm_xor_si128(Side1, Side1);
|
|
1197
|
|
1198 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
1199 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
1200
|
|
1201 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1202
|
|
1203 index = 0;
|
|
1204 for(j=0; j < e; j++)
|
|
1205 {
|
|
1206 Side2 = _mm_slli_si128(Side2, 2);
|
|
1207 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
1208
|
|
1209 Down1 = _mm_slli_si128(Down1, 2);
|
|
1210 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
1211
|
|
1212 Down2 = _mm_slli_si128(Down2, 2);
|
|
1213 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
1214
|
|
1215 Side1 = _mm_slli_si128(Side1, 2);
|
|
1216 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1217
|
|
1218 SeqA = _mm_slli_si128(SeqA, 2);
|
|
1219 SeqB = _mm_slli_si128(SeqB, 2);
|
|
1220 SeqA = _mm_insert_epi16(SeqA,*(a-index),0);
|
|
1221 SeqB = _mm_insert_epi16(SeqB,*(b-index),0);
|
|
1222 index++;
|
|
1223 }
|
|
1224
|
|
1225 Down2= _mm_slli_si128(Down2, 2);
|
|
1226 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
1227
|
|
1228 index = 4;
|
|
1229 i = 5;
|
|
1230 int loopEnd = 2*lenb-(e-1);
|
|
1231 for(; i <= loopEnd ;i++)
|
|
1232 {
|
|
1233
|
|
1234 Diag = _mm_xor_si128(Diag, Diag);
|
|
1235 if( i%2 == 0)
|
|
1236 {
|
|
1237 SeqA = _mm_slli_si128(SeqA, 2);
|
|
1238 SeqB = _mm_slli_si128(SeqB, 2);
|
|
1239 SeqA = _mm_insert_epi16(SeqA,*(a-(index)),0);
|
|
1240 SeqB = _mm_insert_epi16(SeqB,*(b-(index)),0);
|
|
1241
|
|
1242 index++;
|
|
1243
|
|
1244 tmp = _mm_shufflelo_epi16(SeqB,27);
|
|
1245 tmp = _mm_slli_si128(tmp, 2);
|
|
1246 tmpValue = _mm_extract_epi16(tmp, 5);
|
|
1247 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
|
|
1248
|
|
1249 Result = _mm_cmpeq_epi16(SeqA, tmp);
|
|
1250 Diag = _mm_andnot_si128(Result, MASK);
|
|
1251
|
|
1252 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1253 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1254
|
|
1255 //tmp = _mm_sub_epi16(Error, R0);
|
|
1256 //i0 = _mm_movemask_epi8(tmp);
|
|
1257
|
|
1258 if( _mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e
|
|
1259 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e &&
|
|
1260 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e )
|
|
1261 return -1;
|
|
1262
|
|
1263 if(i == 2*lenb-e)
|
|
1264 {
|
|
1265 tmp = _mm_srli_si128(R0,2);
|
|
1266 for(k=0; k < e-1;k++)
|
|
1267 tmp = _mm_srli_si128(tmp,2);
|
|
1268 minError = _mm_extract_epi16(tmp,0);
|
|
1269 }
|
|
1270
|
|
1271 }
|
|
1272
|
|
1273 else
|
|
1274 {
|
|
1275 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
|
|
1276 Diag = _mm_andnot_si128(Result, MASK);
|
|
1277
|
|
1278 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1279 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1280
|
|
1281 if(i >= 2*lenb-e)
|
|
1282 {
|
|
1283 tmp = _mm_srli_si128(R1,2);
|
|
1284 for(k=0; k < e-2;k++)
|
|
1285 tmp = _mm_srli_si128(tmp,2);
|
|
1286 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1287 }
|
|
1288 }
|
|
1289
|
|
1290
|
|
1291 }
|
|
1292
|
|
1293 j=0;
|
|
1294
|
|
1295 int tmpE = e;
|
|
1296
|
|
1297 for(;j<2*(e-2)+1;j++)
|
|
1298 {
|
|
1299
|
|
1300 Diag = _mm_xor_si128(Diag, Diag);
|
|
1301 //set the first element
|
|
1302 if(j==0)
|
|
1303 {
|
|
1304 for( k=0;k<=e-1;k++ )
|
|
1305 {
|
|
1306 Diag = _mm_slli_si128(Diag, 2);
|
|
1307 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1308 }
|
|
1309
|
|
1310 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1311 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1312
|
|
1313 tmpE--;
|
|
1314
|
|
1315 tmp = _mm_srli_si128(R0,2);
|
|
1316 for(k=0; k < e-2;k++)
|
|
1317 tmp = _mm_srli_si128(tmp,2);
|
|
1318 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1319 }
|
|
1320 else if(j%2 == 0)
|
|
1321 {
|
|
1322 for(k=0;k<tmpE;k++)
|
|
1323 {
|
|
1324 Diag = _mm_slli_si128(Diag, 2);
|
|
1325 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1326 }
|
|
1327
|
|
1328 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1329 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1330
|
|
1331 tmpE--;
|
|
1332
|
|
1333 tmp = _mm_srli_si128(R0,2);
|
|
1334 for(k=0; k < tmpE-1;k++)
|
|
1335 tmp = _mm_srli_si128(tmp,2);
|
|
1336 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1337 }
|
|
1338
|
|
1339
|
|
1340 else
|
|
1341 {
|
|
1342 for(k=0;k<tmpE;k++)
|
|
1343 {
|
|
1344 Diag = _mm_slli_si128(Diag, 2);
|
|
1345 Diag = _mm_insert_epi16(Diag, *(b-(lenb-1-k)) != *(a-((i-lenb)-1+k)),0);
|
|
1346 }
|
|
1347
|
|
1348 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1349 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1350
|
|
1351 tmp = _mm_srli_si128(R1,2);
|
|
1352 for(k=0; k < tmpE-2;k++)
|
|
1353 tmp = _mm_srli_si128(tmp,2);
|
|
1354 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1355 }
|
|
1356 i++;
|
|
1357 }
|
|
1358 //Diag
|
|
1359
|
|
1360 Diag = _mm_xor_si128(Diag,Diag);
|
|
1361 Diag = _mm_insert_epi16(Diag, 2*e, 0);
|
|
1362 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-2)) != *(b-(lenb-1)), 1);
|
|
1363
|
|
1364 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1365 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
1366
|
|
1367 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
|
|
1368 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
1369
|
|
1370 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
1371 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
1372
|
|
1373 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
1374
|
|
1375 Diag = _mm_insert_epi16(Diag, *(a-(lenb+e-1)) != *(b-(lenb-1)), 0);
|
|
1376 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
1377
|
|
1378 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
1379 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
1380
|
|
1381 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
1382
|
|
1383 if(minError > e)
|
|
1384 return -1;
|
|
1385 return minError;
|
|
1386 }
|
|
1387
|
|
1388 inline int forwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb)
|
|
1389 {
|
|
1390 if(lenb == 0 || lena == 0)
|
|
1391 return 0;
|
|
1392
|
|
1393 int i = 0;
|
|
1394 int j = 0;
|
|
1395 int k = 0;
|
|
1396
|
|
1397 int i0=0;
|
|
1398 int i1=0;
|
|
1399 int i2=0;
|
|
1400 int i4=0;
|
|
1401 int i5=0;
|
|
1402
|
|
1403 int mismatch = errThreshold;
|
|
1404 int e = 4;
|
|
1405
|
|
1406 int minError = 4*mismatch+1;
|
|
1407 int index = 0;
|
|
1408 int tmpValue = 0;
|
|
1409
|
|
1410 if(lenb <= e)
|
|
1411 {
|
|
1412 return smallEditDistanceF(a,lena,b,lenb);
|
|
1413 }
|
|
1414
|
|
1415
|
|
1416 register __m128i R0, R1;
|
|
1417 __m128i Diag;
|
|
1418 __m128i Side1, Side2;
|
|
1419 __m128i Down1, Down2;
|
|
1420 __m128i tmp;
|
|
1421 register __m128i SeqA, SeqB;
|
|
1422 __m128i Result;
|
|
1423
|
|
1424 __m128i tmpSeqA;
|
|
1425 __m128i tmpSeqB;
|
|
1426
|
|
1427 /* initialize */
|
|
1428 R0 = _mm_setzero_si128 ();
|
|
1429 R1 = _mm_setzero_si128 ();
|
|
1430 Diag = _mm_setzero_si128 ();
|
|
1431 Side1 = _mm_setzero_si128 ();
|
|
1432 Side2 = _mm_setzero_si128 ();
|
|
1433 Down1 = _mm_setzero_si128 ();
|
|
1434 Down2 = _mm_setzero_si128 ();
|
|
1435 SeqA = _mm_setzero_si128 ();
|
|
1436 SeqB = _mm_setzero_si128 ();
|
|
1437 Result = _mm_setzero_si128 ();
|
|
1438 /* end initialize */
|
|
1439
|
|
1440
|
|
1441 R1 = _mm_xor_si128(R1, R1);
|
|
1442 R0 = _mm_xor_si128(R0, R0);
|
|
1443
|
|
1444 Diag = _mm_xor_si128(Diag, Diag);
|
|
1445 Diag = _mm_insert_epi16(Diag,minError,0);
|
|
1446
|
|
1447 i0 = (a[0] != b[0]);
|
|
1448 i1 = min(i0, (a[1]!=b[0]))+1;
|
|
1449 i2 = min(i0,(a[0]!=b[1]))+1;
|
|
1450
|
|
1451 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1);
|
|
1452 i4 = min(i1, (a[2]!=b[0])+1)+1;
|
|
1453 i5 = min(i2, (a[0]!=b[2])+1)+1;
|
|
1454
|
|
1455 R1 = _mm_insert_epi16(R1, 3, 0);
|
|
1456 R1 = _mm_insert_epi16(R1, i1, 1);
|
|
1457 R1 = _mm_insert_epi16(R1, i2, 2);
|
|
1458 R1 = _mm_insert_epi16(R1, 3, 3);
|
|
1459
|
|
1460 R0 = _mm_insert_epi16(R0, 4, 0);
|
|
1461 R0 = _mm_insert_epi16(R0, i4, 1);
|
|
1462 R0 = _mm_insert_epi16(R0, i0, 2);
|
|
1463 R0 = _mm_insert_epi16(R0, i5, 3);
|
|
1464 R0 = _mm_insert_epi16(R0, 4, 4);
|
|
1465
|
|
1466 Side2 = _mm_xor_si128(Side2, Side2);
|
|
1467 Down2 = _mm_xor_si128(Down2, Down2);
|
|
1468 Down1 = _mm_xor_si128(Down1, Down1);
|
|
1469 Side1 = _mm_xor_si128(Side1, Side1);
|
|
1470
|
|
1471 Side2 = _mm_insert_epi16(Side2,minError,0);
|
|
1472 Down1 = _mm_insert_epi16(Down1,minError,0);
|
|
1473
|
|
1474 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1475
|
|
1476 index = 0;
|
|
1477 for(j=0; j < e; j++)
|
|
1478 {
|
|
1479 Side2 = _mm_slli_si128(Side2, 2);
|
|
1480 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
1481
|
|
1482 Down1 = _mm_slli_si128(Down1, 2);
|
|
1483 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
1484
|
|
1485 Down2 = _mm_slli_si128(Down2, 2);
|
|
1486 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
1487
|
|
1488 Side1 = _mm_slli_si128(Side1, 2);
|
|
1489 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1490
|
|
1491 SeqA = _mm_slli_si128(SeqA, 2);
|
|
1492 SeqB = _mm_slli_si128(SeqB, 2);
|
|
1493 SeqA = _mm_insert_epi16(SeqA,a[index],0);
|
|
1494 SeqB = _mm_insert_epi16(SeqB,b[index],0);
|
|
1495 index++;
|
|
1496 }
|
|
1497
|
|
1498 Down2= _mm_slli_si128(Down2, 2);
|
|
1499 Down2 = _mm_insert_epi16(Down2,minError,0);
|
|
1500
|
|
1501 index = 4;
|
|
1502 i = 5;
|
|
1503
|
|
1504 int loopEnd = 2*lenb-(e-1);
|
|
1505 for(; i <= loopEnd ;i++)
|
|
1506 {
|
|
1507 if( i%2 == 0)
|
|
1508 {
|
|
1509 tmpSeqA = _mm_slli_si128(SeqA, 2);
|
|
1510 tmpSeqB = _mm_slli_si128(SeqB, 2);
|
|
1511 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0);
|
|
1512 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0);
|
|
1513
|
|
1514 index++;
|
|
1515
|
|
1516 tmp = _mm_shufflelo_epi16(SeqB,27);
|
|
1517 tmp = _mm_slli_si128(tmp, 2);
|
|
1518 tmpValue = _mm_extract_epi16(tmp, 5);
|
|
1519 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
|
|
1520
|
|
1521 Result = _mm_cmpeq_epi16(SeqA, tmp);
|
|
1522 Diag = _mm_andnot_si128(Result, MASK);
|
|
1523
|
|
1524 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1525 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1526
|
|
1527 if(_mm_extract_epi16(R0, 0) > errThreshold && _mm_extract_epi16(R0, 1) > errThreshold && _mm_extract_epi16(R0, 2) > errThreshold
|
|
1528 && _mm_extract_epi16(R0, 3) > errThreshold && _mm_extract_epi16(R0, 4) > errThreshold &&
|
|
1529 _mm_extract_epi16(R1, 0) > errThreshold && _mm_extract_epi16(R1, 1) > errThreshold &&
|
|
1530 _mm_extract_epi16(R1, 2) > errThreshold && _mm_extract_epi16(R1, 3) > errThreshold)
|
|
1531 return -1;
|
|
1532
|
|
1533 if(i == 2*lenb-e)
|
|
1534 {
|
|
1535 tmp = _mm_srli_si128(R0,2);
|
|
1536 for(k=0; k < e-1;k++)
|
|
1537 tmp = _mm_srli_si128(tmp,2);
|
|
1538 minError = _mm_extract_epi16(tmp,0);
|
|
1539 }
|
|
1540
|
|
1541 }
|
|
1542
|
|
1543 else
|
|
1544 {
|
|
1545 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
|
|
1546 Diag = _mm_andnot_si128(Result, MASK);
|
|
1547
|
|
1548 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1549 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1550
|
|
1551 if(i >= 2*lenb-e)
|
|
1552 {
|
|
1553 tmp = _mm_srli_si128(R1,2);
|
|
1554 for(k=0; k < e-2;k++)
|
|
1555 tmp = _mm_srli_si128(tmp,2);
|
|
1556 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1557 }
|
|
1558 }
|
|
1559 }
|
|
1560
|
|
1561 j=0;
|
|
1562 int tmpE = e;
|
|
1563 for(;j<2*(e-2)+1;j++)
|
|
1564 {
|
|
1565
|
|
1566 Diag = _mm_xor_si128(Diag, Diag);
|
|
1567 //set the first element
|
|
1568 if(j==0)
|
|
1569 {
|
|
1570 for( k=0;k<=e-1;k++ )
|
|
1571 {
|
|
1572 Diag = _mm_slli_si128(Diag, 2);
|
|
1573 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1574 }
|
|
1575
|
|
1576 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1577 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1578
|
|
1579 tmpE--;
|
|
1580
|
|
1581 tmp = _mm_srli_si128(R0,2);
|
|
1582 for(k=0; k < e-2;k++)
|
|
1583 tmp = _mm_srli_si128(tmp,2);
|
|
1584 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1585 }
|
|
1586 else if(j%2 == 0)
|
|
1587 {
|
|
1588 for(k=0;k<tmpE;k++)
|
|
1589 {
|
|
1590 Diag = _mm_slli_si128(Diag, 2);
|
|
1591 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1592 }
|
|
1593
|
|
1594 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1595 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1596
|
|
1597 tmpE--;
|
|
1598
|
|
1599 tmp = _mm_srli_si128(R0,2);
|
|
1600 for(k=0; k < tmpE-1;k++)
|
|
1601 tmp = _mm_srli_si128(tmp,2);
|
|
1602 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1603 }
|
|
1604
|
|
1605
|
|
1606 else
|
|
1607 {
|
|
1608 for(k=0;k<tmpE;k++)
|
|
1609 {
|
|
1610 Diag = _mm_slli_si128(Diag, 2);
|
|
1611 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1612 }
|
|
1613
|
|
1614 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1615 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1616
|
|
1617 tmp = _mm_srli_si128(R1,2);
|
|
1618 for(k=0; k < tmpE-2;k++)
|
|
1619 tmp = _mm_srli_si128(tmp,2);
|
|
1620 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1621 }
|
|
1622 i++;
|
|
1623 }
|
|
1624 //Diag
|
|
1625
|
|
1626 Diag = _mm_xor_si128(Diag,Diag);
|
|
1627 Diag = _mm_insert_epi16(Diag, minError, 0);
|
|
1628 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
|
|
1629
|
|
1630 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1631 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
1632
|
|
1633 Down1 = _mm_insert_epi16(Down1, minError, 0);
|
|
1634 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
1635
|
|
1636 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
1637 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
1638
|
|
1639 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
1640
|
|
1641 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0);
|
|
1642 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
1643
|
|
1644 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
1645 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
1646
|
|
1647
|
|
1648 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
1649
|
|
1650
|
|
1651 if(minError > mismatch)
|
|
1652 return -1;
|
|
1653 return minError;
|
|
1654 }
|
|
1655
|
|
1656
|
|
1657
|
|
1658 int forwardEditDistance4SSE2(char *a, int lena, char *b,int lenb)
|
|
1659 {
|
|
1660 if(lenb == 0 || lena == 0)
|
|
1661 return 0;
|
|
1662
|
|
1663 int i = 0;
|
|
1664 int j = 0;
|
|
1665 int k = 0;
|
|
1666
|
|
1667 int i0=0;
|
|
1668 int i1=0;
|
|
1669 int i2=0;
|
|
1670 int i4=0;
|
|
1671 int i5=0;
|
|
1672
|
|
1673 int e = errThreshold;
|
|
1674
|
|
1675 int minError = 2*e;
|
|
1676 int index = 0;
|
|
1677 int tmpValue = 0;
|
|
1678
|
|
1679 if(lenb <= e)
|
|
1680 {
|
|
1681 return smallEditDistanceF(a,lena,b,lenb);
|
|
1682 }
|
|
1683
|
|
1684
|
|
1685 register __m128i R0, R1;
|
|
1686 __m128i Diag;
|
|
1687 __m128i Side1, Side2;
|
|
1688 __m128i Down1, Down2;
|
|
1689 __m128i tmp;
|
|
1690 register __m128i SeqA, SeqB;
|
|
1691 __m128i Result;
|
|
1692
|
|
1693 __m128i tmpSeqA;
|
|
1694 __m128i tmpSeqB;
|
|
1695
|
|
1696 /* initialize */
|
|
1697 R0 = _mm_setzero_si128 ();
|
|
1698 R1 = _mm_setzero_si128 ();
|
|
1699 Diag = _mm_setzero_si128 ();
|
|
1700 Side1 = _mm_setzero_si128 ();
|
|
1701 Side2 = _mm_setzero_si128 ();
|
|
1702 Down1 = _mm_setzero_si128 ();
|
|
1703 Down2 = _mm_setzero_si128 ();
|
|
1704 SeqA = _mm_setzero_si128 ();
|
|
1705 SeqB = _mm_setzero_si128 ();
|
|
1706 Result = _mm_setzero_si128 ();
|
|
1707 /* end initialize */
|
|
1708
|
|
1709 R1 = _mm_xor_si128(R1, R1);
|
|
1710 R0 = _mm_xor_si128(R0, R0);
|
|
1711
|
|
1712 Diag = _mm_xor_si128(Diag, Diag);
|
|
1713 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
1714
|
|
1715 i0 = (a[0] != b[0]);
|
|
1716 i1 = min(i0, (a[1]!=b[0]))+1;
|
|
1717 i2 = min(i0,(a[0]!=b[1]))+1;
|
|
1718
|
|
1719 i0 = min3(i0+(a[1]!=b[1]),i1+1,i2+1);
|
|
1720 i4 = min(i1, (a[2]!=b[0])+1)+1;
|
|
1721 i5 = min(i2, (a[0]!=b[2])+1)+1;
|
|
1722
|
|
1723 R1 = _mm_insert_epi16(R1, 3, 0);
|
|
1724 R1 = _mm_insert_epi16(R1, i1, 1);
|
|
1725 R1 = _mm_insert_epi16(R1, i2, 2);
|
|
1726 R1 = _mm_insert_epi16(R1, 3, 3);
|
|
1727
|
|
1728 R0 = _mm_insert_epi16(R0, 4, 0);
|
|
1729 R0 = _mm_insert_epi16(R0, i4, 1);
|
|
1730 R0 = _mm_insert_epi16(R0, i0, 2);
|
|
1731 R0 = _mm_insert_epi16(R0, i5, 3);
|
|
1732 R0 = _mm_insert_epi16(R0, 4, 4);
|
|
1733
|
|
1734 Side2 = _mm_xor_si128(Side2, Side2);
|
|
1735 Down2 = _mm_xor_si128(Down2, Down2);
|
|
1736 Down1 = _mm_xor_si128(Down1, Down1);
|
|
1737 Side1 = _mm_xor_si128(Side1, Side1);
|
|
1738
|
|
1739 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
1740 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
1741
|
|
1742 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1743
|
|
1744 index = 0;
|
|
1745 for(j=0; j < e; j++)
|
|
1746 {
|
|
1747 Side2 = _mm_slli_si128(Side2, 2);
|
|
1748 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
1749
|
|
1750 Down1 = _mm_slli_si128(Down1, 2);
|
|
1751 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
1752
|
|
1753 Down2 = _mm_slli_si128(Down2, 2);
|
|
1754 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
1755
|
|
1756 Side1 = _mm_slli_si128(Side1, 2);
|
|
1757 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1758
|
|
1759 SeqA = _mm_slli_si128(SeqA, 2);
|
|
1760 SeqB = _mm_slli_si128(SeqB, 2);
|
|
1761 SeqA = _mm_insert_epi16(SeqA,a[index],0);
|
|
1762 SeqB = _mm_insert_epi16(SeqB,b[index],0);
|
|
1763 index++;
|
|
1764 }
|
|
1765
|
|
1766 Down2= _mm_slli_si128(Down2, 2);
|
|
1767 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
1768
|
|
1769 index = 4;
|
|
1770 i = 5;
|
|
1771
|
|
1772 int loopEnd = 2*lenb-(e-1);
|
|
1773 for(; i <= loopEnd ;i++)
|
|
1774 {
|
|
1775 //Diag = _mm_xor_si128(Diag, Diag);
|
|
1776 if( i%2 == 0)
|
|
1777 {
|
|
1778 tmpSeqA = _mm_slli_si128(SeqA, 2);
|
|
1779 tmpSeqB = _mm_slli_si128(SeqB, 2);
|
|
1780 SeqA = _mm_insert_epi16(tmpSeqA,a[index],0);
|
|
1781 SeqB = _mm_insert_epi16(tmpSeqB,b[index],0);
|
|
1782
|
|
1783 index++;
|
|
1784
|
|
1785 tmp = _mm_shufflelo_epi16(SeqB,27);
|
|
1786 tmp = _mm_slli_si128(tmp, 2);
|
|
1787 tmpValue = _mm_extract_epi16(tmp, 5);
|
|
1788 tmp = _mm_insert_epi16(tmp, tmpValue, 0);
|
|
1789
|
|
1790 Result = _mm_cmpeq_epi16(SeqA, tmp);
|
|
1791 Diag = _mm_andnot_si128(Result, MASK);
|
|
1792
|
|
1793 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1794 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1795
|
|
1796 if(_mm_extract_epi16(R0, 0) > e && _mm_extract_epi16(R0, 1) > e && _mm_extract_epi16(R0, 2) > e
|
|
1797 && _mm_extract_epi16(R0, 3) > e && _mm_extract_epi16(R0, 4) > e && _mm_extract_epi16(R1, 0) > e &&
|
|
1798 _mm_extract_epi16(R1, 1) > e && _mm_extract_epi16(R1, 2) > e && _mm_extract_epi16(R1, 3) > e)
|
|
1799 return -1;
|
|
1800
|
|
1801 if(i == 2*lenb-e)
|
|
1802 {
|
|
1803 tmp = _mm_srli_si128(R0,2);
|
|
1804 for(k=0; k < e-1;k++)
|
|
1805 tmp = _mm_srli_si128(tmp,2);
|
|
1806 minError = _mm_extract_epi16(tmp,0);
|
|
1807 }
|
|
1808
|
|
1809 }
|
|
1810
|
|
1811 else
|
|
1812 {
|
|
1813 Result = _mm_cmpeq_epi16(SeqA, _mm_shufflelo_epi16(SeqB,27));
|
|
1814 Diag = _mm_andnot_si128(Result, MASK);
|
|
1815
|
|
1816 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1817 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1818
|
|
1819 if(i >= 2*lenb-e)
|
|
1820 {
|
|
1821 tmp = _mm_srli_si128(R1,2);
|
|
1822 for(k=0; k < e-2;k++)
|
|
1823 tmp = _mm_srli_si128(tmp,2);
|
|
1824 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1825 }
|
|
1826 }
|
|
1827
|
|
1828
|
|
1829 }
|
|
1830 j=0;
|
|
1831 int tmpE = e;
|
|
1832 for(;j<2*(e-2)+1;j++)
|
|
1833 {
|
|
1834
|
|
1835 Diag = _mm_xor_si128(Diag, Diag);
|
|
1836 //set the first element
|
|
1837 if(j==0)
|
|
1838 {
|
|
1839 for( k=0;k<=e-1;k++ )
|
|
1840 {
|
|
1841 Diag = _mm_slli_si128(Diag, 2);
|
|
1842 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1843 }
|
|
1844
|
|
1845 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1846 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1847
|
|
1848 tmpE--;
|
|
1849
|
|
1850 tmp = _mm_srli_si128(R0,2);
|
|
1851 for(k=0; k < e-2;k++)
|
|
1852 tmp = _mm_srli_si128(tmp,2);
|
|
1853 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1854 }
|
|
1855 else if(j%2 == 0)
|
|
1856 {
|
|
1857 for(k=0;k<tmpE;k++)
|
|
1858 {
|
|
1859 Diag = _mm_slli_si128(Diag, 2);
|
|
1860 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1861 }
|
|
1862
|
|
1863 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
1864 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
1865
|
|
1866 tmpE--;
|
|
1867
|
|
1868 tmp = _mm_srli_si128(R0,2);
|
|
1869 for(k=0; k < tmpE-1;k++)
|
|
1870 tmp = _mm_srli_si128(tmp,2);
|
|
1871 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1872 }
|
|
1873
|
|
1874
|
|
1875 else
|
|
1876 {
|
|
1877 for(k=0;k<tmpE;k++)
|
|
1878 {
|
|
1879 Diag = _mm_slli_si128(Diag, 2);
|
|
1880 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
1881 }
|
|
1882
|
|
1883 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
1884 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
1885
|
|
1886 tmp = _mm_srli_si128(R1,2);
|
|
1887 for(k=0; k < tmpE-2;k++)
|
|
1888 tmp = _mm_srli_si128(tmp,2);
|
|
1889 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
1890 }
|
|
1891 i++;
|
|
1892 }
|
|
1893 //Diag
|
|
1894
|
|
1895 Diag = _mm_xor_si128(Diag,Diag);
|
|
1896 Diag = _mm_insert_epi16(Diag, 2*e, 0);
|
|
1897 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
|
|
1898
|
|
1899 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1900 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
1901
|
|
1902 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
|
|
1903 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
1904
|
|
1905 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
1906 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
1907
|
|
1908 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
1909
|
|
1910 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 0);
|
|
1911 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
1912
|
|
1913 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
1914 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
1915
|
|
1916 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
1917
|
|
1918 if(minError > e)
|
|
1919 return -1;
|
|
1920 return minError;
|
|
1921 }
|
|
1922
|
|
1923 int forwardEditDistanceSSE2Odd(char *a, int lena, char *b,int lenb)
|
|
1924 {
|
|
1925 if(lenb == 0 || lena == 0)
|
|
1926 return 0;
|
|
1927
|
|
1928 int i = 0;
|
|
1929 int j = 0;
|
|
1930 int k = 0;
|
|
1931
|
|
1932 int e = errThreshold;
|
|
1933
|
|
1934 int minError = 2*e;
|
|
1935
|
|
1936 char flag = 0;
|
|
1937
|
|
1938 if(lenb <= e)
|
|
1939 {
|
|
1940 return smallEditDistanceF(a,lena,b,lenb);
|
|
1941 }
|
|
1942
|
|
1943
|
|
1944 __m128i R0, R1;
|
|
1945 __m128i Diag;
|
|
1946 __m128i Side1, Side2;
|
|
1947 __m128i Down1, Down2;
|
|
1948 __m128i Error;
|
|
1949 __m128i tmp;
|
|
1950
|
|
1951 /* initialize */
|
|
1952 R0 = _mm_setzero_si128 ();
|
|
1953 R1 = _mm_setzero_si128 ();
|
|
1954 Diag = _mm_setzero_si128 ();
|
|
1955 Side1 = _mm_setzero_si128 ();
|
|
1956 Side2 = _mm_setzero_si128 ();
|
|
1957 Down1 = _mm_setzero_si128 ();
|
|
1958 Down2 = _mm_setzero_si128 ();
|
|
1959 Error = _mm_setzero_si128 ();
|
|
1960 tmp = _mm_setzero_si128 ();
|
|
1961 /* end initialize */
|
|
1962
|
|
1963 R1 = _mm_xor_si128(R1, R1);
|
|
1964 R0 = _mm_xor_si128(R0, R0);
|
|
1965
|
|
1966 Diag = _mm_xor_si128(Diag, Diag);
|
|
1967 Side1 = _mm_xor_si128(Side1, Side1);
|
|
1968 Down1 = _mm_xor_si128(Down1, Down1);
|
|
1969
|
|
1970 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
1971
|
|
1972 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1973 Side1 = _mm_insert_epi16(Side1,2*e,1);
|
|
1974
|
|
1975 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
1976 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
1977 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
1978
|
|
1979 R0 = _mm_insert_epi16(R0,0,0);
|
|
1980
|
|
1981 R1 = _mm_insert_epi16(R1,1,0);
|
|
1982 R1 = _mm_insert_epi16(R1,1,1);
|
|
1983
|
|
1984 for(i=2; i <= e; i++)
|
|
1985 {
|
|
1986 //set side
|
|
1987 Side1 = _mm_slli_si128(Side1,2);
|
|
1988 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
1989
|
|
1990 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
1991 Down1 = _mm_slli_si128(Down1,2);
|
|
1992 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
1993
|
|
1994 Diag = _mm_xor_si128(Diag, Diag);
|
|
1995 if( i%2 == 0)
|
|
1996 {
|
|
1997 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
1998
|
|
1999 for(j=1;j<=i-1;j++)
|
|
2000 {
|
|
2001 Diag = _mm_slli_si128(Diag, 2);
|
|
2002 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0);
|
|
2003 }
|
|
2004 Diag = _mm_slli_si128(Diag, 2);
|
|
2005 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
2006
|
|
2007 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
|
|
2008 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
|
|
2009
|
|
2010 }
|
|
2011
|
|
2012 else
|
|
2013 {
|
|
2014 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2015 for(j=i/2-1;j>=-i/2;j--)
|
|
2016 {
|
|
2017 Diag = _mm_slli_si128(Diag, 2);
|
|
2018 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0);
|
|
2019 }
|
|
2020 Diag = _mm_slli_si128(Diag, 2);
|
|
2021 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
2022
|
|
2023 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
2024 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
2025
|
|
2026 }
|
|
2027 }
|
|
2028 Error = _mm_xor_si128(Error, Error);
|
|
2029 Side2 = _mm_xor_si128(Side2, Side2);
|
|
2030 Side1 = _mm_xor_si128(Side1, Side1);
|
|
2031 Down2 = _mm_xor_si128(Down2, Down2);
|
|
2032 Down1 = _mm_xor_si128(Down1, Down1);
|
|
2033
|
|
2034
|
|
2035 Error = _mm_insert_epi16(Error,e,0);
|
|
2036 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
2037 Side1 = _mm_insert_epi16(Side2,2*e,0);
|
|
2038 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2039
|
|
2040
|
|
2041 for(j=0; j < e; j++)
|
|
2042 {
|
|
2043 Side2 = _mm_slli_si128(Side2, 2);
|
|
2044 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
2045
|
|
2046 Side1 = _mm_slli_si128(Side1, 2);
|
|
2047 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
2048
|
|
2049 Down1 = _mm_slli_si128(Down1, 2);
|
|
2050 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
2051
|
|
2052 Down2 = _mm_slli_si128(Down2, 2);
|
|
2053 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
2054
|
|
2055 Error = _mm_slli_si128(Error, 2);
|
|
2056 Error = _mm_insert_epi16(Error, e, 0);
|
|
2057 }
|
|
2058
|
|
2059 Down2= _mm_slli_si128(Down2, 2);
|
|
2060 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
2061
|
|
2062 for(; i <= 2*lenb-(e-1);i++)
|
|
2063 {
|
|
2064 flag = 0;
|
|
2065 Diag = _mm_xor_si128(Diag, Diag);
|
|
2066 if( i%2 == 0)
|
|
2067 {
|
|
2068 for(j=e/2;j>=-e/2;j--)
|
|
2069 {
|
|
2070 Diag = _mm_slli_si128(Diag, 2);
|
|
2071 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0);
|
|
2072 }
|
|
2073
|
|
2074
|
|
2075 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
2076 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
2077
|
|
2078 if(_mm_extract_epi16(R0,0) <= e)
|
|
2079 flag = 1;
|
|
2080
|
|
2081 tmp = _mm_srli_si128(R0,2);
|
|
2082 for(j=0; j < e-1;j++)
|
|
2083 {
|
|
2084 if(_mm_extract_epi16(tmp,0) <= e)
|
|
2085 flag = 1;
|
|
2086 tmp = _mm_srli_si128(tmp,2);
|
|
2087 }
|
|
2088 // printf("#%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2));
|
|
2089 if(flag == 0)
|
|
2090 return -1;
|
|
2091
|
|
2092 if(i == 2*lenb-(e-1))
|
|
2093 {
|
|
2094 tmp = _mm_srli_si128(R0,2);
|
|
2095 for(k=0; k < e-2;k++)
|
|
2096 tmp = _mm_srli_si128(tmp,2);
|
|
2097 minError = _mm_extract_epi16(tmp,0);
|
|
2098 }
|
|
2099
|
|
2100 }
|
|
2101
|
|
2102 else
|
|
2103 {
|
|
2104 for(j=e/2;j>=-e/2-1;j--)
|
|
2105 {
|
|
2106 Diag = _mm_slli_si128(Diag, 2);
|
|
2107 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i)/2-j-1],0);
|
|
2108 }
|
|
2109
|
|
2110 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
2111 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
2112
|
|
2113 //printf("#%d %d %d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1), _mm_extract_epi16(R1,2),
|
|
2114 // _mm_extract_epi16(R1,3));
|
|
2115
|
|
2116 if(i >= 2*lenb-e)
|
|
2117 {
|
|
2118 tmp = _mm_srli_si128(R1,2);
|
|
2119 for(k=0; k < e-1;k++)
|
|
2120 tmp = _mm_srli_si128(tmp,2);
|
|
2121 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
2122 }
|
|
2123 }
|
|
2124 }
|
|
2125
|
|
2126 //first cell
|
|
2127 Diag = _mm_xor_si128(Diag,Diag);
|
|
2128 Diag = _mm_insert_epi16(Diag, b[lenb-3] != a[lena], 0);
|
|
2129 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena-1], 1);
|
|
2130 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-2], 2);
|
|
2131 Diag = _mm_insert_epi16(Diag, 2*e, 3);
|
|
2132 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
2133 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
2134
|
|
2135
|
|
2136 minError = min(minError, _mm_extract_epi16(R1,2));
|
|
2137
|
|
2138 //second cell
|
|
2139 Diag = _mm_xor_si128(Diag,Diag);
|
|
2140 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena], 0);
|
|
2141 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena-1], 1);
|
|
2142 Diag = _mm_insert_epi16(Diag, 2*e, 2);
|
|
2143
|
|
2144 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
2145 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
2146
|
|
2147
|
|
2148 minError = min(minError, _mm_extract_epi16(R0,1));
|
|
2149
|
|
2150 //third cell
|
|
2151 Diag = _mm_xor_si128(Diag,Diag);
|
|
2152 Diag = _mm_insert_epi16(Diag, b[lenb-2] != a[lena+1], 0);
|
|
2153 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena], 1);
|
|
2154 Diag = _mm_insert_epi16(Diag, 2*e, 2);
|
|
2155
|
|
2156 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
2157 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
2158
|
|
2159
|
|
2160 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
2161
|
|
2162 //forth
|
|
2163 Diag = _mm_xor_si128(Diag,Diag);
|
|
2164 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+1], 0);
|
|
2165 Diag = _mm_insert_epi16(Diag, 2*e, 1);
|
|
2166
|
|
2167 R0 = _mm_min_epi16(_mm_srli_si128(R1,2)+Side1, R0+Diag);
|
|
2168 R0 = _mm_min_epi16(R0, R1+Down1);
|
|
2169
|
|
2170 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
2171
|
|
2172 //fifth
|
|
2173 Diag = _mm_xor_si128(Diag,Diag);
|
|
2174 Diag = _mm_insert_epi16(Diag, b[lenb-1] != a[lena+2], 0);
|
|
2175 Diag = _mm_insert_epi16(Diag, 2*e, 1);
|
|
2176
|
|
2177 R1 = _mm_min_epi16(R0+Side2, R1+Diag);
|
|
2178 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down2);
|
|
2179
|
|
2180
|
|
2181 minError = min(minError, _mm_extract_epi16(R1,0));
|
|
2182
|
|
2183 if(minError > e)
|
|
2184 return -1;
|
|
2185 return minError;
|
|
2186
|
|
2187 }
|
|
2188
|
|
2189 int forwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb)
|
|
2190 {
|
|
2191 if(lenb == 0 || lena == 0)
|
|
2192 return 0;
|
|
2193
|
|
2194 int i = 0;
|
|
2195 int j = 0;
|
|
2196 int k = 0;
|
|
2197
|
|
2198 int e = errThreshold;
|
|
2199
|
|
2200 int minError = 2*e;
|
|
2201
|
|
2202 char flag = 0;
|
|
2203
|
|
2204 if(lenb <= e)
|
|
2205 {
|
|
2206 return smallEditDistanceF(a,lena,b,lenb);
|
|
2207 }
|
|
2208
|
|
2209
|
|
2210 __m128i R0, R1;
|
|
2211 __m128i Diag;
|
|
2212 __m128i Side1, Side2;
|
|
2213 __m128i Down1, Down2;
|
|
2214 __m128i Error;
|
|
2215 __m128i tmp;
|
|
2216
|
|
2217 /* initialize */
|
|
2218 R0 = _mm_setzero_si128 ();
|
|
2219 R1 = _mm_setzero_si128 ();
|
|
2220 Diag = _mm_setzero_si128 ();
|
|
2221 Side1 = _mm_setzero_si128 ();
|
|
2222 Side2 = _mm_setzero_si128 ();
|
|
2223 Down1 = _mm_setzero_si128 ();
|
|
2224 Down2 = _mm_setzero_si128 ();
|
|
2225 Error = _mm_setzero_si128 ();
|
|
2226 tmp = _mm_setzero_si128 ();
|
|
2227 /* end initialize */
|
|
2228
|
|
2229 R1 = _mm_xor_si128(R1, R1);
|
|
2230 R0 = _mm_xor_si128(R0, R0);
|
|
2231
|
|
2232 Diag = _mm_xor_si128(Diag, Diag);
|
|
2233 Side1 = _mm_xor_si128(Side1, Side1);
|
|
2234 Down1 = _mm_xor_si128(Down1, Down1);
|
|
2235
|
|
2236 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2237
|
|
2238 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
2239 Side1 = _mm_insert_epi16(Side1,2*e,1);
|
|
2240
|
|
2241 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2242 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
2243 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
2244
|
|
2245 R0 = _mm_insert_epi16(R0,0,0);
|
|
2246
|
|
2247 R1 = _mm_insert_epi16(R1,1,0);
|
|
2248 R1 = _mm_insert_epi16(R1,1,1);
|
|
2249
|
|
2250 for(i=2; i <= e; i++)
|
|
2251 {
|
|
2252 //set side
|
|
2253 Side1 = _mm_slli_si128(Side1,2);
|
|
2254 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
2255
|
|
2256 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
2257 Down1 = _mm_slli_si128(Down1,2);
|
|
2258 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2259
|
|
2260 Diag = _mm_xor_si128(Diag, Diag);
|
|
2261 if( i%2 == 0)
|
|
2262 {
|
|
2263 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2264
|
|
2265 for(j=1;j<=i-1;j++)
|
|
2266 {
|
|
2267 Diag = _mm_slli_si128(Diag, 2);
|
|
2268 Diag = _mm_insert_epi16(Diag, b[i/2-1+(i/2-j)] != a[i/2-1-(i/2-j)],0);
|
|
2269 }
|
|
2270 Diag = _mm_slli_si128(Diag, 2);
|
|
2271 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
2272
|
|
2273 R0 = _mm_min_epi16(R1+Side1, _mm_slli_si128(R0,2)+Diag);
|
|
2274 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down1);
|
|
2275 }
|
|
2276
|
|
2277 else
|
|
2278 {
|
|
2279 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2280 for(j=i/2-1;j>=-i/2;j--)
|
|
2281 {
|
|
2282 Diag = _mm_slli_si128(Diag, 2);
|
|
2283 Diag = _mm_insert_epi16(Diag, b[(i+1)/2+j-1] != a[(i-1)/2-j-1],0);
|
|
2284 }
|
|
2285 Diag = _mm_slli_si128(Diag, 2);
|
|
2286 Diag = _mm_insert_epi16(Diag, 2*e,0);
|
|
2287
|
|
2288 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
2289 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
2290 }
|
|
2291 }
|
|
2292 Error = _mm_xor_si128(Error, Error);
|
|
2293 Side2 = _mm_xor_si128(Side2, Side2);
|
|
2294 Down2 = _mm_xor_si128(Down2, Down2);
|
|
2295 Down1 = _mm_xor_si128(Down1, Down1);
|
|
2296
|
|
2297 Error = _mm_insert_epi16(Error,e,0);
|
|
2298 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
2299 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2300
|
|
2301
|
|
2302 for(j=0; j < e; j++)
|
|
2303 {
|
|
2304 Side2 = _mm_slli_si128(Side2, 2);
|
|
2305 Side2 = _mm_insert_epi16(Side2,1,0);
|
|
2306
|
|
2307 Down1 = _mm_slli_si128(Down1, 2);
|
|
2308 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
2309
|
|
2310 Down2 = _mm_slli_si128(Down2, 2);
|
|
2311 Down2 = _mm_insert_epi16(Down2,1,0);
|
|
2312
|
|
2313 Error = _mm_slli_si128(Error, 2);
|
|
2314 Error = _mm_insert_epi16(Error, e, 0);
|
|
2315 }
|
|
2316
|
|
2317 Down2= _mm_slli_si128(Down2, 2);
|
|
2318 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
2319
|
|
2320 for(; i <= 2*lenb-(e-1);i++)
|
|
2321 {
|
|
2322 flag = 0;
|
|
2323 Diag = _mm_xor_si128(Diag, Diag);
|
|
2324 if( i%2 == 0)
|
|
2325 {
|
|
2326 for(j=e/2;j>=-e/2;j--)
|
|
2327 {
|
|
2328 Diag = _mm_slli_si128(Diag, 2);
|
|
2329 Diag = _mm_insert_epi16(Diag, b[i/2-1+j] != a[i/2-1-j],0);
|
|
2330 }
|
|
2331
|
|
2332 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
2333 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
2334
|
|
2335
|
|
2336 if(_mm_extract_epi16(R0,0) <= e)
|
|
2337 flag = 1;
|
|
2338
|
|
2339 tmp = _mm_srli_si128(R0,2);
|
|
2340 for(j=0; j < e-1;j++)
|
|
2341 {
|
|
2342 if(_mm_extract_epi16(tmp,0) <= e)
|
|
2343 flag = 1;
|
|
2344 tmp = _mm_srli_si128(tmp,2);
|
|
2345 }
|
|
2346
|
|
2347
|
|
2348 if(flag == 0)
|
|
2349 return -1;
|
|
2350
|
|
2351 if(i == 2*lenb-e)
|
|
2352 {
|
|
2353 tmp = _mm_srli_si128(R0,2);
|
|
2354 for(k=0; k < e-1;k++)
|
|
2355 tmp = _mm_srli_si128(tmp,2);
|
|
2356 minError = _mm_extract_epi16(tmp,0);
|
|
2357 }
|
|
2358
|
|
2359 }
|
|
2360
|
|
2361 else
|
|
2362 {
|
|
2363 for(j=-e/2+1;j<=e/2;j++)
|
|
2364 {
|
|
2365 Diag = _mm_slli_si128(Diag, 2);
|
|
2366 Diag = _mm_insert_epi16(Diag, b[(i+1)/2-j-1] != a[(i-1)/2+j-1],0);
|
|
2367 }
|
|
2368
|
|
2369 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
2370 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
2371
|
|
2372 if(i >= 2*lenb-e)
|
|
2373 {
|
|
2374 tmp = _mm_srli_si128(R1,2);
|
|
2375 for(k=0; k < e-2;k++)
|
|
2376 tmp = _mm_srli_si128(tmp,2);
|
|
2377 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
2378 }
|
|
2379 }
|
|
2380 }
|
|
2381
|
|
2382 j=0;
|
|
2383 int tmpE = e;
|
|
2384 for(;j<2*(e-2)+1;j++)
|
|
2385 {
|
|
2386
|
|
2387 Diag = _mm_xor_si128(Diag, Diag);
|
|
2388 //set the first element
|
|
2389 if(j==0)
|
|
2390 {
|
|
2391 for( k=0;k<=e-1;k++ )
|
|
2392 {
|
|
2393 Diag = _mm_slli_si128(Diag, 2);
|
|
2394 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
2395 }
|
|
2396
|
|
2397 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
2398 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
2399
|
|
2400 tmpE--;
|
|
2401
|
|
2402 tmp = _mm_srli_si128(R0,2);
|
|
2403 for(k=0; k < e-2;k++)
|
|
2404 tmp = _mm_srli_si128(tmp,2);
|
|
2405 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
2406 }
|
|
2407 else if(j%2 == 0)
|
|
2408 {
|
|
2409 for(k=0;k<tmpE;k++)
|
|
2410 {
|
|
2411 Diag = _mm_slli_si128(Diag, 2);
|
|
2412 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
2413 }
|
|
2414
|
|
2415 R0 = _mm_min_epi16(R1+Side2, R0+Diag);
|
|
2416 R0 = _mm_min_epi16(R0, _mm_slli_si128(R1,2)+Down2);
|
|
2417
|
|
2418 tmpE--;
|
|
2419
|
|
2420 tmp = _mm_srli_si128(R0,2);
|
|
2421 for(k=0; k < tmpE-1;k++)
|
|
2422 tmp = _mm_srli_si128(tmp,2);
|
|
2423 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
2424 }
|
|
2425
|
|
2426
|
|
2427 else
|
|
2428 {
|
|
2429 for(k=0;k<tmpE;k++)
|
|
2430 {
|
|
2431 Diag = _mm_slli_si128(Diag, 2);
|
|
2432 Diag = _mm_insert_epi16(Diag, b[lenb-1-k] != a[(i-lenb)-1+k],0);
|
|
2433 }
|
|
2434
|
|
2435 R1 = _mm_min_epi16(_mm_srli_si128(R0,2)+Side1, R1+Diag);
|
|
2436 R1 = _mm_min_epi16(R1, R0+Down1);
|
|
2437
|
|
2438 tmp = _mm_srli_si128(R1,2);
|
|
2439 for(k=0; k < tmpE-1;k++)
|
|
2440 tmp = _mm_srli_si128(tmp,2);
|
|
2441 minError = min(minError, _mm_extract_epi16(tmp,0));
|
|
2442 }
|
|
2443 i++;
|
|
2444 }
|
|
2445 //Diag
|
|
2446
|
|
2447 Diag = _mm_xor_si128(Diag,Diag);
|
|
2448 Diag = _mm_insert_epi16(Diag, 2*e, 0);
|
|
2449 Diag = _mm_insert_epi16(Diag, a[lenb+e-2] != b[lenb-1], 1);
|
|
2450
|
|
2451 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
2452 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
2453
|
|
2454 Down1 = _mm_insert_epi16(Down1, 2*e, 0);
|
|
2455 Down1 = _mm_insert_epi16(Down1, 1, 1);
|
|
2456
|
|
2457 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
2458 R1 = _mm_min_epi16(R1, _mm_slli_si128(R0,2)+Down1);
|
|
2459
|
|
2460 minError = min(minError, _mm_extract_epi16(R1,1));
|
|
2461
|
|
2462 Diag = _mm_insert_epi16(Diag, a[lenb+e-1] != b[lenb-1], 1);
|
|
2463 Down1 = _mm_insert_epi16(Down1, 1, 0);
|
|
2464
|
|
2465 R0 = _mm_min_epi16(R1+Down1,R0+Diag);
|
|
2466 R0 = _mm_min_epi16(R0,_mm_srli_si128(R1,2)+Side1);
|
|
2467
|
|
2468 minError = min(minError, _mm_extract_epi16(R0,0));
|
|
2469
|
|
2470 if(minError > e)
|
|
2471 return -1;
|
|
2472 return minError;
|
|
2473 }
|
|
2474
|
|
2475
|
|
2476 int forwardEditDistance2SSE2(char *a, int lena, char *b,int lenb)
|
|
2477 {
|
|
2478 if(lenb == 0 || lena == 0)
|
|
2479 return 0;
|
|
2480
|
|
2481
|
|
2482
|
|
2483 int i0 = 0;
|
|
2484 int i1 = 0;
|
|
2485
|
|
2486
|
|
2487 int error; //0: if the two character are equal 1: if not
|
|
2488
|
|
2489 int i = 0; //loop index
|
|
2490
|
|
2491 int e = 2; //error bound
|
|
2492
|
|
2493 int totalError = 0;
|
|
2494
|
|
2495 __m128i R0;
|
|
2496 __m128i R1;
|
|
2497
|
|
2498 __m128i Side1, Side2,Side; //side matrix
|
|
2499 __m128i Down1, Down2,Down; //down matrix
|
|
2500 __m128i Diag;
|
|
2501
|
|
2502 __m128i tmp;
|
|
2503 __m128i ERROR_REACH;
|
|
2504
|
|
2505 /* initialize */
|
|
2506 R0 = _mm_setzero_si128 ();
|
|
2507 R1 = _mm_setzero_si128 ();
|
|
2508 Diag = _mm_setzero_si128 ();
|
|
2509 Side1 = _mm_setzero_si128 ();
|
|
2510 Side2 = _mm_setzero_si128 ();
|
|
2511 Down1 = _mm_setzero_si128 ();
|
|
2512 Down2 = _mm_setzero_si128 ();
|
|
2513 Side = _mm_setzero_si128 ();
|
|
2514 Down = _mm_setzero_si128 ();
|
|
2515 tmp = _mm_setzero_si128 ();
|
|
2516 ERROR_REACH = _mm_setzero_si128 ();
|
|
2517 /* end initialize */
|
|
2518
|
|
2519
|
|
2520 if(lenb <= e)
|
|
2521 {
|
|
2522 return smallEditDistanceF(a,lena,b,lenb);
|
|
2523 }
|
|
2524
|
|
2525 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e);
|
|
2526
|
|
2527 R0 = _mm_insert_epi16(R0,0,0);
|
|
2528
|
|
2529 R1 = _mm_insert_epi16(R1,1,0);
|
|
2530 R1 = _mm_insert_epi16(R1,1,1);
|
|
2531
|
|
2532 // error = ((a[0]) != (b[0]));
|
|
2533
|
|
2534 Diag = _mm_set_epi16(0,0,0,0,0,2*e,((a[0]) != (b[0])),2*e);
|
|
2535 Side1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1);
|
|
2536 Side2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e);
|
|
2537 Down1 = _mm_set_epi16(0,0,0,0,0,2*e,1,1);
|
|
2538 Down2 = _mm_set_epi16(0,0,0,0,0,1,1,2*e);
|
|
2539
|
|
2540 tmp = _mm_slli_si128(R1,2);
|
|
2541
|
|
2542 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2543 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
2544
|
|
2545 for (i = 3; i < 2*lena; i++)
|
|
2546 {
|
|
2547 if(i % 2 ==1)
|
|
2548 {
|
|
2549
|
|
2550 Diag = _mm_xor_si128(Diag, Diag);
|
|
2551 error = ((a[(i+1)/2-1]) != (b[(i-1)/2-1]));
|
|
2552 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2553 error = ((a[(i-1)/2-1]) != (b[(i+1)/2-1]));
|
|
2554 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2555 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0, ((a[(i-1)/2-1]) != (b[(i+1)/2-1])) ,((a[(i+1)/2-1]) != (b[(i-1)/2-1])));
|
|
2556
|
|
2557
|
|
2558 tmp = _mm_srli_si128(R0,2);
|
|
2559
|
|
2560 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
|
|
2561 R1 = _mm_min_epi16(R1,R0+Down1);
|
|
2562
|
|
2563 if(i > 2 * lenb - 2)
|
|
2564 {
|
|
2565 i1 = _mm_extract_epi16(R1, 1);
|
|
2566 totalError = min(totalError, i1);
|
|
2567 }
|
|
2568 }
|
|
2569
|
|
2570 else if(i % 2 == 0)
|
|
2571 {
|
|
2572 error = ((a[i/2]) != (b[i/2-2]));
|
|
2573 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2574 error = ((a[i/2-1]) != (b[i/2-1]));
|
|
2575 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2576 error = ((a[i/2-2]) != (b[i/2]));
|
|
2577 Diag = _mm_insert_epi16(Diag,error,2);
|
|
2578
|
|
2579 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, ((a[i/2-2]) != (b[i/2])) , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) );
|
|
2580
|
|
2581 tmp = _mm_slli_si128(R1,2);
|
|
2582
|
|
2583 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2584 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
2585
|
|
2586 tmp = _mm_sub_epi16(ERROR_REACH, R0);
|
|
2587 i0 = _mm_movemask_epi8(tmp);
|
|
2588
|
|
2589 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2)
|
|
2590 return -1;
|
|
2591 if(i == 2 * lenb - 2) {
|
|
2592 totalError = _mm_extract_epi16(R0, 2);
|
|
2593 }
|
|
2594 }
|
|
2595 }
|
|
2596
|
|
2597 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2598
|
|
2599 //fill the first part of the error
|
|
2600 error = ((a[i/2]) != (b[i/2-2]));
|
|
2601 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2602 error = ((a[i/2-1]) != (b[i/2-1]));
|
|
2603 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2604 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
2605 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2-1]) != (b[i/2-1])) , ((a[i/2]) != (b[i/2-2])) );
|
|
2606
|
|
2607 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2608 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
|
|
2609
|
|
2610 // i0 = _mm_extract_epi16(R0, 0);
|
|
2611 i1 = _mm_extract_epi16(R0, 1);
|
|
2612
|
|
2613 totalError = min(totalError, i1);
|
|
2614
|
|
2615 //fill the second part of the error
|
|
2616 i++;
|
|
2617
|
|
2618 Diag = _mm_xor_si128(Diag, Diag);
|
|
2619 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2620 error = ((a[i/2]) != (b[lenb-1]));
|
|
2621 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2622 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
2623 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 2*e , ((a[i/2]) != (b[lenb-1])) , 2*e );
|
|
2624
|
|
2625
|
|
2626 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
2627 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
|
|
2628
|
|
2629 // i0 = _mm_extract_epi16(R1, 0);
|
|
2630 i1 = _mm_extract_epi16(R1, 1);
|
|
2631
|
|
2632 totalError = min(totalError, i1);
|
|
2633 //fill the last the last element of the matrix
|
|
2634 i++;
|
|
2635
|
|
2636 Diag = _mm_xor_si128(Diag, Diag);
|
|
2637 error = ((a[i/2]) != (b[lenb-1]));
|
|
2638 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2639
|
|
2640 // Diag = _mm_set_epi16(0, 0, 0, 0, 0, 0 , 0 , ((a[i/2]) != (b[lenb-1])) );
|
|
2641
|
|
2642
|
|
2643 Down = _mm_insert_epi16(Down,1,0);
|
|
2644
|
|
2645 Side = _mm_insert_epi16(Side,1,0);
|
|
2646
|
|
2647 tmp = _mm_srli_si128(R1,2);
|
|
2648
|
|
2649 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag);
|
|
2650 R0 = _mm_min_epi16(R0,tmp+Side);
|
|
2651
|
|
2652 i0 = _mm_extract_epi16(R0, 0);
|
|
2653
|
|
2654 totalError = min(totalError, i0);
|
|
2655
|
|
2656 if(totalError > e)
|
|
2657 return -1;
|
|
2658
|
|
2659 return totalError;
|
|
2660
|
|
2661 }
|
|
2662
|
|
2663 int backwardEditDistance2SSE2(char *a, int lena, char *b,int lenb)
|
|
2664 {
|
|
2665 if(lenb == 0 || lena == 0)
|
|
2666 return 0;
|
|
2667
|
|
2668 int i0 = 0;
|
|
2669 int i1 = 0;
|
|
2670
|
|
2671 int error; //0: if the two character are equal 1: if not
|
|
2672
|
|
2673 int i = 0; //loop index
|
|
2674
|
|
2675 int e = 2; //error bound
|
|
2676
|
|
2677 int totalError = 0;
|
|
2678
|
|
2679 __m128i R0;
|
|
2680 __m128i R1;
|
|
2681
|
|
2682 __m128i Side1, Side2,Side; //side matrix
|
|
2683 __m128i Down1, Down2,Down; //down matrix
|
|
2684 __m128i Diag; //diag matrix
|
|
2685
|
|
2686 __m128i tmp;
|
|
2687 __m128i ERROR_REACH;
|
|
2688
|
|
2689 /* initialize */
|
|
2690 R0 = _mm_setzero_si128 ();
|
|
2691 R1 = _mm_setzero_si128 ();
|
|
2692 Diag = _mm_setzero_si128 ();
|
|
2693 Side1 = _mm_setzero_si128 ();
|
|
2694 Side2 = _mm_setzero_si128 ();
|
|
2695 Side = _mm_setzero_si128 ();
|
|
2696 Down1 = _mm_setzero_si128 ();
|
|
2697 Down2 = _mm_setzero_si128 ();
|
|
2698 Down = _mm_setzero_si128 ();
|
|
2699 ERROR_REACH = _mm_setzero_si128 ();
|
|
2700 tmp = _mm_setzero_si128 ();
|
|
2701 /* end initialize */
|
|
2702
|
|
2703 if(lenb <= e)
|
|
2704 {
|
|
2705 return smallEditDistanceB(a,lena,b,lenb);
|
|
2706 }
|
|
2707
|
|
2708
|
|
2709 ERROR_REACH = _mm_set_epi16(0,0,0,0,0,e,e,e);
|
|
2710
|
|
2711 R0 = _mm_insert_epi16(R0,0,0);
|
|
2712
|
|
2713 R1 = _mm_insert_epi16(R1,1,0);
|
|
2714 R1 = _mm_insert_epi16(R1,1,1);
|
|
2715
|
|
2716 error = ((a[0]) != (b[0]));
|
|
2717
|
|
2718 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2719 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2720 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
2721
|
|
2722 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
2723 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
2724 Side1 = _mm_insert_epi16(Side1,2*e,2);
|
|
2725
|
|
2726 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
2727 Side2 = _mm_insert_epi16(Side2,1,1);
|
|
2728 Side2 = _mm_insert_epi16(Side2,1,2);
|
|
2729
|
|
2730 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
2731 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
2732 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
2733
|
|
2734 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
2735 Down2 = _mm_insert_epi16(Down2,1,1);
|
|
2736 Down2 = _mm_insert_epi16(Down2,1,2);
|
|
2737
|
|
2738 tmp = _mm_slli_si128(R1,2);
|
|
2739
|
|
2740 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2741 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
2742
|
|
2743 // printf("%d %d %d\n", _mm_extract_epi16(R0,0), _mm_extract_epi16(R0,1), _mm_extract_epi16(R0,2));
|
|
2744 for (i = 3; i < 2*lena; i++)
|
|
2745 {
|
|
2746 if(i % 2 ==1)
|
|
2747 {
|
|
2748 Diag = _mm_sub_epi8(Diag, Diag);
|
|
2749 error = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) );
|
|
2750 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2751 error = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) );
|
|
2752 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2753 //printf("#%d #%d\n", _mm_extract_epi16(Diag,0), _mm_extract_epi16(Diag,1));
|
|
2754 tmp = _mm_srli_si128(R0,2);
|
|
2755
|
|
2756 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
|
|
2757 R1 = _mm_min_epi16(R1,R0+Down1);
|
|
2758
|
|
2759 if(i > 2 * lenb - 2) {
|
|
2760 i1 = _mm_extract_epi16(R1, 1);
|
|
2761 totalError = min(totalError, i1);
|
|
2762 }
|
|
2763 // printf("%d %d\n", _mm_extract_epi16(R1,0), _mm_extract_epi16(R1,1));
|
|
2764 }
|
|
2765
|
|
2766 else if(i % 2 == 0)
|
|
2767 {
|
|
2768 error = ( *(a-(i/2)) != *(b-(i/2-2)) );
|
|
2769 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2770 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
|
|
2771 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2772 error = ( *(a-(i/2-2)) != *(b-(i/2)));
|
|
2773 Diag = _mm_insert_epi16(Diag,error,2);
|
|
2774
|
|
2775 tmp = _mm_slli_si128(R1,2);
|
|
2776
|
|
2777 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2778 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
2779
|
|
2780 tmp = _mm_sub_epi16(ERROR_REACH, R0);
|
|
2781 i0 = _mm_movemask_epi8(tmp);
|
|
2782
|
|
2783 if(i0 == 63 && _mm_extract_epi16(R1,0) > errThreshold && _mm_extract_epi16(R1,1) > errThreshold && i < 2 * lenb - 2)
|
|
2784 return -1;
|
|
2785
|
|
2786 if(i == 2 * lenb - 2) {
|
|
2787 totalError = _mm_extract_epi16(R0, 2);
|
|
2788 }
|
|
2789 }
|
|
2790 }
|
|
2791 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
2792
|
|
2793 //fill the first part of the error
|
|
2794 error = ( *(a-(i/2)) != *(b-(i/2-2)) );
|
|
2795 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2796 error = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
|
|
2797 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2798 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
2799
|
|
2800 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
2801 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
|
|
2802
|
|
2803 i0 = _mm_extract_epi16(R0, 0);
|
|
2804 i1 = _mm_extract_epi16(R0, 1);
|
|
2805
|
|
2806 totalError = min(totalError, i1);
|
|
2807
|
|
2808 //fill the second part of the error
|
|
2809 i++;
|
|
2810 Diag = _mm_sub_epi8(Diag, Diag);
|
|
2811 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
2812 error = ( *(a-(i/2)) != *(b-(lenb-1)) );
|
|
2813 Diag = _mm_insert_epi16(Diag,error,1);
|
|
2814 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
2815
|
|
2816 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
2817 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
|
|
2818
|
|
2819 i0 = _mm_extract_epi16(R1, 0);
|
|
2820 i1 = _mm_extract_epi16(R1, 1);
|
|
2821
|
|
2822 totalError = min(totalError, i1);
|
|
2823
|
|
2824 //fill the last the last element of the matrix
|
|
2825 i++;
|
|
2826 Diag = _mm_sub_epi8(Diag, Diag);
|
|
2827 error = ( *(a-(i/2)) != *(b-(lenb-1)) );
|
|
2828 Diag = _mm_insert_epi16(Diag,error,0);
|
|
2829
|
|
2830 Down = _mm_insert_epi16(Down,1,0);
|
|
2831
|
|
2832 Side = _mm_insert_epi16(Side,1,0);
|
|
2833
|
|
2834 tmp = _mm_srli_si128(R1,2);
|
|
2835
|
|
2836 R0 = _mm_min_epi16(R1+Down, _mm_srli_si128(R0,2)+Diag);
|
|
2837 R0 = _mm_min_epi16(R0,tmp+Side);
|
|
2838
|
|
2839 i0 = _mm_extract_epi16(R0, 0);
|
|
2840
|
|
2841 totalError = min(totalError, i0);
|
|
2842
|
|
2843 if(totalError > e || totalError == 0)
|
|
2844 return -1;
|
|
2845 return totalError;
|
|
2846 }
|
|
2847
|
|
2848 void initBestMapping(int totalReadNumber)
|
|
2849 {
|
|
2850 int i = 0;
|
|
2851 bestHitMappingInfo = getMem(totalReadNumber * sizeof(BestFullMappingInfo));
|
|
2852 for(i = 0; i < totalReadNumber; i++) {
|
|
2853 bestHitMappingInfo[i].loc = -1;
|
|
2854 }
|
|
2855 }
|
|
2856
|
|
2857
|
|
2858 void finalizeBestSingleMapping()
|
|
2859 {
|
|
2860 int i = 0;
|
|
2861 char *_tmpQual, *_tmpSeq;
|
|
2862 char rqual[SEQ_LENGTH + 1];
|
|
2863 rqual[SEQ_LENGTH]='\0';
|
|
2864
|
|
2865 for(i = 0; i < _msf_seqListSize; i++)
|
|
2866 {
|
|
2867 if(_msf_seqList[i].hits[0] != 0)
|
|
2868 {
|
|
2869 if (bestHitMappingInfo[i].dir)
|
|
2870 {
|
|
2871 reverse(_msf_seqList[i].qual, rqual, SEQ_LENGTH);
|
|
2872 _tmpQual = rqual;
|
|
2873 _tmpSeq = _msf_seqList[i].rseq;
|
|
2874 }
|
|
2875 else
|
|
2876 {
|
|
2877 _tmpQual = _msf_seqList[i].qual;
|
|
2878 _tmpSeq = _msf_seqList[i].seq;
|
|
2879 }
|
|
2880
|
|
2881
|
|
2882 _msf_output.QNAME = _msf_seqList[i].name;
|
|
2883 _msf_output.FLAG = 16 * bestHitMappingInfo[i].dir;
|
|
2884 _msf_output.RNAME = bestHitMappingInfo[i].chr;
|
|
2885
|
|
2886 _msf_output.POS = bestHitMappingInfo[i].loc;
|
|
2887 _msf_output.MAPQ = 255;
|
|
2888 _msf_output.CIGAR = bestHitMappingInfo[i].cigar ;
|
|
2889 _msf_output.MRNAME = "*";
|
|
2890 _msf_output.MPOS = 0;
|
|
2891 _msf_output.ISIZE = 0;
|
|
2892
|
|
2893
|
|
2894 _msf_output.SEQ = _tmpSeq;
|
|
2895 _msf_output.QUAL = _tmpQual;
|
|
2896
|
|
2897 _msf_output.optSize = 2;
|
|
2898 _msf_output.optFields = _msf_optionalFields;
|
|
2899
|
|
2900 _msf_optionalFields[0].tag = "NM";
|
|
2901 _msf_optionalFields[0].type = 'i';
|
|
2902 _msf_optionalFields[0].iVal = bestHitMappingInfo[i].err;
|
|
2903
|
|
2904 _msf_optionalFields[1].tag = "MD";
|
|
2905 _msf_optionalFields[1].type = 'Z';
|
|
2906 _msf_optionalFields[1].sVal = bestHitMappingInfo[i].md;
|
|
2907
|
|
2908 output(_msf_output);
|
|
2909 }
|
|
2910 }
|
|
2911 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo));
|
|
2912 }
|
|
2913 /**********************************************/
|
|
2914 int compare (const void *a, const void *b)
|
|
2915 {
|
|
2916 return ((Pair *)a)->hv - ((Pair *)b)->hv;
|
|
2917 /*char *s1 = ((Pair *)a)->hv;
|
|
2918 char *s2 = ((Pair *)b)->hv;
|
|
2919 int i = 0;
|
|
2920
|
|
2921 int diff = 0;
|
|
2922 int sign = 0;
|
|
2923
|
|
2924 for(i = 0; i < SEQ_LENGTH; i++)
|
|
2925 {
|
|
2926 diff += (s1[i] != s2[i]);
|
|
2927 if(s1[i] > s2[i])
|
|
2928 sign++;
|
|
2929 else if(s1[i] < s2[i])
|
|
2930 sign--;
|
|
2931 }
|
|
2932
|
|
2933 return diff*sign;*/
|
|
2934 // return strncmp(s1, s2,SEQ_LENGTH);
|
|
2935
|
|
2936 }
|
|
2937 /**********************************************/
|
|
2938 void preProcessReads()
|
|
2939 {
|
|
2940 int i = 0;
|
|
2941
|
|
2942 _msf_sort_seqList = getMem(_msf_seqListSize * sizeof(Pair));
|
|
2943 for(i = 0; i < _msf_seqListSize; i++)
|
|
2944 {
|
|
2945 _msf_sort_seqList[i].hv = hashVal(_msf_seqList[i].seq);
|
|
2946
|
|
2947 _msf_sort_seqList[i].readNumber = i;
|
|
2948 }
|
|
2949
|
|
2950 qsort(_msf_sort_seqList, _msf_seqListSize, sizeof(Pair), compare);
|
|
2951
|
|
2952 /*
|
|
2953 for(i = 0; i < _msf_seqListSize; i++)
|
|
2954 {
|
|
2955 //printf("%s\n", _msf_sort_seqList[i].hv);
|
|
2956 }
|
|
2957 */
|
|
2958
|
|
2959 _msf_map_sort_seqList = getMem(_msf_seqListSize * sizeof(int));
|
|
2960
|
|
2961 for(i = 0; i < _msf_seqListSize; i++)
|
|
2962 _msf_map_sort_seqList[_msf_seqList[i].readNumber] = i;
|
|
2963
|
|
2964 }
|
|
2965 /**********************************************/
|
|
2966
|
|
2967 int verifySingleEnd(int index, char* seq, int offset)
|
|
2968 {
|
|
2969 int curOff = 0;
|
|
2970 int i;
|
|
2971
|
|
2972 char *ref;
|
|
2973
|
|
2974 int err;
|
|
2975 int errCnt =0;
|
|
2976 int errCntOff = 0;
|
|
2977 int NCntOff = 0;
|
|
2978
|
|
2979 ref = _msf_refGen + index - 1;
|
|
2980
|
|
2981 verificationCnt++;
|
|
2982
|
|
2983 for (i = 0; i < SEQ_LENGTH; i++)
|
|
2984 {
|
|
2985 err = *ref != *seq;
|
|
2986 errCnt += err;
|
|
2987 if (errCnt > errThreshold)
|
|
2988 {
|
|
2989
|
|
2990 return -1;
|
|
2991 }
|
|
2992
|
|
2993 if (i >= _msf_samplingLocs[curOff] && i <= _msf_samplingLocsEnds[curOff])
|
|
2994 {
|
|
2995 errCntOff += err;
|
|
2996 NCntOff += (*seq == 'N');
|
|
2997 }
|
|
2998 else if (curOff < _msf_samplingLocsSize && i>=_msf_samplingLocs[curOff+1])
|
|
2999 {
|
|
3000
|
|
3001 if (errCntOff == 0 && NCntOff == 0 && offset > curOff)
|
|
3002 {
|
|
3003 return -1;
|
|
3004 }
|
|
3005
|
|
3006 errCntOff = 0;
|
|
3007 NCntOff = 0;
|
|
3008 curOff++;
|
|
3009
|
|
3010 if ( i >= _msf_samplingLocs[curOff])
|
|
3011 {
|
|
3012 errCntOff += err;
|
|
3013 NCntOff += (*seq == 'N');
|
|
3014 }
|
|
3015 }
|
|
3016
|
|
3017 ref++;
|
|
3018 seq++;
|
|
3019 }
|
|
3020 return errCnt;
|
|
3021 }
|
|
3022
|
|
3023 /*********************************************/
|
|
3024 void initFAST(Read *seqList, int seqListSize, int *samplingLocs, int samplingLocsSize, char *genFileName)
|
|
3025 {
|
|
3026 int i;
|
|
3027
|
|
3028 if (_msf_optionalFields == NULL)
|
|
3029 {
|
|
3030 _msf_op = getMem(SEQ_LENGTH);
|
|
3031 if (pairedEndMode)
|
|
3032 {
|
|
3033 _msf_optionalFields = getMem(8*sizeof(OPT_FIELDS));
|
|
3034 }
|
|
3035 else
|
|
3036 {
|
|
3037 _msf_optionalFields = getMem(2*sizeof(OPT_FIELDS));
|
|
3038 }
|
|
3039
|
|
3040 for (i=0; i<200;i++)
|
|
3041 {
|
|
3042 sprintf(_msf_numbers[i],"%d%c",i, '\0');
|
|
3043 }
|
|
3044 sprintf(_msf_cigar, "%dM", SEQ_LENGTH);
|
|
3045 }
|
|
3046
|
|
3047 if (_msf_samplingLocsEnds == NULL)
|
|
3048 {
|
|
3049 _msf_samplingLocs = samplingLocs;
|
|
3050 _msf_samplingLocsSize = samplingLocsSize;
|
|
3051
|
|
3052 _msf_samplingLocsEnds = getMem(sizeof(int)*_msf_samplingLocsSize);
|
|
3053 for (i=0; i<_msf_samplingLocsSize; i++)
|
|
3054 {
|
|
3055 _msf_samplingLocsEnds[i]=_msf_samplingLocs[i]+WINDOW_SIZE-1;
|
|
3056 }
|
|
3057
|
|
3058 _msf_seqList = seqList;
|
|
3059 _msf_seqListSize = seqListSize;
|
|
3060
|
|
3061 preProcessReads();
|
|
3062
|
|
3063 _msf_oeaMapping = getMem(_msf_seqListSize * sizeof(int));
|
|
3064 for(i = 0; i < _msf_seqListSize; i++)
|
|
3065 {
|
|
3066 _msf_oeaMapping[i] = 0;
|
|
3067 }
|
|
3068
|
|
3069 _msf_discordantMapping = getMem(_msf_seqListSize * sizeof(int));
|
|
3070 for(i = 0; i < _msf_seqListSize; i++)
|
|
3071 {
|
|
3072 _msf_discordantMapping[i] = 0;
|
|
3073 }
|
|
3074
|
|
3075 }
|
|
3076
|
|
3077 if (_msf_refGenName == NULL)
|
|
3078 {
|
|
3079 _msf_refGenName = getMem(4*SEQ_LENGTH);
|
|
3080 }
|
|
3081 _msf_refGen = getRefGenome();
|
|
3082 _msf_refGenLength = strlen(_msf_refGen);
|
|
3083
|
|
3084 _msf_refGenOffset = getRefGenomeOffset();
|
|
3085 snprintf(_msf_refGenName, 4*SEQ_LENGTH,"%s%c", getRefGenomeName(), '\0');
|
|
3086 _msf_refGenName[strlen(getRefGenomeName())] = '\0';
|
|
3087
|
|
3088
|
|
3089 if (_msf_verifiedLocs != NULL){
|
|
3090 freeMem(_msf_verifiedLocs, sizeof(int) * (_msf_refGenLength+1));
|
|
3091 }
|
|
3092
|
|
3093 _msf_verifiedLocs = (int *) getMem(sizeof(int)*(_msf_refGenLength+1));
|
|
3094
|
|
3095 for (i=0; i<=_msf_refGenLength; i++)
|
|
3096 _msf_verifiedLocs[i] = _msf_seqListSize*10+1;
|
|
3097
|
|
3098
|
|
3099
|
|
3100 if (pairedEndMode && _msf_seqHits == NULL)
|
|
3101 {
|
|
3102
|
|
3103 _msf_mappingInfo = getMem(seqListSize * sizeof (MappingInfo));
|
|
3104
|
|
3105 for (i=0; i<seqListSize; i++)
|
|
3106 {
|
|
3107 //_msf_mappingInfo[i].next = getMem(sizeof(MappingLocations));
|
|
3108 _msf_mappingInfo[i].next = NULL;
|
|
3109 _msf_mappingInfo[i].size = 0;
|
|
3110 }
|
|
3111
|
|
3112 _msf_seqHits = getMem((_msf_seqListSize) * sizeof(int));
|
|
3113
|
|
3114
|
|
3115 for (i=0; i<_msf_seqListSize; i++)
|
|
3116 {
|
|
3117 _msf_seqHits[i] = 0;
|
|
3118 }
|
|
3119
|
|
3120 _msf_readHasConcordantMapping = getMem(_msf_seqListSize / 2 * sizeof(char));
|
|
3121 for(i = 0; i < _msf_seqListSize/2; i++)
|
|
3122 {
|
|
3123 _msf_readHasConcordantMapping[i] = 0;
|
|
3124 }
|
|
3125
|
|
3126 initLoadingRefGenome(genFileName);
|
|
3127 }
|
|
3128
|
|
3129 if (_msf_refGenOffset == 0)
|
|
3130 {
|
|
3131 _msf_refGenBeg = 1;
|
|
3132 }
|
|
3133 else
|
|
3134 {
|
|
3135 _msf_refGenBeg = CONTIG_OVERLAP - SEQ_LENGTH + 2;
|
|
3136 }
|
|
3137 _msf_refGenEnd = _msf_refGenLength - SEQ_LENGTH + 1;
|
|
3138
|
|
3139
|
|
3140 }
|
|
3141 /**********************************************/
|
|
3142 void finalizeFAST()
|
|
3143 {
|
|
3144 freeMem(_msf_seqHits, (_msf_seqListSize) * sizeof(int));
|
|
3145 freeMem(_msf_refGenName, 4*SEQ_LENGTH);
|
|
3146
|
|
3147
|
|
3148 /*
|
|
3149 int i;
|
|
3150 for (i=0; i<_msf_rIndexSize; i++)
|
|
3151 {
|
|
3152 freeMem(_msf_rIndex[i].seqInfo, _msf_rIndex[i].seqInfo[0]+1);
|
|
3153 }
|
|
3154 freeMem(_msf_rIndex, _msf_rIndexSize);*/
|
|
3155
|
|
3156
|
|
3157 freeMem(_msf_map_sort_seqList, sizeof(Pair) * _msf_seqListSize);
|
|
3158 freeMem(_msf_sort_seqList, sizeof(int) * _msf_seqListSize);
|
|
3159
|
|
3160 }
|
|
3161
|
|
/*
  Applies Levenshtein dynamic programming.
  Unlike the verifySingleEndEditDistance function, this function builds
  only one dynamic-programming table, whereas verifySingleEndEditDistance
  builds two tables, one for the right string and one for the left string.
*/
|
|
3169 int editDistance(int refIndex, char *seq, int seqLength, char *matrix)
|
|
3170 {
|
|
3171 int i = 0;
|
|
3172 int size = 0;
|
|
3173 int error = 0;
|
|
3174 int rIndex = 0;
|
|
3175 int directionIndex = 0;
|
|
3176
|
|
3177 int min = 0;
|
|
3178 int minIndex =0;
|
|
3179
|
|
3180 int tempUp = 0;
|
|
3181 int tempDown = 0;
|
|
3182
|
|
3183 char *ref;
|
|
3184
|
|
3185 int errorString = 0;
|
|
3186 /*
|
|
3187 1: Up
|
|
3188 2: Side
|
|
3189 3: Diagnoal Match
|
|
3190 4: Diagnoal Mismatch
|
|
3191 */
|
|
3192
|
|
3193 int upValue;
|
|
3194 int diagValue;
|
|
3195 int sideValue;
|
|
3196
|
|
3197 ref = _msf_refGen + refIndex - 1;
|
|
3198
|
|
3199 rIndex = 1;
|
|
3200
|
|
3201 for(i=0; i <= errThreshold; i++)
|
|
3202 {
|
|
3203 score[0][i] = i;
|
|
3204 score[i][0] = i;
|
|
3205 }
|
|
3206
|
|
3207 while(rIndex <= seqLength +errThreshold)
|
|
3208 {
|
|
3209 tempUp = ((rIndex - errThreshold) > 0 ? ((rIndex > seqLength) ? seqLength - errThreshold :rIndex - errThreshold) : 1 );
|
|
3210 tempDown = ((rIndex >= seqLength-errThreshold ) ? seqLength+1 :rIndex + errThreshold + 1);
|
|
3211 for(i = tempUp ; i < tempDown ; i++)
|
|
3212 {
|
|
3213 errorString = (*(ref+rIndex-1) == *(seq+i-1));
|
|
3214
|
|
3215 upValue = score[i-1][rIndex]+1;
|
|
3216 diagValue = score[i-1][rIndex-1]+ !errorString;
|
|
3217 sideValue = score[i][rIndex-1]+1;
|
|
3218
|
|
3219 if(i != tempUp && i != tempDown-1)
|
|
3220 score[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
3221
|
|
3222 else if( (i == ((rIndex - errThreshold) > 0 ? rIndex - errThreshold : 1)) && rIndex <= seqLength )
|
|
3223 score[i][rIndex] = min(sideValue, diagValue);
|
|
3224 else if(rIndex > seqLength && (i == seqLength - errThreshold) )
|
|
3225 score[i][rIndex] = sideValue;
|
|
3226 else
|
|
3227 score[i][rIndex] = min(diagValue , upValue);
|
|
3228
|
|
3229 if(i == tempUp)
|
|
3230 error = score[i][rIndex];
|
|
3231 else if(error > score[i][rIndex])
|
|
3232 error = score[i][rIndex];
|
|
3233 }
|
|
3234 rIndex++;
|
|
3235 }
|
|
3236
|
|
3237 min = score[seqLength][seqLength+errThreshold];
|
|
3238 minIndex = seqLength + errThreshold;
|
|
3239
|
|
3240 // Find the Best error for all the possible ways.
|
|
3241 for(i = 1; i <= 2*errThreshold; i++)
|
|
3242 {
|
|
3243 if(min >= score[seqLength][seqLength+errThreshold-i] && seqLength+errThreshold-i > 0)
|
|
3244 {
|
|
3245 min = score[seqLength][seqLength+errThreshold-i];
|
|
3246 minIndex = seqLength+errThreshold-i;
|
|
3247 }
|
|
3248 }
|
|
3249
|
|
3250 error = score[seqLength][minIndex];
|
|
3251
|
|
3252 directionIndex = seqLength;
|
|
3253 rIndex = minIndex;
|
|
3254 while(directionIndex != 0 || rIndex != 0)
|
|
3255 {
|
|
3256
|
|
3257 if(rIndex == 0)
|
|
3258 {
|
|
3259 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1)
|
|
3260 {
|
|
3261 matrix[size] = *(seq+directionIndex-1);
|
|
3262 size++;
|
|
3263 matrix[size] = 'I';
|
|
3264 directionIndex--;
|
|
3265 }
|
|
3266 }
|
|
3267 else if(directionIndex == 0)
|
|
3268 {
|
|
3269 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1)
|
|
3270 {
|
|
3271 matrix[size] = *(ref+rIndex-1);
|
|
3272 size++;
|
|
3273 matrix[size] = 'D';
|
|
3274 rIndex--;
|
|
3275 }
|
|
3276 }
|
|
3277 else if(directionIndex-rIndex == errThreshold)
|
|
3278 {
|
|
3279 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1)
|
|
3280 {
|
|
3281 matrix[size] = *(seq+directionIndex-1);
|
|
3282 size++;
|
|
3283 matrix[size] = 'I';
|
|
3284 directionIndex--;
|
|
3285 }
|
|
3286 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
|
|
3287 {
|
|
3288 matrix[size] = *(ref+rIndex-1);
|
|
3289 rIndex--;
|
|
3290 directionIndex--;
|
|
3291 }
|
|
3292 else
|
|
3293 {
|
|
3294 matrix[size] = 'M';
|
|
3295 rIndex--;
|
|
3296 directionIndex--;
|
|
3297 }
|
|
3298
|
|
3299 }
|
|
3300 else if(rIndex - directionIndex == errThreshold)
|
|
3301 {
|
|
3302 if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1)
|
|
3303 {
|
|
3304 matrix[size] = *(ref+rIndex-1);
|
|
3305 size++;
|
|
3306 matrix[size] = 'D';
|
|
3307 rIndex--;
|
|
3308 }
|
|
3309 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
|
|
3310 {
|
|
3311 matrix[size] = *(ref+rIndex-1);
|
|
3312 rIndex--;
|
|
3313 directionIndex--;
|
|
3314 }
|
|
3315 else
|
|
3316 {
|
|
3317 matrix[size] = 'M';
|
|
3318 rIndex--;
|
|
3319 directionIndex--;
|
|
3320 }
|
|
3321 }
|
|
3322 else
|
|
3323 {
|
|
3324 if(score[directionIndex][rIndex] - score[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
3325 {
|
|
3326 matrix[size] = *(seq+directionIndex-1);
|
|
3327 size++;
|
|
3328 matrix[size] = 'I';
|
|
3329 directionIndex--;
|
|
3330 }
|
|
3331 else if(score[directionIndex][rIndex] - score[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
3332 {
|
|
3333 matrix[size] = *(ref+rIndex-1);
|
|
3334 size++;
|
|
3335 matrix[size] = 'D';
|
|
3336 rIndex--;
|
|
3337 }
|
|
3338 else if( score[directionIndex][rIndex] - score[directionIndex-1][rIndex-1] == 1 )
|
|
3339 {
|
|
3340 matrix[size] = *(ref+rIndex-1);
|
|
3341 rIndex--;
|
|
3342 directionIndex--;
|
|
3343 }
|
|
3344 else
|
|
3345 {
|
|
3346 matrix[size] = 'M';
|
|
3347 rIndex--;
|
|
3348 directionIndex--;
|
|
3349 }
|
|
3350 }
|
|
3351 size++;
|
|
3352 }
|
|
3353
|
|
3354 matrix[size] = '\0';
|
|
3355
|
|
3356 char returnString[200];
|
|
3357
|
|
3358 returnString[0] = '\0';
|
|
3359 reverse(matrix, returnString, size);
|
|
3360 sprintf(matrix, "%s", returnString);
|
|
3361
|
|
3362 return error;
|
|
3363 }
|
|
3364
|
|
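/*
 Applies Levenshtein dynamic programming in both the right and the left
 direction, continuing until either the error threshold is reached or the
 end of the string is reached.

 NOTE: this description appears to belong to the
 verifySingleEndEditDistance* functions further below, not to the hash
 function that immediately follows it.
 */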
|
|
/*
 * Packs the first six characters of seq into an integer, two bits per base
 * (A = 0, C = 1, G = 2, T = 3), with the first base ending up in the most
 * significant position.  Returns -1 as soon as a character other than an
 * upper-case A, C, G or T is met.  Exactly six characters are read.
 */
int msfHashVal(char *seq)
{
	int hash = 0;
	int pos;

	for (pos = 0; pos < 6; pos++)
	{
		char base = seq[pos];
		int code;

		if (base == 'A')
			code = 0;
		else if (base == 'C')
			code = 1;
		else if (base == 'G')
			code = 2;
		else if (base == 'T')
			code = 3;
		else
			return -1;

		hash = (hash << 2) | code;
	}

	return hash;
}
|
|
3401
|
|
3402
|
|
3403
|
|
3404 int verifySingleEndEditDistance2(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
|
|
3405 {
|
|
3406 int i = 0;
|
|
3407
|
|
3408 char * ref;
|
|
3409 char * tempref;
|
|
3410
|
|
3411 int rIndex = 0; //reference Index
|
|
3412
|
|
3413 int e = errThreshold;
|
|
3414 int error = 0;
|
|
3415 int error1 = 0;
|
|
3416 int totalError = 0;
|
|
3417
|
|
3418
|
|
3419 /*
|
|
3420 1: Up
|
|
3421 2: Side
|
|
3422 3: Diagnoal Match
|
|
3423 4: Diagnoal Mismatch
|
|
3424 */
|
|
3425
|
|
3426
|
|
3427 int minIndex1 = 0;
|
|
3428 int minIndex2 = 0;
|
|
3429
|
|
3430
|
|
3431 int directionIndex = 0;
|
|
3432
|
|
3433 int size = 0;
|
|
3434
|
|
3435 int startIndex1 = 0;
|
|
3436
|
|
3437 rIndex = 1;
|
|
3438
|
|
3439
|
|
3440 char matrixR[200];
|
|
3441 char matrixL[200];
|
|
3442
|
|
3443 ref = _msf_refGen + refIndex - 1;
|
|
3444 tempref = _msf_refGen + refIndex - 1;
|
|
3445
|
|
3446 int jumpIndex = 0;
|
|
3447
|
|
3448 if(rSeqLength != 0)
|
|
3449 {
|
|
3450 error1 = forwardEditDistance2SSE2(ref+segLength+jumpIndex, rSeqLength-jumpIndex, rSeq+jumpIndex, rSeqLength-jumpIndex);
|
|
3451 if(error1 == -1)
|
|
3452 return -1;
|
|
3453 }
|
|
3454
|
|
3455
|
|
3456 if(lSeqLength != 0)
|
|
3457 {
|
|
3458 error = backwardEditDistance2SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
|
|
3459 if(error == -1)
|
|
3460 {
|
|
3461 return -1;
|
|
3462 }
|
|
3463 }
|
|
3464
|
|
3465 matrixL[0] = '\0';
|
|
3466 matrixR[0] = '\0';
|
|
3467
|
|
3468
|
|
3469 ref = _msf_refGen + refIndex - 1;
|
|
3470
|
|
3471 if(error1+error > errThreshold)
|
|
3472 return -1;
|
|
3473
|
|
3474 ref = _msf_refGen + refIndex - 1;
|
|
3475
|
|
3476 rIndex = startIndex1+1;
|
|
3477
|
|
3478 int i0 = 0;
|
|
3479 int i1 = 0;
|
|
3480 int i2 = 0;
|
|
3481
|
|
3482 __m128i R0;
|
|
3483 __m128i R1;
|
|
3484
|
|
3485 __m128i Side1, Side2,Side; //side matrix
|
|
3486 __m128i Down1, Down2,Down; //down matrix
|
|
3487 __m128i Diag; //
|
|
3488
|
|
3489 __m128i tmp;
|
|
3490
|
|
3491 /* initialize */
|
|
3492 R0 = _mm_setzero_si128 ();
|
|
3493 R1 = _mm_setzero_si128 ();
|
|
3494 Diag = _mm_setzero_si128 ();
|
|
3495 Side1 = _mm_setzero_si128 ();
|
|
3496 Side2 = _mm_setzero_si128 ();
|
|
3497 Down1 = _mm_setzero_si128 ();
|
|
3498 Down2 = _mm_setzero_si128 ();
|
|
3499 Down = _mm_setzero_si128 ();
|
|
3500 Side = _mm_setzero_si128 ();
|
|
3501 tmp = _mm_setzero_si128 ();
|
|
3502 /* end initialize */
|
|
3503
|
|
3504 int mismatch[3] = {0,0,0};
|
|
3505
|
|
3506 if(lSeqLength != 0)
|
|
3507 {
|
|
3508 char *a;
|
|
3509 char *b;
|
|
3510
|
|
3511 a = ref-1;
|
|
3512 b = lSeq+lSeqLength-1;
|
|
3513
|
|
3514 R0 = _mm_insert_epi16(R0,0,0);
|
|
3515
|
|
3516 score[0][0] = 0;
|
|
3517
|
|
3518 R1 = _mm_insert_epi16(R1,1,0);
|
|
3519 R1 = _mm_insert_epi16(R1,1,1);
|
|
3520
|
|
3521 score[1][0] = 1;
|
|
3522 direction1[1][0] = 1;
|
|
3523 score[0][1] = 1;
|
|
3524 direction1[0][1] = 2;
|
|
3525
|
|
3526 mismatch[0] = ((a[0]) != (b[0]));
|
|
3527
|
|
3528 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
3529 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
|
|
3530 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3531
|
|
3532 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
3533 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
3534 Side1 = _mm_insert_epi16(Side1,2*e,2);
|
|
3535
|
|
3536 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
3537 Side2 = _mm_insert_epi16(Side2,1,1);
|
|
3538 Side2 = _mm_insert_epi16(Side2,1,2);
|
|
3539
|
|
3540 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
3541 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
3542 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
3543
|
|
3544 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
3545 Down2 = _mm_insert_epi16(Down2,1,1);
|
|
3546 Down2 = _mm_insert_epi16(Down2,1,2);
|
|
3547
|
|
3548 tmp = _mm_slli_si128(R1,2);
|
|
3549
|
|
3550 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3551 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
3552
|
|
3553 i0 = _mm_extract_epi16(R0, 0);
|
|
3554 i1 = _mm_extract_epi16(R0, 1);
|
|
3555 i2 = _mm_extract_epi16(R0, 2);
|
|
3556
|
|
3557 score[0][2] = i0;
|
|
3558 score[1][1] = i1;
|
|
3559 score[2][0] = i2;
|
|
3560
|
|
3561 direction1[0][2] = 2;
|
|
3562 direction1[1][1] = ((mismatch[0] == 0)? 3 : 4);
|
|
3563 direction1[2][0] = 1;
|
|
3564
|
|
3565 for (i = 3; i < 2*lSeqLength; i++)
|
|
3566 {
|
|
3567 if(i % 2 ==1)
|
|
3568 {
|
|
3569 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3570 mismatch[0] = ( *(a-((i+1)/2-1)) != *(b-((i-1)/2-1)) );
|
|
3571 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3572 mismatch[1] = ( *(a-((i-1)/2-1)) != *(b-((i+1)/2-1)) );
|
|
3573 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3574
|
|
3575 tmp = _mm_srli_si128(R0,2);
|
|
3576
|
|
3577 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
|
|
3578 R1 = _mm_min_epi16(R1,R0+Down1);
|
|
3579
|
|
3580 i0 = _mm_extract_epi16(R1, 0);
|
|
3581 i1 = _mm_extract_epi16(R1, 1);
|
|
3582
|
|
3583 score[i/2][i/2+1] = i0;
|
|
3584 score[i/2+1][i/2] = i1;
|
|
3585
|
|
3586 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
|
|
3587 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
|
|
3588 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4;
|
|
3589
|
|
3590 direction1[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3591 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 :
|
|
3592 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4;
|
|
3593
|
|
3594 if(i > 2 * lSeqLength - 2)
|
|
3595 {
|
|
3596 error = min(error, i1);
|
|
3597 if(error == i1)
|
|
3598 minIndex1 = i-lSeqLength;
|
|
3599 }
|
|
3600 }
|
|
3601
|
|
3602 else if(i % 2 == 0)
|
|
3603 {
|
|
3604 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) );
|
|
3605 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3606 mismatch[1] = ( *(a-(i/2-1)) != *(b-(i/2-1)) );
|
|
3607 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3608 mismatch[2] = ( *(a-(i/2-2)) != *(b-(i/2)) );
|
|
3609 Diag = _mm_insert_epi16(Diag,mismatch[2],2);
|
|
3610
|
|
3611 tmp = _mm_slli_si128(R1,2);
|
|
3612
|
|
3613 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3614 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
3615
|
|
3616 i0 = _mm_extract_epi16(R0, 0);
|
|
3617 i1 = _mm_extract_epi16(R0, 1);
|
|
3618 i2 = _mm_extract_epi16(R0, 2);
|
|
3619
|
|
3620 score[i/2-1][i/2+1] = i0;
|
|
3621 score[i/2][i/2] = i1;
|
|
3622 score[i/2+1][i/2-1] = i2;
|
|
3623
|
|
3624 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 : (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
|
|
3625
|
|
3626 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3627 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
|
|
3628 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4;
|
|
3629
|
|
3630 direction1[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2] == 0) ? 3 :
|
|
3631 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4;
|
|
3632
|
|
3633 if( (i/2) % segLength == 0 && i1 == 0) // the segment has been processed no need to process it again
|
|
3634 {
|
|
3635 return -1;
|
|
3636 }
|
|
3637
|
|
3638 if(i == 2 * lSeqLength - 2)
|
|
3639 {
|
|
3640 error = i2;
|
|
3641 minIndex1 = i-lSeqLength;
|
|
3642 }
|
|
3643 }
|
|
3644 }
|
|
3645
|
|
3646 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
3647
|
|
3648 //fill the first part of the error
|
|
3649 mismatch[0] = ( *(a-(i/2)) != *(b-(i/2-2)) );
|
|
3650 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3651 mismatch[1] = ( *(a-(i/2-1)) !=*(b-(i/2-1)) );
|
|
3652 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3653 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3654
|
|
3655 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3656 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
|
|
3657
|
|
3658 i0 = _mm_extract_epi16(R0, 0);
|
|
3659 i1 = _mm_extract_epi16(R0, 1);
|
|
3660
|
|
3661 error = min(error, i1);
|
|
3662 if(error == i1)
|
|
3663 minIndex1 = i-lSeqLength;
|
|
3664
|
|
3665 score[i/2-1][i/2+1] = i0;
|
|
3666 score[i/2][i/2] = i1;
|
|
3667
|
|
3668 direction1[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
|
|
3669 (score[i/2-1][i/2+1]-score[i/2-1][i/2]) ? 2 : 4;
|
|
3670
|
|
3671 direction1[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3672 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
|
|
3673 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4;
|
|
3674
|
|
3675 //fill the second part of the error
|
|
3676 i++;
|
|
3677 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3678 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
3679 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) );
|
|
3680 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
|
|
3681 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3682
|
|
3683 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
3684 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
|
|
3685
|
|
3686 i0 = _mm_extract_epi16(R1, 0);
|
|
3687 i1 = _mm_extract_epi16(R1, 1);
|
|
3688
|
|
3689 error = min(error, i1);
|
|
3690 if(error == i1)
|
|
3691 minIndex1 = i-lSeqLength;
|
|
3692
|
|
3693 score[i/2-1][i/2+2] = i0;
|
|
3694 score[i/2][i/2+1] = i1;
|
|
3695
|
|
3696 direction1[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 :
|
|
3697 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 4;
|
|
3698
|
|
3699 direction1[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2]) ? 3 :
|
|
3700 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
|
|
3701 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4;
|
|
3702
|
|
3703 //fill the last the last element of the matrix
|
|
3704 i++;
|
|
3705 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3706 mismatch[0] = ( *(a-(i/2)) != *(b-(lSeqLength-1)) );
|
|
3707 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3708
|
|
3709 Down = _mm_insert_epi16(Down,1,0);
|
|
3710
|
|
3711 Side = _mm_insert_epi16(Side,1,0);
|
|
3712
|
|
3713 tmp = _mm_srli_si128(R1,2);
|
|
3714
|
|
3715 R0 = _mm_min_epi16(R1+Down, R0+Diag);
|
|
3716 R0 = _mm_min_epi16(R0,tmp+Side);
|
|
3717
|
|
3718 i0 = _mm_extract_epi16(R0, 0);
|
|
3719
|
|
3720 error = min(error, i0);
|
|
3721 if(error == 0)
|
|
3722 return -1;
|
|
3723 if(error == i0)
|
|
3724 minIndex1 = i-lSeqLength;
|
|
3725 if(mismatch[0] == 0)
|
|
3726 direction1[lSeqLength][lSeqLength+errThreshold] = 3;
|
|
3727 else
|
|
3728 {
|
|
3729 if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength][lSeqLength+errThreshold-1] == 1)
|
|
3730 direction1[lSeqLength][lSeqLength+errThreshold] = 2;
|
|
3731 else if(score[lSeqLength][lSeqLength+errThreshold] - score[lSeqLength-1][lSeqLength+errThreshold] == 1)
|
|
3732 direction1[lSeqLength][lSeqLength+errThreshold] = 1;
|
|
3733 else
|
|
3734 direction1[lSeqLength][lSeqLength+errThreshold] = 4;
|
|
3735 }
|
|
3736 }
|
|
3737 error1 = error;
|
|
3738 error = 0;
|
|
3739
|
|
3740 directionIndex = lSeqLength;
|
|
3741 rIndex = minIndex1;
|
|
3742
|
|
3743
|
|
3744 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
|
|
3745
|
|
3746 ref = ref + segLength;
|
|
3747
|
|
3748 if(rSeqLength <= e)
|
|
3749 {
|
|
3750 char *a;
|
|
3751 char *b;
|
|
3752
|
|
3753 int tmp_index = 0;
|
|
3754
|
|
3755 a = ref;
|
|
3756 b = rSeq;
|
|
3757
|
|
3758 for(tmp_index = 0; tmp_index < rSeqLength; tmp_index++)
|
|
3759 {
|
|
3760 matrixR[tmp_index] = (a[tmp_index]==b[tmp_index]) ? 'M' : a[tmp_index] ;
|
|
3761 }
|
|
3762 matrixR[tmp_index] = '\0';
|
|
3763 }
|
|
3764 else if(rSeqLength != 0 && rSeqLength >= e)
|
|
3765 {
|
|
3766 char *a;
|
|
3767 char *b;
|
|
3768
|
|
3769 a = ref;
|
|
3770 b = rSeq;
|
|
3771
|
|
3772 R0 = _mm_sub_epi8(R0, R0);
|
|
3773 R1 = _mm_sub_epi8(R1, R1);
|
|
3774
|
|
3775 R0 = _mm_insert_epi16(R0,0,0);
|
|
3776
|
|
3777 score[0][0] = 0;
|
|
3778
|
|
3779 R1 = _mm_insert_epi16(R1,1,0);
|
|
3780 R1 = _mm_insert_epi16(R1,1,1);
|
|
3781
|
|
3782 score[1][0] = 1;
|
|
3783 direction2[1][0] = 1;
|
|
3784 score[0][1] = 1;
|
|
3785 direction2[0][1] = 2;
|
|
3786
|
|
3787 mismatch[0] = ((a[0]) != (b[0]));
|
|
3788
|
|
3789 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
3790 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
|
|
3791 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3792
|
|
3793 Side1 = _mm_insert_epi16(Side1,1,0);
|
|
3794 Side1 = _mm_insert_epi16(Side1,1,1);
|
|
3795 Side1 = _mm_insert_epi16(Side1,2*e,2);
|
|
3796
|
|
3797 Side2 = _mm_insert_epi16(Side2,2*e,0);
|
|
3798 Side2 = _mm_insert_epi16(Side2,1,1);
|
|
3799 Side2 = _mm_insert_epi16(Side2,1,2);
|
|
3800
|
|
3801 Down1 = _mm_insert_epi16(Down1,1,0);
|
|
3802 Down1 = _mm_insert_epi16(Down1,1,1);
|
|
3803 Down1 = _mm_insert_epi16(Down1,2*e,2);
|
|
3804
|
|
3805 Down2 = _mm_insert_epi16(Down2,2*e,0);
|
|
3806 Down2 = _mm_insert_epi16(Down2,1,1);
|
|
3807 Down2 = _mm_insert_epi16(Down2,1,2);
|
|
3808
|
|
3809 tmp = _mm_slli_si128(R1,2);
|
|
3810
|
|
3811 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3812 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
3813
|
|
3814 i0 = _mm_extract_epi16(R0, 0);
|
|
3815 i1 = _mm_extract_epi16(R0, 1);
|
|
3816 i2 = _mm_extract_epi16(R0, 2);
|
|
3817
|
|
3818 score[0][2] = i0;
|
|
3819 score[1][1] = i1;
|
|
3820 score[2][0] = i2;
|
|
3821
|
|
3822 direction2[0][2] = 2;
|
|
3823 direction2[1][1] = ((mismatch[0] == 0)? 3 : 4);
|
|
3824 direction2[2][0] = 1;
|
|
3825
|
|
3826
|
|
3827 for (i = 3; i < 2*rSeqLength; i++)
|
|
3828 {
|
|
3829 if(i % 2 ==1)
|
|
3830 {
|
|
3831 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3832 mismatch[0] = ((a[(i+1)/2-1]) != (b[(i-1)/2-1]));
|
|
3833 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3834 mismatch[1] = ((a[(i-1)/2-1]) != (b[(i+1)/2-1]));
|
|
3835 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3836
|
|
3837 tmp = _mm_srli_si128(R0,2);
|
|
3838
|
|
3839 R1 = _mm_min_epi16(tmp+Side1, R1+Diag);
|
|
3840 R1 = _mm_min_epi16(R1,R0+Down1);
|
|
3841
|
|
3842 i0 = _mm_extract_epi16(R1, 0);
|
|
3843 i1 = _mm_extract_epi16(R1, 1);
|
|
3844
|
|
3845 score[i/2][i/2+1] = i0;
|
|
3846 score[i/2+1][i/2] = i1;
|
|
3847
|
|
3848 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
|
|
3849 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
|
|
3850 (score[i/2][i/2+1]-score[i/2][i/2]==1) ? 2 : 4;
|
|
3851
|
|
3852 direction2[i/2+1][i/2] = (score[i/2+1][i/2]==score[i/2][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3853 (score[i/2+1][i/2]-score[i/2][i/2]==1) ? 1 :
|
|
3854 (score[i/2+1][i/2]-score[i/2+1][i/2-1]==1)? 2 : 4;
|
|
3855
|
|
3856
|
|
3857 if(i > 2 * rSeqLength - 2)
|
|
3858 {
|
|
3859 error = min(error, i1);
|
|
3860 if(error == i1)
|
|
3861 minIndex2 = i-rSeqLength;
|
|
3862 }
|
|
3863 }
|
|
3864
|
|
3865 else if(i % 2 == 0)
|
|
3866 {
|
|
3867 mismatch[0] = ((a[i/2]) != (b[i/2-2]));
|
|
3868 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3869 mismatch[1] = ((a[i/2-1]) != (b[i/2-1]));
|
|
3870 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3871 mismatch[2] = ((a[i/2-2]) != (b[i/2]));
|
|
3872 Diag = _mm_insert_epi16(Diag,mismatch[2],2);
|
|
3873
|
|
3874 tmp = _mm_slli_si128(R1,2);
|
|
3875
|
|
3876 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3877 R0 = _mm_min_epi16(R0,tmp+Down2);
|
|
3878
|
|
3879 i0 = _mm_extract_epi16(R0, 0);
|
|
3880 i1 = _mm_extract_epi16(R0, 1);
|
|
3881 i2 = _mm_extract_epi16(R0, 2);
|
|
3882
|
|
3883 score[i/2-1][i/2+1] = i0;
|
|
3884 score[i/2][i/2] = i1;
|
|
3885 score[i/2+1][i/2-1] = i2;
|
|
3886
|
|
3887 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
|
|
3888 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
|
|
3889
|
|
3890 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3891 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
|
|
3892 (score[i/2][i/2]-score[i/2][i/2-1]==1) ? 2 : 4;
|
|
3893
|
|
3894 direction2[i/2+1][i/2-1] = (score[i/2+1][i/2-1]==score[i/2][i/2-2] && mismatch[2]==0) ? 3 :
|
|
3895 (score[i/2+1][i/2-1]-score[i/2][i/2-1]==1) ? 1 : 4;
|
|
3896
|
|
3897
|
|
3898 if(i == 2 * rSeqLength - 2)
|
|
3899 {
|
|
3900 error = i2;
|
|
3901 minIndex2 = i-rSeqLength;
|
|
3902 }
|
|
3903 }
|
|
3904 }
|
|
3905
|
|
3906 Down1 = _mm_insert_epi16(Down1,2*e,0);
|
|
3907
|
|
3908 //fill the first part of the error
|
|
3909 mismatch[0] = ((a[i/2]) != (b[i/2-2]));
|
|
3910 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3911 mismatch[1] = ((a[i/2-1]) != (b[i/2-1]));
|
|
3912 Diag = _mm_insert_epi16(Diag,mismatch[1],1);
|
|
3913 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3914
|
|
3915 R0 = _mm_min_epi16(R1+Side1, R0+Diag);
|
|
3916 R0 = _mm_min_epi16(R0,_mm_slli_si128(R1,2)+Down1);
|
|
3917
|
|
3918 i0 = _mm_extract_epi16(R0, 0);
|
|
3919 i1 = _mm_extract_epi16(R0, 1);
|
|
3920
|
|
3921 error = min(error, i1);
|
|
3922 if(error == i1)
|
|
3923 minIndex2 = i-rSeqLength;
|
|
3924
|
|
3925 score[i/2-1][i/2+1] = i0;
|
|
3926 score[i/2][i/2] = i1;
|
|
3927
|
|
3928 direction2[i/2-1][i/2+1] = (score[i/2-1][i/2+1]==score[i/2-2][i/2] && mismatch[0] == 0) ? 3 :
|
|
3929 (score[i/2-1][i/2+1]-score[i/2-1][i/2]==1) ? 2 : 4;
|
|
3930
|
|
3931 direction2[i/2][i/2] = (score[i/2][i/2]==score[i/2-1][i/2-1] && mismatch[1] == 0) ? 3 :
|
|
3932 (score[i/2][i/2]-score[i/2-1][i/2]==1) ? 1 :
|
|
3933 (score[i/2][i/2]-score[i/2][i/2-1]==1)? 2 : 4;
|
|
3934
|
|
3935
|
|
3936 //fill the second part of the error
|
|
3937 i++;
|
|
3938 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3939 Diag = _mm_insert_epi16(Diag,2*e,0);
|
|
3940 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1]));
|
|
3941 Diag = _mm_insert_epi16(Diag,mismatch[0],1);
|
|
3942 Diag = _mm_insert_epi16(Diag,2*e,2);
|
|
3943
|
|
3944 R1 = _mm_min_epi16(R0+Side1, _mm_slli_si128(R1,2)+Diag);
|
|
3945 R1 = _mm_min_epi16(R1,_mm_slli_si128(R0,2)+Down1);
|
|
3946
|
|
3947 i0 = _mm_extract_epi16(R1, 0);
|
|
3948 i1 = _mm_extract_epi16(R1, 1);
|
|
3949
|
|
3950 error = min(error, i1);
|
|
3951 if(error == i1)
|
|
3952 minIndex2 = i-rSeqLength;
|
|
3953
|
|
3954 score[i/2-1][i/2+2] = i0;
|
|
3955 score[i/2][i/2+1] = i1;
|
|
3956
|
|
3957 direction2[i/2-1][i/2+2] = (score[i/2-1][i/2+2]==score[i/2-2][i/2+1] && mismatch[0] == 0) ? 3 :
|
|
3958 (score[i/2-1][i/2+2]-score[i/2-1][i/2+1]==1) ? 2 : 3;
|
|
3959
|
|
3960 direction2[i/2][i/2+1] = (score[i/2][i/2+1]==score[i/2-1][i/2] && mismatch[0] == 0) ? 3 :
|
|
3961 (score[i/2][i/2+1]-score[i/2-1][i/2+1]==1)? 1 :
|
|
3962 (score[i/2][i/2+1]-score[i/2][i/2]==1)? 2 : 4;
|
|
3963
|
|
3964
|
|
3965 //fill the last the last element of the matrix
|
|
3966 i++;
|
|
3967 Diag = _mm_sub_epi8(Diag, Diag);
|
|
3968 mismatch[0] = ((a[i/2]) != (b[rSeqLength-1]));
|
|
3969 Diag = _mm_insert_epi16(Diag,mismatch[0],0);
|
|
3970
|
|
3971 Down = _mm_sub_epi8(Down, Down);
|
|
3972 Down = _mm_insert_epi16(Down,1,0);
|
|
3973
|
|
3974 Side = _mm_sub_epi8(Side, Side);
|
|
3975 Side = _mm_insert_epi16(Side,1,0);
|
|
3976
|
|
3977 tmp = _mm_srli_si128(R1,2);
|
|
3978
|
|
3979 R0 = _mm_min_epi16(R1+Down, R0+Diag);
|
|
3980 R0 = _mm_min_epi16(R0,tmp+Side);
|
|
3981
|
|
3982 i0 = _mm_extract_epi16(R0, 0);
|
|
3983
|
|
3984 error = min(error, i0);
|
|
3985 if(error == i0)
|
|
3986 minIndex2 = i-rSeqLength;
|
|
3987
|
|
3988 if(mismatch[0] == 0)
|
|
3989 direction2[rSeqLength][rSeqLength+errThreshold] = 3;
|
|
3990 else
|
|
3991 {
|
|
3992 if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength][rSeqLength+errThreshold-1] == 1)
|
|
3993 direction2[lSeqLength][lSeqLength+errThreshold] = 2;
|
|
3994 else if(score[rSeqLength][rSeqLength+errThreshold] - score[rSeqLength-1][rSeqLength+errThreshold] == 1)
|
|
3995 direction2[rSeqLength][rSeqLength+errThreshold] = 1;
|
|
3996 else
|
|
3997 direction2[rSeqLength][rSeqLength+errThreshold] = 4;
|
|
3998 }
|
|
3999
|
|
4000 }
|
|
4001
|
|
4002 totalError = error1 + error;
|
|
4003
|
|
4004 size = 0;
|
|
4005 directionIndex = rSeqLength;
|
|
4006 rIndex = minIndex2;
|
|
4007
|
|
4008
|
|
4009 if(rSeqLength > e)
|
|
4010 {
|
|
4011 while(directionIndex != 0 || rIndex != 0)
|
|
4012 {
|
|
4013
|
|
4014 if(direction2[directionIndex][rIndex] == 3)
|
|
4015 {
|
|
4016 matrixR[size] = 'M';
|
|
4017 rIndex--;
|
|
4018 directionIndex--;
|
|
4019 }
|
|
4020 else if(direction2[directionIndex][rIndex] == 4)
|
|
4021 {
|
|
4022 matrixR[size] = *(ref+rIndex-1);
|
|
4023 rIndex--;
|
|
4024 directionIndex--;
|
|
4025 }
|
|
4026 else if(direction2[directionIndex][rIndex] == 2)
|
|
4027 {
|
|
4028 matrixR[size] = *(ref+rIndex-1);
|
|
4029 size++;
|
|
4030 matrixR[size] = 'D';
|
|
4031 rIndex--;
|
|
4032 }
|
|
4033 else
|
|
4034 {
|
|
4035 matrixR[size] = *(rSeq+directionIndex-1);
|
|
4036 size++;
|
|
4037 matrixR[size] = 'I';
|
|
4038 directionIndex--;
|
|
4039 }
|
|
4040 size++;
|
|
4041 }
|
|
4042 matrixR[size] = '\0';
|
|
4043 }
|
|
4044 size = 0;
|
|
4045 directionIndex = lSeqLength;
|
|
4046 rIndex = minIndex1;
|
|
4047
|
|
4048 while(directionIndex != 0 || rIndex != 0)
|
|
4049 {
|
|
4050
|
|
4051 if(direction1[directionIndex][rIndex] == 3)
|
|
4052 {
|
|
4053 matrixL[size] = 'M';
|
|
4054 rIndex--;
|
|
4055 directionIndex--;
|
|
4056 }
|
|
4057 else if(direction1[directionIndex][rIndex] == 4)
|
|
4058 {
|
|
4059 matrixL[size] = *(tempref-rIndex);
|
|
4060 rIndex--;
|
|
4061 directionIndex--;
|
|
4062 }
|
|
4063 else if(direction1[directionIndex][rIndex] == 2)
|
|
4064 {
|
|
4065 matrixL[size] = 'D';
|
|
4066 size++;
|
|
4067 matrixL[size] = *(tempref-rIndex);
|
|
4068 rIndex--;
|
|
4069 }
|
|
4070 else
|
|
4071 {
|
|
4072 matrixL[size] = 'I';
|
|
4073 size++;
|
|
4074 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
4075 directionIndex--;
|
|
4076 }
|
|
4077
|
|
4078 size++;
|
|
4079 }
|
|
4080
|
|
4081 matrixL[size] = '\0';
|
|
4082
|
|
4083 char middle[200];
|
|
4084 middle[0] = '\0';
|
|
4085
|
|
4086 for(i = 0; i < segLength; i++)
|
|
4087 middle[i] = 'M';
|
|
4088 middle[segLength] = '\0';
|
|
4089
|
|
4090 char rmatrixR[200];
|
|
4091
|
|
4092 reverse(matrixR, rmatrixR, strlen(matrixR));
|
|
4093
|
|
4094 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
|
|
4095
|
|
4096 return totalError;
|
|
4097 }
|
|
4098
|
|
4099 int verifySingleEndEditDistance4(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
|
|
4100 {
|
|
4101
|
|
4102 int i = 0;
|
|
4103
|
|
4104 char * ref;
|
|
4105 char * tempref;
|
|
4106
|
|
4107 int rIndex = 0; //reference Index
|
|
4108
|
|
4109 int error = 0;
|
|
4110 int error1 = 0;
|
|
4111
|
|
4112 int error2 = 0;
|
|
4113 int error3 = 0;
|
|
4114 int totalError = 0;
|
|
4115 int errorSegment = 0;
|
|
4116
|
|
4117 int ERROR_BOUND = errThreshold;
|
|
4118
|
|
4119
|
|
4120 /*
|
|
4121 1: Up
|
|
4122 2: Side
|
|
4123 3: Diagnoal Match
|
|
4124 4: Diagnoal Mismatch
|
|
4125 */
|
|
4126
|
|
4127 int min = 0;
|
|
4128 int minIndex1 = 0;
|
|
4129 int minIndex2 = 0;
|
|
4130
|
|
4131 int directionIndex = 0;
|
|
4132
|
|
4133
|
|
4134 int size = 0;
|
|
4135
|
|
4136 ref = _msf_refGen + refIndex - 1;
|
|
4137 tempref = _msf_refGen + refIndex - 1;
|
|
4138
|
|
4139
|
|
4140 if(lSeqLength != 0)
|
|
4141 {
|
|
4142 error3 = backwardEditDistance4SSE2(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
|
|
4143 if(error3 == -1 || error3 == 0){
|
|
4144 return -1;
|
|
4145 }
|
|
4146 }
|
|
4147
|
|
4148 if(rSeqLength != 0)
|
|
4149 {
|
|
4150 error2 = forwardEditDistance4SSE2(ref+segLength, rSeqLength, rSeq, rSeqLength);
|
|
4151 if(error2 == -1)
|
|
4152 return -1;
|
|
4153 }
|
|
4154
|
|
4155 if(error2 + error3 > errThreshold)
|
|
4156 return -1;
|
|
4157
|
|
4158 rIndex = 1;
|
|
4159
|
|
4160 int prevError = 0;
|
|
4161
|
|
4162 int tempUp = 0;
|
|
4163 int tempDown = 0;
|
|
4164
|
|
4165 int errorString = 0;
|
|
4166
|
|
4167 int upValue;
|
|
4168 int diagValue;
|
|
4169 int sideValue;
|
|
4170
|
|
4171 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0)
|
|
4172 {
|
|
4173 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
|
|
4174 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
4175 for(i = tempUp ; i < tempDown ; i++)
|
|
4176 {
|
|
4177 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
|
|
4178
|
|
4179 upValue = scoreB[i-1][rIndex]+1;
|
|
4180 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
|
|
4181 sideValue = scoreB[i][rIndex-1]+1;
|
|
4182
|
|
4183 if(i != tempUp && i != tempDown-1)
|
|
4184 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
4185
|
|
4186 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
|
|
4187 scoreB[i][rIndex] = min(sideValue, diagValue);
|
|
4188 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
|
|
4189 scoreB[i][rIndex] = sideValue;
|
|
4190 else
|
|
4191 scoreB[i][rIndex] = min(diagValue , upValue);
|
|
4192
|
|
4193 if(i == tempUp)
|
|
4194 error = scoreB[i][rIndex];
|
|
4195 else if(error > scoreB[i][rIndex])
|
|
4196 error = scoreB[i][rIndex];
|
|
4197 }
|
|
4198 if(rIndex <= lSeqLength)
|
|
4199 {
|
|
4200 errorSegment = error-prevError;
|
|
4201 }
|
|
4202 rIndex++;
|
|
4203 }
|
|
4204
|
|
4205 if(lSeqLength != 0)
|
|
4206 {
|
|
4207 min = scoreB[lSeqLength][lSeqLength+errThreshold];
|
|
4208 minIndex1 = lSeqLength + errThreshold;
|
|
4209
|
|
4210 // Find the Best error for all the possible ways.
|
|
4211 for(i = 1; i <= 2*errThreshold; i++)
|
|
4212 {
|
|
4213 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0)
|
|
4214 {
|
|
4215 min = scoreB[lSeqLength][lSeqLength+errThreshold-i];
|
|
4216 minIndex1 = lSeqLength+errThreshold-i;
|
|
4217 }
|
|
4218 }
|
|
4219 error = scoreB[lSeqLength][minIndex1];
|
|
4220 }
|
|
4221
|
|
4222 error1 = error;
|
|
4223
|
|
4224 error = 0;
|
|
4225 errorSegment = 0;
|
|
4226
|
|
4227 directionIndex = lSeqLength;
|
|
4228 rIndex = minIndex1;
|
|
4229
|
|
4230
|
|
4231 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
|
|
4232
|
|
4233 ref = ref + segLength;
|
|
4234
|
|
4235 if(rSeqLength != 0)
|
|
4236 {
|
|
4237 rIndex = 1;
|
|
4238 while(rIndex <= rSeqLength+errThreshold-error1)
|
|
4239 {
|
|
4240 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
|
|
4241 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
4242 for(i = tempUp; i < tempDown ; i++)
|
|
4243 {
|
|
4244 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
|
|
4245
|
|
4246 upValue = scoreF[i-1][rIndex]+1;
|
|
4247 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
|
|
4248 sideValue = scoreF[i][rIndex-1]+1;
|
|
4249
|
|
4250 if(i != tempUp && i != tempDown-1)
|
|
4251 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
4252 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
|
|
4253 scoreF[i][rIndex] = min(sideValue, diagValue);
|
|
4254 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
|
|
4255 scoreF[i][rIndex] = sideValue;
|
|
4256 else
|
|
4257 scoreF[i][rIndex] = min(diagValue , upValue);
|
|
4258
|
|
4259 if(i == tempUp)
|
|
4260 error = scoreF[i][rIndex];
|
|
4261 if(error > scoreF[i][rIndex])
|
|
4262 error = scoreF[i][rIndex];
|
|
4263 }
|
|
4264 if(rIndex <= rSeqLength)
|
|
4265 {
|
|
4266 errorSegment = error;
|
|
4267 }
|
|
4268
|
|
4269 rIndex++;
|
|
4270 }
|
|
4271
|
|
4272 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1];
|
|
4273 minIndex2 = rSeqLength + errThreshold-error1;
|
|
4274
|
|
4275 // Find the Best error for all the possible ways.
|
|
4276 for(i = 1; i <= 2*(errThreshold-error1); i++)
|
|
4277 {
|
|
4278 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0)
|
|
4279 {
|
|
4280 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i];
|
|
4281 minIndex2 = rSeqLength+errThreshold-error1-i;
|
|
4282 }
|
|
4283 }
|
|
4284 error = scoreF[rSeqLength][minIndex2];
|
|
4285 }
|
|
4286
|
|
4287 totalError = error + error1;
|
|
4288
|
|
4289 if(errThreshold > 4)
|
|
4290 printf("ERROR in errorThreshold.\n");
|
|
4291
|
|
4292
|
|
4293 if(totalError != error2 + error3 && totalError > errThreshold)
|
|
4294 {
|
|
4295 printf("ErrorF=%d, ErrorB=%d Error=%d Error=%d\n", error2,error3,error1,error);
|
|
4296
|
|
4297 scanf("%d", &i);
|
|
4298 }
|
|
4299
|
|
4300 char matrixR[200];
|
|
4301 char matrixL[200];
|
|
4302
|
|
4303 matrixR[0] = '\0';
|
|
4304 matrixL[0] = '\0';
|
|
4305
|
|
4306 size = 0;
|
|
4307 directionIndex = rSeqLength;
|
|
4308 rIndex = minIndex2;
|
|
4309
|
|
4310 while(directionIndex != 0 || rIndex != 0)
|
|
4311 {
|
|
4312 if(directionIndex-rIndex == errThreshold)
|
|
4313 {
|
|
4314 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
|
|
4315 {
|
|
4316 matrixR[size] = *(rSeq+directionIndex-1);
|
|
4317 size++;
|
|
4318 matrixR[size] = 'I';
|
|
4319 directionIndex--;
|
|
4320 }
|
|
4321 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4322 {
|
|
4323 matrixR[size] = *(ref+rIndex-1);
|
|
4324 rIndex--;
|
|
4325 directionIndex--;
|
|
4326 }
|
|
4327 else
|
|
4328 {
|
|
4329 matrixR[size] = 'M';
|
|
4330 rIndex--;
|
|
4331 directionIndex--;
|
|
4332 }
|
|
4333
|
|
4334 }
|
|
4335 else if(rIndex - directionIndex == errThreshold)
|
|
4336 {
|
|
4337 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
|
|
4338 {
|
|
4339 matrixR[size] = *(ref+rIndex-1);
|
|
4340 size++;
|
|
4341 matrixR[size] = 'D';
|
|
4342 rIndex--;
|
|
4343 }
|
|
4344 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4345 {
|
|
4346 matrixR[size] = *(ref+rIndex-1);
|
|
4347 rIndex--;
|
|
4348 directionIndex--;
|
|
4349 }
|
|
4350 else
|
|
4351 {
|
|
4352 matrixR[size] = 'M';
|
|
4353 rIndex--;
|
|
4354 directionIndex--;
|
|
4355 }
|
|
4356 }
|
|
4357 else
|
|
4358 {
|
|
4359 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
4360 {
|
|
4361 matrixR[size] = *(rSeq+directionIndex-1);
|
|
4362 size++;
|
|
4363 matrixR[size] = 'I';
|
|
4364 directionIndex--;
|
|
4365 }
|
|
4366 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
4367 {
|
|
4368 matrixR[size] = *(ref+rIndex-1);
|
|
4369 size++;
|
|
4370 matrixR[size] = 'D';
|
|
4371 rIndex--;
|
|
4372 }
|
|
4373 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4374 {
|
|
4375 matrixR[size] = *(ref+rIndex-1);
|
|
4376 rIndex--;
|
|
4377 directionIndex--;
|
|
4378 }
|
|
4379 else
|
|
4380 {
|
|
4381 matrixR[size] = 'M';
|
|
4382 rIndex--;
|
|
4383 directionIndex--;
|
|
4384 }
|
|
4385 }
|
|
4386 size++;
|
|
4387 }
|
|
4388 matrixR[size] = '\0';
|
|
4389
|
|
4390 size = 0;
|
|
4391 directionIndex = lSeqLength;
|
|
4392 rIndex = minIndex1;
|
|
4393
|
|
4394
|
|
4395 while(directionIndex != 0 || rIndex != 0)
|
|
4396 {
|
|
4397 if(directionIndex-rIndex == errThreshold)
|
|
4398 {
|
|
4399 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
|
|
4400 {
|
|
4401 matrixL[size] = 'I';
|
|
4402 size++;
|
|
4403 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
4404 directionIndex--;
|
|
4405 }
|
|
4406 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4407 {
|
|
4408 matrixL[size] = *(tempref-rIndex);
|
|
4409 rIndex--;
|
|
4410 directionIndex--;
|
|
4411 }
|
|
4412 else
|
|
4413 {
|
|
4414 matrixL[size] = 'M';
|
|
4415 rIndex--;
|
|
4416 directionIndex--;
|
|
4417 }
|
|
4418
|
|
4419 }
|
|
4420 else if(rIndex - directionIndex == errThreshold)
|
|
4421 {
|
|
4422 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
|
|
4423 {
|
|
4424 matrixL[size] = 'D';
|
|
4425 size++;
|
|
4426 matrixL[size] = *(tempref-rIndex);
|
|
4427 rIndex--;
|
|
4428 }
|
|
4429 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4430 {
|
|
4431 matrixL[size] = *(tempref-rIndex);
|
|
4432 rIndex--;
|
|
4433 directionIndex--;
|
|
4434 }
|
|
4435 else
|
|
4436 {
|
|
4437 matrixL[size] = 'M';
|
|
4438 rIndex--;
|
|
4439 directionIndex--;
|
|
4440 }
|
|
4441 }
|
|
4442 else
|
|
4443 {
|
|
4444 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
4445 {
|
|
4446 matrixL[size] = 'I';
|
|
4447 size++;
|
|
4448 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
4449 directionIndex--;
|
|
4450 }
|
|
4451 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
4452 {
|
|
4453 matrixL[size] = 'D';
|
|
4454 size++;
|
|
4455 matrixL[size] = *(tempref-rIndex);
|
|
4456 rIndex--;
|
|
4457 }
|
|
4458 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4459 {
|
|
4460 matrixL[size] = *(tempref-rIndex);
|
|
4461 rIndex--;
|
|
4462 directionIndex--;
|
|
4463 }
|
|
4464 else
|
|
4465 {
|
|
4466 matrixL[size] = 'M';
|
|
4467 rIndex--;
|
|
4468 directionIndex--;
|
|
4469 }
|
|
4470 }
|
|
4471
|
|
4472 size++;
|
|
4473 }
|
|
4474
|
|
4475 matrixL[size] = '\0';
|
|
4476 char middle[200];
|
|
4477 middle[0] = '\0';
|
|
4478
|
|
4479 for(i = 0; i < segLength; i++)
|
|
4480 middle[i] = 'M';
|
|
4481 middle[segLength] = '\0';
|
|
4482
|
|
4483 char rmatrixR[200];
|
|
4484
|
|
4485 reverse(matrixR, rmatrixR, strlen(matrixR));
|
|
4486
|
|
4487 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
|
|
4488
|
|
4489 return totalError;
|
|
4490
|
|
4491 }
|
|
4492
|
|
4493 int verifySingleEndEditDistanceExtention(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
|
|
4494 char *matrix, int *map_location, short *seqHashValue)
|
|
4495 {
|
|
4496 int i = 0;
|
|
4497
|
|
4498 char * ref;
|
|
4499 char * tempref;
|
|
4500
|
|
4501 int rIndex = 0; //reference Index
|
|
4502
|
|
4503 int error = 0;
|
|
4504 int error1 = 0;
|
|
4505
|
|
4506 int error2 = 0;
|
|
4507 int error3 = 0;
|
|
4508 int totalError = 0;
|
|
4509 int errorSegment = 0;
|
|
4510
|
|
4511 int ERROR_BOUND = min(4, errThreshold);
|
|
4512
|
|
4513
|
|
4514 /*
|
|
4515 1: Up
|
|
4516 2: Side
|
|
4517 3: Diagnoal Match
|
|
4518 4: Diagnoal Mismatch
|
|
4519 */
|
|
4520
|
|
4521 int min = 0;
|
|
4522 int minIndex1 = 0;
|
|
4523 int minIndex2 = 0;
|
|
4524
|
|
4525 int directionIndex = 0;
|
|
4526
|
|
4527
|
|
4528 int size = 0;
|
|
4529
|
|
4530 ref = _msf_refGen + refIndex - 1;
|
|
4531 tempref = _msf_refGen + refIndex - 1;
|
|
4532
|
|
4533
|
|
4534 if(lSeqLength != 0)
|
|
4535 {
|
|
4536 error3 = backwardEditDistanceSSE2Extention(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
|
|
4537 if(error3 == -1){
|
|
4538 return -1;
|
|
4539 }
|
|
4540 }
|
|
4541
|
|
4542 if(rSeqLength != 0)
|
|
4543 {
|
|
4544 error2 = forwardEditDistanceSSE2Extention(ref+segLength, rSeqLength, rSeq, rSeqLength);
|
|
4545 if(error2 == -1)
|
|
4546 return -1;
|
|
4547 }
|
|
4548
|
|
4549 if(error2 + error3 > errThreshold)
|
|
4550 return -1;
|
|
4551
|
|
4552 rIndex = 1;
|
|
4553
|
|
4554 int prevError = 0;
|
|
4555
|
|
4556 int tempUp = 0;
|
|
4557 int tempDown = 0;
|
|
4558
|
|
4559 int errorString = 0;
|
|
4560
|
|
4561 int upValue;
|
|
4562 int diagValue;
|
|
4563 int sideValue;
|
|
4564 if(lSeqLength > ERROR_BOUND)
|
|
4565 {
|
|
4566 while(rIndex <= lSeqLength+ERROR_BOUND && lSeqLength != 0)
|
|
4567 {
|
|
4568 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
|
|
4569 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
4570 for(i = tempUp ; i < tempDown ; i++)
|
|
4571 {
|
|
4572 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
|
|
4573
|
|
4574 upValue = scoreB[i-1][rIndex]+1;
|
|
4575 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
|
|
4576 sideValue = scoreB[i][rIndex-1]+1;
|
|
4577
|
|
4578 if(i != tempUp && i != tempDown-1)
|
|
4579 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
4580
|
|
4581 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
|
|
4582 scoreB[i][rIndex] = min(sideValue, diagValue);
|
|
4583 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
|
|
4584 scoreB[i][rIndex] = sideValue;
|
|
4585 else
|
|
4586 scoreB[i][rIndex] = min(diagValue , upValue);
|
|
4587
|
|
4588 if(i == tempUp)
|
|
4589 error = scoreB[i][rIndex];
|
|
4590 else if(error > scoreB[i][rIndex])
|
|
4591 error = scoreB[i][rIndex];
|
|
4592 }
|
|
4593 if(rIndex <= lSeqLength)
|
|
4594 {
|
|
4595 errorSegment = error-prevError;
|
|
4596 }
|
|
4597 rIndex++;
|
|
4598 }
|
|
4599
|
|
4600 if(lSeqLength != 0)
|
|
4601 {
|
|
4602 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND];
|
|
4603 minIndex1 = lSeqLength + ERROR_BOUND;
|
|
4604
|
|
4605 // Find the Best error for all the possible ways.
|
|
4606 for(i = 1; i <= 2*ERROR_BOUND; i++)
|
|
4607 {
|
|
4608 if(min >= scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i] && lSeqLength+ERROR_BOUND-i > 0)
|
|
4609 {
|
|
4610 min = scoreB[lSeqLength][lSeqLength+ERROR_BOUND-i];
|
|
4611 minIndex1 = lSeqLength+ERROR_BOUND-i;
|
|
4612 }
|
|
4613 }
|
|
4614 error = scoreB[lSeqLength][minIndex1];
|
|
4615 }
|
|
4616 }
|
|
4617 else
|
|
4618 {
|
|
4619 int j = 0;
|
|
4620 for(i = 1; i <= lSeqLength; i++)
|
|
4621 {
|
|
4622 for(j = 1; j <= lSeqLength; j++)
|
|
4623 {
|
|
4624 scoreB[i][j] = min3(scoreB[i-1][j-1]+ (*(ref-j) != *(lSeq+lSeqLength-i) ),scoreB[i][j-1]+1 ,scoreB[i-1][j]+1);
|
|
4625 }
|
|
4626 }
|
|
4627 error = scoreB[lSeqLength][lSeqLength];
|
|
4628 minIndex1 = lSeqLength;
|
|
4629
|
|
4630 }
|
|
4631 error1 = error;
|
|
4632
|
|
4633 error = 0;
|
|
4634 errorSegment = 0;
|
|
4635
|
|
4636 directionIndex = lSeqLength;
|
|
4637 rIndex = minIndex1;
|
|
4638
|
|
4639 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
|
|
4640
|
|
4641 ref = ref + segLength;
|
|
4642
|
|
4643 if(rSeqLength != 0 && rSeqLength > ERROR_BOUND)
|
|
4644 {
|
|
4645 ERROR_BOUND = min(ERROR_BOUND, rSeqLength);
|
|
4646
|
|
4647 if(rSeqLength == ERROR_BOUND)
|
|
4648 {
|
|
4649 for(i=0; i < 2*ERROR_BOUND; i++)
|
|
4650 scoreF[0][i] = i;
|
|
4651 }
|
|
4652
|
|
4653 rIndex = 1;
|
|
4654 while(rIndex <= rSeqLength+ERROR_BOUND)
|
|
4655 {
|
|
4656 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
|
|
4657 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
4658 for(i = tempUp; i < tempDown ; i++)
|
|
4659 {
|
|
4660 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
|
|
4661 upValue = scoreF[i-1][rIndex]+1;
|
|
4662 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
|
|
4663 sideValue = scoreF[i][rIndex-1]+1;
|
|
4664
|
|
4665 if(i != tempUp && i != tempDown-1)
|
|
4666 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
4667 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
|
|
4668 scoreF[i][rIndex] = min(sideValue, diagValue);
|
|
4669 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
|
|
4670 scoreF[i][rIndex] = sideValue;
|
|
4671 else
|
|
4672 scoreF[i][rIndex] = min(diagValue , upValue);
|
|
4673
|
|
4674 if(i == tempUp)
|
|
4675 error = scoreF[i][rIndex];
|
|
4676 if(error > scoreF[i][rIndex])
|
|
4677 error = scoreF[i][rIndex];
|
|
4678 }
|
|
4679 if(rIndex <= rSeqLength)
|
|
4680 {
|
|
4681 errorSegment = error;
|
|
4682 }
|
|
4683 rIndex++;
|
|
4684 }
|
|
4685 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND];
|
|
4686 minIndex2 = rSeqLength + ERROR_BOUND;
|
|
4687
|
|
4688 // Find the Best error for all the possible ways.
|
|
4689 for(i = 1; i <= 2*ERROR_BOUND; i++)
|
|
4690 {
|
|
4691 if(min > scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i] && rSeqLength+ERROR_BOUND-i > 0)
|
|
4692 {
|
|
4693 min = scoreF[rSeqLength][rSeqLength+ERROR_BOUND-i];
|
|
4694 minIndex2 = rSeqLength+ERROR_BOUND-i;
|
|
4695 }
|
|
4696 }
|
|
4697 error = scoreF[rSeqLength][minIndex2];
|
|
4698 }
|
|
4699 else
|
|
4700 {
|
|
4701 int j = 0;
|
|
4702 for(i = 1; i <= rSeqLength; i++)
|
|
4703 {
|
|
4704 for(j = 1; j <= rSeqLength; j++)
|
|
4705 {
|
|
4706 scoreF[i][j] = min3(scoreF[i-1][j-1]+ (*(ref+j-1) != *(rSeq+i-1) ),scoreF[i][j-1]+1 ,scoreF[i-1][j]+1);
|
|
4707 }
|
|
4708 }
|
|
4709 error = scoreF[rSeqLength][rSeqLength];
|
|
4710 minIndex2 = rSeqLength;
|
|
4711 }
|
|
4712
|
|
4713 totalError = error + error1;
|
|
4714
|
|
4715 if(totalError != error2+error3)
|
|
4716 {
|
|
4717 for(i = 0; i < lSeqLength; i++)
|
|
4718 printf("%c", *(tempref-1-i));
|
|
4719 printf("\n");
|
|
4720 for(i = 0; i < lSeqLength; i++)
|
|
4721 printf("%c", *(lSeq+i));
|
|
4722 printf("\n");
|
|
4723
|
|
4724 for(i = 0; i < rSeqLength; i++)
|
|
4725 printf("%c", *(tempref+segLength+i));
|
|
4726 printf("\n");
|
|
4727
|
|
4728 for(i = 0; i < rSeqLength; i++)
|
|
4729 printf("%c", *(rSeq+i));
|
|
4730 printf("\n");
|
|
4731
|
|
4732 printf("ERROR=%d\n", totalError);
|
|
4733 printf("ERROR_SSE=%d\n", error3+error2);
|
|
4734
|
|
4735 printf("ERROR_SSE_back=%d E_SSE_forw=%d\n", error3, error2);
|
|
4736 printf("ERROR_back=%d E_forw=%d\n", error1, error);
|
|
4737
|
|
4738 }
|
|
4739
|
|
4740 char matrixR[200];
|
|
4741 char matrixL[200];
|
|
4742
|
|
4743 matrixR[0] = '\0';
|
|
4744 matrixL[0] = '\0';
|
|
4745
|
|
4746 size = 0;
|
|
4747 directionIndex = rSeqLength;
|
|
4748 rIndex = minIndex2;
|
|
4749
|
|
4750
|
|
4751 while(directionIndex != 0 || rIndex != 0)
|
|
4752 {
|
|
4753 if(directionIndex-rIndex == errThreshold)
|
|
4754 {
|
|
4755 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
|
|
4756 {
|
|
4757 matrixR[size] = *(rSeq+directionIndex-1);
|
|
4758 size++;
|
|
4759 matrixR[size] = 'I';
|
|
4760 directionIndex--;
|
|
4761 }
|
|
4762 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4763 {
|
|
4764 matrixR[size] = *(ref+rIndex-1);
|
|
4765 rIndex--;
|
|
4766 directionIndex--;
|
|
4767 }
|
|
4768 else
|
|
4769 {
|
|
4770 matrixR[size] = 'M';
|
|
4771 rIndex--;
|
|
4772 directionIndex--;
|
|
4773 }
|
|
4774
|
|
4775 }
|
|
4776 else if(rIndex - directionIndex == errThreshold)
|
|
4777 {
|
|
4778 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
|
|
4779 {
|
|
4780 matrixR[size] = *(ref+rIndex-1);
|
|
4781 size++;
|
|
4782 matrixR[size] = 'D';
|
|
4783 rIndex--;
|
|
4784 }
|
|
4785 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4786 {
|
|
4787 matrixR[size] = *(ref+rIndex-1);
|
|
4788 rIndex--;
|
|
4789 directionIndex--;
|
|
4790 }
|
|
4791 else
|
|
4792 {
|
|
4793 matrixR[size] = 'M';
|
|
4794 rIndex--;
|
|
4795 directionIndex--;
|
|
4796 }
|
|
4797 }
|
|
4798 else
|
|
4799 {
|
|
4800 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
4801 {
|
|
4802 matrixR[size] = *(rSeq+directionIndex-1);
|
|
4803 size++;
|
|
4804 matrixR[size] = 'I';
|
|
4805 directionIndex--;
|
|
4806 }
|
|
4807 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
4808 {
|
|
4809 matrixR[size] = *(ref+rIndex-1);
|
|
4810 size++;
|
|
4811 matrixR[size] = 'D';
|
|
4812 rIndex--;
|
|
4813 }
|
|
4814 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
4815 {
|
|
4816 matrixR[size] = *(ref+rIndex-1);
|
|
4817 rIndex--;
|
|
4818 directionIndex--;
|
|
4819 }
|
|
4820 else
|
|
4821 {
|
|
4822 matrixR[size] = 'M';
|
|
4823 rIndex--;
|
|
4824 directionIndex--;
|
|
4825 }
|
|
4826 }
|
|
4827 size++;
|
|
4828 }
|
|
4829 matrixR[size] = '\0';
|
|
4830
|
|
4831 size = 0;
|
|
4832 directionIndex = lSeqLength;
|
|
4833 rIndex = minIndex1;
|
|
4834
|
|
4835
|
|
4836 while(directionIndex != 0 || rIndex != 0)
|
|
4837 {
|
|
4838 if(directionIndex-rIndex == errThreshold)
|
|
4839 {
|
|
4840 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
|
|
4841 {
|
|
4842 matrixL[size] = 'I';
|
|
4843 size++;
|
|
4844 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
4845 directionIndex--;
|
|
4846 }
|
|
4847 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4848 {
|
|
4849 matrixL[size] = *(tempref-rIndex);
|
|
4850 rIndex--;
|
|
4851 directionIndex--;
|
|
4852 }
|
|
4853 else
|
|
4854 {
|
|
4855 matrixL[size] = 'M';
|
|
4856 rIndex--;
|
|
4857 directionIndex--;
|
|
4858 }
|
|
4859
|
|
4860 }
|
|
4861 else if(rIndex - directionIndex == errThreshold)
|
|
4862 {
|
|
4863 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
|
|
4864 {
|
|
4865 matrixL[size] = 'D';
|
|
4866 size++;
|
|
4867 matrixL[size] = *(tempref-rIndex);
|
|
4868 rIndex--;
|
|
4869 }
|
|
4870 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4871 {
|
|
4872 matrixL[size] = *(tempref-rIndex);
|
|
4873 rIndex--;
|
|
4874 directionIndex--;
|
|
4875 }
|
|
4876 else
|
|
4877 {
|
|
4878 matrixL[size] = 'M';
|
|
4879 rIndex--;
|
|
4880 directionIndex--;
|
|
4881 }
|
|
4882 }
|
|
4883 else
|
|
4884 {
|
|
4885 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
4886 {
|
|
4887 matrixL[size] = 'I';
|
|
4888 size++;
|
|
4889 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
4890 directionIndex--;
|
|
4891 }
|
|
4892 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
4893 {
|
|
4894 matrixL[size] = 'D';
|
|
4895 size++;
|
|
4896 matrixL[size] = *(tempref-rIndex);
|
|
4897 rIndex--;
|
|
4898 }
|
|
4899 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
4900 {
|
|
4901 matrixL[size] = *(tempref-rIndex);
|
|
4902 rIndex--;
|
|
4903 directionIndex--;
|
|
4904 }
|
|
4905 else
|
|
4906 {
|
|
4907 matrixL[size] = 'M';
|
|
4908 rIndex--;
|
|
4909 directionIndex--;
|
|
4910 }
|
|
4911 }
|
|
4912 size++;
|
|
4913 }
|
|
4914 matrixL[size] = '\0';
|
|
4915
|
|
4916 char middle[200];
|
|
4917 middle[0] = '\0';
|
|
4918 for(i = 0; i < segLength; i++)
|
|
4919 middle[i] = 'M';
|
|
4920 middle[segLength] = '\0';
|
|
4921
|
|
4922 char rmatrixR[200];
|
|
4923
|
|
4924 reverse(matrixR, rmatrixR, strlen(matrixR));
|
|
4925
|
|
4926 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
|
|
4927
|
|
4928
|
|
4929 return totalError;
|
|
4930
|
|
4931 }
|
|
4932
|
|
4933
|
|
4934 int verifySingleEndEditDistance(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength, char *matrix, int *map_location, short *seqHashValue)
|
|
4935 {
|
|
4936
|
|
4937 int i = 0;
|
|
4938
|
|
4939 char * ref;
|
|
4940 char * tempref;
|
|
4941
|
|
4942 int rIndex = 0; //reference Index
|
|
4943
|
|
4944 int error = 0;
|
|
4945 int error1 = 0;
|
|
4946
|
|
4947 int error2 = 0;
|
|
4948 int error3 = 0;
|
|
4949
|
|
4950 int totalError = 0;
|
|
4951 int errorSegment = 0;
|
|
4952
|
|
4953 int ERROR_BOUND = errThreshold;
|
|
4954
|
|
4955 /*
|
|
4956 1: Up
|
|
4957 2: Side
|
|
4958 3: Diagnoal Match
|
|
4959 4: Diagnoal Mismatch
|
|
4960 */
|
|
4961
|
|
4962 int min = 0;
|
|
4963 int minIndex1 = 0;
|
|
4964 int minIndex2 = 0;
|
|
4965
|
|
4966 int directionIndex = 0;
|
|
4967
|
|
4968
|
|
4969 int size = 0;
|
|
4970
|
|
4971 ref = _msf_refGen + refIndex - 1;
|
|
4972 tempref = _msf_refGen + refIndex - 1;
|
|
4973
|
|
4974
|
|
4975 if(rSeqLength != 0)
|
|
4976 {
|
|
4977 if(errThreshold %2 == 1)
|
|
4978 error2 = forwardEditDistanceSSE2Odd(ref+segLength, rSeqLength, rSeq, rSeqLength);
|
|
4979 else
|
|
4980 error2 = forwardEditDistanceSSE2G(ref+segLength, rSeqLength, rSeq, rSeqLength);
|
|
4981 if(error2 == -1)
|
|
4982 return -1;
|
|
4983 }
|
|
4984
|
|
4985 if(lSeqLength != 0)
|
|
4986 {
|
|
4987 if(errThreshold % 2 == 1)
|
|
4988 error3 = backwardEditDistanceSSE2Odd(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
|
|
4989 else
|
|
4990 error3 = backwardEditDistanceSSE2G(ref-1, lSeqLength, lSeq+lSeqLength-1, lSeqLength);
|
|
4991 if(error3 == -1 || error3 == 0){
|
|
4992 return -1;
|
|
4993 }
|
|
4994 }
|
|
4995
|
|
4996 if(error3 + error2 > errThreshold)
|
|
4997 return -1;
|
|
4998
|
|
4999 for(i = 0 ; i < errThreshold + 1; i++)
|
|
5000 {
|
|
5001 scoreB[0][i] = i;
|
|
5002 scoreB[i][0] = i;
|
|
5003 }
|
|
5004
|
|
5005 rIndex = 1;
|
|
5006 int prevError = 0;
|
|
5007
|
|
5008 int tempUp = 0;
|
|
5009 int tempDown = 0;
|
|
5010
|
|
5011 int errorString = 0;
|
|
5012
|
|
5013 int upValue;
|
|
5014 int diagValue;
|
|
5015 int sideValue;
|
|
5016
|
|
5017 while(rIndex <= lSeqLength+errThreshold && lSeqLength != 0)
|
|
5018 {
|
|
5019 tempUp = ((rIndex - ERROR_BOUND) > 0 ? ((rIndex > lSeqLength) ? lSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1 );
|
|
5020 tempDown = ((rIndex >= lSeqLength-ERROR_BOUND ) ? lSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
5021 for(i = tempUp ; i < tempDown ; i++)
|
|
5022 {
|
|
5023 errorString = (*(ref-rIndex) == *(lSeq+lSeqLength-i));
|
|
5024
|
|
5025 upValue = scoreB[i-1][rIndex]+1;
|
|
5026 diagValue = scoreB[i-1][rIndex-1]+ !errorString;
|
|
5027 sideValue = scoreB[i][rIndex-1]+1;
|
|
5028
|
|
5029 if(i != tempUp && i != tempDown-1)
|
|
5030 scoreB[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
5031
|
|
5032 else if( (i == ((rIndex - ERROR_BOUND) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= lSeqLength )
|
|
5033 scoreB[i][rIndex] = min(sideValue, diagValue);
|
|
5034 else if(rIndex > lSeqLength && (i == lSeqLength - ERROR_BOUND) )
|
|
5035 scoreB[i][rIndex] = sideValue;
|
|
5036 else
|
|
5037 scoreB[i][rIndex] = min(diagValue , upValue);
|
|
5038
|
|
5039 if(i == tempUp)
|
|
5040 error = scoreB[i][rIndex];
|
|
5041 else if(error > scoreB[i][rIndex])
|
|
5042 error = scoreB[i][rIndex];
|
|
5043 }
|
|
5044 if(rIndex <= lSeqLength)
|
|
5045 {
|
|
5046 errorSegment = error-prevError;
|
|
5047 }
|
|
5048 rIndex++;
|
|
5049 }
|
|
5050 if(lSeqLength != 0)
|
|
5051 {
|
|
5052 min = scoreB[lSeqLength][lSeqLength+errThreshold];
|
|
5053 minIndex1 = lSeqLength + errThreshold;
|
|
5054
|
|
5055 // Find the Best error for all the possible ways.
|
|
5056 for(i = 1; i <= 2*errThreshold; i++)
|
|
5057 {
|
|
5058 if(min >= scoreB[lSeqLength][lSeqLength+errThreshold-i] && lSeqLength+errThreshold-i > 0)
|
|
5059 {
|
|
5060 min = scoreB[lSeqLength][lSeqLength+errThreshold-i];
|
|
5061 minIndex1 = lSeqLength+errThreshold-i;
|
|
5062 }
|
|
5063 }
|
|
5064 error = scoreB[lSeqLength][minIndex1];
|
|
5065 }
|
|
5066
|
|
5067 error1 = error;
|
|
5068
|
|
5069 error = 0;
|
|
5070 errorSegment = 0;
|
|
5071
|
|
5072 directionIndex = lSeqLength;
|
|
5073 rIndex = minIndex1;
|
|
5074
|
|
5075 *map_location = ((lSeqLength == 0) ? refIndex : refIndex - rIndex) ;
|
|
5076
|
|
5077 ref = ref + segLength;
|
|
5078
|
|
5079 if(rSeqLength != 0)
|
|
5080 {
|
|
5081 for(i = 0 ; i < errThreshold + 1; i++)
|
|
5082 {
|
|
5083 scoreF[0][i] = i;
|
|
5084 scoreF[i][0] = i;
|
|
5085 }
|
|
5086
|
|
5087
|
|
5088 rIndex = 1;
|
|
5089 while(rIndex <= rSeqLength+errThreshold-error1)
|
|
5090 {
|
|
5091 tempUp = (rIndex - ERROR_BOUND) > 0 ? ((rIndex > rSeqLength) ? rSeqLength - ERROR_BOUND :rIndex - ERROR_BOUND) : 1;
|
|
5092 tempDown = ((rIndex >= rSeqLength- ERROR_BOUND ) ? rSeqLength+1 :rIndex + ERROR_BOUND + 1);
|
|
5093 for(i = tempUp; i < tempDown ; i++)
|
|
5094 {
|
|
5095 errorString = (*(ref+rIndex-1) == *(rSeq+i-1));
|
|
5096
|
|
5097 upValue = scoreF[i-1][rIndex]+1;
|
|
5098 diagValue = scoreF[i-1][rIndex-1]+ !errorString;
|
|
5099 sideValue = scoreF[i][rIndex-1]+1;
|
|
5100
|
|
5101 if(i != tempUp && i != tempDown-1)
|
|
5102 scoreF[i][rIndex] = min3(sideValue, diagValue , upValue);
|
|
5103 else if( (i == ((rIndex - ERROR_BOUND ) > 0 ? rIndex - ERROR_BOUND : 1)) && rIndex <= rSeqLength )
|
|
5104 scoreF[i][rIndex] = min(sideValue, diagValue);
|
|
5105 else if(rIndex > rSeqLength && (i == rSeqLength - ERROR_BOUND) )
|
|
5106 scoreF[i][rIndex] = sideValue;
|
|
5107 else
|
|
5108 scoreF[i][rIndex] = min(diagValue , upValue);
|
|
5109
|
|
5110 if(i == tempUp)
|
|
5111 error = scoreF[i][rIndex];
|
|
5112 if(error > scoreF[i][rIndex])
|
|
5113 error = scoreF[i][rIndex];
|
|
5114 }
|
|
5115 if(rIndex <= rSeqLength)
|
|
5116 {
|
|
5117 errorSegment = error;
|
|
5118 }
|
|
5119 rIndex++;
|
|
5120 }
|
|
5121
|
|
5122 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1];
|
|
5123 minIndex2 = rSeqLength + errThreshold-error1;
|
|
5124
|
|
5125 // Find the Best error for all the possible ways.
|
|
5126 for(i = 1; i <= 2*(errThreshold-error1); i++)
|
|
5127 {
|
|
5128 if(min > scoreF[rSeqLength][rSeqLength+errThreshold-error1-i] && rSeqLength+errThreshold-error1-i > 0)
|
|
5129 {
|
|
5130 min = scoreF[rSeqLength][rSeqLength+errThreshold-error1-i];
|
|
5131 minIndex2 = rSeqLength+errThreshold-error1-i;
|
|
5132 }
|
|
5133 }
|
|
5134 error = scoreF[rSeqLength][minIndex2];
|
|
5135 }
|
|
5136
|
|
5137 totalError = error + error1;
|
|
5138
|
|
5139
|
|
5140 if(totalError != error2 + error3 && totalError > errThreshold)
|
|
5141 {
|
|
5142 for(i = 0; i < lSeqLength; i++)
|
|
5143 printf("%c", *(tempref-1-i));
|
|
5144 printf("\n");
|
|
5145 for(i = 0; i < lSeqLength; i++)
|
|
5146 printf("%c", *(lSeq+i));
|
|
5147 printf("\n");
|
|
5148
|
|
5149 for(i = 0; i < rSeqLength; i++)
|
|
5150 printf("%c", *(tempref+segLength+i));
|
|
5151 printf("\n");
|
|
5152
|
|
5153 for(i = 0; i < rSeqLength; i++)
|
|
5154 printf("%c", *(rSeq+i));
|
|
5155 printf("\n");
|
|
5156
|
|
5157
|
|
5158 printf("SSEF=%d SSEB%d\n", error2, error3);
|
|
5159 printf("F=%d B=%d\n", error, error1);
|
|
5160 scanf("%d", &i);
|
|
5161 }
|
|
5162
|
|
5163 char matrixR[200];
|
|
5164 char matrixL[200];
|
|
5165
|
|
5166 matrixR[0] = '\0';
|
|
5167 matrixL[0] = '\0';
|
|
5168
|
|
5169 size = 0;
|
|
5170 directionIndex = rSeqLength;
|
|
5171 rIndex = minIndex2;
|
|
5172
|
|
5173 while(directionIndex != 0 || rIndex != 0)
|
|
5174 {
|
|
5175 if(directionIndex-rIndex == errThreshold)
|
|
5176 {
|
|
5177 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1)
|
|
5178 {
|
|
5179 matrixR[size] = *(rSeq+directionIndex-1);
|
|
5180 size++;
|
|
5181 matrixR[size] = 'I';
|
|
5182 directionIndex--;
|
|
5183 }
|
|
5184 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
5185 {
|
|
5186 matrixR[size] = *(ref+rIndex-1);
|
|
5187 rIndex--;
|
|
5188 directionIndex--;
|
|
5189 }
|
|
5190 else
|
|
5191 {
|
|
5192 matrixR[size] = 'M';
|
|
5193 rIndex--;
|
|
5194 directionIndex--;
|
|
5195 }
|
|
5196
|
|
5197 }
|
|
5198 else if(rIndex - directionIndex == errThreshold)
|
|
5199 {
|
|
5200 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1)
|
|
5201 {
|
|
5202 matrixR[size] = *(ref+rIndex-1);
|
|
5203 size++;
|
|
5204 matrixR[size] = 'D';
|
|
5205 rIndex--;
|
|
5206 }
|
|
5207 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
5208 {
|
|
5209 matrixR[size] = *(ref+rIndex-1);
|
|
5210 rIndex--;
|
|
5211 directionIndex--;
|
|
5212 }
|
|
5213 else
|
|
5214 {
|
|
5215 matrixR[size] = 'M';
|
|
5216 rIndex--;
|
|
5217 directionIndex--;
|
|
5218 }
|
|
5219 }
|
|
5220 else
|
|
5221 {
|
|
5222 if(scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
5223 {
|
|
5224 matrixR[size] = *(rSeq+directionIndex-1);
|
|
5225 size++;
|
|
5226 matrixR[size] = 'I';
|
|
5227 directionIndex--;
|
|
5228 }
|
|
5229 else if(scoreF[directionIndex][rIndex] - scoreF[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
5230 {
|
|
5231 matrixR[size] = *(ref+rIndex-1);
|
|
5232 size++;
|
|
5233 matrixR[size] = 'D';
|
|
5234 rIndex--;
|
|
5235 }
|
|
5236 else if( scoreF[directionIndex][rIndex] - scoreF[directionIndex-1][rIndex-1] == 1 )
|
|
5237 {
|
|
5238 matrixR[size] = *(ref+rIndex-1);
|
|
5239 rIndex--;
|
|
5240 directionIndex--;
|
|
5241 }
|
|
5242 else
|
|
5243 {
|
|
5244 matrixR[size] = 'M';
|
|
5245 rIndex--;
|
|
5246 directionIndex--;
|
|
5247 }
|
|
5248 }
|
|
5249 size++;
|
|
5250 }
|
|
5251 matrixR[size] = '\0';
|
|
5252
|
|
5253 size = 0;
|
|
5254 directionIndex = lSeqLength;
|
|
5255 rIndex = minIndex1;
|
|
5256
|
|
5257
|
|
5258 while(directionIndex != 0 || rIndex != 0)
|
|
5259 {
|
|
5260 if(directionIndex-rIndex == errThreshold)
|
|
5261 {
|
|
5262 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1)
|
|
5263 {
|
|
5264 matrixL[size] = 'I';
|
|
5265 size++;
|
|
5266 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
5267 directionIndex--;
|
|
5268 }
|
|
5269 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
5270 {
|
|
5271 matrixL[size] = *(tempref-rIndex);
|
|
5272 rIndex--;
|
|
5273 directionIndex--;
|
|
5274 }
|
|
5275 else
|
|
5276 {
|
|
5277 matrixL[size] = 'M';
|
|
5278 rIndex--;
|
|
5279 directionIndex--;
|
|
5280 }
|
|
5281
|
|
5282 }
|
|
5283 else if(rIndex - directionIndex == errThreshold)
|
|
5284 {
|
|
5285 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1)
|
|
5286 {
|
|
5287 matrixL[size] = 'D';
|
|
5288 size++;
|
|
5289 matrixL[size] = *(tempref-rIndex);
|
|
5290 rIndex--;
|
|
5291 }
|
|
5292 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
5293 {
|
|
5294 matrixL[size] = *(tempref-rIndex);
|
|
5295 rIndex--;
|
|
5296 directionIndex--;
|
|
5297 }
|
|
5298 else
|
|
5299 {
|
|
5300 matrixL[size] = 'M';
|
|
5301 rIndex--;
|
|
5302 directionIndex--;
|
|
5303 }
|
|
5304 }
|
|
5305 else
|
|
5306 {
|
|
5307 if(scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex] == 1 && directionIndex != 0)
|
|
5308 {
|
|
5309 matrixL[size] = 'I';
|
|
5310 size++;
|
|
5311 matrixL[size] = *(lSeq+lSeqLength-directionIndex);
|
|
5312 directionIndex--;
|
|
5313 }
|
|
5314 else if(scoreB[directionIndex][rIndex] - scoreB[directionIndex][rIndex-1] == 1 && rIndex != 0)
|
|
5315 {
|
|
5316 matrixL[size] = 'D';
|
|
5317 size++;
|
|
5318 matrixL[size] = *(tempref-rIndex);
|
|
5319 rIndex--;
|
|
5320 }
|
|
5321 else if( scoreB[directionIndex][rIndex] - scoreB[directionIndex-1][rIndex-1] == 1 )
|
|
5322 {
|
|
5323 matrixL[size] = *(tempref-rIndex);
|
|
5324 rIndex--;
|
|
5325 directionIndex--;
|
|
5326 }
|
|
5327 else
|
|
5328 {
|
|
5329 matrixL[size] = 'M';
|
|
5330 rIndex--;
|
|
5331 directionIndex--;
|
|
5332 }
|
|
5333 }
|
|
5334 size++;
|
|
5335 }
|
|
5336 matrixL[size] = '\0';
|
|
5337 char middle[200];
|
|
5338 middle[0] = '\0';
|
|
5339 for(i = 0; i < segLength; i++)
|
|
5340 middle[i] = 'M';
|
|
5341 middle[segLength] = '\0';
|
|
5342
|
|
5343 char rmatrixR[200];
|
|
5344
|
|
5345 reverse(matrixR, rmatrixR, strlen(matrixR));
|
|
5346
|
|
5347 sprintf(matrix, "%s%s%s", matrixL, middle, rmatrixR);
|
|
5348
|
|
5349 return totalError;
|
|
5350 }
|
|
5351
|
|
5352
|
|
/*
 * Returns the number of decimal digits needed to print cnt, i.e. how many
 * characters a "%d" conversion adds to a CIGAR string for a run length.
 * Values below 10 (including negative ones) yield 1.
 *
 * Counts of 1000 and above are handled too; reporting 3 for them made
 * callers that track the string length with this value cut the CIGAR short.
 */
int addCigarSize(int cnt){
	int digits = 1;

	while (cnt >= 10)
	{
		cnt /= 10;
		digits++;
	}
	return digits;
}
|
|
5358
|
|
/*
	Appends one run "<count><op>" at cigar[*cigarSize] when *count is
	non-zero, advances *cigarSize by the number of characters written
	and resets *count to zero. Does nothing for an empty run.
*/
static void cigarFlushRun(char *cigar, int *cigarSize, int *count, char op)
{
	if (*count == 0)
		return;

	/* write at the current end instead of re-printing the buffer into itself */
	*cigarSize += sprintf(cigar + *cigarSize, "%d%c", *count, op);
	*count = 0;
}

/*
	Generate Cigar from the back tracking matrix.

	matrix       : trace string. 'I' and 'D' are each followed by one base
	               character, which is skipped; 'M' and any other character
	               (a mismatching base) count as one aligned position.
	matrixLength : number of characters of matrix to process
	cigar        : output buffer; receives the run-length encoded string
	               ("<n>M", "<n>I", "<n>D" runs) and is always NUL
	               terminated. The caller must provide enough room; no
	               capacity is passed in.
*/
void generateCigar(char *matrix, int matrixLength, char *cigar)
{
	int i;

	int counterM = 0;
	int counterI = 0;
	int counterD = 0;

	int cigarSize = 0;

	cigar[0] = '\0';

	for (i = 0; i < matrixLength; i++)
	{
		switch (matrix[i])
		{
		case 'I':
			cigarFlushRun(cigar, &cigarSize, &counterM, 'M');
			cigarFlushRun(cigar, &cigarSize, &counterD, 'D');
			counterI++;
			i++;	/* skip the inserted base that follows the 'I' */
			break;

		case 'D':
			cigarFlushRun(cigar, &cigarSize, &counterM, 'M');
			cigarFlushRun(cigar, &cigarSize, &counterI, 'I');
			counterD++;
			i++;	/* skip the deleted base that follows the 'D' */
			break;

		default:
			/* 'M' or a mismatching base: both extend the M run */
			cigarFlushRun(cigar, &cigarSize, &counterI, 'I');
			cigarFlushRun(cigar, &cigarSize, &counterD, 'D');
			counterM++;
			break;
		}
	}

	/* at most one run is still open here */
	cigarFlushRun(cigar, &cigarSize, &counterM, 'M');
	cigarFlushRun(cigar, &cigarSize, &counterI, 'I');
	cigarFlushRun(cigar, &cigarSize, &counterD, 'D');

	cigar[cigarSize] = '\0';
}
|
|
5480
|
|
/*
	Expands a mismatch (MD-style) string of the form
	[0-9]+(([ACTGN]|\^[ACTGN]+)[0-9]+)* into one operation character per
	position.

	mismatch       : input string
	mismatchLength : number of characters of mismatch to read; nothing past
	                 this length is examined
	cigar          : output buffer, always NUL terminated; the caller must
	                 provide enough room

	Behaviour, as implemented:
	  - a number n produces n 'M' characters (a value of 0 still produces
	    one 'M'), and the single character following the number is skipped;
	  - '^' produces 'I' and '\'' produces 'D', each skipping the character
	    that follows;
	  - any other character produces exactly one 'M'.

	NOTE(review): skipping the character after a number means a '^' that
	directly follows a number is never seen as '^' -- confirm this is the
	intended reading of the format.
*/
void generateCigarFromMD(char *mismatch, int mismatchLength, char *cigar)
{
	int i = 0;
	int j = 0;

	int cigarSize = 0;

	cigar[0] = '\0';

	while (i < mismatchLength)
	{
		char c = mismatch[i];

		if (c >= '0' && c <= '9')
		{
			int value = 0;

			/* bound is tested first so nothing past mismatchLength is read */
			while (i < mismatchLength && mismatch[i] >= '0' && mismatch[i] <= '9')
			{
				value = value * 10 + (mismatch[i] - '0');
				i++;
			}

			/* a zero-length run still emits a single 'M' */
			if (value < 1)
				value = 1;

			for (j = 0; j < value; j++)
				cigar[cigarSize++] = 'M';
		}
		else if (c == '^')
		{
			cigar[cigarSize++] = 'I';
			i++;
		}
		else if (c == '\'')
		{
			cigar[cigarSize++] = 'D';
			i++;
		}
		else
		{
			/* one output character per input character; no gap is left behind */
			cigar[cigarSize++] = 'M';
		}

		i++;
	}

	cigar[cigarSize] = '\0';
}
|
|
5531
|
|
/*
	Builds the mismatch (MD-style) string from the back tracking matrix.

	matrix       : trace string. 'M' is a match; 'D' is followed by the
	               deleted base; 'I' is followed by the inserted base; any
	               other character is a mismatching base.
	matrixLength : number of characters of matrix to process
	outputSNP    : output buffer, always NUL terminated; the caller must
	               provide enough room

	Output rules:
	  - a run of n matches is written as the decimal number n, emitted when
	    a deletion or mismatch interrupts it or at the end of the trace;
	  - consecutive deletions are written as '^' followed by the deleted
	    bases;
	  - a mismatching base is written as that character;
	  - insertions write nothing and do not interrupt a run of matches, but
	    they do close an open deletion run.
	No "0" is written between adjacent mismatches or deletions.
*/
void generateSNPSAM(char *matrix, int matrixLength, char *outputSNP)
{
	int i;

	int counterM = 0;	/* length of the pending run of matches */
	int inDeletion = 0;	/* non-zero while a "^..." run is open */

	int snpSize = 0;

	outputSNP[0] = '\0';

	for (i = 0; i < matrixLength; i++)
	{
		char op = matrix[i];

		if (op == 'M')
		{
			counterM++;
			inDeletion = 0;
		}
		else if (op == 'D')
		{
			if (counterM != 0)
			{
				/* append at the end rather than printing the buffer into itself */
				snpSize += sprintf(outputSNP + snpSize, "%d", counterM);
				counterM = 0;
			}
			if (!inDeletion)
			{
				outputSNP[snpSize++] = '^';
				inDeletion = 1;
			}
			i++;	/* move to the deleted base that follows the 'D' */
			if (i < matrixLength)
				outputSNP[snpSize++] = matrix[i];
		}
		else if (op == 'I')
		{
			inDeletion = 0;
			i++;	/* skip the inserted base that follows the 'I' */
		}
		else
		{
			if (counterM != 0)
			{
				snpSize += sprintf(outputSNP + snpSize, "%d", counterM);
				counterM = 0;
			}
			inDeletion = 0;
			outputSNP[snpSize++] = op;
		}
	}

	if (counterM != 0)
		snpSize += sprintf(outputSNP + snpSize, "%d", counterM);

	outputSNP[snpSize] = '\0';
}
|
|
5650 /**********************************************/
|
|
5651
|
|
5652 /*
|
|
5653 direction = 0 forward
|
|
5654 1 backward
|
|
5655
|
|
5656 */
|
|
5657
|
|
5658 void mapSingleEndSeq(unsigned int *l1, int s1, int readNumber, int readSegment, int direction)
|
|
5659 {
|
|
5660 int j = 0;
|
|
5661 int z = 0;
|
|
5662 int *locs = (int *) l1;
|
|
5663 char *_tmpSeq, *_tmpQual;
|
|
5664 char rqual[SEQ_LENGTH+1];
|
|
5665 rqual[SEQ_LENGTH]='\0';
|
|
5666
|
|
5667 int genLoc = 0;
|
|
5668 int leftSeqLength = 0;
|
|
5669 int rightSeqLength = 0;
|
|
5670 int middleSeqLength = 0;
|
|
5671
|
|
5672 char matrix[200];
|
|
5673 char editString[200];
|
|
5674 char cigar[MAX_CIGAR_SIZE];
|
|
5675
|
|
5676 short *_tmpHashValue;
|
|
5677
|
|
5678 if (direction)
|
|
5679 {
|
|
5680 reverse(_msf_seqList[readNumber].qual, rqual, SEQ_LENGTH);
|
|
5681 _tmpQual = rqual;
|
|
5682 _tmpSeq = _msf_seqList[readNumber].rseq;
|
|
5683 _tmpHashValue = _msf_seqList[readNumber].rhashValue;
|
|
5684 }
|
|
5685 else
|
|
5686 {
|
|
5687 _tmpQual = _msf_seqList[readNumber].qual;
|
|
5688 _tmpSeq = _msf_seqList[readNumber].seq;
|
|
5689 _tmpHashValue = _msf_seqList[readNumber].hashValue;
|
|
5690 }
|
|
5691
|
|
5692 int readId = 2*readNumber+direction;
|
|
5693 for (z=0; z<s1; z++)
|
|
5694 {
|
|
5695
|
|
5696
|
|
5697 int map_location = 0;
|
|
5698 int a = 0;
|
|
5699 int o = readSegment;
|
|
5700
|
|
5701 genLoc = locs[z];//-_msf_samplingLocs[o];
|
|
5702
|
|
5703
|
|
5704 if ( genLoc-_msf_samplingLocs[o] < _msf_refGenBeg ||
|
|
5705 genLoc-_msf_samplingLocs[o] > _msf_refGenEnd ||
|
|
5706 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId ||
|
|
5707 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId
|
|
5708 )
|
|
5709 continue;
|
|
5710 int err = -1;
|
|
5711
|
|
5712
|
|
5713 map_location = 0;
|
|
5714
|
|
5715 leftSeqLength = _msf_samplingLocs[o];
|
|
5716 middleSeqLength = WINDOW_SIZE;
|
|
5717 a = leftSeqLength + middleSeqLength;
|
|
5718 rightSeqLength = SEQ_LENGTH - a;
|
|
5719
|
|
5720 if(errThreshold == 2)
|
|
5721 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength,
|
|
5722 _tmpSeq + a, rightSeqLength,
|
|
5723 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5724 else if(errThreshold == 4)
|
|
5725 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength,
|
|
5726 _tmpSeq + a, rightSeqLength,
|
|
5727 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5728 else if(errThreshold ==3)
|
|
5729 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
|
|
5730 _tmpSeq + a, rightSeqLength,
|
|
5731 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5732 /*else if(errThreshold == 6)
|
|
5733 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
|
|
5734 _tmpSeq + a, rightSeqLength,
|
|
5735 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5736 */
|
|
5737 else
|
|
5738 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength,
|
|
5739 _tmpSeq + a, rightSeqLength,
|
|
5740 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5741
|
|
5742 if(err != -1)
|
|
5743 {
|
|
5744 generateSNPSAM(matrix, strlen(matrix), editString);
|
|
5745 generateCigar(matrix, strlen(matrix), cigar);
|
|
5746 }
|
|
5747
|
|
5748 if(err != -1 && !bestMode)
|
|
5749 {
|
|
5750
|
|
5751 mappingCnt++;
|
|
5752
|
|
5753 int j = 0;
|
|
5754 int k = 0;
|
|
5755 for(k = 0; k < readSegment+1; k++)
|
|
5756 {
|
|
5757 for(j = -errThreshold ; j <= errThreshold; j++)
|
|
5758 {
|
|
5759 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg &&
|
|
5760 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd)
|
|
5761 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId;
|
|
5762 }
|
|
5763 }
|
|
5764 _msf_seqList[readNumber].hits[0]++;
|
|
5765
|
|
5766 _msf_output.QNAME = _msf_seqList[readNumber].name;
|
|
5767 _msf_output.FLAG = 16 * direction;
|
|
5768 _msf_output.RNAME = _msf_refGenName;
|
|
5769 _msf_output.POS = map_location + _msf_refGenOffset;
|
|
5770 _msf_output.MAPQ = 255;
|
|
5771 _msf_output.CIGAR = cigar;
|
|
5772 _msf_output.MRNAME = "*";
|
|
5773 _msf_output.MPOS = 0;
|
|
5774 _msf_output.ISIZE = 0;
|
|
5775 _msf_output.SEQ = _tmpSeq;
|
|
5776 _msf_output.QUAL = _tmpQual;
|
|
5777
|
|
5778 _msf_output.optSize = 2;
|
|
5779 _msf_output.optFields = _msf_optionalFields;
|
|
5780
|
|
5781 _msf_optionalFields[0].tag = "NM";
|
|
5782 _msf_optionalFields[0].type = 'i';
|
|
5783 _msf_optionalFields[0].iVal = err;
|
|
5784
|
|
5785 _msf_optionalFields[1].tag = "MD";
|
|
5786 _msf_optionalFields[1].type = 'Z';
|
|
5787 _msf_optionalFields[1].sVal = editString;
|
|
5788
|
|
5789 output(_msf_output);
|
|
5790
|
|
5791
|
|
5792 if (_msf_seqList[readNumber].hits[0] == 1)
|
|
5793 {
|
|
5794 mappedSeqCnt++;
|
|
5795 }
|
|
5796
|
|
5797 if ( maxHits == 0 )
|
|
5798 {
|
|
5799 _msf_seqList[readNumber].hits[0] = 2;
|
|
5800 }
|
|
5801
|
|
5802
|
|
5803 if ( maxHits!=0 && _msf_seqList[readNumber].hits[0] == maxHits)
|
|
5804 {
|
|
5805 completedSeqCnt++;
|
|
5806 break;
|
|
5807 }
|
|
5808
|
|
5809 }
|
|
5810 else if(err != -1 && bestMode)
|
|
5811 {
|
|
5812 mappingCnt++;
|
|
5813 _msf_seqList[readNumber].hits[0]++;
|
|
5814
|
|
5815 if (_msf_seqList[readNumber].hits[0] == 1)
|
|
5816 {
|
|
5817 mappedSeqCnt++;
|
|
5818 }
|
|
5819
|
|
5820 if ( maxHits == 0 )
|
|
5821 {
|
|
5822 _msf_seqList[readNumber].hits[0] = 2;
|
|
5823 }
|
|
5824
|
|
5825 if(err < bestHitMappingInfo[readNumber].err || bestHitMappingInfo[readNumber].loc == -1)
|
|
5826 {
|
|
5827 setFullMappingInfo(readNumber, map_location + _msf_refGenOffset, direction, err, 0, editString, _msf_refGenName, cigar );
|
|
5828 }
|
|
5829 }
|
|
5830 else
|
|
5831 {
|
|
5832 for(j = -errThreshold ; j <= errThreshold; j++)
|
|
5833 {
|
|
5834 if(genLoc+j > _msf_refGenBeg &&
|
|
5835 genLoc+j < _msf_refGenEnd)
|
|
5836 _msf_verifiedLocs[genLoc+j] = -readId;
|
|
5837 }
|
|
5838 }
|
|
5839 }
|
|
5840 }
|
|
5841
|
|
5842
|
|
5843 int mapAllSingleEndSeq()
|
|
5844 {
|
|
5845 int i = 0;
|
|
5846 int j = 0;
|
|
5847 int k = 0;
|
|
5848
|
|
5849
|
|
5850 unsigned int *locs = NULL;
|
|
5851
|
|
5852
|
|
5853 int prev_hash = 0;
|
|
5854
|
|
5855 for(i = 0; i < _msf_seqListSize; i++)
|
|
5856 {
|
|
5857 for(j = 0; j < _msf_samplingLocsSize; j++)
|
|
5858 {
|
|
5859 k = _msf_sort_seqList[i].readNumber;
|
|
5860 // if(j != 0)
|
|
5861 // if(strncmp(_msf_seqList[k].seq+_msf_samplingLocs[j], _msf_seqList[k].seq+_msf_samplingLocs[j-1], segSize) == 0)
|
|
5862 // continue;
|
|
5863 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]))
|
|
5864 // continue;
|
|
5865 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]));
|
|
5866 if ( locs != NULL)
|
|
5867 {
|
|
5868 mapSingleEndSeq(locs+1, locs[0],k ,j, 0);
|
|
5869 }
|
|
5870 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]);
|
|
5871 }
|
|
5872 }
|
|
5873 i = 0;
|
|
5874
|
|
5875 for(i = 0; i < _msf_seqListSize; i++)
|
|
5876 {
|
|
5877 for(j = 0; j < _msf_samplingLocsSize; j++)
|
|
5878 {
|
|
5879 k = _msf_sort_seqList[i].readNumber;
|
|
5880
|
|
5881 // if(j != 0)
|
|
5882 // if(strncmp(_msf_seqList[k].rseq+_msf_samplingLocs[j], _msf_seqList[k].rseq+_msf_samplingLocs[j-1], segSize) == 0)
|
|
5883 // continue;
|
|
5884 // if(prev_hash == hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]))
|
|
5885 // continue;
|
|
5886 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j]));
|
|
5887 if ( locs != NULL)
|
|
5888 {
|
|
5889 mapSingleEndSeq(locs+1, locs[0],k ,j, 1);
|
|
5890 }
|
|
5891 prev_hash = hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]);
|
|
5892 }
|
|
5893 }
|
|
5894 return 1;
|
|
5895 }
|
|
5896
|
|
5897
|
|
5898 /**********************************************/
|
|
5899 /**********************************************/
|
|
5900 /**********************************************/
|
|
5901 /**********************************************/
|
|
5902 /**********************************************/
|
|
5903 int compareOut (const void *a, const void *b)
|
|
5904 {
|
|
5905 FullMappingInfo *aInfo = (FullMappingInfo *)a;
|
|
5906 FullMappingInfo *bInfo = (FullMappingInfo *)b;
|
|
5907 return aInfo->loc - bInfo->loc;
|
|
5908 }
|
|
5909
|
|
5910
|
|
5911
|
|
5912 /**********************************************/
|
|
5913
|
|
5914 /*
|
|
5915 direction 0: Forward
|
|
5916 1: Reverse
|
|
5917 */
|
|
5918
|
|
5919 void mapPairEndSeqList(unsigned int *l1, int s1, int readNumber, int readSegment, int direction)
|
|
5920 {
|
|
5921 int z = 0;
|
|
5922 int *locs = (int *) l1;
|
|
5923 char *_tmpSeq;
|
|
5924
|
|
5925 char rqual[SEQ_LENGTH+1];
|
|
5926
|
|
5927 char matrix[200];
|
|
5928 char editString[200];
|
|
5929 char cigar[MAX_CIGAR_SIZE];
|
|
5930
|
|
5931 short *_tmpHashValue;
|
|
5932
|
|
5933 int leftSeqLength = 0;
|
|
5934 int middleSeqLength = 0;
|
|
5935 int rightSeqLength =0;
|
|
5936 int a = 0;
|
|
5937
|
|
5938 rqual[SEQ_LENGTH]='\0';
|
|
5939
|
|
5940
|
|
5941 int r = readNumber;
|
|
5942
|
|
5943 char d = (direction==1)?-1:1;
|
|
5944
|
|
5945 if (d==-1)
|
|
5946 {
|
|
5947 _tmpSeq = _msf_seqList[readNumber].rseq;
|
|
5948 _tmpHashValue = _msf_seqList[r].rhashValue;
|
|
5949 }
|
|
5950 else
|
|
5951 {
|
|
5952 _tmpSeq = _msf_seqList[readNumber].seq;
|
|
5953 _tmpHashValue = _msf_seqList[r].hashValue;
|
|
5954 }
|
|
5955
|
|
5956 int readId = 2*readNumber+direction;
|
|
5957 for (z=0; z<s1; z++)
|
|
5958 {
|
|
5959 int genLoc = locs[z];//-_msf_samplingLocs[o];
|
|
5960 int err = -1;
|
|
5961 int map_location = 0;
|
|
5962 int o = readSegment;
|
|
5963
|
|
5964 leftSeqLength = _msf_samplingLocs[o];
|
|
5965 middleSeqLength = WINDOW_SIZE;
|
|
5966 a = leftSeqLength + middleSeqLength;
|
|
5967 rightSeqLength = SEQ_LENGTH - a;
|
|
5968
|
|
5969 if(genLoc - leftSeqLength < _msf_refGenBeg || genLoc + rightSeqLength + middleSeqLength > _msf_refGenEnd ||
|
|
5970 _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == readId || _msf_verifiedLocs[genLoc-_msf_samplingLocs[o]] == -readId)
|
|
5971 continue;
|
|
5972
|
|
5973 if(errThreshold == 2)
|
|
5974 err = verifySingleEndEditDistance2(genLoc, _tmpSeq, leftSeqLength,
|
|
5975 _tmpSeq + a, rightSeqLength,
|
|
5976 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5977 else if(errThreshold == 4)
|
|
5978 err = verifySingleEndEditDistance4(genLoc, _tmpSeq, leftSeqLength,
|
|
5979 _tmpSeq + a, rightSeqLength,
|
|
5980 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5981 else if(errThreshold ==3)
|
|
5982 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
|
|
5983 _tmpSeq + a, rightSeqLength,
|
|
5984 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5985 /*else if(errThreshold == 6)
|
|
5986 err = verifySingleEndEditDistance(genLoc, _tmpSeq, leftSeqLength,
|
|
5987 _tmpSeq + a, rightSeqLength,
|
|
5988 middleSeqLength, matrix, &map_location, _tmpHashValue);*/
|
|
5989 else
|
|
5990 err = verifySingleEndEditDistanceExtention(genLoc, _tmpSeq, leftSeqLength,
|
|
5991 _tmpSeq + a, rightSeqLength,
|
|
5992 middleSeqLength, matrix, &map_location, _tmpHashValue);
|
|
5993
|
|
5994
|
|
5995 if (err != -1)
|
|
5996 {
|
|
5997 int j = 0;
|
|
5998 int k = 0;
|
|
5999
|
|
6000 for(k = 0; k < readSegment+1; k++)
|
|
6001 {
|
|
6002 for(j = -errThreshold ; j <= errThreshold; j++)
|
|
6003 {
|
|
6004 if(genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j >= _msf_refGenBeg &&
|
|
6005 genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j <= _msf_refGenEnd)
|
|
6006 _msf_verifiedLocs[genLoc-(k*(_msf_samplingLocs[1]-_msf_samplingLocs[0]))+j] = readId;
|
|
6007 }
|
|
6008 }
|
|
6009
|
|
6010
|
|
6011 generateSNPSAM(matrix, strlen(matrix), editString);
|
|
6012 generateCigar(matrix, strlen(matrix), cigar);
|
|
6013
|
|
6014 MappingLocations *parent = NULL;
|
|
6015 MappingLocations *child = _msf_mappingInfo[r].next;
|
|
6016
|
|
6017 genLoc = map_location + _msf_refGenOffset;
|
|
6018 int i = 0;
|
|
6019 for (i=0; i<(_msf_mappingInfo[r].size/MAP_CHUNKS); i++)
|
|
6020 {
|
|
6021 parent = child;
|
|
6022 child = child->next;
|
|
6023 }
|
|
6024
|
|
6025 if (child==NULL)
|
|
6026 {
|
|
6027 MappingLocations *tmp = getMem(sizeof(MappingLocations));
|
|
6028
|
|
6029 tmp->next = NULL;
|
|
6030 tmp->loc[0]=genLoc * d;
|
|
6031 tmp->err[0]=err;
|
|
6032
|
|
6033 tmp->cigarSize[0] = strlen(cigar);
|
|
6034 sprintf(tmp->cigar[0],"%s", cigar);
|
|
6035
|
|
6036 tmp->mdSize[0] = strlen(editString);
|
|
6037 sprintf(tmp->md[0],"%s", editString);
|
|
6038
|
|
6039 if (parent == NULL)
|
|
6040 _msf_mappingInfo[r].next = tmp;
|
|
6041 else
|
|
6042 parent->next = tmp;
|
|
6043 }
|
|
6044 else
|
|
6045 {
|
|
6046 if(strlen(cigar) > SEQ_LENGTH || strlen(editString) > SEQ_LENGTH)
|
|
6047 {
|
|
6048 printf("ERROR in %d read size(After mapping) exceedes cigar=%d md =%d cigar=%s md =%s\n", r, (int)strlen(cigar), (int)strlen(editString), cigar, editString);
|
|
6049 }
|
|
6050
|
|
6051 child->loc[_msf_mappingInfo[r].size % MAP_CHUNKS] = genLoc * d;
|
|
6052 child->err[_msf_mappingInfo[r].size % MAP_CHUNKS] = err;
|
|
6053
|
|
6054 child->cigarSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(cigar);
|
|
6055 sprintf(child->cigar[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",cigar);
|
|
6056
|
|
6057 child->mdSize[_msf_mappingInfo[r].size % MAP_CHUNKS] = strlen(editString);
|
|
6058 sprintf(child->md[_msf_mappingInfo[r].size % MAP_CHUNKS],"%s",editString);
|
|
6059 }
|
|
6060 _msf_mappingInfo[r].size++;
|
|
6061
|
|
6062 }
|
|
6063 else
|
|
6064 {
|
|
6065 _msf_verifiedLocs[genLoc] = -readId;
|
|
6066 }
|
|
6067
|
|
6068 }
|
|
6069 }
|
|
6070
|
|
6071 /**********************************************/
|
|
6072 void mapPairedEndSeq()
|
|
6073 {
|
|
6074 int i = 0;
|
|
6075 int j = 0;
|
|
6076 int k = 0;
|
|
6077
|
|
6078 unsigned int *locs = NULL;
|
|
6079 while ( i < _msf_seqListSize )
|
|
6080 {
|
|
6081 for(j = 0; j < _msf_samplingLocsSize; j++)
|
|
6082 {
|
|
6083 k = _msf_sort_seqList[i].readNumber;
|
|
6084 locs = getCandidates ( hashVal(_msf_seqList[k].seq+_msf_samplingLocs[j]));
|
|
6085 if ( locs != NULL)
|
|
6086 {
|
|
6087 mapPairEndSeqList(locs+1, locs[0],k ,j, 0);
|
|
6088 }
|
|
6089 }
|
|
6090 i++;
|
|
6091 }
|
|
6092 i = 0;
|
|
6093
|
|
6094 while ( i < _msf_seqListSize )
|
|
6095 {
|
|
6096 for(j = 0; j < _msf_samplingLocsSize; j++)
|
|
6097 {
|
|
6098 k = _msf_sort_seqList[i].readNumber;
|
|
6099 locs = getCandidates ( hashVal(_msf_seqList[k].rseq+_msf_samplingLocs[j]));
|
|
6100 if ( locs != NULL)
|
|
6101 {
|
|
6102 mapPairEndSeqList(locs+1, locs[0],k ,j, 1);
|
|
6103 }
|
|
6104 }
|
|
6105
|
|
6106 i++;
|
|
6107 }
|
|
6108 char fname1[FILE_NAME_LENGTH];
|
|
6109 char fname2[FILE_NAME_LENGTH];
|
|
6110 MappingLocations *cur;
|
|
6111 int tmpOut;
|
|
6112 int lmax=0, rmax=0;
|
|
6113
|
|
6114 sprintf(fname1, "%s__%s__%s__%d__1.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles);
|
|
6115 sprintf(fname2, "%s__%s__%s__%d__2.tmp",mappingOutputPath, _msf_refGenName, mappingOutput, _msf_openFiles);
|
|
6116
|
|
6117 FILE* out;
|
|
6118 FILE* out1 = fileOpen(fname1, "w");
|
|
6119 FILE* out2 = fileOpen(fname2, "w");
|
|
6120
|
|
6121 _msf_openFiles++;
|
|
6122
|
|
6123 for (i=0; i<_msf_seqListSize; i++)
|
|
6124 {
|
|
6125
|
|
6126 if (i%2==0)
|
|
6127 {
|
|
6128 out = out1;
|
|
6129
|
|
6130 if (lmax < _msf_mappingInfo[i].size)
|
|
6131 {
|
|
6132 lmax = _msf_mappingInfo[i].size;
|
|
6133 }
|
|
6134 }
|
|
6135 else
|
|
6136 {
|
|
6137 out = out2;
|
|
6138 if (rmax < _msf_mappingInfo[i].size)
|
|
6139 {
|
|
6140 rmax = _msf_mappingInfo[i].size;
|
|
6141 }
|
|
6142 }
|
|
6143
|
|
6144 tmpOut = fwrite(&(_msf_mappingInfo[i].size), sizeof(int), 1, out);
|
|
6145 if (_msf_mappingInfo[i].size > 0)
|
|
6146 {
|
|
6147 cur = _msf_mappingInfo[i].next;
|
|
6148 for (j=0; j < _msf_mappingInfo[i].size; j++)
|
|
6149 {
|
|
6150 if ( j>0 && j%MAP_CHUNKS==0)
|
|
6151 {
|
|
6152 cur = cur->next;
|
|
6153 }
|
|
6154 if(cur->cigarSize[j % MAP_CHUNKS] > SEQ_LENGTH || cur->mdSize[j % MAP_CHUNKS] > SEQ_LENGTH)
|
|
6155 {
|
|
6156 printf("ERROR in %d read size exceeds cigar=%d md =%d cigar=%s md =%s\n", i, cur->cigarSize[j % MAP_CHUNKS], cur->mdSize[j % MAP_CHUNKS], cur->cigar[j % MAP_CHUNKS], cur->md[j % MAP_CHUNKS]);
|
|
6157 }
|
|
6158
|
|
6159 tmpOut = fwrite(&(cur->loc[j % MAP_CHUNKS]), sizeof(int), 1, out);
|
|
6160
|
|
6161 tmpOut = fwrite(&(cur->err[j % MAP_CHUNKS]), sizeof(int), 1, out);
|
|
6162
|
|
6163 tmpOut = fwrite(&(cur->cigarSize[j % MAP_CHUNKS]), sizeof(int), 1, out);
|
|
6164 tmpOut = fwrite((cur->cigar[j % MAP_CHUNKS]), sizeof(char), (cur->cigarSize[j % MAP_CHUNKS]), out);
|
|
6165
|
|
6166 tmpOut = fwrite(&(cur->mdSize[j % MAP_CHUNKS]), sizeof(int), 1, out);
|
|
6167 tmpOut = fwrite((cur->md[j % MAP_CHUNKS]), sizeof(char), (cur->mdSize[j % MAP_CHUNKS]), out);
|
|
6168
|
|
6169 }
|
|
6170 _msf_mappingInfo[i].size = 0;
|
|
6171 //_msf_mappingInfo[i].next = NULL;
|
|
6172 }
|
|
6173 }
|
|
6174
|
|
6175 _msf_maxLSize += lmax;
|
|
6176 _msf_maxRSize += rmax;
|
|
6177
|
|
6178 fclose(out1);
|
|
6179 fclose(out2);
|
|
6180
|
|
6181 }
|
|
6182
|
|
6183 void outputPairFullMappingInfo(FILE *fp, int readNumber)
|
|
6184 {
|
|
6185
|
|
6186 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
|
|
6187 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1];
|
|
6188
|
|
6189 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0';
|
|
6190
|
|
6191 seq1 = _msf_seqList[readNumber*2].seq;
|
|
6192 rseq1 = _msf_seqList[readNumber*2].rseq;
|
|
6193 qual1 = _msf_seqList[readNumber*2].qual;
|
|
6194
|
|
6195 reverse(_msf_seqList[readNumber*2].qual, rqual1, SEQ_LENGTH);
|
|
6196
|
|
6197 seq2 = _msf_seqList[readNumber*2+1].seq;
|
|
6198 rseq2 = _msf_seqList[readNumber*2+1].rseq;
|
|
6199 qual2 = _msf_seqList[readNumber*2+1].qual;
|
|
6200
|
|
6201 reverse(_msf_seqList[readNumber*2+1].qual, rqual2, SEQ_LENGTH);
|
|
6202
|
|
6203
|
|
6204 if(bestHitMappingInfo[readNumber*2].loc == -1 && bestHitMappingInfo[readNumber*2+1].loc == -1)
|
|
6205 return;
|
|
6206 else
|
|
6207 {
|
|
6208
|
|
6209 char *seq;
|
|
6210 char *qual;
|
|
6211 char d1;
|
|
6212 char d2;
|
|
6213 int isize;
|
|
6214 int proper=0;
|
|
6215 // ISIZE CALCULATION
|
|
6216 // The distance between outer edges
|
|
6217 isize = abs(bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc)+SEQ_LENGTH - 2;
|
|
6218
|
|
6219 if (bestHitMappingInfo[readNumber*2].loc - bestHitMappingInfo[readNumber*2+1].loc > 0)
|
|
6220 {
|
|
6221 isize *= -1;
|
|
6222 }
|
|
6223 d1 = (bestHitMappingInfo[readNumber*2].dir == -1)?1:0;
|
|
6224 d2 = (bestHitMappingInfo[readNumber*2+1].dir == -1)?1:0;
|
|
6225
|
|
6226 if ( d1 )
|
|
6227 {
|
|
6228 seq = rseq1;
|
|
6229 qual = rqual1;
|
|
6230 }
|
|
6231 else
|
|
6232 {
|
|
6233 seq = seq1;
|
|
6234 qual = qual1;
|
|
6235 }
|
|
6236 if ( (bestHitMappingInfo[readNumber*2].loc < bestHitMappingInfo[readNumber*2+1].loc && !d1 && d2) ||
|
|
6237 (bestHitMappingInfo[readNumber*2].loc > bestHitMappingInfo[readNumber*2+1].loc && d1 && !d2) )
|
|
6238 {
|
|
6239 proper = 2;
|
|
6240 }
|
|
6241 else
|
|
6242 {
|
|
6243 proper = 0;
|
|
6244 }
|
|
6245
|
|
6246 _msf_output.POS = bestHitMappingInfo[readNumber*2].loc;
|
|
6247 _msf_output.MPOS = bestHitMappingInfo[readNumber*2+1].loc;
|
|
6248 _msf_output.FLAG = 1+proper+16*d1+32*d2+64;
|
|
6249 _msf_output.ISIZE = isize;
|
|
6250 _msf_output.SEQ = seq,
|
|
6251 _msf_output.QUAL = qual;
|
|
6252 _msf_output.QNAME = _msf_seqList[readNumber*2].name;
|
|
6253 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr;
|
|
6254 _msf_output.MAPQ = 255;
|
|
6255 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2].cigar;
|
|
6256 _msf_output.MRNAME = "=";
|
|
6257
|
|
6258 _msf_output.optSize = 2;
|
|
6259 _msf_output.optFields = _msf_optionalFields;
|
|
6260
|
|
6261 _msf_optionalFields[0].tag = "NM";
|
|
6262 _msf_optionalFields[0].type = 'i';
|
|
6263 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2].err;
|
|
6264
|
|
6265 _msf_optionalFields[1].tag = "MD";
|
|
6266 _msf_optionalFields[1].type = 'Z';
|
|
6267 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2].md;
|
|
6268
|
|
6269 outputSAM(fp, _msf_output);
|
|
6270 output(_msf_output);
|
|
6271
|
|
6272 if ( d2 )
|
|
6273 {
|
|
6274 seq = rseq2;
|
|
6275 qual = rqual2;
|
|
6276 }
|
|
6277 else
|
|
6278 {
|
|
6279 seq = seq2;
|
|
6280 qual = qual2;
|
|
6281 }
|
|
6282
|
|
6283 _msf_output.POS = bestHitMappingInfo[readNumber*2+1].loc;
|
|
6284 _msf_output.MPOS = bestHitMappingInfo[readNumber*2].loc;
|
|
6285 _msf_output.FLAG = 1+proper+16*d2+32*d1+128;
|
|
6286 _msf_output.ISIZE = -isize;
|
|
6287 _msf_output.SEQ = seq,
|
|
6288 _msf_output.QUAL = qual;
|
|
6289 _msf_output.QNAME = _msf_seqList[readNumber*2].name;
|
|
6290 _msf_output.RNAME = bestHitMappingInfo[readNumber*2].chr;
|
|
6291 _msf_output.MAPQ = 255;
|
|
6292 _msf_output.CIGAR = bestHitMappingInfo[readNumber*2+1].cigar;
|
|
6293 _msf_output.MRNAME = "=";
|
|
6294
|
|
6295 _msf_output.optSize = 2;
|
|
6296 _msf_output.optFields = _msf_optionalFields;
|
|
6297
|
|
6298 _msf_optionalFields[0].tag = "NM";
|
|
6299 _msf_optionalFields[0].type = 'i';
|
|
6300 _msf_optionalFields[0].iVal = bestHitMappingInfo[readNumber*2+1].err;
|
|
6301
|
|
6302 _msf_optionalFields[1].tag = "MD";
|
|
6303 _msf_optionalFields[1].type = 'Z';
|
|
6304 _msf_optionalFields[1].sVal = bestHitMappingInfo[readNumber*2+1].md;
|
|
6305
|
|
6306 outputSAM(fp, _msf_output);
|
|
6307 output(_msf_output);
|
|
6308 }
|
|
6309 }
|
|
6310
|
|
6311
|
|
/*
	Compares the distances of x1 and x2 from c.

	@return	0: if x1 is farther from c than x2 (x2 is the closer one)
		1: if x1 is closer to c than x2
		2: if both distances are equal

	The distances are computed in a wider type, so operands far apart
	cannot overflow.
*/
int findNearest(int x1, int x2, int c)
{
	long long dist1 = (long long)x1 - c;
	long long dist2 = (long long)x2 - c;

	if (dist1 < 0)
		dist1 = -dist1;
	if (dist2 < 0)
		dist2 = -dist2;

	if (dist1 > dist2)
		return 0;
	if (dist1 < dist2)
		return 1;
	return 2;
}
|
|
6331
|
|
6332 void initBestConcordantDiscordant(int readNumber)
|
|
6333 {
|
|
6334 char bestConcordantFileName[FILE_NAME_LENGTH];
|
|
6335 //char bestDiscordantFileName[FILE_NAME_LENGTH];
|
|
6336
|
|
6337 //OPEN THE BEST CONCORDANT FILE
|
|
6338 //BEGIN{Farhad Hormozdiari}
|
|
6339 /* begin {calkan} */
|
|
6340 //sprintf(bestConcordantFileName, "%s%s__BEST.CONCORDANT", mappingOutputPath, mappingOutput);
|
|
6341 sprintf(bestConcordantFileName, "%s%s_BEST.sam", mappingOutputPath, mappingOutput);
|
|
6342
|
|
6343 bestConcordantFILE = fileOpen(bestConcordantFileName, "w");
|
|
6344 bestDiscordantFILE = bestConcordantFILE;
|
|
6345 /* end {calkan} */
|
|
6346 //END{Farhad Hormozdiari}
|
|
6347
|
|
6348
|
|
6349 //OPEN THE BEST DISCORDANT FILE
|
|
6350 //BEGIN{Farhad Hormozdiari}
|
|
6351 /* begin {calkan}
|
|
6352 sprintf(bestDiscordantFileName, "%s%s__BEST.DISCORDANT", mappingOutputPath, mappingOutput);
|
|
6353 bestDiscordantFILE = fileOpen(bestDiscordantFileName, "w");
|
|
6354 end {calkan} */
|
|
6355
|
|
6356 //END{Farhad Hormozdiari}
|
|
6357
|
|
6358 initBestMapping(readNumber);
|
|
6359 }
|
|
6360
|
|
6361 void finalizeBestConcordantDiscordant()
|
|
6362 {
|
|
6363 int i = 0;
|
|
6364
|
|
6365 for(i = 0; i<_msf_seqListSize/2; i++)
|
|
6366 {
|
|
6367 if(_msf_readHasConcordantMapping[i]==1)
|
|
6368 outputPairFullMappingInfo(bestConcordantFILE, i);
|
|
6369 else
|
|
6370 outputPairFullMappingInfo(bestDiscordantFILE, i);
|
|
6371 }
|
|
6372
|
|
6373 fclose(bestConcordantFILE);
|
|
6374 // fclose(bestDiscordantFILE);
|
|
6375
|
|
6376 freeMem(bestHitMappingInfo, _msf_seqListSize * sizeof(FullMappingInfo));
|
|
6377 }
|
|
6378
|
|
6379 void setFullMappingInfo(int readNumber, int loc, int dir, int err, int score, char *md, char * refName, char *cigar)
|
|
6380 {
|
|
6381 bestHitMappingInfo[readNumber].loc = loc;
|
|
6382 bestHitMappingInfo[readNumber].dir = dir;
|
|
6383 bestHitMappingInfo[readNumber].err = err;
|
|
6384 bestHitMappingInfo[readNumber].score = score;
|
|
6385
|
|
6386 strncpy(bestHitMappingInfo[readNumber].md, md, strlen(md)+1);
|
|
6387 strncpy(bestHitMappingInfo[readNumber].chr, refName, strlen(refName)+1);
|
|
6388 strncpy(bestHitMappingInfo[readNumber].cigar, cigar, strlen(cigar)+1);
|
|
6389 }
|
|
6390
|
|
6391
|
|
6392 void setPairFullMappingInfo(int readNumber, FullMappingInfo mi1, FullMappingInfo mi2)
|
|
6393 {
|
|
6394
|
|
6395 bestHitMappingInfo[readNumber*2].loc = mi1.loc;
|
|
6396 bestHitMappingInfo[readNumber*2].dir = mi1.dir;
|
|
6397 bestHitMappingInfo[readNumber*2].err = mi1.err;
|
|
6398 bestHitMappingInfo[readNumber*2].score = mi1.score;
|
|
6399 snprintf(bestHitMappingInfo[readNumber*2].chr, MAX_REF_SIZE, "%s", _msf_refGenName);
|
|
6400
|
|
6401
|
|
6402 strncpy(bestHitMappingInfo[readNumber*2].md, mi1.md, strlen(mi1.md)+1);
|
|
6403 strncpy(bestHitMappingInfo[readNumber*2].cigar, mi1.cigar, strlen(mi1.cigar)+1);
|
|
6404
|
|
6405
|
|
6406 /*
|
|
6407 sprintf(bestHitMappingInfo[readNumber*2].md, "%s\0", mi1.md);
|
|
6408 sprintf(bestHitMappingInfo[readNumber*2].cigar, "%s\0", mi1.cigar);
|
|
6409 */
|
|
6410
|
|
6411
|
|
6412 bestHitMappingInfo[readNumber*2+1].loc = mi2.loc;
|
|
6413 bestHitMappingInfo[readNumber*2+1].dir = mi2.dir;
|
|
6414 bestHitMappingInfo[readNumber*2+1].err = mi2.err;
|
|
6415 bestHitMappingInfo[readNumber*2+1].score = mi2.score;
|
|
6416
|
|
6417 snprintf(bestHitMappingInfo[readNumber*2+1].chr, MAX_REF_SIZE, "%s", _msf_refGenName);
|
|
6418
|
|
6419 /*
|
|
6420 sprintf(bestHitMappingInfo[readNumber*2+1].md, "%s\0", mi2.md);
|
|
6421 sprintf(bestHitMappingInfo[readNumber*2+1].cigar, "%s\0", mi2.cigar);
|
|
6422 */
|
|
6423
|
|
6424 strncpy(bestHitMappingInfo[readNumber*2+1].md, mi2.md, strlen(mi2.md)+1);
|
|
6425 strncpy(bestHitMappingInfo[readNumber*2+1].cigar, mi2.cigar, strlen(mi2.cigar)+1);
|
|
6426
|
|
6427 }
|
|
6428
|
|
6429 /**********************************************/
|
|
6430 void outputPairedEnd()
|
|
6431 {
|
|
6432 int i = 0;
|
|
6433
|
|
6434 char cigar[MAX_CIGAR_SIZE];
|
|
6435
|
|
6436 int tmpOut;
|
|
6437
|
|
6438 loadRefGenome(&_msf_refGen, &_msf_refGenName, &tmpOut);
|
|
6439
|
|
6440 FILE* in1[_msf_openFiles];
|
|
6441 FILE* in2[_msf_openFiles];
|
|
6442
|
|
6443 char fname1[_msf_openFiles][FILE_NAME_LENGTH];
|
|
6444 char fname2[_msf_openFiles][FILE_NAME_LENGTH];
|
|
6445
|
|
6446 // discordant
|
|
6447 FILE *out=NULL, *out1=NULL;
|
|
6448
|
|
6449 char fname3[FILE_NAME_LENGTH];
|
|
6450 char fname4[FILE_NAME_LENGTH];
|
|
6451
|
|
6452 int meanDistanceMapping = 0;
|
|
6453
|
|
6454 char *rqual1;
|
|
6455 char *rqual2;
|
|
6456
|
|
6457 rqual1 = getMem((SEQ_LENGTH+1)*sizeof(char));
|
|
6458 rqual2 = getMem((SEQ_LENGTH+1)*sizeof(char));
|
|
6459
|
|
6460 if (pairedEndDiscordantMode)
|
|
6461 {
|
|
6462 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput);
|
|
6463 sprintf(fname4, "%s__%s__oea", mappingOutputPath, mappingOutput);
|
|
6464 out = fileOpen(fname3, "a");
|
|
6465 out1 = fileOpen(fname4, "a");
|
|
6466 }
|
|
6467
|
|
6468 FullMappingInfo *mi1 = getMem(sizeof(FullMappingInfo) * _msf_maxLSize);
|
|
6469 FullMappingInfo *mi2 = getMem(sizeof(FullMappingInfo) * _msf_maxRSize);
|
|
6470
|
|
6471 _msf_fileCount[_msf_maxFile] = 0;
|
|
6472 for (i=0; i<_msf_openFiles; i++)
|
|
6473 {
|
|
6474 sprintf(fname1[i], "%s__%s__%s__%d__1.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i);
|
|
6475 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][0], "%s", fname1[i]);
|
|
6476
|
|
6477 sprintf(fname2[i], "%s__%s__%s__%d__2.tmp", mappingOutputPath, _msf_refGenName, mappingOutput, i);
|
|
6478 sprintf(_msf_fileName[_msf_maxFile][_msf_fileCount[_msf_maxFile]][1], "%s", fname2[i]);
|
|
6479
|
|
6480 in1[i] = fileOpen(fname1[i], "r");
|
|
6481 in2[i] = fileOpen(fname2[i], "r");
|
|
6482 _msf_fileCount[_msf_maxFile]++;
|
|
6483 }
|
|
6484 _msf_maxFile++;
|
|
6485
|
|
6486 int size;
|
|
6487 int j, k;
|
|
6488 int size1, size2;
|
|
6489
|
|
6490 meanDistanceMapping = (pairedEndDiscordantMode==1)? (minPairEndedDiscordantDistance+maxPairEndedDiscordantDistance)/2 + SEQ_LENGTH
|
|
6491 : (minPairEndedDistance + maxPairEndedDistance) / 2 + SEQ_LENGTH;
|
|
6492
|
|
6493 for (i=0; i<_msf_seqListSize/2; i++)
|
|
6494 {
|
|
6495 size1 = size2 = 0;
|
|
6496 for (j=0; j<_msf_openFiles; j++)
|
|
6497 {
|
|
6498 tmpOut = fread(&size, sizeof(int), 1, in1[j]);
|
|
6499 if ( size > 0 )
|
|
6500 {
|
|
6501 for (k=0; k<size; k++)
|
|
6502 {
|
|
6503 mi1[size1+k].dir = 1;
|
|
6504 tmpOut = fread (&(mi1[size1+k].loc), sizeof(int), 1, in1[j]);
|
|
6505 tmpOut = fread (&(mi1[size1+k].err), sizeof(int), 1, in1[j]);
|
|
6506
|
|
6507 tmpOut = fread (&(mi1[size1+k].cigarSize), sizeof(int), 1, in1[j]);
|
|
6508 tmpOut = fread ((mi1[size1+k].cigar), sizeof(char), mi1[size1+k].cigarSize, in1[j]);
|
|
6509 mi1[size1+k].cigar[mi1[size1+k].cigarSize] = '\0';
|
|
6510
|
|
6511 tmpOut = fread (&(mi1[size1+k].mdSize), sizeof(int), 1, in1[j]);
|
|
6512 tmpOut = fread ((mi1[size1+k].md), sizeof(char), (mi1[size1+k].mdSize), in1[j]);
|
|
6513 mi1[size1+k].md[mi1[size1+k].mdSize] = '\0';
|
|
6514
|
|
6515 if (mi1[size1+k].loc<1)
|
|
6516 {
|
|
6517 mi1[size1+k].loc *= -1;
|
|
6518 mi1[size1+k].dir = -1;
|
|
6519 }
|
|
6520 }
|
|
6521 qsort(mi1+size1, size, sizeof(FullMappingInfo), compareOut);
|
|
6522 size1+=size;
|
|
6523 }
|
|
6524 }
|
|
6525
|
|
6526 for (j=0; j<_msf_openFiles; j++)
|
|
6527 {
|
|
6528 tmpOut = fread(&size, sizeof(int), 1, in2[j]);
|
|
6529 if ( size > 0 )
|
|
6530 {
|
|
6531 for (k=0; k<size; k++)
|
|
6532 {
|
|
6533 mi2[size2+k].dir = 1;
|
|
6534 tmpOut = fread (&(mi2[size2+k].loc), sizeof(int), 1, in2[j]);
|
|
6535 tmpOut = fread (&(mi2[size2+k].err), sizeof(int), 1, in2[j]);
|
|
6536
|
|
6537 tmpOut = fread (&(mi2[size2+k].cigarSize), sizeof(int), 1, in2[j]);
|
|
6538 tmpOut = fread ((mi2[size2+k].cigar), sizeof(char), mi2[size2+k].cigarSize, in2[j]);
|
|
6539 mi2[size2+k].cigar[mi2[size2+k].cigarSize] = '\0';
|
|
6540
|
|
6541 tmpOut = fread (&(mi2[size2+k].mdSize), sizeof(int), 1, in2[j]);
|
|
6542 tmpOut = fread ((mi2[size2+k].md), sizeof(char), mi2[size2+k].mdSize, in2[j]);
|
|
6543 mi2[size2+k].md[mi2[size2+k].mdSize] = '\0';
|
|
6544
|
|
6545 if (mi2[size2+k].loc<1)
|
|
6546 {
|
|
6547 mi2[size2+k].loc *= -1;
|
|
6548 mi2[size2+k].dir = -1;
|
|
6549 }
|
|
6550 }
|
|
6551 qsort(mi2+size2, size, sizeof(FullMappingInfo), compareOut);
|
|
6552 size2+=size;
|
|
6553 }
|
|
6554 }
|
|
6555
|
|
6556 int lm, ll, rl, rm;
|
|
6557 int pos = 0;
|
|
6558
|
|
6559 if (pairedEndDiscordantMode)
|
|
6560 {
|
|
6561
|
|
6562 for (j=0; j<size1; j++)
|
|
6563 {
|
|
6564 lm = mi1[j].loc - maxPairEndedDiscordantDistance + 1;
|
|
6565 ll = mi1[j].loc - minPairEndedDiscordantDistance + 1;
|
|
6566 rl = mi1[j].loc + minPairEndedDiscordantDistance - 1;
|
|
6567 rm = mi1[j].loc + maxPairEndedDiscordantDistance - 1;
|
|
6568
|
|
6569 while (pos<size2 && mi2[pos].loc < lm)
|
|
6570 {
|
|
6571 pos++;
|
|
6572 }
|
|
6573
|
|
6574 k = pos;
|
|
6575 while (k<size2 && mi2[k].loc<=rm)
|
|
6576 {
|
|
6577 if ( mi2[k].loc <= ll || mi2[k].loc >= rl)
|
|
6578 {
|
|
6579 if ( (mi1[j].loc < mi2[k].loc && mi1[j].dir==1 && mi2[k].dir == -1) ||
|
|
6580 (mi1[j].loc > mi2[k].loc && mi1[j].dir==-1 && mi2[k].dir == 1) )
|
|
6581 {
|
|
6582 _msf_seqList[i*2].hits[0]=1;
|
|
6583 _msf_seqList[i*2+1].hits[0]=1;
|
|
6584
|
|
6585 if(nosamMode != 0)
|
|
6586 {
|
|
6587 size1=0;
|
|
6588 size2=0;
|
|
6589 }
|
|
6590
|
|
6591 break;
|
|
6592 }
|
|
6593 }
|
|
6594 k++;
|
|
6595 }
|
|
6596 }
|
|
6597
|
|
6598 _msf_seqHits[i*2] += size1;
|
|
6599 _msf_seqHits[i*2+1] += size2;
|
|
6600
|
|
6601
|
|
6602 if (_msf_seqHits[i*2+1] * _msf_seqHits[i*2] > DISCORDANT_CUT_OFF && nosamMode != 0)
|
|
6603 {
|
|
6604 _msf_seqList[i*2].hits[0]=1;
|
|
6605 _msf_seqList[i*2+1].hits[0]=1;
|
|
6606 size1=0;
|
|
6607 size2=0;
|
|
6608 }
|
|
6609
|
|
6610
|
|
6611
|
|
6612
|
|
6613 int tmp = 0;
|
|
6614 int rNo = 0;
|
|
6615 int loc = 0;
|
|
6616 int err = 0;
|
|
6617 float sc = 0;
|
|
6618 char l = 0;
|
|
6619
|
|
6620 //write the OEA data
|
|
6621 if(_msf_seqHits[i*2] == 0 )
|
|
6622 {
|
|
6623 for(k = 0;k < size2 && _msf_oeaMapping[i*2+1] < maxOEAOutput ;k++)
|
|
6624 {
|
|
6625 rNo = i*2+1;
|
|
6626 loc = mi2[k].loc*mi2[k].dir;
|
|
6627 err = mi2[k].err;
|
|
6628 sc = mi2[k].score;
|
|
6629
|
|
6630 l = strlen(_msf_refGenName);
|
|
6631
|
|
6632 tmp = fwrite(&rNo, sizeof(int), 1, out1);
|
|
6633
|
|
6634 tmp = fwrite(&l, sizeof(char), 1, out1);
|
|
6635 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1);
|
|
6636
|
|
6637 tmp = fwrite(&loc, sizeof(int), 1, out1);
|
|
6638 tmp = fwrite(&err, sizeof(int), 1, out1);
|
|
6639 tmp = fwrite(&sc, sizeof(float), 1, out1);
|
|
6640
|
|
6641 if(mi2[k].cigarSize > SEQ_LENGTH || mi2[k].cigarSize <= 0)
|
|
6642 printf("ERROR CIGAR size=%d %s\n", mi2[k].cigarSize, _msf_seqList[i*2+1].seq);
|
|
6643
|
|
6644 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out1);
|
|
6645 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out1);
|
|
6646
|
|
6647 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out1);
|
|
6648 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out1);
|
|
6649
|
|
6650 _msf_oeaMapping[i*2+1]++;
|
|
6651 }
|
|
6652 }
|
|
6653 if(_msf_seqHits[i*2+1] == 0)
|
|
6654 {
|
|
6655 for(j = 0;j < size1 && _msf_oeaMapping[i*2] < maxOEAOutput;j++)
|
|
6656 {
|
|
6657 rNo = i*2;
|
|
6658 loc = mi1[j].loc*mi1[j].dir;
|
|
6659 err = mi1[j].err;
|
|
6660 sc = mi1[j].score;
|
|
6661
|
|
6662 l = strlen(_msf_refGenName);
|
|
6663
|
|
6664 tmp = fwrite(&rNo, sizeof(int), 1, out1);
|
|
6665
|
|
6666 tmp = fwrite(&l, sizeof(char), 1, out1);
|
|
6667 tmp = fwrite(_msf_refGenName, sizeof(char), l, out1);
|
|
6668
|
|
6669 tmp = fwrite(&loc, sizeof(int), 1, out1);
|
|
6670 tmp = fwrite(&err, sizeof(int), 1, out1);
|
|
6671 tmp = fwrite(&sc, sizeof(float), 1, out1);
|
|
6672
|
|
6673 if(mi1[j].cigarSize > SEQ_LENGTH || mi1[j].cigarSize <= 0 )
|
|
6674 printf("ERROR %d %s\n", mi1[j].cigarSize, _msf_seqList[i*2+1].seq);
|
|
6675
|
|
6676 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out1);
|
|
6677 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out1);
|
|
6678
|
|
6679 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out1);
|
|
6680 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out1);
|
|
6681
|
|
6682 _msf_oeaMapping[i*2]++;
|
|
6683 }
|
|
6684 }
|
|
6685 }
|
|
6686
|
|
6687 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
|
|
6688
|
|
6689
|
|
6690
|
|
6691
|
|
6692 rqual1[SEQ_LENGTH] = '\0';
|
|
6693 rqual2[SEQ_LENGTH] = '\0';
|
|
6694 rqual1[0] = '\0';
|
|
6695 rqual2[0] = '\0';
|
|
6696
|
|
6697
|
|
6698
|
|
6699 seq1 = _msf_seqList[i*2].seq;
|
|
6700 rseq1 = _msf_seqList[i*2].rseq;
|
|
6701 qual1 = _msf_seqList[i*2].qual;
|
|
6702
|
|
6703
|
|
6704
|
|
6705 strncpy(rqual1, _msf_seqList[i*2].qual, SEQ_LENGTH);
|
|
6706
|
|
6707 seq2 = _msf_seqList[i*2+1].seq;
|
|
6708 rseq2 = _msf_seqList[i*2+1].rseq;
|
|
6709 qual2 = _msf_seqList[i*2+1].qual;
|
|
6710
|
|
6711
|
|
6712 strncpy(rqual2, _msf_seqList[i*2+1].qual, SEQ_LENGTH);
|
|
6713
|
|
6714 if (pairedEndDiscordantMode)
|
|
6715 {
|
|
6716 for (k=0; k<size1; k++)
|
|
6717 {
|
|
6718 mi1[k].score = calculateScore(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, (mi1[k].dir==-1)?rqual1:qual1, mi1[k].cigar);
|
|
6719 }
|
|
6720
|
|
6721 for (k=0; k<size2; k++)
|
|
6722 {
|
|
6723 mi2[k].score = calculateScore(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, (mi2[k].dir==-1)?rqual2:qual2, mi2[k].cigar);
|
|
6724 }
|
|
6725
|
|
6726 }
|
|
6727
|
|
6728
|
|
6729 if (pairedEndDiscordantMode)
|
|
6730 {
|
|
6731 for (j=0; j<size1; j++)
|
|
6732 {
|
|
6733 for(k = 0; k < size2; k++)
|
|
6734 {
|
|
6735 if(
|
|
6736 (mi2[k].loc-mi1[j].loc >= minPairEndedDiscordantDistance &&
|
|
6737 mi2[k].loc-mi1[j].loc <= maxPairEndedDiscordantDistance &&
|
|
6738 mi1[j].dir > 0 && mi2[k].dir < 0 )
|
|
6739
|
|
6740 ||
|
|
6741
|
|
6742 (mi1[j].loc-mi2[k].loc >= minPairEndedDiscordantDistance &&
|
|
6743 mi1[j].loc-mi2[k].loc <= maxPairEndedDiscordantDistance &&
|
|
6744 mi1[j].dir < 0 && mi2[k].dir > 0)
|
|
6745 )
|
|
6746 {
|
|
6747 //POSSIBLE CONCORDANT
|
|
6748 if(_msf_readHasConcordantMapping[i] == 0)
|
|
6749 {
|
|
6750 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6751 _msf_readHasConcordantMapping[i] = 1;
|
|
6752 _msf_seqList[i*2].hits[0] = 1;
|
|
6753 _msf_seqList[i*2+1].hits[0] = 1;
|
|
6754 }
|
|
6755 else
|
|
6756 {
|
|
6757 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err)
|
|
6758 {
|
|
6759
|
|
6760 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err ==
|
|
6761 mi1[j].err + mi2[k].err &&
|
|
6762 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
|
|
6763 abs(mi2[k].loc - mi1[j].loc),
|
|
6764 meanDistanceMapping
|
|
6765 ) == 0 )
|
|
6766 {
|
|
6767 continue;
|
|
6768 }
|
|
6769 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6770 }
|
|
6771 }
|
|
6772 }
|
|
6773 //DISCORDANT TO TEMP FILE FOR POST PROCESSIING
|
|
6774 else if(_msf_readHasConcordantMapping[i] == 0 &&
|
|
6775 _msf_seqHits[i*2] != 0 &&
|
|
6776 _msf_seqHits[i*2+1] != 0)
|
|
6777 {
|
|
6778
|
|
6779 int tmp;
|
|
6780 int rNo = i;
|
|
6781 int loc = mi1[j].loc*mi1[j].dir;
|
|
6782 int err = mi1[j].err;
|
|
6783 float sc = mi1[j].score;
|
|
6784
|
|
6785 char l = strlen(_msf_refGenName);
|
|
6786
|
|
6787 if(_msf_discordantMapping[i*2] < maxDiscordantOutput)
|
|
6788 {
|
|
6789
|
|
6790 tmp = fwrite(&rNo, sizeof(int), 1, out);
|
|
6791
|
|
6792 tmp = fwrite(&l, sizeof(char), 1, out);
|
|
6793 tmp = fwrite(_msf_refGenName, sizeof(char), l, out);
|
|
6794
|
|
6795 tmp = fwrite(&loc, sizeof(int), 1, out);
|
|
6796 tmp = fwrite(&err, sizeof(int), 1, out);
|
|
6797 tmp = fwrite(&sc, sizeof(float), 1, out);
|
|
6798
|
|
6799 tmp = fwrite (&(mi1[j].cigarSize), sizeof(int), 1, out);
|
|
6800 tmp = fwrite ((mi1[j].cigar), sizeof(char), mi1[j].cigarSize, out);
|
|
6801
|
|
6802 tmp = fwrite (&(mi1[j].mdSize), sizeof(int), 1, out);
|
|
6803 tmp = fwrite ((mi1[j].md), sizeof(char), mi1[j].mdSize, out);
|
|
6804
|
|
6805
|
|
6806 loc = mi2[k].loc*mi2[k].dir;
|
|
6807 err = mi2[k].err;
|
|
6808 sc = mi2[k].score;
|
|
6809
|
|
6810 tmp = fwrite(&loc, sizeof(int), 1, out);
|
|
6811 tmp = fwrite(&err, sizeof(int), 1, out);
|
|
6812 tmp = fwrite(&sc, sizeof(float), 1, out);
|
|
6813
|
|
6814 tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out);
|
|
6815 tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out);
|
|
6816
|
|
6817 tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out);
|
|
6818 tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out);
|
|
6819
|
|
6820
|
|
6821 _msf_discordantMapping[i*2]++;
|
|
6822 }
|
|
6823 //SET THE BEST DISCORDANT
|
|
6824 //BEGIN {Farhad Hormozdiari}
|
|
6825 if( bestHitMappingInfo[i*2].loc == -1 &&
|
|
6826 bestHitMappingInfo[i*2+1].loc == -1 &&
|
|
6827 _msf_readHasConcordantMapping[i] == 0)
|
|
6828 {
|
|
6829 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6830 _msf_seqList[i*2].hits[0] = 1;
|
|
6831 _msf_seqList[i*2+1].hits[0] = 1;
|
|
6832 }
|
|
6833 else if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err
|
|
6834 && _msf_readHasConcordantMapping[i] == 0)
|
|
6835 {
|
|
6836 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err &&
|
|
6837 findNearest( abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
|
|
6838 abs(mi1[j].loc - mi2[k].loc),
|
|
6839 meanDistanceMapping
|
|
6840 ) == 0
|
|
6841 )
|
|
6842 {
|
|
6843 continue;
|
|
6844 }
|
|
6845 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6846 }
|
|
6847 //END {Farhad Hormozdiari}
|
|
6848 }
|
|
6849 }
|
|
6850 }
|
|
6851 }
|
|
6852 else
|
|
6853 {
|
|
6854 for (j=0; j<size1; j++)
|
|
6855 {
|
|
6856 for(k = 0; k < size2; k++)
|
|
6857 {
|
|
6858 if((mi2[k].loc-mi1[j].loc >= minPairEndedDistance &&
|
|
6859 mi2[k].loc-mi1[j].loc <= maxPairEndedDistance &&
|
|
6860 mi1[j].dir > 0 && mi2[k].dir < 0)
|
|
6861 ||
|
|
6862 (mi1[j].loc-mi2[k].loc >= minPairEndedDistance &&
|
|
6863 mi1[j].loc-mi2[k].loc <= maxPairEndedDistance &&
|
|
6864 mi1[j].dir < 0 && mi2[k].dir > 0)
|
|
6865 )
|
|
6866 {
|
|
6867 char *seq;
|
|
6868 char *qual;
|
|
6869 char d1;
|
|
6870 char d2;
|
|
6871 int isize;
|
|
6872 int proper=0;
|
|
6873 // ISIZE CALCULATION
|
|
6874 // The distance between outer edges
|
|
6875 isize = abs(mi1[j].loc - mi2[k].loc)+SEQ_LENGTH-2;
|
|
6876 if (mi1[j].loc - mi2[k].loc > 0)
|
|
6877 {
|
|
6878 isize *= -1;
|
|
6879 }
|
|
6880
|
|
6881 d1 = (mi1[j].dir == -1)?1:0;
|
|
6882 d2 = (mi2[k].dir == -1)?1:0;
|
|
6883
|
|
6884 //SET THE READ HAS CONCORDANT MAPPING
|
|
6885 _msf_readHasConcordantMapping[i] = 1;
|
|
6886
|
|
6887 if ( d1 )
|
|
6888 {
|
|
6889 seq = rseq1;
|
|
6890 qual = rqual1;
|
|
6891 }
|
|
6892 else
|
|
6893 {
|
|
6894 seq = seq1;
|
|
6895 qual = qual1;
|
|
6896 }
|
|
6897
|
|
6898 if ((mi1[j].loc < mi2[k].loc && !d1 && d2) ||
|
|
6899 (mi1[j].loc > mi2[k].loc && d1 && !d2) )
|
|
6900 {
|
|
6901 proper = 2;
|
|
6902 }
|
|
6903 else
|
|
6904 {
|
|
6905 proper = 0;
|
|
6906 }
|
|
6907
|
|
6908
|
|
6909 _msf_output.POS = mi1[j].loc;
|
|
6910 _msf_output.MPOS = mi2[k].loc;
|
|
6911 _msf_output.FLAG = 1+proper+16*d1+32*d2+64;
|
|
6912 _msf_output.ISIZE = isize;
|
|
6913 _msf_output.SEQ = seq,
|
|
6914 _msf_output.QUAL = qual;
|
|
6915 _msf_output.QNAME = _msf_seqList[i*2].name;
|
|
6916 _msf_output.RNAME = _msf_refGenName;
|
|
6917 _msf_output.MAPQ = 255;
|
|
6918 _msf_output.CIGAR = cigar;
|
|
6919 _msf_output.MRNAME = "=";
|
|
6920
|
|
6921 _msf_output.optSize = 2;
|
|
6922 _msf_output.optFields = _msf_optionalFields;
|
|
6923
|
|
6924 _msf_optionalFields[0].tag = "NM";
|
|
6925 _msf_optionalFields[0].type = 'i';
|
|
6926 _msf_optionalFields[0].iVal = mi1[j].err;
|
|
6927
|
|
6928 _msf_optionalFields[1].tag = "MD";
|
|
6929 _msf_optionalFields[1].type = 'Z';
|
|
6930 _msf_optionalFields[1].sVal = mi1[j].md;
|
|
6931
|
|
6932 if(!bestMode)
|
|
6933 output(_msf_output);
|
|
6934
|
|
6935 if ( d2 )
|
|
6936 {
|
|
6937 seq = rseq2;
|
|
6938 qual = rqual2;
|
|
6939 }
|
|
6940 else
|
|
6941 {
|
|
6942 seq = seq2;
|
|
6943 qual = qual2;
|
|
6944 }
|
|
6945
|
|
6946 _msf_output.POS = mi2[k].loc;
|
|
6947 _msf_output.MPOS = mi1[j].loc;
|
|
6948 _msf_output.FLAG = 1+proper+16*d2+32*d1+128;
|
|
6949 _msf_output.ISIZE = -isize;
|
|
6950 _msf_output.SEQ = seq,
|
|
6951 _msf_output.QUAL = qual;
|
|
6952 _msf_output.QNAME = _msf_seqList[i*2].name;
|
|
6953 _msf_output.RNAME = _msf_refGenName;
|
|
6954 _msf_output.MAPQ = 255;
|
|
6955 _msf_output.CIGAR = cigar;
|
|
6956 _msf_output.MRNAME = "=";
|
|
6957
|
|
6958 _msf_output.optSize = 2;
|
|
6959 _msf_output.optFields = _msf_optionalFields;
|
|
6960
|
|
6961 _msf_optionalFields[0].tag = "NM";
|
|
6962 _msf_optionalFields[0].type = 'i';
|
|
6963 _msf_optionalFields[0].iVal = mi2[k].err;;
|
|
6964
|
|
6965 _msf_optionalFields[1].tag = "MD";
|
|
6966 _msf_optionalFields[1].type = 'Z';
|
|
6967 _msf_optionalFields[1].sVal = mi2[k].md;
|
|
6968
|
|
6969 if(!bestMode)
|
|
6970 output(_msf_output);
|
|
6971 //SET THE BEST CONCORDANT
|
|
6972 //BEGIN {Farhad Hormozdiari}
|
|
6973 if(bestHitMappingInfo[i*2].loc == -1 && bestHitMappingInfo[i*2+1].loc == -1)
|
|
6974 {
|
|
6975 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6976 }
|
|
6977 else
|
|
6978 {
|
|
6979 if(bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err >= mi1[j].err + mi2[k].err)
|
|
6980 {
|
|
6981
|
|
6982 if( bestHitMappingInfo[i*2].err + bestHitMappingInfo[i*2+1].err == mi1[j].err + mi2[k].err &&
|
|
6983 findNearest(abs(bestHitMappingInfo[i*2+1].loc - bestHitMappingInfo[i*2].loc),
|
|
6984 abs(mi2[k].loc - mi1[j].loc),
|
|
6985 meanDistanceMapping
|
|
6986 ) == 0 )
|
|
6987 {
|
|
6988 continue;
|
|
6989 }
|
|
6990 setPairFullMappingInfo(i, mi1[j], mi2[k]);
|
|
6991 }
|
|
6992 }
|
|
6993 //END {Farhad Hormozdiari}
|
|
6994 }
|
|
6995 }
|
|
6996 }
|
|
6997
|
|
6998 }
|
|
6999 }
|
|
7000
|
|
7001 freeMem(rqual1, 0);
|
|
7002 freeMem(rqual2, 0);
|
|
7003
|
|
7004 if (pairedEndDiscordantMode)
|
|
7005 {
|
|
7006 fclose(out);
|
|
7007 fclose(out1);
|
|
7008 }
|
|
7009
|
|
7010 for (i=0; i<_msf_openFiles; i++)
|
|
7011 {
|
|
7012 fclose(in1[i]);
|
|
7013 fclose(in2[i]);
|
|
7014
|
|
7015 unlink(fname1[i]);
|
|
7016 unlink(fname2[i]);
|
|
7017 }
|
|
7018
|
|
7019 freeMem(mi1, sizeof(FullMappingInfo)*_msf_maxLSize);
|
|
7020 freeMem(mi2, sizeof(FullMappingInfo)*_msf_maxRSize);
|
|
7021
|
|
7022 _msf_openFiles = 0;
|
|
7023 }
|
|
7024
|
|
7025 /**********************************************/
|
|
7026 /**********************************************/
|
|
7027 /**********************************************/
|
|
7028 /**********************************************/
|
|
/*
 * Parses the decimal number stored in str[index1 .. index2-1] and returns it
 * as a float (despite the name).  An empty range yields 0.
 *
 * The range is copied into a 200-byte scratch buffer; it is now clamped to
 * that buffer (and a negative length is treated as empty) instead of
 * overflowing the stack.
 */
float str2int(char *str, int index1, int index2)
{
	char tmp[200];
	int len = index2 - index1;

	if (len < 0)
		len = 0;
	if (len > (int)sizeof(tmp) - 1)
		len = (int)sizeof(tmp) - 1;

	strncpy(tmp, &str[index1], len);
	tmp[len] = '\0';

	return atol(tmp);
}
|
|
7036
|
|
7037 float calculateScore(int index, char *seq, char *qual,char *md)
|
|
7038 {
|
|
7039 int i;
|
|
7040 int j;
|
|
7041 char *ref;
|
|
7042 char *ver;
|
|
7043
|
|
7044 ref = _msf_refGen + index-1;
|
|
7045 ver = seq;
|
|
7046 float score = 1;
|
|
7047
|
|
7048 char tmp[200];
|
|
7049 int value = 0;
|
|
7050 int end = 0;
|
|
7051 int index1 = 0;
|
|
7052 int index2 = 0;
|
|
7053
|
|
7054 i=0;
|
|
7055 while(1)
|
|
7056 {
|
|
7057
|
|
7058 if(i>=strlen(md))
|
|
7059 break;
|
|
7060
|
|
7061 index1 = i;
|
|
7062
|
|
7063 while(md[i] >='0' && md[i]<='9')
|
|
7064 {
|
|
7065 i++;
|
|
7066 }
|
|
7067
|
|
7068 index2 = i;
|
|
7069
|
|
7070 value = str2int(md, index1,index2);
|
|
7071
|
|
7072 if(md[i]=='M')
|
|
7073 {
|
|
7074 for(j=0;j<value;j++)
|
|
7075 {
|
|
7076 tmp[end]='M';
|
|
7077 end++;
|
|
7078 }
|
|
7079 }
|
|
7080 else if(md[i]=='I')
|
|
7081 {
|
|
7082 for(j=0;j<value;j++)
|
|
7083 {
|
|
7084 tmp[end]='I';
|
|
7085 end++;
|
|
7086 }
|
|
7087
|
|
7088 }
|
|
7089 else if(md[i] == 'D')
|
|
7090 {
|
|
7091 for(j=0;j<value;j++)
|
|
7092 {
|
|
7093 tmp[end]='D';
|
|
7094 end++;
|
|
7095 }
|
|
7096 }
|
|
7097 i++;
|
|
7098 }
|
|
7099
|
|
7100 tmp[end] = '\0';
|
|
7101
|
|
7102 j = 0;
|
|
7103
|
|
7104 for (i = 0; i < end; i++)
|
|
7105 {
|
|
7106 if(tmp[i] == 'M')
|
|
7107 {
|
|
7108 if (*ref != *ver)
|
|
7109 {
|
|
7110 score *= 0.001 + 1/pow( 10, ((qual[j]-33)/10.0) );
|
|
7111 }
|
|
7112
|
|
7113 ref++;
|
|
7114 ver++;
|
|
7115 j++;
|
|
7116 }
|
|
7117 else if(tmp[i] == 'I')
|
|
7118 {
|
|
7119 ver++;
|
|
7120 j++;
|
|
7121 }
|
|
7122 else if(tmp[i] == 'D')
|
|
7123 {
|
|
7124 ref++;
|
|
7125 }
|
|
7126 }
|
|
7127
|
|
7128 return score;
|
|
7129 }
|
|
7130
|
|
/*
 * Converts the characters str[start .. end-1] to an int with atoi().
 * An empty or inverted range yields 0.
 *
 * The range is copied into a 200-byte scratch buffer; it is now clamped to
 * that buffer instead of overflowing the stack on long ranges.
 */
int matoi(char *str, int start, int end)
{
	char tmp[200];
	int len = end - start;
	int i;

	if (len < 0)
		len = 0;
	if (len > (int)sizeof(tmp) - 1)
		len = (int)sizeof(tmp) - 1;

	for (i = 0; i < len; i++)
		tmp[i] = str[start+i];
	tmp[len] = '\0';

	return atoi(tmp);
}
|
|
7142
|
|
/*
 * Expands a run-length CIGAR ("3M1I2M") into one character per column
 * ("MMMIMM").
 *
 * cigar       CIGAR text; only the first cigar_size characters are examined
 * cigar_size  number of characters of `cigar` to parse
 * matrix      output buffer; receives the NUL-terminated expansion.  The
 *             caller must provide room for the sum of all run lengths plus 1.
 *
 * Only the ops M, D and I are expanded.  A count followed by any other
 * character, or by the end of the input, produces no output; the previous
 * version advanced the output position without writing, leaving
 * uninitialised bytes in `matrix`.  It also read cigar[i] before checking
 * i against cigar_size.
 */
void convertCigarToMatrix(char *cigar, int cigar_size, char * matrix)
{
	enum { RUN_CAP = 100000000 };	/* keeps run*10+9 within int range */

	int pos = 0;
	int out = 0;

	matrix[0] = '\0';

	while (pos < cigar_size)
	{
		if (cigar[pos] >= '0' && cigar[pos] <= '9')
		{
			int run = 0;
			int n;
			char op = '\0';

			while (pos < cigar_size && cigar[pos] >= '0' && cigar[pos] <= '9')
			{
				if (run < RUN_CAP)
					run = run * 10 + (cigar[pos] - '0');
				pos++;
			}

			if (pos < cigar_size)
				op = cigar[pos];

			if (op == 'M' || op == 'D' || op == 'I')
			{
				for (n = 0; n < run; n++)
					matrix[out++] = op;
			}
		}
		pos++;	/* step over the op (or a stray non-digit) */
	}

	matrix[out] = '\0';
}
|
|
7178
|
|
7179
|
|
7180
|
|
/*
 * Expands an MD string into one character per element:
 *   - a decimal count N becomes N times 'M',
 *   - '^' becomes 'D',
 *   - every other character is copied unchanged.
 *
 * md       MD text; only the first md_size characters are examined
 * md_size  number of characters of `md` to parse
 * matrix   output buffer; receives the NUL-terminated expansion.  The caller
 *          must provide enough room.
 *
 * The digit scan now checks the index against md_size before reading md[],
 * and counts are parsed in place rather than through a fixed 200-byte
 * scratch buffer.
 */
void convertMDToMatrix(char *md, int md_size, char * matrix)
{
	enum { RUN_CAP = 100000000 };	/* keeps run*10+9 within int range */

	int pos = 0;
	int out = 0;

	matrix[0] = '\0';

	while (pos < md_size)
	{
		char c = md[pos];

		if (c >= '0' && c <= '9')
		{
			int run = 0;
			int n;

			while (pos < md_size && md[pos] >= '0' && md[pos] <= '9')
			{
				if (run < RUN_CAP)
					run = run * 10 + (md[pos] - '0');
				pos++;
			}

			for (n = 0; n < run; n++)
				matrix[out++] = 'M';

			/* pos already rests on the first character after the count */
			continue;
		}

		matrix[out++] = (c == '^') ? 'D' : c;
		pos++;
	}

	matrix[out] = '\0';
}
|
|
7223
|
|
7224
|
|
/*
 * Merges a CIGAR and an MD string into one edit "matrix".
 *
 * Both strings are expanded with convertCigarToMatrix / convertMDToMatrix and
 * walked in parallel (cursor j over the MD expansion advances by one per
 * CIGAR column while MD characters remain):
 *   CIGAR 'M' -> the MD character at the cursor, or 'M' once MD is used up
 *   CIGAR 'D' -> 'D' followed by the MD character after the cursor
 *   CIGAR 'I' -> 'I'
 *
 * matrix receives the NUL-terminated result; the caller must provide room
 * for up to two characters per CIGAR column plus the terminator.
 *
 * Fixes: the result is now always terminated (it was left untouched when the
 * CIGAR expansion was empty); a 'D' column with no MD character left emits
 * the placeholder 'N' instead of reading past the end of the MD expansion;
 * strlen() is no longer re-evaluated on every iteration.
 */
void convertMDCigarToMatrix(char *cigar, int cigar_size, char *md, int md_size, char *matrix)
{
	char cigarOps[200];
	char mdOps[200];
	size_t nCigar;
	size_t nMd;
	size_t i;
	size_t j = 0;
	int out = 0;

	convertMDToMatrix(md, md_size, mdOps);
	convertCigarToMatrix(cigar, cigar_size, cigarOps);

	nCigar = strlen(cigarOps);
	nMd    = strlen(mdOps);

	for (i = 0; i < nCigar; i++)
	{
		switch (cigarOps[i])
		{
		case 'M':
			matrix[out++] = (j < nMd) ? mdOps[j] : 'M';
			break;

		case 'D':
			matrix[out++] = 'D';
			j++;	/* the deleted base follows the 'D' marker in the MD expansion */
			matrix[out++] = (j < nMd) ? mdOps[j] : 'N';
			break;

		case 'I':
			matrix[out++] = 'I';
			break;

		default:
			break;
		}

		if (j < nMd)
			j++;
	}

	matrix[out] = '\0';
}
|
|
7288
|
|
/*
 * Rewrites an edit matrix so that insertions and deletions carry a base
 * taken from the read.
 *
 * in_matrix   NUL-terminated edit matrix
 * seq         read bases; indexed by a cursor j that starts at 0
 * out_matrix  output buffer; receives the NUL-terminated result (at most
 *             twice the length of in_matrix plus the terminator)
 *
 * Per input character:
 *   'D'    -> 'D' followed by seq[j+1]; the character after the 'D' in
 *             in_matrix is skipped and j advances by 2
 *   'I'    -> 'I' followed by seq[j]; j advances by 1
 *   other  -> copied unchanged ('M' and mismatch letters); j advances by 1
 *
 * The input length is computed once instead of calling strlen() on every
 * iteration; the produced output is unchanged.
 */
void convertInsertion(char * in_matrix, char * seq, char *out_matrix)
{
	size_t len = strlen(in_matrix);
	size_t i;
	int j = 0;
	int out = 0;

	for (i = 0; i < len; i++)
	{
		switch (in_matrix[i])
		{
		case 'D':
			out_matrix[out++] = 'D';
			i++;	/* skip the base that follows the 'D' marker */
			j++;
			out_matrix[out++] = seq[j];
			j++;
			break;

		case 'I':
			out_matrix[out++] = 'I';
			out_matrix[out++] = seq[j];
			j++;
			break;

		default:
			out_matrix[out++] = in_matrix[i];
			j++;
			break;
		}
	}

	out_matrix[out] = '\0';
}
|
|
7333
|
|
7334 /**********************************************/
|
|
7335 void outputPairedEndDiscPP()
|
|
7336 {
|
|
7337 char tmp_matrix1[200];
|
|
7338 char tmp_matrix2[200];
|
|
7339
|
|
7340 char matrix1[200];
|
|
7341 char matrix2[200];
|
|
7342
|
|
7343 char cigar1[200];
|
|
7344 char editString1[200];
|
|
7345
|
|
7346 char cigar2[200];
|
|
7347 char editString2[200];
|
|
7348
|
|
7349 char seq1[SEQ_LENGTH+1];
|
|
7350 char qual1[SEQ_LENGTH+1];
|
|
7351
|
|
7352 char seq2[SEQ_LENGTH+1];
|
|
7353 char qual2[SEQ_LENGTH+1];
|
|
7354
|
|
7355 char genName[SEQ_LENGTH];
|
|
7356 char fname1[FILE_NAME_LENGTH];
|
|
7357 char fname2[FILE_NAME_LENGTH];
|
|
7358 char l;
|
|
7359 int l_size;
|
|
7360 int loc1, loc2;
|
|
7361 int err1, err2;
|
|
7362 char dir1, dir2;
|
|
7363 float sc1, sc2, lsc=0;
|
|
7364 int flag = 0;
|
|
7365 int rNo,lrNo = -1;
|
|
7366 int tmp;
|
|
7367 FILE *in, *out;
|
|
7368
|
|
7369 sprintf(fname1, "%s__%s__disc", mappingOutputPath, mappingOutput);
|
|
7370 sprintf(fname2, "%s%s_DIVET.vh", mappingOutputPath, mappingOutput);
|
|
7371
|
|
7372 in = fileOpen(fname1, "r");
|
|
7373 out = fileOpen(fname2, "w");
|
|
7374
|
|
7375 if (in != NULL)
|
|
7376 {
|
|
7377 flag = fread(&rNo, sizeof(int), 1, in);
|
|
7378 }
|
|
7379 else
|
|
7380 {
|
|
7381 flag = 0;
|
|
7382 }
|
|
7383
|
|
7384 seq1[SEQ_LENGTH] = '\0';
|
|
7385 qual1[SEQ_LENGTH] = '\0';
|
|
7386
|
|
7387 seq2[SEQ_LENGTH] = '\0';
|
|
7388 qual2[SEQ_LENGTH] = '\0';
|
|
7389
|
|
7390 while (flag)
|
|
7391 {
|
|
7392 tmp = fread(&l, sizeof(char), 1, in);
|
|
7393 tmp = fread(genName, sizeof(char), l, in);
|
|
7394 genName[(int)l]='\0';
|
|
7395 tmp = fread(&loc1, sizeof(int), 1, in);
|
|
7396 tmp = fread(&err1, sizeof(int), 1, in);
|
|
7397 tmp = fread(&sc1, sizeof(float), 1, in);
|
|
7398
|
|
7399 //tmp = fwrite (&(mi2[k].cigarSize), sizeof(int), 1, out);
|
|
7400
|
|
7401 tmp = fread(&l_size, sizeof(int), 1, in);
|
|
7402 tmp = fread(cigar1, sizeof(char), l_size, in);
|
|
7403 cigar1[(int)l_size]='\0';
|
|
7404 //tmp = fwrite ((mi2[k].cigar), sizeof(char), mi2[k].cigarSize, out);
|
|
7405
|
|
7406 //tmp = fwrite (&(mi2[k].mdSize), sizeof(int), 1, out);
|
|
7407 tmp = fread(&l_size, sizeof(int), 1, in);
|
|
7408 tmp = fread(editString1, sizeof(char), l_size, in);
|
|
7409 editString1[(int)l_size]='\0';
|
|
7410 //tmp = fwrite ((mi2[k].md), sizeof(char), mi2[k].mdSize, out);
|
|
7411
|
|
7412 tmp = fread(&loc2, sizeof(int), 1, in);
|
|
7413 tmp = fread(&err2, sizeof(int), 1, in);
|
|
7414 tmp = fread(&sc2, sizeof(float), 1, in);
|
|
7415
|
|
7416 tmp = fread(&l_size, sizeof(int), 1, in);
|
|
7417 tmp = fread(cigar2, sizeof(char), l_size, in);
|
|
7418 cigar2[(int)l_size]='\0';
|
|
7419
|
|
7420 tmp = fread(&l_size, sizeof(int), 1, in);
|
|
7421 tmp = fread(editString2, sizeof(char), l_size, in);
|
|
7422 editString2[(int)l_size]='\0';
|
|
7423
|
|
7424 convertMDCigarToMatrix(cigar1, strlen(cigar1), editString1, strlen(editString1), tmp_matrix1);
|
|
7425 convertMDCigarToMatrix(cigar2, strlen(cigar2), editString2, strlen(editString2), tmp_matrix2);
|
|
7426
|
|
7427
|
|
7428 if(_msf_readHasConcordantMapping[rNo] == 0)
|
|
7429 {
|
|
7430
|
|
7431 dir1 = dir2 = 'F';
|
|
7432
|
|
7433 strncpy(seq1, _msf_seqList[rNo*2].seq, SEQ_LENGTH);
|
|
7434 strncpy(seq2, _msf_seqList[rNo*2+1].seq, SEQ_LENGTH);
|
|
7435
|
|
7436 if (loc1 < 0)
|
|
7437 {
|
|
7438 dir1 = 'R';
|
|
7439 loc1 = -loc1;
|
|
7440
|
|
7441 strncpy(seq1, _msf_seqList[rNo*2].rseq, SEQ_LENGTH);
|
|
7442 }
|
|
7443
|
|
7444 if (loc2 < 0)
|
|
7445 {
|
|
7446 dir2 = 'R';
|
|
7447 loc2 = -loc2;
|
|
7448
|
|
7449 strncpy(seq2, _msf_seqList[rNo*2+1].rseq, SEQ_LENGTH);
|
|
7450 }
|
|
7451
|
|
7452 convertInsertion(tmp_matrix1, seq1, matrix1);
|
|
7453 convertInsertion(tmp_matrix2, seq2, matrix2);
|
|
7454
|
|
7455
|
|
7456 if (rNo != lrNo)
|
|
7457 {
|
|
7458 int j;
|
|
7459 for (j=0; j<SEQ_LENGTH; j++)
|
|
7460 {
|
|
7461 lsc += _msf_seqList[rNo*2].qual[j]+_msf_seqList[rNo*2+1].qual[j];
|
|
7462 }
|
|
7463 lsc /= 2*SEQ_LENGTH;
|
|
7464 lsc -= 33;
|
|
7465 lrNo = rNo;
|
|
7466 }
|
|
7467
|
|
7468 char event = '\0';
|
|
7469
|
|
7470
|
|
7471 if ( dir1 == dir2 )
|
|
7472 {
|
|
7473 event = 'V';
|
|
7474 }
|
|
7475 else
|
|
7476 {
|
|
7477 if (loc1 < loc2)
|
|
7478 {
|
|
7479
|
|
7480 if (dir1 == 'R' && dir2 == 'F')
|
|
7481 {
|
|
7482 event = 'E';
|
|
7483
|
|
7484 }
|
|
7485 else if ( loc2 - loc1 >= maxPairEndedDiscordantDistance )
|
|
7486 {
|
|
7487 event = 'D';
|
|
7488 }
|
|
7489 else
|
|
7490 {
|
|
7491 event = 'I';
|
|
7492 }
|
|
7493 }
|
|
7494 else if (loc2 < loc1)
|
|
7495 {
|
|
7496 if (dir2 == 'R' && dir1 == 'F')
|
|
7497 {
|
|
7498 event = 'E';
|
|
7499 }
|
|
7500 else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance )
|
|
7501 {
|
|
7502 event = 'D';
|
|
7503 }
|
|
7504 else
|
|
7505 {
|
|
7506 event = 'I';
|
|
7507 }
|
|
7508 }
|
|
7509 }
|
|
7510 _msf_seqList[rNo*2].hits[0] = 2;
|
|
7511 if(event != 'E')
|
|
7512 fprintf(out, "%s\t%s\t%d\t%d\t%c\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%e\n",
|
|
7513 _msf_seqList[rNo*2].name, genName, loc1, (loc1+SEQ_LENGTH-1), dir1,
|
|
7514 loc2, (loc2+SEQ_LENGTH-1), dir2, event, (err1+err2), lsc, sc1*sc2);
|
|
7515
|
|
7516 }
|
|
7517 flag = fread(&rNo, sizeof(int), 1, in);
|
|
7518 }
|
|
7519
|
|
7520 fclose(in);
|
|
7521 fclose(out);
|
|
7522
|
|
7523 unlink(fname1);
|
|
7524 }
|
|
7525
|
|
7526 void finalizeOEAReads(char *fileName)
|
|
7527 {
|
|
7528 FILE *fp_out1;
|
|
7529 FILE * in;
|
|
7530
|
|
7531 char genName[SEQ_LENGTH];
|
|
7532
|
|
7533 char fname1[FILE_NAME_LENGTH];
|
|
7534 char fname2[FILE_NAME_LENGTH];
|
|
7535
|
|
7536 char l=0;
|
|
7537 int loc1=0;
|
|
7538
|
|
7539 int err1;
|
|
7540
|
|
7541 char d;
|
|
7542
|
|
7543 float sc1=0;
|
|
7544 int flag = 0;
|
|
7545 int rNo=-1;
|
|
7546 int tmp=0;
|
|
7547
|
|
7548 int cigarSize = 0;
|
|
7549 int mdSize = 0;
|
|
7550
|
|
7551 char cigar[SEQ_LENGTH+1];
|
|
7552 char md[SEQ_LENGTH+1];
|
|
7553
|
|
7554 char *seq1, *seq2, *qual1, *qual2;
|
|
7555 char *rqual1, *rqual2;
|
|
7556
|
|
7557 seq1=NULL; seq2=NULL; qual1=NULL; qual2=NULL;
|
|
7558
|
|
7559 rqual1 = getMem(200*sizeof(char));
|
|
7560 rqual2 = getMem(200*sizeof(char));
|
|
7561
|
|
7562 rqual1[0] = '\0';
|
|
7563 rqual2[0] = '\0';
|
|
7564
|
|
7565 /*
|
|
7566 char mappingOutput2[2 * SEQ_LENGTH];
|
|
7567 int mo_len;
|
|
7568 mo_len = strlen(mappingOutput);
|
|
7569 strcpy(mappingOutput2, mappingOutput);
|
|
7570
|
|
7571 if (mappingOutput[mo_len-1]=='m' && mappingOutput[mo_len-2]=='a' && mappingOutput[mo_len-3]=='s' && mappingOutput[mo_len-4]=='.')
|
|
7572 mappingOutput2[mo_len-4] = 0;
|
|
7573 */
|
|
7574
|
|
7575 sprintf(fname1, "%s%s_OEA.sam", mappingOutputPath, mappingOutput);
|
|
7576
|
|
7577 fp_out1 = fileOpen(fname1, "w");
|
|
7578
|
|
7579 in = NULL;
|
|
7580 if (pairedEndDiscordantMode){
|
|
7581 sprintf(fname2, "%s__%s__oea", mappingOutputPath, mappingOutput);
|
|
7582
|
|
7583 in = fileOpen(fname2, "r");
|
|
7584 }
|
|
7585
|
|
7586
|
|
7587 if (in != NULL)
|
|
7588 {
|
|
7589 flag = fread(&rNo, sizeof(int), 1, in);
|
|
7590 }
|
|
7591 else
|
|
7592 {
|
|
7593 flag = 0;
|
|
7594 }
|
|
7595
|
|
7596 while (flag)
|
|
7597 {
|
|
7598 cigar[0] = '\0';
|
|
7599 md[0] = '\0';
|
|
7600
|
|
7601 tmp = fread(&l, sizeof(char), 1, in);
|
|
7602 tmp = fread(genName, sizeof(char), l, in);
|
|
7603
|
|
7604 genName[(int)l]='\0';
|
|
7605
|
|
7606
|
|
7607 tmp = fread(&loc1, sizeof(int), 1, in);
|
|
7608 tmp = fread(&err1, sizeof(int), 1, in);
|
|
7609 tmp = fread(&sc1, sizeof(float), 1, in);
|
|
7610
|
|
7611 tmp = fread (&cigarSize, sizeof(int), 1, in);
|
|
7612 tmp = fread (cigar, sizeof(char), cigarSize, in);
|
|
7613
|
|
7614 cigar[cigarSize] = '\0';
|
|
7615
|
|
7616 tmp = fread (&mdSize, sizeof(int), 1, in);
|
|
7617 tmp = fread (md, sizeof(char), mdSize, in);
|
|
7618 md[mdSize] = '\0';
|
|
7619
|
|
7620 d = 1;
|
|
7621
|
|
7622 if(loc1 < 0)
|
|
7623 {
|
|
7624 d = -1;
|
|
7625 loc1 *= -1;
|
|
7626
|
|
7627 seq1 = _msf_seqList[rNo].rseq;
|
|
7628 reverse(_msf_seqList[rNo].qual, rqual1, SEQ_LENGTH);
|
|
7629 rqual1[SEQ_LENGTH] = '\0';
|
|
7630 }
|
|
7631 else
|
|
7632 {
|
|
7633 seq1 = _msf_seqList[rNo].seq;
|
|
7634 qual1 = _msf_seqList[rNo].qual;
|
|
7635 }
|
|
7636
|
|
7637 if(rNo % 2 == 0)
|
|
7638 {
|
|
7639 seq2 = _msf_seqList[rNo+1].seq;
|
|
7640 qual2 = _msf_seqList[rNo+1].qual;
|
|
7641 }
|
|
7642 else
|
|
7643 {
|
|
7644 seq2 = _msf_seqList[rNo-1].seq;
|
|
7645 qual2 = _msf_seqList[rNo-1].qual;
|
|
7646 }
|
|
7647
|
|
7648 if(_msf_seqHits[rNo] != 0 && _msf_seqHits[(rNo%2==0)?rNo+1:rNo-1] == 0)
|
|
7649 {
|
|
7650 _msf_output.POS = loc1;
|
|
7651 _msf_output.MPOS = 0;
|
|
7652 _msf_output.FLAG = (rNo % 2 ==0)? 1+4+32*d+128 : 1+8+16*d+64 ;
|
|
7653 _msf_output.ISIZE = 0;
|
|
7654 _msf_output.SEQ = seq1;
|
|
7655 _msf_output.QUAL = qual1;
|
|
7656 _msf_output.QNAME = _msf_seqList[rNo].name;
|
|
7657 _msf_output.RNAME = genName;
|
|
7658 _msf_output.MAPQ = 255;
|
|
7659 _msf_output.CIGAR = cigar;
|
|
7660 _msf_output.MRNAME = "=";
|
|
7661
|
|
7662
|
|
7663 _msf_output.optSize = 4;
|
|
7664 _msf_output.optFields = _msf_optionalFields;
|
|
7665
|
|
7666 _msf_optionalFields[0].tag = "NM";
|
|
7667 _msf_optionalFields[0].type = 'i';
|
|
7668 _msf_optionalFields[0].iVal = err1;
|
|
7669
|
|
7670 _msf_optionalFields[1].tag = "MD";
|
|
7671 _msf_optionalFields[1].type = 'Z';
|
|
7672 _msf_optionalFields[1].sVal = md;
|
|
7673
|
|
7674
|
|
7675
|
|
7676 //for the OEA reads
|
|
7677 _msf_optionalFields[2].tag = "NS";
|
|
7678 _msf_optionalFields[2].type = 'Z';
|
|
7679 _msf_optionalFields[2].sVal = seq2;
|
|
7680
|
|
7681
|
|
7682 _msf_optionalFields[3].tag = "NQ";
|
|
7683 _msf_optionalFields[3].type = 'Z';
|
|
7684 _msf_optionalFields[3].sVal = qual2;
|
|
7685
|
|
7686 outputSAM(fp_out1, _msf_output);
|
|
7687
|
|
7688 _msf_seqList[rNo].hits[0] = -1;
|
|
7689 _msf_seqList[(rNo%2==0)?rNo+1:rNo-1].hits[0] = -1;
|
|
7690 }
|
|
7691 flag = fread(&rNo, sizeof(int), 1, in);
|
|
7692 }
|
|
7693
|
|
7694 freeMem(rqual1, 0);
|
|
7695 freeMem(rqual2, 0);
|
|
7696
|
|
7697 unlink(fname2);
|
|
7698
|
|
7699 fclose(fp_out1);
|
|
7700 }
|
|
7701
|
|
7702
|
|
7703 void outputTransChromosal(char *fileName1, char *fileName2, FILE * fp_out)
|
|
7704 {
|
|
7705 int i = 0;
|
|
7706 int j = 0;
|
|
7707 int k = 0;
|
|
7708
|
|
7709 char *index;
|
|
7710
|
|
7711 int size1 = 0;
|
|
7712 int size2 = 0;
|
|
7713
|
|
7714 FILE *fp1 = NULL;
|
|
7715 FILE *fp2 = NULL;
|
|
7716
|
|
7717 char geneFileName1[FILE_NAME_LENGTH];
|
|
7718 char geneFileName2[FILE_NAME_LENGTH];
|
|
7719
|
|
7720 FullMappingInfoLink *miL = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink));
|
|
7721 FullMappingInfoLink *miR = getMem(_msf_seqListSize * sizeof(FullMappingInfoLink));
|
|
7722
|
|
7723
|
|
7724 if(fileName1 != NULL && fileName2 != NULL)
|
|
7725 {
|
|
7726
|
|
7727 fp1 = fileOpen(fileName1, "r");
|
|
7728 fp2 = fileOpen(fileName2, "r");
|
|
7729
|
|
7730 index = strstr(fileName1, "__");
|
|
7731 strncpy(geneFileName1, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2);
|
|
7732 geneFileName1[strstr(index + 2, "__") - index - 2] = '\0';
|
|
7733
|
|
7734 index = strstr(fileName2, "__");
|
|
7735 strncpy(geneFileName2, index + 2 * sizeof(char), strstr(index + 2, "__") - index - 2);
|
|
7736 geneFileName2[strstr(index + 2, "__") - index - 2] = '\0';
|
|
7737
|
|
7738
|
|
7739 for(i = 0; i < _msf_seqListSize / 2; i++)
|
|
7740 {
|
|
7741 fread(&size1, sizeof(int), 1, fp1);
|
|
7742 fread(&size2, sizeof(int), 1, fp2);
|
|
7743
|
|
7744 miL[i].mi = getMem(size1 * sizeof(FullMappingInfo) );
|
|
7745 miR[i].mi = getMem(size2 * sizeof(FullMappingInfo) );
|
|
7746
|
|
7747 miL[i].size = size1;
|
|
7748 miR[i].size = size2;
|
|
7749
|
|
7750 for(j = 0; j < size1; j++)
|
|
7751 {
|
|
7752 fread(&(miL[i].mi[j].loc), sizeof(int), 1, fp1);
|
|
7753
|
|
7754 fread (&(miL[i].mi[j].err), sizeof(int), 1, fp1);
|
|
7755
|
|
7756 fread (&(miL[i].mi[j].cigarSize), sizeof(int), 1, fp1);
|
|
7757 fread ((miL[i].mi[j].cigar), sizeof(char), miL[i].mi[j].cigarSize+1, fp1);
|
|
7758
|
|
7759 fread (&(miL[i].mi[j].mdSize), sizeof(int), 1, fp1);
|
|
7760 fread ((miL[i].mi[j].md), sizeof(char), miL[i].mi[j].mdSize+1, fp1);
|
|
7761
|
|
7762 miL[i].mi[j].dir = 1;
|
|
7763 if(miL[i].mi[j].loc < 1)
|
|
7764 {
|
|
7765 miL[i].mi[j].loc *= -1;
|
|
7766 miL[i].mi[j].dir = -1;
|
|
7767 }
|
|
7768 }
|
|
7769 for(k = 0; k < size2; k++)
|
|
7770 {
|
|
7771 fread(&(miR[i].mi[k].loc), sizeof(int), 1, fp2);
|
|
7772
|
|
7773 fread (&(miR[i].mi[k].err), sizeof(int), 1, fp2);
|
|
7774
|
|
7775 fread (&(miR[i].mi[k].cigarSize), sizeof(int), 1, fp2);
|
|
7776 fread ((miR[i].mi[k].cigar), sizeof(char), miR[i].mi[k].cigarSize+1, fp2);
|
|
7777
|
|
7778 fread (&(miR[i].mi[k].mdSize), sizeof(int), 1, fp2);
|
|
7779 fread ((miR[i].mi[k].md), sizeof(char), miR[i].mi[k].mdSize+1, fp2);
|
|
7780
|
|
7781 miR[i].mi[k].dir = 1;
|
|
7782 if(miR[i].mi[k].loc < 1)
|
|
7783 {
|
|
7784 miR[i].mi[k].loc *= -1;
|
|
7785 miR[i].mi[k].dir = -1;
|
|
7786 }
|
|
7787 }
|
|
7788 if(_msf_readHasConcordantMapping[i] == 0 && size1 != 0 && size2 != 0 && (size1 * size2 < MAX_TRANS_CHROMOSAL_OUTPUT))
|
|
7789 {
|
|
7790 int d1 = 0;
|
|
7791 int d2 = 0;
|
|
7792 char *seq, *qual;
|
|
7793 char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
|
|
7794 char rqual1[SEQ_LENGTH+1], rqual2[SEQ_LENGTH+1];
|
|
7795 rqual1[SEQ_LENGTH] = rqual2[SEQ_LENGTH] = '\0';
|
|
7796 seq1 = _msf_seqList[i*2].seq;
|
|
7797 rseq1 = _msf_seqList[i*2].rseq;
|
|
7798 qual1 = _msf_seqList[i*2].qual;
|
|
7799 reverse(_msf_seqList[i*2].qual, rqual1, SEQ_LENGTH);
|
|
7800
|
|
7801 seq2 = _msf_seqList[i*2+1].seq;
|
|
7802 rseq2 = _msf_seqList[i*2+1].rseq;
|
|
7803 qual2 = _msf_seqList[i*2+1].qual;
|
|
7804 reverse(_msf_seqList[i*2+1].qual, rqual2, SEQ_LENGTH);
|
|
7805
|
|
7806 for(j = 0; j < size1; j++)
|
|
7807 {
|
|
7808 d1 = (miL[i].mi[j].dir == -1)?1:0;
|
|
7809
|
|
7810 if ( d1 )
|
|
7811 {
|
|
7812 seq = rseq1;
|
|
7813 qual = rqual1;
|
|
7814 }
|
|
7815 else
|
|
7816 {
|
|
7817 seq = seq1;
|
|
7818 qual = qual1;
|
|
7819 }
|
|
7820
|
|
7821 for(k = 0; k < size2; k++)
|
|
7822 {
|
|
7823
|
|
7824 d2 = (miR[i].mi[k].dir == -1)?1:0;
|
|
7825
|
|
7826 _msf_output.POS = miL[i].mi[j].loc;
|
|
7827 _msf_output.MPOS = miR[i].mi[k].loc;
|
|
7828 _msf_output.FLAG = 0;
|
|
7829 _msf_output.ISIZE = 0;
|
|
7830 _msf_output.SEQ = seq,
|
|
7831 _msf_output.QUAL = qual;
|
|
7832 _msf_output.QNAME = _msf_seqList[i*2].name;
|
|
7833 _msf_output.RNAME = geneFileName1;
|
|
7834 _msf_output.MAPQ = 255;
|
|
7835 _msf_output.CIGAR = miL[i].mi[j].cigar;
|
|
7836 _msf_output.MRNAME = "=";
|
|
7837
|
|
7838 _msf_output.optSize = 2;
|
|
7839 _msf_output.optFields = _msf_optionalFields;
|
|
7840
|
|
7841 _msf_optionalFields[0].tag = "NM";
|
|
7842 _msf_optionalFields[0].type = 'i';
|
|
7843 _msf_optionalFields[0].iVal = miL[i].mi[j].err;
|
|
7844
|
|
7845 _msf_optionalFields[1].tag = "MD";
|
|
7846 _msf_optionalFields[1].type = 'Z';
|
|
7847 _msf_optionalFields[1].sVal = miL[i].mi[j].md;
|
|
7848
|
|
7849
|
|
7850 if ( d2 )
|
|
7851 {
|
|
7852 seq = rseq2;
|
|
7853 qual = rqual2;
|
|
7854 }
|
|
7855 else
|
|
7856 {
|
|
7857 seq = seq2;
|
|
7858 qual = qual2;
|
|
7859 }
|
|
7860
|
|
7861 outputSAM(fp_out, _msf_output);
|
|
7862
|
|
7863
|
|
7864 _msf_output.POS = miR[i].mi[k].loc;
|
|
7865 _msf_output.MPOS = miL[i].mi[j].loc;
|
|
7866 _msf_output.FLAG = 0;
|
|
7867 _msf_output.ISIZE = 0;
|
|
7868 _msf_output.SEQ = seq,
|
|
7869 _msf_output.QUAL = qual;
|
|
7870 _msf_output.QNAME = _msf_seqList[i*2+1].name;
|
|
7871 _msf_output.RNAME = geneFileName2;
|
|
7872 _msf_output.MAPQ = 255;
|
|
7873 _msf_output.CIGAR = miR[i].mi[k].cigar;
|
|
7874 _msf_output.MRNAME = "=";
|
|
7875
|
|
7876 _msf_output.optSize = 2;
|
|
7877 _msf_output.optFields = _msf_optionalFields;
|
|
7878
|
|
7879 _msf_optionalFields[0].tag = "NM";
|
|
7880 _msf_optionalFields[0].type = 'i';
|
|
7881 _msf_optionalFields[0].iVal = miR[i].mi[k].err;
|
|
7882
|
|
7883 _msf_optionalFields[1].tag = "MD";
|
|
7884 _msf_optionalFields[1].type = 'Z';
|
|
7885 _msf_optionalFields[1].sVal = miR[i].mi[k].md;
|
|
7886
|
|
7887 outputSAM(fp_out, _msf_output);
|
|
7888
|
|
7889 }
|
|
7890 }
|
|
7891 }
|
|
7892 }
|
|
7893
|
|
7894 }
|
|
7895
|
|
7896 for(i = 0; i < _msf_seqListSize / 2; i++)
|
|
7897 {
|
|
7898 freeMem(miL[i].mi, miL[i].size * sizeof(FullMappingInfo));
|
|
7899 freeMem(miR[i].mi, miR[i].size * sizeof(FullMappingInfo));
|
|
7900 }
|
|
7901
|
|
7902 freeMem(miL, _msf_seqListSize * sizeof(FullMappingInfoLink));
|
|
7903 freeMem(miR, _msf_seqListSize * sizeof(FullMappingInfoLink));
|
|
7904
|
|
7905 fclose(fp1);
|
|
7906 fclose(fp2);
|
|
7907 }
|
|
7908
|
|
7909 /*
|
|
7910 if flag is 1 it will output all the possible trans chromsal mapping
|
|
7911 otherwise only tmp file will be delete
|
|
7912
|
|
7913 */
|
|
7914
|
|
7915 void outputAllTransChromosal(int flag)
|
|
7916 {
|
|
7917
|
|
7918 int i = 0;
|
|
7919 int j = 0;
|
|
7920 int k = 0;
|
|
7921 int l = 0;
|
|
7922
|
|
7923 FILE *fp_out = NULL;
|
|
7924 char fname1[200];
|
|
7925
|
|
7926 if(flag)
|
|
7927 {
|
|
7928 fp_out = fileOpen(fname1, "w");
|
|
7929
|
|
7930 sprintf(fname1, "%s%s_TRANSCHROMOSOMAL", mappingOutputPath, mappingOutput);
|
|
7931
|
|
7932 // for(i = 0; i < _msf_maxFile; i++)
|
|
7933 // {
|
|
7934 i = 0;
|
|
7935 for(j = i+1; j < _msf_maxFile; j++)
|
|
7936 {
|
|
7937 if(i != j)
|
|
7938 {
|
|
7939 for(k = 0; k < _msf_fileCount[i]; k++)
|
|
7940 {
|
|
7941 for(l = 0; l < _msf_fileCount[j]; l++)
|
|
7942 {
|
|
7943 outputTransChromosal(_msf_fileName[i][k][0], _msf_fileName[j][l][1], fp_out);
|
|
7944 }// for l
|
|
7945 }// for k
|
|
7946 }// if
|
|
7947 }// for j
|
|
7948 // } //for i
|
|
7949 }
|
|
7950
|
|
7951 for(i = 0; i < _msf_maxFile; i++)
|
|
7952 {
|
|
7953 for(j = 0; j < _msf_fileCount[i]; j++)
|
|
7954 {
|
|
7955 unlink(_msf_fileName[i][j][0]);
|
|
7956 unlink(_msf_fileName[i][j][1]);
|
|
7957 }
|
|
7958 }
|
|
7959 if(flag)
|
|
7960 fclose(fp_out);
|
|
7961 }
|