comparison alignCustomAmplicon/utils.cpp @ 5:0aaf65fbb48a draft default tip

Uploaded
author fcaramia
date Wed, 20 Mar 2013 00:22:08 -0400
parents d32bddcff685
children
comparison
equal deleted inserted replaced
4:22f35f830f48 5:0aaf65fbb48a
4 4
5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; 5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ;
6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; 6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ;
7 using namespace std; 7 using namespace std;
8 8
9 void printMatrix (double* m, int rows, int cols) 9 void printMatrix (float* m, int rows, int cols)
10 { 10 {
11 for(int i = 0; i < rows; i++) 11 for(int i = 0; i < rows; i++)
12 { 12 {
13 for(int j = 0; j < cols; j++) 13 for(int j = 0; j < cols; j++)
14 { 14 {
42 } 42 }
43 43
44 return d[size1-1][size2-1]; // return edit distance 44 return d[size1-1][size2-1]; // return edit distance
45 } 45 }
46 46
47 double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) 47 float nw_align2 (string &seq1, string &seq2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
48 { 48 {
49 //Do NW alignment of 2 strings... 49 //Do NW alignment of 2 strings...
50 int size1 = seq1.length(); 50 int size1 = seq1.length();
51 int size2 = seq2.length(); 51 int size2 = seq2.length();
52 52
53 double scores[size1+1][size2+1]; 53 float scores[size1+1][size2+1];
54 char dir[size1+1][size2+1]; 54 char dir[size1+1][size2+1];
55 char gaps[size1+1][size2+1]; 55 char gaps[size1+1][size2+1];
56 56
57 for(int i = 0; i <= size1; i++) 57 for(int i = 0; i <= size1; i++)
58 { 58 {
59 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; 59 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen;
60 dir[i][0] = 'U'; 60 dir[i][0] = 'U';
61 gaps[i][0] = 'O'; 61 gaps[i][0] = 'D';
62 } 62 }
63 for(int j = 0; j <= size2; j++) 63 for(int j = 0; j <= size2; j++)
64 { 64 {
65 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; 65 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen;
66 dir[0][j] = 'L'; 66 dir[0][j] = 'L';
67 gaps[0][j] = 'O'; 67 gaps[0][j] = 'I';
68 } 68 }
69 69
70 scores[0][0] = 0; 70 scores[0][0] = 0;
71 gaps[0][0] = 'N'; 71 gaps[0][0] = 'N';
72 72
74 { 74 {
75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; 75 for(int i = 0; i <= size1; i++) scores[i][0] = 0;
76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; 76 for(int j = 0; j <= size2; j++) scores[0][j] = 0;
77 } 77 }
78 78
79 double match; 79 float match;
80 double del; 80 float del;
81 double insert; 81 float insert;
82 82
83 for(int i = 1; i <= size1; i++) 83 for(int i = 1; i <= size1; i++)
84 for(int j = 1; j <= size2; j++) 84 for(int j = 1; j <= size2; j++)
85 { 85 {
86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') 86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N')
87 { 87 {
88 match = scores[i-1][j-1]; 88 match = scores[i-1][j-1];
89 } 89 }
90 else 90 else
91 { 91 {
92 match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw 92 match = scores[i-1][j-1] + (float)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw
93 } 93 }
94 del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); 94 del = scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt);
95 insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); 95 insert = scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt);
96
96 //debug 97 //debug
97 //printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert); 98 //if (match == del || match == insert)
98 99 //{
99 if (match>del && match > insert) 100 //printf("I:%d J:%d match = %f del %f insert %f %c %c\n",i,j,match,del,insert,seq1[i-1],seq2[j-1]);
101 //}
102
103 if (match > del && match > insert)
100 { 104 {
101 scores[i][j] = match; 105 scores[i][j] = match;
102 dir[i][j] = 'D'; 106 dir[i][j] = 'D';
103 gaps[i][j] = 'N'; 107 gaps[i][j] = 'N';
104 } 108 }
109 else if(match >= del && match >= insert && seq1[i-1] != seq2[j-1])
110 {
111 scores[i][j] = match;
112 dir[i][j] = 'D';
113 gaps[i][j] = 'N';
114 }
105 else if(del > insert) 115 else if(del > insert)
106 { 116 {
107 scores[i][j] = del; 117 scores[i][j] = del;
108 dir[i][j] = 'U'; 118 dir[i][j] = 'U';
109 gaps[i][j] = 'O'; 119 gaps[i][j] = 'D';
110 } 120 }
111 else 121 else
112 { 122 {
113 scores[i][j] = insert; 123 scores[i][j] = insert;
114 dir[i][j] = 'L'; 124 dir[i][j] = 'L';
115 gaps[i][j] = 'O'; 125 gaps[i][j] = 'I';
116 } 126 }
117 } 127 }
118 128
119 //debug 129 //debug
120 //if (debug) 130 //if (debug)
127 137
128 int cont1 = size1; 138 int cont1 = size1;
129 int cont2 = size2; 139 int cont2 = size2;
130 int max1 = size1; 140 int max1 = size1;
131 int max2 = size2; 141 int max2 = size2;
132 double num = scores[size1][size2];; 142 float num = scores[size1][size2];;
133 143
134 if (noEndGapPenalty) 144 if (noEndGapPenalty)
135 { 145 {
136 //Search Maxes 146 //Search Maxes
137 147
238 248
239 return num; 249 return num;
240 250
241 } 251 }
242 252
243 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) 253 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
244 { 254 {
245 //Do NW alignment of 2 strings... 255 //Do NW alignment of 2 strings...
246 int size1 = a1[0].length(); 256 int size1 = a1[0].length();
247 int size2 = a2[0].length(); 257 int size2 = a2[0].length();
248 int v1 = a1.size(); 258 int v1 = a1.size();
258 for (int i=0;i<v2;++i) 268 for (int i=0;i<v2;++i)
259 alignment2.push_back(string()); 269 alignment2.push_back(string());
260 270
261 271
262 272
263 double scores[size1+1][size2+1]; 273 float scores[size1+1][size2+1];
264 char dir[size1+1][size2+1]; 274 char dir[size1+1][size2+1];
265 char gaps[size1+1][size2+1]; 275 char gaps[size1+1][size2+1];
266 276
267 for(int i = 0; i <= size1; i++) 277 for(int i = 0; i <= size1; i++)
268 { 278 {
269 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; 279 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen;
270 dir[i][0] = 'U'; 280 dir[i][0] = 'U';
271 gaps[i][0] = 'O'; 281 gaps[i][0] = 'D';
272 } 282 }
273 for(int j = 0; j <= size2; j++) 283 for(int j = 0; j <= size2; j++)
274 { 284 {
275 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; 285 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen;
276 dir[0][j] = 'L'; 286 dir[0][j] = 'L';
277 gaps[0][j] = 'O'; 287 gaps[0][j] = 'I';
278 } 288 }
279 289
280 scores[0][0] = 0; 290 scores[0][0] = 0;
281 gaps[0][0] = 'N'; 291 gaps[0][0] = 'N';
282 292
284 { 294 {
285 for(int i = 0; i <= size1; i++) scores[i][0] = 0; 295 for(int i = 0; i <= size1; i++) scores[i][0] = 0;
286 for(int j = 0; j <= size2; j++) scores[0][j] = 0; 296 for(int j = 0; j <= size2; j++) scores[0][j] = 0;
287 } 297 }
288 298
289 double match; 299 float match;
290 double del; 300 float del;
291 double insert; 301 float insert;
302 bool missmatch;
292 303
293 for(int i = 1; i <= size1; i++) 304 for(int i = 1; i <= size1; i++)
294 for(int j = 1; j <= size2; j++) 305 for(int j = 1; j <= size2; j++)
295 { 306 {
296 307
297 match = del = insert = 0.0f; 308 match = del = insert = 0.0f;
309 missmatch = true;
298 for(int z=0;z<v1;++z) 310 for(int z=0;z<v1;++z)
299 { 311 {
300 for(int w=0;w<v2;++w) 312 for(int w=0;w<v2;++w)
301 { 313 {
302 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') 314 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N')
303 { 315 {
304 match += scores[i-1][j-1]; 316 match += scores[i-1][j-1];
305 } 317 }
306 else 318 else
307 { 319 {
308 match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); 320 match += scores[i-1][j-1] + (float)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty);
321 if(a1[z][i-1] == a2[w][j-1])
322 {
323 missmatch = false;
324 }
309 } 325 }
310 del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); 326 del += scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt);
311 insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); 327 insert += scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt);
312 } 328 }
313 } 329 }
314 330
315 match /= combs; 331 match /= combs;
316 del /= combs; 332 del /= combs;
320 { 336 {
321 scores[i][j] = match; 337 scores[i][j] = match;
322 dir[i][j] = 'D'; 338 dir[i][j] = 'D';
323 gaps[i][j] = 'N'; 339 gaps[i][j] = 'N';
324 } 340 }
341 else if(match >= del && match >= insert && missmatch)
342 {
343 scores[i][j] = match;
344 dir[i][j] = 'D';
345 gaps[i][j] = 'N';
346 }
325 else if(del > insert) 347 else if(del > insert)
326 { 348 {
327 scores[i][j] = del; 349 scores[i][j] = del;
328 dir[i][j] = 'U'; 350 dir[i][j] = 'U';
329 gaps[i][j] = 'O'; 351 gaps[i][j] = 'D';
330 } 352 }
331 else 353 else
332 { 354 {
333 scores[i][j] = insert; 355 scores[i][j] = insert;
334 dir[i][j] = 'L'; 356 dir[i][j] = 'L';
335 gaps[i][j] = 'O'; 357 gaps[i][j] = 'I';
336 } 358 }
337 } 359 }
338 360
339 //debug 361 //debug
340 // if (debug) 362 // if (debug)
344 366
345 int cont1 = size1; 367 int cont1 = size1;
346 int cont2 = size2; 368 int cont2 = size2;
347 int max1 = size1; 369 int max1 = size1;
348 int max2 = size2; 370 int max2 = size2;
349 double num = scores[size1][size2]; 371 float num = scores[size1][size2];
350 372
351 if (noEndGapPenalty) 373 if (noEndGapPenalty)
352 { 374 {
353 //Search Maxes 375 //Search Maxes
354 376