Mercurial > repos > fcaramia > custom_amplicon_alignment
comparison alignCustomAmplicon/utils.cpp @ 0:d32bddcff685 draft
Uploaded
author | fcaramia |
---|---|
date | Wed, 09 Jan 2013 00:24:09 -0500 |
parents | |
children | 0aaf65fbb48a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d32bddcff685 |
---|---|
1 #include <stdio.h> | |
2 #include <string> | |
3 #include <vector> | |
4 | |
5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; | |
6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; | |
7 using namespace std; | |
8 | |
9 void printMatrix (double* m, int rows, int cols) | |
10 { | |
11 for(int i = 0; i < rows; i++) | |
12 { | |
13 for(int j = 0; j < cols; j++) | |
14 { | |
15 printf("%.2f ", *(m + i*cols +j)); | |
16 } | |
17 printf("\n"); | |
18 } | |
19 | |
20 } | |
21 | |
22 int levenshteinDistance(string s, string t) | |
23 { | |
24 //Calculate Edit distance of 2 strings | |
25 if(s == t) | |
26 return 0; | |
27 int size1 = s.length()+1; | |
28 int size2 = t.length()+1; | |
29 int d[size1][size2]; | |
30 | |
31 for (int i=0; i<size1; i++)d[i][0] = i; | |
32 for (int i=0; i<size2; i++)d[0][i] = i; | |
33 | |
34 for (int i=1; i<size1; i++) | |
35 for (int j=1; j<size2; j++) | |
36 { | |
37 if(s[i-1] == t[j-1] ) | |
38 d[i][j] = d[i-1][j-1]; //if equeal no operation | |
39 else | |
40 d[i][j] = min(d[i-1][j] + 1,d[i][j-1] + 1,d[i-1][j-1] + 1 ); //Addition, deletion or substitution | |
41 | |
42 } | |
43 | |
44 return d[size1-1][size2-1]; // return edit distance | |
45 } | |
46 | |
47 double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | |
48 { | |
49 //Do NW alignment of 2 strings... | |
50 int size1 = seq1.length(); | |
51 int size2 = seq2.length(); | |
52 | |
53 double scores[size1+1][size2+1]; | |
54 char dir[size1+1][size2+1]; | |
55 char gaps[size1+1][size2+1]; | |
56 | |
57 for(int i = 0; i <= size1; i++) | |
58 { | |
59 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | |
60 dir[i][0] = 'U'; | |
61 gaps[i][0] = 'O'; | |
62 } | |
63 for(int j = 0; j <= size2; j++) | |
64 { | |
65 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | |
66 dir[0][j] = 'L'; | |
67 gaps[0][j] = 'O'; | |
68 } | |
69 | |
70 scores[0][0] = 0; | |
71 gaps[0][0] = 'N'; | |
72 | |
73 if (noFrontGapPenalty) | |
74 { | |
75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | |
76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | |
77 } | |
78 | |
79 double match; | |
80 double del; | |
81 double insert; | |
82 | |
83 for(int i = 1; i <= size1; i++) | |
84 for(int j = 1; j <= size2; j++) | |
85 { | |
86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') | |
87 { | |
88 match = scores[i-1][j-1]; | |
89 } | |
90 else | |
91 { | |
92 match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw | |
93 } | |
94 del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | |
95 insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | |
96 //debug | |
97 //printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert); | |
98 | |
99 if (match>del && match > insert) | |
100 { | |
101 scores[i][j] = match; | |
102 dir[i][j] = 'D'; | |
103 gaps[i][j] = 'N'; | |
104 } | |
105 else if(del > insert) | |
106 { | |
107 scores[i][j] = del; | |
108 dir[i][j] = 'U'; | |
109 gaps[i][j] = 'O'; | |
110 } | |
111 else | |
112 { | |
113 scores[i][j] = insert; | |
114 dir[i][j] = 'L'; | |
115 gaps[i][j] = 'O'; | |
116 } | |
117 } | |
118 | |
119 //debug | |
120 //if (debug) | |
121 // printMatrix(*scores, size1+1, size2+1); | |
122 //printMatrix(*dir, size1+1, size2+1); | |
123 | |
124 string alignment1=""; | |
125 string alignment2=""; | |
126 | |
127 | |
128 int cont1 = size1; | |
129 int cont2 = size2; | |
130 int max1 = size1; | |
131 int max2 = size2; | |
132 double num = scores[size1][size2];; | |
133 | |
134 if (noEndGapPenalty) | |
135 { | |
136 //Search Maxes | |
137 | |
138 for(int i = 1; i <= size1; i++) | |
139 { | |
140 if(num<scores[i][size2]) | |
141 { | |
142 num = scores[i][size2]; | |
143 max1 = i; | |
144 max2 = size2; | |
145 } | |
146 } | |
147 for(int j = 1; j <= size2; j++) | |
148 { | |
149 if(num<scores[size1][j]) | |
150 { | |
151 num = scores[size1][j]; | |
152 max1 = size1; | |
153 max2 = j; | |
154 } | |
155 } | |
156 cont1 = max1; | |
157 cont2 = max2; | |
158 } | |
159 | |
160 while (cont1 > 0 && cont2 > 0) | |
161 { | |
162 if(dir[cont1][cont2] == 'D') | |
163 { | |
164 alignment1+= seq1[cont1-1]; | |
165 alignment2+= seq2[cont2-1]; | |
166 cont1--; | |
167 cont2--; | |
168 | |
169 } | |
170 else if(dir[cont1][cont2] == 'L') | |
171 { | |
172 alignment1+= '-'; | |
173 alignment2+= seq2[cont2-1]; | |
174 cont2--; | |
175 } | |
176 else | |
177 { | |
178 alignment1+= seq1[cont1-1]; | |
179 alignment2+= '-'; | |
180 cont1--; | |
181 | |
182 } | |
183 } | |
184 | |
185 while (cont1 > 0) | |
186 { | |
187 alignment1+= seq1[cont1-1]; | |
188 alignment2+= '-'; | |
189 cont1--; | |
190 } | |
191 | |
192 while (cont2 > 0) | |
193 { | |
194 alignment1+= '-'; | |
195 alignment2+= seq2[cont2-1]; | |
196 cont2--; | |
197 } | |
198 | |
199 alignment1 = string (alignment1.rbegin(),alignment1.rend()); | |
200 alignment2 = string (alignment2.rbegin(),alignment2.rend()); | |
201 | |
202 | |
203 if (noEndGapPenalty) | |
204 { | |
205 //Need to fill out rest of alignment... | |
206 if (max1 != size1) | |
207 { | |
208 for (int i=max1; i<size1; ++i ) | |
209 { | |
210 alignment1+= seq1 [i]; | |
211 alignment2+= '-'; | |
212 | |
213 } | |
214 } | |
215 | |
216 if (max2 != size2) | |
217 { | |
218 | |
219 for (int i=max2; i<size2; ++i ) | |
220 { | |
221 alignment2+= seq2 [i]; | |
222 alignment1+= '-'; | |
223 | |
224 } | |
225 } | |
226 | |
227 } | |
228 if (debug) | |
229 { | |
230 printf("Seq1: %s\n", alignment1.c_str()); | |
231 printf("Seq2: %s\n\n", alignment2.c_str()); | |
232 | |
233 } | |
234 | |
235 //Returns | |
236 seq1 = alignment1; | |
237 seq2 = alignment2; | |
238 | |
239 return num; | |
240 | |
241 } | |
242 | |
243 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | |
244 { | |
245 //Do NW alignment of 2 strings... | |
246 int size1 = a1[0].length(); | |
247 int size2 = a2[0].length(); | |
248 int v1 = a1.size(); | |
249 int v2 = a2.size(); | |
250 int combs = v1 * v2; | |
251 | |
252 vector<string> msa; | |
253 msa.clear(); | |
254 vector<string> alignment1, alignment2; | |
255 | |
256 for (int i=0;i<v1;++i) | |
257 alignment1.push_back(string()); | |
258 for (int i=0;i<v2;++i) | |
259 alignment2.push_back(string()); | |
260 | |
261 | |
262 | |
263 double scores[size1+1][size2+1]; | |
264 char dir[size1+1][size2+1]; | |
265 char gaps[size1+1][size2+1]; | |
266 | |
267 for(int i = 0; i <= size1; i++) | |
268 { | |
269 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | |
270 dir[i][0] = 'U'; | |
271 gaps[i][0] = 'O'; | |
272 } | |
273 for(int j = 0; j <= size2; j++) | |
274 { | |
275 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | |
276 dir[0][j] = 'L'; | |
277 gaps[0][j] = 'O'; | |
278 } | |
279 | |
280 scores[0][0] = 0; | |
281 gaps[0][0] = 'N'; | |
282 | |
283 if (noFrontGapPenalty) | |
284 { | |
285 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | |
286 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | |
287 } | |
288 | |
289 double match; | |
290 double del; | |
291 double insert; | |
292 | |
293 for(int i = 1; i <= size1; i++) | |
294 for(int j = 1; j <= size2; j++) | |
295 { | |
296 | |
297 match = del = insert = 0.0f; | |
298 for(int z=0;z<v1;++z) | |
299 { | |
300 for(int w=0;w<v2;++w) | |
301 { | |
302 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') | |
303 { | |
304 match += scores[i-1][j-1]; | |
305 } | |
306 else | |
307 { | |
308 match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); | |
309 } | |
310 del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | |
311 insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | |
312 } | |
313 } | |
314 | |
315 match /= combs; | |
316 del /= combs; | |
317 insert /= combs; | |
318 | |
319 if (match > del && match > insert) | |
320 { | |
321 scores[i][j] = match; | |
322 dir[i][j] = 'D'; | |
323 gaps[i][j] = 'N'; | |
324 } | |
325 else if(del > insert) | |
326 { | |
327 scores[i][j] = del; | |
328 dir[i][j] = 'U'; | |
329 gaps[i][j] = 'O'; | |
330 } | |
331 else | |
332 { | |
333 scores[i][j] = insert; | |
334 dir[i][j] = 'L'; | |
335 gaps[i][j] = 'O'; | |
336 } | |
337 } | |
338 | |
339 //debug | |
340 // if (debug) | |
341 // printMatrix(*scores, size1+1, size2+1); | |
342 //printMatrix(*dir, size1+1, size2+1); | |
343 | |
344 | |
345 int cont1 = size1; | |
346 int cont2 = size2; | |
347 int max1 = size1; | |
348 int max2 = size2; | |
349 double num = scores[size1][size2]; | |
350 | |
351 if (noEndGapPenalty) | |
352 { | |
353 //Search Maxes | |
354 | |
355 for(int i = 1; i <= size1; i++) | |
356 { | |
357 if(num<scores[i][size2]) | |
358 { | |
359 num = scores[i][size2]; | |
360 max1 = i; | |
361 max2 = size2; | |
362 } | |
363 } | |
364 for(int j = 1; j <= size2; j++) | |
365 { | |
366 if(num<scores[size1][j]) | |
367 { | |
368 num = scores[size1][j]; | |
369 max1 = size1; | |
370 max2 = j; | |
371 } | |
372 } | |
373 cont1 = max1; | |
374 cont2 = max2; | |
375 } | |
376 | |
377 while (cont1 > 0 && cont2 > 0) | |
378 { | |
379 if(dir[cont1][cont2] == 'D') | |
380 { | |
381 | |
382 for(int z=0;z<v1;++z) | |
383 alignment1[z]+= a1[z][cont1-1]; | |
384 | |
385 for(int w=0;w<v2;++w) | |
386 alignment2[w]+= a2[w][cont2-1]; | |
387 | |
388 cont1--; | |
389 cont2--; | |
390 | |
391 } | |
392 else if(dir[cont1][cont2] == 'L') | |
393 { | |
394 for(int z=0;z<v1;++z) | |
395 alignment1[z]+= '-'; | |
396 for(int w=0;w<v2;++w) | |
397 alignment2[w]+= a2[w][cont2-1]; | |
398 | |
399 cont2--; | |
400 } | |
401 else | |
402 { | |
403 for(int z=0;z<v1;++z) | |
404 alignment1[z]+= a1[z][cont1-1]; | |
405 | |
406 for(int w=0;w<v2;++w) | |
407 alignment2[w]+= '-'; | |
408 | |
409 cont1--; | |
410 | |
411 } | |
412 } | |
413 | |
414 while (cont1 > 0) | |
415 { | |
416 for(int z=0;z<v1;++z) | |
417 alignment1[z]+= a1[z][cont1-1]; | |
418 for(int w=0;w<v2;++w) | |
419 alignment2[w]+= '-'; | |
420 | |
421 cont1--; | |
422 } | |
423 | |
424 while (cont2 > 0) | |
425 { | |
426 for(int z=0;z<v1;++z) | |
427 alignment1[z]+= '-'; | |
428 for(int w=0;w<v2;++w) | |
429 alignment2[w]+= a2[w][cont2-1]; | |
430 | |
431 cont2--; | |
432 } | |
433 | |
434 | |
435 | |
436 | |
437 for(int z=0;z<v1;++z) | |
438 { | |
439 alignment1[z] = string (alignment1[z].rbegin(),alignment1[z].rend()); | |
440 } | |
441 for(int w=0;w<v2;++w) | |
442 { | |
443 alignment2[w] = string (alignment2[w].rbegin(),alignment2[w].rend()); | |
444 } | |
445 | |
446 | |
447 | |
448 if (noEndGapPenalty) | |
449 { | |
450 //Need to fill out rest of alignment... | |
451 if (max1 != size1) | |
452 { | |
453 for (int i=max1; i<size1; ++i ) | |
454 { | |
455 for(int z=0;z<v1;++z) | |
456 alignment1[z]+= a1[z][i]; | |
457 for(int w=0;w<v2;++w) | |
458 alignment2[w]+= '-'; | |
459 | |
460 } | |
461 } | |
462 | |
463 if (max2 != size2) | |
464 { | |
465 | |
466 for (int i=max2; i<size2; ++i ) | |
467 { | |
468 for(int z=0;z<v1;++z) | |
469 alignment1[z]+= '-'; | |
470 for(int w=0;w<v2;++w) | |
471 alignment2[w]+= a2[w][i]; | |
472 | |
473 } | |
474 } | |
475 | |
476 } | |
477 | |
478 //returns | |
479 for(int z=0;z<v1;++z) | |
480 msa.push_back(alignment1[z]); | |
481 for(int w=0;w<v2;++w) | |
482 msa.push_back(alignment2[w]); | |
483 | |
484 if (debug) | |
485 { | |
486 for(int z=0;z<v1;++z) | |
487 printf("%s\n",alignment1[z].c_str()); | |
488 for(int w=0;w<v2;++w) | |
489 printf("%s\n",alignment2[w].c_str()); | |
490 | |
491 } | |
492 | |
493 return msa; | |
494 | |
495 } | |
496 | |
497 | |
498 |