Mercurial > repos > xilinxu > xilinxu
comparison fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.cpp @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:dfe9332138cf | 3:997f5136985f |
---|---|
1 #include <string> | |
2 #include <vector> | |
3 #include <ostream> | |
4 #include <iostream> | |
5 #include <algorithm> | |
6 #include <iomanip> | |
7 #include <err.h> | |
8 | |
9 #include "sequence_alignment.h" | |
10 | |
11 using namespace std; | |
12 | |
13 void SequenceAlignmentResults::print(std::ostream& strm) const | |
14 { | |
15 size_t delta; | |
16 size_t index; | |
17 | |
18 strm << "Query-Alingment = " << query_alignment << endl ; | |
19 strm << "target-Alingment= " << target_alignment << endl ; | |
20 | |
21 | |
22 strm << (alignment_found ? "Alignment Found" : "Alignment NOT found") << endl; | |
23 strm << "Score = " << score << " (" | |
24 << matches << " matches, " | |
25 << neutral_matches << " neutral-matches, " | |
26 << mismatches << " mismatches, " | |
27 << gaps << " gaps) " | |
28 << std::endl ; | |
29 | |
30 strm << "Query = " << query_sequence | |
31 << "(qsize " << query_size | |
32 << " qstart " << query_start | |
33 << " qend " << query_end | |
34 << std::endl ; | |
35 | |
36 strm << "Target= " << target_sequence | |
37 << "(tsize " << target_size | |
38 << " tstart " << target_start | |
39 << " tend " << target_end | |
40 << std::endl ; | |
41 | |
42 strm << endl; | |
43 | |
44 delta = max(target_start, query_start); | |
45 | |
46 | |
47 //Spaces before the query string | |
48 if ( delta - query_start > 0 ) | |
49 strm << std::string( delta - query_start-1, ' ') ; | |
50 //Un-Aligned query part (prefix) | |
51 if ( query_start > 0 ) | |
52 strm << query_sequence.substr(0, query_start-1) ; | |
53 //Aligned query part | |
54 strm << "(" << query_alignment << ")"; | |
55 //Un-Aligned query part (suffix) | |
56 if ( query_end < query_sequence.length() ) | |
57 strm << query_sequence.substr( query_end+1 ) ; | |
58 strm << std::endl ; | |
59 | |
60 //Alignment bars | |
61 if ( delta > 0 ) | |
62 strm << std::string( delta-1, ' ') ; | |
63 strm << "(" ; | |
64 for (index=0; index<query_alignment.length(); index++) { | |
65 strm << ((query_alignment[index]==target_alignment[index]) ? '*' : '|' ); | |
66 } | |
67 strm << ")" ; | |
68 strm << std::endl; | |
69 | |
70 //Spaces before the target string | |
71 if ( delta - target_start > 0 ) | |
72 strm << std::string( delta - target_start, ' ') ; | |
73 //Un-Aligned target part (prefix) | |
74 if ( target_start > 0 ) | |
75 strm << target_sequence.substr(0, target_start-1); | |
76 //Aligned target part | |
77 strm << "(" << target_alignment << ")"; | |
78 | |
79 //Un-Aligned target part (suffix) | |
80 if ( target_end < target_sequence.length() ) | |
81 strm << target_sequence.substr( target_end+1 ); | |
82 strm << std::endl; | |
83 | |
84 } | |
85 | |
86 SequenceAlignment::SequenceAlignment ( ) : | |
87 _gap_panelty(-5), | |
88 _match_panelty(1), | |
89 _mismatch_panelty(-1), | |
90 _neutral_panelty(0.1) | |
91 | |
92 { | |
93 } | |
94 | |
95 | |
96 void SequenceAlignment::set_sequences(const std::string& _query, const std::string& _target) | |
97 { | |
98 _query_sequence = _query ; | |
99 _target_sequence = _target ; | |
100 } | |
101 | |
102 void SequenceAlignment::reset_alignment_results() | |
103 { | |
104 _alignment_results = SequenceAlignmentResults() ; | |
105 // | |
106 //Reset the results | |
107 _alignment_results.query_sequence = query_sequence() ; | |
108 _alignment_results.target_sequence = target_sequence() ; | |
109 } | |
110 | |
111 const SequenceAlignmentResults& SequenceAlignment::align ( const std::string& query, const std::string& target ) | |
112 { | |
113 set_sequences ( query, target ) ; | |
114 | |
115 reset_alignment_results(); | |
116 | |
117 resize_matrix ( query_sequence().length(), target_sequence().length() ) ; | |
118 populate_match_matrix(); | |
119 | |
120 reset_matrix( matrix_width(), matrix_height() ); | |
121 populate_matrix(); | |
122 find_optimal_alignment(); | |
123 | |
124 post_process(); | |
125 | |
126 return _alignment_results; | |
127 } | |
128 | |
129 void SequenceAlignment::resize_matrix(size_t width, size_t height) | |
130 { | |
131 size_t i; | |
132 | |
133 if ( matrix_width() >= width && matrix_height() >= height ) | |
134 return ; | |
135 | |
136 query_border.resize ( width ) ; | |
137 target_border.resize ( height ) ; | |
138 | |
139 score_matrix.resize ( width ); | |
140 for (i=0;i<width;i++) | |
141 score_matrix[i].resize(height) ; | |
142 | |
143 origin_matrix.resize ( width ); | |
144 for (i=0;i<width;i++) | |
145 origin_matrix[i].resize(height) ; | |
146 | |
147 match_matrix.resize ( width ); | |
148 for (i=0;i<width;i++) { | |
149 match_matrix[i].resize(height) ; | |
150 } | |
151 } | |
152 | |
153 void SequenceAlignment::populate_match_matrix() | |
154 { | |
155 for (size_t x=0; x<matrix_width(); x++) | |
156 for(size_t y=0;y<matrix_height();y++) | |
157 match_matrix[x][y] = | |
158 match_value ( query_nucleotide(x), target_nucleotide(y) ) ; | |
159 } | |
160 | |
161 | |
162 void SequenceAlignment::post_process() | |
163 { | |
164 | |
165 } | |
166 | |
167 void SequenceAlignment::print_matrix(std::ostream &strm) const | |
168 { | |
169 size_t query_index ; | |
170 size_t target_index ; | |
171 | |
172 #if 0 | |
173 printf("Match-Matrix:\n"); | |
174 printf(" - "); | |
175 for ( target_index=1; target_index<matrix_height(); target_index++ ) | |
176 printf(" %c ", target_sequence()[target_index-1] ); | |
177 printf("\n"); | |
178 | |
179 for ( query_index=1; query_index<matrix_width(); query_index++ ) { | |
180 | |
181 printf(" %c ", query_sequence()[query_index-1]) ; | |
182 | |
183 for ( target_index=1 ; target_index<matrix_height(); target_index++ ) { | |
184 printf(" %c ", match_matrix[query_index][target_index]); | |
185 } | |
186 printf("\n"); | |
187 } | |
188 #endif | |
189 | |
190 strm << "Score-Matrix:" << endl ; | |
191 | |
192 //Print Target nucleotides | |
193 strm << setw(2) << left << "-" << setw(7) << "-" ; | |
194 for ( query_index=0; query_index<matrix_width(); query_index++ ) | |
195 strm << setw(9) << left << query_nucleotide ( query_index ) ; | |
196 strm << endl; | |
197 strm << setw(2) << left << "-" << setw(7) << "-" ; | |
198 for ( query_index=0; query_index<matrix_width(); query_index++ ) | |
199 strm << setw(9) << left << query_border[query_index] ; | |
200 strm << endl; | |
201 | |
202 for ( target_index=0; target_index<matrix_height(); target_index++ ) { | |
203 | |
204 strm << setw(2) << left << target_nucleotide ( target_index ) ; | |
205 strm << setw(6) << right << target_border[target_index] << setw(1) << " "; | |
206 | |
207 for ( query_index=0 ; query_index<matrix_width(); query_index++ ) { | |
208 char ch ; | |
209 switch (origin ( query_index, target_index ) ) | |
210 { | |
211 case FROM_UPPER: ch = '|' ; break ; | |
212 case FROM_LEFT: ch = '-' ; break ; | |
213 case FROM_UPPER_LEFT: ch = '\\' ; break ; | |
214 case FROM_NOWHERE: ch = '=' ; break ; | |
215 default: ch = '*' ; break ; | |
216 } | |
217 | |
218 strm << left ; | |
219 strm << setw(1) << match(query_index,target_index); | |
220 strm << setw(1) << ch ; | |
221 strm << setw(7) << fixed << setprecision(1) | |
222 << score(query_index,target_index) ; | |
223 } | |
224 strm << endl; | |
225 } | |
226 } | |
227 | |
228 #if 0 | |
229 void LocalSequenceAlignment::reset_matrix( size_t width, size_t height ) | |
230 { | |
231 size_t x,y ; | |
232 | |
233 highest_scored_query_index = 0 ; | |
234 highest_scored_target_index = 0 ; | |
235 | |
236 for (x=0; x<width; x++) | |
237 score_matrix[x][0] = 0 ; | |
238 for (y=0; y<height; y++) | |
239 score_matrix[0][y] = 0 ; | |
240 | |
241 } | |
242 | |
243 void LocalSequenceAlignment::populate_matrix ( ) | |
244 { | |
245 size_t query_index ; | |
246 size_t target_index ; | |
247 | |
248 ssize_t highest_score = 0 ; | |
249 | |
250 for ( query_index=1; query_index<matrix_width(); query_index++ ) { | |
251 for ( target_index=1 ; target_index<matrix_height(); target_index++ ) { | |
252 ssize_t score = alignment_score(query_index, target_index); | |
253 | |
254 //printf("score(q=%zu,t=%zu)=%zu\n", query_index, target_index, score ) ; | |
255 score_matrix[query_index][target_index] = (score>0) ? score : 0 ; | |
256 | |
257 //NOTE | |
258 // not sure ">=" is strictly correct SW (might be just ">") | |
259 if ( score > highest_score ) { | |
260 highest_scored_query_index = query_index ; | |
261 highest_scored_target_index = target_index ; | |
262 highest_score = score ; | |
263 } | |
264 } | |
265 | |
266 } | |
267 } | |
268 | |
269 void LocalSequenceAlignment::find_optimal_alignment ( ) | |
270 { | |
271 size_t query_index = highest_scored_query_index ; | |
272 size_t target_index = highest_scored_target_index; | |
273 | |
274 _alignment_results.query_end = query_index-1 ; | |
275 _alignment_results.target_end= target_index-1 ; | |
276 | |
277 _alignment_results.score = score_matrix[query_index][target_index]; | |
278 | |
279 _alignment_results.matches = 0 ; | |
280 _alignment_results.mismatches = 0 ; | |
281 | |
282 while ( query_index > 0 || target_index > 0 ) { | |
283 if ( score_matrix[query_index][target_index]==0) | |
284 break ; | |
285 | |
286 //go "left" in the matrix | |
287 if ( query_index>0 && | |
288 score_matrix[query_index][target_index] == score_matrix[query_index-1][target_index] + gap_panelty() ) { | |
289 | |
290 _alignment_results.target_alignment += "-" ; | |
291 _alignment_results.query_alignment += query_sequence()[query_index-1] ; | |
292 query_index--; | |
293 } | |
294 else | |
295 //go "up-left" in the matrix | |
296 if ( query_index>0 && target_index>0 && | |
297 score_matrix[query_index][target_index] == | |
298 score_matrix[query_index-1][target_index-1] + match_score(query_index, target_index) ) { | |
299 | |
300 _alignment_results.target_alignment += target_sequence()[target_index-1]; | |
301 _alignment_results.query_alignment += query_sequence()[query_index-1] ; | |
302 | |
303 (query_sequence()[query_index-1] == target_sequence()[target_index-1]) ? | |
304 (++_alignment_results.matches) : (++_alignment_results.mismatches) ; | |
305 | |
306 query_index--; | |
307 target_index--; | |
308 } | |
309 else | |
310 //go "up" in the matrix | |
311 { | |
312 _alignment_results.target_alignment += target_sequence()[target_index-1]; | |
313 _alignment_results.query_alignment += "-" ; | |
314 target_index--; | |
315 } | |
316 } | |
317 | |
318 _alignment_results.query_start = query_index ; | |
319 _alignment_results.target_start= target_index ; | |
320 | |
321 _alignment_results.query_size = query_sequence().length(); | |
322 _alignment_results.target_size= target_sequence().length(); | |
323 | |
324 std::reverse(_alignment_results.target_alignment.begin(), _alignment_results.target_alignment.end()); | |
325 std::reverse(_alignment_results.query_alignment.begin(), _alignment_results.query_alignment.end()); | |
326 } | |
327 #endif | |
328 | |
329 void HalfLocalSequenceAlignment::set_sequences(const std::string& _query, const std::string& _target) | |
330 { | |
331 //_query_sequence = _query + std::string( _target.length(), 'N' ); | |
332 //_target_sequence = std::string( _query.length(), 'N' ) + _target; | |
333 _query_sequence = _query ; | |
334 _target_sequence = _target ; | |
335 } | |
336 | |
337 | |
338 void HalfLocalSequenceAlignment::reset_matrix( size_t width, size_t height ) | |
339 { | |
340 size_t x,y ; | |
341 | |
342 highest_scored_query_index = 0 ; | |
343 highest_scored_target_index = 0 ; | |
344 | |
345 for (x=0; x<width; x++) { | |
346 query_border[x] = | |
347 //gap_panelty() * (ssize_t)x ; | |
348 //((query_sequence()[x-1]=='N') ? neutral_panelty() : gap_panelty()) * (ssize_t)x ; | |
349 //((query_sequence()[x-1]=='N') ? 0 : gap_panelty()) * (ssize_t)x ; | |
350 0 ; | |
351 } | |
352 | |
353 for (y=0; y<height; y++) { | |
354 target_border[y] = | |
355 ( y <= 3 ) ? 0 : (gap_panelty() * (ssize_t)(y-3)); | |
356 //0; | |
357 //((target_sequence()[y-1]=='N') ? 0 : gap_panelty()) * (ssize_t)y ; | |
358 //((target_sequence()[y-1]=='N') ? neutral_panelty() : gap_panelty()) * (ssize_t)y ; | |
359 } | |
360 | |
361 } | |
362 | |
363 void HalfLocalSequenceAlignment::populate_matrix ( ) | |
364 { | |
365 size_t query_index ; | |
366 size_t target_index ; | |
367 DIRECTION origin = FROM_LEFT; | |
368 | |
369 score_type highest_score = -1000000 ; | |
370 highest_scored_query_index = -1 ; | |
371 highest_scored_target_index = -1 ; | |
372 | |
373 for ( query_index=0; query_index<matrix_width(); query_index++ ) { | |
374 for ( target_index=0 ; target_index<matrix_height(); target_index++ ) { | |
375 | |
376 //Note: | |
377 // 'safe_score()' can accept negative value of -1 (and will return the border value) | |
378 score_type up_score = safe_score(query_index, ((ssize_t)target_index)-1) + gap_panelty() ; | |
379 score_type left_score = safe_score(((ssize_t)query_index)-1,target_index ) + gap_panelty() ; | |
380 score_type upleft_score = safe_score(((ssize_t)query_index)-1,((ssize_t)target_index)-1) + | |
381 nucleotide_match_score(query_index, target_index); | |
382 | |
383 //On the diagonal line, best score can not come from upper cell | |
384 //only from left or upper-left cells | |
385 if ( target_index>3 && target_index-3 > query_index ) { | |
386 left_score = -100000 ; | |
387 } | |
388 | |
389 //printf("query_index=%d, target_index=%d, upscore=%f, left_score=%f, upleft_score=%f\n", | |
390 // query_index, target_index, up_score,left_score,upleft_score ); | |
391 | |
392 score_type score = -100000000 ; | |
393 | |
394 if ( upleft_score > score ) { | |
395 score = upleft_score ; | |
396 origin = FROM_UPPER_LEFT; | |
397 } | |
398 if ( up_score > score ) { | |
399 score = up_score ; | |
400 origin = FROM_UPPER ; | |
401 } | |
402 if ( left_score > score ) { | |
403 score = left_score ; | |
404 origin = FROM_LEFT ; | |
405 } | |
406 //printf("query_index=%d, target_index=%d, score=%f origin=%d\n", | |
407 // query_index, target_index, score, origin ); | |
408 | |
409 /*if (score<0) { | |
410 score = 0 ; | |
411 origin = FROM_NOWHERE ; | |
412 }*/ | |
413 | |
414 score_matrix[query_index][target_index] = score ; | |
415 origin_matrix[query_index][target_index] = origin ; | |
416 | |
417 //NOTE | |
418 // not sure ">=" is strictly correct SW (might be just ">") | |
419 if ( score > highest_score ) { | |
420 highest_scored_query_index = query_index ; | |
421 highest_scored_target_index = target_index ; | |
422 highest_score = score ; | |
423 } | |
424 } | |
425 } | |
426 } | |
427 | |
428 bool HalfLocalSequenceAlignment::starting_point_close_to_end_of_sequences(const size_t query_index, const size_t target_index) const | |
429 { | |
430 if ( (size_t)query_index >= query_sequence().length() - 2 || | |
431 (size_t)target_index >= target_sequence().length() - 2 ) { | |
432 /* We've reach either the end of the Adapter | |
433 * (and the adapter is shorter than the query) | |
434 * Or the end of the query | |
435 * (and the adapter covers up to the end of the query, and then continues on) | |
436 * | |
437 * So we can safely start the alignment from this point | |
438 */ | |
439 return true; | |
440 } | |
441 else { | |
442 /* The adapter is not covering the query until the end. | |
443 */ | |
444 return false; | |
445 } | |
446 } | |
447 | |
448 #undef DEBUG_STARTING_POINT | |
449 void HalfLocalSequenceAlignment::find_alignment_starting_point(ssize_t &new_query_index, ssize_t &new_target_index) const | |
450 { | |
451 /* | |
452 * Force the alignment to start from the end of the query, | |
453 * find the best score at the end of the query | |
454 * | |
455 * Try (desperately) to find a match that starts at the end of the query or the end of the target/adapter) | |
456 */ | |
457 score_type max_score = score( matrix_width()-1, matrix_height()-1 ) ; | |
458 for ( size_t q_index = 0 ; q_index < matrix_width(); q_index++ ) { | |
459 for ( size_t t_index = matrix_height()-2 ; t_index < matrix_height(); t_index++ ) { | |
460 if ( origin ( q_index, t_index ) > 0 && | |
461 safe_score ( q_index, t_index ) > max_score ) { | |
462 max_score = safe_score ( q_index, t_index ) ; | |
463 #ifdef DEBUG_STARTING_POINT | |
464 printf("Found new max score = %f at %d,%d\n", max_score, q_index, t_index ) ; | |
465 #endif | |
466 new_target_index = t_index ; | |
467 new_query_index = q_index ; | |
468 } | |
469 } | |
470 } | |
471 for ( size_t q_index = matrix_width()-2 ; q_index < matrix_width(); q_index++ ) { | |
472 for ( size_t t_index = 0 ; t_index < matrix_height(); t_index++ ) { | |
473 if ( origin ( q_index, t_index ) > 0 && | |
474 safe_score ( q_index, t_index ) > max_score ) { | |
475 max_score = safe_score ( q_index, t_index ) ; | |
476 #ifdef DEBUG_STARTING_POINT | |
477 printf("Found new max score = %f at %d,%d\n", max_score, q_index, t_index ) ; | |
478 #endif | |
479 new_target_index = t_index ; | |
480 new_query_index = q_index ; | |
481 } | |
482 } | |
483 } | |
484 #ifdef DEBUG_STARTING_POINT | |
485 printf("Forcing alignment from query_index=%d, target_index=%d, score=%f, origin=%d\n", | |
486 new_query_index, new_target_index, | |
487 score ( new_query_index, new_target_index ), | |
488 origin ( new_query_index, new_target_index ) ); | |
489 #endif | |
490 } | |
491 | |
492 | |
493 #undef DEBUG_FIND_OPTIMAL_ALIGNMENT | |
494 SequenceAlignmentResults HalfLocalSequenceAlignment::find_optimal_alignment_from_point ( const size_t query_start, const size_t target_start ) const | |
495 { | |
496 SequenceAlignmentResults results; | |
497 | |
498 results.query_sequence = query_sequence(); | |
499 results.target_sequence= target_sequence(); | |
500 | |
501 ssize_t query_index = query_start; | |
502 ssize_t target_index = target_start ; | |
503 | |
504 results.query_end = query_index ; | |
505 results.target_end= target_index ; | |
506 | |
507 #ifdef DEBUG_FIND_OPTIMAL_ALIGNMENT | |
508 printf ( "backtrace starting from (qindex=%d, tindex=%d, score=%f)\n", | |
509 query_index, target_index, score_matrix[query_index][target_index]) ; | |
510 #endif | |
511 | |
512 while ( query_index >= 0 && target_index >= 0 ) { | |
513 | |
514 const char q_nuc = query_nucleotide(query_index); | |
515 const char t_nuc = target_nucleotide(target_index); | |
516 | |
517 const DIRECTION current_origin = origin(query_index, target_index); | |
518 const char current_match = match ( query_index, target_index ) ; | |
519 | |
520 | |
521 #ifdef DEBUG_FIND_OPTIMAL_ALIGNMENT | |
522 const score_type current_score = score(query_index, target_index); | |
523 printf("query_index=%d target_index=%d query=%c target=%c score_matrix=%3.1f origin=%d accumulated_score = %3.2f\n", | |
524 query_index, target_index, | |
525 q_nuc, t_nuc, | |
526 current_score, | |
527 current_origin, | |
528 results.score) ; | |
529 #endif | |
530 | |
531 results.query_start = query_index ; | |
532 results.target_start= target_index ; | |
533 | |
534 switch ( current_origin ) | |
535 { | |
536 case FROM_LEFT: | |
537 results.target_alignment += "-" ; | |
538 results.query_alignment += q_nuc ; | |
539 results.gaps++; | |
540 results.score += gap_panelty(); | |
541 | |
542 query_index--; | |
543 break ; | |
544 | |
545 case FROM_UPPER_LEFT: | |
546 results.target_alignment += t_nuc; | |
547 results.query_alignment += q_nuc ; | |
548 | |
549 switch ( current_match ) | |
550 { | |
551 case 'N': | |
552 results.neutral_matches++ ; | |
553 results.score += neutral_panelty(); | |
554 break ; | |
555 | |
556 case 'M': | |
557 results.matches++; | |
558 results.score += match_panelty(); | |
559 break; | |
560 | |
561 case 'x': | |
562 results.mismatches++; | |
563 results.score += mismatch_panelty(); | |
564 break ; | |
565 | |
566 default: | |
567 errx(1,"Internal error: unknown match type (%c) at query_index=%zu, target_index=%zu\n", | |
568 current_match, query_index, target_index ) ; | |
569 } | |
570 | |
571 query_index--; | |
572 target_index--; | |
573 break ; | |
574 | |
575 case FROM_UPPER: | |
576 results.target_alignment += t_nuc ; | |
577 results.query_alignment += "-" ; | |
578 results.gaps++; | |
579 results.score += gap_panelty(); | |
580 | |
581 target_index--; | |
582 break; | |
583 | |
584 case FROM_NOWHERE: | |
585 default: | |
586 print_matrix(); | |
587 printf("Invalid origin (%d) at query_index=%zu, target_index=%zu\n", | |
588 current_origin, query_index, target_index ) ; | |
589 printf("Query = %s\n", query_sequence().c_str()); | |
590 printf("Target= %s\n", target_sequence().c_str()); | |
591 exit(1); | |
592 } | |
593 } | |
594 | |
595 results.query_size = query_sequence().length(); | |
596 results.target_size= target_sequence().length(); | |
597 | |
598 std::reverse(results.target_alignment.begin(),results.target_alignment.end()); | |
599 std::reverse(results.query_alignment.begin(), results.query_alignment.end()); | |
600 | |
601 return results; | |
602 } | |
603 | |
604 void HalfLocalSequenceAlignment::find_optimal_alignment ( ) | |
605 { | |
606 SequenceAlignmentResults results ; | |
607 | |
608 | |
609 //Try to find a good alignment, | |
610 //starting from the highest score cell. | |
611 results = find_optimal_alignment_from_point ( highest_scored_query_index, | |
612 highest_scored_target_index ) ; | |
613 | |
614 //Some heuristics: | |
615 //If the adapter matched 7 nucleotides anywhere in the query | |
616 //without mismatches/gaps, accept it. | |
617 if ( results.matches >= 7 | |
618 && | |
619 results.mismatches == 0 | |
620 && | |
621 results.gaps == 0 ) { | |
622 | |
623 _alignment_results = results ; | |
624 return ; | |
625 } | |
626 | |
627 if ( starting_point_close_to_end_of_sequences ( highest_scored_query_index, | |
628 highest_scored_target_index ) ) { | |
629 //We're already very close to the end of the target or query, | |
630 //can't improve much else, so return what we've got. | |
631 _alignment_results = results ; | |
632 return ; | |
633 } | |
634 | |
635 | |
636 //More heuristics: | |
637 /* The adapter is not covering the query until the end. | |
638 * Force the alignment to start from the end of the query, | |
639 * find the best score at the end of the query | |
640 * | |
641 * Try (desperately) to find a match that starts at the end of the query or the end of the target/adapter) | |
642 */ | |
643 ssize_t query_index = highest_scored_query_index ; | |
644 ssize_t target_index = highest_scored_target_index; | |
645 find_alignment_starting_point ( query_index, target_index ) ; | |
646 | |
647 _alignment_results = results ; | |
648 } | |
649 | |
650 void HalfLocalSequenceAlignment::post_process() | |
651 { | |
652 #if 0 | |
653 //Removes the Ns which were added in 'set_sequences' | |
654 //And adjust the results values accordingly | |
655 | |
656 //return ; | |
657 //_query_sequence.erase ( _query_sequence.find_last_not_of('N') + 1) ; | |
658 //_target_sequence.erase ( 0, _target_sequence.find_first_not_of('N') ) ; | |
659 | |
660 _alignment_results.query_sequence.erase ( _query_sequence.find_last_not_of('N') + 1) ; | |
661 _alignment_results.target_sequence.erase ( 0, _target_sequence.find_first_not_of('N') ) ; | |
662 _alignment_results.query_size = _alignment_results.query_sequence.length(); | |
663 _alignment_results.target_size = _alignment_results.target_sequence.length(); | |
664 | |
665 | |
666 size_t query_n_position = _alignment_results.query_alignment.find_last_not_of('N') ; | |
667 int query_n_count; | |
668 | |
669 if ( query_n_position != string::npos ) | |
670 query_n_count = _alignment_results.query_alignment.length() - query_n_position ; | |
671 else | |
672 query_n_count = 0 ; | |
673 | |
674 int target_n_count = _alignment_results.target_alignment.find_first_not_of('N') ; | |
675 | |
676 if (query_n_position != string::npos ) | |
677 _alignment_results.query_alignment.erase( query_n_position ) ; | |
678 _alignment_results.target_alignment.erase( 0,target_n_count ) ; | |
679 | |
680 //Update Results strucure | |
681 _alignment_results.query_start+= target_n_count ; | |
682 _alignment_results.query_end -= query_n_count ; | |
683 | |
684 _alignment_results.target_start = 0; | |
685 _alignment_results.target_end = _alignment_results.query_end - _alignment_results.query_start ; | |
686 | |
687 _alignment_results.query_alignment.erase ( 0, _alignment_results.query_start ) ; | |
688 _alignment_results.target_alignment.erase ( _alignment_results.target_end+1 ) ; | |
689 | |
690 //Update match/mismatch/gap counts | |
691 _alignment_results.matches = 0 ; | |
692 _alignment_results.mismatches = 0 ; | |
693 _alignment_results.gaps = 0 ; | |
694 | |
695 for (size_t index=0; index<_alignment_results.query_alignment.length(); index++) { | |
696 char q = _alignment_results.query_alignment[index]; | |
697 char t = _alignment_results.target_alignment[index]; | |
698 | |
699 if ( q == '-' || t=='-' ) { | |
700 _alignment_results.gaps ++ ; | |
701 } else { | |
702 if ( q== t ) | |
703 _alignment_results.matches++; | |
704 else | |
705 _alignment_results.mismatches++; | |
706 } | |
707 } | |
708 #endif | |
709 } | |
710 |