annotate fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.cpp @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
1 #include <string>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
2 #include <vector>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
3 #include <ostream>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
4 #include <iostream>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
5 #include <algorithm>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
6 #include <iomanip>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
7 #include <err.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
8
997f5136985f Uploaded
xilinxu
parents:
diff changeset
9 #include "sequence_alignment.h"
997f5136985f Uploaded
xilinxu
parents:
diff changeset
10
997f5136985f Uploaded
xilinxu
parents:
diff changeset
11 using namespace std;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
12
997f5136985f Uploaded
xilinxu
parents:
diff changeset
13 void SequenceAlignmentResults::print(std::ostream& strm) const
997f5136985f Uploaded
xilinxu
parents:
diff changeset
14 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
15 size_t delta;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
16 size_t index;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
17
997f5136985f Uploaded
xilinxu
parents:
diff changeset
18 strm << "Query-Alingment = " << query_alignment << endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
19 strm << "target-Alingment= " << target_alignment << endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
20
997f5136985f Uploaded
xilinxu
parents:
diff changeset
21
997f5136985f Uploaded
xilinxu
parents:
diff changeset
22 strm << (alignment_found ? "Alignment Found" : "Alignment NOT found") << endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
23 strm << "Score = " << score << " ("
997f5136985f Uploaded
xilinxu
parents:
diff changeset
24 << matches << " matches, "
997f5136985f Uploaded
xilinxu
parents:
diff changeset
25 << neutral_matches << " neutral-matches, "
997f5136985f Uploaded
xilinxu
parents:
diff changeset
26 << mismatches << " mismatches, "
997f5136985f Uploaded
xilinxu
parents:
diff changeset
27 << gaps << " gaps) "
997f5136985f Uploaded
xilinxu
parents:
diff changeset
28 << std::endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
29
997f5136985f Uploaded
xilinxu
parents:
diff changeset
30 strm << "Query = " << query_sequence
997f5136985f Uploaded
xilinxu
parents:
diff changeset
31 << "(qsize " << query_size
997f5136985f Uploaded
xilinxu
parents:
diff changeset
32 << " qstart " << query_start
997f5136985f Uploaded
xilinxu
parents:
diff changeset
33 << " qend " << query_end
997f5136985f Uploaded
xilinxu
parents:
diff changeset
34 << std::endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
35
997f5136985f Uploaded
xilinxu
parents:
diff changeset
36 strm << "Target= " << target_sequence
997f5136985f Uploaded
xilinxu
parents:
diff changeset
37 << "(tsize " << target_size
997f5136985f Uploaded
xilinxu
parents:
diff changeset
38 << " tstart " << target_start
997f5136985f Uploaded
xilinxu
parents:
diff changeset
39 << " tend " << target_end
997f5136985f Uploaded
xilinxu
parents:
diff changeset
40 << std::endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
41
997f5136985f Uploaded
xilinxu
parents:
diff changeset
42 strm << endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
43
997f5136985f Uploaded
xilinxu
parents:
diff changeset
44 delta = max(target_start, query_start);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
45
997f5136985f Uploaded
xilinxu
parents:
diff changeset
46
997f5136985f Uploaded
xilinxu
parents:
diff changeset
47 //Spaces before the query string
997f5136985f Uploaded
xilinxu
parents:
diff changeset
48 if ( delta - query_start > 0 )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
49 strm << std::string( delta - query_start-1, ' ') ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
50 //Un-Aligned query part (prefix)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
51 if ( query_start > 0 )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
52 strm << query_sequence.substr(0, query_start-1) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
53 //Aligned query part
997f5136985f Uploaded
xilinxu
parents:
diff changeset
54 strm << "(" << query_alignment << ")";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
55 //Un-Aligned query part (suffix)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
56 if ( query_end < query_sequence.length() )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
57 strm << query_sequence.substr( query_end+1 ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
58 strm << std::endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
59
997f5136985f Uploaded
xilinxu
parents:
diff changeset
60 //Alignment bars
997f5136985f Uploaded
xilinxu
parents:
diff changeset
61 if ( delta > 0 )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
62 strm << std::string( delta-1, ' ') ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
63 strm << "(" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
64 for (index=0; index<query_alignment.length(); index++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
65 strm << ((query_alignment[index]==target_alignment[index]) ? '*' : '|' );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
66 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
67 strm << ")" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
68 strm << std::endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
69
997f5136985f Uploaded
xilinxu
parents:
diff changeset
70 //Spaces before the target string
997f5136985f Uploaded
xilinxu
parents:
diff changeset
71 if ( delta - target_start > 0 )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
72 strm << std::string( delta - target_start, ' ') ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
73 //Un-Aligned target part (prefix)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
74 if ( target_start > 0 )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
75 strm << target_sequence.substr(0, target_start-1);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
76 //Aligned target part
997f5136985f Uploaded
xilinxu
parents:
diff changeset
77 strm << "(" << target_alignment << ")";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
78
997f5136985f Uploaded
xilinxu
parents:
diff changeset
79 //Un-Aligned target part (suffix)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
80 if ( target_end < target_sequence.length() )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
81 strm << target_sequence.substr( target_end+1 );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
82 strm << std::endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
83
997f5136985f Uploaded
xilinxu
parents:
diff changeset
84 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
85
997f5136985f Uploaded
xilinxu
parents:
diff changeset
86 SequenceAlignment::SequenceAlignment ( ) :
997f5136985f Uploaded
xilinxu
parents:
diff changeset
87 _gap_panelty(-5),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
88 _match_panelty(1),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
89 _mismatch_panelty(-1),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
90 _neutral_panelty(0.1)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
91
997f5136985f Uploaded
xilinxu
parents:
diff changeset
92 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
93 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
94
997f5136985f Uploaded
xilinxu
parents:
diff changeset
95
997f5136985f Uploaded
xilinxu
parents:
diff changeset
96 void SequenceAlignment::set_sequences(const std::string& _query, const std::string& _target)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
97 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
98 _query_sequence = _query ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
99 _target_sequence = _target ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
100 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
101
997f5136985f Uploaded
xilinxu
parents:
diff changeset
102 void SequenceAlignment::reset_alignment_results()
997f5136985f Uploaded
xilinxu
parents:
diff changeset
103 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
104 _alignment_results = SequenceAlignmentResults() ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
105 //
997f5136985f Uploaded
xilinxu
parents:
diff changeset
106 //Reset the results
997f5136985f Uploaded
xilinxu
parents:
diff changeset
107 _alignment_results.query_sequence = query_sequence() ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
108 _alignment_results.target_sequence = target_sequence() ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
109 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
110
997f5136985f Uploaded
xilinxu
parents:
diff changeset
111 const SequenceAlignmentResults& SequenceAlignment::align ( const std::string& query, const std::string& target )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
112 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
113 set_sequences ( query, target ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
114
997f5136985f Uploaded
xilinxu
parents:
diff changeset
115 reset_alignment_results();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
116
997f5136985f Uploaded
xilinxu
parents:
diff changeset
117 resize_matrix ( query_sequence().length(), target_sequence().length() ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
118 populate_match_matrix();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
119
997f5136985f Uploaded
xilinxu
parents:
diff changeset
120 reset_matrix( matrix_width(), matrix_height() );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
121 populate_matrix();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
122 find_optimal_alignment();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
123
997f5136985f Uploaded
xilinxu
parents:
diff changeset
124 post_process();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
125
997f5136985f Uploaded
xilinxu
parents:
diff changeset
126 return _alignment_results;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
127 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
128
997f5136985f Uploaded
xilinxu
parents:
diff changeset
129 void SequenceAlignment::resize_matrix(size_t width, size_t height)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
130 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
131 size_t i;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
132
997f5136985f Uploaded
xilinxu
parents:
diff changeset
133 if ( matrix_width() >= width && matrix_height() >= height )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
134 return ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
135
997f5136985f Uploaded
xilinxu
parents:
diff changeset
136 query_border.resize ( width ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
137 target_border.resize ( height ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
138
997f5136985f Uploaded
xilinxu
parents:
diff changeset
139 score_matrix.resize ( width );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
140 for (i=0;i<width;i++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
141 score_matrix[i].resize(height) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
142
997f5136985f Uploaded
xilinxu
parents:
diff changeset
143 origin_matrix.resize ( width );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
144 for (i=0;i<width;i++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
145 origin_matrix[i].resize(height) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
146
997f5136985f Uploaded
xilinxu
parents:
diff changeset
147 match_matrix.resize ( width );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
148 for (i=0;i<width;i++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
149 match_matrix[i].resize(height) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
150 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
151 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
152
997f5136985f Uploaded
xilinxu
parents:
diff changeset
153 void SequenceAlignment::populate_match_matrix()
997f5136985f Uploaded
xilinxu
parents:
diff changeset
154 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
155 for (size_t x=0; x<matrix_width(); x++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
156 for(size_t y=0;y<matrix_height();y++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
157 match_matrix[x][y] =
997f5136985f Uploaded
xilinxu
parents:
diff changeset
158 match_value ( query_nucleotide(x), target_nucleotide(y) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
159 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
160
997f5136985f Uploaded
xilinxu
parents:
diff changeset
161
997f5136985f Uploaded
xilinxu
parents:
diff changeset
162 void SequenceAlignment::post_process()
997f5136985f Uploaded
xilinxu
parents:
diff changeset
163 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
164
997f5136985f Uploaded
xilinxu
parents:
diff changeset
165 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
166
997f5136985f Uploaded
xilinxu
parents:
diff changeset
167 void SequenceAlignment::print_matrix(std::ostream &strm) const
997f5136985f Uploaded
xilinxu
parents:
diff changeset
168 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
169 size_t query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
170 size_t target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
171
997f5136985f Uploaded
xilinxu
parents:
diff changeset
172 #if 0
997f5136985f Uploaded
xilinxu
parents:
diff changeset
173 printf("Match-Matrix:\n");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
174 printf(" - ");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
175 for ( target_index=1; target_index<matrix_height(); target_index++ )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
176 printf(" %c ", target_sequence()[target_index-1] );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
177 printf("\n");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
178
997f5136985f Uploaded
xilinxu
parents:
diff changeset
179 for ( query_index=1; query_index<matrix_width(); query_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
180
997f5136985f Uploaded
xilinxu
parents:
diff changeset
181 printf(" %c ", query_sequence()[query_index-1]) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
182
997f5136985f Uploaded
xilinxu
parents:
diff changeset
183 for ( target_index=1 ; target_index<matrix_height(); target_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
184 printf(" %c ", match_matrix[query_index][target_index]);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
185 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
186 printf("\n");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
187 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
188 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
189
997f5136985f Uploaded
xilinxu
parents:
diff changeset
190 strm << "Score-Matrix:" << endl ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
191
997f5136985f Uploaded
xilinxu
parents:
diff changeset
192 //Print Target nucleotides
997f5136985f Uploaded
xilinxu
parents:
diff changeset
193 strm << setw(2) << left << "-" << setw(7) << "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
194 for ( query_index=0; query_index<matrix_width(); query_index++ )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
195 strm << setw(9) << left << query_nucleotide ( query_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
196 strm << endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
197 strm << setw(2) << left << "-" << setw(7) << "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
198 for ( query_index=0; query_index<matrix_width(); query_index++ )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
199 strm << setw(9) << left << query_border[query_index] ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
200 strm << endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
201
997f5136985f Uploaded
xilinxu
parents:
diff changeset
202 for ( target_index=0; target_index<matrix_height(); target_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
203
997f5136985f Uploaded
xilinxu
parents:
diff changeset
204 strm << setw(2) << left << target_nucleotide ( target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
205 strm << setw(6) << right << target_border[target_index] << setw(1) << " ";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
206
997f5136985f Uploaded
xilinxu
parents:
diff changeset
207 for ( query_index=0 ; query_index<matrix_width(); query_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
208 char ch ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
209 switch (origin ( query_index, target_index ) )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
210 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
211 case FROM_UPPER: ch = '|' ; break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
212 case FROM_LEFT: ch = '-' ; break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
213 case FROM_UPPER_LEFT: ch = '\\' ; break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
214 case FROM_NOWHERE: ch = '=' ; break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
215 default: ch = '*' ; break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
216 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
217
997f5136985f Uploaded
xilinxu
parents:
diff changeset
218 strm << left ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
219 strm << setw(1) << match(query_index,target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
220 strm << setw(1) << ch ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
221 strm << setw(7) << fixed << setprecision(1)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
222 << score(query_index,target_index) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
223 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
224 strm << endl;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
225 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
226 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
227
997f5136985f Uploaded
xilinxu
parents:
diff changeset
228 #if 0
997f5136985f Uploaded
xilinxu
parents:
diff changeset
229 void LocalSequenceAlignment::reset_matrix( size_t width, size_t height )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
230 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
231 size_t x,y ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
232
997f5136985f Uploaded
xilinxu
parents:
diff changeset
233 highest_scored_query_index = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
234 highest_scored_target_index = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
235
997f5136985f Uploaded
xilinxu
parents:
diff changeset
236 for (x=0; x<width; x++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
237 score_matrix[x][0] = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
238 for (y=0; y<height; y++)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
239 score_matrix[0][y] = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
240
997f5136985f Uploaded
xilinxu
parents:
diff changeset
241 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
242
997f5136985f Uploaded
xilinxu
parents:
diff changeset
243 void LocalSequenceAlignment::populate_matrix ( )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
244 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
245 size_t query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
246 size_t target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
247
997f5136985f Uploaded
xilinxu
parents:
diff changeset
248 ssize_t highest_score = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
249
997f5136985f Uploaded
xilinxu
parents:
diff changeset
250 for ( query_index=1; query_index<matrix_width(); query_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
251 for ( target_index=1 ; target_index<matrix_height(); target_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
252 ssize_t score = alignment_score(query_index, target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
253
997f5136985f Uploaded
xilinxu
parents:
diff changeset
254 //printf("score(q=%zu,t=%zu)=%zu\n", query_index, target_index, score ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
255 score_matrix[query_index][target_index] = (score>0) ? score : 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
256
997f5136985f Uploaded
xilinxu
parents:
diff changeset
257 //NOTE
997f5136985f Uploaded
xilinxu
parents:
diff changeset
258 // not sure ">=" is strictly correct SW (might be just ">")
997f5136985f Uploaded
xilinxu
parents:
diff changeset
259 if ( score > highest_score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
260 highest_scored_query_index = query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
261 highest_scored_target_index = target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
262 highest_score = score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
263 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
264 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
265
997f5136985f Uploaded
xilinxu
parents:
diff changeset
266 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
267 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
268
997f5136985f Uploaded
xilinxu
parents:
diff changeset
269 void LocalSequenceAlignment::find_optimal_alignment ( )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
270 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
271 size_t query_index = highest_scored_query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
272 size_t target_index = highest_scored_target_index;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
273
997f5136985f Uploaded
xilinxu
parents:
diff changeset
274 _alignment_results.query_end = query_index-1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
275 _alignment_results.target_end= target_index-1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
276
997f5136985f Uploaded
xilinxu
parents:
diff changeset
277 _alignment_results.score = score_matrix[query_index][target_index];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
278
997f5136985f Uploaded
xilinxu
parents:
diff changeset
279 _alignment_results.matches = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
280 _alignment_results.mismatches = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
281
997f5136985f Uploaded
xilinxu
parents:
diff changeset
282 while ( query_index > 0 || target_index > 0 ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
283 if ( score_matrix[query_index][target_index]==0)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
284 break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
285
997f5136985f Uploaded
xilinxu
parents:
diff changeset
286 //go "left" in the matrix
997f5136985f Uploaded
xilinxu
parents:
diff changeset
287 if ( query_index>0 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
288 score_matrix[query_index][target_index] == score_matrix[query_index-1][target_index] + gap_panelty() ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
289
997f5136985f Uploaded
xilinxu
parents:
diff changeset
290 _alignment_results.target_alignment += "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
291 _alignment_results.query_alignment += query_sequence()[query_index-1] ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
292 query_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
293 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
294 else
997f5136985f Uploaded
xilinxu
parents:
diff changeset
295 //go "up-left" in the matrix
997f5136985f Uploaded
xilinxu
parents:
diff changeset
296 if ( query_index>0 && target_index>0 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
297 score_matrix[query_index][target_index] ==
997f5136985f Uploaded
xilinxu
parents:
diff changeset
298 score_matrix[query_index-1][target_index-1] + match_score(query_index, target_index) ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
299
997f5136985f Uploaded
xilinxu
parents:
diff changeset
300 _alignment_results.target_alignment += target_sequence()[target_index-1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
301 _alignment_results.query_alignment += query_sequence()[query_index-1] ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
302
997f5136985f Uploaded
xilinxu
parents:
diff changeset
303 (query_sequence()[query_index-1] == target_sequence()[target_index-1]) ?
997f5136985f Uploaded
xilinxu
parents:
diff changeset
304 (++_alignment_results.matches) : (++_alignment_results.mismatches) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
305
997f5136985f Uploaded
xilinxu
parents:
diff changeset
306 query_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
307 target_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
308 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
309 else
997f5136985f Uploaded
xilinxu
parents:
diff changeset
310 //go "up" in the matrix
997f5136985f Uploaded
xilinxu
parents:
diff changeset
311 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
312 _alignment_results.target_alignment += target_sequence()[target_index-1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
313 _alignment_results.query_alignment += "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
314 target_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
315 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
316 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
317
997f5136985f Uploaded
xilinxu
parents:
diff changeset
318 _alignment_results.query_start = query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
319 _alignment_results.target_start= target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
320
997f5136985f Uploaded
xilinxu
parents:
diff changeset
321 _alignment_results.query_size = query_sequence().length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
322 _alignment_results.target_size= target_sequence().length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
323
997f5136985f Uploaded
xilinxu
parents:
diff changeset
324 std::reverse(_alignment_results.target_alignment.begin(), _alignment_results.target_alignment.end());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
325 std::reverse(_alignment_results.query_alignment.begin(), _alignment_results.query_alignment.end());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
326 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
327 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
328
997f5136985f Uploaded
xilinxu
parents:
diff changeset
329 void HalfLocalSequenceAlignment::set_sequences(const std::string& _query, const std::string& _target)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
330 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
331 //_query_sequence = _query + std::string( _target.length(), 'N' );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
332 //_target_sequence = std::string( _query.length(), 'N' ) + _target;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
333 _query_sequence = _query ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
334 _target_sequence = _target ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
335 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
336
997f5136985f Uploaded
xilinxu
parents:
diff changeset
337
997f5136985f Uploaded
xilinxu
parents:
diff changeset
338 void HalfLocalSequenceAlignment::reset_matrix( size_t width, size_t height )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
339 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
340 size_t x,y ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
341
997f5136985f Uploaded
xilinxu
parents:
diff changeset
342 highest_scored_query_index = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
343 highest_scored_target_index = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
344
997f5136985f Uploaded
xilinxu
parents:
diff changeset
345 for (x=0; x<width; x++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
346 query_border[x] =
997f5136985f Uploaded
xilinxu
parents:
diff changeset
347 //gap_panelty() * (ssize_t)x ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
348 //((query_sequence()[x-1]=='N') ? neutral_panelty() : gap_panelty()) * (ssize_t)x ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
349 //((query_sequence()[x-1]=='N') ? 0 : gap_panelty()) * (ssize_t)x ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
350 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
351 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
352
997f5136985f Uploaded
xilinxu
parents:
diff changeset
353 for (y=0; y<height; y++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
354 target_border[y] =
997f5136985f Uploaded
xilinxu
parents:
diff changeset
355 ( y <= 3 ) ? 0 : (gap_panelty() * (ssize_t)(y-3));
997f5136985f Uploaded
xilinxu
parents:
diff changeset
356 //0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
357 //((target_sequence()[y-1]=='N') ? 0 : gap_panelty()) * (ssize_t)y ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
358 //((target_sequence()[y-1]=='N') ? neutral_panelty() : gap_panelty()) * (ssize_t)y ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
359 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
360
997f5136985f Uploaded
xilinxu
parents:
diff changeset
361 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
362
997f5136985f Uploaded
xilinxu
parents:
diff changeset
363 void HalfLocalSequenceAlignment::populate_matrix ( )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
364 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
365 size_t query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
366 size_t target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
367 DIRECTION origin = FROM_LEFT;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
368
997f5136985f Uploaded
xilinxu
parents:
diff changeset
369 score_type highest_score = -1000000 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
370 highest_scored_query_index = -1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
371 highest_scored_target_index = -1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
372
997f5136985f Uploaded
xilinxu
parents:
diff changeset
373 for ( query_index=0; query_index<matrix_width(); query_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
374 for ( target_index=0 ; target_index<matrix_height(); target_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
375
997f5136985f Uploaded
xilinxu
parents:
diff changeset
376 //Note:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
377 // 'safe_score()' can accept negative value of -1 (and will return the border value)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
378 score_type up_score = safe_score(query_index, ((ssize_t)target_index)-1) + gap_panelty() ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
379 score_type left_score = safe_score(((ssize_t)query_index)-1,target_index ) + gap_panelty() ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
380 score_type upleft_score = safe_score(((ssize_t)query_index)-1,((ssize_t)target_index)-1) +
997f5136985f Uploaded
xilinxu
parents:
diff changeset
381 nucleotide_match_score(query_index, target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
382
997f5136985f Uploaded
xilinxu
parents:
diff changeset
383 //On the diagonal line, best score can not come from upper cell
997f5136985f Uploaded
xilinxu
parents:
diff changeset
384 //only from left or upper-left cells
997f5136985f Uploaded
xilinxu
parents:
diff changeset
385 if ( target_index>3 && target_index-3 > query_index ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
386 left_score = -100000 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
387 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
388
997f5136985f Uploaded
xilinxu
parents:
diff changeset
389 //printf("query_index=%d, target_index=%d, upscore=%f, left_score=%f, upleft_score=%f\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
390 // query_index, target_index, up_score,left_score,upleft_score );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
391
997f5136985f Uploaded
xilinxu
parents:
diff changeset
392 score_type score = -100000000 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
393
997f5136985f Uploaded
xilinxu
parents:
diff changeset
394 if ( upleft_score > score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
395 score = upleft_score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
396 origin = FROM_UPPER_LEFT;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
397 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
398 if ( up_score > score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
399 score = up_score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
400 origin = FROM_UPPER ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
401 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
402 if ( left_score > score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
403 score = left_score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
404 origin = FROM_LEFT ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
405 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
406 //printf("query_index=%d, target_index=%d, score=%f origin=%d\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
407 // query_index, target_index, score, origin );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
408
997f5136985f Uploaded
xilinxu
parents:
diff changeset
409 /*if (score<0) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
410 score = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
411 origin = FROM_NOWHERE ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
412 }*/
997f5136985f Uploaded
xilinxu
parents:
diff changeset
413
997f5136985f Uploaded
xilinxu
parents:
diff changeset
414 score_matrix[query_index][target_index] = score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
415 origin_matrix[query_index][target_index] = origin ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
416
997f5136985f Uploaded
xilinxu
parents:
diff changeset
417 //NOTE
997f5136985f Uploaded
xilinxu
parents:
diff changeset
418 // not sure ">=" is strictly correct SW (might be just ">")
997f5136985f Uploaded
xilinxu
parents:
diff changeset
419 if ( score > highest_score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
420 highest_scored_query_index = query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
421 highest_scored_target_index = target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
422 highest_score = score ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
423 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
424 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
425 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
426 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
427
997f5136985f Uploaded
xilinxu
parents:
diff changeset
428 bool HalfLocalSequenceAlignment::starting_point_close_to_end_of_sequences(const size_t query_index, const size_t target_index) const
997f5136985f Uploaded
xilinxu
parents:
diff changeset
429 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
430 if ( (size_t)query_index >= query_sequence().length() - 2 ||
997f5136985f Uploaded
xilinxu
parents:
diff changeset
431 (size_t)target_index >= target_sequence().length() - 2 ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
432 /* We've reach either the end of the Adapter
997f5136985f Uploaded
xilinxu
parents:
diff changeset
433 * (and the adapter is shorter than the query)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
434 * Or the end of the query
997f5136985f Uploaded
xilinxu
parents:
diff changeset
435 * (and the adapter covers up to the end of the query, and then continues on)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
436 *
997f5136985f Uploaded
xilinxu
parents:
diff changeset
437 * So we can safely start the alignment from this point
997f5136985f Uploaded
xilinxu
parents:
diff changeset
438 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
439 return true;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
440 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
441 else {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
442 /* The adapter is not covering the query until the end.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
443 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
444 return false;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
445 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
446 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
447
997f5136985f Uploaded
xilinxu
parents:
diff changeset
448 #undef DEBUG_STARTING_POINT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
449 void HalfLocalSequenceAlignment::find_alignment_starting_point(ssize_t &new_query_index, ssize_t &new_target_index) const
997f5136985f Uploaded
xilinxu
parents:
diff changeset
450 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
451 /*
997f5136985f Uploaded
xilinxu
parents:
diff changeset
452 * Force the alignment to start from the end of the query,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
453 * find the best score at the end of the query
997f5136985f Uploaded
xilinxu
parents:
diff changeset
454 *
997f5136985f Uploaded
xilinxu
parents:
diff changeset
455 * Try (desperately) to find a match that starts at the end of the query or the end of the target/adapter)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
456 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
457 score_type max_score = score( matrix_width()-1, matrix_height()-1 ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
458 for ( size_t q_index = 0 ; q_index < matrix_width(); q_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
459 for ( size_t t_index = matrix_height()-2 ; t_index < matrix_height(); t_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
460 if ( origin ( q_index, t_index ) > 0 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
461 safe_score ( q_index, t_index ) > max_score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
462 max_score = safe_score ( q_index, t_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
463 #ifdef DEBUG_STARTING_POINT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
464 printf("Found new max score = %f at %d,%d\n", max_score, q_index, t_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
465 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
466 new_target_index = t_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
467 new_query_index = q_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
468 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
469 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
470 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
471 for ( size_t q_index = matrix_width()-2 ; q_index < matrix_width(); q_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
472 for ( size_t t_index = 0 ; t_index < matrix_height(); t_index++ ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
473 if ( origin ( q_index, t_index ) > 0 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
474 safe_score ( q_index, t_index ) > max_score ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
475 max_score = safe_score ( q_index, t_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
476 #ifdef DEBUG_STARTING_POINT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
477 printf("Found new max score = %f at %d,%d\n", max_score, q_index, t_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
478 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
479 new_target_index = t_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
480 new_query_index = q_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
481 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
482 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
483 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
484 #ifdef DEBUG_STARTING_POINT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
485 printf("Forcing alignment from query_index=%d, target_index=%d, score=%f, origin=%d\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
486 new_query_index, new_target_index,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
487 score ( new_query_index, new_target_index ),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
488 origin ( new_query_index, new_target_index ) );
997f5136985f Uploaded
xilinxu
parents:
diff changeset
489 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
490 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
491
997f5136985f Uploaded
xilinxu
parents:
diff changeset
492
997f5136985f Uploaded
xilinxu
parents:
diff changeset
493 #undef DEBUG_FIND_OPTIMAL_ALIGNMENT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
494 SequenceAlignmentResults HalfLocalSequenceAlignment::find_optimal_alignment_from_point ( const size_t query_start, const size_t target_start ) const
997f5136985f Uploaded
xilinxu
parents:
diff changeset
495 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
496 SequenceAlignmentResults results;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
497
997f5136985f Uploaded
xilinxu
parents:
diff changeset
498 results.query_sequence = query_sequence();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
499 results.target_sequence= target_sequence();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
500
997f5136985f Uploaded
xilinxu
parents:
diff changeset
501 ssize_t query_index = query_start;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
502 ssize_t target_index = target_start ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
503
997f5136985f Uploaded
xilinxu
parents:
diff changeset
504 results.query_end = query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
505 results.target_end= target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
506
997f5136985f Uploaded
xilinxu
parents:
diff changeset
507 #ifdef DEBUG_FIND_OPTIMAL_ALIGNMENT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
508 printf ( "backtrace starting from (qindex=%d, tindex=%d, score=%f)\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
509 query_index, target_index, score_matrix[query_index][target_index]) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
510 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
511
997f5136985f Uploaded
xilinxu
parents:
diff changeset
512 while ( query_index >= 0 && target_index >= 0 ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
513
997f5136985f Uploaded
xilinxu
parents:
diff changeset
514 const char q_nuc = query_nucleotide(query_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
515 const char t_nuc = target_nucleotide(target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
516
997f5136985f Uploaded
xilinxu
parents:
diff changeset
517 const DIRECTION current_origin = origin(query_index, target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
518 const char current_match = match ( query_index, target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
519
997f5136985f Uploaded
xilinxu
parents:
diff changeset
520
997f5136985f Uploaded
xilinxu
parents:
diff changeset
521 #ifdef DEBUG_FIND_OPTIMAL_ALIGNMENT
997f5136985f Uploaded
xilinxu
parents:
diff changeset
522 const score_type current_score = score(query_index, target_index);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
523 printf("query_index=%d target_index=%d query=%c target=%c score_matrix=%3.1f origin=%d accumulated_score = %3.2f\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
524 query_index, target_index,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
525 q_nuc, t_nuc,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
526 current_score,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
527 current_origin,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
528 results.score) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
529 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
530
997f5136985f Uploaded
xilinxu
parents:
diff changeset
531 results.query_start = query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
532 results.target_start= target_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
533
997f5136985f Uploaded
xilinxu
parents:
diff changeset
534 switch ( current_origin )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
535 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
536 case FROM_LEFT:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
537 results.target_alignment += "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
538 results.query_alignment += q_nuc ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
539 results.gaps++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
540 results.score += gap_panelty();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
541
997f5136985f Uploaded
xilinxu
parents:
diff changeset
542 query_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
543 break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
544
997f5136985f Uploaded
xilinxu
parents:
diff changeset
545 case FROM_UPPER_LEFT:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
546 results.target_alignment += t_nuc;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
547 results.query_alignment += q_nuc ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
548
997f5136985f Uploaded
xilinxu
parents:
diff changeset
549 switch ( current_match )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
550 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
551 case 'N':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
552 results.neutral_matches++ ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
553 results.score += neutral_panelty();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
554 break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
555
997f5136985f Uploaded
xilinxu
parents:
diff changeset
556 case 'M':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
557 results.matches++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
558 results.score += match_panelty();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
559 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
560
997f5136985f Uploaded
xilinxu
parents:
diff changeset
561 case 'x':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
562 results.mismatches++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
563 results.score += mismatch_panelty();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
564 break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
565
997f5136985f Uploaded
xilinxu
parents:
diff changeset
566 default:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
567 errx(1,"Internal error: unknown match type (%c) at query_index=%zu, target_index=%zu\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
568 current_match, query_index, target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
569 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
570
997f5136985f Uploaded
xilinxu
parents:
diff changeset
571 query_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
572 target_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
573 break ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
574
997f5136985f Uploaded
xilinxu
parents:
diff changeset
575 case FROM_UPPER:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
576 results.target_alignment += t_nuc ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
577 results.query_alignment += "-" ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
578 results.gaps++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
579 results.score += gap_panelty();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
580
997f5136985f Uploaded
xilinxu
parents:
diff changeset
581 target_index--;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
582 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
583
997f5136985f Uploaded
xilinxu
parents:
diff changeset
584 case FROM_NOWHERE:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
585 default:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
586 print_matrix();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
587 printf("Invalid origin (%d) at query_index=%zu, target_index=%zu\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
588 current_origin, query_index, target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
589 printf("Query = %s\n", query_sequence().c_str());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
590 printf("Target= %s\n", target_sequence().c_str());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
591 exit(1);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
592 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
593 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
594
997f5136985f Uploaded
xilinxu
parents:
diff changeset
595 results.query_size = query_sequence().length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
596 results.target_size= target_sequence().length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
597
997f5136985f Uploaded
xilinxu
parents:
diff changeset
598 std::reverse(results.target_alignment.begin(),results.target_alignment.end());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
599 std::reverse(results.query_alignment.begin(), results.query_alignment.end());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
600
997f5136985f Uploaded
xilinxu
parents:
diff changeset
601 return results;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
602 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
603
997f5136985f Uploaded
xilinxu
parents:
diff changeset
604 void HalfLocalSequenceAlignment::find_optimal_alignment ( )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
605 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
606 SequenceAlignmentResults results ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
607
997f5136985f Uploaded
xilinxu
parents:
diff changeset
608
997f5136985f Uploaded
xilinxu
parents:
diff changeset
609 //Try to find a good alignment,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
610 //starting from the highest score cell.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
611 results = find_optimal_alignment_from_point ( highest_scored_query_index,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
612 highest_scored_target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
613
997f5136985f Uploaded
xilinxu
parents:
diff changeset
614 //Some heuristics:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
615 //If the adapter matched 7 nucleotides anywhere in the query
997f5136985f Uploaded
xilinxu
parents:
diff changeset
616 //without mismatches/gaps, accept it.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
617 if ( results.matches >= 7
997f5136985f Uploaded
xilinxu
parents:
diff changeset
618 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
619 results.mismatches == 0
997f5136985f Uploaded
xilinxu
parents:
diff changeset
620 &&
997f5136985f Uploaded
xilinxu
parents:
diff changeset
621 results.gaps == 0 ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
622
997f5136985f Uploaded
xilinxu
parents:
diff changeset
623 _alignment_results = results ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
624 return ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
625 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
626
997f5136985f Uploaded
xilinxu
parents:
diff changeset
627 if ( starting_point_close_to_end_of_sequences ( highest_scored_query_index,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
628 highest_scored_target_index ) ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
629 //We're already very close to the end of the target or query,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
630 //can't improve much else, so return what we've got.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
631 _alignment_results = results ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
632 return ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
633 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
634
997f5136985f Uploaded
xilinxu
parents:
diff changeset
635
997f5136985f Uploaded
xilinxu
parents:
diff changeset
636 //More heuristics:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
637 /* The adapter is not covering the query until the end.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
638 * Force the alignment to start from the end of the query,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
639 * find the best score at the end of the query
997f5136985f Uploaded
xilinxu
parents:
diff changeset
640 *
997f5136985f Uploaded
xilinxu
parents:
diff changeset
641 * Try (desperately) to find a match that starts at the end of the query or the end of the target/adapter)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
642 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
643 ssize_t query_index = highest_scored_query_index ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
644 ssize_t target_index = highest_scored_target_index;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
645 find_alignment_starting_point ( query_index, target_index ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
646
997f5136985f Uploaded
xilinxu
parents:
diff changeset
647 _alignment_results = results ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
648 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
649
997f5136985f Uploaded
xilinxu
parents:
diff changeset
650 void HalfLocalSequenceAlignment::post_process()
997f5136985f Uploaded
xilinxu
parents:
diff changeset
651 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
652 #if 0
997f5136985f Uploaded
xilinxu
parents:
diff changeset
653 //Removes the Ns which were added in 'set_sequences'
997f5136985f Uploaded
xilinxu
parents:
diff changeset
654 //And adjust the results values accordingly
997f5136985f Uploaded
xilinxu
parents:
diff changeset
655
997f5136985f Uploaded
xilinxu
parents:
diff changeset
656 //return ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
657 //_query_sequence.erase ( _query_sequence.find_last_not_of('N') + 1) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
658 //_target_sequence.erase ( 0, _target_sequence.find_first_not_of('N') ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
659
997f5136985f Uploaded
xilinxu
parents:
diff changeset
660 _alignment_results.query_sequence.erase ( _query_sequence.find_last_not_of('N') + 1) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
661 _alignment_results.target_sequence.erase ( 0, _target_sequence.find_first_not_of('N') ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
662 _alignment_results.query_size = _alignment_results.query_sequence.length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
663 _alignment_results.target_size = _alignment_results.target_sequence.length();
997f5136985f Uploaded
xilinxu
parents:
diff changeset
664
997f5136985f Uploaded
xilinxu
parents:
diff changeset
665
997f5136985f Uploaded
xilinxu
parents:
diff changeset
666 size_t query_n_position = _alignment_results.query_alignment.find_last_not_of('N') ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
667 int query_n_count;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
668
997f5136985f Uploaded
xilinxu
parents:
diff changeset
669 if ( query_n_position != string::npos )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
670 query_n_count = _alignment_results.query_alignment.length() - query_n_position ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
671 else
997f5136985f Uploaded
xilinxu
parents:
diff changeset
672 query_n_count = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
673
997f5136985f Uploaded
xilinxu
parents:
diff changeset
674 int target_n_count = _alignment_results.target_alignment.find_first_not_of('N') ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
675
997f5136985f Uploaded
xilinxu
parents:
diff changeset
676 if (query_n_position != string::npos )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
677 _alignment_results.query_alignment.erase( query_n_position ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
678 _alignment_results.target_alignment.erase( 0,target_n_count ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
679
997f5136985f Uploaded
xilinxu
parents:
diff changeset
680 //Update Results strucure
997f5136985f Uploaded
xilinxu
parents:
diff changeset
681 _alignment_results.query_start+= target_n_count ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
682 _alignment_results.query_end -= query_n_count ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
683
997f5136985f Uploaded
xilinxu
parents:
diff changeset
684 _alignment_results.target_start = 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
685 _alignment_results.target_end = _alignment_results.query_end - _alignment_results.query_start ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
686
997f5136985f Uploaded
xilinxu
parents:
diff changeset
687 _alignment_results.query_alignment.erase ( 0, _alignment_results.query_start ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
688 _alignment_results.target_alignment.erase ( _alignment_results.target_end+1 ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
689
997f5136985f Uploaded
xilinxu
parents:
diff changeset
690 //Update match/mismatch/gap counts
997f5136985f Uploaded
xilinxu
parents:
diff changeset
691 _alignment_results.matches = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
692 _alignment_results.mismatches = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
693 _alignment_results.gaps = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
694
997f5136985f Uploaded
xilinxu
parents:
diff changeset
695 for (size_t index=0; index<_alignment_results.query_alignment.length(); index++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
696 char q = _alignment_results.query_alignment[index];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
697 char t = _alignment_results.target_alignment[index];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
698
997f5136985f Uploaded
xilinxu
parents:
diff changeset
699 if ( q == '-' || t=='-' ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
700 _alignment_results.gaps ++ ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
701 } else {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
702 if ( q== t )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
703 _alignment_results.matches++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
704 else
997f5136985f Uploaded
xilinxu
parents:
diff changeset
705 _alignment_results.mismatches++;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
706 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
707 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
708 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
709 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
710