Mercurial > repos > bitlab > imsame
comparison IMSAME/src/alignmentFunctions.c @ 0:762009a91895 draft
Uploaded
author | bitlab |
---|---|
date | Sat, 15 Dec 2018 18:04:10 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:762009a91895 |
---|---|
1 #include <stdio.h> | |
2 #include <stdlib.h> | |
3 #include <string.h> | |
4 #include <ctype.h> | |
5 #include <pthread.h> | |
6 #include <inttypes.h> | |
7 #include <math.h> | |
8 #include <float.h> | |
9 #include "structs.h" | |
10 #include "alignmentFunctions.h" | |
11 #include "commonFunctions.h" | |
12 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | |
13 #define MIN(x, y) (((x) <= (y)) ? (x) : (y)) | |
14 | |
15 int64_t compare_letters(unsigned char a, unsigned char b){ | |
16 if(a != (unsigned char) 'N' && a != (unsigned char) '>') return (a == b) ? POINT : -POINT; | |
17 return -POINT; | |
18 } | |
19 | |
20 llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used){ | |
21 | |
22 if(mp[*n_pools_used].current == POOL_SIZE){ | |
23 *n_pools_used += 1; | |
24 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools"); | |
25 init_mem_pool_llpos(&mp[*n_pools_used]); | |
26 | |
27 } | |
28 | |
29 llpos * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current; | |
30 mp[*n_pools_used].current++; | |
31 | |
32 | |
33 return new_pos; | |
34 } | |
35 | |
36 void init_mem_pool_llpos(Mempool_l * mp){ | |
37 mp->base = (llpos *) calloc(POOL_SIZE, sizeof(llpos)); | |
38 if(mp->base == NULL) terror("Could not request memory pool"); | |
39 mp->current = 0; | |
40 } | |
41 | |
42 | |
43 void * load_input(void * a){ | |
44 | |
45 LoadingDBArgs * ldbargs = (LoadingDBArgs *) a; | |
46 | |
47 // Requires | |
48 /* | |
49 char * temp_seq_buffer; | |
50 SeqInfo * data_database; 1 per thread | |
51 uint64_t t_len; | |
52 uint64_t word_size; | |
53 uint64_t read_from; | |
54 uint64_t read_to; | |
55 char thread_id; | |
56 */ | |
57 | |
58 uint64_t c_pos; | |
59 | |
60 unsigned char curr_kmer[custom_kmer]; | |
61 unsigned char aux_kmer[custom_kmer+1]; | |
62 curr_kmer[0] = '\0'; | |
63 uint64_t word_size = 0, pos_in_database = 0; | |
64 unsigned char char_converter[91]; | |
65 uint64_t curr_seq = 0; | |
66 char_converter[(unsigned char)'A'] = 0; | |
67 char_converter[(unsigned char)'C'] = 1; | |
68 char_converter[(unsigned char)'G'] = 2; | |
69 char_converter[(unsigned char)'T'] = 3; | |
70 //llpos * aux; | |
71 AVLTree * pointer; | |
72 | |
73 char c; | |
74 | |
75 /* | |
76 if(ldbargs->thread_id == 'A'){ | |
77 printf("read to is: %"PRIu64"\n", ldbargs->read_to); | |
78 printf("make sure: %c %c %c\n", ldbargs->temp_seq_buffer[ldbargs->read_to-1], ldbargs->temp_seq_buffer[ldbargs->read_to], ldbargs->temp_seq_buffer[ldbargs->read_to+1]); | |
79 | |
80 uint64_t z = ldbargs->read_to-1; | |
81 while(ldbargs->temp_seq_buffer[z] != '>'){ | |
82 printf("%c", ldbargs->temp_seq_buffer[z]); | |
83 z--; | |
84 } | |
85 getchar(); | |
86 } | |
87 if(ldbargs->thread_id == 'C'){ | |
88 printf("HELLOOOOOOO im going from %"PRIu64"\n", ldbargs->read_from); | |
89 } | |
90 */ | |
91 | |
92 c_pos = ldbargs->read_from; | |
93 while(ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos; | |
94 ldbargs->read_from = c_pos; | |
95 c_pos = ldbargs->read_to; | |
96 while(c_pos < ldbargs->t_len && ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos; | |
97 ldbargs->read_to = c_pos; | |
98 | |
99 c_pos = ldbargs->read_from; | |
100 c = ldbargs->temp_seq_buffer[c_pos]; | |
101 | |
102 | |
103 //printf("thread going from %"PRIu64" to %"PRIu64"\n", ldbargs->read_from, ldbargs->read_to); | |
104 | |
105 | |
106 while(c_pos < ldbargs->read_to){ | |
107 | |
108 | |
109 if(c == '>'){ | |
110 | |
111 //if(ldbargs->thread_id == 'G') printf("putting in %"PRIu64" @ %"PRIu64"\n", curr_seq, c_pos); | |
112 ldbargs->data_database->start_pos[curr_seq] = pos_in_database; ++curr_seq; | |
113 | |
114 // REalloc sequences and sequence index | |
115 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
116 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
117 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
118 } | |
119 | |
120 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
121 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
122 } | |
123 | |
124 | |
125 | |
126 while(c != '\n'){ c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; } //Skip ID | |
127 | |
128 while(c != '>' && c_pos < ldbargs->read_to){ //Until next id | |
129 | |
130 //if(ldbargs->thread_id == 'A') printf("!!!!!!%"PRIu64" from:%"PRIu64", to %"PRIu64"\n", c_pos, ldbargs->read_from, ldbargs->read_to); | |
131 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; | |
132 c = toupper(c); | |
133 if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){ | |
134 curr_kmer[word_size] = (unsigned char) c; | |
135 if(word_size < custom_kmer) ++word_size; | |
136 | |
137 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) c; ++pos_in_database; | |
138 // REalloc sequences and sequence index | |
139 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
140 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
141 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
142 } | |
143 | |
144 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
145 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
146 } | |
147 }else{ //It can be anything (including N, Y, X ...) | |
148 | |
149 if(c != '\n' && c != '\r' && c != '>'){ | |
150 word_size = 0; | |
151 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) 'N'; ++pos_in_database; //Convert to N | |
152 // REalloc sequences and sequence index | |
153 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
154 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
155 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
156 } | |
157 | |
158 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
159 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
160 } | |
161 } | |
162 } | |
163 if(word_size == custom_kmer){ | |
164 //write to hash table | |
165 | |
166 | |
167 pointer = &ldbargs->ct->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
168 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
169 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
170 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
171 | |
172 | |
173 pointer = insert_AVLTree(pointer, hashOfWord(&curr_kmer[FIXED_K], custom_kmer-FIXED_K), ldbargs->mp_AVL, &ldbargs->n_pools_used_AVL, pos_in_database, ldbargs->mp, &ldbargs->n_pools_used, curr_seq-1); | |
174 | |
175 | |
176 | |
177 // CURRENTLY USING OVERLAPPING | |
178 | |
179 memcpy(aux_kmer, &curr_kmer[1], custom_kmer-1); | |
180 memcpy(curr_kmer, aux_kmer, custom_kmer-1); | |
181 word_size--; | |
182 | |
183 // For NON OVERLAPPING ENABLE THIS | |
184 //word_size = 0; | |
185 } | |
186 | |
187 } | |
188 word_size = 0; | |
189 }else{ | |
190 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; | |
191 } | |
192 | |
193 } | |
194 /* | |
195 if(ldbargs->thread_id == 'T'){ | |
196 uint64_t j; | |
197 for(j=0; j < curr_seq-1; j++){ | |
198 printf("%"PRIu64" - %"PRIu64"\n", ldbargs->data_database->start_pos[j], ldbargs->data_database->start_pos[j+1]); | |
199 } | |
200 } | |
201 */ | |
202 /* | |
203 if(ldbargs->thread_id == 'A'){ | |
204 printf("\nAT %"PRIu64", and pos_database = %"PRIu64"\n", ldbargs->data_database->start_pos[curr_seq-1], pos_in_database); | |
205 uint64_t z = pos_in_database; | |
206 while(z > ldbargs->data_database->start_pos[curr_seq-1]){ | |
207 printf("%c", ldbargs->data_database->sequences[z]); | |
208 z--; | |
209 } | |
210 | |
211 getchar(); | |
212 } | |
213 */ | |
214 | |
215 ldbargs->data_database->start_pos[curr_seq] = pos_in_database; | |
216 ldbargs->data_database->total_len = pos_in_database; | |
217 ldbargs->contained_reads = curr_seq; | |
218 ldbargs->data_database->n_seqs = curr_seq; | |
219 ldbargs->base_coordinates = pos_in_database; | |
220 return NULL; | |
221 } | |
222 | |
223 | |
224 void * computeAlignmentsByThread(void * a){ | |
225 | |
226 /* | |
227 typedef struct { | |
228 SeqInfo * database; //Database sequence and lengths | |
229 SeqInfo * query; //Query sequence and lengths | |
230 uint64_t from; //Starting READ to compute alignments from | |
231 uint64_t to; //End READ to compute alignments from | |
232 Container * container; //Container to hold the multidimensional array | |
233 uint64_t accepted_query_reads; //Number of reads that have a fragment with evalue less than specified | |
234 long double min_e_value; //Minimum evalue to accept read | |
235 } HashTableArgs; | |
236 */ | |
237 | |
238 HashTableArgs * hta = (HashTableArgs *) a; | |
239 Queue * my_current_task = NULL; | |
240 | |
241 unsigned char char_converter[91]; | |
242 char_converter[(unsigned char)'A'] = 0; | |
243 char_converter[(unsigned char)'C'] = 1; | |
244 char_converter[(unsigned char)'G'] = 2; | |
245 char_converter[(unsigned char)'T'] = 3; | |
246 Quickfrag qf; | |
247 int64_t * cell_path_y = (int64_t *) malloc(MAX_READ_SIZE*sizeof(int64_t)); | |
248 if(cell_path_y == NULL) terror("Could not allocate cell paths"); | |
249 | |
250 | |
251 Point p0, p1, p2, p3; //Points for NW anchored | |
252 p0.x = 0; p0.y = 0; | |
253 | |
254 AVLContainer * ptr_table_redirect[4]; | |
255 ptr_table_redirect[0] = hta->container_a; | |
256 ptr_table_redirect[1] = hta->container_b; | |
257 ptr_table_redirect[2] = hta->container_c; | |
258 ptr_table_redirect[3] = hta->container_d; | |
259 unsigned char current_table = 0; | |
260 | |
261 | |
262 | |
263 //To keep track of which reads are we reading | |
264 uint64_t curr_read, curr_db_seq, xlen, ylen; | |
265 uint64_t crrSeqL, pos_of_hit = 0xFFFFFFFFFFFFFFFF; | |
266 | |
267 //Reading from buffer | |
268 char c; | |
269 unsigned char curr_kmer[custom_kmer], b_aux[custom_kmer]; | |
270 llpos * aux; | |
271 AVLTree * pointer; | |
272 | |
273 // For NW-alignment | |
274 int NWaligned; | |
275 uint64_t n_hits, alignments_tried; | |
276 | |
277 BasicAlignment ba; //The resulting alignment from the NW | |
278 uint64_t curr_pos = 0; //Reading-head position | |
279 uint64_t up_to = 0; | |
280 | |
281 int64_t last_diagonal = INT64_MIN; // Diagonal to skip repeated hits | |
282 unsigned char already_aligned = FALSE; // To not count more times the same read | |
283 | |
284 | |
285 | |
286 | |
287 //Get next operation in queue | |
288 while(NULL != ( my_current_task = get_task_from_queue(hta->queue_head, hta->lock))){ | |
289 //Initialize all variables | |
290 | |
291 qf.x_start = qf.y_start = qf.t_len = 0; | |
292 qf.e_value = LDBL_MAX; | |
293 last_diagonal = INT64_MIN; | |
294 | |
295 //Starting from | |
296 curr_read = my_current_task->r1; | |
297 crrSeqL = 0; pos_of_hit = 0; | |
298 | |
299 curr_kmer[0] = '\0'; b_aux[0] = '\0'; | |
300 aux = NULL; | |
301 pointer = NULL; | |
302 | |
303 NWaligned = 0; | |
304 n_hits = 0; | |
305 alignments_tried = 0; | |
306 | |
307 ba.identities = 0; ba.length = 0; ba.igaps = 0xFFFFFFFFFFFFFFFF; ba.egaps = 0xFFFFFFFFFFFFFFFF; | |
308 memset(&hta->markers[0], 0, hta->database->n_seqs); // Reset used tags | |
309 already_aligned = FALSE; | |
310 | |
311 //Set current header position at the position of the read start (the ">") | |
312 curr_pos = hta->query->start_pos[curr_read]; //Skip the ">" | |
313 c = (char) hta->query->sequences[curr_pos]; | |
314 | |
315 //printf("Im doing from %"PRIu64" to %"PRIu64", nseqs=%"PRIu64"\n", my_current_task->r1, my_current_task->r2, hta->query->n_seqs); | |
316 //getchar(); | |
317 | |
318 while(curr_read < my_current_task->r2 && curr_pos < hta->query->total_len){ | |
319 | |
320 | |
321 | |
322 if(curr_read != hta->query->n_seqs) up_to = hta->query->start_pos[curr_read+1]-1; else up_to = hta->query->total_len; | |
323 //printf("Currrpos: %"PRIu64" up to: %"PRIu64" on read: %"PRIu64"\n", curr_pos, up_to, curr_read); | |
324 | |
325 if (curr_pos == up_to) { // Comment, empty or quality (+) line | |
326 crrSeqL = 0; // Reset buffered sequence length | |
327 #ifdef VERBOSE | |
328 printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned); | |
329 #endif | |
330 //if(NWaligned == 0){ printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned);} | |
331 //printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned); if(NWaligned == 0) getchar(); | |
332 NWaligned = 0; | |
333 //fprintf(stdout, "Seq %"PRIu64" has %"PRIu64" hits and tried to align %"PRIu64" times\n", curr_read, n_hits, alignments_tried); | |
334 //fflush(stdout); | |
335 n_hits = 0; | |
336 already_aligned = FALSE; | |
337 alignments_tried = 0; | |
338 last_diagonal = INT64_MIN; // This is not perfect but if the diagonal reaches the value then we have an overflow anyway | |
339 qf.x_start = 0; | |
340 qf.t_len = 0; | |
341 | |
342 //if(hta->full_comp == TRUE) memset(&hta->markers[my_current_task->r1], 0, my_current_task->r2 - my_current_task->r1 + 1); // Reset used tags | |
343 memset(&hta->markers[0], FALSE, hta->database->n_seqs); // Reset used tags | |
344 | |
345 curr_read++; | |
346 //printf("On current read %"PRIu64"\n", curr_read); | |
347 continue; | |
348 } | |
349 | |
350 if(c == 'A' || c == 'C' || c == 'T' || c == 'G'){ | |
351 curr_kmer[crrSeqL] = (unsigned char) c; | |
352 crrSeqL++; | |
353 }else{ | |
354 crrSeqL = 0; | |
355 } | |
356 | |
357 if (crrSeqL >= custom_kmer) { // Full well formed sequence | |
358 | |
359 | |
360 //printf("comparing hit: %.11s\n", (char *)&curr_kmer[0]); | |
361 //getchar(); | |
362 | |
363 // Choose table | |
364 /* | |
365 if(curr_kmer[0] == (unsigned char) 'A'){ | |
366 ptr_table_redirect = hta->container_A; | |
367 }else if(curr_kmer[0] == (unsigned char) 'C'){ | |
368 ptr_table_redirect = hta->container_C; | |
369 }else if(curr_kmer[0] == (unsigned char) 'G'){ | |
370 ptr_table_redirect = hta->container_G; | |
371 }else{ | |
372 ptr_table_redirect = hta->container_T; | |
373 } | |
374 */ | |
375 | |
376 //fprintf(stdout, "%s\n", curr_kmer); | |
377 //fflush(stdout); | |
378 current_table = 0; | |
379 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
380 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
381 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
382 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
383 | |
384 if(pointer == NULL){ | |
385 ++current_table; | |
386 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
387 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
388 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
389 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
390 } | |
391 if(pointer == NULL){ | |
392 ++current_table; | |
393 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
394 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
395 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
396 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
397 } | |
398 if(pointer == NULL){ | |
399 ++current_table; | |
400 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
401 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
402 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
403 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
404 } | |
405 | |
406 //While there are hits | |
407 //fprintf(stdout, "%p\n", aux); | |
408 //fflush(stdout); | |
409 | |
410 uint64_t hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K); | |
411 AVLTree * search = find_AVLTree(pointer, hash_forward); | |
412 if(search != NULL) aux = search->next; else aux = NULL; | |
413 | |
414 | |
415 while(aux != NULL && ((hta->full_comp == FALSE && NWaligned == 0 && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0) || (hta->full_comp && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0))){ | |
416 | |
417 n_hits++; | |
418 //fprintf(stdout, "%p\n", aux); | |
419 //fflush(stdout); | |
420 // ADD OFFSET CUCOOOOOOOOOOOOOOOOOOOOOO!!!!!!!!!!!!!!! | |
421 //printf("my current table is %u\n", current_table); | |
422 //printf("check this woop: %"PRIu64" - %"PRIu64" - %"PRIu64" - %"PRIu64"\n", aux->s_id, hta->contained_reads[current_table], aux->pos, hta->base_coordinates[current_table]); | |
423 curr_db_seq = aux->s_id + hta->contained_reads[current_table]; | |
424 pos_of_hit = aux->pos + hta->base_coordinates[current_table]; | |
425 //printf("Pos of hit db: %"PRIu64", seq num %"PRIu64", contained reads: %"PRIu64", contained coord %"PRIu64"\n", pos_of_hit, curr_db_seq, hta->contained_reads[current_table], hta->base_coordinates[current_table]); | |
426 if(hta->hits != NULL){ | |
427 hta->hits[curr_db_seq]++; | |
428 goto only_hits; // Count only hits and skip the rest | |
429 } | |
430 | |
431 //fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
432 /* | |
433 if(curr_read == 534) fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
434 */ | |
435 #ifdef VERBOSE | |
436 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
437 #endif | |
438 int64_t curr_diagonal = (int64_t)(curr_pos+1) - (int64_t) pos_of_hit; | |
439 | |
440 | |
441 | |
442 if( (last_diagonal != curr_diagonal && !(qf.x_start <= (pos_of_hit + custom_kmer) && pos_of_hit <= (qf.x_start + qf.t_len)))){ | |
443 | |
444 /* | |
445 if(curr_db_seq == hta->database->n_seqs-1){ | |
446 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq]; | |
447 }else{ | |
448 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq]; | |
449 } | |
450 if(curr_read == hta->query->n_seqs-1){ | |
451 ylen = hta->query->total_len - hta->query->start_pos[curr_read]; | |
452 }else{ | |
453 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read]; | |
454 } | |
455 */ | |
456 | |
457 /*if(curr_read == 35){ | |
458 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
459 printf("what do you think will happen: %"PRIu64"\n", hta->database->start_pos[curr_db_seq]); | |
460 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen); | |
461 getchar(); | |
462 }*/ | |
463 | |
464 | |
465 //printf("accepted because: \n"); | |
466 /*if(curr_read % 100 == 0){ | |
467 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
468 } | |
469 */ | |
470 //printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal); | |
471 //printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar(); | |
472 if(hta->markers[curr_db_seq] == FALSE){ | |
473 | |
474 //if(current_table > 0) getchar(); | |
475 alignmentFromQuickHits(hta->database, hta->query, pos_of_hit, curr_pos+1, curr_read, curr_db_seq, &qf, hta->contained_reads[current_table], hta->base_coordinates[current_table]); | |
476 last_diagonal = curr_diagonal; | |
477 }else{ | |
478 qf.e_value = 100000000; | |
479 } | |
480 | |
481 //if(curr_read == 35) printf(" evalue: %Le from %"PRIu64", %"PRIu64" with l: %"PRIu64"\n", qf.e_value, qf.x_start, qf.y_start, qf.t_len); | |
482 }else{ | |
483 | |
484 /*if(curr_read == 35){ | |
485 printf("rejected because: \n"); | |
486 fprintf(stdout, "UNLaunching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
487 printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal); | |
488 printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar(); | |
489 }*/ | |
490 | |
491 qf.e_value = 100000000; | |
492 } | |
493 | |
494 #ifdef VERBOSE | |
495 printf(" evalue: %Le %"PRIu64"\n", qf.e_value, qf.t_len); | |
496 #endif | |
497 //getchar(); | |
498 | |
499 | |
500 | |
501 | |
502 | |
503 //If e-value of current frag is good, then we compute a good gapped alignment | |
504 if(qf.e_value < hta->min_e_value /*&& xlen == 799 && ylen == 2497*/){ | |
505 alignments_tried++; | |
506 ba.identities = ba.length = ba.igaps = ba.egaps = 0; | |
507 //Compute lengths of reads | |
508 if(curr_db_seq == hta->database->n_seqs-1){ | |
509 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq]; | |
510 }else{ | |
511 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq]; | |
512 //printf("!!!\n%"PRIu64", %"PRIu64" :: %"PRIu64"; its db->start_pos[curr_db_seq] db->start_pos[curr_db_seq+1] curr_db_seq\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1], curr_db_seq); | |
513 } | |
514 if(curr_read == hta->query->n_seqs-1){ | |
515 ylen = hta->query->total_len - hta->query->start_pos[curr_read]; | |
516 }else{ | |
517 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read]; | |
518 } | |
519 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen); | |
520 //Perform alignment plus backtracking | |
521 //void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, char * my_x, char * my_y, struct cell ** table, struct cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen) | |
522 if(xlen > MAX_READ_SIZE || ylen > MAX_READ_SIZE){ printf("(%"PRIu64",%"PRIu64")\n", xlen, ylen); terror("Read size reached for gapped alignment."); } | |
523 //fprintf(stdout, "R0 %"PRIu64", %"PRIu64"\n", curr_db_seq, curr_read); | |
524 | |
525 | |
526 #ifdef VERBOSE | |
527 fprintf(stdout, "qfxs %"PRIu64", dbs %"PRIu64", qfys %"PRIu64" qys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]); | |
528 #endif | |
529 | |
530 //fprintf(stdout, "dbFragxs %"PRIu64", dbs %"PRIu64", rFragys %"PRIu64" rys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]); | |
531 /* | |
532 printf("at table: %u\n", current_table); | |
533 fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
534 fprintf(stdout, "Launching NW %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
535 printf("have len: %"PRIu64", %"PRIu64"\n", xlen, ylen); | |
536 printf("Quickfrag (xs, ys): qf::%"PRIu64", %"PRIu64", tlen:%"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len); | |
537 printf("Yea, but start and end of read in db is: %"PRIu64" - %"PRIu64"\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1]); | |
538 */ | |
539 | |
540 p1.x = qf.x_start - hta->database->start_pos[curr_db_seq]; | |
541 //p1.y = qf.y_start - hta->query->start_pos[curr_read]; | |
542 p1.y = qf.y_start - (hta->query->start_pos[curr_read] -1); | |
543 p2.x = p1.x + qf.t_len; | |
544 p2.y = p1.y + qf.t_len; | |
545 p3.x = xlen; | |
546 p3.y = ylen; | |
547 | |
548 #ifdef VERBOSE | |
549 fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y); | |
550 #endif | |
551 //fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y); | |
552 | |
553 calculate_y_cell_path(p0, p1, p2, p3, cell_path_y); | |
554 | |
555 // REMOVE | |
556 /* | |
557 uint64_t r1,r2; | |
558 for(r1=0;r1<MAX_WINDOW_SIZE;r1++){ | |
559 for(r2=0;r2<MAX_WINDOW_SIZE;r2++){ | |
560 hta->table[r1][r2].score = INT64_MIN; | |
561 } | |
562 } | |
563 */ | |
564 | |
565 build_alignment(hta->reconstruct_X, hta->reconstruct_Y, curr_db_seq, curr_read, hta, hta->my_x, hta->my_y, hta->table, hta->mc, hta->writing_buffer_alignment, &ba, xlen, ylen, cell_path_y, &hta->window); | |
566 | |
567 // Set the read to already aligned so that it does not repeat | |
568 hta->markers[curr_db_seq] = 1; | |
569 | |
570 #ifdef VERBOSE | |
571 printf("len 1 %"PRIu64", len 2 %"PRIu64"\n", ba.length, ylen); | |
572 printf("ident %"PRIu64"\n", ba.identities); | |
573 #endif | |
574 | |
575 //If is good | |
576 if(((long double)(ba.length-(ba.igaps+ba.egaps))/ylen) >= hta->min_coverage && ((long double)ba.identities/(ba.length-(ba.igaps+ba.egaps))) >= hta->min_identity){ | |
577 if(already_aligned == FALSE){ | |
578 hta->accepted_query_reads++; | |
579 already_aligned = TRUE; | |
580 //printf("accepted: %"PRIu64"\n", hta->accepted_query_reads); | |
581 } | |
582 | |
583 hta->markers[curr_db_seq] = 1; | |
584 if(hta->out != NULL){ | |
585 //printf("Last was: (%"PRIu64", %"PRIu64")\n", curr_read, curr_db_seq); | |
586 fprintf(hta->out, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*(ba.length-(ba.igaps+ba.egaps))/ylen)), MIN(100,(int)((long double)100*ba.identities/(ba.length-(ba.igaps+ba.egaps)))), ylen); | |
587 fprintf(hta->out, "%s", hta->writing_buffer_alignment); | |
588 //fprintf(stdout, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*ba.identities/ba.length)), MIN(100,(int)(100*ba.length/ylen)), ylen); | |
589 //fprintf(stdout, "%s", hta->writing_buffer_alignment); | |
590 } | |
591 NWaligned = 1; | |
592 }/*else{ | |
593 printf("what: "); | |
594 printf("len x %"PRIu64", len y %"PRIu64"\n", xlen, ylen); | |
595 printf("ident %"PRIu64" len %"PRIu64"\n", ba.identities, ba.length); getchar(); | |
596 }*/ | |
597 | |
598 } | |
599 | |
600 //strncpy(get_from_db, &hta->database->sequences[qf.x_start], qf.t_len); | |
601 //strncpy(get_from_query, &hta->query->sequences[qf.y_start], qf.t_len); | |
602 //fprintf(hta->out, "%s\n%s\n%Le\t%d\n-------------------\n", get_from_db, get_from_query, qf.e_value, (int)(100*qf.coverage)); | |
603 //fprintf(hta->out, "%"PRIu64", %"PRIu64", %"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len); | |
604 | |
605 //printf("Hit comes from %"PRIu64", %"PRIu64"\n", pos_of_hit, curr_pos); | |
606 only_hits: | |
607 aux = aux->next; | |
608 while(aux == NULL && current_table < FIXED_LOADING_THREADS-1){ | |
609 ++current_table; | |
610 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
611 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
612 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
613 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
614 hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K); | |
615 search = find_AVLTree(pointer, hash_forward); | |
616 if(search != NULL) aux = search->next; else aux = NULL; | |
617 } | |
618 //fprintf(stdout, "%p\n", aux); | |
619 //fflush(stdout); | |
620 } | |
621 //printf("SWITCHED\n"); | |
622 | |
623 if(NWaligned == 1 && hta->full_comp == FALSE){ | |
624 if(curr_read < hta->query->n_seqs) curr_pos = hta->query->start_pos[curr_read+1]-2; | |
625 }else{ | |
626 memcpy(b_aux, curr_kmer, custom_kmer); | |
627 memcpy(curr_kmer, &b_aux[1], custom_kmer-1); | |
628 crrSeqL -= 1; | |
629 } | |
630 } | |
631 //printf("current pos: %"PRIu64"\n", curr_pos); | |
632 curr_pos++; | |
633 if(curr_pos < hta->query->total_len) c = (char) hta->query->sequences[curr_pos]; | |
634 | |
635 | |
636 | |
637 } | |
638 | |
639 | |
640 | |
641 | |
642 | |
643 } | |
644 | |
645 | |
646 | |
647 | |
648 | |
649 //fprintf(stdout, "Going from %"PRIu64" to %"PRIu64"\n", hta->from, hta->to); | |
650 //fflush(stdout); | |
651 | |
652 free(cell_path_y); | |
653 | |
654 return NULL; | |
655 | |
656 } | |
657 | |
658 void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, unsigned char * my_x, unsigned char * my_y, struct cell ** table, struct positioned_cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen, int64_t * cell_path_y, long double * window){ | |
659 | |
660 | |
661 //Do some printing of alignments here | |
662 uint64_t maximum_len, i, j, curr_pos_buffer, curr_window_size; | |
663 | |
664 maximum_len = 2*MAX(xlen,ylen); | |
665 memcpy(my_x, &hta->database->sequences[hta->database->start_pos[curr_db_seq]], xlen); | |
666 memcpy(my_y, &hta->query->sequences[hta->query->start_pos[curr_read]], ylen); | |
667 | |
668 struct best_cell bc = NW(my_x, 0, xlen, my_y, 0, ylen, (int64_t) hta->igap, (int64_t) hta->egap, table, mc, 0, cell_path_y, window, &curr_window_size); | |
669 backtrackingNW(my_x, 0, xlen, my_y, 0, ylen, table, reconstruct_X, reconstruct_Y, &bc, &i, &j, ba, cell_path_y, curr_window_size); | |
670 uint64_t offset = 0, before_i = 0, before_j = 0; | |
671 i++; j++; | |
672 | |
673 #ifdef VERBOSE | |
674 uint64_t z=0; | |
675 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_X[z]); | |
676 printf("\n"); | |
677 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_Y[z]); | |
678 #endif | |
679 | |
680 curr_pos_buffer = 0; | |
681 while(i <= maximum_len && j <= maximum_len){ | |
682 offset = 0; | |
683 before_i = i; | |
684 writing_buffer_alignment[curr_pos_buffer++] = 'D'; | |
685 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
686 while(offset < ALIGN_LEN && i <= maximum_len){ | |
687 //fprintf(stdout, "%c", reconstruct_X[i]); | |
688 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_X[i]; | |
689 i++; | |
690 offset++; | |
691 } | |
692 //fprintf(out, "\n"); | |
693 | |
694 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
695 offset = 0; | |
696 before_j = j; | |
697 | |
698 //fprintf(stdout, "\n"); | |
699 | |
700 writing_buffer_alignment[curr_pos_buffer++] = 'Q'; | |
701 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
702 | |
703 while(offset < ALIGN_LEN && j <= maximum_len){ | |
704 //fprintf(stdout, "%c", reconstruct_Y[j]); | |
705 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_Y[j]; | |
706 j++; | |
707 offset++; | |
708 } | |
709 //fprintf(out, "\n"); | |
710 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
711 writing_buffer_alignment[curr_pos_buffer++] = ' '; | |
712 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
713 while(before_i < i){ | |
714 if(reconstruct_X[before_i] != '-' && reconstruct_Y[before_j] != '-' && reconstruct_X[before_i] == reconstruct_Y[before_j]){ | |
715 //fprintf(out, "*"); | |
716 writing_buffer_alignment[curr_pos_buffer++] = '*'; | |
717 ba->identities++; | |
718 }else{ | |
719 //fprintf(out, " "); | |
720 writing_buffer_alignment[curr_pos_buffer++] = ' '; | |
721 } | |
722 before_j++; | |
723 before_i++; | |
724 } | |
725 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
726 | |
727 } | |
728 //fprintf(out, "\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); | |
729 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
730 writing_buffer_alignment[curr_pos_buffer++] = '\0'; | |
731 | |
732 | |
733 } | |
734 | |
735 void alignmentFromQuickHits(SeqInfo * database, SeqInfo * query, uint64_t pos_database, uint64_t pos_query, uint64_t curr_read, uint64_t curr_db_seq, Quickfrag * qf, uint64_t offset_db_reads, uint64_t offset_db_coordinates){ | |
736 | |
737 int64_t read_x_start, read_x_end, read_y_start, read_y_end; | |
738 | |
739 if(curr_db_seq == database->n_seqs-1){ | |
740 read_x_start = database->start_pos[curr_db_seq]; | |
741 read_x_end = database->total_len; | |
742 }else{ | |
743 read_x_start = database->start_pos[curr_db_seq]; | |
744 read_x_end = database->start_pos[curr_db_seq+1] - 1; | |
745 } | |
746 | |
747 //printf("read x start -> %"PRId64", end -> %"PRId64" btw: %"PRIu64"\n", read_x_start, read_x_end, database->n_seqs); | |
748 | |
749 if(curr_read == query->n_seqs-1){ | |
750 read_y_start = query->start_pos[curr_read]; | |
751 read_y_end = query->total_len; | |
752 }else{ | |
753 read_y_start = query->start_pos[curr_read]; | |
754 read_y_end = query->start_pos[curr_read+1] - 1; | |
755 } | |
756 | |
757 //printf("db_end %"PRId64" query_end %"PRId64"\n", read_x_end, read_y_end); | |
758 //printf("pos database: %"PRIu64"\n", pos_database); | |
759 int64_t curr_pos_db = (int64_t) pos_database; | |
760 int64_t curr_pos_qy = (int64_t) pos_query; | |
761 int64_t final_end_x = (int64_t) pos_database - 1, final_start_x = final_end_x - custom_kmer + 1, final_start_y = pos_query - custom_kmer; | |
762 int64_t score_right = custom_kmer * POINT; | |
763 int64_t score_left = score_right; | |
764 int64_t high_left = score_left, high_right = score_right; | |
765 qf->t_len = custom_kmer; | |
766 uint64_t idents = custom_kmer; | |
767 | |
768 /* | |
769 char le_hit[1000]; | |
770 memcpy(le_hit, &database->sequences[final_start_x], FIXED_K); | |
771 | |
772 fprintf(stdout, "HIT: %s\n", le_hit); | |
773 fflush(stdout); | |
774 */ | |
775 //printf("final start x: %"PRId64"\n", final_start_x); | |
776 int keep_going = 1; | |
777 | |
778 //Forward search | |
779 while(keep_going == 1){ | |
780 | |
781 | |
782 if(score_right > 0 && curr_pos_db < database->total_len && curr_pos_qy < query->total_len){ | |
783 if(curr_pos_db > read_x_end || curr_pos_qy > read_y_end) break; | |
784 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_right+=POINT; idents++; }else{ score_right-=POINT;} | |
785 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_right+=POINT; idents++; }else{ score_right-=POINT;} | |
786 if(high_right <= score_right){ | |
787 final_end_x = curr_pos_db; | |
788 high_right = score_right; | |
789 } | |
790 curr_pos_db++; | |
791 curr_pos_qy++; | |
792 }else{ | |
793 keep_going = 0; | |
794 } | |
795 } | |
796 | |
797 //printf("pos here %"PRIu64" curr_pos_db, curr_pos_query %"PRIu64"\n", curr_pos_db, curr_pos_qy); | |
798 //printf("final start x: %"PRId64"\n", final_start_x); | |
799 keep_going = 1; | |
800 curr_pos_db = pos_database - custom_kmer - 1; | |
801 curr_pos_qy = pos_query - custom_kmer - 1; | |
802 | |
803 score_left = high_right; | |
804 | |
805 //Backward search | |
806 while(keep_going == 1){ | |
807 | |
808 if(score_left > 0 && curr_pos_db >= 0 && curr_pos_qy >= 0){ | |
809 if(curr_pos_db < read_x_start || curr_pos_qy < read_y_start ) break; | |
810 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_left+=POINT; idents++; }else{ score_left-=POINT;} | |
811 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_left+=POINT; idents++; }else{ score_left-=POINT;} | |
812 if(high_left <= score_left){ | |
813 final_start_x = curr_pos_db; | |
814 final_start_y = curr_pos_qy; | |
815 //printf("got %"PRIu64" when min is %"PRIu64"\n", final_start_y, read_y_start); | |
816 high_left = score_left; | |
817 } | |
818 curr_pos_db--; | |
819 curr_pos_qy--; | |
820 }else{ | |
821 keep_going = 0; | |
822 } | |
823 } | |
824 | |
825 qf->t_len = final_end_x - final_start_x; | |
826 | |
827 /* | |
828 char s1[1000]; | |
829 char s2[1000]; | |
830 | |
831 memcpy(s1, &database->sequences[final_start_x], qf->t_len); | |
832 memcpy(s2, &query->sequences[final_start_y], qf->t_len); | |
833 s1[qf->t_len] = '\0'; | |
834 s2[qf->t_len] = '\0'; | |
835 | |
836 fprintf(stdout, "%s\n%s\n------\n", s1, s2); | |
837 fflush(stdout); | |
838 | |
839 printf("the real hit was:\n"); | |
840 memcpy(s1, &database->sequences[pos_database-12+1], 12); | |
841 memcpy(s2, &query->sequences[pos_query-12+1], 12); | |
842 s1[12] = '\0'; | |
843 s2[12] = '\0'; | |
844 | |
845 fprintf(stdout, "%s\n%s\n------\n", s1, s2); | |
846 fflush(stdout); | |
847 //getchar(); | |
848 printf("%"PRIu64"\n", idents); | |
849 */ | |
850 | |
851 long double rawscore = (idents*POINT) - (qf->t_len - idents)*(POINT); | |
852 | |
853 long double t_len; | |
854 if(curr_read == query->n_seqs-1){ | |
855 t_len = (long double) query->total_len - query->start_pos[curr_read]; | |
856 }else{ | |
857 t_len = (long double) query->start_pos[curr_read+1] - query->start_pos[curr_read]; | |
858 } | |
859 //printf("final start x: %"PRId64"\n", final_start_x); | |
860 qf->x_start = final_start_x; | |
861 qf->y_start = final_start_y; | |
862 qf->e_value = (long double) QF_KARLIN*t_len*database->total_len*expl(-QF_LAMBDA * rawscore); | |
863 qf->coverage = qf->t_len / t_len; | |
864 | |
865 } | |
866 | |
867 void calculate_y_cell_path(Point p0, Point p1, Point p2, Point p3, int64_t * y_points){ | |
868 | |
869 //Calculate lines between points | |
870 uint64_t i; | |
871 | |
872 #ifdef VERBOSE | |
873 printf("Built on\n"); | |
874 printf("(%"PRIu64", %"PRIu64")\n", p0.x, p0.y); | |
875 printf("(%"PRIu64", %"PRIu64")\n", p1.x, p1.y); | |
876 printf("(%"PRIu64", %"PRIu64")\n", p2.x, p2.y); | |
877 printf("(%"PRIu64", %"PRIu64")\n", p3.x, p3.y); | |
878 #endif | |
879 | |
880 | |
881 | |
882 if(p0.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p0.x); terror("Reached max length in read for anchoring procedure (1)"); } | |
883 if(p1.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p1.x); terror("Reached max length in read for anchoring procedure (2)"); } | |
884 if(p2.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p2.x); terror("Reached max length in read for anchoring procedure (3)"); } | |
885 if(p3.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p3.x); terror("Reached max length in read for anchoring procedure (4)"); } | |
886 | |
887 long double deltax, deltay, deltaerr, error; | |
888 uint64_t y; | |
889 | |
890 //P0 to P1 | |
891 deltax = p1.x - p0.x; | |
892 deltay = p1.y - p0.y; | |
893 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
894 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
895 error = deltaerr - 0.5; | |
896 y = p0.y; | |
897 | |
898 for(i=p0.x;i<p1.x;i++){ | |
899 y_points[i] = (int64_t) y; | |
900 error = error + deltaerr; | |
901 if(error >= 0.5){ | |
902 y++; | |
903 error = error - 1; | |
904 } | |
905 } | |
906 | |
907 //P1 to P2 | |
908 | |
909 deltax = p2.x - p1.x; | |
910 deltay = p2.y - p1.y; | |
911 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
912 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
913 error = deltaerr - 0.5; | |
914 y = p1.y; | |
915 | |
916 for(i=p1.x;i<p2.x;i++){ | |
917 y_points[i] = (int64_t) y; | |
918 error = error + deltaerr; | |
919 if(error >= 0.5){ | |
920 y++; | |
921 error = error - 1; | |
922 } | |
923 } | |
924 | |
925 //P2 to P3 | |
926 | |
927 deltax = p3.x - p2.x; | |
928 deltay = p3.y - p2.y; | |
929 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
930 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
931 error = deltaerr - 0.5; | |
932 y = p2.y; | |
933 | |
934 for(i=p2.x;i<p3.x;i++){ | |
935 y_points[i] = (int64_t) y; | |
936 error = error + deltaerr; | |
937 if(error >= 0.5){ | |
938 y++; | |
939 error = error - 1; | |
940 } | |
941 } | |
942 | |
943 /* | |
944 if(p3.x == 799 && p3.y == 2497){ | |
945 for(i=0;i<p3.x;i++){ | |
946 printf("%"PRIu64": %"PRIu64"\n", i, y_points[i]); | |
947 if(i % 50 == 0) getchar(); | |
948 } | |
949 } | |
950 */ | |
951 | |
952 #ifdef VERBOSE | |
953 for(i=0;i<p3.x;i++){ | |
954 printf("%"PRIu64" -> ", y_points[i]); | |
955 if(i % 50 == 0) getchar(); | |
956 } | |
957 | |
958 #endif | |
959 | |
960 | |
961 | |
962 } | |
963 | |
964 struct best_cell NW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, int64_t iGap, int64_t eGap, struct cell ** table, struct positioned_cell * mc, int show, int64_t * cell_path_y, long double * window, uint64_t * current_window_size){ | |
965 | |
966 | |
967 uint64_t i, j, j_prime; | |
968 int64_t scoreDiagonal = INT64_MIN, scoreLeft = INT64_MIN, scoreRight = INT64_MIN, score = INT64_MIN, delta_dif_1_0, delta_dif_2_1, limit_left, limit_right, j_right_prime = 1, j_left_prime = 1, j_diag_prime = 1; | |
969 | |
970 struct best_cell bc; | |
971 bc.c.score = INT64_MIN; | |
972 bc.c.xpos = 0; bc.c.ypos = 0; | |
973 | |
974 //The window size will be a +-15% of the square root of the product of lengths | |
975 int64_t window_size = MIN(MAX_WINDOW_SIZE/2, (uint64_t) (*window * sqrtl((long double) Xend * (long double) Yend))); | |
976 //printf("xlen: %"PRIu64", ylen: %"PRIu64" w-size: %"PRId64"\n", Xend, Yend, window_size); | |
977 *current_window_size = (uint64_t) window_size; | |
978 | |
979 //The limits to the window | |
980 limit_left = 0; | |
981 limit_right = 2*window_size + 1; | |
982 if(limit_right > MAX_WINDOW_SIZE) limit_right = MAX_WINDOW_SIZE; | |
983 | |
984 struct positioned_cell mf; | |
985 mf.score = INT64_MIN; | |
986 | |
987 | |
988 //First row. iCounter serves as counter from zero | |
989 //printf("..0%%"); | |
990 //Zero will always be | |
991 table[0][0].score = compare_letters(X[0], Y[0]); | |
992 mc[0].score = table[0][0].score; | |
993 mc[0].xpos = 0; | |
994 mc[0].ypos = 0; | |
995 | |
996 //if(Xend == 799 && Yend == 2497) printf("I am %p The count is real %.5s %.5s %p %p \n", &table[0][0], X, Y, X, Y); | |
997 | |
998 | |
999 for(i=1;i<Yend;i++){ | |
1000 //table[0][i].score = (X[0] == Y[i]) ? POINT : -POINT; | |
1001 if(i < MAX_WINDOW_SIZE) table[0][i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap; | |
1002 //table[Xstart][i].xfrom = Xstart; | |
1003 //table[Xstart][i].yfrom = i; | |
1004 //Set every column max | |
1005 mc[i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap; | |
1006 #ifdef VERBOSE | |
1007 printf("%02"PRId64" ", mc[i].score); | |
1008 #endif | |
1009 mc[i].xpos = 0; | |
1010 mc[i].ypos = i; | |
1011 | |
1012 } | |
1013 #ifdef VERBOSE | |
1014 printf("\n"); | |
1015 #endif | |
1016 //Set row max | |
1017 mf.score = table[0][0].score; | |
1018 mf.xpos = 0; | |
1019 mf.ypos = 0; | |
1020 //Init j | |
1021 j = MAX(1,(cell_path_y[1] - window_size)); | |
1022 | |
1023 //Go through full matrix | |
1024 for(i=1;i<Xend;i++){ | |
1025 //Fill first rowcell | |
1026 if(cell_path_y[i-1]+window_size < cell_path_y[i]) return bc; //terror("Sequence proportions make window shift too large"); | |
1027 //Conversion for the j-coordinate | |
1028 j_prime = 1; | |
1029 | |
1030 //table[i][0].score = (X[i] == Y[0]) ? POINT : -POINT; | |
1031 if(cell_path_y[i] - window_size <= 0){ | |
1032 table[i][0].score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap; | |
1033 mf.score = table[i][0].score; | |
1034 }else{ | |
1035 mf.score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap; | |
1036 } | |
1037 | |
1038 mf.xpos = i-1; | |
1039 mf.ypos = 0; | |
1040 | |
1041 delta_dif_1_0 = MAX(1, (cell_path_y[i] - window_size)) - MAX(1,(cell_path_y[i-1] - window_size)); //j-1 | |
1042 if(i>1) delta_dif_2_1 = MAX(1, (cell_path_y[i-1] - window_size)) - MAX(1, (cell_path_y[i-2] - window_size)); //j-2 | |
1043 | |
1044 #ifdef VERBOSE | |
1045 printf("D1_0: %"PRId64" D2_1: %"PRId64"\n", delta_dif_1_0, delta_dif_2_1); | |
1046 #endif | |
1047 | |
1048 #ifdef VERBOSE | |
1049 printf("%02"PRId64" ", mf.score); | |
1050 #endif | |
1051 //printf("Check on i: (%"PRIu64") from - to (%"PRIu64", %"PRIu64")\n", i, 0L, Xend); | |
1052 /* | |
1053 if(1||i==262){ | |
1054 printf("I will go from %"PRIu64" to %"PRIu64" and I am %"PRIu64", %"PRIu64"\n", (uint64_t) MAX(1,(cell_path_y[i] - (int64_t)window_size)), (uint64_t) MIN((int64_t)Yend,(cell_path_y[i] + (int64_t)window_size)), i, j); | |
1055 //printf("lengs: %"PRIu64", %"PRIu64"\n", Xend, Yend); | |
1056 //printf("cp[i]: %"PRId64", cp[i-1] %"PRId64"\n", cell_path_y[i], cell_path_y[i-1]); | |
1057 //printf("min(%"PRId64", %"PRId64" + %"PRId64")-------------------\n", Yend, cell_path_y[i] ,(int64_t)window_size); | |
1058 | |
1059 } | |
1060 */ | |
1061 //getchar(); | |
1062 | |
1063 //printf("@%"PRIu64"[%"PRId64"] -> (%"PRIu64", %"PRIu64") jp %"PRIu64", lright %"PRIu64"\n", i, cell_path_y[i], MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size)), j_prime, limit_right); | |
1064 //printf("M:@%"PRIu64"-> %"PRIu64"\n", i, MIN(Yend,(cell_path_y[i] + window_size))); | |
1065 #ifdef VERBOSE | |
1066 int64_t r; | |
1067 for(r=0;r<MAX(0,(cell_path_y[i] - window_size)); r++){ | |
1068 printf(" "); | |
1069 } | |
1070 #endif | |
1071 | |
1072 /* | |
1073 if(Xend == 799 && Yend == 2497 && i >= 145 && i <= 155){ | |
1074 printf("them limits @i %"PRIu64"::: %"PRIu64", %"PRIu64"\n", i, MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size))); | |
1075 getchar(); | |
1076 } | |
1077 */ | |
1078 | |
1079 | |
1080 for(j=MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size)) && j_prime < limit_right;j++){ | |
1081 //if(i == 8302){ printf("Doing on : (%"PRIu64",%"PRIu64" and jprime=%"PRIu64"\n", i,j,j_prime); getchar(); } | |
1082 //Check if max in row has changed | |
1083 //if(j > MAX(1, cell_path_y[i-1] - window_size +1) && mf.score <= table[i][j-2].score){ | |
1084 //if(j_prime == MAX_WINDOW_SIZE) break; | |
1085 //Calculate the real j position in the windowed table | |
1086 | |
1087 j_left_prime = ((int64_t)j_prime - (2 - delta_dif_1_0)); | |
1088 //j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0)); | |
1089 j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0)); | |
1090 if(i > 1){ | |
1091 j_right_prime = ((int64_t)j_prime - (1 - (delta_dif_1_0 + delta_dif_2_1))); | |
1092 } | |
1093 | |
1094 if(j > MAX(1, cell_path_y[i-1] - window_size +1) && j < MIN(Yend,(cell_path_y[i-1] + window_size)) && j_left_prime < limit_right && table[i-1][j_left_prime].score >= mf.score){ | |
1095 //mf.score = table[i-1][j-2].score; | |
1096 mf.score = table[i-1][j_left_prime].score; | |
1097 mf.xpos = i-1; | |
1098 mf.ypos = j-2; | |
1099 if(table[i-1][j_left_prime].score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
1100 | |
1101 } | |
1102 //printf("RowMax: %"PRId64"@(%"PRIu64", %"PRIu64")\t", mf.score, mf.xpos, mf.ypos); | |
1103 | |
1104 //score = (X[i] == Y[j]) ? POINT : -POINT; | |
1105 score = compare_letters(X[i], Y[j]); | |
1106 | |
1107 //Precondition: Upper row needs to reach up to diagonal | |
1108 //if((cell_path_y[i-1]+window_size) >= j-1){ | |
1109 if(i > 1 && j >= 1 && j-1 >= MAX(1,(cell_path_y[i-2] - window_size)) && j-1 < MIN(Yend,(cell_path_y[i-2] + window_size)) && j_right_prime >= limit_left && j_right_prime < limit_right && table[i-2][j_right_prime].score >= mc[j-1].score ){ | |
1110 //mc[j-1].score = table[i-2][j-(1+j_prime)].score; | |
1111 //Should be the j_prime we had at cell_path_y | |
1112 //MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size)) | |
1113 | |
1114 mc[j-1].score = table[i-2][j_right_prime].score; | |
1115 mc[j-1].xpos = i-2; | |
1116 mc[j-1].ypos = j-1; | |
1117 | |
1118 if(table[i-2][j_right_prime].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
1119 | |
1120 } | |
1121 | |
1122 | |
1123 if(j-1 >= MAX(0, (cell_path_y[i-1]-window_size)) && (cell_path_y[i-1]+window_size) >= j-1 && j_diag_prime >= limit_left && j_diag_prime < limit_right && j_diag_prime < cell_path_y[i-1]+window_size){ | |
1124 //scoreDiagonal = table[i-1][j-1].score + score; | |
1125 //printf("prevdiag: %"PRId64"\n", table[i-1][j_diag_prime].score); | |
1126 scoreDiagonal = table[i-1][j_diag_prime].score + score; | |
1127 if(table[i-1][j_diag_prime].score == INT64_MIN){ printf("A: i-1\t%"PRIu64"\tj_diag\t%"PRIu64"\ts%"PRId64"\n", i-1, j_diag_prime, table[i-1][j_diag_prime].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
1128 }else{ | |
1129 scoreDiagonal = INT64_MIN; | |
1130 } | |
1131 | |
1132 if(i>=1 && j>1){ | |
1133 scoreLeft = mf.score + iGap + (j - (mf.ypos+2))*eGap + score; | |
1134 | |
1135 if(mf.score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
1136 }else{ | |
1137 scoreLeft = INT64_MIN; | |
1138 } | |
1139 | |
1140 if(j>=1 && i>1){ | |
1141 scoreRight = mc[j-1].score + iGap + (i - (mc[j-1].xpos+2))*eGap + score; | |
1142 //if(scoreRight == -12) printf("MC: %"PRId64", From: %"PRIu64", %"PRIu64"->", mc[j-1].score, mc[j-1].xpos, mc[j-1].ypos); | |
1143 | |
1144 if(mc[j-1].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
1145 }else{ | |
1146 scoreRight = INT64_MIN; | |
1147 } | |
1148 | |
1149 /* | |
1150 if(Xend == 799 && Yend == 2497 && i >= 152 && i == 153){ | |
1151 printf("@%"PRIu64", %"PRIu64" -> scores: %"PRId64", %"PRId64", %"PRId64"\n", i, j, scoreDiagonal, scoreRight, scoreLeft); | |
1152 printf("in position @ jprime= %"PRIu64" cellpaths [i-2, i-1, i] are %"PRId64", %"PRId64", %"PRId64", window_size: %"PRId64", j_diag_prime: %"PRId64"\n", j_prime, cell_path_y[i-2], cell_path_y[i-1], cell_path_y[i], window_size, j_diag_prime); | |
1153 printf("Mfs from scoreLeft: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); | |
1154 getchar(); | |
1155 } | |
1156 */ | |
1157 | |
1158 //Choose maximum | |
1159 /* | |
1160 #ifdef VERBOSE | |
1161 printf("The game starts at %"PRId64"\n", MAX(0, cell_path_y[i] - window_size)); | |
1162 printf("from %c %c and I get to %"PRIu64" while j=%"PRIu64"\n", X[i], Y[j], j_prime, j); | |
1163 printf("j_prime: %"PRId64"\n", j_prime); | |
1164 printf("j_diag_prime: %"PRId64" limits[%"PRId64", %"PRId64"]\n", j_diag_prime, limit_left, limit_right); | |
1165 printf("Score DIAG: %"PRId64"; LEFT: %"PRId64"; RIGHT: %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
1166 printf("currmf: %"PRId64" mc: %"PRId64"\n", mf.score, mc[j-1].score); | |
1167 #endif | |
1168 */ | |
1169 | |
1170 | |
1171 //if(i >= MAX_READ_SIZE){ printf("i=%"PRIu64"\n", i); terror("i overflowed\n");} | |
1172 //if(j_prime >= MAX_WINDOW_SIZE){ printf("upper : %"PRId64"\n", MIN(Yend,(cell_path_y[i] + window_size-1))); printf("jp=%"PRIu64"\n", j_prime); terror("j overflowed\n"); } | |
1173 | |
1174 | |
1175 | |
1176 if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight){ | |
1177 //Diagonal | |
1178 | |
1179 //fprintf(stdout, "The JPRIME: %"PRId64" actual pos: %"PRIu64"\n", j_prime, j); getchar(); | |
1180 table[i][j_prime].score = scoreDiagonal; | |
1181 table[i][j_prime].xfrom = i-1; | |
1182 table[i][j_prime].yfrom = j-1; | |
1183 | |
1184 | |
1185 }else if(scoreRight > scoreLeft){ | |
1186 table[i][j_prime].score = scoreRight; | |
1187 table[i][j_prime].xfrom = mc[j-1].xpos; | |
1188 table[i][j_prime].yfrom = mc[j-1].ypos; | |
1189 | |
1190 }else{ | |
1191 //printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
1192 table[i][j_prime].score = scoreLeft; | |
1193 table[i][j_prime].xfrom = mf.xpos; | |
1194 table[i][j_prime].yfrom = mf.ypos; | |
1195 } | |
1196 //printf("F: i\t%"PRIu64"\tj_prime\t%"PRIu64"\n", i, j_prime); | |
1197 //getchar(); | |
1198 //if(i == 94){ printf("showing j %"PRIu64" jprime %"PRIu64" lleft %"PRIu64", llright %"PRIu64"\n", j, j_prime, limit_left, limit_right); getchar(); } | |
1199 //if(i == 94 && j == 374){ printf("stopped at 94, 374 s %"PRId64"\n", table[i][j_prime].score); getchar(); } | |
1200 | |
1201 | |
1202 | |
1203 | |
1204 /* | |
1205 if(i == 264 && j == 176){ | |
1206 printf("@%"PRIu64", %"PRIu64"\n", i, j); | |
1207 printf("my score is %"PRId64"\n", mc[j-1].score); | |
1208 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]); | |
1209 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
1210 printf("check j_right_prime == %"PRIu64"\n", j_right_prime); | |
1211 getchar(); | |
1212 //exit(-1); | |
1213 } | |
1214 */ | |
1215 | |
1216 //check if column max has changed | |
1217 //New condition: check if you filled i-2, j-1 | |
1218 | |
1219 | |
1220 if(i == Xend-1 || j == Yend-1){ | |
1221 | |
1222 if(i == Xend-1 && j != Yend-1){ | |
1223 table[i][j_prime].score = table[i][j_prime].score + iGap + (Yend - j)*eGap; | |
1224 }else if(j == Yend-1 && i != Xend-1){ | |
1225 table[i][j_prime].score = table[i][j_prime].score + iGap + (Xend - i)*eGap; | |
1226 } | |
1227 //Check for best cell | |
1228 if(table[i][j_prime].score >= bc.c.score){ | |
1229 | |
1230 /* | |
1231 if(i == 798 && j == 1052){ // yields 799, 2497 | |
1232 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]); | |
1233 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
1234 printf("score comes from %"PRIu64", %"PRIu64",\n", mc[j-1].xpos, mc[j-1].ypos); | |
1235 printf("IDlengths: %"PRIu64", %"PRIu64"\n", Xend, Yend); | |
1236 | |
1237 //exit(-1); | |
1238 } | |
1239 */ | |
1240 | |
1241 bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime; | |
1242 } | |
1243 //bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime; | |
1244 } | |
1245 | |
1246 | |
1247 #ifdef VERBOSE | |
1248 //printf("Put score: %"PRId64"\n\n", table[i][j_prime].score); | |
1249 //printf("(%"PRId64")%02"PRId64" ", j_diag_prime, table[i][j_prime].score); //printf("->(%"PRIu64", %"PRIu64")", i, j); printf("[%c %c]", X[i], Y[j]); | |
1250 //if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight) printf("*\t"); | |
1251 //else if(scoreRight > scoreLeft) printf("{\t"); else printf("}\t"); | |
1252 //getchar(); | |
1253 | |
1254 #endif | |
1255 j_prime++; | |
1256 } | |
1257 #ifdef VERBOSE | |
1258 printf("\n"); | |
1259 getchar(); | |
1260 #endif | |
1261 } | |
1262 | |
1263 return bc; | |
1264 } | |
1265 | |
1266 | |
1267 | |
1268 void backtrackingNW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, struct cell ** table, char * rec_X, char * rec_Y, struct best_cell * bc, uint64_t * ret_head_x, uint64_t * ret_head_y, BasicAlignment * ba, int64_t * cell_path_y, uint64_t window_size){ | |
1269 uint64_t curr_x, curr_y, prev_x, prev_y, head_x, head_y, limit_x, limit_y; | |
1270 int64_t k, j_prime, delta_diff = 0; | |
1271 | |
1272 //limit_x = 2*MAX_READ_SIZE-1; | |
1273 //limit_y = limit_x; | |
1274 limit_x = 2*MAX(Xend, Yend); | |
1275 limit_y = limit_x; | |
1276 //head_x = 2*MAX(Xend, Yend); | |
1277 //head_y = 2*MAX(Xend, Yend); | |
1278 head_x = limit_x; | |
1279 head_y = limit_y; | |
1280 curr_x = bc->c.xpos; | |
1281 curr_y = bc->c.ypos; | |
1282 #ifdef VERBOSE | |
1283 printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
1284 #endif | |
1285 //printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
1286 | |
1287 prev_x = curr_x; | |
1288 prev_y = curr_y; | |
1289 int show = 0; | |
1290 | |
1291 for(k=Xend-1; k>curr_x; k--) rec_X[head_x--] = '-'; | |
1292 for(k=Yend-1; k>curr_y; k--) rec_Y[head_y--] = '-'; | |
1293 | |
1294 j_prime = bc->j_prime; | |
1295 //printf("init prime: %"PRIu64"\n", j_prime); | |
1296 unsigned char first_track = 1; | |
1297 | |
1298 while(curr_x > 0 && curr_y > 0){ | |
1299 | |
1300 | |
1301 if(first_track == 0){ | |
1302 delta_diff = MAX(1, cell_path_y[prev_x] - (int64_t) window_size) - MAX(1, cell_path_y[curr_x] - (int64_t)window_size); //j-1 | |
1303 j_prime = MAX(0, j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff); | |
1304 | |
1305 | |
1306 if(/*(bc->c.xpos == 630 && bc->c.ypos == 541 )||*/ j_prime > MAX_WINDOW_SIZE){ | |
1307 | |
1308 printf("from %"PRIu64", %"PRIu64"\nto %"PRIu64", %"PRIu64"\n", prev_x, prev_y, curr_x, curr_y); | |
1309 printf("jp: %"PRIu64", py,cy %"PRIu64", %"PRIu64", delta: %"PRId64"\n", j_prime, prev_y, curr_y, (int64_t)delta_diff); | |
1310 printf("currx curry : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
1311 printf("window size: %"PRIu64"\n", window_size); | |
1312 printf("cp[prev, curr] : %"PRId64", %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]); | |
1313 printf("my cell path: %"PRId64"\n", cell_path_y[curr_x]); | |
1314 printf("Optimum : %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
1315 getchar(); | |
1316 } | |
1317 | |
1318 //j_prime = j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff; | |
1319 | |
1320 prev_x = curr_x; | |
1321 prev_y = curr_y; | |
1322 | |
1323 /* | |
1324 if(bc->c.xpos == 798 && bc->c.ypos == 1052){ | |
1325 printf("[%c %c]", X[prev_x], Y[prev_y]); | |
1326 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y); | |
1327 | |
1328 } | |
1329 */ | |
1330 | |
1331 #ifdef VERBOSE | |
1332 //printf("Jprime: %"PRId64" :DELTADIF:%"PRId64"\n", j_prime, delta_diff); | |
1333 printf("[%c %c]", X[prev_x], Y[prev_y]); | |
1334 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y); | |
1335 //printf("(%"PRIu64", %"PRIu64") ::: \n", prev_x, prev_y); | |
1336 //printf("cellp Prev: %"PRId64" Post: %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]); | |
1337 //printf("the difs? %"PRId64" the other: %"PRId64"\n", MAX(0, cell_path_y[prev_x] - (int64_t) window_size), MAX(0, cell_path_y[curr_x] - (int64_t)window_size)); | |
1338 getchar(); | |
1339 #endif | |
1340 | |
1341 } | |
1342 | |
1343 //if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE) fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom); | |
1344 | |
1345 /* | |
1346 if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE){ | |
1347 fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom); | |
1348 uint64_t k; | |
1349 for(k=0;k<Xend;k++){ | |
1350 fprintf(stdout, "%c", X[k]); | |
1351 } | |
1352 fprintf(stdout, "\n"); | |
1353 for(k=0;k<Yend;k++){ | |
1354 fprintf(stdout, "%c", Y[k]); | |
1355 } | |
1356 fprintf(stdout, "\n"); | |
1357 show = 1; | |
1358 } | |
1359 */ | |
1360 if(j_prime >= MAX_WINDOW_SIZE) printf("j_prime:overflow %"PRIu64"\n", j_prime); | |
1361 | |
1362 | |
1363 curr_x = table[prev_x][j_prime].xfrom; | |
1364 curr_y = table[prev_x][j_prime].yfrom; | |
1365 first_track = 0; | |
1366 | |
1367 //printf("w: %"PRIu64"- %"PRIu64"\n", curr_x, curr_y); | |
1368 | |
1369 | |
1370 if((curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){ | |
1371 //Diagonal case | |
1372 //printf("DIAG\n"); | |
1373 if(head_x == 0 || head_y == 0) goto exit_point; | |
1374 rec_X[head_x--] = (char) X[prev_x]; | |
1375 rec_Y[head_y--] = (char) Y[prev_y]; | |
1376 ba->length++; | |
1377 | |
1378 }else if((prev_x - curr_x) > (prev_y - curr_y)){ | |
1379 //Gap in X | |
1380 //printf("Gap X\n"); | |
1381 if(head_x == 0 || head_y == 0) goto exit_point; | |
1382 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){ | |
1383 rec_Y[head_y--] = Y[prev_y]; | |
1384 rec_X[head_x--] = X[prev_x]; | |
1385 }else{ | |
1386 rec_Y[head_y--] = '-'; | |
1387 rec_X[head_x--] = X[prev_x]; | |
1388 } | |
1389 ba->length++; | |
1390 | |
1391 for(k=prev_x-1;k>curr_x;k--){ | |
1392 if(head_x == 0 || head_y == 0) goto exit_point; | |
1393 #ifdef VERBOSE | |
1394 if(head_x == 0 || head_y == 0){ | |
1395 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y); | |
1396 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
1397 uint64_t z; | |
1398 for(z=head_x;z<limit_x;z++){ | |
1399 fprintf(stdout, "%c", (char) rec_X[z]); | |
1400 } | |
1401 printf("\n"); | |
1402 for(z=head_y;z<limit_y;z++){ | |
1403 fprintf(stdout, "%c", (char) rec_Y[z]); | |
1404 } | |
1405 getchar(); | |
1406 } | |
1407 #endif | |
1408 rec_Y[head_y--] = '-'; | |
1409 rec_X[head_x--] = (char) X[k]; | |
1410 ba->length++; | |
1411 ba->egaps++; | |
1412 } | |
1413 ba->igaps += 1; | |
1414 ba->egaps--; | |
1415 }else{ | |
1416 //Gap in Y | |
1417 //printf("GAP Y\n"); | |
1418 //10, 0, 401, 281 | |
1419 if(head_x == 0 || head_y == 0) goto exit_point; | |
1420 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){ | |
1421 rec_Y[head_y--] = Y[prev_y]; | |
1422 rec_X[head_x--] = X[prev_x]; | |
1423 }else{ | |
1424 rec_Y[head_y--] = Y[prev_y]; | |
1425 rec_X[head_x--] = '-'; | |
1426 } | |
1427 ba->length++; | |
1428 | |
1429 for(k=prev_y-1;k>curr_y;k--){ | |
1430 if(head_x == 0 || head_y == 0) goto exit_point; | |
1431 #ifdef VERBOSE | |
1432 if(head_x == 0 || head_y == 0){ | |
1433 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y); | |
1434 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
1435 uint64_t z; | |
1436 for(z=head_x;z<limit_x;z++){ | |
1437 fprintf(stdout, "%c", (char) rec_X[z]); | |
1438 } | |
1439 printf("\n"); | |
1440 for(z=head_y;z<limit_y;z++){ | |
1441 fprintf(stdout, "%c", (char) rec_Y[z]); | |
1442 } | |
1443 getchar(); | |
1444 } | |
1445 #endif | |
1446 rec_X[head_x--] = '-'; | |
1447 rec_Y[head_y--] = (char) Y[k]; | |
1448 ba->length++; | |
1449 ba->egaps++; | |
1450 } | |
1451 | |
1452 ba->igaps += 1; | |
1453 ba->egaps--; | |
1454 } | |
1455 | |
1456 } | |
1457 | |
1458 if(curr_x == 0 && curr_y == 0 && (curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){ | |
1459 rec_X[head_x--] = (char) X[curr_x]; | |
1460 rec_Y[head_y--] = (char) Y[curr_y]; | |
1461 ba->length++; | |
1462 } | |
1463 | |
1464 exit_point: | |
1465 | |
1466 //printf("curr: %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
1467 //printf("Heads: %"PRIu64", %"PRIu64"\n", head_x, head_y); | |
1468 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
1469 uint64_t huecos_x = 0, huecos_y = 0; | |
1470 k=(int64_t)curr_x-1; | |
1471 while(k>=0){ if(head_x == 0) break; rec_X[head_x--] = '-'; huecos_x++; k--; } | |
1472 k=(int64_t)curr_y-1; | |
1473 while(k>=0){ if(head_y == 0) break; rec_Y[head_y--] = '-'; huecos_y++; k--; } | |
1474 | |
1475 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
1476 | |
1477 if(huecos_x >= huecos_y){ | |
1478 while(huecos_x > 0) { if(head_y == 0) break; rec_Y[head_y--] = ' '; huecos_x--;} | |
1479 }else{ | |
1480 while(huecos_y > 0) { if(head_x == 0) break; rec_X[head_x--] = ' '; huecos_y--;} | |
1481 } | |
1482 | |
1483 if(show == 1){ | |
1484 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
1485 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", 2*Xend, 2*Yend); | |
1486 uint64_t k; | |
1487 for(k=head_x;k<limit_x;k++){ | |
1488 fprintf(stdout, "%c", (char) rec_X[k]); | |
1489 } | |
1490 printf("\n"); | |
1491 for(k=head_y;k<limit_y;k++){ | |
1492 fprintf(stdout, "%c", (char) rec_Y[k]); | |
1493 } | |
1494 printf("\n"); | |
1495 getchar(); | |
1496 } | |
1497 | |
1498 *ret_head_x = head_x; | |
1499 *ret_head_y = head_y; | |
1500 #ifdef VERBOSE | |
1501 printf("hx hy: %"PRIu64", %"PRIu64"\n", head_x, head_y); | |
1502 #endif | |
1503 } | |
1504 | |
1505 | |
1506 AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key){ | |
1507 | |
1508 if(mp[*n_pools_used].current == POOL_SIZE){ | |
1509 *n_pools_used += 1; | |
1510 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools"); | |
1511 init_mem_pool_AVL(&mp[*n_pools_used]); | |
1512 | |
1513 } | |
1514 | |
1515 AVLTree * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current; | |
1516 mp[*n_pools_used].current++; | |
1517 | |
1518 new_pos->key = key; | |
1519 new_pos->count = 1; | |
1520 new_pos->height = 1; | |
1521 | |
1522 return new_pos; | |
1523 } | |
1524 | |
1525 void init_mem_pool_AVL(Mempool_AVL * mp){ | |
1526 mp->base = (AVLTree *) calloc(POOL_SIZE, sizeof(AVLTree)); | |
1527 if(mp->base == NULL) terror("Could not request memory pool"); | |
1528 mp->current = 0; | |
1529 } | |
1530 | |
1531 | |
1532 | |
1533 /* | |
1534 // An AVL tree node | |
1535 typedef struct AVL_Node{ | |
1536 uint64_t key; | |
1537 struct AVL_Node * left; | |
1538 struct AVL_Node * right; | |
1539 uint64_t height; | |
1540 llpos * next; | |
1541 } AVLTree; | |
1542 */ | |
1543 | |
1544 // A utility function to get height of the tree | |
1545 | |
1546 uint64_t height(AVLTree * N){ | |
1547 if (N == NULL) | |
1548 return 0; | |
1549 return N->height; | |
1550 } | |
1551 | |
1552 /* Substituted by (x == NULL) ? (0) : (x->height) */ | |
1553 | |
1554 /* Helper function that allocates a new node with the given key and | |
1555 NULL left and right pointers. */ | |
1556 | |
1557 /* This one is substituted by AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key) */ | |
1558 | |
1559 // A utility function to right rotate subtree rooted with y | |
1560 // See the diagram given above. | |
1561 AVLTree * right_rotate(AVLTree * y){ | |
1562 AVLTree * x = y->left; | |
1563 AVLTree * T2 = x->right; | |
1564 | |
1565 // Perform rotation | |
1566 x->right = y; | |
1567 y->left = T2; | |
1568 | |
1569 // Update heights | |
1570 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1; | |
1571 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1; | |
1572 // Update heights | |
1573 y->height = MAX(height(y->left), height(y->right))+1; | |
1574 x->height = MAX(height(x->left), height(x->right))+1; | |
1575 | |
1576 // Return new root | |
1577 return x; | |
1578 } | |
1579 | |
1580 // A utility function to left rotate subtree rooted with x | |
1581 // See the diagram given above. | |
1582 AVLTree * left_rotate(AVLTree * x){ | |
1583 AVLTree * y = x->right; | |
1584 AVLTree * T2 = y->left; | |
1585 | |
1586 // Perform rotation | |
1587 y->left = x; | |
1588 x->right = T2; | |
1589 | |
1590 // Update heights | |
1591 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1; | |
1592 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1; | |
1593 x->height = MAX(height(x->left), height(x->right))+1; | |
1594 y->height = MAX(height(y->left), height(y->right))+1; | |
1595 | |
1596 // Return new root | |
1597 return y; | |
1598 } | |
1599 | |
1600 // Get Balance factor of node N | |
1601 | |
1602 int64_t get_balance(AVLTree * N){ | |
1603 if (N == NULL) | |
1604 return 0; | |
1605 return height(N->left) - height(N->right); | |
1606 } | |
1607 | |
1608 /* Substituted by (node == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height) */ | |
1609 | |
1610 AVLTree * find_AVLTree(AVLTree * node, uint64_t key){ | |
1611 AVLTree * found = NULL; | |
1612 if(node == NULL) return NULL; | |
1613 | |
1614 if (key < node->key) { | |
1615 found = find_AVLTree(node->left, key); | |
1616 } else if (key > node->key) { | |
1617 found = find_AVLTree(node->right, key); | |
1618 } else { | |
1619 return node; | |
1620 } | |
1621 return found; | |
1622 } | |
1623 | |
1624 llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key){ | |
1625 llpos * aux = NULL; | |
1626 if(node == NULL) return NULL; | |
1627 | |
1628 if (key < node->key) { | |
1629 aux = find_AVLTree_llpos(node->left, key); | |
1630 } else if (key > node->key) { | |
1631 aux = find_AVLTree_llpos(node->right, key); | |
1632 } else { | |
1633 return node->next; | |
1634 } | |
1635 return aux; | |
1636 } | |
1637 | |
1638 // Recursive function to insert key in subtree rooted | |
1639 // with node and returns new root of subtree. | |
1640 AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l, uint64_t s_id){ | |
1641 /* 1. Perform the normal BST insertion */ | |
1642 if (node == NULL){ | |
1643 | |
1644 AVLTree * n_node = getNewLocationAVLTree(mp, n_pools_used, key); | |
1645 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l); | |
1646 aux->pos = pos; | |
1647 aux->s_id = s_id; | |
1648 n_node->next = aux; | |
1649 return n_node; | |
1650 } | |
1651 | |
1652 if (key < node->key) { | |
1653 node->left = insert_AVLTree(node->left, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id); | |
1654 } else if (key > node->key) { | |
1655 node->right = insert_AVLTree(node->right, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id); | |
1656 } else { | |
1657 // Equal keys are inserted as a linked list | |
1658 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l); | |
1659 aux->pos = pos; | |
1660 aux->s_id = s_id; | |
1661 aux->next = node->next; | |
1662 node->next = aux; | |
1663 ++(node->count); | |
1664 return node; | |
1665 } | |
1666 | |
1667 /* 2. Update height of this ancestor node */ | |
1668 //node->height = 1 + MAX((node->left == NULL) ? (0) : (node->left->height), (node->right == NULL) ? (0) : (node->right->height)); | |
1669 node->height = 1 + MAX(height(node->left), height(node->right)); | |
1670 | |
1671 /* 3. Get the balance factor of this ancestor | |
1672 node to check whether this node became | |
1673 unbalanced */ | |
1674 //int64_t balance = (node->left == NULL || node->right == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height); | |
1675 int64_t balance = get_balance(node); | |
1676 | |
1677 // If this node becomes unbalanced, then | |
1678 // there are 4 cases | |
1679 | |
1680 // Left Left Case | |
1681 if (balance > 1 && key < node->left->key) | |
1682 return right_rotate(node); | |
1683 | |
1684 // Right Right Case | |
1685 if (balance < -1 && key > node->right->key) | |
1686 return left_rotate(node); | |
1687 | |
1688 // Left Right Case | |
1689 if (balance > 1 && key > node->left->key) | |
1690 { | |
1691 node->left = left_rotate(node->left); | |
1692 return right_rotate(node); | |
1693 } | |
1694 | |
1695 // Right Left Case | |
1696 if (balance < -1 && key < node->right->key) | |
1697 { | |
1698 node->right = right_rotate(node->right); | |
1699 return left_rotate(node); | |
1700 } | |
1701 | |
1702 /* return the (unchanged) node pointer */ | |
1703 return node; | |
1704 } | |
1705 | |
1706 // A utility function to print preorder traversal | |
1707 // of the tree. | |
1708 // The function also prints height of every node | |
1709 | |
1710 void pre_order(AVLTree * root){ | |
1711 if(root != NULL){ | |
1712 printf("%"PRIu64" ", root->key); | |
1713 llpos * aux = root->next; | |
1714 while(aux != NULL){ printf("#%"PRIu64", ", aux->pos); aux = aux->next; } | |
1715 pre_order(root->left); | |
1716 pre_order(root->right); | |
1717 } | |
1718 } |