annotate chromeister/src/CHROMEISTER.c @ 0:7fdf47a0bae8 draft

Uploaded
author alvarofaure
date Wed, 12 Dec 2018 07:18:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
1 /*********
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
2
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
3 File CHROMEISTER.c
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
4 Author EPW <estebanpw@uma.es>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
5 Description Computes hits and generates a dotplot
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
6
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
7 USAGE Usage is described by calling ./CHROMEISTER --help
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
8
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
9
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
10
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
11 **********/
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
12
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
13
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
14 #include <stdio.h>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
15 #include <stdlib.h>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
16 #include <math.h>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
17 #include <string.h>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
18 #include <ctype.h>
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
19 #include "structs.h"
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
20 #include "alignmentFunctions.h"
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
21 #include "commonFunctions.h"
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
22 #define STARTING_SEQS 1000
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
23 #define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
24 #define RANGE 2
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
25
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
26 uint64_t custom_kmer = 32; // Defined as external in structs.h
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
27 uint64_t diffuse_z = 4; // Defined as external in structs.h
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
28
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
29 uint64_t get_seq_len(FILE * f);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
30
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
31
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
32 void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
33
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
34 int main(int argc, char ** av){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
35
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
36
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
37 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
38 //Store positions of kmers
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
39 uint64_t n_pools_used = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
40 //Mempool_l * mp = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
41 //if(mp == NULL) terror("Could not allocate vectors for memory pools");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
42 Mempool_l mp[MAX_MEM_POOLS];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
43 init_mem_pool_llpos(&mp[n_pools_used]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
44 //llpos * aux;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
45
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
46 uint64_t n_pools_used_AVL = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
47 Mempool_AVL mp_AVL[MAX_MEM_POOLS];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
48 init_mem_pool_AVL(&mp_AVL[n_pools_used_AVL]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
49 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
50 Tuple_hits * thit;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
51
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
52 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
53 AVLTree * root = NULL;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
54 root = insert_AVLTree(root, 10, mp_AVL, &n_pools_used_AVL, 0, mp, &n_pools_used);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
55
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
56 llpos * some = find_AVLTree(root, 25);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
57 while(some != NULL){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
58 printf("#%"PRIu64", ", some->pos); some = some->next;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
59 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
60 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
61
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
62 uint64_t i, j;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
63
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
64 //query to read kmers from, database to find seeds
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
65 FILE * query = NULL, * database = NULL, * out_database = NULL;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
66 uint64_t dimension = 1000; // Default 1000 * 1000
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
67
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
68
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
69 init_args(argc, av, &query, &database, &out_database, &custom_kmer, &dimension, &diffuse_z);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
70
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
71
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
72
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
73 unsigned char char_converter[91];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
74 char_converter[(unsigned char)'A'] = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
75 char_converter[(unsigned char)'C'] = 1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
76 char_converter[(unsigned char)'G'] = 2;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
77 char_converter[(unsigned char)'T'] = 3;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
78
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
79
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
80 // Variables to account for positions
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
81 // Print info
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
82 fprintf(stdout, "[INFO] Loading database\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
83 // Variables to read kmers
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
84 char c = 'N'; //Char to read character
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
85 // Current length of array and variables for the buffer
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
86 uint64_t idx = 0, r = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
87
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
88 //Vector to read in batches
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
89 char * temp_seq_buffer = NULL;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
90 if ((temp_seq_buffer = calloc(READBUF, sizeof(char))) == NULL) {
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
91 terror("Could not allocate memory for read buffer");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
92 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
93
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
94 //Dimensional matrix
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
95 uint64_t ** representation = (uint64_t **) calloc(dimension+1, sizeof(uint64_t *));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
96 if(representation == NULL) terror("Could not allocate representation");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
97 for(i=0; i<dimension+1; i++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
98 representation[i] = (uint64_t *) calloc(dimension+1, sizeof(uint64_t));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
99 if(representation[i] == NULL) terror("Could not allocate second loop representation");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
100 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
101
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
102 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
103 fseek(database, 0, SEEK_END);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
104 uint64_t aprox_len_query = ftell(database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
105 uint64_t aprox_len_db = aprox_len_query;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
106 rewind(database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
107 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
108 uint64_t aprox_len_query = get_seq_len(database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
109 uint64_t aprox_len_db = aprox_len_query;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
110
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
111 uint64_t a_hundreth = (aprox_len_query/100);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
112
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
113 unsigned char curr_kmer[custom_kmer], reverse_kmer[custom_kmer];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
114 curr_kmer[0] = reverse_kmer[0] = '\0';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
115 uint64_t word_size = 0, word_size_rev = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
116
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
117 //To hold all information related to database
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
118 uint64_t current_len = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
119
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
120 //To force reading from the buffer
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
121 idx = READBUF + 1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
122
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
123 //unsigned char aux_kmer[custom_kmer+1];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
124
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
125 //Vector to store query seq
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
126 unsigned char * seq_vector_query = (unsigned char *) malloc(READBUF*sizeof(unsigned char));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
127 if(seq_vector_query == NULL) terror("Could not allocate memory for query vector");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
128
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
129 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
130 Container * ct = (Container *) calloc(1, sizeof(Container));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
131 if(ct == NULL) terror("Could not allocate container");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
132 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
133
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
134
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
135
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
136 Index * ctidx = (Index *) calloc(1, sizeof(Index));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
137 if(ctidx == NULL) terror("Could not allocate container");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
138
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
139
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
140 //begin = clock();
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
141
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
142
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
143 c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
144 while((!feof(database) || (feof(database) && idx < r))){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
145
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
146 if(c == '>'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
147
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
148
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
149
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
150 while(c != '\n') c = buffered_fgetc(temp_seq_buffer, &idx, &r, database); //Skip ID
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
151
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
152
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
153 while(c != '>' && (!feof(database) || (feof(database) && idx < r))){ //Until next id
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
154 c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
155 c = toupper(c);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
156 if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
157 curr_kmer[word_size] = (unsigned char) c;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
158 if(word_size < custom_kmer) ++word_size;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
159 ++current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
160 if(current_len % a_hundreth == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
161 fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
162 //printf("%"PRIu64"%%..wasted: (%e) (%e)", 1+100*pos_in_query/aprox_len_query, (double)(wasted_cycles_forward)/CLOCKS_PER_SEC, (double)(wasted_cycles_reverse)/CLOCKS_PER_SEC);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
163 fflush(stdout);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
164 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
165
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
166
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
167
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
168 }else{ //It can be anything (including N, Y, X ...)
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
169
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
170 if(c != '\n' && c != '>'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
171 word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
172 // data_database.sequences[pos_in_database++] = (unsigned char) 'N'; //Convert to N
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
173 ++current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
174
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
175 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
176 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
177 //if(current_len % 1000000 == 0) printf(" curr len %" PRIu64"\n", current_len);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
178 if(word_size == custom_kmer){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
179 //write to hash table
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
180
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
181
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
182 thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
183 [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
184 [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
185 [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
186
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
187 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
188 typedef struct tuple_hits{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
189 int repetition;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
190 int hit_count;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
191 uint64_t key;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
192 uint64_t pos;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
193 } Tuple_hits;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
194 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
195
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
196 if(thit->repetition == FALSE){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
197 // Then we can insert
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
198 thit->hit_count = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
199 thit->key = collisioned_hash(&curr_kmer[0], custom_kmer);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
200 thit->pos = current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
201 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
202 // Otherwise we break it
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
203 thit->repetition = TRUE;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
204 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
205
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
206 //thit->root = insert_AVLTree(thit->root, hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
207 //thit->root = insert_AVLTree(thit->root, collisioned_hash(&curr_kmer[0], custom_kmer), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
208
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
209
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
210
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
211 // Non overlapping
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
212 word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
213
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
214
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
215 // Overlapping
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
216 //memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
217 //--word_size;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
218 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
219 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
220 word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
221
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
222 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
223 c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
224 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
225
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
226 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
227
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
228
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
229 //end = clock();
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
230
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
231 // data_database.total_len = pos_in_database;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
232
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
233 //fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64". Hash table building took %e seconds\n", data_database.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
234 fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64".\n", current_len);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
235 //close database
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
236 fclose(database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
237
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
238
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
239
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
240 //begin = clock();
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
241
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
242
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
243
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
244
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
245
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
246 double pixel_size_db = (double) dimension / (double) current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
247 double ratio_db = (double) current_len / dimension;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
248
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
249
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
250 // Get file length
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
251
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
252 //fseek(query, 0, SEEK_END);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
253 //aprox_len_query = ftell(query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
254 //rewind(query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
255 aprox_len_query = get_seq_len(query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
256
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
257 //uint64_t reallocs_hash_holder_table = 1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
258 //uint64_t n_items_hash_holder_table = aprox_len_query / 5;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
259
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
260 //Hash_holder * hash_holder_table = (Hash_holder *) calloc(n_items_hash_holder_table, sizeof(Hash_holder));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
261 //if(hash_holder_table == NULL) terror("Could not allocate hash holding table");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
262
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
263 a_hundreth = (aprox_len_query/100);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
264 double pixel_size_query = (double) dimension / (double) aprox_len_query;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
265 double ratio_query = (double) aprox_len_query / dimension;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
266
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
267
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
268 double i_r_fix = MAX(1.0, custom_kmer * pixel_size_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
269 double j_r_fix = MAX(1.0, custom_kmer * pixel_size_db);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
270
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
271
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
272
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
273 fprintf(stdout, "[INFO] Ratios: Q [%e] D [%e]. Lenghts: Q [%"PRIu64"] D [%"PRIu64"]\n", ratio_query, ratio_db, aprox_len_query, current_len);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
274 fprintf(stdout, "[INFO] Pixel size: Q [%e] D [%e].\n", pixel_size_query, pixel_size_db);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
275
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
276
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
277 fprintf(stdout, "[INFO] Computing absolute hit numbers.\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
278
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
279
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
280 current_len = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
281
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
282 //llpos * the_original_hit;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
283
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
284 //To force reading from the buffer
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
285 idx = READBUF + 1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
286 c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
287 //uint64_t c_hash_holder = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
288
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
289 while((!feof(query) || (feof(query) && idx < r))){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
290
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
291 if(c == '>'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
292 word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
293 word_size_rev = custom_kmer-1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
294
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
295
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
296
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
297
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
298 while(c != '\n'){ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } //Skip ID
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
299
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
300
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
301 while(c != '>' && (!feof(query) || (feof(query) && idx < r))){ //Until next id
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
302 c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
303 c = toupper(c);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
304 if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
305
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
306 ++current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
307 if(current_len % a_hundreth == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
308 fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
309 fflush(stdout);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
310 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
311 curr_kmer[word_size] = (unsigned char) c;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
312 ++word_size;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
313
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
314 switch(c){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
315 case ('A'): reverse_kmer[word_size_rev] = (unsigned)'T';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
316 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
317 case ('C'): reverse_kmer[word_size_rev] = (unsigned)'G';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
318 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
319 case ('G'): reverse_kmer[word_size_rev] = (unsigned)'C';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
320 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
321 case ('T'): reverse_kmer[word_size_rev] = (unsigned)'A';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
322 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
323 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
324 if(word_size_rev != 0) --word_size_rev;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
325
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
326
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
327
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
328
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
329 if(word_size == custom_kmer){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
330
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
331
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
332 //hash_forward = hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
333 //hash_reverse = hashOfWord(&reverse_kmer[0], custom_kmer, FIXED_K);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
334 uint64_t hash_forward, hash_reverse;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
335 hash_forward = collisioned_hash(&curr_kmer[0], custom_kmer);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
336 hash_reverse = collisioned_hash(&reverse_kmer[0], custom_kmer);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
337
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
338
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
339 thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
340 [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
341 [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
342 [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
343
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
344 //AVLTree * search = find_AVLTree(thit->root, hash_forward);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
345
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
346 if(thit->repetition == FALSE && hash_forward == thit->key){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
347 // Attention ::::: you were not removing the ones with count==1 earlier
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
348 thit->pos_in_y = current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
349 thit->hit_count++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
350 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
351
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
352 thit = &ctidx->table[char_converter[reverse_kmer[0]]][char_converter[reverse_kmer[1]]][char_converter[reverse_kmer[2]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
353 [char_converter[reverse_kmer[3]]][char_converter[reverse_kmer[4]]][char_converter[reverse_kmer[5]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
354 [char_converter[reverse_kmer[6]]][char_converter[reverse_kmer[7]]][char_converter[reverse_kmer[8]]]
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
355 [char_converter[reverse_kmer[9]]][char_converter[reverse_kmer[10]]][char_converter[reverse_kmer[11]]];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
356
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
357 if(thit->repetition == FALSE && hash_reverse == thit->key){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
358 // Attention ::::: you were not removing the ones with count==1 earlier
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
359 thit->pos_in_y = current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
360 thit->hit_count++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
361 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
362
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
363 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
364 if(search != NULL && search->count == 1){ //If count is two, then it is a rep
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
365 thit->hit_count += search->count;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
366
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
367 hash_holder_table[c_hash_holder].pos = current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
368 hash_holder_table[c_hash_holder].node = search;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
369 hash_holder_table[c_hash_holder].th = thit;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
370 ++c_hash_holder;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
371 if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
372 ++reallocs_hash_holder_table;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
373 hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
374 if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
375 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
376 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
377 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
378
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
379
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
380
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
381
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
382
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
383 //search = find_AVLTree(thit->root, hash_reverse);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
384 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
385 if(search != NULL && search->count == 1){ //If count is two, then it is a rep
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
386
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
387 thit->hit_count += search->count;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
388 hash_holder_table[c_hash_holder].pos = current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
389 hash_holder_table[c_hash_holder].node = search;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
390 hash_holder_table[c_hash_holder].th = thit;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
391 ++c_hash_holder;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
392 if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
393 ++reallocs_hash_holder_table;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
394 hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
395 if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
396 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
397 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
398 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
399
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
400 // Overlapping
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
401
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
402 memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
403 memmove(&reverse_kmer[1], &reverse_kmer[0], custom_kmer-1);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
404 --word_size;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
405
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
406 // Non overlapping
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
407 //word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
408 //word_size_rev = custom_kmer-1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
409 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
410 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
411 if(c != '\n' && c != '>'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
412 word_size = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
413 word_size_rev = custom_kmer-1;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
414 ++current_len;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
415 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
416 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
417 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
418 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
419 c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
420 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
421
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
422 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
423
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
424 /// Out
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
425
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
426 fprintf(stdout, "Scanning hits table.\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
427
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
428 a_hundreth = MAX(1, TOTAL_ENTRIES/100);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
429 uint64_t t_computed = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
430 uint64_t w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
431 for(w0=0;w0<4;w0++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
432 for(w1=0;w1<4;w1++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
433 for(w2=0;w2<4;w2++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
434 for(w3=0;w3<4;w3++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
435 for(w4=0;w4<4;w4++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
436 for(w5=0;w5<4;w5++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
437 for(w6=0;w6<4;w6++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
438 for(w7=0;w7<4;w7++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
439 for(w8=0;w8<4;w8++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
440 for(w9=0;w9<4;w9++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
441 for(w10=0;w10<4;w10++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
442 for(w11=0;w11<4;w11++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
443
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
444 if(t_computed % a_hundreth == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
445 fprintf(stdout, "\r%"PRIu64"%%...", 1+100*t_computed/TOTAL_ENTRIES);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
446 fflush(stdout);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
447 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
448 ++t_computed;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
449 Tuple_hits * taux = &ctidx->table[w0][w1][w2][w3][w4][w5][w6][w7][w8][w9][w10][w11];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
450 if(taux->hit_count == 1){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
451 // We plot it
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
452 // Convert scale to representation
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
453 uint64_t redir_db = (uint64_t) (taux->pos / (ratio_db));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
454 uint64_t redir_query = (uint64_t) (taux->pos_in_y / (ratio_query));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
455 double i_r = i_r_fix; double j_r = j_r_fix;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
456 while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
457 if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
458 representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
459 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
460 representation[redir_query][redir_db]++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
461 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
462 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
463 i_r -= MIN(1.0, pixel_size_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
464 j_r -= MIN(1.0, pixel_size_db);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
465 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
466 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
467 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
468 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
469 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
470 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
471 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
472 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
473 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
474 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
475 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
476 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
477 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
478 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
479
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
480
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
481 //double average_hit = ((double) total_hits / (double) table_size);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
482 //average_hit = 2.2;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
483
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
484 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
485 //fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" Avg = %e.\n", total_hits, table_size, average_hit);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
486 fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" E' = %e.\n", total_hits, table_size, Eprime);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
487
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
488
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
489
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
490
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
491
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
492 a_hundreth = MAX(1,c_hash_holder/100);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
493
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
494 for(current_len = 0; current_len < c_hash_holder; current_len++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
495
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
496 if(current_len % a_hundreth == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
497 fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/c_hash_holder);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
498 fflush(stdout);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
499 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
500
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
501 aux = hash_holder_table[current_len].node->next;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
502
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
503 //if(hash_holder_table[current_len].th->hit_count < (uint64_t) average_hit){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
504 if(hash_holder_table[current_len].th->hit_count < (uint64_t) Eprime){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
505 while(aux != NULL){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
506 // Convert scale to representation
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
507 uint64_t redir_db = (uint64_t) (aux->pos / (ratio_db));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
508 uint64_t redir_query = (uint64_t) (hash_holder_table[current_len].pos / (ratio_query));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
509 double i_r = i_r_fix; double j_r = j_r_fix;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
510 while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
511 if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
512 representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
513 }else{
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
514 representation[redir_query][redir_db]++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
515 break;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
516 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
517 i_r -= MIN(1.0, pixel_size_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
518 j_r -= MIN(1.0, pixel_size_db);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
519 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
520 aux = aux->next;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
521 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
522 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
523 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
524 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
525
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
526
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
527 //end = clock();
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
528
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
529
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
530
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
531
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
532 //fprintf(stdout, "\n[INFO] Query length %"PRIu64". Hits completed. Took %e seconds\n", data_query.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
533 fprintf(stdout, "\n[INFO] Query length %"PRIu64".\n", current_len);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
534
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
535 //begin = clock();
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
536
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
537 //reads_per_thread = (uint64_t) (floorl((long double) data_query.n_seqs / (long double) n_threads));
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
538
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
539 fprintf(stdout, "[INFO] Writing matrix.\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
540
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
541
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
542 uint64_t unique_diffuse = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
543 fprintf(out_database, "%"PRIu64"\n", aprox_len_query);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
544 fprintf(out_database, "%"PRIu64"\n", aprox_len_db);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
545 // And replace 2's with 1's
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
546
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
547 for(i=0; i<dimension+1; i++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
548 for(j=0; j<dimension; j++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
549 fprintf(out_database, "%"PRIu64" ", representation[i][j]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
550 unique_diffuse += representation[i][j];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
551 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
552 fprintf(out_database, "%"PRIu64"\n", representation[i][dimension]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
553 unique_diffuse += representation[i][dimension];
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
554 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
555
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
556 fprintf(stdout, "[INFO] Found %"PRIu64" unique hits for z = %"PRIu64".\n", unique_diffuse, diffuse_z);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
557
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
558
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
559
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
560 //free(ct->table);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
561 //free(hash_holder_table);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
562 /*
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
563 for(i=0;i<=n_pools_used_AVL;i++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
564 free(mp_AVL[i].base);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
565 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
566
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
567 for(i=0;i<=n_pools_used;i++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
568 free(mp[i].base);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
569 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
570 */
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
571 for(i=0;i<dimension;i++){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
572 free(representation[i]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
573 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
574 free(representation);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
575 if(out_database != NULL) fclose(out_database);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
576
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
577 return 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
578 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
579
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
580 void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
581
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
582 int pNum = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
583 while(pNum < argc){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
584 if(strcmp(av[pNum], "--help") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
585 fprintf(stdout, "USAGE:\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
586 fprintf(stdout, " CHROMEISTER -query [query] -db [database] -out [outfile]\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
587 fprintf(stdout, "OPTIONAL:\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
588 fprintf(stdout, " -kmer [Integer: k>1 (default 32)]\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
589 fprintf(stdout, " -diffuse [Integer: z>0 (default 4)]\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
590 fprintf(stdout, " -dimension Size of the output [Integer: d>0 (default 1000)]\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
591 fprintf(stdout, " -out [File path]\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
592 fprintf(stdout, " --help Shows help for program usage\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
593 fprintf(stdout, "\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
594 fprintf(stdout, "PLEASE NOTICE: The reverse complementary is calculated for the QUERY.\n");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
595 exit(1);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
596 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
597 if(strcmp(av[pNum], "-query") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
598 *query = fopen64(av[pNum+1], "rt");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
599 if(query==NULL) terror("Could not open query file");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
600 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
601 if(strcmp(av[pNum], "-db") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
602 *database = fopen64(av[pNum+1], "rt");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
603 if(database==NULL) terror("Could not open database file");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
604 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
605 if(strcmp(av[pNum], "-out") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
606 *out_database = fopen(av[pNum+1], "wt");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
607 if(out_database==NULL) terror("Could not open output database file");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
608 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
609 if(strcmp(av[pNum], "-kmer") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
610 *custom_kmer = (uint64_t) atoi(av[pNum+1]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
611 if(*custom_kmer < BYTES_IN_MER) terror("K-mer size must be larger than 4");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
612 if(*custom_kmer % BYTES_IN_MER != 0) terror("K-mer size must be a multiple of 4");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
613
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
614 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
615 if(strcmp(av[pNum], "-diffuse") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
616 *diffuse_z = (uint64_t) atoi(av[pNum+1]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
617 if(*diffuse_z == 0 || *diffuse_z > 32) terror("Z must satisfy 0<z<=32");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
618
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
619 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
620
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
621 if(strcmp(av[pNum], "-dimension") == 0){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
622 *dimension = (uint64_t) atoi(av[pNum+1]);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
623 if(*dimension < 1) terror("Dimension must be a positive integer");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
624 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
625
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
626 pNum++;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
627 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
628
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
629 if(*query==NULL || *database==NULL || *out_database==NULL) terror("A query, database and output file is required");
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
630 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
631
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
632 uint64_t get_seq_len(FILE * f) {
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
633 char c = '\0';
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
634 uint64_t l = 0;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
635
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
636 while(!feof(f)){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
637 c = getc(f);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
638
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
639 if(c == '>'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
640 while(c != '\n') c = getc(f);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
641 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
642 c = toupper(c);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
643 if(c >= 'A' && c <= 'Z'){
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
644 ++l;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
645 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
646 }
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
647
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
648
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
649 rewind(f);
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
650 return l;
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
651 }