diff chromeister/src/CHROMEISTER.c @ 1:3d1fbde7e0cc draft default tip

Deleted selected files
author alvarofaure
date Thu, 13 Dec 2018 03:41:58 -0500
parents 7fdf47a0bae8
children
line wrap: on
line diff
--- a/chromeister/src/CHROMEISTER.c	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,651 +0,0 @@
-/*********
-
-File        CHROMEISTER.c
-Author      EPW <estebanpw@uma.es>
-Description Computes hits and generates a dotplot
-
-USAGE       Usage is described by calling ./CHROMEISTER --help
-
-
-
-**********/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-#include "structs.h"
-#include "alignmentFunctions.h"
-#include "commonFunctions.h"
-#define STARTING_SEQS 1000
-#define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes
-#define RANGE 2
-
-uint64_t custom_kmer = 32; // Defined as external in structs.h
-uint64_t diffuse_z = 4; // Defined as external in structs.h
-
-uint64_t get_seq_len(FILE * f);
-
-
-void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z);
-
-int main(int argc, char ** av){
-    
-
-    /*
-    //Store positions of kmers
-    uint64_t n_pools_used = 0;
-    //Mempool_l * mp = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l));
-    //if(mp == NULL) terror("Could not allocate vectors for memory pools");
-    Mempool_l mp[MAX_MEM_POOLS];
-    init_mem_pool_llpos(&mp[n_pools_used]);
-    //llpos * aux;
-
-    uint64_t n_pools_used_AVL = 0;
-    Mempool_AVL mp_AVL[MAX_MEM_POOLS];
-    init_mem_pool_AVL(&mp_AVL[n_pools_used_AVL]);
-    */
-    Tuple_hits * thit;
-    
-    /*
-    AVLTree * root = NULL;
-    root = insert_AVLTree(root, 10, mp_AVL, &n_pools_used_AVL, 0, mp, &n_pools_used);
-    
-    llpos * some = find_AVLTree(root, 25);
-    while(some != NULL){
-        printf("#%"PRIu64", ", some->pos); some = some->next;
-    }
-    */
-
-    uint64_t i, j;
-
-    //query to read kmers from, database to find seeds
-    FILE * query = NULL, * database = NULL, * out_database = NULL;
-    uint64_t dimension = 1000; // Default 1000 * 1000
-    
-    
-    init_args(argc, av, &query, &database, &out_database, &custom_kmer, &dimension, &diffuse_z);
-
-
-
-    unsigned char char_converter[91];
-    char_converter[(unsigned char)'A'] = 0;
-    char_converter[(unsigned char)'C'] = 1;
-    char_converter[(unsigned char)'G'] = 2;
-    char_converter[(unsigned char)'T'] = 3;
-
-    
-    // Variables to account for positions
-    // Print info
-    fprintf(stdout, "[INFO] Loading database\n");
-    // Variables to read kmers
-    char c = 'N'; //Char to read character
-    // Current length of array and variables for the buffer
-    uint64_t idx = 0, r = 0;
-    
-    //Vector to read in batches
-    char * temp_seq_buffer = NULL;
-    if ((temp_seq_buffer = calloc(READBUF, sizeof(char))) == NULL) {
-        terror("Could not allocate memory for read buffer");
-    }
-
-    //Dimensional matrix
-    uint64_t ** representation = (uint64_t **) calloc(dimension+1, sizeof(uint64_t *));
-    if(representation == NULL) terror("Could not allocate representation");
-    for(i=0; i<dimension+1; i++){
-        representation[i] = (uint64_t *) calloc(dimension+1, sizeof(uint64_t));
-        if(representation[i] == NULL) terror("Could not allocate second loop representation");
-    }
-
-    /*
-    fseek(database, 0, SEEK_END);
-    uint64_t aprox_len_query = ftell(database);
-    uint64_t aprox_len_db = aprox_len_query;
-    rewind(database);
-    */
-    uint64_t aprox_len_query = get_seq_len(database);
-    uint64_t aprox_len_db = aprox_len_query;
-
-    uint64_t a_hundreth = (aprox_len_query/100);
-
-    unsigned char curr_kmer[custom_kmer], reverse_kmer[custom_kmer];
-    curr_kmer[0] = reverse_kmer[0] = '\0';
-    uint64_t word_size = 0, word_size_rev = 0;
-
-    //To hold all information related to database
-    uint64_t current_len = 0;
-    
-    //To force reading from the buffer
-    idx = READBUF + 1;
-
-    //unsigned char aux_kmer[custom_kmer+1];
-    
-    //Vector to store query seq
-    unsigned char * seq_vector_query = (unsigned char *) malloc(READBUF*sizeof(unsigned char));
-    if(seq_vector_query == NULL) terror("Could not allocate memory for query vector");
-
-    /*
-    Container * ct = (Container *) calloc(1, sizeof(Container));
-    if(ct == NULL) terror("Could not allocate container");    
-    */
-
-
-
-    Index * ctidx = (Index *) calloc(1, sizeof(Index));
-    if(ctidx == NULL) terror("Could not allocate container");
-    
-
-    //begin = clock();
-    
-
-    c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
-    while((!feof(database) || (feof(database) && idx < r))){
-
-        if(c == '>'){
-            
-
-
-            while(c != '\n') c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);  //Skip ID
-                
-
-            while(c != '>' && (!feof(database) || (feof(database) && idx < r))){ //Until next id
-                c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);
-                c = toupper(c);
-                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
-                    curr_kmer[word_size] = (unsigned char) c;
-                    if(word_size < custom_kmer) ++word_size;
-                    ++current_len;
-                    if(current_len % a_hundreth == 0){ 
-                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
-                        //printf("%"PRIu64"%%..wasted: (%e) (%e)", 1+100*pos_in_query/aprox_len_query, (double)(wasted_cycles_forward)/CLOCKS_PER_SEC, (double)(wasted_cycles_reverse)/CLOCKS_PER_SEC); 
-                        fflush(stdout);
-                    }
-                    
-
-
-                }else{ //It can be anything (including N, Y, X ...)
-
-                    if(c != '\n' && c != '>'){
-                        word_size = 0;
-                        // data_database.sequences[pos_in_database++] = (unsigned char) 'N'; //Convert to N
-                        ++current_len;
-
-                    } 
-                }
-                //if(current_len % 1000000 == 0) printf(" curr len %" PRIu64"\n", current_len);
-                if(word_size == custom_kmer){
-                    //write to hash table
-                    
-
-                    thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
-                    [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
-                    [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
-                    [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
-                    
-                    /*
-                    typedef struct tuple_hits{
-                        int repetition;
-                        int hit_count;
-                        uint64_t key;
-                        uint64_t pos;
-                    } Tuple_hits;
-                    */
-
-                    if(thit->repetition == FALSE){
-                        // Then we can insert
-                        thit->hit_count = 0;
-                        thit->key = collisioned_hash(&curr_kmer[0], custom_kmer);
-                        thit->pos = current_len;
-                    }else{
-                        // Otherwise we break it
-                        thit->repetition = TRUE;
-                    }
-
-                    //thit->root = insert_AVLTree(thit->root, hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
-                    //thit->root = insert_AVLTree(thit->root, collisioned_hash(&curr_kmer[0], custom_kmer), mp_AVL, &n_pools_used_AVL, current_len, mp, &n_pools_used);
-                    
-                    
-
-                    // Non overlapping
-                    word_size = 0;
-                    
-		
-		    // Overlapping
-                    //memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
-                    //--word_size;
-                }
-            }
-            word_size = 0;
-            
-        }else{
-            c = buffered_fgetc(temp_seq_buffer, &idx, &r, database);    
-        }
-        
-    }
-
-
-    //end = clock();
-
-    // data_database.total_len = pos_in_database;
-
-    //fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64". Hash table building took %e seconds\n", data_database.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
-    fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64".\n", current_len);
-    //close database
-    fclose(database);
-
-    
-    
-    //begin = clock();
-    
-
-    
-    
-    
-    double pixel_size_db = (double) dimension / (double) current_len;
-    double ratio_db = (double) current_len / dimension;
-
-
-    // Get file length
-    
-    //fseek(query, 0, SEEK_END);
-    //aprox_len_query = ftell(query);
-    //rewind(query);
-    aprox_len_query = get_seq_len(query);
-
-    //uint64_t reallocs_hash_holder_table = 1;
-    //uint64_t n_items_hash_holder_table = aprox_len_query / 5;
-
-    //Hash_holder * hash_holder_table = (Hash_holder *) calloc(n_items_hash_holder_table, sizeof(Hash_holder));
-    //if(hash_holder_table == NULL) terror("Could not allocate hash holding table");
-
-    a_hundreth = (aprox_len_query/100);
-    double pixel_size_query = (double) dimension / (double) aprox_len_query;
-    double ratio_query = (double) aprox_len_query / dimension;
-    
-
-    double i_r_fix = MAX(1.0, custom_kmer * pixel_size_query);
-    double j_r_fix = MAX(1.0, custom_kmer * pixel_size_db);
-
-    
-    
-    fprintf(stdout, "[INFO] Ratios: Q [%e] D [%e]. Lenghts: Q [%"PRIu64"] D [%"PRIu64"]\n", ratio_query, ratio_db, aprox_len_query, current_len);
-    fprintf(stdout, "[INFO] Pixel size: Q [%e] D [%e].\n", pixel_size_query, pixel_size_db);
-
-
-    fprintf(stdout, "[INFO] Computing absolute hit numbers.\n");
-
-
-    current_len = 0;
-
-    //llpos * the_original_hit;
-
-    //To force reading from the buffer
-    idx = READBUF + 1;
-    c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
-    //uint64_t c_hash_holder = 0;
-    
-    while((!feof(query) || (feof(query) && idx < r))){
-
-        if(c == '>'){
-            word_size = 0;
-            word_size_rev = custom_kmer-1;
-            
-
-
-
-            while(c != '\n'){ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } //Skip ID
-                
-
-            while(c != '>' && (!feof(query) || (feof(query) && idx < r))){ //Until next id
-                c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);
-                c = toupper(c);
-                if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
-                    
-                    ++current_len;
-                    if(current_len % a_hundreth == 0){ 
-                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/aprox_len_query); 
-                        fflush(stdout);
-                    }
-                    curr_kmer[word_size] = (unsigned char) c;
-                    ++word_size;
-
-                    switch(c){
-                        case ('A'): reverse_kmer[word_size_rev] = (unsigned)'T';
-                        break;
-                        case ('C'): reverse_kmer[word_size_rev] = (unsigned)'G';
-                        break;
-                        case ('G'): reverse_kmer[word_size_rev] = (unsigned)'C';
-                        break;
-                        case ('T'): reverse_kmer[word_size_rev] = (unsigned)'A';
-                        break;
-                    }
-                    if(word_size_rev != 0) --word_size_rev;
-
-
-
-                    
-                    if(word_size == custom_kmer){
-
-
-                        //hash_forward = hashOfWord(&curr_kmer[0], custom_kmer, FIXED_K);
-                        //hash_reverse = hashOfWord(&reverse_kmer[0], custom_kmer, FIXED_K);
-                        uint64_t hash_forward, hash_reverse;
-                        hash_forward = collisioned_hash(&curr_kmer[0], custom_kmer);
-                        hash_reverse = collisioned_hash(&reverse_kmer[0], custom_kmer);
-                        
-
-                        thit = &ctidx->table[char_converter[curr_kmer[0]]][char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]]
-                        [char_converter[curr_kmer[3]]][char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]]
-                        [char_converter[curr_kmer[6]]][char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]]
-                        [char_converter[curr_kmer[9]]][char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
-
-                        //AVLTree * search = find_AVLTree(thit->root, hash_forward);
-
-                        if(thit->repetition == FALSE && hash_forward == thit->key){
-                            // Attention ::::: you were not removing the ones with count==1 earlier 
-                            thit->pos_in_y = current_len;
-                            thit->hit_count++;
-                        }
-
-                        thit = &ctidx->table[char_converter[reverse_kmer[0]]][char_converter[reverse_kmer[1]]][char_converter[reverse_kmer[2]]]
-                        [char_converter[reverse_kmer[3]]][char_converter[reverse_kmer[4]]][char_converter[reverse_kmer[5]]]
-                        [char_converter[reverse_kmer[6]]][char_converter[reverse_kmer[7]]][char_converter[reverse_kmer[8]]]
-                        [char_converter[reverse_kmer[9]]][char_converter[reverse_kmer[10]]][char_converter[reverse_kmer[11]]];
-
-                        if(thit->repetition == FALSE && hash_reverse == thit->key){
-                            // Attention ::::: you were not removing the ones with count==1 earlier 
-                            thit->pos_in_y = current_len;
-                            thit->hit_count++;
-                        }
-
-                        /*
-                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
-                            thit->hit_count += search->count;
-                            
-                            hash_holder_table[c_hash_holder].pos = current_len;
-                            hash_holder_table[c_hash_holder].node = search;
-                            hash_holder_table[c_hash_holder].th = thit;
-                            ++c_hash_holder;
-                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
-                                ++reallocs_hash_holder_table;
-                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
-                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
-                            }
-                        }
-                        */
-
-                        
-
-                        
-
-                        //search = find_AVLTree(thit->root, hash_reverse);
-                        /*
-                        if(search != NULL && search->count == 1){ //If count is two, then it is a rep
-                            
-                            thit->hit_count += search->count;
-                            hash_holder_table[c_hash_holder].pos = current_len;
-                            hash_holder_table[c_hash_holder].node = search;
-                            hash_holder_table[c_hash_holder].th = thit;
-                            ++c_hash_holder;
-                            if(c_hash_holder == n_items_hash_holder_table*reallocs_hash_holder_table){
-                                ++reallocs_hash_holder_table;
-                                hash_holder_table = (Hash_holder *) realloc(hash_holder_table, n_items_hash_holder_table*reallocs_hash_holder_table*sizeof(Hash_holder));
-                                if(hash_holder_table == NULL) terror("Could not realloc hash holder table");
-                            }
-                        }
-                        */
-
-                        // Overlapping
-                        
-                        memmove(&curr_kmer[0], &curr_kmer[1], custom_kmer-1);
-                        memmove(&reverse_kmer[1], &reverse_kmer[0], custom_kmer-1);
-                        --word_size;
-
-                        // Non overlapping
-                        //word_size = 0;
-                        //word_size_rev = custom_kmer-1;
-                    }
-                }else{
-                    if(c != '\n' && c != '>'){
-                        word_size = 0;
-                        word_size_rev = custom_kmer-1;
-                        ++current_len;
-                    }
-                }
-            }
-        }else{
-            c = buffered_fgetc(temp_seq_buffer, &idx, &r, query);    
-        }
-        
-    } 
-
-    /// Out
-
-    fprintf(stdout, "Scanning hits table.\n");
-
-    a_hundreth = MAX(1, TOTAL_ENTRIES/100);
-    uint64_t t_computed = 0;
-    uint64_t w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11;
-    for(w0=0;w0<4;w0++){
-        for(w1=0;w1<4;w1++){
-            for(w2=0;w2<4;w2++){
-                for(w3=0;w3<4;w3++){
-                    for(w4=0;w4<4;w4++){
-                        for(w5=0;w5<4;w5++){
-                            for(w6=0;w6<4;w6++){
-                                for(w7=0;w7<4;w7++){
-                                    for(w8=0;w8<4;w8++){
-                                        for(w9=0;w9<4;w9++){
-                                            for(w10=0;w10<4;w10++){
-                                                for(w11=0;w11<4;w11++){
-
-                                                    if(t_computed % a_hundreth == 0){ 
-                                                        fprintf(stdout, "\r%"PRIu64"%%...", 1+100*t_computed/TOTAL_ENTRIES); 
-                                                        fflush(stdout);
-                                                    }
-                                                    ++t_computed;
-                                                    Tuple_hits * taux = &ctidx->table[w0][w1][w2][w3][w4][w5][w6][w7][w8][w9][w10][w11];
-                                                    if(taux->hit_count == 1){
-                                                        // We plot it   
-                                                        // Convert scale to representation
-                                                        uint64_t redir_db = (uint64_t) (taux->pos / (ratio_db));
-                                                        uint64_t redir_query = (uint64_t) (taux->pos_in_y / (ratio_query));
-                                                        double i_r = i_r_fix; double j_r = j_r_fix;
-                                                        while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
-                                                            if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
-                                                                representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
-                                                            }else{
-                                                                representation[redir_query][redir_db]++;
-                                                                break;
-                                                            }
-                                                            i_r -= MIN(1.0, pixel_size_query);
-                                                            j_r -= MIN(1.0, pixel_size_db);
-                                                        }                                                     
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    
-
-    //double average_hit = ((double) total_hits / (double) table_size);
-    //average_hit = 2.2;
-
-    /*
-    //fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" Avg = %e.\n", total_hits, table_size, average_hit);
-    fprintf(stdout, "[INFO] Total hit count is %"PRIu64" on a size of %"PRIu64" E' = %e.\n", total_hits, table_size, Eprime);
-
-    
-
-    
-
-    a_hundreth = MAX(1,c_hash_holder/100);
-    
-    for(current_len = 0; current_len < c_hash_holder; current_len++){
-
-        if(current_len % a_hundreth == 0){ 
-            fprintf(stdout, "\r%"PRIu64"%%...", 1+100*current_len/c_hash_holder); 
-            fflush(stdout);
-        }
-
-        aux = hash_holder_table[current_len].node->next;
-
-        //if(hash_holder_table[current_len].th->hit_count < (uint64_t) average_hit){
-        if(hash_holder_table[current_len].th->hit_count < (uint64_t) Eprime){
-            while(aux != NULL){
-                // Convert scale to representation
-                uint64_t redir_db = (uint64_t) (aux->pos / (ratio_db));
-                uint64_t redir_query = (uint64_t) (hash_holder_table[current_len].pos / (ratio_query));
-                double i_r = i_r_fix; double j_r = j_r_fix;
-                while((uint64_t) i_r >= 1 && (uint64_t) j_r >= 1){
-                    if((int64_t) redir_query - (int64_t) i_r > 0 && (int64_t) redir_db - (int64_t) j_r > 0){
-                        representation[(int64_t) redir_query - (int64_t) i_r][(int64_t) redir_db - (int64_t) j_r]++;
-                    }else{
-                        representation[redir_query][redir_db]++;
-                        break;
-                    }
-                    i_r -= MIN(1.0, pixel_size_query);
-                    j_r -= MIN(1.0, pixel_size_db);
-                }
-                aux = aux->next;
-            }
-        }
-    }
-    */
-
-
-    //end = clock();
-
-    
-    
-
-    //fprintf(stdout, "\n[INFO] Query length %"PRIu64". Hits completed. Took %e seconds\n", data_query.total_len, (double)(end-begin)/CLOCKS_PER_SEC);
-    fprintf(stdout, "\n[INFO] Query length %"PRIu64".\n", current_len);
-
-    //begin = clock();
-
-    //reads_per_thread = (uint64_t) (floorl((long double) data_query.n_seqs / (long double) n_threads));
-    
-    fprintf(stdout, "[INFO] Writing matrix.\n");
-
-
-    uint64_t unique_diffuse = 0;
-	fprintf(out_database, "%"PRIu64"\n", aprox_len_query);
-    fprintf(out_database, "%"PRIu64"\n", aprox_len_db);
-     // And replace 2's with 1's 
-	
-    for(i=0; i<dimension+1; i++){
-        for(j=0; j<dimension; j++){
-            fprintf(out_database, "%"PRIu64" ", representation[i][j]);
-	    unique_diffuse += representation[i][j];
-        }
-        fprintf(out_database, "%"PRIu64"\n",  representation[i][dimension]);
-	unique_diffuse += representation[i][dimension];
-    }
-
-    fprintf(stdout, "[INFO] Found %"PRIu64" unique hits for z = %"PRIu64".\n", unique_diffuse, diffuse_z);
-    
-
-    
-    //free(ct->table);
-    //free(hash_holder_table);
-    /*
-    for(i=0;i<=n_pools_used_AVL;i++){
-        free(mp_AVL[i].base);
-    }
-
-    for(i=0;i<=n_pools_used;i++){
-        free(mp[i].base);
-    }
-    */
-    for(i=0;i<dimension;i++){
-        free(representation[i]);
-    }
-    free(representation);
-    if(out_database != NULL) fclose(out_database);
-    
-    return 0;
-}
-
-void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * custom_kmer, uint64_t * dimension, uint64_t * diffuse_z){
-
-    int pNum = 0;
-    while(pNum < argc){
-        if(strcmp(av[pNum], "--help") == 0){
-            fprintf(stdout, "USAGE:\n");
-            fprintf(stdout, "           CHROMEISTER -query [query] -db [database] -out [outfile]\n");
-            fprintf(stdout, "OPTIONAL:\n");
-            fprintf(stdout, "           -kmer       [Integer:   k>1 (default 32)]\n");
-	    fprintf(stdout, "		-diffuse    [Integer:   z>0 (default 4)]\n");
-            fprintf(stdout, "           -dimension  Size of the output [Integer:   d>0 (default 1000)]\n");
-            fprintf(stdout, "           -out        [File path]\n");
-            fprintf(stdout, "           --help      Shows help for program usage\n");
-            fprintf(stdout, "\n");
-            fprintf(stdout, "PLEASE NOTICE: The reverse complementary is calculated for the QUERY.\n");
-            exit(1);
-        }
-        if(strcmp(av[pNum], "-query") == 0){
-            *query = fopen64(av[pNum+1], "rt");
-            if(query==NULL) terror("Could not open query file");
-        }
-        if(strcmp(av[pNum], "-db") == 0){
-            *database = fopen64(av[pNum+1], "rt");
-            if(database==NULL) terror("Could not open database file");
-        }
-        if(strcmp(av[pNum], "-out") == 0){
-            *out_database = fopen(av[pNum+1], "wt");
-            if(out_database==NULL) terror("Could not open output database file");
-        }
-        if(strcmp(av[pNum], "-kmer") == 0){
-            *custom_kmer = (uint64_t) atoi(av[pNum+1]);
-            if(*custom_kmer < BYTES_IN_MER) terror("K-mer size must be larger than 4");
-            if(*custom_kmer % BYTES_IN_MER != 0) terror("K-mer size must be a multiple of 4");
-
-        }
-        if(strcmp(av[pNum], "-diffuse") == 0){
-            *diffuse_z = (uint64_t) atoi(av[pNum+1]);
-            if(*diffuse_z == 0 || *diffuse_z > 32) terror("Z must satisfy 0<z<=32");
-
-        }
-
-        if(strcmp(av[pNum], "-dimension") == 0){
-            *dimension = (uint64_t) atoi(av[pNum+1]);
-            if(*dimension < 1) terror("Dimension must be a positive integer");
-        }
-        
-        pNum++;
-    }
-    
-    if(*query==NULL || *database==NULL || *out_database==NULL) terror("A query, database and output file is required");
-}
-
-uint64_t get_seq_len(FILE * f) {
-    char c = '\0';
-    uint64_t l = 0;
-
-    while(!feof(f)){
-        c = getc(f);
-
-        if(c == '>'){
-            while(c != '\n') c = getc(f);
-        }
-        c = toupper(c);
-        if(c >= 'A' && c <= 'Z'){
-            ++l;
-        }
-    }
-
-
-    rewind(f);
-    return l;
-}