Mercurial > repos > bitlab > imsame
view IMSAME/src/IMSAME.c @ 0:762009a91895 draft
Uploaded
author | bitlab |
---|---|
date | Sat, 15 Dec 2018 18:04:10 -0500 |
parents | |
children |
line wrap: on
line source
/********* File IMSAME.c Author EPW <estebanpw@uma.es> Description Computes an incremental alignment on reads versus reads using n threads USAGE Usage is described by calling ./IMSAME --help **********/ #include <stdio.h> #include <stdlib.h> #include <math.h> #include <string.h> #include <ctype.h> #include <pthread.h> #include "structs.h" #include "alignmentFunctions.h" #include "commonFunctions.h" #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MIN(x, y) (((x) <= (y)) ? (x) : (y)) #define STARTING_SEQS 1000 #define PIECE_OF_DB_REALLOC 3200000 //half a gigabyte if divided by 8 bytes uint64_t custom_kmer = 12; // Defined as external in structs.h void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * n_threads, long double * minevalue, long double * mincoverage, int * igap, int * egap, long double * minidentity, long double * window, unsigned char * full_comp, uint64_t * custom_kmer, unsigned char * hits_only, uint64_t * n_parts); int VERBOSE_ACTIVE = 0; int main(int argc, char ** av){ clock_t begin, end; int error; //To tell if threads could not be launched uint64_t i,j; //query to read kmers from, database to find seeds FILE * query = NULL, * database = NULL, * out_database = NULL; long double minevalue = 1/powl(10, 10); //Default 1 * 10^-10 long double mincoverage = 0.5, minidentity = 0.5, window = 0.15; //Default int igap = -5, egap = -2; unsigned char full_comp = FALSE; unsigned char hits_only = FALSE; uint64_t n_threads = 4; uint64_t n_parts = 3; // Default is 3 init_args(argc, av, &query, &database, &out_database, &n_threads, &minevalue, &mincoverage, &igap, &egap, &minidentity, &window, &full_comp, &custom_kmer, &hits_only, &n_parts); //uint64_t reads_per_thread; uint64_t sum_accepted_reads = 0; begin = clock(); fprintf(stdout, "[INFO] Init. quick table\n"); pthread_t * threads = (pthread_t *) malloc(n_threads * sizeof(pthread_t)); if(threads == NULL) terror("Could not create threads"); pthread_t * loading_threads = (pthread_t *) malloc(FIXED_LOADING_THREADS * sizeof(pthread_t)); if(loading_threads == NULL) terror("Could not create loading threads"); HashTableArgs * hta = (HashTableArgs *) malloc(n_threads*sizeof(HashTableArgs)); if(hta == NULL) terror("Could not allocate arguments for hash table"); pthread_mutex_t lock; //The mutex to lock the queue if (pthread_mutex_init(&lock, NULL) != 0) terror("Could not init mutex"); // To be used if only computing hits uint64_t ** hits_table = NULL; unsigned char ** my_x = (unsigned char **) malloc(n_threads * sizeof(unsigned char*)); unsigned char ** my_y = (unsigned char **) malloc(n_threads * sizeof(unsigned char*)); unsigned char ** marker_taggs = NULL; // To be used with full comparison struct positioned_cell ** mc = (struct positioned_cell **) malloc(n_threads * sizeof(struct positioned_cell *)); struct cell *** table = (struct cell ***) malloc(n_threads * sizeof(struct cell **)); if(table == NULL) terror("Could not allocate NW table"); char ** reconstruct_X = (char **) malloc(n_threads * sizeof(char *)); char ** reconstruct_Y = (char **) malloc(n_threads * sizeof(char *)); if(reconstruct_Y == NULL || reconstruct_X == NULL) terror("Could not allocate output alignment sequences"); char ** writing_buffer_alignment = (char **) malloc(n_threads * sizeof(char*)); for(i=0;i<n_threads;i++){ table[i] = (struct cell **) malloc(MAX_READ_SIZE * sizeof(struct cell *)); for(j=0;j<MAX_READ_SIZE;j++){ table[i][j] = (struct cell *) malloc((1+MAX_WINDOW_SIZE)*sizeof(struct cell)); if(table[i][j] == NULL) terror("Could not allocate memory for second loop of table"); // Delete this /* uint64_t r; for(r=0;r<MAX_WINDOW_SIZE+1;r++){ table[i][j][r].score = INT64_MIN; table[i][j][r].xfrom = 10000000000; table[i][j][r].yfrom = 10000000000; } */ } mc[i] = (struct positioned_cell *) malloc(MAX_READ_SIZE * sizeof(struct positioned_cell)); my_x[i] = (unsigned char *) malloc(2*MAX_READ_SIZE * sizeof(unsigned char)); my_y[i] = (unsigned char *) malloc(2*MAX_READ_SIZE * sizeof(unsigned char)); reconstruct_X[i] = (char *) malloc(2*MAX_READ_SIZE * sizeof(char)); reconstruct_Y[i] = (char *) malloc(2*MAX_READ_SIZE * sizeof(char)); writing_buffer_alignment[i] = (char *) malloc(6*MAX_READ_SIZE*sizeof(char)); //6 times because of 2 times the length of the max of the read, and that happens 3 times (seqX,align,Y) if(table[i] == NULL || mc[i] == NULL || my_x[i] == NULL || my_y[i] == NULL || reconstruct_X[i] == NULL || reconstruct_Y[i] == NULL || writing_buffer_alignment[i] == NULL) terror("Could not allocate buffer for alignment output"); } end = clock(); fprintf(stdout, "[INFO] Initialization took %e seconds.\n", (double)(end-begin)/CLOCKS_PER_SEC); //Variables to account for positions //Print info fprintf(stdout, "[INFO] Loading database.\n"); //Variables to read kmers char c = 'N'; //Char to read character //Current length of array and variables for the buffer uint64_t idx = 0, r = 0; //Vector to read in batches char * temp_seq_buffer = NULL; if ((temp_seq_buffer = calloc(READBUF, sizeof(char))) == NULL) { terror("Could not allocate memory for read buffer"); } //Vector to store database seq unsigned char ** seq_vector_database = (unsigned char **) malloc(FIXED_LOADING_THREADS*sizeof(unsigned char *)); if(seq_vector_database == NULL) terror("Could not allocate memory for database vector"); uint64_t ** database_positions = (uint64_t **) malloc(FIXED_LOADING_THREADS*sizeof(uint64_t)); if(database_positions == NULL) terror("Could not allocate database sequences positions"); //Mempool_l * mp = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l)); //if(mp == NULL) terror("Could not allocate vectors for memory pools"); //Mempool_l mp[FIXED_LOADING_THREADS][MAX_MEM_POOLS]; Mempool_l ** mp = (Mempool_l **) malloc(FIXED_LOADING_THREADS*sizeof(Mempool_l *)); if(mp == NULL) terror("Could not allocate memory pools"); for(i=0; i<FIXED_LOADING_THREADS; i++){ mp[i] = (Mempool_l *) malloc(MAX_MEM_POOLS*sizeof(Mempool_l)); if(mp[i] == NULL) terror("Could not allocate individual memory pools"); } Mempool_AVL ** mp_AVL = (Mempool_AVL **) malloc(FIXED_LOADING_THREADS*sizeof(Mempool_AVL *)); if(mp_AVL == NULL) terror("Could not allocate memory AVL pools"); for(i=0; i<FIXED_LOADING_THREADS; i++){ mp_AVL[i] = (Mempool_AVL *) malloc(MAX_MEM_POOLS*sizeof(Mempool_AVL)); if(mp_AVL[i] == NULL) terror("Could not allocate individual memory AVL pools"); } AVLContainer * ct_A = (AVLContainer *) calloc(1, sizeof(AVLContainer)); if(ct_A == NULL) terror("Could not allocate container A"); AVLContainer * ct_B = (AVLContainer *) calloc(1, sizeof(AVLContainer)); if(ct_B == NULL) terror("Could not allocate container B"); AVLContainer * ct_C = (AVLContainer *) calloc(1, sizeof(AVLContainer)); if(ct_C == NULL) terror("Could not allocate container C"); AVLContainer * ct_D = (AVLContainer *) calloc(1, sizeof(AVLContainer)); if(ct_D == NULL) terror("Could not allocate container D"); SeqInfo data_database[FIXED_LOADING_THREADS]; uint64_t full_db_n_seqs = 0; unsigned char curr_kmer[custom_kmer]; unsigned char aux_kmer[custom_kmer+1]; curr_kmer[0] = '\0'; uint64_t word_size = 0; SeqInfo data_query; //To force reading from the buffer idx = READBUF + 1; //Vector to store query seq unsigned char * seq_vector_query = (unsigned char *) malloc(READBUF*sizeof(unsigned char)); if(seq_vector_query == NULL) terror("Could not allocate memory for query vector"); uint64_t n_realloc_query = 1, pos_in_query = 0, n_seqs_query_realloc = 1; uint64_t * query_positions = (uint64_t *) malloc(INITSEQS*sizeof(uint64_t)); if(query_positions == NULL) terror("Could not allocate query sequences positions"); // Read number of sequences and load into RAM begin = clock(); fseek(database, 0L, SEEK_END); uint64_t db_temp_size = ftell(database); char * load_buffer = (char *) malloc(db_temp_size * sizeof(char)); if(load_buffer == NULL) terror("Could not allocate intermediate buffer for threads sequence array"); fseek(database, 0L, SEEK_SET); if(db_temp_size != fread(load_buffer, sizeof(char), db_temp_size, database)) terror("Could not read full sequence"); LoadingDBArgs args_DB_load[FIXED_LOADING_THREADS]; args_DB_load[0].data_database = &data_database[0]; args_DB_load[1].data_database = &data_database[1]; args_DB_load[2].data_database = &data_database[2]; args_DB_load[3].data_database = &data_database[3]; args_DB_load[0].ct = ct_A; args_DB_load[1].ct = ct_B; args_DB_load[2].ct = ct_C; args_DB_load[3].ct = ct_D; for(i=0; i<FIXED_LOADING_THREADS; i++){ args_DB_load[i].read_to = 0; args_DB_load[i].read_from = 0; } //uint64_t a_fourth = db_temp_size / 4; //get_num_seqs_and_length(load_buffer, &full_db_n_seqs, &db_temp_size, args_DB_load); end = clock(); fprintf(stdout, "[INFO] Loading into RAM took %e seconds.\n", (double)(end-begin)/CLOCKS_PER_SEC); begin = clock(); /* char * temp_seq_buffer; SeqInfo * data_database; uint64_t t_len; uint64_t word_size; uint64_t read_from; uint64_t read_to; char thread_id; Mempool_l * mp; uint64_t n_pools_used; */ // Launch threads to process database args_DB_load[0].thread_id = 'A'; args_DB_load[1].thread_id = 'B'; args_DB_load[2].thread_id = 'C'; args_DB_load[3].thread_id = 'D'; for(i=0; i<FIXED_LOADING_THREADS; i++){ //seq_vector_database[i] = (unsigned char *) malloc((args_DB_load[i].read_to - args_DB_load[i].read_from)*sizeof(unsigned char)); seq_vector_database[i] = (unsigned char *) malloc((READBUF)*sizeof(unsigned char)); //database_positions[i] = (uint64_t *) malloc((1+data_database[i].n_seqs)*sizeof(uint64_t)); database_positions[i] = (uint64_t *) malloc((INITSEQS)*sizeof(uint64_t)); if(seq_vector_database[i] == NULL || database_positions[i] == NULL) terror("Could not allocate memory for individual database vectors"); data_database[i].sequences = seq_vector_database[i]; //To hold all information related to database args_DB_load[i].n_pools_used = 0; args_DB_load[i].n_pools_used_AVL = 0; init_mem_pool_llpos(&mp[i][args_DB_load[i].n_pools_used]); init_mem_pool_AVL(&mp_AVL[i][args_DB_load[i].n_pools_used_AVL]); data_database[i].start_pos = database_positions[i]; args_DB_load[i].n_allocs = 1; args_DB_load[i].read_from = i * (db_temp_size / 4); args_DB_load[i].read_to = (i+1) * (db_temp_size / 4); args_DB_load[i].temp_seq_buffer = load_buffer; args_DB_load[i].t_len = db_temp_size; args_DB_load[i].word_size = custom_kmer; args_DB_load[i].mp = mp[i]; args_DB_load[i].mp_AVL = mp_AVL[i]; if( 0 != (error = pthread_create(&loading_threads[i], NULL, load_input, (void *) (&args_DB_load[i])) )){ fprintf(stdout, "[@loading] Thread %"PRIu64" returned %d:", i, error); terror("Could not launch"); } } //Wait for threads to finish for(i=0;i<FIXED_LOADING_THREADS;i++){ pthread_join(loading_threads[i], NULL); } // Deallocate memory not needed anymore free(load_buffer); free(loading_threads); //fprintf(stdout, "[INFO] WARNING!!!!!!!!! USING NON OVERLAPPING MERS, WHICH IS NOT INCLUDED AS OPTION!!!! DISABLE\n"); //data_database.total_len = pos_in_database; //printf("tables have %"PRIu64" %"PRIu64", %"PRIu64" %"PRIu64"\n", data_database[0].total_len, data_database[1].total_len, data_database[2].total_len, data_database[3].total_len); uint64_t full_db_len = data_database[0].total_len + data_database[1].total_len + data_database[2].total_len + data_database[3].total_len; full_db_n_seqs = args_DB_load[0].contained_reads + args_DB_load[1].contained_reads + args_DB_load[2].contained_reads + args_DB_load[3].contained_reads; end = clock(); fprintf(stdout, "[INFO] Database loaded and of length %"PRIu64" (%"PRIu64" sequences). Hash table building took %e seconds\n", full_db_len, full_db_n_seqs, (double)(end-begin)/CLOCKS_PER_SEC); //close database fclose(database); // Allocate marker tags to avoid repeting computation marker_taggs = (unsigned char **) malloc(n_threads * sizeof(unsigned char *)); if(marker_taggs == NULL) terror("Could not allocate marker taggs"); for(i=0;i<n_threads;i++){ marker_taggs[i] = (unsigned char *) malloc(full_db_n_seqs); if(marker_taggs[i] == NULL) terror("Could not allocate second loop of marker taggs"); } if(hits_only == TRUE){ hits_table = (uint64_t **) calloc(n_threads, sizeof(uint64_t *)); if(hits_table == NULL) terror("Could not allocate hits table"); uint64_t z; for(z=0;z<n_threads;z++){ hits_table[z] = (uint64_t *) calloc(full_db_n_seqs, sizeof(uint64_t)); if(hits_table[z] == NULL) terror("Could not allocate sub table of hits"); } } begin = clock(); //To force reading from the buffer idx = READBUF + 1; data_query.sequences = seq_vector_query; data_query.start_pos = query_positions; data_query.total_len = 0; data_query.n_seqs = 0; //Print info fprintf(stdout, "[INFO] Loading query.\n"); c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); while((!feof(query) || (feof(query) && idx < r))){ if(c == '>'){ data_query.start_pos[data_query.n_seqs++] = pos_in_query; word_size = 0; if(pos_in_query == READBUF*n_realloc_query){ n_realloc_query++; data_query.sequences = (unsigned char *) realloc(data_query.sequences, READBUF*n_realloc_query*sizeof(unsigned char)); if(data_query.sequences == NULL) terror("Could not reallocate temporary query"); } if(data_query.n_seqs == INITSEQS*n_seqs_query_realloc){ n_seqs_query_realloc++; data_query.start_pos = (uint64_t *) realloc(data_query.start_pos, INITSEQS*n_seqs_query_realloc*sizeof(uint64_t)); } while(c != '\n'){ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } //Skip ID while(c != '>' && (!feof(query) || (feof(query) && idx < r))){ //Until next id c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); c = toupper(c); if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){ data_query.sequences[pos_in_query++] = (unsigned char) c; curr_kmer[word_size++] = (unsigned char) c; if(word_size == custom_kmer){ memcpy(aux_kmer, curr_kmer, custom_kmer); aux_kmer[custom_kmer] = '\0'; memcpy(aux_kmer, &curr_kmer[1], custom_kmer-1); memcpy(curr_kmer, aux_kmer, custom_kmer-1); word_size--; } if(pos_in_query == READBUF*n_realloc_query){ n_realloc_query++; data_query.sequences = (unsigned char *) realloc(data_query.sequences, READBUF*n_realloc_query*sizeof(unsigned char)); if(data_query.sequences == NULL) terror("Could not reallocate temporary query"); } }else{ if(c != '\n' && c != '\r' && c != '>'){ word_size = 0; data_query.sequences[pos_in_query++] = (unsigned char) 'N'; //Convert to N if(pos_in_query == READBUF*n_realloc_query){ n_realloc_query++; data_query.sequences = (unsigned char *) realloc(data_query.sequences, READBUF*n_realloc_query*sizeof(unsigned char)); if(data_query.sequences == NULL) terror("Could not reallocate temporary query"); } } } } }else{ c = buffered_fgetc(temp_seq_buffer, &idx, &r, query); } } end = clock(); data_query.total_len = pos_in_query; fprintf(stdout, "[INFO] Query loaded and of length %"PRIu64". Took %e seconds\n", data_query.total_len, (double)(end-begin)/CLOCKS_PER_SEC); begin = clock(); Head queue_head; Queue * first_task = generate_queue(&queue_head, data_query.n_seqs, n_threads, n_parts); /* Queue * traverse = queue_head.head; while(traverse != NULL){ printf("current_piece: %"PRIu64"-%"PRIu64"\n", traverse->r1, traverse->r2); traverse = traverse->next; } getchar(); */ //reads_per_thread = (uint64_t) (floorl((long double) data_query.n_seqs / (long double) n_threads)); fprintf(stdout, "[INFO] Computing alignments.\n"); /* uint64_t z; for(z=0; z<POOL_SIZE; z++){ aux = mp[0].base + z; fprintf(stdout, "%p\n", aux); fflush(stdout); } */ // Make the full db uint64_t contained_reads[FIXED_LOADING_THREADS] = {0,0,0,0}; uint64_t base_coordinates[FIXED_LOADING_THREADS] = {0,0,0,0}; //contained_reads[0] = 0; //base_coordinates[0] = 0; for(i=1;i<FIXED_LOADING_THREADS;i++){ contained_reads[i] = args_DB_load[i-1].contained_reads + contained_reads[i-1]; base_coordinates[i] = args_DB_load[i-1].base_coordinates + base_coordinates[i-1]; } /* for(i = 0; i < 4 ; i++){ printf("total len: %"PRIu64"\n", data_database[i].total_len); } getchar(); */ /* for(i = 0; i < 4 ; i++){ printf("c:%"PRIu64" - b:%"PRIu64"\n", contained_reads[i], base_coordinates[i]); } getchar(); */ SeqInfo final_db; final_db.sequences = (unsigned char *) malloc(full_db_len * sizeof(unsigned char)); if(final_db.sequences == NULL) terror ("Could not allocate final database sequences"); memcpy(&final_db.sequences[0], &data_database[0].sequences[0], data_database[0].total_len); memcpy(&final_db.sequences[data_database[0].total_len], &data_database[1].sequences[0], data_database[1].total_len); memcpy(&final_db.sequences[data_database[0].total_len+data_database[1].total_len], &data_database[2].sequences[0], data_database[2].total_len); memcpy(&final_db.sequences[data_database[0].total_len+data_database[1].total_len+data_database[2].total_len], &data_database[3].sequences[0], data_database[3].total_len); final_db.start_pos = (uint64_t *) malloc(full_db_n_seqs * sizeof(uint64_t)); if(final_db.start_pos == NULL) terror("Could not allocate final db starting positions"); // Copy them with offset i=0; while(i<data_database[0].n_seqs){ final_db.start_pos[i] = data_database[0].start_pos[i]; ++i; } j=0; //printf("switch\n"); while(j<data_database[1].n_seqs){ final_db.start_pos[i] = data_database[1].start_pos[j] + data_database[0].total_len; ++i; ++j; } j=0; //printf("switch\n"); while(j<data_database[2].n_seqs){ final_db.start_pos[i] = data_database[2].start_pos[j] + data_database[1].total_len + data_database[0].total_len; ++i; ++j; } j=0; //printf("switch\n"); while(j<data_database[3].n_seqs){ final_db.start_pos[i] = data_database[3].start_pos[j] + data_database[2].total_len + data_database[1].total_len + data_database[0].total_len; ++i; ++j; } final_db.total_len = full_db_len; final_db.n_seqs = full_db_n_seqs; for(i=0;i<FIXED_LOADING_THREADS;i++){ free(data_database[i].sequences); free(data_database[i].start_pos); } // Debug /* uint64_t max = 0; for(i=0; i<full_db_n_seqs-2; i++){ //printf("%"PRIu64" - %"PRIu64" res in \n", final_db.start_pos[i], final_db.start_pos[i+1]); //printf("%"PRIu64"\n", final_db.start_pos[i+2] - final_db.start_pos[i+1]); if(final_db.start_pos[i+2] - final_db.start_pos[i+1] > max) max = final_db.start_pos[i+2] - final_db.start_pos[i+1]; //getchar(); } printf("%"PRIu64"\n", max); */ for(i=0;i<n_threads;i++){ hta[i].id = i; hta[i].database = &final_db; hta[i].query = &data_query; //hta[i].from = i * reads_per_thread; //hta[i].to = (i + 1) * reads_per_thread; hta[i].container_a = ct_A; hta[i].container_b = ct_B; hta[i].container_c = ct_C; hta[i].container_d = ct_D; hta[i].contained_reads = contained_reads; hta[i].base_coordinates = base_coordinates; hta[i].accepted_query_reads = 0; hta[i].min_e_value = minevalue; hta[i].min_coverage = mincoverage; hta[i].min_identity = minidentity; hta[i].out = out_database; hta[i].igap = igap; hta[i].egap = egap; hta[i].window = window; hta[i].mc = mc[i]; hta[i].table = table[i]; hta[i].reconstruct_X = reconstruct_X[i]; hta[i].reconstruct_Y = reconstruct_Y[i]; hta[i].writing_buffer_alignment = writing_buffer_alignment[i]; hta[i].my_x = my_x[i]; hta[i].my_y = my_y[i]; hta[i].queue_head = &queue_head; hta[i].lock = &lock; hta[i].full_comp = full_comp; hta[i].markers = marker_taggs[i]; if(hits_only) hta[i].hits = hits_table[i]; else hta[i].hits = NULL; //if(i==n_threads-1) hta[i].to = data_query.n_seqs; if( 0 != (error = pthread_create(&threads[i], NULL, computeAlignmentsByThread, (void *) (&hta[i])) )){ fprintf(stdout, "Thread %"PRIu64" returned %d:", i, error); terror("Could not launch"); } } //Wait for threads to finish for(i=0;i<n_threads;i++){ pthread_join(threads[i], NULL); } for(i=0;i<n_threads;i++){ sum_accepted_reads += hta[i].accepted_query_reads; } // Accumulate hits just in case if(hits_only == TRUE){ for(i=0;i<full_db_n_seqs;i++){ for(j=1;j<n_threads;j++){ hta[0].hits[i] += hta[j].hits[i]; } if(out_database != NULL){ if(i == final_db.n_seqs - 1){ fprintf(out_database, "%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n", i, hta[0].hits[i], full_db_len - final_db.start_pos[i]); }else{ fprintf(out_database, "%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n", i, hta[0].hits[i], final_db.start_pos[i+1] - final_db.start_pos[i]); } } } } end = clock(); fprintf(stdout, "[INFO] Alignments computed in %e seconds.\n", (double)(end-begin)/CLOCKS_PER_SEC); fprintf(stdout, "[INFO] %"PRIu64" reads (%"PRIu64") from the query were found in the database (%"PRIu64") at a minimum e-value of %Le and minimum coverage of %d%%.\n", sum_accepted_reads, data_query.n_seqs, final_db.n_seqs, (long double)minevalue, (int) (100*mincoverage)); fprintf(stdout, "[INFO] The Jaccard-index is: %Le\n", (long double)sum_accepted_reads/((final_db.n_seqs+data_query.n_seqs)-sum_accepted_reads)); fprintf(stdout, "[INFO] Deallocating heap memory.\n"); fclose(query); if(out_database != NULL) fclose(out_database); free(temp_seq_buffer); if(hits_only == TRUE){ // Write hits here for(i=0;i<n_threads;i++){ free(hits_table[i]); } free(hits_table); } free(final_db.sequences); free(final_db.start_pos); free(data_query.sequences); free(data_query.start_pos); free(ct_A->root); free(ct_B->root); free(ct_C->root); free(ct_D->root); //free(ct); free(threads); free(hta); for(i=0;i<n_threads;i++){ for(j=0;j<MAX_READ_SIZE;j++){ free(table[i][j]); } free(table[i]); free(mc[i]); free(reconstruct_X[i]); free(reconstruct_Y[i]); free(my_x[i]); free(my_y[i]); free(writing_buffer_alignment[i]); free(marker_taggs[i]); } free(marker_taggs); free(table); free(mc); free(reconstruct_X); free(reconstruct_Y); free(my_y); free(my_x); free(writing_buffer_alignment); pthread_mutex_destroy(&lock); for(i=0; i<FIXED_LOADING_THREADS; i++){ for(j=0;j<=args_DB_load[i].n_pools_used;j++){ free(mp[i][j].base); } free(mp[i]); } for(i=0; i<FIXED_LOADING_THREADS; i++){ for(j=0;j<=args_DB_load[i].n_pools_used_AVL;j++){ free(mp_AVL[i][j].base); } free(mp_AVL[i]); } free(mp); free(mp_AVL); free(seq_vector_database); free(database_positions); //Deallocate queue (its allocated as an array) free(first_task); //free(mp); return 0; } void init_args(int argc, char ** av, FILE ** query, FILE ** database, FILE ** out_database, uint64_t * n_threads, long double * minevalue, long double * mincoverage, int * igap, int * egap, long double * minidentity, long double * window, unsigned char * full_comp, uint64_t * custom_kmer, unsigned char * hits_only, uint64_t * n_parts){ int pNum = 0; while(pNum < argc){ if(strcmp(av[pNum], "--verbose") == 0) VERBOSE_ACTIVE = 1; if(strcmp(av[pNum], "--help") == 0){ fprintf(stdout, "USAGE:\n"); fprintf(stdout, " IMSAME -query [query] -db [database]\n"); fprintf(stdout, "OPTIONAL:\n"); fprintf(stdout, " -n_threads [Integer: 0<n_threads] (default 4)\n"); fprintf(stdout, " -evalue [Double: 0<=pval<1] (default: 1 * 10^-10)\n"); fprintf(stdout, " -coverage [Double: 0<coverage<=1 (default: 0.5)\n"); fprintf(stdout, " -identity [Double: 0<identity<=1 (default: 0.5)\n"); fprintf(stdout, " -igap [Integer: (default: 5)\n"); fprintf(stdout, " -egap [Integer: (default: 2)\n"); fprintf(stdout, " -window [Double: 0<window<=0.5 (default: 0.15)\n"); fprintf(stdout, " -kmer [Integer: k>1 (default 12)]\n"); fprintf(stdout, " -n_parts [Integer: n>0 (default 3)]\n"); fprintf(stdout, " -out [File path]\n"); fprintf(stdout, " --full Does not stop at first match and reports all equalities\n"); fprintf(stdout, " --verbose Turns verbose on\n"); fprintf(stdout, " --help Shows help for program usage\n"); fprintf(stdout, " --hits Compute only the non-overlapping hits\n"); exit(1); } if(strcmp(av[pNum], "--full") == 0) *full_comp = TRUE; if(strcmp(av[pNum], "--hits") == 0) *hits_only = TRUE; if(strcmp(av[pNum], "-n_parts") == 0) *n_parts = (uint64_t) atoi(av[pNum+1]); if(strcmp(av[pNum], "-query") == 0){ *query = fopen64(av[pNum+1], "rt"); if(query==NULL) terror("Could not open query file"); } if(strcmp(av[pNum], "-db") == 0){ *database = fopen64(av[pNum+1], "rt"); if(database==NULL) terror("Could not open database file"); } if(strcmp(av[pNum], "-out") == 0){ *out_database = fopen64(av[pNum+1], "wt"); if(out_database==NULL) terror("Could not open output database file"); } if(strcmp(av[pNum], "-evalue") == 0){ *minevalue = (long double) atof(av[pNum+1]); if(*minevalue < 0) terror("Min-e-value must be larger than zero"); } if(strcmp(av[pNum], "-window") == 0){ *window = (long double) atof(av[pNum+1]); if(*window <= 0 || *window > 0.5) terror("Window percentage size must lie between 0<window<=0.5"); } if(strcmp(av[pNum], "-coverage") == 0){ *mincoverage = (long double) atof(av[pNum+1]); if(*mincoverage <= 0) terror("Min-coverage must be larger than zero"); } if(strcmp(av[pNum], "-identity") == 0){ *minidentity = (long double) atof(av[pNum+1]); if(*minidentity <= 0) terror("Min-identity must be larger than zero"); } if(strcmp(av[pNum], "-igap") == 0){ *igap = - (atoi(av[pNum+1])); } if(strcmp(av[pNum], "-egap") == 0){ *egap = - (atoi(av[pNum+1])); } if(strcmp(av[pNum], "-n_threads") == 0){ *n_threads = (uint64_t) atoi(av[pNum+1]); if(*n_threads < 0) terror("Number of threads must be larger than zero"); } if(strcmp(av[pNum], "-kmer") == 0){ *custom_kmer = (uint64_t) atoi(av[pNum+1]); if(*custom_kmer < 2) terror("K-mer size must be larger than 1"); } pNum++; } if(*query==NULL || *database==NULL) terror("A query and database is required"); }