Mercurial > repos > calkan > mrcanavar
view mrcanavar-0.34/sam.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:44:13 -0500 |
parents | |
children |
line wrap: on
line source
#include "sam.h" void read_depth(char *indirSAM, char *depthFile, char gzSAM){ DIR *dp; int fileCnt; int totalFile; struct dirent *ep; int i; if (GENOME_CONF == NULL) print_error("Select genom configuration file (input) through the -conf parameter.\n"); if (indirSAM == NULL) print_error("Select input directory that contains the SAM files through the -samdir parameter.\n"); if (depthFile == NULL) print_error("Select read depth output file through the -depth parameter.\n"); loadRefConfig(GENOME_CONF); fprintf(stdout, "Scanning the SAM directory: %s\n", indirSAM); dp = opendir(indirSAM); if (dp == NULL) print_error("SAM directory not found.\n"); fileCnt = 0; totalFile = 0; while((ep=readdir(dp))){ if (ep->d_name[0] == '.') continue; if (ep->d_type == DT_DIR) continue; if (CHECKSAM && !endswith(ep->d_name, ".sam") && !endswith(ep->d_name, ".sam.gz")) continue; /* if (!strstr(ep->d_name, "sam")) continue; */ i = strlen(ep->d_name)-1; if ((ep->d_name[i] == 'z' && ep->d_name[i-1] == 'g' && ep->d_name[i-2] == '.') && gzSAM == 0){ print_error("File name ends with .gz yet --gz option is not selected. Are you sure?\nIf the files are indeed uncompressed, rename the files.\n"); } totalFile++; } rewinddir(dp); while((ep=readdir(dp))){ if (ep->d_name[0] == '.') continue; if (ep->d_type == DT_DIR) continue; if (CHECKSAM && !endswith(ep->d_name, ".sam") && !endswith(ep->d_name, ".sam.gz")) continue; /* if (!strstr(ep->d_name, "sam")) continue; */ fprintf(stdout, "\r \rLoading file %d of %d: %s...", (fileCnt+1), totalFile, ep->d_name); fflush(stdout); readSAM(indirSAM, ep->d_name, gzSAM); fileCnt++; } closedir(dp); if (fileCnt == 0) print_error("SAM directory does not contain any files with extensions \".sam\" or \".sam.gz\".\nIf you do have the correct files, please rename them to have either \".sam\" or \".sam.gz\" extensions.\n"); else fprintf(stdout, "\n\n%d file%s loaded.\n", fileCnt, fileCnt>1 ? "s" : ""); saveDepth(depthFile); } void readSAM(char *indirSAM, char *fname, char gzSAM){ FILE *sam; char *samfile; char samLine[4 * MAX_STR]; char prevChrom[MAX_STR]; char chrom[MAX_STR]; char *token; int pos; int chrom_id; int prev_chrom_id = -1; int lineCnt; samfile = (char *) malloc (sizeof(char) * (strlen(indirSAM) + strlen(fname) + 2)); sprintf(samfile, "%s/%s", indirSAM, fname); sam = my_fopen(samfile, "r", gzSAM); prevChrom[0] = 0; while (1){ /* read entire line from the SAM file */ if (gzSAM){ gzgets(sam, samLine, (4 * MAX_STR)); if (gzeof(sam)) break; } else{ fgets(samLine, (4 * MAX_STR), sam); if (feof(sam)) break; } /* parse chrom */ token = NULL; token = strtok(samLine, "\t"); // read name token = strtok(NULL, "\t"); // flag token = strtok(NULL, "\t"); //chrom strcpy(chrom, token); trimspace(chrom); token = strtok(NULL, "\t"); // pos pos = atoi(token) - 1; // SAM file is 1-based; our format is 0-based if (pos < 0) continue; /* debug if needed fprintf(stdout, "%s\t%d\n", chrom, pos); */ chrom_id = insert_read_lw(chrom, pos, prevChrom, prev_chrom_id); /* if (VERBOSE) fprintf(stdout, "[READSAM]\t%s\t%d\tchrom_id: %d\n", chrom, pos, chrom_id); */ if (chrom_id == -1) // this chromosome is not in the config file continue; chrom_id = insert_read_sw(chrom, pos, prevChrom, chrom_id); if (chrom_id == -1) // this chromosome is not in the config file; it shouldn't come to this continue; chrom_id = insert_read_cw(chrom, pos, prevChrom, chrom_id); if (chrom_id == -1) // this chromosome is not in the config file; it shouldn't come to this continue; strcpy(prevChrom, chrom); prev_chrom_id = chrom_id; } if (gzSAM) gzclose(sam); else fclose(sam); free(samfile); } int insert_read_lw(char *chrom, int pos, char *prevChrom, int prev_chrom_id){ int chrom_id; int window_id; int flank; chrom_id = findChrom(chrom, prevChrom, prev_chrom_id); if (chrom_id != -1){ if (pos >= chromosomes[chrom_id]->length){ return chrom_id; } window_id = windowSearch(chromosomes[chrom_id]->lw, chromosomes[chrom_id]->lw_cnt, pos); if (window_id != -1){ chromosomes[chrom_id]->lw[window_id].depth += 1; /* iterate left */ flank = window_id - 1; while (flank >= 0 && (pos >= chromosomes[chrom_id]->lw[flank].start && pos <= chromosomes[chrom_id]->lw[flank].end)) chromosomes[chrom_id]->lw[flank--].depth += 1; /* iterate right */ flank = window_id + 1; while (flank < chromosomes[chrom_id]->lw_cnt && (pos >= chromosomes[chrom_id]->lw[flank].start && pos <= chromosomes[chrom_id]->lw[flank].end)) chromosomes[chrom_id]->lw[flank++].depth += 1; } } return chrom_id; } int insert_read_sw(char *chrom, int pos, char *prevChrom, int prev_chrom_id){ int chrom_id; int window_id; int flank; chrom_id = findChrom(chrom, prevChrom, prev_chrom_id); if (chrom_id != -1){ if (pos >= chromosomes[chrom_id]->length) return chrom_id; window_id = windowSearch(chromosomes[chrom_id]->sw, chromosomes[chrom_id]->sw_cnt, pos); if (window_id != -1){ chromosomes[chrom_id]->sw[window_id].depth += 1; /* iterate left */ flank = window_id - 1; while (flank >= 0 && (pos >= chromosomes[chrom_id]->sw[flank].start && pos <= chromosomes[chrom_id]->sw[flank].end)) chromosomes[chrom_id]->sw[flank--].depth += 1; /* iterate right */ flank = window_id + 1; while (flank < chromosomes[chrom_id]->sw_cnt && (pos >= chromosomes[chrom_id]->sw[flank].start && pos <= chromosomes[chrom_id]->sw[flank].end)) chromosomes[chrom_id]->sw[flank++].depth += 1; } } return chrom_id; } int insert_read_cw(char *chrom, int pos, char *prevChrom, int prev_chrom_id){ int chrom_id; int window_id; int flank; chrom_id = findChrom(chrom, prevChrom, prev_chrom_id); if (chrom_id != -1){ if (pos >= chromosomes[chrom_id]->length) return chrom_id; window_id = windowSearch(chromosomes[chrom_id]->cw, chromosomes[chrom_id]->cw_cnt, pos); if (window_id != -1){ chromosomes[chrom_id]->cw[window_id].depth += 1; } } return chrom_id; } int findChrom(char *chrom, char *prevChrom, int prev_chrom_id){ int chrom_id; if (!strcmp(chrom, prevChrom)){ chrom_id = prev_chrom_id; } else if (prev_chrom_id == -1 && !strcmp(chrom, chromosomes[0]->name)){ // the first entry in the file chrom_id = 0; } else if (prev_chrom_id != -1 && prev_chrom_id < num_chrom -1 && !strcmp(chrom, chromosomes[prev_chrom_id+1]->name)){ chrom_id = prev_chrom_id + 1; } else{ chrom_id = binSearch(chrom); } return chrom_id; } int binSearch(char *chrom){ int start; int end; int med; int strtest; start = 0; end = num_chrom - 1; med = (start + end) / 2; while (1){ if (start > end) return -1; if (start == med || end == med){ if (!strcmp(chrom, chromosomes[start]->name)) return start; else if (!strcmp(chrom, chromosomes[end]->name)) return end; else{ return -1; } } strtest = strcmp(chrom, chromosomes[med]->name); if(strtest == 0) return med; else if (strtest < 0){ end = med; med = (start + end) / 2; } else { start = med; med = (start + end) / 2; } } } int windowSearch(struct window *searchWin, int winCnt, int pos){ int start; int end; int med; start = 0; end = winCnt - 1; med = (start + end) / 2; while (1){ if (start > end) return -1; if (start == med || end == med){ if (pos >= searchWin[start].start && pos < searchWin[start].end) return start; else if (pos >= searchWin[end].start && pos < searchWin[end].end) return end; else return -1; } if (pos >= searchWin[med].start && pos < searchWin[med].end) return med; else if (pos < searchWin[med].start){ end = med; med = (start + end) / 2; } else { start = med; med = (start + end) / 2; } } } void saveDepth(char *depthFile){ int i; int j; int retVal; char *fname; FILE *txtDepth; FILE *binDepth; fname = (char *) malloc (sizeof(char) * (strlen(depthFile) + 10)); sprintf(fname, "%s.lw.txt", depthFile); txtDepth = my_fopen(fname, "w", 0); binDepth = my_fopen(depthFile, "w", 0); /* start with the magicNum, I will use this as a format check when loading */ retVal = fwrite(&magicNum, sizeof(magicNum), 1, binDepth); fprintf(stdout, "Saving depth file %s\n", fname); for (i = 0; i < num_chrom; i++){ for (j = 0; j < chromosomes[i]->lw_cnt; j++){ fprintf(txtDepth, "%s\t%d\t%d\t%f\t%d\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, (int) (chromosomes[i]->lw[j].depth)); retVal = fwrite(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth); } } fclose(txtDepth); sprintf(fname, "%s.sw.txt", depthFile); txtDepth = my_fopen(fname, "w", 0); fprintf(stdout, "Saving depth file %s\n", fname); for (i = 0; i < num_chrom; i++){ for (j = 0; j < chromosomes[i]->sw_cnt; j++){ fprintf(txtDepth, "%s\t%d\t%d\t%f\t%d\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, (int) (chromosomes[i]->sw[j].depth)); retVal = fwrite(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth); } } fclose(txtDepth); sprintf(fname, "%s.cw.txt", depthFile); txtDepth = my_fopen(fname, "w", 0); fprintf(stdout, "Saving depth file %s\n", fname); for (i = 0; i < num_chrom; i++){ for (j = 0; j < chromosomes[i]->cw_cnt; j++){ fprintf(txtDepth, "%s\t%d\t%d\t%f\t%d\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, (int) (chromosomes[i]->cw[j].depth)); retVal = fwrite(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth); } } fclose(txtDepth); fclose(binDepth); free(fname); }