Mercurial > repos > bitlab > bitlab
view gecko/src/indexmaker.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
line wrap: on
line source
/********************* File indexmaker.c Author Bitlab Description Computes a series of statistics and stores them in natural order and sorted order USAGE <sequences.fasta> The fasta sequences <output.index> The output name for the two indexes * ------------------------------------------*/ #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <string.h> #include <inttypes.h> #include "structs.h" #include "comparisonFunctions.h" #include "commonFunctions.h" void copyRead(struct rIndex2 *To, struct rIndex2 *From); void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y); int main(int ac, char **av) { FILE *f, *fOut; struct rIndex2 R;//, *RR; //uint64_t *positions; uint64_t c; uint64_t tLen=0, tLmask=0, tN=0, nReads=0; uint64_t maxRlen=0, maxRlenMasked=0, maxNonACGT=0; uint64_t N=0, i; //char tmp[MAXLID]; if (ac!=3) terror("USE: indexmaker <sequences.fasta> <output.index>"); if ((f=fopen(av[1],"rt"))==NULL) terror("Could not open sequences fasta file"); if ((fOut=fopen(av[2],"wb"))==NULL) terror("Could not open output file"); R.Lac=0; R.rLen=0; c=fgetc(f); while (!feof(f)) { while(c!='>') c=(char)fgetc(f); // only first time R.pos=ftell(f) - 1; i=0; c=getc(f); if(i==0 && c== ' ') c=getc(f); //Handle sequences with a ' ' after the '>' while(i< MAXLID && c!='\n' && c!=' ') { R.id[i++] = c; c=getc(f); } R.id[i]=0x00; while(c!='\n') c=getc(f); i=0; R.rLen=0; R.rLmasked=0; R.nonACGT=0; while(c!='>' && !feof(f)) { if (c=='N' || c=='n') R.nonACGT++; else if (c>96 && c<123) R.rLmasked++; c=toupper(c); if (c>64 && c<91) R.rLen++; if (c>47 && c<58) R.rLen++; //Handling color space reads c=(char)fgetc(f); } R.rNumber = N; if (R.rLen > maxRlen) maxRlen=R.rLen; if (R.rLmasked > maxRlenMasked) maxRlenMasked = R.rLmasked; if (R.nonACGT > maxNonACGT) maxNonACGT = R.nonACGT; fwrite(&R,sizeof(struct rIndex2),1,fOut); N++; R.Lac +=R.rLen; tLen +=R.rLen; tN +=R.nonACGT; tLmask+=R.rLmasked; nReads++; } fclose(f); fclose(fOut); fprintf(stdout,"....done\n"); return 0; } void copyRead(struct rIndex2 *To, struct rIndex2 *From){ strcpy(To->id, From->id); To->rNumber = From->rNumber; To->rLen = From->rLen; To->rLmasked= From->rLmasked; To->nonACGT = From->nonACGT; To->pos = From->pos; //Start position of read To->Lac = From->Lac; // accumulated length } void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y) { struct rIndex2 pivote, aux; uint64_t x1, y1; copyRead(&pivote, &array[(x+y)/2]); x1 = x; y1 = y; do{ while (strcmp(pivote.id, array[x1].id)>0) x1++; while (strcmp(pivote.id, array[y1].id)<0) y1--; if (x1 < y1) { copyRead(&aux,&array[x1]); copyRead(&array[x1], &array[y1]); copyRead(&array[y1], &aux); x1++; y1--; }else if (x1 == y1) x1++; } while (x1 <=y1); if (x<y1) QsortRead(array,x,y1); if (x1<y) QsortRead(array,x1,y); }