comparison gecko/src/indexmaker.c @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
comparison
equal deleted inserted replaced
0:ee6b15b409e5 1:35af401890c0
1
2 /*********************
3 File indexmaker.c
4 Author Bitlab
5 Description Computes a series of statistics and stores them in natural order and sorted order
6
7 USAGE <sequences.fasta> The fasta sequences
8 <output.index> The output name for the two indexes
9 * ------------------------------------------*/
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <inttypes.h>
16 #include "structs.h"
17 #include "comparisonFunctions.h"
18 #include "commonFunctions.h"
19
20
21 void copyRead(struct rIndex2 *To, struct rIndex2 *From);
22 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y);
23
24 int main(int ac, char **av) {
25
26 FILE *f, *fOut;
27 struct rIndex2 R;//, *RR;
28 //uint64_t *positions;
29 uint64_t c;
30 uint64_t tLen=0, tLmask=0, tN=0, nReads=0;
31 uint64_t maxRlen=0, maxRlenMasked=0, maxNonACGT=0;
32 uint64_t N=0, i;
33 //char tmp[MAXLID];
34
35 if (ac!=3) terror("USE: indexmaker <sequences.fasta> <output.index>");
36
37
38 if ((f=fopen(av[1],"rt"))==NULL) terror("Could not open sequences fasta file");
39
40 if ((fOut=fopen(av[2],"wb"))==NULL) terror("Could not open output file");
41
42 R.Lac=0;
43 R.rLen=0;
44 c=fgetc(f);
45 while (!feof(f)) {
46 while(c!='>') c=(char)fgetc(f); // only first time
47 R.pos=ftell(f) - 1;
48 i=0;
49 c=getc(f);
50 if(i==0 && c== ' ') c=getc(f); //Handle sequences with a ' ' after the '>'
51 while(i< MAXLID && c!='\n' && c!=' ') {
52 R.id[i++] = c;
53 c=getc(f);
54 }
55 R.id[i]=0x00;
56
57 while(c!='\n') c=getc(f);
58
59 i=0;
60 R.rLen=0;
61 R.rLmasked=0;
62 R.nonACGT=0;
63
64 while(c!='>' && !feof(f)) {
65 if (c=='N' || c=='n') R.nonACGT++;
66 else if (c>96 && c<123) R.rLmasked++;
67 c=toupper(c);
68 if (c>64 && c<91) R.rLen++;
69 if (c>47 && c<58) R.rLen++; //Handling color space reads
70
71 c=(char)fgetc(f);
72 }
73 R.rNumber = N;
74 if (R.rLen > maxRlen) maxRlen=R.rLen;
75 if (R.rLmasked > maxRlenMasked) maxRlenMasked = R.rLmasked;
76 if (R.nonACGT > maxNonACGT) maxNonACGT = R.nonACGT;
77
78 fwrite(&R,sizeof(struct rIndex2),1,fOut);
79
80 N++;
81
82 R.Lac +=R.rLen;
83 tLen +=R.rLen;
84 tN +=R.nonACGT;
85 tLmask+=R.rLmasked;
86
87 nReads++;
88
89 }
90
91 fclose(f);
92 fclose(fOut);
93
94 fprintf(stdout,"....done\n");
95 return 0;
96 }
97
98 void copyRead(struct rIndex2 *To, struct rIndex2 *From){
99
100 strcpy(To->id, From->id);
101 To->rNumber = From->rNumber;
102 To->rLen = From->rLen;
103 To->rLmasked= From->rLmasked;
104 To->nonACGT = From->nonACGT;
105 To->pos = From->pos; //Start position of read
106 To->Lac = From->Lac; // accumulated length
107 }
108
109
110 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y) {
111
112 struct rIndex2 pivote, aux;
113 uint64_t x1, y1;
114
115 copyRead(&pivote, &array[(x+y)/2]);
116 x1 = x;
117 y1 = y;
118
119 do{
120 while (strcmp(pivote.id, array[x1].id)>0) x1++;
121 while (strcmp(pivote.id, array[y1].id)<0) y1--;
122 if (x1 < y1) {
123 copyRead(&aux,&array[x1]);
124 copyRead(&array[x1], &array[y1]);
125 copyRead(&array[y1], &aux);
126 x1++;
127 y1--;
128 }else if (x1 == y1) x1++;
129 } while (x1 <=y1);
130
131 if (x<y1) QsortRead(array,x,y1);
132 if (x1<y) QsortRead(array,x1,y);
133 }