annotate gecko/src/indexmaker.c @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
35af401890c0 Uploaded
bitlab
parents:
diff changeset
1
35af401890c0 Uploaded
bitlab
parents:
diff changeset
2 /*********************
35af401890c0 Uploaded
bitlab
parents:
diff changeset
3 File indexmaker.c
35af401890c0 Uploaded
bitlab
parents:
diff changeset
4 Author Bitlab
35af401890c0 Uploaded
bitlab
parents:
diff changeset
5 Description Computes a series of statistics and stores them in natural order and sorted order
35af401890c0 Uploaded
bitlab
parents:
diff changeset
6
35af401890c0 Uploaded
bitlab
parents:
diff changeset
7 USAGE <sequences.fasta> The fasta sequences
35af401890c0 Uploaded
bitlab
parents:
diff changeset
8 <output.index> The output name for the two indexes
35af401890c0 Uploaded
bitlab
parents:
diff changeset
9 * ------------------------------------------*/
35af401890c0 Uploaded
bitlab
parents:
diff changeset
10
35af401890c0 Uploaded
bitlab
parents:
diff changeset
11 #include <stdio.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
12 #include <stdlib.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
13 #include <ctype.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
14 #include <string.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
15 #include <inttypes.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
16 #include "structs.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
17 #include "comparisonFunctions.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
18 #include "commonFunctions.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
19
35af401890c0 Uploaded
bitlab
parents:
diff changeset
20
35af401890c0 Uploaded
bitlab
parents:
diff changeset
21 void copyRead(struct rIndex2 *To, struct rIndex2 *From);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
22 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
23
35af401890c0 Uploaded
bitlab
parents:
diff changeset
24 int main(int ac, char **av) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
25
35af401890c0 Uploaded
bitlab
parents:
diff changeset
26 FILE *f, *fOut;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
27 struct rIndex2 R;//, *RR;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
28 //uint64_t *positions;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
29 uint64_t c;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
30 uint64_t tLen=0, tLmask=0, tN=0, nReads=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
31 uint64_t maxRlen=0, maxRlenMasked=0, maxNonACGT=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
32 uint64_t N=0, i;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
33 //char tmp[MAXLID];
35af401890c0 Uploaded
bitlab
parents:
diff changeset
34
35af401890c0 Uploaded
bitlab
parents:
diff changeset
35 if (ac!=3) terror("USE: indexmaker <sequences.fasta> <output.index>");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
36
35af401890c0 Uploaded
bitlab
parents:
diff changeset
37
35af401890c0 Uploaded
bitlab
parents:
diff changeset
38 if ((f=fopen(av[1],"rt"))==NULL) terror("Could not open sequences fasta file");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
39
35af401890c0 Uploaded
bitlab
parents:
diff changeset
40 if ((fOut=fopen(av[2],"wb"))==NULL) terror("Could not open output file");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
41
35af401890c0 Uploaded
bitlab
parents:
diff changeset
42 R.Lac=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
43 R.rLen=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
44 c=fgetc(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
45 while (!feof(f)) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
46 while(c!='>') c=(char)fgetc(f); // only first time
35af401890c0 Uploaded
bitlab
parents:
diff changeset
47 R.pos=ftell(f) - 1;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
48 i=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
49 c=getc(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
50 if(i==0 && c== ' ') c=getc(f); //Handle sequences with a ' ' after the '>'
35af401890c0 Uploaded
bitlab
parents:
diff changeset
51 while(i< MAXLID && c!='\n' && c!=' ') {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
52 R.id[i++] = c;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
53 c=getc(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
54 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
55 R.id[i]=0x00;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
56
35af401890c0 Uploaded
bitlab
parents:
diff changeset
57 while(c!='\n') c=getc(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
58
35af401890c0 Uploaded
bitlab
parents:
diff changeset
59 i=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
60 R.rLen=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
61 R.rLmasked=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
62 R.nonACGT=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
63
35af401890c0 Uploaded
bitlab
parents:
diff changeset
64 while(c!='>' && !feof(f)) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
65 if (c=='N' || c=='n') R.nonACGT++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
66 else if (c>96 && c<123) R.rLmasked++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
67 c=toupper(c);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
68 if (c>64 && c<91) R.rLen++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
69 if (c>47 && c<58) R.rLen++; //Handling color space reads
35af401890c0 Uploaded
bitlab
parents:
diff changeset
70
35af401890c0 Uploaded
bitlab
parents:
diff changeset
71 c=(char)fgetc(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
72 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
73 R.rNumber = N;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
74 if (R.rLen > maxRlen) maxRlen=R.rLen;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
75 if (R.rLmasked > maxRlenMasked) maxRlenMasked = R.rLmasked;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
76 if (R.nonACGT > maxNonACGT) maxNonACGT = R.nonACGT;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
77
35af401890c0 Uploaded
bitlab
parents:
diff changeset
78 fwrite(&R,sizeof(struct rIndex2),1,fOut);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
79
35af401890c0 Uploaded
bitlab
parents:
diff changeset
80 N++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
81
35af401890c0 Uploaded
bitlab
parents:
diff changeset
82 R.Lac +=R.rLen;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
83 tLen +=R.rLen;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
84 tN +=R.nonACGT;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
85 tLmask+=R.rLmasked;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
86
35af401890c0 Uploaded
bitlab
parents:
diff changeset
87 nReads++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
88
35af401890c0 Uploaded
bitlab
parents:
diff changeset
89 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
90
35af401890c0 Uploaded
bitlab
parents:
diff changeset
91 fclose(f);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
92 fclose(fOut);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
93
35af401890c0 Uploaded
bitlab
parents:
diff changeset
94 fprintf(stdout,"....done\n");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
95 return 0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
96 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
97
35af401890c0 Uploaded
bitlab
parents:
diff changeset
98 void copyRead(struct rIndex2 *To, struct rIndex2 *From){
35af401890c0 Uploaded
bitlab
parents:
diff changeset
99
35af401890c0 Uploaded
bitlab
parents:
diff changeset
100 strcpy(To->id, From->id);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
101 To->rNumber = From->rNumber;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
102 To->rLen = From->rLen;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
103 To->rLmasked= From->rLmasked;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
104 To->nonACGT = From->nonACGT;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
105 To->pos = From->pos; //Start position of read
35af401890c0 Uploaded
bitlab
parents:
diff changeset
106 To->Lac = From->Lac; // accumulated length
35af401890c0 Uploaded
bitlab
parents:
diff changeset
107 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
108
35af401890c0 Uploaded
bitlab
parents:
diff changeset
109
35af401890c0 Uploaded
bitlab
parents:
diff changeset
110 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
111
35af401890c0 Uploaded
bitlab
parents:
diff changeset
112 struct rIndex2 pivote, aux;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
113 uint64_t x1, y1;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
114
35af401890c0 Uploaded
bitlab
parents:
diff changeset
115 copyRead(&pivote, &array[(x+y)/2]);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
116 x1 = x;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
117 y1 = y;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
118
35af401890c0 Uploaded
bitlab
parents:
diff changeset
119 do{
35af401890c0 Uploaded
bitlab
parents:
diff changeset
120 while (strcmp(pivote.id, array[x1].id)>0) x1++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
121 while (strcmp(pivote.id, array[y1].id)<0) y1--;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
122 if (x1 < y1) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
123 copyRead(&aux,&array[x1]);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
124 copyRead(&array[x1], &array[y1]);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
125 copyRead(&array[y1], &aux);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
126 x1++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
127 y1--;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
128 }else if (x1 == y1) x1++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
129 } while (x1 <=y1);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
130
35af401890c0 Uploaded
bitlab
parents:
diff changeset
131 if (x<y1) QsortRead(array,x,y1);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
132 if (x1<y) QsortRead(array,x1,y);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
133 }