Mercurial > repos > bitlab > bitlab
comparison gecko/src/indexmaker.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ee6b15b409e5 | 1:35af401890c0 |
---|---|
1 | |
2 /********************* | |
3 File indexmaker.c | |
4 Author Bitlab | |
5 Description Computes a series of statistics and stores them in natural order and sorted order | |
6 | |
7 USAGE <sequences.fasta> The fasta sequences | |
8 <output.index> The output name for the two indexes | |
9 * ------------------------------------------*/ | |
10 | |
11 #include <stdio.h> | |
12 #include <stdlib.h> | |
13 #include <ctype.h> | |
14 #include <string.h> | |
15 #include <inttypes.h> | |
16 #include "structs.h" | |
17 #include "comparisonFunctions.h" | |
18 #include "commonFunctions.h" | |
19 | |
20 | |
21 void copyRead(struct rIndex2 *To, struct rIndex2 *From); | |
22 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y); | |
23 | |
24 int main(int ac, char **av) { | |
25 | |
26 FILE *f, *fOut; | |
27 struct rIndex2 R;//, *RR; | |
28 //uint64_t *positions; | |
29 uint64_t c; | |
30 uint64_t tLen=0, tLmask=0, tN=0, nReads=0; | |
31 uint64_t maxRlen=0, maxRlenMasked=0, maxNonACGT=0; | |
32 uint64_t N=0, i; | |
33 //char tmp[MAXLID]; | |
34 | |
35 if (ac!=3) terror("USE: indexmaker <sequences.fasta> <output.index>"); | |
36 | |
37 | |
38 if ((f=fopen(av[1],"rt"))==NULL) terror("Could not open sequences fasta file"); | |
39 | |
40 if ((fOut=fopen(av[2],"wb"))==NULL) terror("Could not open output file"); | |
41 | |
42 R.Lac=0; | |
43 R.rLen=0; | |
44 c=fgetc(f); | |
45 while (!feof(f)) { | |
46 while(c!='>') c=(char)fgetc(f); // only first time | |
47 R.pos=ftell(f) - 1; | |
48 i=0; | |
49 c=getc(f); | |
50 if(i==0 && c== ' ') c=getc(f); //Handle sequences with a ' ' after the '>' | |
51 while(i< MAXLID && c!='\n' && c!=' ') { | |
52 R.id[i++] = c; | |
53 c=getc(f); | |
54 } | |
55 R.id[i]=0x00; | |
56 | |
57 while(c!='\n') c=getc(f); | |
58 | |
59 i=0; | |
60 R.rLen=0; | |
61 R.rLmasked=0; | |
62 R.nonACGT=0; | |
63 | |
64 while(c!='>' && !feof(f)) { | |
65 if (c=='N' || c=='n') R.nonACGT++; | |
66 else if (c>96 && c<123) R.rLmasked++; | |
67 c=toupper(c); | |
68 if (c>64 && c<91) R.rLen++; | |
69 if (c>47 && c<58) R.rLen++; //Handling color space reads | |
70 | |
71 c=(char)fgetc(f); | |
72 } | |
73 R.rNumber = N; | |
74 if (R.rLen > maxRlen) maxRlen=R.rLen; | |
75 if (R.rLmasked > maxRlenMasked) maxRlenMasked = R.rLmasked; | |
76 if (R.nonACGT > maxNonACGT) maxNonACGT = R.nonACGT; | |
77 | |
78 fwrite(&R,sizeof(struct rIndex2),1,fOut); | |
79 | |
80 N++; | |
81 | |
82 R.Lac +=R.rLen; | |
83 tLen +=R.rLen; | |
84 tN +=R.nonACGT; | |
85 tLmask+=R.rLmasked; | |
86 | |
87 nReads++; | |
88 | |
89 } | |
90 | |
91 fclose(f); | |
92 fclose(fOut); | |
93 | |
94 fprintf(stdout,"....done\n"); | |
95 return 0; | |
96 } | |
97 | |
98 void copyRead(struct rIndex2 *To, struct rIndex2 *From){ | |
99 | |
100 strcpy(To->id, From->id); | |
101 To->rNumber = From->rNumber; | |
102 To->rLen = From->rLen; | |
103 To->rLmasked= From->rLmasked; | |
104 To->nonACGT = From->nonACGT; | |
105 To->pos = From->pos; //Start position of read | |
106 To->Lac = From->Lac; // accumulated length | |
107 } | |
108 | |
109 | |
110 void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y) { | |
111 | |
112 struct rIndex2 pivote, aux; | |
113 uint64_t x1, y1; | |
114 | |
115 copyRead(&pivote, &array[(x+y)/2]); | |
116 x1 = x; | |
117 y1 = y; | |
118 | |
119 do{ | |
120 while (strcmp(pivote.id, array[x1].id)>0) x1++; | |
121 while (strcmp(pivote.id, array[y1].id)<0) y1--; | |
122 if (x1 < y1) { | |
123 copyRead(&aux,&array[x1]); | |
124 copyRead(&array[x1], &array[y1]); | |
125 copyRead(&array[y1], &aux); | |
126 x1++; | |
127 y1--; | |
128 }else if (x1 == y1) x1++; | |
129 } while (x1 <=y1); | |
130 | |
131 if (x<y1) QsortRead(array,x,y1); | |
132 if (x1<y) QsortRead(array,x1,y); | |
133 } |