Mercurial > repos > bitlab > bitlab
comparison gecko/src/hdStat.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ee6b15b409e5 | 1:35af401890c0 |
---|---|
1 /* leehd reads and displays the hash table from disk | |
2 Syntax: leehd prefixNameOUT | |
3 | |
4 prefixNameOUT.d2hW : index of words-Pos-Occurrences | |
5 prefixNameOUT.d2hP : positions | |
6 both must be available | |
7 | |
8 Any char as third argument means "Verbose mode" | |
9 Feb.2012: computes word frequencies | |
10 | |
11 ortrelles@uma.es / Dic.2011 | |
12 ---------------------------------------------------------*/ | |
13 | |
14 #include <stdio.h> | |
15 #include <stdlib.h> | |
16 #include <string.h> | |
17 #include <stdlib.h> | |
18 #include <errno.h> | |
19 #include <inttypes.h> | |
20 | |
21 #include "structs.h" | |
22 #include "commonFunctions.h" | |
23 #include "dictionaryFunctions.h" | |
24 | |
25 #define PEQ 1001 | |
26 | |
27 int main(int ac, char** av){ | |
28 | |
29 char fname[1024], *W; | |
30 W=(char *)malloc(33*sizeof(char)); | |
31 FILE *f1, *f2, *f3; | |
32 hashentry he; | |
33 uint64_t i=0; | |
34 location spos; | |
35 uint64_t nW=0,maxF=0, aveF=0; | |
36 int flagV=0; | |
37 int64_t freq[PEQ]; | |
38 | |
39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n"); | |
40 if (ac==3) flagV=1; | |
41 for (i=0;i<PEQ;i++) freq[i]=0; | |
42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table) | |
43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file"); | |
44 sprintf(fname,"%s.d2hP",av[1]); // Positions file | |
45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file"); | |
46 | |
47 sprintf(fname,"%s.freq",av[1]); // output | |
48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file"); | |
49 | |
50 // kick-off | |
51 if(fread(&he,sizeof(hashentry),1,f1)!=1) | |
52 terror("Empty dictionary"); | |
53 | |
54 while(!feof(f1)){ | |
55 | |
56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);} | |
57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num); | |
58 if (he.num>=PEQ) { | |
59 fprintf(f3, "%" PRIu64 "\t", he.num); | |
60 showWord(&he.w, W); | |
61 fprintf(f3, "%.32s", W); | |
62 fprintf(f3, "%" PRIu64 "\n", he.num); | |
63 } | |
64 else freq[he.num]++; | |
65 nW++; | |
66 if (he.num>maxF) maxF=he.num; | |
67 aveF+=he.num; | |
68 | |
69 fseek(f2,0, he.pos); | |
70 if (flagV) { | |
71 | |
72 for (i=0;i<he.num;i++){ | |
73 if(fread(&spos,sizeof(location),1,f2)!=1) | |
74 terror("Error reading the word occurrences"); | |
75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq); | |
76 } | |
77 fprintf(stdout,"\n"); | |
78 } | |
79 if(fread(&he,sizeof(hashentry),1,f1)!=1) | |
80 if(ferror(f1)) | |
81 terror("Error reading a dictionary entry"); | |
82 } | |
83 free(W); | |
84 | |
85 fclose(f1); | |
86 fclose(f2); | |
87 // store PEQ freqs-------- | |
88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n"); | |
89 for (i=0;i<PEQ;i++) | |
90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]); | |
91 | |
92 fclose(f3); | |
93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW); | |
94 | |
95 exit(0); | |
96 } | |
97 | |
98 | |
99 |