comparison gecko/src/hdStat.c @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
comparison
equal deleted inserted replaced
0:ee6b15b409e5 1:35af401890c0
1 /* leehd reads and displays the hash table from disk
2 Syntax: leehd prefixNameOUT
3
4 prefixNameOUT.d2hW : index of words-Pos-Occurrences
5 prefixNameOUT.d2hP : positions
6 both must be available
7
8 Any value given as one extra argument after the prefix (argc == 3) means "Verbose mode"
9 Feb.2012: computes word frequencies
10
11 ortrelles@uma.es / Dic.2011
12 ---------------------------------------------------------*/
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <stdlib.h>
18 #include <errno.h>
19 #include <inttypes.h>
20
21 #include "structs.h"
22 #include "commonFunctions.h"
23 #include "dictionaryFunctions.h"
24
25 #define PEQ 1001
26
27 int main(int ac, char** av){
28
29 char fname[1024], *W;
30 W=(char *)malloc(33*sizeof(char));
31 FILE *f1, *f2, *f3;
32 hashentry he;
33 uint64_t i=0;
34 location spos;
35 uint64_t nW=0,maxF=0, aveF=0;
36 int flagV=0;
37 int64_t freq[PEQ];
38
39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n");
40 if (ac==3) flagV=1;
41 for (i=0;i<PEQ;i++) freq[i]=0;
42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table)
43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file");
44 sprintf(fname,"%s.d2hP",av[1]); // Positions file
45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file");
46
47 sprintf(fname,"%s.freq",av[1]); // output
48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file");
49
50 // kick-off
51 if(fread(&he,sizeof(hashentry),1,f1)!=1)
52 terror("Empty dictionary");
53
54 while(!feof(f1)){
55
56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);}
57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num);
58 if (he.num>=PEQ) {
59 fprintf(f3, "%" PRIu64 "\t", he.num);
60 showWord(&he.w, W);
61 fprintf(f3, "%.32s", W);
62 fprintf(f3, "%" PRIu64 "\n", he.num);
63 }
64 else freq[he.num]++;
65 nW++;
66 if (he.num>maxF) maxF=he.num;
67 aveF+=he.num;
68
69 fseek(f2,0, he.pos);
70 if (flagV) {
71
72 for (i=0;i<he.num;i++){
73 if(fread(&spos,sizeof(location),1,f2)!=1)
74 terror("Error reading the word occurrences");
75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq);
76 }
77 fprintf(stdout,"\n");
78 }
79 if(fread(&he,sizeof(hashentry),1,f1)!=1)
80 if(ferror(f1))
81 terror("Error reading a dictionary entry");
82 }
83 free(W);
84
85 fclose(f1);
86 fclose(f2);
87 // store PEQ freqs--------
88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n");
89 for (i=0;i<PEQ;i++)
90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]);
91
92 fclose(f3);
93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW);
94
95 exit(0);
96 }
97
98
99