annotate gecko/src/hdStat.c @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
35af401890c0 Uploaded
bitlab
parents:
diff changeset
1 /* leehd reads and displays the hash table from disk
35af401890c0 Uploaded
bitlab
parents:
diff changeset
2 Syntax: leehd prefixNameOUT
35af401890c0 Uploaded
bitlab
parents:
diff changeset
3
35af401890c0 Uploaded
bitlab
parents:
diff changeset
4 prefixNameOUT.d2hW : index of words-Pos-Occurrences
35af401890c0 Uploaded
bitlab
parents:
diff changeset
5 prefixNameOUT.d2hP : positions
35af401890c0 Uploaded
bitlab
parents:
diff changeset
6 both must be available
35af401890c0 Uploaded
bitlab
parents:
diff changeset
7
35af401890c0 Uploaded
bitlab
parents:
diff changeset
8 Any char as third argument means "Verbose mode"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
9 Feb.2012: computes word frequencies
35af401890c0 Uploaded
bitlab
parents:
diff changeset
10
35af401890c0 Uploaded
bitlab
parents:
diff changeset
11 ortrelles@uma.es / Dic.2011
35af401890c0 Uploaded
bitlab
parents:
diff changeset
12 ---------------------------------------------------------*/
35af401890c0 Uploaded
bitlab
parents:
diff changeset
13
35af401890c0 Uploaded
bitlab
parents:
diff changeset
14 #include <stdio.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
15 #include <stdlib.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
16 #include <string.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
17 #include <stdlib.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
18 #include <errno.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
19 #include <inttypes.h>
35af401890c0 Uploaded
bitlab
parents:
diff changeset
20
35af401890c0 Uploaded
bitlab
parents:
diff changeset
21 #include "structs.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
22 #include "commonFunctions.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
23 #include "dictionaryFunctions.h"
35af401890c0 Uploaded
bitlab
parents:
diff changeset
24
35af401890c0 Uploaded
bitlab
parents:
diff changeset
25 #define PEQ 1001
35af401890c0 Uploaded
bitlab
parents:
diff changeset
26
35af401890c0 Uploaded
bitlab
parents:
diff changeset
27 int main(int ac, char** av){
35af401890c0 Uploaded
bitlab
parents:
diff changeset
28
35af401890c0 Uploaded
bitlab
parents:
diff changeset
29 char fname[1024], *W;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
30 W=(char *)malloc(33*sizeof(char));
35af401890c0 Uploaded
bitlab
parents:
diff changeset
31 FILE *f1, *f2, *f3;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
32 hashentry he;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
33 uint64_t i=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
34 location spos;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
35 uint64_t nW=0,maxF=0, aveF=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
36 int flagV=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
37 int64_t freq[PEQ];
35af401890c0 Uploaded
bitlab
parents:
diff changeset
38
35af401890c0 Uploaded
bitlab
parents:
diff changeset
39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
40 if (ac==3) flagV=1;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
41 for (i=0;i<PEQ;i++) freq[i]=0;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table)
35af401890c0 Uploaded
bitlab
parents:
diff changeset
43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
44 sprintf(fname,"%s.d2hP",av[1]); // Positions file
35af401890c0 Uploaded
bitlab
parents:
diff changeset
45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
46
35af401890c0 Uploaded
bitlab
parents:
diff changeset
47 sprintf(fname,"%s.freq",av[1]); // output
35af401890c0 Uploaded
bitlab
parents:
diff changeset
48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
49
35af401890c0 Uploaded
bitlab
parents:
diff changeset
50 // kick-off
35af401890c0 Uploaded
bitlab
parents:
diff changeset
51 if(fread(&he,sizeof(hashentry),1,f1)!=1)
35af401890c0 Uploaded
bitlab
parents:
diff changeset
52 terror("Empty dictionary");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
53
35af401890c0 Uploaded
bitlab
parents:
diff changeset
54 while(!feof(f1)){
35af401890c0 Uploaded
bitlab
parents:
diff changeset
55
35af401890c0 Uploaded
bitlab
parents:
diff changeset
56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);}
35af401890c0 Uploaded
bitlab
parents:
diff changeset
57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
58 if (he.num>=PEQ) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
59 fprintf(f3, "%" PRIu64 "\t", he.num);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
60 showWord(&he.w, W);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
61 fprintf(f3, "%.32s", W);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
62 fprintf(f3, "%" PRIu64 "\n", he.num);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
63 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
64 else freq[he.num]++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
65 nW++;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
66 if (he.num>maxF) maxF=he.num;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
67 aveF+=he.num;
35af401890c0 Uploaded
bitlab
parents:
diff changeset
68
35af401890c0 Uploaded
bitlab
parents:
diff changeset
69 fseek(f2,0, he.pos);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
70 if (flagV) {
35af401890c0 Uploaded
bitlab
parents:
diff changeset
71
35af401890c0 Uploaded
bitlab
parents:
diff changeset
72 for (i=0;i<he.num;i++){
35af401890c0 Uploaded
bitlab
parents:
diff changeset
73 if(fread(&spos,sizeof(location),1,f2)!=1)
35af401890c0 Uploaded
bitlab
parents:
diff changeset
74 terror("Error reading the word occurrences");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
76 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
77 fprintf(stdout,"\n");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
78 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
79 if(fread(&he,sizeof(hashentry),1,f1)!=1)
35af401890c0 Uploaded
bitlab
parents:
diff changeset
80 if(ferror(f1))
35af401890c0 Uploaded
bitlab
parents:
diff changeset
81 terror("Error reading a dictionary entry");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
82 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
83 free(W);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
84
35af401890c0 Uploaded
bitlab
parents:
diff changeset
85 fclose(f1);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
86 fclose(f2);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
87 // store PEQ freqs--------
35af401890c0 Uploaded
bitlab
parents:
diff changeset
88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n");
35af401890c0 Uploaded
bitlab
parents:
diff changeset
89 for (i=0;i<PEQ;i++)
35af401890c0 Uploaded
bitlab
parents:
diff changeset
90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
91
35af401890c0 Uploaded
bitlab
parents:
diff changeset
92 fclose(f3);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
94
35af401890c0 Uploaded
bitlab
parents:
diff changeset
95 exit(0);
35af401890c0 Uploaded
bitlab
parents:
diff changeset
96 }
35af401890c0 Uploaded
bitlab
parents:
diff changeset
97
35af401890c0 Uploaded
bitlab
parents:
diff changeset
98
35af401890c0 Uploaded
bitlab
parents:
diff changeset
99