Mercurial > repos > bitlab > bitlab
comparison gecko/src/hdStat.c @ 1:35af401890c0 draft
Uploaded
| author | bitlab |
|---|---|
| date | Thu, 13 Dec 2018 07:59:25 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:ee6b15b409e5 | 1:35af401890c0 |
|---|---|
| 1 /* leehd reads and displays the hash table from disk | |
| 2 Syntax: leehd prefixNameOUT | |
| 3 | |
| 4 prefixNameOUT.d2hW : index of words-Pos-Occurrences | |
| 5 prefixNameOUT.d2hP : positions | |
| 6 both must be available | |
| 7 | |
| 8 Any char as third argument means "Verbose mode" | |
| 9 Feb.2012: computes word frequencies | |
| 10 | |
| 11 ortrelles@uma.es / Dic.2011 | |
| 12 ---------------------------------------------------------*/ | |
| 13 | |
| 14 #include <stdio.h> | |
| 15 #include <stdlib.h> | |
| 16 #include <string.h> | |
| 17 #include <stdlib.h> | |
| 18 #include <errno.h> | |
| 19 #include <inttypes.h> | |
| 20 | |
| 21 #include "structs.h" | |
| 22 #include "commonFunctions.h" | |
| 23 #include "dictionaryFunctions.h" | |
| 24 | |
| 25 #define PEQ 1001 | |
| 26 | |
| 27 int main(int ac, char** av){ | |
| 28 | |
| 29 char fname[1024], *W; | |
| 30 W=(char *)malloc(33*sizeof(char)); | |
| 31 FILE *f1, *f2, *f3; | |
| 32 hashentry he; | |
| 33 uint64_t i=0; | |
| 34 location spos; | |
| 35 uint64_t nW=0,maxF=0, aveF=0; | |
| 36 int flagV=0; | |
| 37 int64_t freq[PEQ]; | |
| 38 | |
| 39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n"); | |
| 40 if (ac==3) flagV=1; | |
| 41 for (i=0;i<PEQ;i++) freq[i]=0; | |
| 42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table) | |
| 43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file"); | |
| 44 sprintf(fname,"%s.d2hP",av[1]); // Positions file | |
| 45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file"); | |
| 46 | |
| 47 sprintf(fname,"%s.freq",av[1]); // output | |
| 48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file"); | |
| 49 | |
| 50 // kick-off | |
| 51 if(fread(&he,sizeof(hashentry),1,f1)!=1) | |
| 52 terror("Empty dictionary"); | |
| 53 | |
| 54 while(!feof(f1)){ | |
| 55 | |
| 56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);} | |
| 57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num); | |
| 58 if (he.num>=PEQ) { | |
| 59 fprintf(f3, "%" PRIu64 "\t", he.num); | |
| 60 showWord(&he.w, W); | |
| 61 fprintf(f3, "%.32s", W); | |
| 62 fprintf(f3, "%" PRIu64 "\n", he.num); | |
| 63 } | |
| 64 else freq[he.num]++; | |
| 65 nW++; | |
| 66 if (he.num>maxF) maxF=he.num; | |
| 67 aveF+=he.num; | |
| 68 | |
| 69 fseek(f2,0, he.pos); | |
| 70 if (flagV) { | |
| 71 | |
| 72 for (i=0;i<he.num;i++){ | |
| 73 if(fread(&spos,sizeof(location),1,f2)!=1) | |
| 74 terror("Error reading the word occurrences"); | |
| 75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq); | |
| 76 } | |
| 77 fprintf(stdout,"\n"); | |
| 78 } | |
| 79 if(fread(&he,sizeof(hashentry),1,f1)!=1) | |
| 80 if(ferror(f1)) | |
| 81 terror("Error reading a dictionary entry"); | |
| 82 } | |
| 83 free(W); | |
| 84 | |
| 85 fclose(f1); | |
| 86 fclose(f2); | |
| 87 // store PEQ freqs-------- | |
| 88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n"); | |
| 89 for (i=0;i<PEQ;i++) | |
| 90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]); | |
| 91 | |
| 92 fclose(f3); | |
| 93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW); | |
| 94 | |
| 95 exit(0); | |
| 96 } | |
| 97 | |
| 98 | |
| 99 |
