Mercurial > repos > bitlab > bitlab
diff gecko/src/hdStat.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gecko/src/hdStat.c Thu Dec 13 07:59:25 2018 -0500 @@ -0,0 +1,99 @@ +/* leehd read and displays the hash table from disk + Syntax: leehd prefixNameOUT + + prefixNameOUT.h2dW : index of words-Pos-Ocurrences + prefixNameOUT.h2dP : positions + both must be available + + Any char as third argument means "Verbose mode" + Feb.2012: computes word frequencies + + ortrelles@uma.es / Dic.2011 + ---------------------------------------------------------*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <inttypes.h> + +#include "structs.h" +#include "commonFunctions.h" +#include "dictionaryFunctions.h" + +#define PEQ 1001 + +int main(int ac, char** av){ + + char fname[1024], *W; + W=(char *)malloc(33*sizeof(char)); + FILE *f1, *f2, *f3; + hashentry he; + uint64_t i=0; + location spos; + uint64_t nW=0,maxF=0, aveF=0; + int flagV=0; + int64_t freq[PEQ]; + + if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n"); + if (ac==3) flagV=1; + for (i=0;i<PEQ;i++) freq[i]=0; + sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table) + if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file"); + sprintf(fname,"%s.d2hP",av[1]); // Positions file + if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file"); + + sprintf(fname,"%s.freq",av[1]); // output + if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file"); + + // kick-off + if(fread(&he,sizeof(hashentry),1,f1)!=1) + terror("Empty dictionary"); + + while(!feof(f1)){ + + if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);} + if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num); + if (he.num>=PEQ) { + fprintf(f3, "%" PRIu64 "\t", he.num); + showWord(&he.w, W); + fprintf(f3, "%.32s", W); + fprintf(f3, "%" PRIu64 "\n", he.num); + } + else freq[he.num]++; + nW++; + if (he.num>maxF) maxF=he.num; + aveF+=he.num; + + fseek(f2,0, he.pos); + if (flagV) { + + for (i=0;i<he.num;i++){ + if(fread(&spos,sizeof(location),1,f2)!=1) + terror("Error reading the word occurrences"); + fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq); + } + fprintf(stdout,"\n"); + } + if(fread(&he,sizeof(hashentry),1,f1)!=1) + if(ferror(f1)) + terror("Error reading a dictionary entry"); + } + free(W); + + fclose(f1); + fclose(f2); + // store PEQ freqs-------- + fprintf(f3,"freqs of words that appear\nTimes\tnWords\n"); + for (i=0;i<PEQ;i++) + if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]); + + fclose(f3); + fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW); + + exit(0); +} + + +