Mercurial > repos > bitlab > bitlab
diff gecko/src/w2hd.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gecko/src/w2hd.c Thu Dec 13 07:59:25 2018 -0500 @@ -0,0 +1,81 @@ +/* creates a hash table in disk from a set of ordered words + Syntax: w2hd wordsSort.In prefixNameOUT + + wordsSort is a bin file with Word-Pos-Seq + prefixNameOUT.h2dW : index of words-Pos-Ocurrences + prefixNameOUT.h2dP : positions(Pos+seq) + + Feb.2011: add a new parameter: PrefixSize + + PrefixSize: defines the word-prefix size to be used to identify when two + words are the "same" + + ortrelles@uma.es / Dic.2011 + ---------------------------------------------------------*/ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <inttypes.h> +#include "structs.h" +#include "commonFunctions.h" +#include "dictionaryFunctions.h" + +int main(int ac, char** av){ + + char fname[1024]; + uint64_t nWords=0; + FILE* fw, *fOut1, *fOut2; + + wentry we; + hashentry he; + location loc; + + if(ac!=3)terror("USE: w2hd wordsSort.In prefixNameOUT\n"); + + if ((fw = fopen(av[1],"rb"))==NULL) terror("opening IN file"); + + sprintf(fname,"%s.d2hW",av[2]); + if ((fOut1 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hW file"); + sprintf(fname,"%s.d2hP",av[2]); + if ((fOut2 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hP file"); + + if(fread(&we,sizeof(wentry),1,fw)!=1){ + terror("empty words file"); + } + memcpy(&he.w.b[0],&we.w.b[0],8); + he.pos=0; + he.num=0; + + while(!feof(fw)){ + loc.pos=we.pos; + loc.seq=we.seq; + if (wordcmp(&he.w.b[0],&we.w.b[0],32)!=0) { + fwrite(&he,sizeof(hashentry),1,fOut1); + memcpy(&he.w.b[0],&we.w.b[0],8); + he.pos=ftell(fOut2); + he.num=0; + } + + fwrite(&loc,sizeof(location),1,fOut2); + he.num++; + nWords++; + + if(fread(&we,sizeof(wentry),1,fw)!=sizeof(wentry)){ + if(ferror(fw))terror("error reading words file"); + } + } + + fwrite(&he,sizeof(hashentry),1,fOut1); + + fprintf(stdout,"\nw2hd: %s tot words=%" PRIu64 "\n",av[1],nWords); + + fclose(fOut1); + fclose(fOut2); + fclose(fw); + + return 0; +} + + +