comparison gecko/src/w2hd.c @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
comparison
equal deleted inserted replaced
0:ee6b15b409e5 1:35af401890c0
1 /* creates a hash table in disk from a set of ordered words
2 Syntax: w2hd wordsSort.In prefixNameOUT
3
4 wordsSort is a bin file with Word-Pos-Seq
5 prefixNameOUT.d2hW : index of words-Pos-Occurrences
6 prefixNameOUT.d2hP : positions(Pos+seq)
7
8 Feb.2011: add a new parameter: PrefixSize
9
10 PrefixSize: defines the word-prefix size to be used to identify when two
11 words are the "same"
12
13 ortrelles@uma.es / Dic.2011
14 ---------------------------------------------------------*/
15
16 #include <string.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <inttypes.h>
20 #include "structs.h"
21 #include "commonFunctions.h"
22 #include "dictionaryFunctions.h"
23
24 int main(int ac, char** av){
25
26 char fname[1024];
27 uint64_t nWords=0;
28 FILE* fw, *fOut1, *fOut2;
29
30 wentry we;
31 hashentry he;
32 location loc;
33
34 if(ac!=3)terror("USE: w2hd wordsSort.In prefixNameOUT\n");
35
36 if ((fw = fopen(av[1],"rb"))==NULL) terror("opening IN file");
37
38 sprintf(fname,"%s.d2hW",av[2]);
39 if ((fOut1 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hW file");
40 sprintf(fname,"%s.d2hP",av[2]);
41 if ((fOut2 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hP file");
42
43 if(fread(&we,sizeof(wentry),1,fw)!=1){
44 terror("empty words file");
45 }
46 memcpy(&he.w.b[0],&we.w.b[0],8);
47 he.pos=0;
48 he.num=0;
49
50 while(!feof(fw)){
51 loc.pos=we.pos;
52 loc.seq=we.seq;
53 if (wordcmp(&he.w.b[0],&we.w.b[0],32)!=0) {
54 fwrite(&he,sizeof(hashentry),1,fOut1);
55 memcpy(&he.w.b[0],&we.w.b[0],8);
56 he.pos=ftell(fOut2);
57 he.num=0;
58 }
59
60 fwrite(&loc,sizeof(location),1,fOut2);
61 he.num++;
62 nWords++;
63
64 if(fread(&we,sizeof(wentry),1,fw)!=sizeof(wentry)){
65 if(ferror(fw))terror("error reading words file");
66 }
67 }
68
69 fwrite(&he,sizeof(hashentry),1,fOut1);
70
71 fprintf(stdout,"\nw2hd: %s tot words=%" PRIu64 "\n",av[1],nWords);
72
73 fclose(fOut1);
74 fclose(fOut2);
75 fclose(fw);
76
77 return 0;
78 }
79
80
81