Mercurial > repos > bitlab > bitlab
comparison gecko/src/w2hd.c @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ee6b15b409e5 | 1:35af401890c0 |
---|---|
1 /* Creates a hash table on disk from a set of ordered words | |
2 Syntax: w2hd wordsSort.In prefixNameOUT | |
3 | |
4 wordsSort is a bin file with Word-Pos-Seq | |
5 prefixNameOUT.d2hW : index of words-Pos-Occurrences | |
6 prefixNameOUT.d2hP : positions(Pos+seq) | |
7 | |
8 Feb.2011: add a new parameter: PrefixSize | |
9 | |
10 PrefixSize: defines the word-prefix size to be used to identify when two | |
11 words are the "same" | |
12 | |
13 ortrelles@uma.es / Dic.2011 | |
14 ---------------------------------------------------------*/ | |
15 | |
16 #include <string.h> | |
17 #include <stdlib.h> | |
18 #include <stdio.h> | |
19 #include <inttypes.h> | |
20 #include "structs.h" | |
21 #include "commonFunctions.h" | |
22 #include "dictionaryFunctions.h" | |
23 | |
24 int main(int ac, char** av){ | |
25 | |
26 char fname[1024]; | |
27 uint64_t nWords=0; | |
28 FILE* fw, *fOut1, *fOut2; | |
29 | |
30 wentry we; | |
31 hashentry he; | |
32 location loc; | |
33 | |
34 if(ac!=3)terror("USE: w2hd wordsSort.In prefixNameOUT\n"); | |
35 | |
36 if ((fw = fopen(av[1],"rb"))==NULL) terror("opening IN file"); | |
37 | |
38 sprintf(fname,"%s.d2hW",av[2]); | |
39 if ((fOut1 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hW file"); | |
40 sprintf(fname,"%s.d2hP",av[2]); | |
41 if ((fOut2 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hP file"); | |
42 | |
43 if(fread(&we,sizeof(wentry),1,fw)!=1){ | |
44 terror("empty words file"); | |
45 } | |
46 memcpy(&he.w.b[0],&we.w.b[0],8); | |
47 he.pos=0; | |
48 he.num=0; | |
49 | |
50 while(!feof(fw)){ | |
51 loc.pos=we.pos; | |
52 loc.seq=we.seq; | |
53 if (wordcmp(&he.w.b[0],&we.w.b[0],32)!=0) { | |
54 fwrite(&he,sizeof(hashentry),1,fOut1); | |
55 memcpy(&he.w.b[0],&we.w.b[0],8); | |
56 he.pos=ftell(fOut2); | |
57 he.num=0; | |
58 } | |
59 | |
60 fwrite(&loc,sizeof(location),1,fOut2); | |
61 he.num++; | |
62 nWords++; | |
63 | |
64 if(fread(&we,sizeof(wentry),1,fw)!=sizeof(wentry)){ | |
65 if(ferror(fw))terror("error reading words file"); | |
66 } | |
67 } | |
68 | |
69 fwrite(&he,sizeof(hashentry),1,fOut1); | |
70 | |
71 fprintf(stdout,"\nw2hd: %s tot words=%" PRIu64 "\n",av[1],nWords); | |
72 | |
73 fclose(fOut1); | |
74 fclose(fOut2); | |
75 fclose(fw); | |
76 | |
77 return 0; | |
78 } | |
79 | |
80 | |
81 |