From 4ed5fbcd29d2a98b048d1d94510b262d352b4f7c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Sat, 15 May 1999 14:36:37 +0000 Subject: [PATCH] Updated dictionary. Implemented "compression" of dictionary. --- CHANGELOG | 3 ++ dict/dcompact.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++------- dict/delete.c | 17 ++++---- dict/dopen.c | 11 +++++- dict/drdwr.c | 64 ++++++++++++++++++++++++++++-- dict/insert.c | 63 ++++++++++++++++-------------- dict/lookgrep.c | 15 ++++--- dict/lookup.c | 19 +++++---- dict/lookupec.c | 14 ++++--- dict/open.c | 44 ++++++++++----------- dict/scan.c | 18 +++++---- include/dict.h | 30 ++++++++------ index/compact.c | 7 +++- index/extract.c | 7 +++- index/invstat.c | 7 +++- index/kinput.c | 7 +++- index/main.c | 6 ++- index/trav.c | 10 +++-- index/zebraapi.c | 7 +++- 19 files changed, 328 insertions(+), 135 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 971678b..9038b54 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +Implemented "compression" of Dictionary and ISAM system. Dictionary +format HAS changed. + Added "tagsysno" directive to zebra.cfg to control under which tag the sysstem ID is placed. Use tagsysno: 0 to disable Zebra's system number entirely. diff --git a/dict/dcompact.c b/dict/dcompact.c index f2d5fcd..800b654 100644 --- a/dict/dcompact.c +++ b/dict/dcompact.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dcompact.c,v $ - * Revision 1.3 1999-05-12 13:08:06 adam + * Revision 1.4 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.3 1999/05/12 13:08:06 adam * First version of ISAMS. * * Revision 1.2 1999/03/09 16:27:49 adam @@ -15,6 +18,7 @@ * */ +#include #include #include #include @@ -22,34 +26,114 @@ #include #include -int dict_compact (BFiles bfs, const char *from_name, const char *to_name) +static void dict_copy_page(Dict dict, char *to_p, char *from_p, int *map) +{ + int i, slen, no = 0; + short *from_indxp, *to_indxp; + char *from_info, *to_info; + + from_indxp = (short*) ((char*) from_p+DICT_bsize(from_p)); + to_indxp = (short*) ((char*) to_p+DICT_bsize(to_p)); + to_info = (char*) to_p + DICT_infoffset; + for (i = DICT_nodir (from_p); --i >= 0; ) + { + if (*--from_indxp > 0) /* tail string here! */ + { + /* string (Dict_char *) DICT_EOS terminated */ + /* unsigned char length of information */ + /* char * information */ + + from_info = (char*) from_p + *from_indxp; + *--to_indxp = to_info - to_p; + slen = (dict_strlen((Dict_char*) from_info)+1)*sizeof(Dict_char); + memcpy (to_info, from_info, slen); + from_info += slen; + to_info += slen; + } + else + { + Dict_ptr subptr; + Dict_char subchar; + /* Dict_ptr subptr */ + /* Dict_char sub char */ + /* unsigned char length of information */ + /* char * information */ + + *--to_indxp = -(to_info - to_p); + from_info = (char*) from_p - *from_indxp; + + memcpy (&subptr, from_info, sizeof(subptr)); + subptr = map[subptr]; + from_info += sizeof(Dict_ptr); + memcpy (&subchar, from_info, sizeof(subchar)); + from_info += sizeof(Dict_char); + + memcpy (to_info, &subptr, sizeof(Dict_ptr)); + to_info += sizeof(Dict_ptr); + memcpy (to_info, &subchar, sizeof(Dict_char)); + to_info += sizeof(Dict_char); + } + assert (to_info < (char*) to_indxp); + slen = *from_info+1; + memcpy (to_info, from_info, slen); + to_info += slen; + ++no; + } + DICT_size(to_p) = to_info - to_p; + DICT_type(to_p) = 0; + DICT_nodir(to_p) = no; +} + +int dict_copy_compact (BFiles bfs, const char *from_name, const char *to_name) { int no_dir = 0; - Dict from, to; + Dict dict_from, dict_to; int *map, i; - from = dict_open (bfs, from_name, 0, 0); - if (!from) + dict_from = dict_open (bfs, from_name, 0, 0, 0); + if (!dict_from) return -1; - map = xmalloc ((from->head.last+1) * sizeof(*map)); - for (i = 0; i <= (int) (from->head.last); i++) + map = xmalloc ((dict_from->head.last+1) * sizeof(*map)); + for (i = 0; i <= (int) (dict_from->head.last); i++) map[i] = -1; - to = dict_open (bfs, to_name, 0, 1); - if (!to) + dict_to = dict_open (bfs, to_name, 0, 1, 1); + if (!dict_to) return -1; map[0] = 0; - map[1] = DICT_pagesize(from); + map[1] = dict_from->head.page_size; - for (i = 1; i < (int) (from->head.last); i++) + for (i = 1; i < (int) (dict_from->head.last); i++) { void *buf; + int size; +#if 0 logf (LOG_LOG, "map[%d] = %d", i, map[i]); - dict_bf_readp (from->dbf, i, &buf); - map[i+1] = map[i] + DICT_size(buf); +#endif + dict_bf_readp (dict_from->dbf, i, &buf); + size = ((DICT_size(buf)+sizeof(short)-1)/sizeof(short) + + DICT_nodir(buf))*sizeof(short); + map[i+1] = map[i] + size; no_dir += DICT_nodir(buf); } logf (LOG_LOG, "map[%d] = %d", i, map[i]); logf (LOG_LOG, "nodir = %d", no_dir); - dict_close (from); - dict_close (to); + dict_to->head.root = map[1]; + dict_to->head.last = map[i]; + for (i = 1; i< (int) (dict_from->head.last); i++) + { + void *old_p, *new_p; + dict_bf_readp (dict_from->dbf, i, &old_p); + + logf (LOG_LOG, "dict_bf_newp no=%d size=%d", map[i], + map[i+1] - map[i]); + dict_bf_newp (dict_to->dbf, map[i], &new_p, map[i+1] - map[i]); + + DICT_type(new_p) = 0; + DICT_backptr(new_p) = map[i-1]; + DICT_bsize(new_p) = map[i+1] - map[i]; + + dict_copy_page(dict_from, new_p, old_p, map); + } + dict_close (dict_from); + dict_close (dict_to); return 0; } diff --git a/dict/delete.c b/dict/delete.c index 599ccd4..3a9912f 100644 --- a/dict/delete.c +++ b/dict/delete.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: delete.c,v $ - * Revision 1.5 1999-02-02 14:50:17 adam + * Revision 1.6 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.5 1999/02/02 14:50:17 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.4 1996/02/02 13:43:50 adam @@ -30,9 +33,8 @@ #include -static int dict_del (Dict dict, const Dict_char *str) +static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) { - Dict_ptr ptr = 1; int mid, lo, hi; int cmp; void *p; @@ -42,7 +44,7 @@ static int dict_del (Dict dict, const Dict_char *str) dict_bf_readp (dict->dbf, ptr, &p); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -101,8 +103,7 @@ static int dict_del (Dict dict, const Dict_char *str) dict_bf_readp (dict->dbf, ptr, &p); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict) - -sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); continue; } } @@ -117,7 +118,7 @@ static int dict_del (Dict dict, const Dict_char *str) int dict_delete (Dict dict, const char *p) { - if (dict->head.last == 1) + if (!dict->head.root) return 0; - return dict_del (dict, (const Dict_char*) p); + return dict_del (dict, (const Dict_char*) p, dict->head.root); } diff --git a/dict/dopen.c b/dict/dopen.c index 9883be3..0a3e63c 100644 --- a/dict/dopen.c +++ b/dict/dopen.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dopen.c,v $ - * Revision 1.6 1999-02-02 14:50:20 adam + * Revision 1.7 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.6 1999/02/02 14:50:20 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.5 1997/09/17 12:19:07 adam @@ -34,6 +37,7 @@ static void common_init (Dict_BFile bf, int block_size, int cache) int i; bf->block_size = block_size; + bf->compact_flag = 0; bf->cache = cache; bf->hash_size = 31; @@ -78,3 +82,8 @@ Dict_BFile dict_bf_open (BFiles bfs, const char *name, int block_size, common_init (dbf, block_size, cache); return dbf; } + +void dict_bf_compact (Dict_BFile dbf) +{ + dbf->compact_flag = 1; +} diff --git a/dict/drdwr.c b/dict/drdwr.c index a786304..9d235ca 100644 --- a/dict/drdwr.c +++ b/dict/drdwr.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: drdwr.c,v $ - * Revision 1.10 1999-02-02 14:50:21 adam + * Revision 1.11 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.10 1999/02/02 14:50:21 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.9 1997/09/09 13:38:01 adam @@ -92,7 +95,43 @@ void dict_bf_flush_blocks (Dict_BFile bf, int no_to_flush) p = bf->lru_back; if (p->dirty) { - bf_write (bf->bf, p->no, 0, 0, p->data); + if (!bf->compact_flag) + bf_write (bf->bf, p->no, 0, 0, p->data); + else + { + int effective_block = p->no / bf->block_size; + int effective_offset = p->no - + effective_block * bf->block_size; + int remain = bf->block_size - effective_offset; + + if (remain >= p->nbytes) + { + bf_write (bf->bf, effective_block, effective_offset, + p->nbytes, p->data); +#if 0 + logf (LOG_LOG, "bf_write no=%d offset=%d size=%d", + effective_block, effective_offset, + p->nbytes); +#endif + + } + else + { +#if 0 + logf (LOG_LOG, "bf_write1 no=%d offset=%d size=%d", + effective_block, effective_offset, + remain); +#endif + bf_write (bf->bf, effective_block, effective_offset, + remain, p->data); +#if 0 + logf (LOG_LOG, "bf_write2 no=%d offset=%d size=%d", + effective_block+1, 0, p->nbytes - remain); +#endif + bf_write (bf->bf, effective_block+1, 0, + p->nbytes - remain, (char*)p->data + remain); + } + } } release_block (bf, p); } @@ -166,7 +205,23 @@ int dict_bf_readp (Dict_BFile bf, int no, void **bufp) } bf->misses++; p = alloc_block (bf, no); - i = bf_read (bf->bf, no, 0, 0, p->data); + + //////////////// insert here + + if (!bf->compact_flag) + i = bf_read (bf->bf, no, 0, 0, p->data); + else + { + int effective_block = no / bf->block_size; + int effective_offset = no - effective_block * bf->block_size; + + i = bf_read (bf->bf, effective_block, effective_offset, + bf->block_size - effective_offset, p->data); + if (i > 0 && effective_offset > 0) + i = bf_read (bf->bf, effective_block+1, 0, effective_offset, + p->data + bf->block_size - effective_offset); + i = 1; + } if (i > 0) { *bufp = p->data; @@ -177,7 +232,7 @@ int dict_bf_readp (Dict_BFile bf, int no, void **bufp) return i; } -int dict_bf_newp (Dict_BFile dbf, int no, void **bufp) +int dict_bf_newp (Dict_BFile dbf, int no, void **bufp, int nbytes) { struct Dict_file_block *p; if (!(p = find_block (dbf, no))) @@ -187,6 +242,7 @@ int dict_bf_newp (Dict_BFile dbf, int no, void **bufp) *bufp = p->data; memset (p->data, 0, dbf->block_size); p->dirty = 1; + p->nbytes = nbytes; #if 0 printf ("bf_newp of %d:", no); dict_pr_lru (dbf); diff --git a/dict/insert.c b/dict/insert.c index af7775b..6fd334e 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: insert.c,v $ - * Revision 1.19 1999-02-02 14:50:22 adam + * Revision 1.20 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.19 1999/02/02 14:50:22 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.18 1998/03/05 08:17:24 adam @@ -88,26 +91,24 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) { void *p; - Dict_ptr ptr = dict->head.free_list; - if (dict->head.free_list == dict->head.last) + Dict_ptr ptr = dict->head.last; + if (!dict->head.freelist) { - dict->head.free_list++; - dict->head.last = dict->head.free_list; - dict_bf_newp (dict->dbf, ptr, &p); + dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size); + (dict->head.last)++; } else { - dict_bf_readp (dict->dbf, dict->head.free_list, &p); - dict->head.free_list = DICT_nextptr(p); - if (dict->head.free_list == 0) - dict->head.free_list = dict->head.last; + ptr = dict->head.freelist; + dict_bf_readp (dict->dbf, ptr, &p); + dict->head.freelist = DICT_backptr(p); } assert (p); DICT_type(p) = 0; DICT_backptr(p) = back_ptr; - DICT_nextptr(p) = 0; DICT_nodir(p) = 0; DICT_size(p) = DICT_infoffset; + DICT_bsize(p) = dict->head.page_size; if (pp) *pp = p; return ptr; @@ -125,7 +126,7 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) int best_no = -1, no_current = 1; /* determine splitting char... */ - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp) { if (*indxp > 0) /* tail string here! */ @@ -202,8 +203,9 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, short *indxp1, *indxp2; char *info1, *info2; - indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); - indxp2 = (short*) ((char*) np+DICT_pagesize(dict)); + DICT_bsize(np) = dict->head.page_size; + indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + indxp2 = (short*) ((char*) np+DICT_bsize(np)); info2 = (char*) np + DICT_infoffset; for (i = DICT_nodir (p); --i >= 0; --indxp1) { @@ -265,7 +267,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, info2 - ((char*)np+DICT_infoffset)); memcpy ((char*)p + ((char*)indxp2 - (char*)np), indxp2, - ((char*) np+DICT_pagesize(dict)) - (char*)indxp2); + ((char*) np+DICT_bsize(p)) - (char*)indxp2); #else memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, DICT_pagesize(dict)-DICT_infoffset); @@ -284,25 +286,21 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* return 2 if same as before */ static int dict_ins (Dict dict, const Dict_char *str, - Dict_ptr back_ptr, int userlen, void *userinfo) + Dict_ptr ptr, int userlen, void *userinfo) { int hi, lo, mid, slen, cmp = 1; - Dict_ptr ptr = back_ptr; short *indxp; char *info; void *p; - if (ptr == 0) - ptr = new_page (dict, back_ptr, &p); - else - dict_bf_readp (dict->dbf, ptr, &p); + dict_bf_readp (dict->dbf, ptr, &p); assert (p); assert (ptr); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -385,7 +383,7 @@ static int dict_ins (Dict dict, const Dict_char *str, /* xlen < userlen, expanding needed ... */ if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+ userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) + DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)) { /* not enough room - split needed ... */ if (DICT_type(p) == 1) @@ -441,7 +439,7 @@ static int dict_ins (Dict dict, const Dict_char *str, --indxp; slen = (dict_strlen(str)+1)*sizeof(Dict_char); if (DICT_size(p)+slen+userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ + DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ { if (DICT_type(p)) { @@ -455,7 +453,7 @@ static int dict_ins (Dict dict, const Dict_char *str, { short *indxp1; (DICT_nodir(p))++; - indxp1 = (short*)((char*) p + DICT_pagesize(dict) + indxp1 = (short*)((char*) p + DICT_bsize(p) - DICT_nodir(p)*sizeof(short)); for (; indxp1 != indxp; indxp1++) indxp1[0] = indxp1[1]; @@ -490,10 +488,15 @@ static int dict_ins (Dict dict, const Dict_char *str, int dict_insert (Dict dict, const char *str, int userlen, void *userinfo) { - assert (dict->head.last > 0); - if (dict->head.last == 1) - return dict_ins (dict, (const Dict_char *) str, 0, userlen, userinfo); - else - return dict_ins (dict, (const Dict_char *) str, 1, userlen, userinfo); + if (!dict->head.root) + { + void *p; + if (dict->rw) + dict->head.root = new_page (dict, 0, &p); + if (!dict->head.root) + return 0; + } + return dict_ins (dict, (const Dict_char *) str, dict->head.root, + userlen, userinfo); } diff --git a/dict/lookgrep.c b/dict/lookgrep.c index d4e9873..4b651dc 100644 --- a/dict/lookgrep.c +++ b/dict/lookgrep.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lookgrep.c,v $ - * Revision 1.22 1999-02-02 14:50:23 adam + * Revision 1.23 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.22 1999/02/02 14:50:23 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.21 1998/06/24 12:16:12 adam @@ -304,7 +307,7 @@ static int dict_grep (Dict dict, Dict_ptr ptr, MatchContext *mc, dict_bf_readp (dict->dbf, ptr, &p); lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { @@ -404,8 +407,7 @@ static int dict_grep (Dict dict, Dict_ptr ptr, MatchContext *mc, init_pos)) return 1; dict_bf_readp (dict->dbf, ptr, &p); - indxp = (short*) ((char*) p+DICT_pagesize(dict) - -sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); } } } @@ -471,8 +473,9 @@ int dict_lookup_grep (Dict dict, const char *pattern, int range, void *client, } } *max_pos = 0; - if (dict->head.last > 1) - i = dict_grep (dict, 1, mc, Rj, 0, client, userfunc, prefix, + if (dict->head.root) + i = dict_grep (dict, dict->head.root, mc, Rj, 0, client, + userfunc, prefix, dfa, max_pos, init_pos); else i = 0; diff --git a/dict/lookup.c b/dict/lookup.c index b7dd51c..f08de3c 100644 --- a/dict/lookup.c +++ b/dict/lookup.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lookup.c,v $ - * Revision 1.9 1999-02-02 14:50:25 adam + * Revision 1.10 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.9 1999/02/02 14:50:25 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.8 1998/03/05 08:17:24 adam @@ -41,9 +44,8 @@ #include -static char *dict_look (Dict dict, const Dict_char *str) +static char *dict_look (Dict dict, const Dict_char *str, Dict_ptr ptr) { - Dict_ptr ptr = 1; int mid, lo, hi; int cmp; void *p; @@ -53,7 +55,7 @@ static char *dict_look (Dict dict, const Dict_char *str) dict_bf_readp (dict->dbf, ptr, &p); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -97,8 +99,7 @@ static char *dict_look (Dict dict, const Dict_char *str) dict_bf_readp (dict->dbf, ptr, &p); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict) - -sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); continue; } } @@ -113,9 +114,7 @@ static char *dict_look (Dict dict, const Dict_char *str) char *dict_lookup (Dict dict, const char *p) { - if (dict->head.last <= 1) + if (!dict->head.root) return NULL; - return dict_look (dict, (const Dict_char *) p); + return dict_look (dict, (const Dict_char *) p, dict->head.root); } - - diff --git a/dict/lookupec.c b/dict/lookupec.c index 212dbc8..70e28a6 100644 --- a/dict/lookupec.c +++ b/dict/lookupec.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lookupec.c,v $ - * Revision 1.7 1999-02-02 14:50:26 adam + * Revision 1.8 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.7 1999/02/02 14:50:26 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.6 1996/02/02 13:43:51 adam @@ -60,7 +63,7 @@ int dict_look_ec (Dict dict, Dict_ptr ptr, MatchInfo *mi, MatchWord *ri_base, dict_bf_readp (dict->dbf, ptr, &p); lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { if (indxp[-lo] > 0) @@ -131,7 +134,7 @@ int dict_look_ec (Dict dict, Dict_ptr ptr, MatchInfo *mi, MatchWord *ri_base, userfunc, range, prefix); dict_bf_readp (dict->dbf, ptr, &p); indxp = (short*) ((char*) p + - DICT_pagesize(dict)-sizeof(short)); + DICT_bsize(p)-sizeof(short)); } } } @@ -164,7 +167,7 @@ int dict_lookup_ec (Dict dict, char *pattern, int range, int i; Dict_char prefix[2048]; - if (dict->head.last == 1) + if (!dict->head.root) return 0; mi = prepare_match ((Dict_char*) pattern); @@ -174,7 +177,8 @@ int dict_lookup_ec (Dict dict, char *pattern, int range, for (i=0; i<=range; i++) ri[i] = (2<head.root, mi, ri, 0, userfunc, + range, prefix); xfree (ri); return i; } diff --git a/dict/open.c b/dict/open.c index a38076b..2a5d322 100644 --- a/dict/open.c +++ b/dict/open.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: open.c,v $ - * Revision 1.14 1999-03-09 13:07:06 adam + * Revision 1.15 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.14 1999/03/09 13:07:06 adam * Work on dict_compact routine. * * Revision 1.13 1999/02/02 14:50:27 adam @@ -57,11 +60,11 @@ #include -Dict dict_open (BFiles bfs, const char *name, int cache, int rw) +Dict dict_open (BFiles bfs, const char *name, int cache, int rw, + int compact_flag) { Dict dict; void *head_buf; - struct Dict_head *dh; char resource_str[80]; int page_size; @@ -90,37 +93,34 @@ Dict dict_open (BFiles bfs, const char *name, int cache, int rw) } if (dict_bf_readp (dict->dbf, 0, &head_buf) <= 0) { + strcpy (dict->head.magic_str, DICT_MAGIC); + dict->head.last = 1; + dict->head.root = 0; + dict->head.freelist = 0; + dict->head.page_size = page_size; + dict->head.compact_flag = compact_flag; + + /* create header with information (page 0) */ if (rw) - { /* create header with information (page 0) */ - dict_bf_newp (dict->dbf, 0, &head_buf); - dh = (struct Dict_head *) head_buf; - strcpy(dh->magic_str, DICT_MAGIC); - dh->free_list = dh->last = 1; - dh->page_size = page_size; - memcpy (&dict->head, dh, sizeof(*dh)); - } - else - { /* no header present, i.e. no dictionary at all */ - dict->head.free_list = dict->head.last = 0; - dict->head.page_size = page_size; - } + dict_bf_newp (dict->dbf, 0, &head_buf, page_size); } else /* header was there, check magic and page size */ { - dh = (struct Dict_head *) head_buf; - if (strcmp (dh->magic_str, DICT_MAGIC)) + memcpy (&dict->head, head_buf, sizeof(dict->head)); + if (strcmp (dict->head.magic_str, DICT_MAGIC)) { logf (LOG_WARN, "Bad magic of `%s'", name); exit (1); } - if (dh->page_size != page_size) + if (dict->head.page_size != page_size) { logf (LOG_WARN, "Resource %s is %d and pagesize of `%s' is %d", - resource_str, page_size, name, dh->page_size); - exit (1); + resource_str, page_size, name, dict->head.page_size); + return 0; } - memcpy (&dict->head, dh, sizeof(*dh)); } + if (dict->head.compact_flag) + dict_bf_compact(dict->dbf); return dict; } diff --git a/dict/scan.c b/dict/scan.c index 5eaa727..f66a1c2 100644 --- a/dict/scan.c +++ b/dict/scan.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: scan.c,v $ - * Revision 1.12 1999-02-02 14:50:28 adam + * Revision 1.13 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.12 1999/02/02 14:50:28 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.11 1998/06/22 11:34:45 adam @@ -68,7 +71,7 @@ int dict_scan_trav (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, lo = hi; else lo = start; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi && lo >= 0 && *count > 0) { @@ -114,7 +117,7 @@ int dict_scan_trav (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, dict_scan_trav (dict, subptr, pos+1, str, 0, count, client, userfunc, dir); dict_bf_readp (dict->dbf, ptr, &p); - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); } } lo += dir; @@ -136,7 +139,7 @@ int dict_scan_r (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, return 0; mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -231,9 +234,8 @@ int dict_scan (Dict dict, char *str, int *before, int *after, void *client, logf (LOG_DEBUG, " %3d %c", str[i], (str[i] > ' ' && str[i] < 127) ? str[i] : '?'); } - if (dict->head.last <= 1) + if (!dict->head.root) return 0; - return dict_scan_r (dict, 1, 0, (Dict_char *) str, before, after, client, - f); + return dict_scan_r (dict, dict->head.root, 0, (Dict_char *) str, + before, after, client, f); } - diff --git a/include/dict.h b/include/dict.h index 4b75eea..b7c10c0 100644 --- a/include/dict.h +++ b/include/dict.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dict.h,v $ - * Revision 1.28 1999-03-09 13:07:06 adam + * Revision 1.29 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.28 1999/03/09 13:07:06 adam * Work on dict_compact routine. * * Revision 1.27 1999/02/02 14:50:32 adam @@ -116,7 +119,8 @@ typedef unsigned char Dict_char; struct Dict_head { char magic_str[8]; int page_size; - Dict_ptr free_list, last; + int compact_flag; + Dict_ptr root, last, freelist; }; struct Dict_file_block @@ -126,6 +130,7 @@ struct Dict_file_block void *data; int dirty; int no; + int nbytes; }; typedef struct Dict_file_struct @@ -144,6 +149,7 @@ typedef struct Dict_file_struct int block_size; int hits; int misses; + int compact_flag; } *Dict_BFile; typedef struct Dict_struct { @@ -154,19 +160,21 @@ typedef struct Dict_struct { struct Dict_head head; } *Dict; -#define DICT_MAGIC "dict00" +#define DICT_MAGIC "dict01" #define DICT_DEFAULT_PAGESIZE 4096 int dict_bf_readp (Dict_BFile bf, int no, void **bufp); -int dict_bf_newp (Dict_BFile bf, int no, void **bufp); +int dict_bf_newp (Dict_BFile bf, int no, void **bufp, int nbytes); int dict_bf_touch (Dict_BFile bf, int no); void dict_bf_flush_blocks (Dict_BFile bf, int no_to_flush); Dict_BFile dict_bf_open (BFiles bfs, const char *name, int block_size, int cache, int rw); int dict_bf_close (Dict_BFile dbf); +void dict_bf_compact (Dict_BFile dbf); -Dict dict_open (BFiles bfs, const char *name, int cache, int rw); +Dict dict_open (BFiles bfs, const char *name, int cache, int rw, + int compact_flag); int dict_close (Dict dict); int dict_insert (Dict dict, const char *p, int userlen, void *userinfo); int dict_delete (Dict dict, const char *p); @@ -187,17 +195,17 @@ int dict_scan (Dict dict, char *str, void dict_grep_cmap (Dict dict, void *vp, const char **(*cmap)(void *vp, const char **from, int len)); -int dict_compact (BFiles bfs, const char *from, const char *to); +int dict_copy_compact (BFiles bfs, const char *from, const char *to); #define DICT_EOS 0 #define DICT_type(x) 0[(Dict_ptr*) x] #define DICT_backptr(x) 1[(Dict_ptr*) x] -#define DICT_nextptr(x) 2[(Dict_ptr*) x] -#define DICT_nodir(x) 0[(short*)((char*)(x)+3*sizeof(Dict_ptr))] -#define DICT_size(x) 1[(short*)((char*)(x)+3*sizeof(Dict_ptr))] -#define DICT_infoffset (3*sizeof(Dict_ptr)+2*sizeof(short)) -#define DICT_pagesize(x) ((x)->head.page_size) +#define DICT_bsize(x) 2[(short*)((char*)(x)+2*sizeof(Dict_ptr))] +#define DICT_nodir(x) 0[(short*)((char*)(x)+2*sizeof(Dict_ptr))] +#define DICT_size(x) 1[(short*)((char*)(x)+2*sizeof(Dict_ptr))] +#define DICT_infoffset (2*sizeof(Dict_ptr)+3*sizeof(short)) +#define DICT_xxxxpagesize(x) ((x)->head.page_size) #define DICT_to_str(x) sizeof(Dict_info)+sizeof(Dict_ptr) diff --git a/index/compact.c b/index/compact.c index eab0edc..be32707 100644 --- a/index/compact.c +++ b/index/compact.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: compact.c,v $ - * Revision 1.1 1999-03-09 10:16:35 adam + * Revision 1.2 1999-05-15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.1 1999/03/09 10:16:35 adam * Work on compaction of dictionary/isamc. * */ @@ -17,5 +20,5 @@ void inv_compact (BFiles bfs) { - dict_compact (bfs, FNAME_DICT, "out"); + dict_copy_compact (bfs, FNAME_DICT, "out"); } diff --git a/index/extract.c b/index/extract.c index 3e29b83..0486bb6 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.92 1999-03-09 16:27:49 adam + * Revision 1.93 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.92 1999/03/09 16:27:49 adam * More work on SDRKit integration. * * Revision 1.91 1999/02/12 13:29:22 adam @@ -405,7 +408,7 @@ int key_open (struct recordGroup *rGroup, int mem) key_buf_used = 0; key_file_no = 0; - if (!(matchDict = dict_open (bfs, GMATCH_DICT, 50, rw))) + if (!(matchDict = dict_open (bfs, GMATCH_DICT, 50, rw, 0))) { logf (LOG_FATAL, "dict_open fail of %s", GMATCH_DICT); return -1; diff --git a/index/invstat.c b/index/invstat.c index e08685a..4781c50 100644 --- a/index/invstat.c +++ b/index/invstat.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: invstat.c,v $ - * Revision 1.10 1999-05-12 13:08:06 adam + * Revision 1.11 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.10 1999/05/12 13:08:06 adam * First version of ISAMS. * * Revision 1.9 1999/02/12 13:29:23 adam @@ -140,7 +143,7 @@ void inv_prstat (BFiles bfs) term_dict[0] = 1; term_dict[1] = 0; - dict = dict_open (bfs, FNAME_DICT, 100, 0); + dict = dict_open (bfs, FNAME_DICT, 100, 0, 0); if (!dict) { logf (LOG_FATAL, "dict_open fail"); diff --git a/index/kinput.c b/index/kinput.c index 07addbe..c560591 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: kinput.c,v $ - * Revision 1.32 1999-05-12 13:08:06 adam + * Revision 1.33 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.32 1999/05/12 13:08:06 adam * First version of ISAMS. * * Revision 1.31 1999/02/02 14:50:56 adam @@ -667,7 +670,7 @@ void key_input (BFiles bfs, int nkeys, int cache) if (!nkeys) return ; } - dict = dict_open (bfs, FNAME_DICT, cache, 1); + dict = dict_open (bfs, FNAME_DICT, cache, 1, 0); if (!dict) { logf (LOG_FATAL, "dict_open fail"); diff --git a/index/main.c b/index/main.c index f4c52ad..ef92bb6 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.63 1999-03-09 16:27:49 adam + * Revision 1.64 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.63 1999/03/09 16:27:49 adam * More work on SDRKit integration. * * Revision 1.62 1999/02/18 15:01:24 adam @@ -422,7 +425,6 @@ int main (int argc, char **argv) } else if (!strcmp (arg, "compact")) { - printf ("--------- compact ------\n"); rval = res_get (common_resource, "shadow"); zebraIndexLock (rGroupDef.bfs, 0, rval); if (rval && *rval) diff --git a/index/trav.c b/index/trav.c index fec9d6a..150fc27 100644 --- a/index/trav.c +++ b/index/trav.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: trav.c,v $ - * Revision 1.35 1999-02-02 14:51:09 adam + * Revision 1.36 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.35 1999/02/02 14:51:09 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.34 1998/06/08 14:43:14 adam @@ -393,7 +396,7 @@ void repositoryShow (struct recordGroup *rGroup) Dict dict; struct dirs_info *di; - if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50, 0))) + if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50, 0, 0))) { logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); return; @@ -508,7 +511,8 @@ void repositoryUpdate (struct recordGroup *rGroup) if (rGroup->recordId && !strcmp (rGroup->recordId, "file")) { Dict dict; - if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50, rGroup->flagRw))) + if (!(dict = dict_open (rGroup->bfs, FMATCH_DICT, 50, + rGroup->flagRw, 0))) { logf (LOG_FATAL, "dict_open fail of %s", FMATCH_DICT); return ; diff --git a/index/zebraapi.c b/index/zebraapi.c index ee30db0..8120ebe 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebraapi.c,v $ - * Revision 1.17 1999-05-12 13:08:06 adam + * Revision 1.18 1999-05-15 14:36:38 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.17 1999/05/12 13:08:06 adam * First version of ISAMS. * * Revision 1.16 1999/02/19 10:38:30 adam @@ -136,7 +139,7 @@ static int zebra_register_lock (ZebraHandle zh) bf_cache (zh->bfs, state ? res_get (zh->res, "shadow") : NULL); zh->registerState = state; zh->records = rec_open (zh->bfs, 0); - if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 40, 0))) + if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 40, 0, 0))) { logf (LOG_WARN, "dict_open"); return -1; -- 1.7.10.4