X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Finsert.c;h=506fe1726b656fd5100f0c7e4ad5510600863226;hp=8d1cffbb2e045de880fcb54b743756f5fd9efae8;hb=8a0144a3925311102133d62ac6258ace8d7187eb;hpb=cf0045decc8edc53834fffc1eadf8a634cf81381 diff --git a/dict/insert.c b/dict/insert.c index 8d1cffb..506fe17 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -1,145 +1,462 @@ -/* - * Copyright (C) 1994, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: insert.c,v $ - * Revision 1.2 1994-08-17 13:32:19 adam - * Use cache in dict - not in bfile. - * - * Revision 1.1 1994/08/16 16:26:48 adam - * Added dict. - * - */ +/* This file is part of the Zebra server. + Copyright (C) Index Data -#include +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + + + +#if HAVE_CONFIG_H +#include +#endif #include +#include #include #include -#include +#include "dict-p.h" + +#define CHECK 0 -static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) +static int dict_ins(Dict dict, const Dict_char *str, + Dict_ptr back_ptr, int userlen, void *userinfo); +static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo); + + +static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp) { void *p; - Dict_ptr ptr = dict->head.free_list; - if (dict->head.free_list == dict->head.last) + Dict_ptr ptr = dict->head.last; + if (!dict->head.freelist) { - dict->head.free_list++; - dict->head.last = dict->head.free_list; - dict_bf_newp (dict->dbf, ptr, &p); + dict_bf_newp(dict->dbf, dict->head.last, &p, dict->head.page_size); + (dict->head.last)++; } else { - dict_bf_readp (dict->dbf, dict->head.free_list, &p); - dict->head.free_list = DICT_nextptr(p); - if (dict->head.free_list == 0) - dict->head.free_list = dict->head.last; + ptr = dict->head.freelist; + dict_bf_readp(dict->dbf, ptr, &p); + dict->head.freelist = DICT_backptr(p); } - assert (p); - DICT_type(p) = 1; + assert(p); + DICT_type(p) = 0; DICT_backptr(p) = back_ptr; - DICT_nextptr(p) = 0; DICT_nodir(p) = 0; - DICT_size(p) = 0; - *pp = p; + DICT_size(p) = DICT_infoffset; + DICT_bsize(p) = dict->head.page_size; + if (pp) + *pp = p; return ptr; } -static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr, - void *p, void *userinfo) +static int split_page(Dict dict, Dict_ptr ptr, void *p) { - Dict_ptr ptr = back_ptr, subptr; - short *indxp, *indxp1, *indxp2; - short newsize; - if (ptr == 0) - ptr = new_page (dict, back_ptr, &p); - assert (p); - assert (ptr); - - indxp = (short*) ((char*) p+DICT_PAGESIZE); - while (*str != DICT_EOS) + void *subp; + char *info_here; + Dict_ptr subptr; + int i, j; + short *indxp, *best_indxp = NULL; + Dict_char best_char = 0; + Dict_char prev_char = 0; + int best_no = -1, no_current = 1; + + dict->no_split++; + /* determine splitting char... */ + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + for (i = DICT_nodir(p); --i >= 0; --indxp) { - char *info; - if (*--indxp > 0) /* tail string here! */ + if (*indxp > 0) /* tail string here! */ { - int cmp; - info = DICT_info(p) + *indxp; - cmp = dict_strcmp ((Dict_char*) - (info+sizeof(Dict_info)+sizeof(Dict_ptr)), - str); - if (!cmp) - { - if (memcmp (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo))) - { - memcpy (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo)); - dict_bf_touch (dict->dbf, ptr); + Dict_char dc; + + memcpy(&dc, (char*) p + *indxp, sizeof(dc)); + if (best_no < 0) + { /* first entry met */ + best_char = prev_char = dc; + best_no = 1; + best_indxp = indxp; + } + else if (prev_char == dc) + { /* same char prefix. update */ + if (++no_current > best_no) + { /* best entry so far */ + best_no = no_current; + best_char = dc; + best_indxp = indxp; } - return 0; } - else if(cmp < 0) - break; - + else + { /* new char prefix. restore */ + prev_char = dc; + no_current = 1; + } } - else if(*indxp < 0) /* tail of string in sub page */ + } + assert(best_no >= 0); /* we didn't find any tail string entry at all! */ + + j = best_indxp - (short*) p; + subptr = new_page(dict, ptr, &subp); + /* scan entries to see if there is a string with */ + /* length 1. info_here indicates if such entry exist */ + info_here = NULL; + for (i=0; i 1); + if (slen == 2) { - int cmp; - info = DICT_info(p) - *indxp; - cmp = memcmp (info+sizeof(Dict_info)+sizeof(Dict_ptr), str, - sizeof(Dict_char)); - if (!cmp) - { - Dict_ptr subptr; - void *pp; - memcpy (&subptr, info, sizeof(subptr)); - if (subptr == 0) - { - subptr = new_page (dict, ptr, &pp); - memcpy (info, &subptr, sizeof(subptr)); - dict_bf_touch (dict->dbf, ptr); - } - return dict_ins (dict, str+1, ptr, pp, userinfo); - } - else if(cmp < 0) - break; + assert(!info_here); + info_here = info+slen*sizeof(Dict_char); } else - break; + { + info1 = info+slen*sizeof(Dict_char); /* info start */ + dict_ins(dict, (Dict_char*) (info+sizeof(Dict_char)), + subptr, *info1, info1+1); + dict_bf_readp(dict->dbf, ptr, &p); + } } - newsize = DICT_size(p); - subptr = 0; - memcpy (DICT_info(p) + newsize, &subptr, sizeof(subptr)); - memcpy (DICT_info(p) + newsize + sizeof(Dict_ptr), userinfo, - sizeof(Dict_info)); - memcpy (DICT_info(p) + newsize + sizeof(Dict_ptr)+sizeof(Dict_info), - str, dict_strlen (str)); - newsize = DICT_size(p) + - sizeof(Dict_info) + sizeof(Dict_ptr) + dict_strlen (str); - DICT_size (p) = newsize; - - DICT_nodir(p) = DICT_nodir(p)+1; - indxp2 = (short*)((char*) p + DICT_PAGESIZE - DICT_nodir(p)*sizeof(short)); - for (indxp1 = indxp2; indxp1 != indxp; indxp1++) - indxp[0] = indxp[1]; - *indxp = -newsize; + /* now clean the page ... */ + clean_page(dict, ptr, p, &best_char, subptr, info_here); return 0; } -int dict_insert (Dict dict, const Dict_char *str, void *userinfo) +static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo) { - dict_ins (dict, str, 0, NULL, userinfo); - return 0; + char *np = (char *) xmalloc(dict->head.page_size); + int i, slen, no = 0; + short *indxp1, *indxp2; + char *info1, *info2; + + DICT_bsize(np) = dict->head.page_size; + indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + indxp2 = (short*) ((char*) np+DICT_bsize(np)); + info2 = (char*) np + DICT_infoffset; + for (i = DICT_nodir(p); --i >= 0; --indxp1) + { + if (*indxp1 > 0) /* tail string here! */ + { + /* string (Dict_char *) DICT_EOS terminated */ + /* unsigned char length of information */ + /* char * information */ + + info1 = (char*) p + *indxp1; + if (out && memcmp(out, info1, sizeof(Dict_char)) == 0) + { + if (subptr == 0) + continue; + *--indxp2 = -(info2 - np); + memcpy(info2, &subptr, sizeof(Dict_ptr)); + info2 += sizeof(Dict_ptr); + memcpy(info2, out, sizeof(Dict_char)); + info2 += sizeof(Dict_char); + if (userinfo) + { + memcpy(info2, userinfo, *userinfo+1); + info2 += *userinfo + 1; + } + else + *info2++ = 0; + subptr = 0; + ++no; + continue; + } + *--indxp2 = info2 - np; + slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char); + memcpy(info2, info1, slen); + info1 += slen; + info2 += slen; + } + else + { + /* Dict_ptr subptr */ + /* Dict_char sub char */ + /* unsigned char length of information */ + /* char * information */ + + assert(*indxp1 < 0); + *--indxp2 = -(info2 - np); + info1 = (char*) p - *indxp1; + memcpy(info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char)); + info1 += sizeof(Dict_ptr)+sizeof(Dict_char); + info2 += sizeof(Dict_ptr)+sizeof(Dict_char); + } + slen = *info1+1; + memcpy(info2, info1, slen); + info2 += slen; + ++no; + } +#if 1 + memcpy((char*)p+DICT_infoffset, + (char*)np+DICT_infoffset, + info2 - ((char*)np+DICT_infoffset)); + memcpy((char*)p + ((char*)indxp2 - (char*)np), + indxp2, + ((char*) np+DICT_bsize(p)) - (char*)indxp2); +#else + memcpy((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, + DICT_pagesize(dict)-DICT_infoffset); +#endif + DICT_size(p) = info2 - np; + DICT_type(p) = 0; + DICT_nodir(p) = no; + xfree(np); + dict_bf_touch(dict->dbf, ptr); } +/* return 0 if new */ +/* return 1 if before but change of info */ +/* return 2 if same as before */ +static int dict_ins(Dict dict, const Dict_char *str, + Dict_ptr ptr, int userlen, void *userinfo) +{ + int hi, lo, mid, slen, cmp = 1; + short *indxp; + char *info; + void *p; + dict_bf_readp(dict->dbf, ptr, &p); + assert(p); + assert(ptr); + mid = lo = 0; + hi = DICT_nodir(p)-1; + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + while (lo <= hi) + { + mid = (lo+hi)/2; + if (indxp[-mid] > 0) + { + /* string (Dict_char *) DICT_EOS terminated */ + /* unsigned char length of information */ + /* char * information */ + info = (char*)p + indxp[-mid]; + cmp = dict_strcmp((Dict_char*) info, str); + if (!cmp) + { + info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char); + /* consider change of userinfo length... */ + if (*info == userlen) + { + /* change of userinfo ? */ + if (memcmp(info+1, userinfo, userlen)) + { + dict_bf_touch(dict->dbf, ptr); + memcpy(info+1, userinfo, userlen); + return 1; + } + /* same userinfo */ + return 2; + } + else if (*info > userlen) + { + /* room for new userinfo */ + DICT_type(p) = 1; + *info = userlen; + dict_bf_touch(dict->dbf, ptr); + memcpy(info+1, userinfo, userlen); + return 1; + } + break; + } + } + else + { + Dict_char dc; + Dict_ptr subptr; + /* Dict_ptr subptr */ + /* Dict_char sub char */ + /* unsigned char length of information */ + /* char * information */ + info = (char*)p - indxp[-mid]; + memcpy(&dc, info+sizeof(Dict_ptr), sizeof(Dict_char)); + cmp = dc- *str; + if (!cmp) + { + memcpy(&subptr, info, sizeof(Dict_ptr)); + if (*++str == DICT_EOS) + { + /* finish of string. Store userinfo here... */ + int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)]; + if (xlen == userlen) + { + if (memcmp(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen)) + { + dict_bf_touch(dict->dbf, ptr); + memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen); + return 1; + } + return 2; + } + else if (xlen > userlen) + { + DICT_type(p) = 1; + info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen; + memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen); + dict_bf_touch(dict->dbf, ptr); + return 1; + } + /* xlen < userlen, expanding needed ... */ + if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+ + userlen >= + DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)) + { + /* not enough room - split needed ... */ + if (DICT_type(p) == 1) + { + clean_page(dict, ptr, p, NULL, 0, NULL); + return dict_ins(dict, str-1, ptr, + userlen, userinfo); + } + if (split_page(dict, ptr, p)) + { + yaz_log(YLOG_FATAL, "Unable to split page %d\n", ptr); + assert(0); + } + return dict_ins(dict, str-1, ptr, userlen, userinfo); + } + else + { /* enough room - no split needed ... */ + info = (char*)p + DICT_size(p); + memcpy(info, &subptr, sizeof(subptr)); + memcpy(info+sizeof(Dict_ptr), &dc, sizeof(Dict_char)); + info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen; + memcpy(info+sizeof(Dict_char)+sizeof(Dict_ptr)+1, + userinfo, userlen); + indxp[-mid] = -DICT_size(p); + DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr) + +1+userlen; + DICT_type(p) = 1; + dict_bf_touch(dict->dbf, ptr); + } + if (xlen) + return 1; + return 0; + } + else + { + if (subptr == 0) + { + subptr = new_page(dict, ptr, NULL); + memcpy(info, &subptr, sizeof(subptr)); + dict_bf_touch(dict->dbf, ptr); + } + return dict_ins(dict, str, subptr, userlen, userinfo); + } + } + } + if (cmp < 0) + lo = mid+1; + else + hi = mid-1; + } + indxp = indxp-mid; + if (lo>hi && cmp < 0) + --indxp; + slen = (dict_strlen(str)+1)*sizeof(Dict_char); + if (DICT_size(p)+slen+userlen >= + (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */ + { + if (DICT_type(p)) + { + clean_page(dict, ptr, p, NULL, 0, NULL); + return dict_ins(dict, str, ptr, userlen, userinfo); + } + split_page(dict, ptr, p); + return dict_ins(dict, str, ptr, userlen, userinfo); + } + if (cmp) + { + short *indxp1; + (DICT_nodir(p))++; + indxp1 = (short*)((char*) p + DICT_bsize(p) + - DICT_nodir(p)*sizeof(short)); + for (; indxp1 != indxp; indxp1++) + indxp1[0] = indxp1[1]; +#if CHECK + indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + for (i = DICT_nodir (p); --i >= 0; --indxp1) + { + if (*indxp1 < 0) + { + info = (char*)p - *indxp1; + assert (info[sizeof(Dict_ptr)] > ' '); + } + } +#endif + } + else + DICT_type(p) = 1; + info = (char*)p + DICT_size(p); + memcpy(info, str, slen); + info += slen; + *info++ = userlen; + memcpy(info, userinfo, userlen); + info += userlen; + *indxp = DICT_size(p); + DICT_size(p) = info- (char*) p; + dict_bf_touch(dict->dbf, ptr); + if (cmp) + return 0; + return 1; +} +int dict_insert(Dict dict, const char *str, int userlen, void *userinfo) +{ + if (!dict->rw) + return -1; + dict->no_insert++; + if (!dict->head.root) + { + void *p; + dict->head.root = new_page(dict, 0, &p); + if (!dict->head.root) + return -1; + } + return dict_ins(dict, (const Dict_char *) str, dict->head.root, + userlen, userinfo); +} +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */