X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Finsert.c;h=506fe1726b656fd5100f0c7e4ad5510600863226;hp=1057d670703d8a292050e47187ec415189d3bb6e;hb=0dedb6a7a6a0f0ddaf56cb0673978fdd85e64be1;hpb=ef696645cc3b7e0f4027008d1dc589c0f0f90c1f diff --git a/dict/insert.c b/dict/insert.c index 1057d67..506fe17 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -1,112 +1,58 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: insert.c,v $ - * Revision 1.21 1999-05-26 07:49:12 adam - * C++ compilation. - * - * Revision 1.20 1999/05/15 14:36:37 adam - * Updated dictionary. Implemented "compression" of dictionary. - * - * Revision 1.19 1999/02/02 14:50:22 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.18 1998/03/05 08:17:24 adam - * Added a few comments - no code changed. - * - * Revision 1.17 1996/05/14 15:49:09 adam - * Bug fix: In function split_page. In rare cases variable best_indxp was - * referenced. - * - * Revision 1.16 1996/02/02 13:43:50 adam - * The public functions simply use char instead of Dict_char to represent - * search strings. Dict_char is used internally only. - * - * Revision 1.15 1996/02/01 20:39:59 adam - * Bug fix: insert didn't work on 8-bit characters due to unsigned char - * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is - * unsigned now. - * - * Revision 1.14 1995/12/07 11:48:56 adam - * Insert operation obeys DICT_type = 1 (slack in page). - * Function dict_open exists if page size or magic aren't right. - * - * Revision 1.13 1995/11/28 09:06:37 adam - * Fixed potential dangling pointer. - * - * Revision 1.12 1995/09/06 10:34:44 adam - * Memcpy in clean_page edited to satisfy checkergcc. - * - * Revision 1.11 1995/09/04 12:33:31 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.10 1994/10/05 12:16:48 adam - * Pagesize is a resource now. - * - * Revision 1.9 1994/09/16 15:39:13 adam - * Initial code of lookup - not tested yet. - * - * Revision 1.8 1994/09/16 12:35:01 adam - * New version of split_page which use clean_page for splitting. - * - * Revision 1.7 1994/09/12 08:06:42 adam - * Futher development of insert.c - * - * Revision 1.6 1994/09/06 13:05:15 adam - * Further development of insertion. Some special cases are - * not properly handled yet! assert(0) are put here. The - * binary search in each page definitely reduce usr CPU. - * - * Revision 1.5 1994/09/01 17:49:39 adam - * Removed stupid line. Work on insertion in dictionary. Not finished yet. - * - * Revision 1.4 1994/09/01 17:44:09 adam - * depend include change. - * - * Revision 1.3 1994/08/18 12:40:56 adam - * Some development of dictionary. Not finished at all! - * - * Revision 1.2 1994/08/17 13:32:19 adam - * Use cache in dict - not in bfile. - * - * Revision 1.1 1994/08/16 16:26:48 adam - * Added dict. - * - */ +/* This file is part of the Zebra server. + Copyright (C) Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + + +#if HAVE_CONFIG_H +#include +#endif #include #include #include #include -#include +#include "dict-p.h" #define CHECK 0 -static int dict_ins (Dict dict, const Dict_char *str, - Dict_ptr back_ptr, int userlen, void *userinfo); -static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, - Dict_ptr subptr, char *userinfo); +static int dict_ins(Dict dict, const Dict_char *str, + Dict_ptr back_ptr, int userlen, void *userinfo); +static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo); -static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) +static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp) { void *p; Dict_ptr ptr = dict->head.last; if (!dict->head.freelist) { - dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size); + dict_bf_newp(dict->dbf, dict->head.last, &p, dict->head.page_size); (dict->head.last)++; } else { ptr = dict->head.freelist; - dict_bf_readp (dict->dbf, ptr, &p); + dict_bf_readp(dict->dbf, ptr, &p); dict->head.freelist = DICT_backptr(p); } - assert (p); + assert(p); DICT_type(p) = 0; DICT_backptr(p) = back_ptr; DICT_nodir(p) = 0; @@ -117,7 +63,7 @@ static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) return ptr; } -static int split_page (Dict dict, Dict_ptr ptr, void *p) +static int split_page(Dict dict, Dict_ptr ptr, void *p) { void *subp; char *info_here; @@ -128,15 +74,16 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) Dict_char prev_char = 0; int best_no = -1, no_current = 1; + dict->no_split++; /* determine splitting char... */ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); - for (i = DICT_nodir (p); --i >= 0; --indxp) + for (i = DICT_nodir(p); --i >= 0; --indxp) { if (*indxp > 0) /* tail string here! */ { Dict_char dc; - memcpy (&dc, (char*) p + *indxp, sizeof(dc)); + memcpy(&dc, (char*) p + *indxp, sizeof(dc)); if (best_no < 0) { /* first entry met */ best_char = prev_char = dc; @@ -152,18 +99,17 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) best_indxp = indxp; } } - else + else { /* new char prefix. restore */ prev_char = dc; no_current = 1; } } } - if (best_no < 0) /* we didn't find any tail string entry at all! */ - return -1; + assert(best_no >= 0); /* we didn't find any tail string entry at all! */ j = best_indxp - (short*) p; - subptr = new_page (dict, ptr, &subp); + subptr = new_page(dict, ptr, &subp); /* scan entries to see if there is a string with */ /* length 1. info_here indicates if such entry exist */ info_here = NULL; @@ -175,33 +121,33 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) info = (char*) p + ((short*) p)[j]; /* entry start */ - memcpy (&dc, info, sizeof(dc)); - assert (dc == best_char); + memcpy(&dc, info, sizeof(dc)); + assert(dc == best_char); slen = 1+dict_strlen((Dict_char*) info); - assert (slen > 1); + assert(slen > 1); if (slen == 2) { - assert (!info_here); + assert(!info_here); info_here = info+slen*sizeof(Dict_char); } else { info1 = info+slen*sizeof(Dict_char); /* info start */ - dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)), - subptr, *info1, info1+1); - dict_bf_readp (dict->dbf, ptr, &p); + dict_ins(dict, (Dict_char*) (info+sizeof(Dict_char)), + subptr, *info1, info1+1); + dict_bf_readp(dict->dbf, ptr, &p); } } /* now clean the page ... */ - clean_page (dict, ptr, p, &best_char, subptr, info_here); + clean_page(dict, ptr, p, &best_char, subptr, info_here); return 0; } -static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, - Dict_ptr subptr, char *userinfo) +static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo) { - char *np = (char *) xmalloc (dict->head.page_size); + char *np = (char *) xmalloc(dict->head.page_size); int i, slen, no = 0; short *indxp1, *indxp2; char *info1, *info2; @@ -210,7 +156,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); indxp2 = (short*) ((char*) np+DICT_bsize(np)); info2 = (char*) np + DICT_infoffset; - for (i = DICT_nodir (p); --i >= 0; --indxp1) + for (i = DICT_nodir(p); --i >= 0; --indxp1) { if (*indxp1 > 0) /* tail string here! */ { @@ -219,29 +165,29 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* char * information */ info1 = (char*) p + *indxp1; - if (out && memcmp (out, info1, sizeof(Dict_char)) == 0) + if (out && memcmp(out, info1, sizeof(Dict_char)) == 0) { if (subptr == 0) continue; *--indxp2 = -(info2 - np); - memcpy (info2, &subptr, sizeof(Dict_ptr)); + memcpy(info2, &subptr, sizeof(Dict_ptr)); info2 += sizeof(Dict_ptr); - memcpy (info2, out, sizeof(Dict_char)); + memcpy(info2, out, sizeof(Dict_char)); info2 += sizeof(Dict_char); if (userinfo) { - memcpy (info2, userinfo, *userinfo+1); + memcpy(info2, userinfo, *userinfo+1); info2 += *userinfo + 1; } else - *info2++ = 0; - subptr = 0; + *info2++ = 0; + subptr = 0; ++no; continue; } *--indxp2 = info2 - np; slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char); - memcpy (info2, info1, slen); + memcpy(info2, info1, slen); info1 += slen; info2 += slen; } @@ -252,34 +198,34 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* unsigned char length of information */ /* char * information */ - assert (*indxp1 < 0); + assert(*indxp1 < 0); *--indxp2 = -(info2 - np); info1 = (char*) p - *indxp1; - memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char)); + memcpy(info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char)); info1 += sizeof(Dict_ptr)+sizeof(Dict_char); info2 += sizeof(Dict_ptr)+sizeof(Dict_char); } slen = *info1+1; - memcpy (info2, info1, slen); + memcpy(info2, info1, slen); info2 += slen; ++no; } #if 1 - memcpy ((char*)p+DICT_infoffset, - (char*)np+DICT_infoffset, - info2 - ((char*)np+DICT_infoffset)); - memcpy ((char*)p + ((char*)indxp2 - (char*)np), - indxp2, - ((char*) np+DICT_bsize(p)) - (char*)indxp2); + memcpy((char*)p+DICT_infoffset, + (char*)np+DICT_infoffset, + info2 - ((char*)np+DICT_infoffset)); + memcpy((char*)p + ((char*)indxp2 - (char*)np), + indxp2, + ((char*) np+DICT_bsize(p)) - (char*)indxp2); #else - memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, - DICT_pagesize(dict)-DICT_infoffset); + memcpy((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, + DICT_pagesize(dict)-DICT_infoffset); #endif DICT_size(p) = info2 - np; DICT_type(p) = 0; DICT_nodir(p) = no; - xfree (np); - dict_bf_touch (dict->dbf, ptr); + xfree(np); + dict_bf_touch(dict->dbf, ptr); } @@ -288,18 +234,18 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* return 1 if before but change of info */ /* return 2 if same as before */ -static int dict_ins (Dict dict, const Dict_char *str, - Dict_ptr ptr, int userlen, void *userinfo) +static int dict_ins(Dict dict, const Dict_char *str, + Dict_ptr ptr, int userlen, void *userinfo) { int hi, lo, mid, slen, cmp = 1; short *indxp; char *info; void *p; - dict_bf_readp (dict->dbf, ptr, &p); - - assert (p); - assert (ptr); + dict_bf_readp(dict->dbf, ptr, &p); + + assert(p); + assert(ptr); mid = lo = 0; hi = DICT_nodir(p)-1; @@ -321,10 +267,10 @@ static int dict_ins (Dict dict, const Dict_char *str, if (*info == userlen) { /* change of userinfo ? */ - if (memcmp (info+1, userinfo, userlen)) + if (memcmp(info+1, userinfo, userlen)) { - dict_bf_touch (dict->dbf, ptr); - memcpy (info+1, userinfo, userlen); + dict_bf_touch(dict->dbf, ptr); + memcpy(info+1, userinfo, userlen); return 1; } /* same userinfo */ @@ -335,8 +281,8 @@ static int dict_ins (Dict dict, const Dict_char *str, /* room for new userinfo */ DICT_type(p) = 1; *info = userlen; - dict_bf_touch (dict->dbf, ptr); - memcpy (info+1, userinfo, userlen); + dict_bf_touch(dict->dbf, ptr); + memcpy(info+1, userinfo, userlen); return 1; } break; @@ -352,11 +298,11 @@ static int dict_ins (Dict dict, const Dict_char *str, /* unsigned char length of information */ /* char * information */ info = (char*)p - indxp[-mid]; - memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char)); + memcpy(&dc, info+sizeof(Dict_ptr), sizeof(Dict_char)); cmp = dc- *str; if (!cmp) { - memcpy (&subptr, info, sizeof(Dict_ptr)); + memcpy(&subptr, info, sizeof(Dict_ptr)); if (*++str == DICT_EOS) { /* finish of string. Store userinfo here... */ @@ -364,12 +310,12 @@ static int dict_ins (Dict dict, const Dict_char *str, int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)]; if (xlen == userlen) { - if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, - userinfo, userlen)) + if (memcmp(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen)) { - dict_bf_touch (dict->dbf, ptr); - memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, - userinfo, userlen); + dict_bf_touch(dict->dbf, ptr); + memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen); return 1; } return 2; @@ -378,9 +324,9 @@ static int dict_ins (Dict dict, const Dict_char *str, { DICT_type(p) = 1; info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen; - memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, - userinfo, userlen); - dict_bf_touch (dict->dbf, ptr); + memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, + userinfo, userlen); + dict_bf_touch(dict->dbf, ptr); return 1; } /* xlen < userlen, expanding needed ... */ @@ -391,30 +337,30 @@ static int dict_ins (Dict dict, const Dict_char *str, /* not enough room - split needed ... */ if (DICT_type(p) == 1) { - clean_page (dict, ptr, p, NULL, 0, NULL); - return dict_ins (dict, str-1, ptr, - userlen, userinfo); + clean_page(dict, ptr, p, NULL, 0, NULL); + return dict_ins(dict, str-1, ptr, + userlen, userinfo); } - if (split_page (dict, ptr, p)) + if (split_page(dict, ptr, p)) { - logf (LOG_FATAL, "Unable to split page %d\n", ptr); - abort (); + yaz_log(YLOG_FATAL, "Unable to split page %d\n", ptr); + assert(0); } - return dict_ins (dict, str-1, ptr, userlen, userinfo); + return dict_ins(dict, str-1, ptr, userlen, userinfo); } else { /* enough room - no split needed ... */ info = (char*)p + DICT_size(p); - memcpy (info, &subptr, sizeof(subptr)); - memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char)); + memcpy(info, &subptr, sizeof(subptr)); + memcpy(info+sizeof(Dict_ptr), &dc, sizeof(Dict_char)); info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen; - memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1, - userinfo, userlen); + memcpy(info+sizeof(Dict_char)+sizeof(Dict_ptr)+1, + userinfo, userlen); indxp[-mid] = -DICT_size(p); DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr) +1+userlen; DICT_type(p) = 1; - dict_bf_touch (dict->dbf, ptr); + dict_bf_touch(dict->dbf, ptr); } if (xlen) return 1; @@ -424,11 +370,11 @@ static int dict_ins (Dict dict, const Dict_char *str, { if (subptr == 0) { - subptr = new_page (dict, ptr, NULL); - memcpy (info, &subptr, sizeof(subptr)); - dict_bf_touch (dict->dbf, ptr); + subptr = new_page(dict, ptr, NULL); + memcpy(info, &subptr, sizeof(subptr)); + dict_bf_touch(dict->dbf, ptr); } - return dict_ins (dict, str, subptr, userlen, userinfo); + return dict_ins(dict, str, subptr, userlen, userinfo); } } } @@ -446,11 +392,11 @@ static int dict_ins (Dict dict, const Dict_char *str, { if (DICT_type(p)) { - clean_page (dict, ptr, p, NULL, 0, NULL); - return dict_ins (dict, str, ptr, userlen, userinfo); + clean_page(dict, ptr, p, NULL, 0, NULL); + return dict_ins(dict, str, ptr, userlen, userinfo); } - split_page (dict, ptr, p); - return dict_ins (dict, str, ptr, userlen, userinfo); + split_page(dict, ptr, p); + return dict_ins(dict, str, ptr, userlen, userinfo); } if (cmp) { @@ -475,31 +421,42 @@ static int dict_ins (Dict dict, const Dict_char *str, else DICT_type(p) = 1; info = (char*)p + DICT_size(p); - memcpy (info, str, slen); + memcpy(info, str, slen); info += slen; *info++ = userlen; - memcpy (info, userinfo, userlen); + memcpy(info, userinfo, userlen); info += userlen; *indxp = DICT_size(p); DICT_size(p) = info- (char*) p; - dict_bf_touch (dict->dbf, ptr); + dict_bf_touch(dict->dbf, ptr); if (cmp) return 0; return 1; } -int dict_insert (Dict dict, const char *str, int userlen, void *userinfo) +int dict_insert(Dict dict, const char *str, int userlen, void *userinfo) { + if (!dict->rw) + return -1; + dict->no_insert++; if (!dict->head.root) { void *p; - if (dict->rw) - dict->head.root = new_page (dict, 0, &p); + dict->head.root = new_page(dict, 0, &p); if (!dict->head.root) - return 0; + return -1; } - return dict_ins (dict, (const Dict_char *) str, dict->head.root, - userlen, userinfo); + return dict_ins(dict, (const Dict_char *) str, dict->head.root, + userlen, userinfo); } +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +