X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Finsert.c;h=1057d670703d8a292050e47187ec415189d3bb6e;hp=70a18b4e6088583ef30e4eeb68affac67fa450b8;hb=ef696645cc3b7e0f4027008d1dc589c0f0f90c1f;hpb=30bfffa1e13e0ebd1d7eac84518705e2fbcd955b diff --git a/dict/insert.c b/dict/insert.c index 70a18b4..1057d67 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -1,10 +1,48 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1999, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: insert.c,v $ - * Revision 1.10 1994-10-05 12:16:48 adam + * Revision 1.21 1999-05-26 07:49:12 adam + * C++ compilation. + * + * Revision 1.20 1999/05/15 14:36:37 adam + * Updated dictionary. Implemented "compression" of dictionary. + * + * Revision 1.19 1999/02/02 14:50:22 adam + * Updated WIN32 code specific sections. Changed header. + * + * Revision 1.18 1998/03/05 08:17:24 adam + * Added a few comments - no code changed. + * + * Revision 1.17 1996/05/14 15:49:09 adam + * Bug fix: In function split_page. In rare cases variable best_indxp was + * referenced. + * + * Revision 1.16 1996/02/02 13:43:50 adam + * The public functions simply use char instead of Dict_char to represent + * search strings. Dict_char is used internally only. + * + * Revision 1.15 1996/02/01 20:39:59 adam + * Bug fix: insert didn't work on 8-bit characters due to unsigned char + * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is + * unsigned now. + * + * Revision 1.14 1995/12/07 11:48:56 adam + * Insert operation obeys DICT_type = 1 (slack in page). + * Function dict_open exists if page size or magic aren't right. + * + * Revision 1.13 1995/11/28 09:06:37 adam + * Fixed potential dangling pointer. + * + * Revision 1.12 1995/09/06 10:34:44 adam + * Memcpy in clean_page edited to satisfy checkergcc. + * + * Revision 1.11 1995/09/04 12:33:31 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.10 1994/10/05 12:16:48 adam * Pagesize is a resource now. * * Revision 1.9 1994/09/16 15:39:13 adam @@ -56,26 +94,24 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) { void *p; - Dict_ptr ptr = dict->head.free_list; - if (dict->head.free_list == dict->head.last) + Dict_ptr ptr = dict->head.last; + if (!dict->head.freelist) { - dict->head.free_list++; - dict->head.last = dict->head.free_list; - dict_bf_newp (dict->dbf, ptr, &p); + dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size); + (dict->head.last)++; } else { - dict_bf_readp (dict->dbf, dict->head.free_list, &p); - dict->head.free_list = DICT_nextptr(p); - if (dict->head.free_list == 0) - dict->head.free_list = dict->head.last; + ptr = dict->head.freelist; + dict_bf_readp (dict->dbf, ptr, &p); + dict->head.freelist = DICT_backptr(p); } assert (p); DICT_type(p) = 0; DICT_backptr(p) = back_ptr; - DICT_nextptr(p) = 0; DICT_nodir(p) = 0; DICT_size(p) = DICT_infoffset; + DICT_bsize(p) = dict->head.page_size; if (pp) *pp = p; return ptr; @@ -86,14 +122,14 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) void *subp; char *info_here; Dict_ptr subptr; - int i; + int i, j; short *indxp, *best_indxp = NULL; Dict_char best_char = 0; Dict_char prev_char = 0; int best_no = -1, no_current = 1; /* determine splitting char... */ - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp) { if (*indxp > 0) /* tail string here! */ @@ -105,6 +141,7 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) { /* first entry met */ best_char = prev_char = dc; best_no = 1; + best_indxp = indxp; } else if (prev_char == dc) { /* same char prefix. update */ @@ -125,31 +162,34 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) if (best_no < 0) /* we didn't find any tail string entry at all! */ return -1; + j = best_indxp - (short*) p; subptr = new_page (dict, ptr, &subp); /* scan entries to see if there is a string with */ /* length 1. info_here indicates if such entry exist */ info_here = NULL; - for (indxp=best_indxp, i=0; i 0); - - info = (char*) p + *indxp; /* entry start */ - assert (*info == best_char); - slen = dict_strlen(info); + info = (char*) p + ((short*) p)[j]; + /* entry start */ + memcpy (&dc, info, sizeof(dc)); + assert (dc == best_char); + slen = 1+dict_strlen((Dict_char*) info); - assert (slen > 0); - if (slen == 1) + assert (slen > 1); + if (slen == 2) { assert (!info_here); - info_here = info+(slen+1)*sizeof(Dict_char); + info_here = info+slen*sizeof(Dict_char); } else { - info1 = info+(1+slen)*sizeof(Dict_char); /* info start */ - dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1); + info1 = info+slen*sizeof(Dict_char); /* info start */ + dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)), + subptr, *info1, info1+1); dict_bf_readp (dict->dbf, ptr, &p); } } @@ -159,15 +199,16 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) } static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, - Dict_ptr subptr, char *userinfo) + Dict_ptr subptr, char *userinfo) { - char *np = xmalloc (dict->head.page_size); + char *np = (char *) xmalloc (dict->head.page_size); int i, slen, no = 0; short *indxp1, *indxp2; char *info1, *info2; - indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); - indxp2 = (short*) ((char*) np+DICT_pagesize(dict)); + DICT_bsize(np) = dict->head.page_size; + indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + indxp2 = (short*) ((char*) np+DICT_bsize(np)); info2 = (char*) np + DICT_infoffset; for (i = DICT_nodir (p); --i >= 0; --indxp1) { @@ -199,7 +240,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, continue; } *--indxp2 = info2 - np; - slen = (dict_strlen(info1)+1)*sizeof(Dict_char); + slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char); memcpy (info2, info1, slen); info1 += slen; info2 += slen; @@ -223,8 +264,17 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, info2 += slen; ++no; } +#if 1 + memcpy ((char*)p+DICT_infoffset, + (char*)np+DICT_infoffset, + info2 - ((char*)np+DICT_infoffset)); + memcpy ((char*)p + ((char*)indxp2 - (char*)np), + indxp2, + ((char*) np+DICT_bsize(p)) - (char*)indxp2); +#else memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, DICT_pagesize(dict)-DICT_infoffset); +#endif DICT_size(p) = info2 - np; DICT_type(p) = 0; DICT_nodir(p) = no; @@ -239,25 +289,21 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* return 2 if same as before */ static int dict_ins (Dict dict, const Dict_char *str, - Dict_ptr back_ptr, int userlen, void *userinfo) + Dict_ptr ptr, int userlen, void *userinfo) { int hi, lo, mid, slen, cmp = 1; - Dict_ptr ptr = back_ptr; short *indxp; char *info; void *p; - if (ptr == 0) - ptr = new_page (dict, back_ptr, &p); - else - dict_bf_readp (dict->dbf, ptr, &p); + dict_bf_readp (dict->dbf, ptr, &p); assert (p); assert (ptr); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -270,20 +316,23 @@ static int dict_ins (Dict dict, const Dict_char *str, cmp = dict_strcmp((Dict_char*) info, str); if (!cmp) { - info += (dict_strlen(info)+1)*sizeof(Dict_char); + info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char); /* consider change of userinfo length... */ if (*info == userlen) { + /* change of userinfo ? */ if (memcmp (info+1, userinfo, userlen)) { dict_bf_touch (dict->dbf, ptr); memcpy (info+1, userinfo, userlen); return 1; } + /* same userinfo */ return 2; } else if (*info > userlen) { + /* room for new userinfo */ DICT_type(p) = 1; *info = userlen; dict_bf_touch (dict->dbf, ptr); @@ -310,9 +359,9 @@ static int dict_ins (Dict dict, const Dict_char *str, memcpy (&subptr, info, sizeof(Dict_ptr)); if (*++str == DICT_EOS) { - int xlen; - - xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)]; + /* finish of string. Store userinfo here... */ + + int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)]; if (xlen == userlen) { if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1, @@ -334,10 +383,12 @@ static int dict_ins (Dict dict, const Dict_char *str, dict_bf_touch (dict->dbf, ptr); return 1; } + /* xlen < userlen, expanding needed ... */ if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+ userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) + DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)) { + /* not enough room - split needed ... */ if (DICT_type(p) == 1) { clean_page (dict, ptr, p, NULL, 0, NULL); @@ -346,13 +397,13 @@ static int dict_ins (Dict dict, const Dict_char *str, } if (split_page (dict, ptr, p)) { - log (LOG_FATAL, "Unable to split page %d\n", ptr); + logf (LOG_FATAL, "Unable to split page %d\n", ptr); abort (); } return dict_ins (dict, str-1, ptr, userlen, userinfo); } else - { + { /* enough room - no split needed ... */ info = (char*)p + DICT_size(p); memcpy (info, &subptr, sizeof(subptr)); memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char)); @@ -391,8 +442,13 @@ static int dict_ins (Dict dict, const Dict_char *str, --indxp; slen = (dict_strlen(str)+1)*sizeof(Dict_char); if (DICT_size(p)+slen+userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ + (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */ { + if (DICT_type(p)) + { + clean_page (dict, ptr, p, NULL, 0, NULL); + return dict_ins (dict, str, ptr, userlen, userinfo); + } split_page (dict, ptr, p); return dict_ins (dict, str, ptr, userlen, userinfo); } @@ -400,7 +456,7 @@ static int dict_ins (Dict dict, const Dict_char *str, { short *indxp1; (DICT_nodir(p))++; - indxp1 = (short*)((char*) p + DICT_pagesize(dict) + indxp1 = (short*)((char*) p + DICT_bsize(p) - DICT_nodir(p)*sizeof(short)); for (; indxp1 != indxp; indxp1++) indxp1[0] = indxp1[1]; @@ -433,12 +489,17 @@ static int dict_ins (Dict dict, const Dict_char *str, return 1; } -int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo) +int dict_insert (Dict dict, const char *str, int userlen, void *userinfo) { - assert (dict->head.last > 0); - if (dict->head.last == 1) - return dict_ins (dict, str, 0, userlen, userinfo); - else - return dict_ins (dict, str, 1, userlen, userinfo); + if (!dict->head.root) + { + void *p; + if (dict->rw) + dict->head.root = new_page (dict, 0, &p); + if (!dict->head.root) + return 0; + } + return dict_ins (dict, (const Dict_char *) str, dict->head.root, + userlen, userinfo); }