X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dict%2Finsert.c;h=a1fb0f36f5b838a535c7b6754485649525a4f210;hp=db7fa69e7bb4653823873773ec2786b84db25d60;hb=89d16cf15eda0e4802d18b8ad09bd3653508ebfc;hpb=3db15cf883ad80b6a4b0e960f3c3b59c86b59c6b diff --git a/dict/insert.c b/dict/insert.c index db7fa69..a1fb0f3 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -1,78 +1,30 @@ -/* - * Copyright (C) 1994-1998, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: insert.c,v $ - * Revision 1.18 1998-03-05 08:17:24 adam - * Added a few comments - no code changed. - * - * Revision 1.17 1996/05/14 15:49:09 adam - * Bug fix: In function split_page. In rare cases variable best_indxp was - * referenced. - * - * Revision 1.16 1996/02/02 13:43:50 adam - * The public functions simply use char instead of Dict_char to represent - * search strings. Dict_char is used internally only. - * - * Revision 1.15 1996/02/01 20:39:59 adam - * Bug fix: insert didn't work on 8-bit characters due to unsigned char - * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is - * unsigned now. - * - * Revision 1.14 1995/12/07 11:48:56 adam - * Insert operation obeys DICT_type = 1 (slack in page). - * Function dict_open exists if page size or magic aren't right. - * - * Revision 1.13 1995/11/28 09:06:37 adam - * Fixed potential dangling pointer. - * - * Revision 1.12 1995/09/06 10:34:44 adam - * Memcpy in clean_page edited to satisfy checkergcc. - * - * Revision 1.11 1995/09/04 12:33:31 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.10 1994/10/05 12:16:48 adam - * Pagesize is a resource now. - * - * Revision 1.9 1994/09/16 15:39:13 adam - * Initial code of lookup - not tested yet. - * - * Revision 1.8 1994/09/16 12:35:01 adam - * New version of split_page which use clean_page for splitting. - * - * Revision 1.7 1994/09/12 08:06:42 adam - * Futher development of insert.c - * - * Revision 1.6 1994/09/06 13:05:15 adam - * Further development of insertion. Some special cases are - * not properly handled yet! assert(0) are put here. The - * binary search in each page definitely reduce usr CPU. - * - * Revision 1.5 1994/09/01 17:49:39 adam - * Removed stupid line. Work on insertion in dictionary. Not finished yet. - * - * Revision 1.4 1994/09/01 17:44:09 adam - * depend include change. - * - * Revision 1.3 1994/08/18 12:40:56 adam - * Some development of dictionary. Not finished at all! - * - * Revision 1.2 1994/08/17 13:32:19 adam - * Use cache in dict - not in bfile. - * - * Revision 1.1 1994/08/16 16:26:48 adam - * Added dict. - * - */ +/* This file is part of the Zebra server. + Copyright (C) 1994-2009 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + + #include #include #include #include -#include +#include "dict-p.h" #define CHECK 0 @@ -85,26 +37,24 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) { void *p; - Dict_ptr ptr = dict->head.free_list; - if (dict->head.free_list == dict->head.last) + Dict_ptr ptr = dict->head.last; + if (!dict->head.freelist) { - dict->head.free_list++; - dict->head.last = dict->head.free_list; - dict_bf_newp (dict->dbf, ptr, &p); + dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size); + (dict->head.last)++; } else { - dict_bf_readp (dict->dbf, dict->head.free_list, &p); - dict->head.free_list = DICT_nextptr(p); - if (dict->head.free_list == 0) - dict->head.free_list = dict->head.last; + ptr = dict->head.freelist; + dict_bf_readp (dict->dbf, ptr, &p); + dict->head.freelist = DICT_backptr(p); } assert (p); DICT_type(p) = 0; DICT_backptr(p) = back_ptr; - DICT_nextptr(p) = 0; DICT_nodir(p) = 0; DICT_size(p) = DICT_infoffset; + DICT_bsize(p) = dict->head.page_size; if (pp) *pp = p; return ptr; @@ -121,8 +71,9 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) Dict_char prev_char = 0; int best_no = -1, no_current = 1; + dict->no_split++; /* determine splitting char... */ - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp) { if (*indxp > 0) /* tail string here! */ @@ -152,8 +103,7 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) } } } - if (best_no < 0) /* we didn't find any tail string entry at all! */ - return -1; + assert(best_no >= 0); /* we didn't find any tail string entry at all! */ j = best_indxp - (short*) p; subptr = new_page (dict, ptr, &subp); @@ -194,13 +144,14 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, Dict_ptr subptr, char *userinfo) { - char *np = xmalloc (dict->head.page_size); + char *np = (char *) xmalloc (dict->head.page_size); int i, slen, no = 0; short *indxp1, *indxp2; char *info1, *info2; - indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); - indxp2 = (short*) ((char*) np+DICT_pagesize(dict)); + DICT_bsize(np) = dict->head.page_size; + indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + indxp2 = (short*) ((char*) np+DICT_bsize(np)); info2 = (char*) np + DICT_infoffset; for (i = DICT_nodir (p); --i >= 0; --indxp1) { @@ -262,7 +213,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, info2 - ((char*)np+DICT_infoffset)); memcpy ((char*)p + ((char*)indxp2 - (char*)np), indxp2, - ((char*) np+DICT_pagesize(dict)) - (char*)indxp2); + ((char*) np+DICT_bsize(p)) - (char*)indxp2); #else memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, DICT_pagesize(dict)-DICT_infoffset); @@ -281,25 +232,21 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, /* return 2 if same as before */ static int dict_ins (Dict dict, const Dict_char *str, - Dict_ptr back_ptr, int userlen, void *userinfo) + Dict_ptr ptr, int userlen, void *userinfo) { int hi, lo, mid, slen, cmp = 1; - Dict_ptr ptr = back_ptr; short *indxp; char *info; void *p; - if (ptr == 0) - ptr = new_page (dict, back_ptr, &p); - else - dict_bf_readp (dict->dbf, ptr, &p); + dict_bf_readp (dict->dbf, ptr, &p); assert (p); assert (ptr); mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -382,7 +329,7 @@ static int dict_ins (Dict dict, const Dict_char *str, /* xlen < userlen, expanding needed ... */ if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+ userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) + DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)) { /* not enough room - split needed ... */ if (DICT_type(p) == 1) @@ -393,8 +340,8 @@ static int dict_ins (Dict dict, const Dict_char *str, } if (split_page (dict, ptr, p)) { - logf (LOG_FATAL, "Unable to split page %d\n", ptr); - abort (); + yaz_log (YLOG_FATAL, "Unable to split page %d\n", ptr); + assert(0); } return dict_ins (dict, str-1, ptr, userlen, userinfo); } @@ -438,7 +385,7 @@ static int dict_ins (Dict dict, const Dict_char *str, --indxp; slen = (dict_strlen(str)+1)*sizeof(Dict_char); if (DICT_size(p)+slen+userlen >= - DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ + (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */ { if (DICT_type(p)) { @@ -452,7 +399,7 @@ static int dict_ins (Dict dict, const Dict_char *str, { short *indxp1; (DICT_nodir(p))++; - indxp1 = (short*)((char*) p + DICT_pagesize(dict) + indxp1 = (short*)((char*) p + DICT_bsize(p) - DICT_nodir(p)*sizeof(short)); for (; indxp1 != indxp; indxp1++) indxp1[0] = indxp1[1]; @@ -487,10 +434,25 @@ static int dict_ins (Dict dict, const Dict_char *str, int dict_insert (Dict dict, const char *str, int userlen, void *userinfo) { - assert (dict->head.last > 0); - if (dict->head.last == 1) - return dict_ins (dict, (const Dict_char *) str, 0, userlen, userinfo); - else - return dict_ins (dict, (const Dict_char *) str, 1, userlen, userinfo); + if (!dict->rw) + return -1; + dict->no_insert++; + if (!dict->head.root) + { + void *p; + dict->head.root = new_page (dict, 0, &p); + if (!dict->head.root) + return -1; + } + return dict_ins (dict, (const Dict_char *) str, dict->head.root, + userlen, userinfo); } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +