-/*
- * Copyright (C) 1994, Index Data I/S
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: insert.c,v $
- * Revision 1.16 1996-02-02 13:43:50 adam
- * The public functions simply use char instead of Dict_char to represent
- * search strings. Dict_char is used internally only.
- *
- * Revision 1.15 1996/02/01 20:39:59 adam
- * Bug fix: insert didn't work on 8-bit characters due to unsigned char
- * compares in dict_strcmp (strcmp) and signed Dict_char. Dict_char is
- * unsigned now.
- *
- * Revision 1.14 1995/12/07 11:48:56 adam
- * Insert operation obeys DICT_type = 1 (slack in page).
- * Function dict_open exists if page size or magic aren't right.
- *
- * Revision 1.13 1995/11/28 09:06:37 adam
- * Fixed potential dangling pointer.
- *
- * Revision 1.12 1995/09/06 10:34:44 adam
- * Memcpy in clean_page edited to satisfy checkergcc.
- *
- * Revision 1.11 1995/09/04 12:33:31 adam
- * Various cleanup. YAZ util used instead.
- *
- * Revision 1.10 1994/10/05 12:16:48 adam
- * Pagesize is a resource now.
- *
- * Revision 1.9 1994/09/16 15:39:13 adam
- * Initial code of lookup - not tested yet.
- *
- * Revision 1.8 1994/09/16 12:35:01 adam
- * New version of split_page which use clean_page for splitting.
- *
- * Revision 1.7 1994/09/12 08:06:42 adam
- * Futher development of insert.c
- *
- * Revision 1.6 1994/09/06 13:05:15 adam
- * Further development of insertion. Some special cases are
- * not properly handled yet! assert(0) are put here. The
- * binary search in each page definitely reduce usr CPU.
- *
- * Revision 1.5 1994/09/01 17:49:39 adam
- * Removed stupid line. Work on insertion in dictionary. Not finished yet.
- *
- * Revision 1.4 1994/09/01 17:44:09 adam
- * depend include change.
- *
- * Revision 1.3 1994/08/18 12:40:56 adam
- * Some development of dictionary. Not finished at all!
- *
- * Revision 1.2 1994/08/17 13:32:19 adam
- * Use cache in dict - not in bfile.
- *
- * Revision 1.1 1994/08/16 16:26:48 adam
- * Added dict.
- *
- */
+/* $Id: insert.c,v 1.31 2007-01-15 15:10:15 adam Exp $
+ Copyright (C) 1995-2007
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
-#include <dict.h>
+#include "dict-p.h"
#define CHECK 0
static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
{
void *p;
- Dict_ptr ptr = dict->head.free_list;
- if (dict->head.free_list == dict->head.last)
+ Dict_ptr ptr = dict->head.last;
+ if (!dict->head.freelist)
{
- dict->head.free_list++;
- dict->head.last = dict->head.free_list;
- dict_bf_newp (dict->dbf, ptr, &p);
+ dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size);
+ (dict->head.last)++;
}
else
{
- dict_bf_readp (dict->dbf, dict->head.free_list, &p);
- dict->head.free_list = DICT_nextptr(p);
- if (dict->head.free_list == 0)
- dict->head.free_list = dict->head.last;
+ ptr = dict->head.freelist;
+ dict_bf_readp (dict->dbf, ptr, &p);
+ dict->head.freelist = DICT_backptr(p);
}
assert (p);
DICT_type(p) = 0;
DICT_backptr(p) = back_ptr;
- DICT_nextptr(p) = 0;
DICT_nodir(p) = 0;
DICT_size(p) = DICT_infoffset;
+ DICT_bsize(p) = dict->head.page_size;
if (pp)
*pp = p;
return ptr;
Dict_char prev_char = 0;
int best_no = -1, no_current = 1;
+ dict->no_split++;
/* determine splitting char... */
- indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
+ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
for (i = DICT_nodir (p); --i >= 0; --indxp)
{
if (*indxp > 0) /* tail string here! */
{ /* first entry met */
best_char = prev_char = dc;
best_no = 1;
+ best_indxp = indxp;
}
else if (prev_char == dc)
{ /* same char prefix. update */
}
}
}
- if (best_no < 0) /* we didn't find any tail string entry at all! */
- return -1;
+ assert(best_no >= 0); /* at least one tail string entry must have been found */
j = best_indxp - (short*) p;
subptr = new_page (dict, ptr, &subp);
char *info, *info1;
int slen;
Dict_char dc;
-
info = (char*) p + ((short*) p)[j];
/* entry start */
memcpy (&dc, info, sizeof(dc));
assert (dc == best_char);
- slen = dict_strlen((Dict_char*) info);
+ slen = 1+dict_strlen((Dict_char*) info);
- assert (slen > 0);
- if (slen == 1)
+ assert (slen > 1);
+ if (slen == 2)
{
assert (!info_here);
- info_here = info+(slen+1)*sizeof(Dict_char);
+ info_here = info+slen*sizeof(Dict_char);
}
else
{
- info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
+ info1 = info+slen*sizeof(Dict_char); /* info start */
dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
subptr, *info1, info1+1);
dict_bf_readp (dict->dbf, ptr, &p);
static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
Dict_ptr subptr, char *userinfo)
{
- char *np = xmalloc (dict->head.page_size);
+ char *np = (char *) xmalloc (dict->head.page_size);
int i, slen, no = 0;
short *indxp1, *indxp2;
char *info1, *info2;
- indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
- indxp2 = (short*) ((char*) np+DICT_pagesize(dict));
+ DICT_bsize(np) = dict->head.page_size;
+ indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
+ indxp2 = (short*) ((char*) np+DICT_bsize(np));
info2 = (char*) np + DICT_infoffset;
for (i = DICT_nodir (p); --i >= 0; --indxp1)
{
info2 - ((char*)np+DICT_infoffset));
memcpy ((char*)p + ((char*)indxp2 - (char*)np),
indxp2,
- ((char*) np+DICT_pagesize(dict)) - (char*)indxp2);
+ ((char*) np+DICT_bsize(p)) - (char*)indxp2);
#else
memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
DICT_pagesize(dict)-DICT_infoffset);
/* return 2 if same as before */
static int dict_ins (Dict dict, const Dict_char *str,
- Dict_ptr back_ptr, int userlen, void *userinfo)
+ Dict_ptr ptr, int userlen, void *userinfo)
{
int hi, lo, mid, slen, cmp = 1;
- Dict_ptr ptr = back_ptr;
short *indxp;
char *info;
void *p;
- if (ptr == 0)
- ptr = new_page (dict, back_ptr, &p);
- else
- dict_bf_readp (dict->dbf, ptr, &p);
+ dict_bf_readp (dict->dbf, ptr, &p);
assert (p);
assert (ptr);
mid = lo = 0;
hi = DICT_nodir(p)-1;
- indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
+ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
while (lo <= hi)
{
mid = (lo+hi)/2;
/* consider change of userinfo length... */
if (*info == userlen)
{
+ /* change of userinfo ? */
if (memcmp (info+1, userinfo, userlen))
{
dict_bf_touch (dict->dbf, ptr);
memcpy (info+1, userinfo, userlen);
return 1;
}
+ /* same userinfo */
return 2;
}
else if (*info > userlen)
{
+ /* room for new userinfo */
DICT_type(p) = 1;
*info = userlen;
dict_bf_touch (dict->dbf, ptr);
memcpy (&subptr, info, sizeof(Dict_ptr));
if (*++str == DICT_EOS)
{
- int xlen;
-
- xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
+ /* end of string reached: store userinfo here... */
+
+ int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
if (xlen == userlen)
{
if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
dict_bf_touch (dict->dbf, ptr);
return 1;
}
+ /* xlen < userlen, expanding needed ... */
if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
userlen >=
- DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short))
+ DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
{
+ /* not enough room - split needed ... */
if (DICT_type(p) == 1)
{
clean_page (dict, ptr, p, NULL, 0, NULL);
}
if (split_page (dict, ptr, p))
{
- logf (LOG_FATAL, "Unable to split page %d\n", ptr);
- abort ();
+ yaz_log (YLOG_FATAL, "Unable to split page %d\n", ptr);
+ assert(0);
}
return dict_ins (dict, str-1, ptr, userlen, userinfo);
}
else
- {
+ { /* enough room - no split needed ... */
info = (char*)p + DICT_size(p);
memcpy (info, &subptr, sizeof(subptr));
memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
--indxp;
slen = (dict_strlen(str)+1)*sizeof(Dict_char);
if (DICT_size(p)+slen+userlen >=
- DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
+ (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
{
if (DICT_type(p))
{
{
short *indxp1;
(DICT_nodir(p))++;
- indxp1 = (short*)((char*) p + DICT_pagesize(dict)
+ indxp1 = (short*)((char*) p + DICT_bsize(p)
- DICT_nodir(p)*sizeof(short));
for (; indxp1 != indxp; indxp1++)
indxp1[0] = indxp1[1];
int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
{
- assert (dict->head.last > 0);
- if (dict->head.last == 1)
- return dict_ins (dict, (const Dict_char *) str, 0, userlen, userinfo);
- else
- return dict_ins (dict, (const Dict_char *) str, 1, userlen, userinfo);
+ if (!dict->rw) /* rw flag clear: refuse the insert. NOTE(review): presumably marks a read-only handle - confirm in dict-p.h */
+ return -1;
+ dict->no_insert++; /* count this insert request */
+ if (!dict->head.root) /* no root page recorded in the header yet */
+ {
+ void *p;
+ dict->head.root = new_page (dict, 0, &p); /* allocate the root page with back pointer 0 */
+ if (!dict->head.root) /* new_page returned page number 0: report failure */
+ return -1;
+ }
+ return dict_ins (dict, (const Dict_char *) str, dict->head.root,
+ userlen, userinfo); /* insert str and its userinfo starting at the root page; dict_ins result is returned unchanged */
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+