-/*
- * Copyright (C) 1994, Index Data I/S
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: insert.c,v $
- * Revision 1.4 1994-09-01 17:44:09 adam
- * depend include change.
- * CVS ----------------------------------------------------------------------
- *
- * Revision 1.3 1994/08/18 12:40:56 adam
- * Some development of dictionary. Not finished at all!
- *
- * Revision 1.2 1994/08/17 13:32:19 adam
- * Use cache in dict - not in bfile.
- *
- * Revision 1.1 1994/08/16 16:26:48 adam
- * Added dict.
- *
- */
+/* $Id: insert.c,v 1.25 2005-01-15 19:38:21 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
-#include <dict.h>
+#include "dict-p.h"
+
/* CHECK gates a debug-only loop in dict_ins (#if CHECK) that walks the
   page index and, for every negative (sub-page) index entry, asserts that
   the byte following the stored Dict_ptr is greater than ' '.
   It is defined as 0 here, so that loop is compiled out. */
+#define CHECK 0
/* Forward declarations: both functions are called earlier in this file
   than the point where they are defined. */
static int dict_ins (Dict dict, const Dict_char *str,
Dict_ptr back_ptr, int userlen, void *userinfo);
+static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
+ Dict_ptr subptr, char *userinfo);
/* new_page: obtain an empty dictionary page and initialise its header.
 *
 * NOTE(review): this span is shown in diff form; lines starting with '-'
 * appear to be the previous implementation and lines starting with '+'
 * the current one. The description below follows the '+' lines.
 *
 * dict      dictionary handle; dict->head and dict->dbf are used.
 * back_ptr  value stored in the new page's back-pointer field.
 * pp        optional out-parameter: if non-NULL, receives the in-memory
 *           page pointer.
 *
 * Returns the page number (Dict_ptr) of the page that was set up.
 */
static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
{
void *p;
- Dict_ptr ptr = dict->head.free_list;
- if (dict->head.free_list == dict->head.last)
/* default: the new page is the one numbered head.last */
+ Dict_ptr ptr = dict->head.last;
+ if (!dict->head.freelist)
{
- dict->head.free_list++;
- dict->head.last = dict->head.free_list;
- dict_bf_newp (dict->dbf, ptr, &p);
/* free list is empty: create page head.last, then advance head.last */
+ dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size);
+ (dict->head.last)++;
}
else
{
- dict_bf_readp (dict->dbf, dict->head.free_list, &p);
- dict->head.free_list = DICT_nextptr(p);
- if (dict->head.free_list == 0)
- dict->head.free_list = dict->head.last;
/* reuse the head of the free list; the next free page number is read
   from the reused page's back-pointer field */
+ ptr = dict->head.freelist;
+ dict_bf_readp (dict->dbf, ptr, &p);
+ dict->head.freelist = DICT_backptr(p);
}
assert (p);
/* reset the page header: type 0, no index entries, data area starting
   at DICT_infoffset */
DICT_type(p) = 0;
DICT_backptr(p) = back_ptr;
- DICT_nextptr(p) = 0;
DICT_nodir(p) = 0;
DICT_size(p) = DICT_infoffset;
- *pp = p;
+ DICT_bsize(p) = dict->head.page_size;
/* pp may be NULL; dict_ins calls new_page with a NULL pp */
+ if (pp)
+ *pp = p;
return ptr;
}
void *subp;
char *info_here;
Dict_ptr subptr;
- int i, need;
- short *indxp, *best_indxp;
- Dict_char best_char;
- Dict_char prev_char;
- int best_no = -1, no_current;
+ int i, j;
+ short *indxp, *best_indxp = NULL;
+ Dict_char best_char = 0;
+ Dict_char prev_char = 0;
+ int best_no = -1, no_current = 1;
- indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
+ /* determine splitting char... */
+ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
for (i = DICT_nodir (p); --i >= 0; --indxp)
{
if (*indxp > 0) /* tail string here! */
if (best_no < 0)
{ /* first entry met */
best_char = prev_char = dc;
- no_current = best_no = 1;
+ best_no = 1;
+ best_indxp = indxp;
}
else if (prev_char == dc)
{ /* same char prefix. update */
if (best_no < 0) /* we didn't find any tail string entry at all! */
return -1;
+ j = best_indxp - (short*) p;
subptr = new_page (dict, ptr, &subp);
/* scan entries to see if there is a string with */
/* length 1. info_here indicates whether such an entry exists */
info_here = NULL;
- for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
- {
- char *info;
- int slen;
-
- assert (*indxp > 0);
-
- info = (char*) p + *indxp; /* entry start */
- slen = dict_strlen(info);
-
- assert (slen > 0);
- if (slen == 1)
- {
- assert (!info_here);
- info_here = info+(slen+1)*sizeof(Dict_char);
- }
- }
- /* calculate the amount of bytes needed for this entry when */
- /* transformed to a sub entry */
- need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
- if (info_here)
- need += *info_here;
-
- indxp = best_indxp;
- /* now loop on all entries with string length > 1 i.e. all */
- /* those entries which contribute to a sub page */
- best_indxp = NULL;
- for (i=0; i<best_no; i++, indxp++)
+ for (i=0; i<best_no; i++, j++)
{
char *info, *info1;
int slen;
+ Dict_char dc;
- assert (*indxp > 0);
-
- info = (char*) p + *indxp; /* entry start */
- slen = dict_strlen(info);
-
- if (slen > 1)
- {
- info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
-
- if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
- best_indxp = indxp; /* space for entry */
- dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
- }
- }
- if (best_indxp)
- { /* there was a hole big enough for a sub entry */
- char *info = (char*) p + *best_indxp;
- short *indxp1;
+ info = (char*) p + ((short*) p)[j];
+ /* entry start */
+ memcpy (&dc, info, sizeof(dc));
+ assert (dc == best_char);
+ slen = 1+dict_strlen((Dict_char*) info);
- *--indxp = - *best_indxp;
- DICT_type(p) = 1;
- DICT_nodir (p) -= (best_no-1);
- indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
- while (indxp != indxp1)
+ assert (slen > 1);
+ if (slen == 2)
{
- --indxp;
- *indxp = indxp[1-best_no];
+ assert (!info_here);
+ info_here = info+slen*sizeof(Dict_char);
}
- memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
- info += sizeof(Dict_ptr);
- memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
- info += sizeof(Dict_char);
- if (info_here)
- memcpy (info, info_here, *info_here+1); /* with information */
else
- *info = 0; /* without info */
- }
- else
- {
- short *indxp1, *indxp2;
- assert (0);
- DICT_type(p) = 1;
- DICT_nodir(p) -= best_no;
- indxp2 = indxp;
- indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
- do
{
- --indxp2;
- indxp2[0] = indxp2[-best_no];
- } while (indxp2 != indxp1);
+ info1 = info+slen*sizeof(Dict_char); /* info start */
+ dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
+ subptr, *info1, info1+1);
+ dict_bf_readp (dict->dbf, ptr, &p);
+ }
}
+ /* now clean the page ... */
+ clean_page (dict, ptr, p, &best_char, subptr, info_here);
return 0;
}
-static void clean_page (Dict dict, void *p)
+static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
+ Dict_ptr subptr, char *userinfo)
{
- char *np = xmalloc (dict->head.page_size);
- int i, slen;
+ char *np = (char *) xmalloc (dict->head.page_size);
+ int i, slen, no = 0;
short *indxp1, *indxp2;
char *info1, *info2;
- indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
- indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
+ DICT_bsize(np) = dict->head.page_size;
+ indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
+ indxp2 = (short*) ((char*) np+DICT_bsize(np));
info2 = (char*) np + DICT_infoffset;
for (i = DICT_nodir (p); --i >= 0; --indxp1)
{
/* unsigned char length of information */
/* char * information */
- *--indxp2 = info2 - np;
info1 = (char*) p + *indxp1;
- slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
- memcpy (info2, info1, slen);
- info2 += slen;
- info1 += slen;
- slen = *info1+1;
+ if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
+ {
+ if (subptr == 0)
+ continue;
+ *--indxp2 = -(info2 - np);
+ memcpy (info2, &subptr, sizeof(Dict_ptr));
+ info2 += sizeof(Dict_ptr);
+ memcpy (info2, out, sizeof(Dict_char));
+ info2 += sizeof(Dict_char);
+ if (userinfo)
+ {
+ memcpy (info2, userinfo, *userinfo+1);
+ info2 += *userinfo + 1;
+ }
+ else
+ *info2++ = 0;
+ subptr = 0;
+ ++no;
+ continue;
+ }
+ *--indxp2 = info2 - np;
+ slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
memcpy (info2, info1, slen);
+ info1 += slen;
info2 += slen;
- info1 += slen;
}
else
{
/* unsigned char length of information */
/* char * information */
+ assert (*indxp1 < 0);
*--indxp2 = -(info2 - np);
info1 = (char*) p - *indxp1;
memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
+ info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
- info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
- slen = *info1+1;
- memcpy (info2, info1, slen);
- info2 += slen;
- info1 += slen;
}
+ slen = *info1+1;
+ memcpy (info2, info1, slen);
+ info2 += slen;
+ ++no;
}
- memcpy ((char*) p + DICT_infoffset, (char*) np + DICT_infoffset,
- DICT_PAGESIZE-DICT_infoffset);
+#if 1
+ memcpy ((char*)p+DICT_infoffset,
+ (char*)np+DICT_infoffset,
+ info2 - ((char*)np+DICT_infoffset));
+ memcpy ((char*)p + ((char*)indxp2 - (char*)np),
+ indxp2,
+ ((char*) np+DICT_bsize(p)) - (char*)indxp2);
+#else
+ memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
+ DICT_pagesize(dict)-DICT_infoffset);
+#endif
DICT_size(p) = info2 - np;
DICT_type(p) = 0;
+ DICT_nodir(p) = no;
xfree (np);
+ dict_bf_touch (dict->dbf, ptr);
}
+
+
+/* return 0 if the entry is new */
+/* return 1 if the entry existed before but its info was changed */
+/* return 2 if the entry existed before with the same info */
+
static int dict_ins (Dict dict, const Dict_char *str,
- Dict_ptr back_ptr, int userlen, void *userinfo)
+ Dict_ptr ptr, int userlen, void *userinfo)
{
- int i, slen, cmp = 1;
- Dict_ptr ptr = back_ptr;
+ int hi, lo, mid, slen, cmp = 1;
short *indxp;
char *info;
void *p;
- if (ptr == 0)
- ptr = new_page (dict, back_ptr, &p);
- else
- dict_bf_readp (dict->dbf, ptr, &p);
+ dict_bf_readp (dict->dbf, ptr, &p);
assert (p);
assert (ptr);
- indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
- for (i = DICT_nodir (p); --i >= 0; --indxp)
+ mid = lo = 0;
+ hi = DICT_nodir(p)-1;
+ indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
+ while (lo <= hi)
{
- if (*indxp > 0) /* tail string here! */
+ mid = (lo+hi)/2;
+ if (indxp[-mid] > 0)
{
- info = (char*) p + *indxp;
/* string (Dict_char *) DICT_EOS terminated */
/* unsigned char length of information */
/* char * information */
- cmp = dict_strcmp ((Dict_char*) info, str);
+ info = (char*)p + indxp[-mid];
+ cmp = dict_strcmp((Dict_char*) info, str);
if (!cmp)
{
- info += (dict_strlen(info)+1)*sizeof(Dict_char);
+ info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
/* consider change of userinfo length... */
if (*info == userlen)
{
+ /* change of userinfo ? */
if (memcmp (info+1, userinfo, userlen))
{
dict_bf_touch (dict->dbf, ptr);
memcpy (info+1, userinfo, userlen);
+ return 1;
}
+ /* same userinfo */
+ return 2;
}
else if (*info > userlen)
{
+ /* room for new userinfo */
DICT_type(p) = 1;
*info = userlen;
dict_bf_touch (dict->dbf, ptr);
memcpy (info+1, userinfo, userlen);
+ return 1;
}
- else
- {
- DICT_type(p) = 1;
- break;
- }
- return 0;
- }
- else if(cmp > 0)
break;
+ }
}
- else /* tail of string in sub page */
+ else
{
Dict_char dc;
- assert (*indxp < 0);
- info = (char*) p - *indxp;
+ Dict_ptr subptr;
+
/* Dict_ptr subptr */
/* Dict_char sub char */
/* unsigned char length of information */
/* char * information */
+ info = (char*)p - indxp[-mid];
memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
cmp = dc- *str;
if (!cmp)
{
- Dict_ptr subptr;
- void *pp;
+ memcpy (&subptr, info, sizeof(Dict_ptr));
if (*++str == DICT_EOS)
- { /* missing: consider change of userinfo length ... */
- if (memcmp (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
- userinfo, userlen))
+ {
+ /* finish of string. Store userinfo here... */
+
+ int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
+ if (xlen == userlen)
{
- memcpy (dict+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
+ if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
+ userinfo, userlen))
+ {
+ dict_bf_touch (dict->dbf, ptr);
+ memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
+ userinfo, userlen);
+ return 1;
+ }
+ return 2;
+ }
+ else if (xlen > userlen)
+ {
+ DICT_type(p) = 1;
+ info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
+ memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
userinfo, userlen);
dict_bf_touch (dict->dbf, ptr);
+ return 1;
+ }
+ /* xlen < userlen, expanding needed ... */
+ if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
+ userlen >=
+ DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
+ {
+ /* not enough room - split needed ... */
+ if (DICT_type(p) == 1)
+ {
+ clean_page (dict, ptr, p, NULL, 0, NULL);
+ return dict_ins (dict, str-1, ptr,
+ userlen, userinfo);
+ }
+ if (split_page (dict, ptr, p))
+ {
+ yaz_log (YLOG_FATAL, "Unable to split page %d\n", ptr);
+ abort ();
+ }
+ return dict_ins (dict, str-1, ptr, userlen, userinfo);
}
+ else
+ { /* enough room - no split needed ... */
+ info = (char*)p + DICT_size(p);
+ memcpy (info, &subptr, sizeof(subptr));
+ memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
+ info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
+ memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
+ userinfo, userlen);
+ indxp[-mid] = -DICT_size(p);
+ DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
+ +1+userlen;
+ DICT_type(p) = 1;
+ dict_bf_touch (dict->dbf, ptr);
+ }
+ if (xlen)
+ return 1;
return 0;
}
else
{
- memcpy (&subptr, info, sizeof(subptr));
if (subptr == 0)
{
- subptr = new_page (dict, ptr, &pp);
+ subptr = new_page (dict, ptr, NULL);
memcpy (info, &subptr, sizeof(subptr));
dict_bf_touch (dict->dbf, ptr);
}
- return dict_ins (dict, str, ptr, userlen, userinfo);
+ return dict_ins (dict, str, subptr, userlen, userinfo);
}
}
- else if(cmp > 0)
- break;
}
+ if (cmp < 0)
+ lo = mid+1;
+ else
+ hi = mid-1;
}
+ indxp = indxp-mid;
+ if (lo>hi && cmp < 0)
+ --indxp;
slen = (dict_strlen(str)+1)*sizeof(Dict_char);
if (DICT_size(p)+slen+userlen >=
- DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
+ (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
{
- if (DICT_type(p) == 1)
+ if (DICT_type(p))
{
- clean_page (dict, p);
- dict_ins (dict, str, ptr, userlen, userinfo);
- return 0;
+ clean_page (dict, ptr, p, NULL, 0, NULL);
+ return dict_ins (dict, str, ptr, userlen, userinfo);
}
- i = 0;
- do
- {
- if (i > 0)
- assert (0);
- if (split_page (dict, ptr, p))
- {
- log (LOG_FATAL, "Unable to split page %d\n", ptr);
- abort ();
- }
- if (DICT_size(p)+slen+userlen <
- DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
- break;
- i++;
- clean_page (dict, p);
- } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
- (1+DICT_nodir(p))*sizeof(short));
- dict_ins (dict, str, ptr, userlen, userinfo);
- return 0;
+ split_page (dict, ptr, p);
+ return dict_ins (dict, str, ptr, userlen, userinfo);
}
if (cmp)
{
short *indxp1;
(DICT_nodir(p))++;
- indxp1 = (short*)((char*) p + DICT_PAGESIZE
+ indxp1 = (short*)((char*) p + DICT_bsize(p)
- DICT_nodir(p)*sizeof(short));
for (; indxp1 != indxp; indxp1++)
indxp1[0] = indxp1[1];
+#if CHECK
+ indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
+ for (i = DICT_nodir (p); --i >= 0; --indxp1)
+ {
+ if (*indxp1 < 0)
+ {
+ info = (char*)p - *indxp1;
+ assert (info[sizeof(Dict_ptr)] > ' ');
+ }
+ }
+#endif
}
+ else
+ DICT_type(p) = 1;
info = (char*)p + DICT_size(p);
memcpy (info, str, slen);
info += slen;
info += userlen;
*indxp = DICT_size(p);
-#if 0
- printf ("indxp[%d]\n", (char*) indxp - (char*) p);
-#endif
-
DICT_size(p) = info- (char*) p;
dict_bf_touch (dict->dbf, ptr);
- return 0;
+ if (cmp)
+ return 0;
+ return 1;
}
/* dict_insert: public entry point for inserting a string into the
 * dictionary.
 *
 * NOTE(review): this span is shown in diff form; lines starting with '-'
 * appear to be the previous implementation and lines starting with '+'
 * the current one. The description below follows the '+' lines.
 *
 * dict      dictionary handle.
 * str       key to insert; it is cast to const Dict_char * and passed to
 *           dict_ins.
 * userlen   length in bytes of userinfo.
 * userinfo  data stored with the key.
 *
 * If the dictionary has no root page (dict->head.root is 0), a root page
 * is created with new_page only when dict->rw is set. If there is still
 * no root afterwards, the function returns 0 without inserting.
 * Otherwise the result of dict_ins on the root page is returned (see the
 * return-code comment above dict_ins).
 *
 * NOTE(review): the early "no root" return value 0 is the same value
 * dict_ins uses for a newly inserted entry - confirm callers do not need
 * to tell the two apart.
 */
-int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
+int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
{
- assert (dict->head.last > 0);
- if (dict->head.last == 1)
- dict_ins (dict, str, 0, userlen, userinfo);
- else
- dict_ins (dict, str, 1, userlen, userinfo);
- return 0;
+ if (!dict->head.root)
+ {
/* p receives the new page pointer from new_page but is not used here */
+ void *p;
+ if (dict->rw)
+ dict->head.root = new_page (dict, 0, &p);
+ if (!dict->head.root)
+ return 0;
+ }
+ return dict_ins (dict, (const Dict_char *) str, dict->head.root,
+ userlen, userinfo);
}
+