-/* $Id: reckeys.c,v 1.6 2006-06-07 10:14:41 adam Exp $
- Copyright (C) 1995-2006
+/* $Id: reckeys.c,v 1.12 2007-01-15 15:10:17 adam Exp $
+ Copyright (C) 1995-2007
Index Data ApS
This file is part of the Zebra server.
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <assert.h>
#include <ctype.h>
-#include <yaz/nmem.h>
-#include "index.h"
#include "reckeys.h"
+#include <yaz/nmem.h>
+#include <yaz/xmalloc.h>
+
+#define NEW 0
+#if NEW
+struct zebra_rec_word_entry { /* one node per distinct (ord, buf/len) pair in a hash bucket chain */
+ char *buf; /* copy of the key string bytes; len bytes, no terminator is stored */
+ size_t len; /* number of bytes in buf */
+ int ord; /* taken from key->mem[0] of the first key written for this entry */
+ int max_seq; /* largest key->mem[key->len-1] appended so far; starts at 0 */
+ struct zebra_rec_word_entry *next; /* next entry in the same hash bucket */
+ struct zebra_rec_key_entry *keys; /* head of the list of keys recorded for this entry */
+ struct zebra_rec_key_entry **last_key; /* where the next key is linked in: &keys when empty, else &tail->next */
+};
+
+struct zebra_rec_key_entry { /* one stored key in a word entry's singly linked key list */
+ struct it_key key; /* by-value copy of the caller's key (memcpy of sizeof(struct it_key)) */
+ struct zebra_rec_key_entry *next; /* next key for the same word entry, or 0 at the tail */
+};
+#else
struct zebra_rec_key_entry {
char *buf;
size_t len;
struct it_key key;
struct zebra_rec_key_entry *next;
};
+#endif
struct zebra_rec_keys_t_ {
size_t buf_used;
NMEM nmem;
size_t hash_size;
- struct zebra_rec_key_entry **entries;
+#if NEW
+ struct zebra_rec_word_entry **entries;
+#else
+ struct zebra_rec_key_entry **entries;
+#endif
};
+#if NEW
+/*
+ * Select the hash bucket for a (ord, buf/len) pair.
+ *
+ * The hash is seeded with ord and then folded over the len bytes of buf
+ * using the multiplier 65509.  The result is reduced modulo p->hash_size.
+ *
+ * Returns the address of the bucket slot in p->entries, i.e. the head
+ * pointer of the chain that the caller should search or extend.
+ */
+struct zebra_rec_word_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
+                                                     const char *buf,
+                                                     size_t len,
+                                                     int ord)
+{
+    /* size_t index: len is a size_t, so an int index would be compared
+       signed-vs-unsigned and could not cover lengths above INT_MAX */
+    size_t i;
+    unsigned h = ord;
+
+    for (i = 0; i < len; i++)
+        h = h * 65509 + buf[i];
+    return &p->entries[h % (unsigned) p->hash_size];
+}
+#else
struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
const char *buf,
size_t len,
unsigned h = 0;
size_t i;
int j;
+#if 0
+ h = key->mem[key->len-1];
+#else
for (i = 0; i<len; i++)
h = h * 65509 + buf[i];
for (j = 0; j<key->len; j++)
h = h * 65509 + CAST_ZINT_TO_INT(key->mem[j]);
+#endif
return &p->entries[h % (unsigned) p->hash_size];
}
+#endif
static void init_hash(zebra_rec_keys_t p)
{
}
}
-zebra_rec_keys_t zebra_rec_keys_open()
+zebra_rec_keys_t zebra_rec_keys_open(void)
{
zebra_rec_keys_t p = xmalloc(sizeof(*p));
p->buf_used = 0;
p->decode_handle = iscz1_start();
p->nmem = nmem_create();
- p->hash_size = 1023;
+ p->hash_size = 32767;
p->entries = 0;
init_hash(p);
xfree(p);
}
+#if NEW
+/*
+ * Record one key for the string str/slen.
+ *
+ * The word entry for (key->mem[0], str) is looked up in the hash table and
+ * created on first use.  The key is then appended to that entry's key list
+ * unless an equal key (key_compare() == 0) is already present.
+ *
+ * keys: key set; must own its buffer (asserted).
+ * str:  string bytes, slen of them; copied, need not be NUL-terminated.
+ * key:  key to store; copied by value.
+ */
+void zebra_rec_keys_write(zebra_rec_keys_t keys,
+                          const char *str, size_t slen,
+                          const struct it_key *key)
+{
+    struct zebra_rec_word_entry **wep;
+    struct zebra_rec_key_entry **kep;
+    /* NOTE(review): mem[] elements are narrowed to int here; the other
+       hash variant goes through CAST_ZINT_TO_INT - confirm this is intended */
+    int ord = key->mem[0];
+    int seq = key->mem[key->len-1];
+
+    assert(keys->owner_of_buffer);
+
+    wep = zebra_rec_keys_mk_hash(keys, str, slen, ord);
+
+    /* walk the bucket chain looking for an entry with same ord and string */
+    while (*wep)
+    {
+        struct zebra_rec_word_entry *e = *wep;
+        if (ord == e->ord && slen == e->len && !memcmp(str, e->buf, slen))
+            break;
+        wep = &(*wep)->next;
+    }
+
+    if (!*wep)
+    {
+        /* not seen before: link a fresh, empty word entry at the chain end */
+        *wep = nmem_malloc(keys->nmem, sizeof(**wep));
+        (*wep)->buf = nmem_malloc(keys->nmem, slen);
+        memcpy((*wep)->buf, str, slen);
+        (*wep)->len = slen;
+        (*wep)->ord = ord;
+        (*wep)->next = 0;
+        (*wep)->keys = 0;
+        (*wep)->max_seq = 0;
+        (*wep)->last_key = &(*wep)->keys;
+    }
+    if (seq > (*wep)->max_seq)
+    {
+        /* fast path: seq exceeds every seq stored so far, so the duplicate
+           scan is skipped and the key goes straight to the tail */
+        kep = (*wep)->last_key;
+    }
+    else
+    {
+        /* slow path: scan the whole list; drop the key if already stored,
+           otherwise kep ends up at the tail link */
+        kep = &(*wep)->keys;
+        while (*kep)
+        {
+            if (!key_compare(key, &(*kep)->key))
+                return;
+            kep = &(*kep)->next;
+        }
+    }
+    *kep = nmem_malloc(keys->nmem, sizeof(**kep));
+    (*kep)->next = 0;
+    (*wep)->last_key = &(*kep)->next;
+    memcpy(&(*kep)->key, key, sizeof(*key));
+    if (seq > (*wep)->max_seq)
+    {
+        (*wep)->max_seq = seq;
+    }
+}
+#else
int zebra_rec_keys_add_hash(zebra_rec_keys_t keys,
const char *str, size_t slen,
const struct it_key *key)
{
- struct zebra_rec_key_entry **kep = zebra_rec_keys_mk_hash(keys,
- str, slen, key);
+ struct zebra_rec_key_entry **kep_first
+ = zebra_rec_keys_mk_hash(keys, str, slen, key);
+ struct zebra_rec_key_entry **kep = kep_first;
while (*kep)
{
struct zebra_rec_key_entry *e = *kep;
if (slen == e->len && !memcmp(str, e->buf, slen) &&
!key_compare(key, &e->key))
{
+ *kep = (*kep)->next; /* out of queue */
+ e->next = *kep_first; /* move to front */
+ *kep_first = e;
+
return 0;
}
kep = &(*kep)->next;
assert(keys->owner_of_buffer);
+#if 1
if (!zebra_rec_keys_add_hash(keys, str, slen, key))
+ {
+#if 0
+ yaz_log(YLOG_LOG, "dup key slen=%d %.*s "
+ "ord=" ZINT_FORMAT " seq=" ZINT_FORMAT,
+ slen, slen, str, key->mem[0], key->mem[key->len-1]);
+#endif
return; /* key already there . Omit it */
+ }
+#endif
if (keys->buf_used+1024 > keys->buf_max)
{
char *b = (char *) xmalloc (keys->buf_max += 128000);
*dst++ = '\0';
keys->buf_used = dst - keys->buf;
}
+#endif
void zebra_rec_keys_reset(zebra_rec_keys_t keys)
{
iscz1_reset(keys->encode_handle);
init_hash(keys);
-
}
int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
{
assert(keys);
iscz1_reset(keys->decode_handle);
+
+#if NEW
+ if (keys->buf_used == 0)
+ {
+ size_t i;
+ for (i = 0; i<keys->hash_size; i++)
+ {
+ struct zebra_rec_word_entry *we = keys->entries[i];
+ for (; we; we = we->next)
+ {
+ struct zebra_rec_key_entry *ke = we->keys;
+ for (; ke; ke = ke->next)
+ {
+ const char *src = (char*) &ke->key;
+ char *dst;
+ if (keys->buf_used+1024 > keys->buf_max)
+ {
+ char *b = (char *) xmalloc (keys->buf_max += 128000);
+ if (keys->buf_used > 0)
+ memcpy (b, keys->buf, keys->buf_used);
+ xfree (keys->buf);
+ keys->buf = b;
+ }
+
+ dst = keys->buf + keys->buf_used;
+
+ iscz1_encode(keys->encode_handle, &dst, &src);
+
+ memcpy (dst, we->buf, we->len);
+ dst += we->len;
+ *dst++ = '\0';
+ keys->buf_used = dst - keys->buf;
+ }
+ }
+ }
+ }
+#endif
+
keys->fetch_offset = 0;
if (keys->buf_used == 0)
return 0;