-/* $Id: reckeys.c,v 1.1 2005-10-28 09:22:50 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: reckeys.c,v 1.13 2007-11-30 12:19:08 adam Exp $
+ Copyright (C) 1995-2007
Index Data ApS
This file is part of the Zebra server.
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <assert.h>
#include <ctype.h>
-#include "index.h"
#include "reckeys.h"
+#include <yaz/nmem.h>
+#include <yaz/xmalloc.h>
+
+/* Selects the in-memory index layout: 1 groups keys per (term, ord) word
+ * entry; 0 keeps one hash entry per (term, key) pair. */
+#define NEW 0
+
+#if NEW
+/* One distinct (term, ord) pair; entries in the same hash bucket are linked
+ * through next. */
+struct zebra_rec_word_entry {
+ char *buf; /* copy of the term: len bytes, no terminating NUL is stored */
+ size_t len; /* number of bytes in buf */
+ int ord; /* key->mem[0] of the key that created the entry */
+ int max_seq; /* largest key->mem[key->len-1] appended so far; starts at 0 */
+ struct zebra_rec_word_entry *next; /* next word entry in the bucket chain */
+ struct zebra_rec_key_entry *keys; /* head of the list of keys for this term */
+ struct zebra_rec_key_entry **last_key; /* link that ends the key list; lets a key be appended without a walk */
+};
+
+/* One key attached to a word entry. */
+struct zebra_rec_key_entry {
+ struct it_key key; /* private copy of the caller's key */
+ struct zebra_rec_key_entry *next; /* next key of the same word entry */
+};
+#else
+/* One (term, key) pair; entries in the same hash bucket are linked through
+ * next. */
+struct zebra_rec_key_entry {
+ char *buf; /* copy of the term: len bytes, no terminating NUL is stored */
+ size_t len; /* number of bytes in buf */
+ struct it_key key; /* private copy of the caller's key */
+ struct zebra_rec_key_entry *next; /* next entry in the bucket chain */
+};
+#endif
+/* State behind a zebra_rec_keys_t handle (only the members visible in this
+ * hunk are listed):
+ *  buf_used         bytes of encoded key data written to the buffer so far
+ *  encode_handle    iscz1 handle passed to iscz1_encode()
+ *  decode_handle    iscz1 handle reset by zebra_rec_keys_rewind()
+ *  owner_of_buffer  non-zero when this object frees the key buffer itself
+ */
struct zebra_rec_keys_t_ {
size_t buf_used;
void *encode_handle;
void *decode_handle;
char owner_of_buffer;
+ zint custom_record_id; /* set by zebra_rec_keys_write() from a non-zero key->mem[1] */
+
+ NMEM nmem; /* memory pool that backs the hash table and its entries */
+ size_t hash_size; /* number of buckets in entries */
+#if NEW
+ struct zebra_rec_word_entry **entries; /* bucket array of word-entry chains */
+#else
+ struct zebra_rec_key_entry **entries; /* bucket array of key-entry chains */
+#endif
};
-zebra_rec_keys_t zebra_rec_keys_open()
+
+#if NEW
+/* Returns the address of the hash bucket (the head link of the collision
+ * chain) for a term and its ordinal.
+ *
+ *  p    key set whose bucket array is indexed; p->hash_size must be non-zero
+ *  buf  term bytes (need not be NUL-terminated)
+ *  len  number of bytes in buf
+ *  ord  ordinal mixed into the hash as the start value
+ *
+ * The result points into p->entries, so the caller may both walk the chain
+ * and link a new entry through it.
+ */
+struct zebra_rec_word_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
+                                                      const char *buf,
+                                                      size_t len,
+                                                      int ord)
+{
+    size_t i; /* same type as len: avoids a signed/unsigned comparison */
+    unsigned h = ord;
+
+    for (i = 0; i < len; i++)
+        h = h * 65509 + buf[i];
+    return &p->entries[h % (unsigned) p->hash_size];
+}
+#else
+/* Returns the address of the hash bucket (the head link of the collision
+ * chain) for a (term, key) pair.
+ *
+ *  p    key set whose bucket array is indexed; p->hash_size must be non-zero
+ *  buf  term bytes (need not be NUL-terminated)
+ *  len  number of bytes in buf
+ *  key  key whose key->len leading mem[] values are mixed into the hash
+ *
+ * The result points into p->entries, so the caller may both walk the chain
+ * and link a new entry through it.
+ */
+struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p,
+                                                     const char *buf,
+                                                     size_t len,
+                                                     const struct it_key *key)
+{
+    unsigned h = 0;
+    size_t i;
+    int j;
+#if 0
+    h = key->mem[key->len-1];
+#else
+    /* term bytes first, then every component of the key */
+    for (i = 0; i < len; i++)
+        h = h * 65509 + buf[i];
+    for (j = 0; j < key->len; j++)
+        h = h * 65509 + CAST_ZINT_TO_INT(key->mem[j]);
+#endif
+    return &p->entries[h % (unsigned) p->hash_size];
+}
+#endif
+
+/* Resets p->nmem and rebuilds the hash table as p->hash_size empty buckets
+ * allocated from that pool. With a hash_size of 0 no bucket array is
+ * allocated and p->entries is left null. */
+static void init_hash(zebra_rec_keys_t p)
+{
+    size_t slot;
+
+    nmem_reset(p->nmem);
+    p->entries = 0;
+    if (!p->hash_size)
+        return;
+
+    p->entries = nmem_malloc(p->nmem, p->hash_size * sizeof(*p->entries));
+    for (slot = 0; slot < p->hash_size; slot++)
+        p->entries[slot] = 0;
+}
+
+/* Creates a key set: allocates the object with xmalloc(), obtains two iscz1
+ * handles from iscz1_start() (kept as encode_handle and decode_handle),
+ * creates the NMEM pool and builds an empty hash table. The object starts
+ * out owning its key buffer (owner_of_buffer = 1). Returns the new handle. */
+zebra_rec_keys_t zebra_rec_keys_open(void)
{
zebra_rec_keys_t p = xmalloc(sizeof(*p));
p->buf_used = 0;
p->owner_of_buffer = 1;
p->encode_handle = iscz1_start();
p->decode_handle = iscz1_start();
+
+ p->custom_record_id = 0;
+ p->nmem = nmem_create();
+ p->hash_size = 32767; /* bucket count used by init_hash() and zebra_rec_keys_mk_hash() */
+ p->entries = 0; /* replaced by init_hash() below */
+
+ init_hash(p);
+
return p;
}
-
+
+/* Replaces the key buffer with the sz bytes at buf. A buffer currently owned
+ * by p is freed first. With copy_buf == 0 the pointer is adopted as-is;
+ * otherwise the bytes are memcpy'd into p->buf (the allocation for that copy
+ * is not part of the visible lines - NOTE(review): confirm against the full
+ * file). owner_of_buffer is set to copy_buf, so only a copied buffer is freed
+ * by p later on. */
void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz,
- int owner)
+ int copy_buf)
{
if (p->owner_of_buffer)
xfree(p->buf);
p->buf_used = sz;
p->buf_max = sz;
- if (!owner)
+ if (!copy_buf)
{
p->buf = buf;
}
memcpy(p->buf, buf, sz);
}
}
- p->owner_of_buffer = owner;
+ p->owner_of_buffer = copy_buf;
}
void zebra_rec_keys_get_buf(zebra_rec_keys_t p, char **buf, size_t *sz)
iscz1_stop(p->encode_handle);
if (p->decode_handle)
iscz1_stop(p->decode_handle);
+ nmem_destroy(p->nmem);
xfree(p);
}
+#if NEW
+/* (NEW layout) Records one (term, key) pair in the in-memory hash table.
+ *
+ *  keys  key set; must own its buffer (asserted)
+ *  str   term bytes, slen of them (need not be NUL-terminated)
+ *  key   key to store; key->mem[0] is used as the ordinal, key->mem[1] as a
+ *        custom record id when non-zero, key->mem[key->len-1] as the
+ *        sequence number
+ *
+ * The word entry for (str, ord) is looked up or created, then the key is
+ * appended to that entry's key list unless an equal key is already there.
+ * Nothing is encoded here; zebra_rec_keys_rewind() serializes the table.
+ */
+void zebra_rec_keys_write(zebra_rec_keys_t keys,
+                          const char *str, size_t slen,
+                          const struct it_key *key)
+{
+    struct zebra_rec_word_entry **wep;
+    struct zebra_rec_key_entry **kep;
+    int ord = (int) key->mem[0];
+    int seq = (int) key->mem[key->len-1];
+
+    assert(keys->owner_of_buffer);
+
+    /* keep zebra_rec_keys_get_custom_record_id() working in this layout
+       too, exactly as the non-NEW variant does */
+    if (key->mem[1]) /* record_id custom */
+    {
+        keys->custom_record_id = key->mem[1];
+    }
+
+    wep = zebra_rec_keys_mk_hash(keys, str, slen, ord);
+
+    /* find the word entry for this (term, ord) in the bucket chain */
+    while (*wep)
+    {
+        struct zebra_rec_word_entry *e = *wep;
+        if (ord == e->ord && slen == e->len && !memcmp(str, e->buf, slen))
+            break;
+        wep = &(*wep)->next;
+    }
+
+    if (!*wep)
+    {
+        /* first key for this term: append a new word entry to the chain */
+        *wep = nmem_malloc(keys->nmem, sizeof(**wep));
+        (*wep)->buf = nmem_malloc(keys->nmem, slen);
+        memcpy((*wep)->buf, str, slen);
+        (*wep)->len = slen;
+        (*wep)->ord = ord;
+        (*wep)->next = 0;
+        (*wep)->keys = 0;
+        (*wep)->max_seq = 0;
+        (*wep)->last_key = &(*wep)->keys;
+    }
+    if (seq > (*wep)->max_seq)
+    {
+        /* sequence number larger than any stored one: the key cannot be a
+           duplicate, so go straight to the end of the list */
+        kep = (*wep)->last_key;
+    }
+    else
+    {
+        /* may be a duplicate: scan the list, which also leaves kep at the
+           terminating link when no equal key is found */
+        kep = &(*wep)->keys;
+        while (*kep)
+        {
+            if (!key_compare(key, &(*kep)->key))
+                return;
+            kep = &(*kep)->next;
+        }
+    }
+    *kep = nmem_malloc(keys->nmem, sizeof(**kep));
+    (*kep)->next = 0;
+    (*wep)->last_key = &(*kep)->next;
+    memcpy(&(*kep)->key, key, sizeof(*key));
+    if (seq > (*wep)->max_seq)
+    {
+        (*wep)->max_seq = seq;
+    }
+}
+#else
+/* Registers the (term, key) pair in the hash table.
+ *
+ *  keys  key set holding the table and its memory pool
+ *  str   term bytes, slen of them (need not be NUL-terminated)
+ *  key   key stored alongside the term
+ *
+ * Returns 0 when an identical pair is already present; that entry is moved
+ * to the front of its bucket chain. Returns 1 after appending a new entry
+ * (with private copies of term and key) to the end of the chain.
+ */
+int zebra_rec_keys_add_hash(zebra_rec_keys_t keys,
+                            const char *str, size_t slen,
+                            const struct it_key *key)
+{
+    struct zebra_rec_key_entry **head
+        = zebra_rec_keys_mk_hash(keys, str, slen, key);
+    struct zebra_rec_key_entry **link;
+    struct zebra_rec_key_entry *fresh;
+
+    for (link = head; *link; link = &(*link)->next)
+    {
+        struct zebra_rec_key_entry *hit = *link;
+
+        if (slen != hit->len || memcmp(str, hit->buf, slen) ||
+            key_compare(key, &hit->key))
+            continue;
+
+        *link = hit->next;   /* out of queue */
+        hit->next = *head;   /* move to front */
+        *head = hit;
+        return 0;
+    }
+
+    /* link now addresses the terminating null of the chain */
+    fresh = nmem_malloc(keys->nmem, sizeof(*fresh));
+    *link = fresh;
+    fresh->next = 0;
+    fresh->len = slen;
+    memcpy(&fresh->key, key, sizeof(*key));
+    fresh->buf = nmem_malloc(keys->nmem, slen);
+    memcpy(fresh->buf, str, slen);
+    return 1;
+}
+
+/* Appends one (term, key) pair to the encoded key buffer unless the same
+ * pair has been written before. The key set must own its buffer (asserted).
+ * A non-zero key->mem[1] is remembered as the custom record id. Each record
+ * in the buffer is the iscz1-encoded key followed by the slen term bytes and
+ * a '\0'.
+ * NOTE(review): the growth check reserves a fixed 1024 bytes of headroom and
+ * does not look at slen - confirm that callers bound the term length. */
void zebra_rec_keys_write(zebra_rec_keys_t keys,
- int reg_type,
const char *str, size_t slen,
const struct it_key *key)
{
assert(keys->owner_of_buffer);
+ if (key->mem[1]) /* record_id custom */
+ {
+ keys->custom_record_id = key->mem[1];
+ }
+#if 1
+ /* zebra_rec_keys_add_hash() returns 0 for a pair that is already stored */
+ if (!zebra_rec_keys_add_hash(keys, str, slen, key))
+ {
+#if 0
+ yaz_log(YLOG_LOG, "dup key slen=%d %.*s "
+ "ord=" ZINT_FORMAT " seq=" ZINT_FORMAT,
+ slen, slen, str, key->mem[0], key->mem[key->len-1]);
+#endif
+ return; /* key already present: omit it */
+ }
+#endif
if (keys->buf_used+1024 > keys->buf_max)
{
char *b = (char *) xmalloc (keys->buf_max += 128000);
iscz1_encode(keys->encode_handle, &dst, &src);
-#if REG_TYPE_PREFIX
- *dst++ = reg_type;
-#endif
memcpy (dst, str, slen);
dst += slen;
*dst++ = '\0';
keys->buf_used = dst - keys->buf;
}
+#endif
+/* Empties the key set for reuse: marks the buffer as holding no data, resets
+ * the iscz1 encode handle and rebuilds the hash table with empty buckets. */
void zebra_rec_keys_reset(zebra_rec_keys_t keys)
{
keys->buf_used = 0;
iscz1_reset(keys->encode_handle);
+
+ init_hash(keys); /* resets the NMEM pool and allocates a fresh bucket array */
}
+/* Prepares the key set for reading from the beginning: resets the iscz1
+ * decode handle and sets fetch_offset to 0. Returns 0 when the buffer holds
+ * no data; the remaining return path lies outside the visible lines.
+ * With NEW enabled and an empty buffer, every key in the hash table is first
+ * serialized into the buffer. */
int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
{
assert(keys);
iscz1_reset(keys->decode_handle);
+
+#if NEW
+ /* nothing encoded yet: walk buckets -> word entries -> keys and encode */
+ if (keys->buf_used == 0)
+ {
+ size_t i;
+ for (i = 0; i<keys->hash_size; i++)
+ {
+ struct zebra_rec_word_entry *we = keys->entries[i];
+ for (; we; we = we->next)
+ {
+ struct zebra_rec_key_entry *ke = we->keys;
+ for (; ke; ke = ke->next)
+ {
+ const char *src = (char*) &ke->key;
+ char *dst;
+ /* grow by 128000 bytes when less than 1024 bytes are free.
+    NOTE(review): the check ignores we->len - confirm that
+    term lengths are bounded */
+ if (keys->buf_used+1024 > keys->buf_max)
+ {
+ char *b = (char *) xmalloc (keys->buf_max += 128000);
+ if (keys->buf_used > 0)
+ memcpy (b, keys->buf, keys->buf_used);
+ xfree (keys->buf);
+ keys->buf = b;
+ }
+
+ dst = keys->buf + keys->buf_used;
+
+ /* record layout: encoded key, term bytes, '\0' */
+ iscz1_encode(keys->encode_handle, &dst, &src);
+
+ memcpy (dst, we->buf, we->len);
+ dst += we->len;
+ *dst++ = '\0';
+ keys->buf_used = dst - keys->buf;
+ }
+ }
+ }
+ }
+#endif
+
keys->fetch_offset = 0;
if (keys->buf_used == 0)
return 0;
}
return 1;
}
+
+/* Returns the custom record id stored in the key set; the value is 0 when
+ * none has been recorded since zebra_rec_keys_open(). */
+zint zebra_rec_keys_get_custom_record_id(zebra_rec_keys_t keys)
+{
+    const zint record_id = keys->custom_record_id;
+
+    return record_id;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+