X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Freckeys.c;h=58582672a9dce844033960f7b942bb97c23ec7fd;hb=527dab66d9847bb7f8a931c558306a070064bf25;hp=067de6b843b07e8ee2bada63a5bbd1c15797f0a8;hpb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;p=idzebra-moved-to-github.git diff --git a/index/reckeys.c b/index/reckeys.c index 067de6b..5858267 100644 --- a/index/reckeys.c +++ b/index/reckeys.c @@ -1,5 +1,5 @@ -/* $Id: reckeys.c,v 1.4 2006-05-10 08:13:22 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: reckeys.c,v 1.13 2007-11-30 12:19:08 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -15,26 +15,46 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include #include +#include #include #include -#include -#include "index.h" #include "reckeys.h" +#include +#include + +#define NEW 0 + +#if NEW +struct zebra_rec_word_entry { + char *buf; + size_t len; + int ord; + int max_seq; + struct zebra_rec_word_entry *next; + struct zebra_rec_key_entry *keys; + struct zebra_rec_key_entry **last_key; +}; struct zebra_rec_key_entry { + struct it_key key; + struct zebra_rec_key_entry *next; +}; +#else +struct zebra_rec_key_entry { char *buf; size_t len; struct it_key key; struct zebra_rec_key_entry *next; }; +#endif struct zebra_rec_keys_t_ { size_t buf_used; @@ -44,23 +64,51 @@ struct zebra_rec_keys_t_ { void *encode_handle; void *decode_handle; char owner_of_buffer; + zint custom_record_id; NMEM nmem; size_t hash_size; - struct zebra_rec_key_entry **entries; +#if NEW + struct zebra_rec_word_entry **entries; +#else + struct zebra_rec_key_entry **entries; +#endif }; +#if NEW +struct zebra_rec_word_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, + const char *buf, + size_t len, + int ord) +{ + int i; + unsigned h = ord; + + for (i = 0; ientries[h % (unsigned) p->hash_size]; +} +#else struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, const char *buf, - size_t len) + size_t len, + const struct it_key *key) { unsigned h = 0; size_t i; + int j; +#if 0 + h = key->mem[key->len-1]; +#else for (i = 0; ilen; j++) + h = h * 65509 + CAST_ZINT_TO_INT(key->mem[j]); +#endif return &p->entries[h % (unsigned) p->hash_size]; } +#endif static void init_hash(zebra_rec_keys_t p) { @@ -75,7 +123,7 @@ static void init_hash(zebra_rec_keys_t p) } } -zebra_rec_keys_t zebra_rec_keys_open() +zebra_rec_keys_t zebra_rec_keys_open(void) { zebra_rec_keys_t p = xmalloc(sizeof(*p)); p->buf_used = 0; @@ -86,8 +134,9 @@ zebra_rec_keys_t zebra_rec_keys_open() p->encode_handle = iscz1_start(); p->decode_handle = iscz1_start(); + p->custom_record_id = 0; p->nmem = nmem_create(); - p->hash_size = 127; + p->hash_size = 32767; p->entries = 0; init_hash(p); @@ -144,17 +193,82 @@ void zebra_rec_keys_close(zebra_rec_keys_t p) xfree(p); } +#if NEW +void zebra_rec_keys_write(zebra_rec_keys_t keys, + const char *str, size_t slen, + const struct it_key *key) +{ + char *dst; + const char *src = (char*) key; + + struct zebra_rec_word_entry **wep; + struct zebra_rec_key_entry **kep; + int ord = key->mem[0]; + int seq = key->mem[key->len-1]; + + assert(keys->owner_of_buffer); + + wep = zebra_rec_keys_mk_hash(keys, str, slen, ord); + + while (*wep) + { + struct zebra_rec_word_entry *e = *wep; + if (ord == e->ord && slen == e->len && !memcmp(str, e->buf, slen)) + break; + wep = &(*wep)->next; + } + + if (!*wep) + { + *wep = nmem_malloc(keys->nmem, sizeof(**wep)); + (*wep)->buf = nmem_malloc(keys->nmem, slen); + memcpy((*wep)->buf, str, slen); + (*wep)->len = slen; + (*wep)->ord = ord; + (*wep)->next = 0; + (*wep)->keys = 0; + (*wep)->max_seq = 0; + (*wep)->last_key = &(*wep)->keys; + } + if (seq > (*wep)->max_seq) + kep = (*wep)->last_key; + else + { + kep = &(*wep)->keys; + while (*kep) + { + if (!key_compare(key, &(*kep)->key)) + return; + kep = &(*kep)->next; + } + } + *kep = nmem_malloc(keys->nmem, sizeof(**kep)); + (*kep)->next = 0; + (*wep)->last_key = &(*kep)->next; + memcpy(&(*kep)->key, key, sizeof(*key)); + if (seq > (*wep)->max_seq) + { + (*wep)->max_seq = seq; + } +} +#else int zebra_rec_keys_add_hash(zebra_rec_keys_t keys, const char *str, size_t slen, const struct it_key *key) { - struct zebra_rec_key_entry **kep = zebra_rec_keys_mk_hash(keys, str, slen); + struct zebra_rec_key_entry **kep_first + = zebra_rec_keys_mk_hash(keys, str, slen, key); + struct zebra_rec_key_entry **kep = kep_first; while (*kep) { struct zebra_rec_key_entry *e = *kep; if (slen == e->len && !memcmp(str, e->buf, slen) && !key_compare(key, &e->key)) { + *kep = (*kep)->next; /* out of queue */ + e->next = *kep_first; /* move to front */ + *kep_first = e; + return 0; } kep = &(*kep)->next; @@ -177,8 +291,21 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys, assert(keys->owner_of_buffer); + if (key->mem[1]) /* record_id custom */ + { + keys->custom_record_id = key->mem[1]; + } +#if 1 if (!zebra_rec_keys_add_hash(keys, str, slen, key)) + { +#if 0 + yaz_log(YLOG_LOG, "dup key slen=%d %.*s " + "ord=" ZINT_FORMAT " seq=" ZINT_FORMAT, + slen, slen, str, key->mem[0], key->mem[key->len-1]); +#endif return; /* key already there . Omit it */ + } +#endif if (keys->buf_used+1024 > keys->buf_max) { char *b = (char *) xmalloc (keys->buf_max += 128000); @@ -196,6 +323,7 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys, *dst++ = '\0'; keys->buf_used = dst - keys->buf; } +#endif void zebra_rec_keys_reset(zebra_rec_keys_t keys) { @@ -205,13 +333,50 @@ void zebra_rec_keys_reset(zebra_rec_keys_t keys) iscz1_reset(keys->encode_handle); init_hash(keys); - } int zebra_rec_keys_rewind(zebra_rec_keys_t keys) { assert(keys); iscz1_reset(keys->decode_handle); + +#if NEW + if (keys->buf_used == 0) + { + size_t i; + for (i = 0; ihash_size; i++) + { + struct zebra_rec_word_entry *we = keys->entries[i]; + for (; we; we = we->next) + { + struct zebra_rec_key_entry *ke = we->keys; + for (; ke; ke = ke->next) + { + const char *src = (char*) &ke->key; + char *dst; + if (keys->buf_used+1024 > keys->buf_max) + { + char *b = (char *) xmalloc (keys->buf_max += 128000); + if (keys->buf_used > 0) + memcpy (b, keys->buf, keys->buf_used); + xfree (keys->buf); + keys->buf = b; + } + + dst = keys->buf + keys->buf_used; + + iscz1_encode(keys->encode_handle, &dst, &src); + + memcpy (dst, we->buf, we->len); + dst += we->len; + *dst++ = '\0'; + keys->buf_used = dst - keys->buf; + } + } + } + } +#endif + keys->fetch_offset = 0; if (keys->buf_used == 0) return 0; @@ -251,6 +416,12 @@ int zebra_rec_keys_read(zebra_rec_keys_t keys, } return 1; } + +zint zebra_rec_keys_get_custom_record_id(zebra_rec_keys_t keys) +{ + return keys->custom_record_id; +} + /* * Local variables: * c-basic-offset: 4