From 34552d7bc714468512b1873f1d6b75608d6b4655 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 9 Nov 2005 11:51:29 +0000 Subject: [PATCH] Register no longer uses register type prefix (REG_TYPE_PREFIX). Try to avoid dup keys for filters (bug #432). --- include/idzebra/util.h | 3 +- index/extract.c | 81 ++++++++++++++++++++++++++++++++++------------- index/reckeys.c | 82 ++++++++++++++++++++++++++++++++++++++++++++---- index/reckeys.h | 3 +- index/zebraapi.c | 4 +-- index/zrpn.c | 19 +---------- 6 files changed, 140 insertions(+), 52 deletions(-) diff --git a/include/idzebra/util.h b/include/idzebra/util.h index 6d2a74d..ea7baec 100644 --- a/include/idzebra/util.h +++ b/include/idzebra/util.h @@ -1,4 +1,4 @@ -/* $Id: util.h,v 1.4 2005-06-23 06:45:46 adam Exp $ +/* $Id: util.h,v 1.5 2005-11-09 11:51:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -86,6 +86,5 @@ YAZ_END_CDECL /* NATTR=1 for string-attribute architecture, =0 for set+use . */ #define NATTR 0 -#define REG_TYPE_PREFIX 0 #endif diff --git a/index/extract.c b/index/extract.c index 06eb1b4..fc2205f 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $ +/* $Id: extract.c,v 1.198 2005-11-09 11:51:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -535,7 +535,11 @@ static int file_extract_record(ZebraHandle zh, { dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -614,7 +618,11 @@ static int file_extract_record(ZebraHandle zh, yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; @@ -652,10 +660,16 @@ static int file_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1002,7 +1016,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif #if 0 print_rec_keys(zh, zh->reg->keys); @@ -1039,8 +1057,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); +#if NATTR + zebra_rec_keys_set_buf(sortKeys, + rec->info[recInfo_sortKeys], + rec->size[recInfo_sortKeys], + 0); +#else sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; +#endif #if NATTR extract_flushSortKeys (zh, *sysno, 0, sortKeys); @@ -1094,7 +1119,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; @@ -1131,10 +1160,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1233,7 +1268,7 @@ int explain_extract (void *handle, Record rec, data1_node *n) zebra_rec_keys_t delkeys = zebra_rec_keys_open(); #if NATTR - zebra_rec_keys_t sortkeys = zzebra_rec_keys_open(); + zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); #else struct sortKeys sortkeys; #endif @@ -1269,10 +1304,16 @@ int explain_extract (void *handle, Record rec, data1_node *n) &rec->size[recInfo_delKeys]); xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif return 0; } @@ -1495,14 +1536,6 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -void extract_add_it_key (ZebraHandle zh, - zebra_rec_keys_t *keys, - int reg_type, - const char *str, int slen, struct it_key *key) -{ - zebra_rec_keys_write(*keys, reg_type, str, slen, key); -} - ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets) @@ -1598,16 +1631,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length) key.mem[3] = p->seqno; #if 0 - /* just for debugging .. */ - yaz_log(YLOG_LOG, "add: set=%d use=%d " - "record_id=%lld section_id=%lld seqno=%lld", - p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); + if (1) + { + char strz[80]; + int i; + + strz[0] = 0; + for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, + strz); + } #endif - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->keys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->keys, str, length, &key); } #if NATTR @@ -1635,10 +1675,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) key.mem[2] = p->section_id; key.mem[3] = p->seqno; - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->sortKeys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } #else static void extract_add_sort_string (RecWord *p, const char *str, int length) diff --git a/index/reckeys.c b/index/reckeys.c index 174e895..346fd28 100644 --- a/index/reckeys.c +++ b/index/reckeys.c @@ -1,4 +1,4 @@ -/* $Id: reckeys.c,v 1.2 2005-11-09 08:27:28 adam Exp $ +/* $Id: reckeys.c,v 1.3 2005-11-09 11:51:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -25,9 +25,17 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include +#include #include "index.h" #include "reckeys.h" +struct zebra_rec_key_entry { + char *buf; + size_t len; + struct it_key key; + struct zebra_rec_key_entry *next; +}; + struct zebra_rec_keys_t_ { size_t buf_used; size_t buf_max; @@ -36,8 +44,37 @@ struct zebra_rec_keys_t_ { void *encode_handle; void *decode_handle; char owner_of_buffer; + + NMEM nmem; + size_t hash_size; + struct zebra_rec_key_entry **entries; }; + +struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, + const char *buf, + size_t len) +{ + unsigned h = 0; + size_t i; + for (i = 0; ientries[h % (unsigned) p->hash_size]; +} + +static void init_hash(zebra_rec_keys_t p) +{ + p->entries = 0; + nmem_reset(p->nmem); + if (p->hash_size) + { + size_t i; + p->entries = nmem_malloc(p->nmem, p->hash_size * sizeof(*p->entries)); + for (i = 0; ihash_size; i++) + p->entries[i] = 0; + } +} + zebra_rec_keys_t zebra_rec_keys_open() { zebra_rec_keys_t p = xmalloc(sizeof(*p)); @@ -48,9 +85,16 @@ zebra_rec_keys_t zebra_rec_keys_open() p->owner_of_buffer = 1; p->encode_handle = iscz1_start(); p->decode_handle = iscz1_start(); + + p->nmem = nmem_create(); + p->hash_size = 127; + p->entries = 0; + + init_hash(p); + return p; } - + void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz, int copy_buf) { @@ -96,11 +140,35 @@ void zebra_rec_keys_close(zebra_rec_keys_t p) iscz1_stop(p->encode_handle); if (p->decode_handle) iscz1_stop(p->decode_handle); + nmem_destroy(p->nmem); xfree(p); } +int zebra_rec_keys_add_hash(zebra_rec_keys_t keys, + const char *str, size_t slen, + const struct it_key *key) +{ + struct zebra_rec_key_entry **kep = zebra_rec_keys_mk_hash(keys, str, slen); + while (*kep) + { + struct zebra_rec_key_entry *e = *kep; + if (slen == e->len && !memcmp(str, e->buf, slen) && + !key_compare(key, &e->key)) + { + return 0; + } + kep = &(*kep)->next; + } + *kep = nmem_malloc(keys->nmem, sizeof(**kep)); + (*kep)->next = 0; + (*kep)->len = slen; + memcpy(&(*kep)->key, key, sizeof(*key)); + (*kep)->buf = nmem_malloc(keys->nmem, slen); + memcpy((*kep)->buf, str, slen); + return 1; +} + void zebra_rec_keys_write(zebra_rec_keys_t keys, - int reg_type, const char *str, size_t slen, const struct it_key *key) { @@ -109,6 +177,8 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys, assert(keys->owner_of_buffer); + if (!zebra_rec_keys_add_hash(keys, str, slen, key)) + return; /* key already there . Omit it */ if (keys->buf_used+1024 > keys->buf_max) { char *b = (char *) xmalloc (keys->buf_max += 128000); @@ -121,9 +191,6 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys, iscz1_encode(keys->encode_handle, &dst, &src); -#if REG_TYPE_PREFIX - *dst++ = reg_type; -#endif memcpy (dst, str, slen); dst += slen; *dst++ = '\0'; @@ -136,6 +203,9 @@ void zebra_rec_keys_reset(zebra_rec_keys_t keys) keys->buf_used = 0; iscz1_reset(keys->encode_handle); + + init_hash(keys); + } int zebra_rec_keys_rewind(zebra_rec_keys_t keys) diff --git a/index/reckeys.h b/index/reckeys.h index 5a27e1b..2fc05a0 100644 --- a/index/reckeys.h +++ b/index/reckeys.h @@ -1,4 +1,4 @@ -/* $Id: reckeys.h,v 1.2 2005-11-09 08:27:28 adam Exp $ +/* $Id: reckeys.h,v 1.3 2005-11-09 11:51:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -30,7 +30,6 @@ zebra_rec_keys_t zebra_rec_keys_open(); void zebra_rec_keys_close(zebra_rec_keys_t p); void zebra_rec_keys_write(zebra_rec_keys_t keys, - int reg_type, const char *str, size_t slen, const struct it_key *key); void zebra_rec_keys_reset(zebra_rec_keys_t keys); diff --git a/index/zebraapi.c b/index/zebraapi.c index cc7fd51..d23ea47 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.193 2005-10-28 09:22:50 adam Exp $ +/* $Id: zebraapi.c,v 1.194 2005-11-09 11:51:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -497,7 +497,7 @@ static void zebra_register_close (ZebraService zs, struct zebra_register *reg) zebra_rec_keys_close(reg->keys); #if NATTR - zebra_rec_keys_close(rec->sortKeys); + zebra_rec_keys_close(reg->sortKeys); #else xfree(reg->sortKeys.buf); #endif diff --git a/index/zrpn.c b/index/zrpn.c index db339e5..9cf39d4 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.206 2005-11-02 11:43:26 adam Exp $ +/* $Id: zrpn.c,v 1.207 2005-11-09 11:51:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1241,11 +1241,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, attr_ok = 1; term_dict[prefix_len++] = ')'; -#if REG_TYPE_PREFIX - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = reg_type; - yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]); -#endif term_dict[prefix_len] = '\0'; j = prefix_len; switch (truncation_value) @@ -1893,11 +1888,6 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } bases_ok++; term_dict[prefix_len++] = ')'; -#if REG_TYPE_PREFIX - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = reg_type; - yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]); -#endif term_dict[prefix_len] = '\0'; if (!numeric_relation(zh, zapt, &termp, term_dict, attributeSet, grep_info, &max_pos, reg_type, @@ -2158,10 +2148,6 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, term_dict[prefix_len++] = ord_buf[i]; } term_dict[prefix_len++] = ')'; -#if REG_TYPE_PREFIX - term_dict[prefix_len++] = 1; - term_dict[prefix_len++] = reg_type; -#endif strcpy(term_dict+prefix_len, term); grep_info.isam_p_indx = 0; @@ -2926,9 +2912,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, scan_info->list[j].term = NULL; prefix_len += key_SU_encode (ords[i], termz + prefix_len); -#if REG_TYPE_PREFIX - termz[prefix_len++] = reg_id; -#endif termz[prefix_len] = 0; strcpy(scan_info->prefix, termz); -- 1.7.10.4