Register no longer uses register type prefix (REG_TYPE_PREFIX).
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 9 Nov 2005 11:51:29 +0000 (11:51 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 9 Nov 2005 11:51:29 +0000 (11:51 +0000)
Try to avoid dup keys for filters (bug #432).

include/idzebra/util.h
index/extract.c
index/reckeys.c
index/reckeys.h
index/zebraapi.c
index/zrpn.c

index 6d2a74d..ea7baec 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: util.h,v 1.4 2005-06-23 06:45:46 adam Exp $
+/* $Id: util.h,v 1.5 2005-11-09 11:51:29 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -86,6 +86,5 @@ YAZ_END_CDECL
 
 /* NATTR=1 for string-attribute architecture, =0 for set+use . */
 #define NATTR 0
-#define REG_TYPE_PREFIX 0
 
 #endif
index 06eb1b4..fc2205f 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $
+/* $Id: extract.c,v 1.198 2005-11-09 11:51:29 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -535,7 +535,11 @@ static int file_extract_record(ZebraHandle zh,
         {
             dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
         extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                 recordAttr->staticrank);
         zh->records_inserted++;
@@ -614,7 +618,11 @@ static int file_extract_record(ZebraHandle zh,
                     yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
                         zh->m_record_type, fname, recordOffset);
                recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+                extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
                 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
                 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                         recordAttr->staticrank);
                 zh->records_updated++;
@@ -652,10 +660,16 @@ static int file_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -1002,7 +1016,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             dict_insert (zh->reg->matchDict, matchStr,
                          sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
        
 #if 0
        print_rec_keys(zh, zh->reg->keys);
@@ -1039,8 +1057,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                               rec->info[recInfo_delKeys],
                               rec->size[recInfo_delKeys],
                               0);
+#if NATTR
+       zebra_rec_keys_set_buf(sortKeys,
+                              rec->info[recInfo_sortKeys],
+                              rec->size[recInfo_sortKeys],
+                              0);
+#else
         sortKeys.buf_used = rec->size[recInfo_sortKeys];
         sortKeys.buf = rec->info[recInfo_sortKeys];
+#endif
 
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 0, sortKeys);
@@ -1094,7 +1119,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                    yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
                             pr_fname, (long) recordOffset);
                recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+                extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
                 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
                 extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, 
                                         recordAttr->staticrank);
                 zh->records_updated++;
@@ -1131,10 +1160,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -1233,7 +1268,7 @@ int explain_extract (void *handle, Record rec, data1_node *n)
        zebra_rec_keys_t delkeys = zebra_rec_keys_open();
        
 #if NATTR
-       zebra_rec_keys_t sortkeys = zzebra_rec_keys_open();
+       zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
 #else
        struct sortKeys sortkeys;
 #endif
@@ -1269,10 +1304,16 @@ int explain_extract (void *handle, Record rec, data1_node *n)
                           &rec->size[recInfo_delKeys]);
 
     xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     return 0;
 }
@@ -1495,14 +1536,6 @@ void extract_flushWriteKeys (ZebraHandle zh, int final)
     zh->reg->key_buf_used = 0;
 }
 
-void extract_add_it_key (ZebraHandle zh,
-                        zebra_rec_keys_t *keys,
-                        int reg_type,
-                        const char *str, int slen, struct it_key *key)
-{
-    zebra_rec_keys_write(*keys, reg_type, str, slen, key);
-}
-
 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
                                  zebra_rec_keys_t reckeys,
                                  zebra_snippets *snippets)
@@ -1598,16 +1631,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length)
     key.mem[3] = p->seqno;
 
 #if 0
-    /* just for debugging .. */
-    yaz_log(YLOG_LOG, "add: set=%d use=%d "
-           "record_id=%lld section_id=%lld seqno=%lld",
-           p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno);
+    if (1)
+    {
+       char strz[80];
+       int i;
+
+       strz[0] = 0;
+       for (i = 0; i<length && i < 20; i++)
+           sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
+       /* just for debugging .. */
+       yaz_log(YLOG_LOG, "add: set=%d use=%d "
+               "record_id=%lld section_id=%lld seqno=%lld %s",
+               p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
+               strz);
+    }
 #endif
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->keys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->keys, str, length, &key);
 }
 
 #if NATTR
@@ -1635,10 +1675,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length)
     key.mem[2] = p->section_id;
     key.mem[3] = p->seqno;
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->sortKeys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
 }
 #else
 static void extract_add_sort_string (RecWord *p, const char *str, int length)
index 174e895..346fd28 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: reckeys.c,v 1.2 2005-11-09 08:27:28 adam Exp $
+/* $Id: reckeys.c,v 1.3 2005-11-09 11:51:29 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -25,9 +25,17 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <assert.h>
 #include <ctype.h>
 
+#include <yaz/nmem.h>
 #include "index.h"
 #include "reckeys.h"
 
+struct zebra_rec_key_entry { /* one remembered (term, key) pair, linked into a hash-bucket chain */
+    char *buf; /* private copy of the term bytes; exactly len bytes, no terminator is stored */
+    size_t len; /* number of bytes in buf */
+    struct it_key key; /* copy of the key that accompanied the term */
+    struct zebra_rec_key_entry *next; /* next entry in the same bucket; 0 ends the chain */
+};
+
 struct zebra_rec_keys_t_ {
     size_t buf_used;
     size_t buf_max;
@@ -36,8 +44,37 @@ struct zebra_rec_keys_t_ {
     void *encode_handle;
     void *decode_handle;
     char owner_of_buffer;
+
+    NMEM nmem;
+    size_t hash_size;
+    struct zebra_rec_key_entry **entries; 
 };
 
+
+struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, /* returns the address of the bucket head for a term */
+                                                   const char *buf, /* term bytes to hash */
+                                                   size_t len) /* number of bytes at buf */
+{
+    unsigned h = 0; /* running hash; unsigned arithmetic, so overflow wraps */
+    size_t i;
+    for (i = 0; i<len; i++)
+       h = h * 65509 + buf[i]; /* buf[i] is plain char: bytes >= 0x80 may contribute differently where char is signed */
+    return &p->entries[h % (unsigned) p->hash_size]; /* NOTE(review): requires hash_size != 0 and entries != 0; init_hash leaves entries at 0 when hash_size is 0 */
+}
+
+static void init_hash(zebra_rec_keys_t p) /* discard the current table and build an empty one of p->hash_size buckets */
+{
+    p->entries = 0; /* stays 0 if hash_size is 0 */
+    nmem_reset(p->nmem); /* presumably releases everything previously allocated from p->nmem (old table and entries) - confirm against YAZ nmem docs */
+    if (p->hash_size)
+    {
+       size_t i;
+       p->entries = nmem_malloc(p->nmem, p->hash_size * sizeof(*p->entries)); /* bucket array is allocated from the same nmem as the entries */
+       for (i = 0; i<p->hash_size; i++)
+           p->entries[i] = 0; /* every bucket starts as an empty chain */
+    }
+}
+
 zebra_rec_keys_t zebra_rec_keys_open()
 {
     zebra_rec_keys_t p = xmalloc(sizeof(*p));
@@ -48,9 +85,16 @@ zebra_rec_keys_t zebra_rec_keys_open()
     p->owner_of_buffer = 1;
     p->encode_handle = iscz1_start();
     p->decode_handle = iscz1_start(); 
+
+    p->nmem = nmem_create();
+    p->hash_size = 127;
+    p->entries = 0;
+
+    init_hash(p);
+
     return p;
 }
-    
+
 void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz,
                            int copy_buf)
 {
@@ -96,11 +140,35 @@ void zebra_rec_keys_close(zebra_rec_keys_t p)
        iscz1_stop(p->encode_handle);
     if (p->decode_handle)
        iscz1_stop(p->decode_handle);
+    nmem_destroy(p->nmem);
     xfree(p);
 }
 
+int zebra_rec_keys_add_hash(zebra_rec_keys_t keys, /* remember (str, key); returns 1 if newly added, 0 if an equal pair was already stored */
+                           const char *str, size_t slen, /* term bytes and their length */
+                           const struct it_key *key) /* key paired with the term */
+{
+    struct zebra_rec_key_entry **kep = zebra_rec_keys_mk_hash(keys, str, slen); /* link to the head of the term's bucket */
+    while (*kep) /* walk the chain looking for an identical pair */
+    {
+       struct zebra_rec_key_entry *e = *kep;
+       if (slen == e->len && !memcmp(str, e->buf, slen) &&
+           !key_compare(key, &e->key)) /* presumably key_compare returns 0 for equal keys - confirm */
+       {
+           return 0; /* duplicate: nothing is stored */
+       }
+       kep = &(*kep)->next; /* advance to the next link in the chain */
+    }
+    *kep = nmem_malloc(keys->nmem, sizeof(**kep)); /* kep is now the terminating null link: append a new entry there */
+    (*kep)->next = 0;
+    (*kep)->len = slen;
+    memcpy(&(*kep)->key, key, sizeof(*key));
+    (*kep)->buf = nmem_malloc(keys->nmem, slen); /* private copy, so the caller's str need not outlive this call */
+    memcpy((*kep)->buf, str, slen);
+    return 1;
+}
+
 void zebra_rec_keys_write(zebra_rec_keys_t keys, 
-                         int reg_type,
                          const char *str, size_t slen,
                          const struct it_key *key)
 {
@@ -109,6 +177,8 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys,
     
     assert(keys->owner_of_buffer);
 
+    if (!zebra_rec_keys_add_hash(keys, str, slen, key))
+       return;  /* key already there . Omit it */
     if (keys->buf_used+1024 > keys->buf_max)
     {
         char *b = (char *) xmalloc (keys->buf_max += 128000);
@@ -121,9 +191,6 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys,
 
     iscz1_encode(keys->encode_handle, &dst, &src);
 
-#if REG_TYPE_PREFIX
-    *dst++ = reg_type;
-#endif
     memcpy (dst, str, slen);
     dst += slen;
     *dst++ = '\0';
@@ -136,6 +203,9 @@ void zebra_rec_keys_reset(zebra_rec_keys_t keys)
     keys->buf_used = 0;
     
     iscz1_reset(keys->encode_handle);
+
+    init_hash(keys);
+
 }
 
 int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
index 5a27e1b..2fc05a0 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: reckeys.h,v 1.2 2005-11-09 08:27:28 adam Exp $
+/* $Id: reckeys.h,v 1.3 2005-11-09 11:51:29 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -30,7 +30,6 @@ zebra_rec_keys_t zebra_rec_keys_open();
 void zebra_rec_keys_close(zebra_rec_keys_t p);
 
 void zebra_rec_keys_write(zebra_rec_keys_t keys, 
-                         int reg_type,
                          const char *str, size_t slen,
                          const struct it_key *key);
 void zebra_rec_keys_reset(zebra_rec_keys_t keys);
index cc7fd51..d23ea47 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zebraapi.c,v 1.193 2005-10-28 09:22:50 adam Exp $
+/* $Id: zebraapi.c,v 1.194 2005-11-09 11:51:29 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -497,7 +497,7 @@ static void zebra_register_close (ZebraService zs, struct zebra_register *reg)
 
     zebra_rec_keys_close(reg->keys);
 #if NATTR
-    zebra_rec_keys_close(rec->sortKeys);
+    zebra_rec_keys_close(reg->sortKeys);
 #else
     xfree(reg->sortKeys.buf);
 #endif
index db339e5..9cf39d4 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.206 2005-11-02 11:43:26 adam Exp $
+/* $Id: zrpn.c,v 1.207 2005-11-09 11:51:30 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -1241,11 +1241,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
            attr_ok = 1;
 
         term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX
-        term_dict[prefix_len++] = 1;
-        term_dict[prefix_len++] = reg_type;
-        yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
-#endif
         term_dict[prefix_len] = '\0';
         j = prefix_len;
         switch (truncation_value)
@@ -1893,11 +1888,6 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
         bases_ok++;
         term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX    
-        term_dict[prefix_len++] = 1;
-        term_dict[prefix_len++] = reg_type;
-        yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
-#endif
         term_dict[prefix_len] = '\0';
         if (!numeric_relation(zh, zapt, &termp, term_dict,
                              attributeSet, grep_info, &max_pos, reg_type,
@@ -2158,10 +2148,6 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
         term_dict[prefix_len++] = ord_buf[i];
     }
     term_dict[prefix_len++] = ')';
-#if REG_TYPE_PREFIX
-    term_dict[prefix_len++] = 1;
-    term_dict[prefix_len++] = reg_type;
-#endif
     strcpy(term_dict+prefix_len, term);
     
     grep_info.isam_p_indx = 0;
@@ -2926,9 +2912,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
             scan_info->list[j].term = NULL;
 
         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
-#if REG_TYPE_PREFIX
-        termz[prefix_len++] = reg_id;
-#endif
         termz[prefix_len] = 0;
         strcpy(scan_info->prefix, termz);