Added code which maintains number of term occurrences and document
occurrences for an index.
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 10 May 2006 12:31:08 +0000 (12:31 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 10 May 2006 12:31:08 +0000 (12:31 +0000)

index/extract.c
index/zinfo.c
index/zinfo.h

index 3f21862..66212ce 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $
-   Copyright (C) 1995-2005
+/* $Id: extract.c,v 1.210 2006-05-10 12:31:08 adam Exp $
+   Copyright (C) 1995-2006
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -532,17 +532,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
         *sysno = rec->sysno;
         
         if (zh->records_processed < zh->m_file_verbose_limit)
-          if (matchStr)
-            yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
-                    " " ZINT_FORMAT " %s" ,
-                    zh->m_record_type,
-                    fname, recordOffset, *sysno, matchStr);
-          else
-            yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
-                    " " ZINT_FORMAT , 
-                    zh->m_record_type,
-                    fname, recordOffset, *sysno);
-        
+        {
+            if (matchStr)
+                yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                        " " ZINT_FORMAT " %s" ,
+                        zh->m_record_type,
+                        fname, recordOffset, *sysno, matchStr);
+            else
+                yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                        " " ZINT_FORMAT , 
+                        zh->m_record_type,
+                        fname, recordOffset, *sysno);
+        }
        recordAttr = rec_init_attr (zh->reg->zei, rec);
        recordAttr->staticrank = extractCtrl.staticrank;
 
@@ -611,19 +612,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
             else
             {
                 if (zh->records_processed < zh->m_file_verbose_limit)
-                  if (matchStr)
-                    yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
-                            " " ZINT_FORMAT " %s" ,
-                            zh->m_record_type,
-                            fname, recordOffset, *sysno, matchStr);
-                  else
-                    yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
-                            " " ZINT_FORMAT , 
-                            zh->m_record_type,
-                            fname, recordOffset, *sysno);
-
-
-
+                {
+                    if (matchStr)
+                        yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                                " " ZINT_FORMAT " %s" ,
+                                zh->m_record_type,
+                                fname, recordOffset, *sysno, matchStr);
+                    else
+                        yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                                " " ZINT_FORMAT , 
+                                zh->m_record_type,
+                                fname, recordOffset, *sysno);
+                }
                 zh->records_deleted++;
                 if (matchStr)
                {
@@ -640,17 +640,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
         {
            /* flush new keys for sort&search etc */
             if (zh->records_processed < zh->m_file_verbose_limit)
-                  if (matchStr)
+            {
+                if (matchStr)
                     yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
                             " " ZINT_FORMAT " %s" ,
                             zh->m_record_type,
                             fname, recordOffset, *sysno, matchStr);
-                  else
+                else
                     yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
                             " " ZINT_FORMAT , 
                             zh->m_record_type,
                             fname, recordOffset, *sysno);
-
+            }
            recordAttr->staticrank = extractCtrl.staticrank;
 #if NATTR
             extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
@@ -1350,6 +1351,58 @@ int explain_extract (void *handle, Record rec, data1_node *n)
     return 0;
 }
 
+/*
+ * Update the per-ordinal occurrence statistics for the keys of one record.
+ *
+ * All keys in reckeys are read once and tallied by ordinal (taken from
+ * key_in.mem[0]). For every distinct ordinal seen, the explain info
+ * reached through zh->reg->zei is adjusted: the term count by the number
+ * of keys carrying that ordinal and the document count by one.
+ *
+ * zh        handle giving access to the explain info (zh->reg->zei)
+ * is_insert non-zero: counts are added; zero: the same counts are subtracted
+ * reckeys   key set to scan; nothing happens if rewinding it yields zero
+ *
+ * The return value of zebraExplain_ord_adjust_occurrences is not checked.
+ */
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+                             zebra_rec_keys_t reckeys)
+{
+    ZebraExplainInfo zei = zh->reg->zei;
+    /* one node per distinct ordinal; 'no' is the number of keys seen */
+    struct ord_stat {
+        int no;
+        int ord;
+        struct ord_stat *next;
+    };
+
+    if (zebra_rec_keys_rewind(reckeys))
+    {
+        struct ord_stat *ord_list = 0;
+        struct ord_stat *p;
+       size_t slen;
+       const char *str;
+       struct it_key key_in;
+       /* pass 1: count keys per ordinal in a temporary linked list */
+       while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+        {
+            int ord = key_in.mem[0]; 
+
+            /* linear search; p is left as null when ord is not yet listed */
+            for (p = ord_list; p ; p = p->next)
+                if (p->ord == ord)
+                {
+                    p->no++;
+                    break;
+                }
+            if (!p)
+            {
+                /* first key for this ordinal: prepend a new node */
+                p = xmalloc(sizeof(*p));
+                p->no = 1;
+                p->ord = ord;
+                p->next = ord_list;
+                ord_list = p;
+            }
+        }
+
+        /* pass 2: apply the totals and release the list node by node */
+        p = ord_list;
+        while (p)
+        {
+            struct ord_stat *p1 = p;
+
+            if (is_insert)
+                zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
+            else
+                zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
+            /* advance before freeing the node we just processed */
+            p = p->next;
+            xfree(p1);
+        }
+    }
+}
+
 void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
                               int cmd,
                              zebra_rec_keys_t reckeys,
@@ -1357,6 +1410,8 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
 {
     ZebraExplainInfo zei = zh->reg->zei;
 
+    extract_rec_keys_adjust(zh, cmd, reckeys);
+
     if (!zh->reg->key_buf)
     {
        int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
@@ -1403,7 +1458,7 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
            zh->reg->key_buf_used +=
                key_SU_encode(ch, (char*)zh->reg->key_buf +
                              zh->reg->key_buf_used);
-           
+
            /* copy the 0-terminated stuff from str to output */
            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
            zh->reg->key_buf_used += slen;
index 2235089..562ae67 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: zinfo.c,v 1.59 2006-05-10 09:08:55 adam Exp $
-   Copyright (C) 1995-2005
+/* $Id: zinfo.c,v 1.60 2006-05-10 12:31:08 adam Exp $
+   Copyright (C) 1995-2006
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -44,6 +44,8 @@ struct zebSUInfo {
        } su;
     } u;
     int ordinal;
+    zint doc_occurrences;
+    zint term_occurrences;
 };
 
 struct zebSUInfoB {
@@ -594,6 +596,8 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
        data1_node *node_str = NULL;
        data1_node *node_ordinal = NULL;
        data1_node *node_type = NULL;
+        data1_node *node_doc_occurrences = NULL;
+        data1_node *node_term_occurrences = NULL;
        data1_node *np2;
        char oid_str[128];
        int oid_str_len;
@@ -615,6 +619,15 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
                node_ordinal = np2->child;
            else if (!strcmp(np2->u.tag.tag, "type"))
                node_type = np2->child;
+           else if (!strcmp(np2->u.tag.tag, "dococcurrences"))
+               node_doc_occurrences = np2->child;
+           else if (!strcmp(np2->u.tag.tag, "termoccurrences"))
+               node_term_occurrences = np2->child;
+            else
+            {
+                yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails",
+                        np2->u.tag.tag);
+            }
        }
        assert(node_ordinal);
 
@@ -629,6 +642,18 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei,
            (*zsuip)->info.index_type = 'w';
        }
 
+        if (node_doc_occurrences)
+        {
+            data1_node *np = node_doc_occurrences;
+            (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data,
+                                                     np->u.data.len);
+        }
+        if (node_term_occurrences)
+        {
+            data1_node *np = node_term_occurrences;
+            (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data,
+                                                      np->u.data.len);
+        }
        if (node_set && node_use)
        {
            (*zsuip)->info.which = ZEB_SU_SET_USE;
@@ -1165,6 +1190,11 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei,
        }
        data1_mk_tag_data_int (zei->dh, node_attr, "ordinal",
                               zsui->info.ordinal, zei->nmem);
+
+        data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences",
+                                zsui->info.doc_occurrences, zei->nmem);
+        data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences",
+                                zsui->info.term_occurrences, zei->nmem);
     }
     /* convert to "SGML" and write it */
 #if ZINFO_DEBUG
@@ -1482,15 +1512,79 @@ int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle,
     }
     return 0;
 }
-                         
-int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord,
-                            int *index_type, 
-                            const char **db,
-                            int *set, int *use,
-                            const char **string_index)
+
+
+/*
+ * Find the SU info entry whose ordinal equals ord.
+ *
+ * Databases are visited in list order starting at zei->databaseInfo. For
+ * each one, zebraExplain_readAttributeDetails is called first if the
+ * readFlag of its attribute details is set. The first entry with a
+ * matching ordinal is unlinked and re-inserted at the head of that
+ * database's SUInfo list.
+ *
+ * zei        explain info to search
+ * ord        ordinal to look for
+ * dirty_mark if non-zero, the attribute details holding the match are
+ *            flagged dirty
+ * db         if non-null, receives the name of the database holding the
+ *            match; left untouched when nothing is found
+ *
+ * Returns the matching entry, or 0 if no database has one.
+ */
+struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord,
+                                              int dirty_mark,
+                                              const char **db)
+{
     struct zebDatabaseInfoB *zdb;
 
+    for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+    {
+       /* pointer to the link that refers to the current entry, so the */
+       /* entry can be unlinked without tracking a previous node */
+       struct zebSUInfoB **zsui;
+
+       if (zdb->attributeDetails->readFlag)
+           zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
+
+       for (zsui = &zdb->attributeDetails->SUInfo; *zsui;
+             zsui = &(*zsui)->next)
+           if ((*zsui)->info.ordinal == ord)
+            {
+                struct zebSUInfoB *zsui_this = *zsui;
+                
+                /* take it out of the list and move to front */
+                *zsui = (*zsui)->next;
+                zsui_this->next = zdb->attributeDetails->SUInfo;
+                zdb->attributeDetails->SUInfo = zsui_this;
+
+                if (dirty_mark)
+                    zdb->attributeDetails->dirty = 1;
+                if (db)
+                    *db = zdb->databaseName;
+                return zsui_this;
+            }
+    }
+    return 0;
+}
+
+
+
+/*
+ * Add term_delta and doc_delta to the term and document occurrence
+ * counters of the entry with ordinal ord. Either delta may be negative.
+ *
+ * The lookup is done with dirty_mark set, so the attribute details that
+ * hold the entry are flagged dirty whenever a match is found.
+ *
+ * Returns 0 if the ordinal was found, -1 otherwise (nothing is changed).
+ */
+int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord,
+                                        int term_delta, int doc_delta)
+{
+    struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 1, 0);
+    if (zsui)
+    {
+        zsui->info.term_occurrences += term_delta;
+        zsui->info.doc_occurrences += doc_delta;
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Fetch the term and document occurrence counters of the entry with
+ * ordinal ord.
+ *
+ * term_occurrences and doc_occurrences are written through without a
+ * null check, so both must point to valid storage. The lookup is done
+ * with dirty_mark cleared, so no dirty flag is set by this call.
+ *
+ * Returns 0 and fills both outputs if the ordinal was found; returns -1
+ * and leaves the outputs untouched otherwise.
+ */
+int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord,
+                                     zint *term_occurrences,
+                                     zint *doc_occurrences)
+{
+    struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0);
+    if (zsui)
+    {
+        *term_occurrences = zsui->info.term_occurrences;
+        *doc_occurrences = zsui->info.doc_occurrences;
+        return 0;
+    }
+    return -1;
+}
+
+int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord,
+                           int *index_type, 
+                           const char **db,
+                           int *set, int *use,
+                           const char **string_index)
+{
+    struct zebSUInfoB *zsui;
+
     if (set)
        *set = -1;
     if (use)
@@ -1500,38 +1594,30 @@ int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord,
     if (string_index)
        *string_index = 0;
 
-    for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+    zsui = zebraExplain_get_sui_info(zei, ord, 0, db);
+    if (zsui)
     {
-       struct zebSUInfoB *zsui;
-
-       if (zdb->attributeDetails->readFlag)
-           zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
-           
-       for (zsui = zdb->attributeDetails->SUInfo; zsui; zsui = zsui->next)
-           if (zsui->info.ordinal == ord)
-           {
-               if (db)
-                   *db = zdb->databaseName;
-               if (zsui->info.which == ZEB_SU_SET_USE)
-               {
-                   if (set)
-                       *set = zsui->info.u.su.set;
-                   if (use)
-                       *use = zsui->info.u.su.use;
-               }
-
-               if (zsui->info.which == ZEB_SU_STR)
-                   if (string_index)
-                       *string_index = zsui->info.u.str;
-
-               if (index_type)
-                   *index_type = zsui->info.index_type;
-               return 0;
-           }
+        if (zsui->info.which == ZEB_SU_SET_USE)
+        {
+            if (set)
+                *set = zsui->info.u.su.set;
+            if (use)
+                *use = zsui->info.u.su.use;
+        }
+        
+        if (zsui->info.which == ZEB_SU_STR)
+            if (string_index)
+                *string_index = zsui->info.u.str;
+        
+        if (index_type)
+            *index_type = zsui->info.index_type;
+        return 0;
     }
     return -1;
 }
 
+
+
 zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei,
                                          zebAccessObject *op,
                                          Odr_oid *oid)
@@ -1587,6 +1673,8 @@ int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type,
     zsui->info.u.su.set = set;
     zsui->info.u.su.use = use;
     zsui->info.ordinal = (zei->ordinalSU)++;
+    zsui->info.doc_occurrences = 0;
+    zsui->info.term_occurrences = 0;
     return zsui->info.ordinal;
 }
 
index a0cfacc..73347ed 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: zinfo.h,v 1.30 2006-05-10 08:13:23 adam Exp $
-   Copyright (C) 1995-2005
+/* $Id: zinfo.h,v 1.31 2006-05-10 12:31:09 adam Exp $
+   Copyright (C) 1995-2006
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -71,6 +71,13 @@ int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord,
                             int *index_type, const char **db,
                             int *set, int *use, const char **string_index);
 
+/* Adds term_delta / doc_delta (may be negative) to the occurrence */
+/* counters of ordinal ord. Returns 0 if ord was found, -1 otherwise. */
+int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord,
+                                        int term_delta, int doc_delta);
+
+/* Stores the occurrence counters of ordinal ord in *term_occurrences */
+/* and *doc_occurrences (both must be non-null). Returns 0 if ord was */
+/* found, -1 otherwise. */
+int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord,
+                                     zint *term_occurrences,
+                                     zint *doc_occurrences);
+
 int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle,
                          int (*f)(void *handle, int ord));