Added code which maintains number of term occurrences and document
[idzebra-moved-to-github.git] / index / extract.c
index a48dc38..66212ce 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: extract.c,v 1.207 2006-04-05 02:11:44 adam Exp $
-   Copyright (C) 1995-2005
+/* $Id: extract.c,v 1.210 2006-05-10 12:31:08 adam Exp $
+   Copyright (C) 1995-2006
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -526,13 +526,24 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
             yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
             return ZEBRA_OK;
         }
-        if (zh->records_processed < zh->m_file_verbose_limit)
-            yaz_log (YLOG_LOG, "add %s %s " PRINTF_OFF_T, zh->m_record_type,
-                  fname, recordOffset);
-        rec = rec_new (zh->reg->records);
 
+        rec = rec_new (zh->reg->records);
+        
         *sysno = rec->sysno;
-
+        
+        if (zh->records_processed < zh->m_file_verbose_limit)
+        {
+            if (matchStr)
+                yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                        " " ZINT_FORMAT " %s" ,
+                        zh->m_record_type,
+                        fname, recordOffset, *sysno, matchStr);
+            else
+                yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                        " " ZINT_FORMAT , 
+                        zh->m_record_type,
+                        fname, recordOffset, *sysno);
+        }
        recordAttr = rec_init_attr (zh->reg->zei, rec);
        recordAttr->staticrank = extractCtrl.staticrank;
 
@@ -542,6 +553,8 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
             dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
                            sizeof(*sysno), sysno);
         }
+
+
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
 #else
@@ -591,15 +604,26 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
             /* record going to be deleted */
             if (zebra_rec_keys_empty(delkeys))
             {
-                yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
-                        zh->m_record_type, fname, recordOffset);
+                yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                         " " ZINT_FORMAT,
+                        zh->m_record_type, fname, recordOffset, *sysno);
                 yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
             }
             else
             {
                 if (zh->records_processed < zh->m_file_verbose_limit)
-                    yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
-                            zh->m_record_type, fname, recordOffset);
+                {
+                    if (matchStr)
+                        yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                                " " ZINT_FORMAT " %s" ,
+                                zh->m_record_type,
+                                fname, recordOffset, *sysno, matchStr);
+                    else
+                        yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                                " " ZINT_FORMAT , 
+                                zh->m_record_type,
+                                fname, recordOffset, *sysno);
+                }
                 zh->records_deleted++;
                 if (matchStr)
                {
@@ -616,8 +640,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh,
         {
            /* flush new keys for sort&search etc */
             if (zh->records_processed < zh->m_file_verbose_limit)
-                yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
-                      zh->m_record_type, fname, recordOffset);
+            {
+                if (matchStr)
+                    yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT " %s" ,
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno, matchStr);
+                else
+                    yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT , 
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno);
+            }
            recordAttr->staticrank = extractCtrl.staticrank;
 #if NATTR
             extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
@@ -1027,6 +1061,8 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
                            sizeof(*sysno), sysno);
         }
+
+
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
 #else
@@ -1315,6 +1351,58 @@ int explain_extract (void *handle, Record rec, data1_node *n)
     return 0;
 }
 
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, /* tally the keys in reckeys per ord and report each tally to zebraExplain */
+                             zebra_rec_keys_t reckeys)      /* is_insert non-zero: counts are added; zero: counts are subtracted */
+{
+    ZebraExplainInfo zei = zh->reg->zei;
+    struct ord_stat {              /* one node per distinct ord seen in reckeys */
+        int no;                    /* number of keys read so far with this ord */
+        int ord;                   /* value taken from key_in.mem[0] */
+        struct ord_stat *next;
+    };
+
+    if (zebra_rec_keys_rewind(reckeys)) /* nothing is done when rewind returns zero */
+    {
+        struct ord_stat *ord_list = 0; /* head of the tally list; new ords are pushed at the front */
+        struct ord_stat *p;
+       size_t slen;
+       const char *str;
+       struct it_key key_in;
+       while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+        {
+            int ord = key_in.mem[0]; /* only the first key component is used; str and slen are read but ignored */
+
+            for (p = ord_list; p ; p = p->next) /* linear search for an existing tally of this ord */
+                if (p->ord == ord)
+                {
+                    p->no++;
+                    break;
+                }
+            if (!p) /* search ran off the end of the list: first key with this ord */
+            {
+                p = xmalloc(sizeof(*p));
+                p->no = 1;
+                p->ord = ord;
+                p->next = ord_list;
+                ord_list = p;
+            }
+        }
+
+        p = ord_list;
+        while (p) /* report every tally exactly once and release its node */
+        {
+            struct ord_stat *p1 = p; /* remembered so the node can be freed after p has advanced */
+
+            if (is_insert)
+                zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1); /* NOTE(review): last argument is presumably a per-record (document) count delta - confirm against zebraExplain */
+            else
+                zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1); /* same adjustment with both deltas negated */
+            p = p->next;
+            xfree(p1);
+        }
+    }
+}
+
 void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
                               int cmd,
                              zebra_rec_keys_t reckeys,
@@ -1322,6 +1410,8 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
 {
     ZebraExplainInfo zei = zh->reg->zei;
 
+    extract_rec_keys_adjust(zh, cmd, reckeys);
+
     if (!zh->reg->key_buf)
     {
        int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
@@ -1368,7 +1458,7 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
            zh->reg->key_buf_used +=
                key_SU_encode(ch, (char*)zh->reg->key_buf +
                              zh->reg->key_buf_used);
-           
+
            /* copy the 0-terminated stuff from str to output */
            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
            zh->reg->key_buf_used += slen;
@@ -2124,3 +2214,11 @@ void encode_key_flush (struct encode_info *i, FILE *outf)
     i->prevseq=0;
 }
 #endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+