Fix printf()-format mismatch error, %d/zint.
[idzebra-moved-to-github.git] / index / extract.c
index 06eb1b4..9d2742c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $
+/* $Id: extract.c,v 1.204 2006-03-20 15:17:30 mike Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -32,6 +32,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <fcntl.h>
 
 #include "index.h"
+#include "orddict.h"
 #include <direntz.h>
 #include <charmap.h>
 
@@ -206,6 +207,8 @@ static void file_end (void *handle, off_t offset)
     }
 }
 
+#define FILE_MATCH_BLANK "\t "
+
 static char *fileMatchStr (ZebraHandle zh,
                           zebra_rec_keys_t reckeys,
                            const char *fname, const char *spec)
@@ -216,8 +219,8 @@ static char *fileMatchStr (ZebraHandle zh,
 
     while (1)
     {
-        while (*s == ' ' || *s == '\t')
-            s++;
+       for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
+           ;
         if (!*s)
             break;
         if (*s == '(')
@@ -226,21 +229,26 @@ static char *fileMatchStr (ZebraHandle zh,
            char attset_str[64], attname_str[64];
            data1_attset *attset;
            int i;
-            char matchFlag[32];
             int attSet = 1, attUse = 1;
             int first = 1;
-
-            s++;
-           for (i = 0; *s && *s != ',' && *s != ')'; s++)
-               if (i < 63)
+           
+           for (s++; *s && strchr(FILE_MATCH_BLANK, *s); s++)
+               ; /* test *s first: strchr() also matches the terminating NUL */
+           for (i = 0; *s && *s != ',' && *s != ')' && 
+                    !strchr(FILE_MATCH_BLANK, *s); s++)
+               if (i+1 < sizeof(attset_str))
                    attset_str[i++] = *s;
            attset_str[i] = '\0';
-
+           
+           for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
+               ;
            if (*s == ',')
            {
-               s++;
-               for (i = 0; *s && *s != ')'; s++)
-                   if (i < 63)
+               for (s++; *s && strchr(FILE_MATCH_BLANK, *s); s++)
+                   ;
+               for (i = 0; *s && *s != ')' && 
+                        !strchr(FILE_MATCH_BLANK, *s); s++)
+                   if (i+1 < sizeof(attname_str))
                        attname_str[i++] = *s;
                attname_str[i] = '\0';
            }
@@ -257,12 +265,7 @@ static char *fileMatchStr (ZebraHandle zh,
            }
             searchRecordKey (zh, reckeys, attSet, attUse, ws, 32);
 
-            if (*s == ')')
-            {
-                for (i = 0; i<32; i++)
-                    matchFlag[i] = 1;
-            }
-            else
+            if (*s != ')')
             {
                 yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s",
                       spec, zh->m_group ? zh->m_group : "none");
@@ -271,7 +274,7 @@ static char *fileMatchStr (ZebraHandle zh,
             s++;
 
             for (i = 0; i<32; i++)
-                if (matchFlag[i] && ws[i])
+                if (ws[i])
                 {
                     if (first)
                     {
@@ -294,12 +297,12 @@ static char *fileMatchStr (ZebraHandle zh,
             char special[64];
             const char *spec_src = NULL;
             const char *s1 = ++s;
-            while (*s1 && *s1 != ' ' && *s1 != '\t')
+            while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
                 s1++;
 
             spec_len = s1 - s;
-            if (spec_len > 63)
-                spec_len = 63;
+            if (spec_len > sizeof(special)-1)
+                spec_len = sizeof(special)-1;
             memcpy (special, s, spec_len);
             special[spec_len] = '\0';
             s = s1;
@@ -329,7 +332,7 @@ static char *fileMatchStr (ZebraHandle zh,
 
             while (*s && *s != stopMarker)
             {
+                if (i+1 >= sizeof(tmpString)) { s++; continue; } /* full: drop char, keep scanning */
-                if (i < 63)
                     tmpString[i++] = *s++;
             }
             if (*s)
@@ -490,7 +493,9 @@ static int file_extract_record(ZebraHandle zh,
        }
        if (matchStr)
        {
-            char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+                                         matchStr);
            if (rinfo)
            {
                assert(*rinfo == sizeof(*sysno));
@@ -533,9 +538,15 @@ static int file_extract_record(ZebraHandle zh,
 
         if (matchStr)
         {
-            dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+                           sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
         extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                 recordAttr->staticrank);
         zh->records_inserted++;
@@ -582,7 +593,7 @@ static int file_extract_record(ZebraHandle zh,
             {
                 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
                         zh->m_record_type, fname, recordOffset);
-                yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false");
+                yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
             }
             else
             {
@@ -591,7 +602,10 @@ static int file_extract_record(ZebraHandle zh,
                             zh->m_record_type, fname, recordOffset);
                 zh->records_deleted++;
                 if (matchStr)
-                    dict_delete (zh->reg->matchDict, matchStr);
+               {
+                   int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+                    dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+               }
                 rec_del (zh->reg->records, &rec);
             }
            rec_rm (&rec);
@@ -600,25 +614,19 @@ static int file_extract_record(ZebraHandle zh,
         }
         else
         {
-            /* record going to be updated */
-            if (zebra_rec_keys_empty(delkeys))
-            {
+           /* flush new keys for sort&search etc */
+            if (zh->records_processed < zh->m_file_verbose_limit)
                 yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
                       zh->m_record_type, fname, recordOffset);
-                yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
-            }
-            else
-            {
-               /* flush new keys for sort&search etc */
-                if (zh->records_processed < zh->m_file_verbose_limit)
-                    yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
-                        zh->m_record_type, fname, recordOffset);
-               recordAttr->staticrank = extractCtrl.staticrank;
-                extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
+           recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+            extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+            extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+            extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                         recordAttr->staticrank);
-                zh->records_updated++;
-            }
+            zh->records_updated++;
         }
        zebra_rec_keys_close(delkeys);
 #if NATTR
@@ -652,10 +660,16 @@ static int file_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -841,6 +855,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                                int force_update,
                                int allow_update)
 {
+    SYSNO sysno0 = 0;
     RecordAttr *recordAttr;
     struct recExtractCtrl extractCtrl;
     int r;
@@ -938,14 +953,14 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
        yaz_log (YLOG_WARN, "extract error: no such filter");
        return ZEBRA_FAIL;
     }
-    /* match criteria */
-    matchStr = NULL;
 
     if (extractCtrl.match_criteria[0])
        match_criteria = extractCtrl.match_criteria;
 
-    if (! *sysno) {
-        char *rinfo;
+    if (!sysno) {
+
+       sysno = &sysno0;
+
         if (match_criteria && *match_criteria) {
             matchStr = match_criteria;
         } else {
@@ -959,8 +974,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                 }
             }
         }
-        if (matchStr) {
-            rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+        if (matchStr) 
+       {
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+           char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+                                         matchStr);
             if (rinfo)
            {
                assert(*rinfo == sizeof(*sysno));
@@ -981,8 +999,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
         /* new record */
         if (delete_flag)
         {
-           if (show_progress)
-               yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+           yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
                         pr_fname, (long) recordOffset);
             yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
             return ZEBRA_FAIL;
@@ -999,10 +1016,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
 
         if (matchStr)
         {
-            dict_insert (zh->reg->matchDict, matchStr,
-                         sizeof(*sysno), sysno);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+                           sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
        
 #if 0
        print_rec_keys(zh, zh->reg->keys);
@@ -1023,8 +1045,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
 
        if (!allow_update)
        {
-           if (show_progress)
-               yaz_log (YLOG_LOG, "skipped %s %s %ld", 
+           yaz_log (YLOG_LOG, "skipped %s %s %ld", 
                         recordType, pr_fname, (long) recordOffset);
            logRecord(zh);
            return ZEBRA_FAIL;
@@ -1039,8 +1060,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                               rec->info[recInfo_delKeys],
                               rec->size[recInfo_delKeys],
                               0);
+#if NATTR
+       zebra_rec_keys_set_buf(sortKeys,
+                              rec->info[recInfo_sortKeys],
+                              rec->size[recInfo_sortKeys],
+                              0);
+#else
         sortKeys.buf_used = rec->size[recInfo_sortKeys];
         sortKeys.buf = rec->info[recInfo_sortKeys];
+#endif
 
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 0, sortKeys);
@@ -1054,13 +1082,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             /* record going to be deleted */
             if (zebra_rec_keys_empty(delkeys))
             {
-               if (show_progress)
-               {
-                   yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
-                            pr_fname, (long) recordOffset);
-                   yaz_log (YLOG_WARN, "cannot delete file above, "
-                            "storeKeys false");
-               }
+               yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+                    pr_fname, (long) recordOffset);
+               yaz_log (YLOG_WARN, "cannot delete file above, "
+                            "storeKeys false (3)");
            }
             else
             {
@@ -1069,7 +1094,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                             pr_fname, (long) recordOffset);
                 zh->records_deleted++;
                 if (matchStr)
-                    dict_delete (zh->reg->matchDict, matchStr);
+               {
+                   int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+                    dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+               }
                 rec_del (zh->reg->records, &rec);
             }
            rec_rm (&rec);
@@ -1078,27 +1106,18 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
         }
         else
         {
-            /* record going to be updated */
-            if (zebra_rec_keys_empty(delkeys))
-            {
-               if (show_progress)
-               {
-                   yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
-                            pr_fname, (long) recordOffset);
-                   yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
-               }
-           }
-            else
-            {
-               if (show_progress)
+           if (show_progress)
                    yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
                             pr_fname, (long) recordOffset);
-               recordAttr->staticrank = extractCtrl.staticrank;
-                extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, 
+           recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+            extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+            extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+            extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, 
                                         recordAttr->staticrank);
-                zh->records_updated++;
-            }
+            zh->records_updated++;
         }
        zebra_rec_keys_close(delkeys);
 #if NATTR
@@ -1131,10 +1150,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -1233,7 +1258,7 @@ int explain_extract (void *handle, Record rec, data1_node *n)
        zebra_rec_keys_t delkeys = zebra_rec_keys_open();
        
 #if NATTR
-       zebra_rec_keys_t sortkeys = zzebra_rec_keys_open();
+       zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
 #else
        struct sortKeys sortkeys;
 #endif
@@ -1269,10 +1294,16 @@ int explain_extract (void *handle, Record rec, data1_node *n)
                           &rec->size[recInfo_delKeys]);
 
     xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     return 0;
 }
@@ -1341,6 +1372,12 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
 
            if (zh->m_staticrank) /* rank config enabled ? */
            {
+               if (staticrank < 0)
+               {
+                   yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
+                           (long) staticrank);
+                   staticrank = 0;
+               }
                *keyp++ = staticrank;
                key_out.len = 4;
            }
@@ -1495,14 +1532,6 @@ void extract_flushWriteKeys (ZebraHandle zh, int final)
     zh->reg->key_buf_used = 0;
 }
 
-void extract_add_it_key (ZebraHandle zh,
-                        zebra_rec_keys_t *keys,
-                        int reg_type,
-                        const char *str, int slen, struct it_key *key)
-{
-    zebra_rec_keys_write(*keys, reg_type, str, slen, key);
-}
-
 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
                                  zebra_rec_keys_t reckeys,
                                  zebra_snippets *snippets)
@@ -1598,16 +1627,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length)
     key.mem[3] = p->seqno;
 
 #if 0
-    /* just for debugging .. */
-    yaz_log(YLOG_LOG, "add: set=%d use=%d "
-           "record_id=%lld section_id=%lld seqno=%lld",
-           p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno);
+    if (1)
+    {
+       char strz[80];
+       int i;
+
+       strz[0] = 0;
+       for (i = 0; i<length && i < 20; i++)
+           sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
+       /* just for debugging .. */
+       yaz_log(YLOG_LOG, "add: set=%d use=%d "
+               "record_id=%lld section_id=%lld seqno=%lld %s",
+               p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
+               strz);
+    }
 #endif
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->keys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->keys, str, length, &key);
 }
 
 #if NATTR
@@ -1635,10 +1671,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length)
     key.mem[2] = p->section_id;
     key.mem[3] = p->seqno;
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->sortKeys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
 }
 #else
 static void extract_add_sort_string (RecWord *p, const char *str, int length)
@@ -1922,6 +1955,7 @@ void encode_key_init (struct encode_info *i)
     i->prevcmd=-1;
     i->keylen=0;
     i->encode_handle = iscz1_start();
+    i->decode_handle = iscz1_start();
 }
 
 #define OLDENCODE 1
@@ -1943,19 +1977,42 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf)
     /* and copy & align key so we can mangle */
     memcpy (&key, k+1, sizeof(struct it_key));  /* *k is insert/delete */
 
+#if 0
+    /* debugging */
+    key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d");
+#endif
+    assert(key.mem[0] >= 0);
+
     bp0 = bp++;
     iscz1_encode(i->encode_handle, &bp, &src);
+
     *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
     if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
     {
         yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
         exit (1);
     }
+
+#if 0
+    /* debugging */
+    if (1)
+    {
+       struct it_key key2;
+       const char *src = bp0+1;
+       char *dst = (char*) &key2;
+       iscz1_decode(i->decode_handle, &dst, &src);
+
+       key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d");
+
+       assert(key2.mem[1]);
+    }
+#endif
 }
 
 void encode_key_flush (struct encode_info *i, FILE *outf)
-{ /* dummy routine */
+{ 
     iscz1_stop(i->encode_handle);
+    iscz1_stop(i->decode_handle);
 }
 
 #else