Fixed bug #462: recordId: (set,use) is picky
[idzebra-moved-to-github.git] / index / extract.c
index 06eb1b4..fbd37ac 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.197 2005-10-28 09:22:50 adam Exp $
+/* $Id: extract.c,v 1.201 2006-02-08 13:45:44 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -206,6 +206,8 @@ static void file_end (void *handle, off_t offset)
     }
 }
 
+#define FILE_MATCH_BLANK "\t "
+
 static char *fileMatchStr (ZebraHandle zh,
                           zebra_rec_keys_t reckeys,
                            const char *fname, const char *spec)
@@ -216,8 +218,8 @@ static char *fileMatchStr (ZebraHandle zh,
 
     while (1)
     {
-        while (*s == ' ' || *s == '\t')
-            s++;
+       for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
+           ;
         if (!*s)
             break;
         if (*s == '(')
@@ -226,21 +228,26 @@ static char *fileMatchStr (ZebraHandle zh,
            char attset_str[64], attname_str[64];
            data1_attset *attset;
            int i;
-            char matchFlag[32];
             int attSet = 1, attUse = 1;
             int first = 1;
-
-            s++;
-           for (i = 0; *s && *s != ',' && *s != ')'; s++)
-               if (i < 63)
+           
+           for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
+               ;
+           for (i = 0; *s && *s != ',' && *s != ')' && 
+                    !strchr(FILE_MATCH_BLANK, *s); s++)
+               if (i+1 < sizeof(attset_str))
                    attset_str[i++] = *s;
            attset_str[i] = '\0';
-
+           
+           for (; strchr(FILE_MATCH_BLANK, *s); s++)
+               ;
            if (*s == ',')
            {
-               s++;
-               for (i = 0; *s && *s != ')'; s++)
-                   if (i < 63)
+               for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
+                   ;
+               for (i = 0; *s && *s != ')' && 
+                        !strchr(FILE_MATCH_BLANK, *s); s++)
+                   if (i+1 < sizeof(attname_str))
                        attname_str[i++] = *s;
                attname_str[i] = '\0';
            }
@@ -257,12 +264,7 @@ static char *fileMatchStr (ZebraHandle zh,
            }
             searchRecordKey (zh, reckeys, attSet, attUse, ws, 32);
 
-            if (*s == ')')
-            {
-                for (i = 0; i<32; i++)
-                    matchFlag[i] = 1;
-            }
-            else
+            if (*s != ')')
             {
                 yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s",
                       spec, zh->m_group ? zh->m_group : "none");
@@ -271,7 +273,7 @@ static char *fileMatchStr (ZebraHandle zh,
             s++;
 
             for (i = 0; i<32; i++)
-                if (matchFlag[i] && ws[i])
+                if (ws[i])
                 {
                     if (first)
                     {
@@ -294,12 +296,12 @@ static char *fileMatchStr (ZebraHandle zh,
             char special[64];
             const char *spec_src = NULL;
             const char *s1 = ++s;
-            while (*s1 && *s1 != ' ' && *s1 != '\t')
+            while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
                 s1++;
 
             spec_len = s1 - s;
-            if (spec_len > 63)
-                spec_len = 63;
+            if (spec_len > sizeof(special)-1)
+                spec_len = sizeof(special)-1;
             memcpy (special, s, spec_len);
             special[spec_len] = '\0';
             s = s1;
@@ -329,7 +331,7 @@ static char *fileMatchStr (ZebraHandle zh,
 
             while (*s && *s != stopMarker)
             {
-                if (i < 63)
+                if (i+1 < sizeof(tmpString))
                     tmpString[i++] = *s++;
             }
             if (*s)
@@ -535,7 +537,11 @@ static int file_extract_record(ZebraHandle zh,
         {
             dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
         extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                 recordAttr->staticrank);
         zh->records_inserted++;
@@ -582,7 +588,7 @@ static int file_extract_record(ZebraHandle zh,
             {
                 yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
                         zh->m_record_type, fname, recordOffset);
-                yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false");
+                yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
             }
             else
             {
@@ -600,25 +606,19 @@ static int file_extract_record(ZebraHandle zh,
         }
         else
         {
-            /* record going to be updated */
-            if (zebra_rec_keys_empty(delkeys))
-            {
+           /* flush new keys for sort&search etc */
+            if (zh->records_processed < zh->m_file_verbose_limit)
                 yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
                       zh->m_record_type, fname, recordOffset);
-                yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
-            }
-            else
-            {
-               /* flush new keys for sort&search etc */
-                if (zh->records_processed < zh->m_file_verbose_limit)
-                    yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
-                        zh->m_record_type, fname, recordOffset);
-               recordAttr->staticrank = extractCtrl.staticrank;
-                extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
+           recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+            extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+            extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+            extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
                                         recordAttr->staticrank);
-                zh->records_updated++;
-            }
+            zh->records_updated++;
         }
        zebra_rec_keys_close(delkeys);
 #if NATTR
@@ -652,10 +652,16 @@ static int file_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -841,6 +847,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                                int force_update,
                                int allow_update)
 {
+    SYSNO sysno0 = 0;
     RecordAttr *recordAttr;
     struct recExtractCtrl extractCtrl;
     int r;
@@ -938,14 +945,14 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
        yaz_log (YLOG_WARN, "extract error: no such filter");
        return ZEBRA_FAIL;
     }
-    /* match criteria */
-    matchStr = NULL;
 
     if (extractCtrl.match_criteria[0])
        match_criteria = extractCtrl.match_criteria;
 
-    if (! *sysno) {
-        char *rinfo;
+    if (!sysno) {
+
+       sysno = &sysno0;
+
         if (match_criteria && *match_criteria) {
             matchStr = match_criteria;
         } else {
@@ -960,7 +967,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             }
         }
         if (matchStr) {
-            rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+           char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
             if (rinfo)
            {
                assert(*rinfo == sizeof(*sysno));
@@ -981,8 +988,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
         /* new record */
         if (delete_flag)
         {
-           if (show_progress)
-               yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+           yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
                         pr_fname, (long) recordOffset);
             yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
             return ZEBRA_FAIL;
@@ -1002,7 +1008,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             dict_insert (zh->reg->matchDict, matchStr,
                          sizeof(*sysno), sysno);
         }
+#if NATTR
+       extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
        
 #if 0
        print_rec_keys(zh, zh->reg->keys);
@@ -1023,8 +1033,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
 
        if (!allow_update)
        {
-           if (show_progress)
-               yaz_log (YLOG_LOG, "skipped %s %s %ld", 
+           yaz_log (YLOG_LOG, "skipped %s %s %ld", 
                         recordType, pr_fname, (long) recordOffset);
            logRecord(zh);
            return ZEBRA_FAIL;
@@ -1039,8 +1048,15 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                               rec->info[recInfo_delKeys],
                               rec->size[recInfo_delKeys],
                               0);
+#if NATTR
+       zebra_rec_keys_set_buf(sortKeys,
+                              rec->info[recInfo_sortKeys],
+                              rec->size[recInfo_sortKeys],
+                              0);
+#else
         sortKeys.buf_used = rec->size[recInfo_sortKeys];
         sortKeys.buf = rec->info[recInfo_sortKeys];
+#endif
 
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 0, sortKeys);
@@ -1054,13 +1070,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
             /* record going to be deleted */
             if (zebra_rec_keys_empty(delkeys))
             {
-               if (show_progress)
-               {
-                   yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
-                            pr_fname, (long) recordOffset);
-                   yaz_log (YLOG_WARN, "cannot delete file above, "
-                            "storeKeys false");
-               }
+               yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+                    pr_fname, (long) recordOffset);
+               yaz_log (YLOG_WARN, "cannot delete file above, "
+                            "storeKeys false (3)");
            }
             else
             {
@@ -1078,27 +1091,18 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
         }
         else
         {
-            /* record going to be updated */
-            if (zebra_rec_keys_empty(delkeys))
-            {
-               if (show_progress)
-               {
-                   yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
-                            pr_fname, (long) recordOffset);
-                   yaz_log (YLOG_WARN, "cannot update file above, storeKeys false");
-               }
-           }
-            else
-            {
-               if (show_progress)
+           if (show_progress)
                    yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
                             pr_fname, (long) recordOffset);
-               recordAttr->staticrank = extractCtrl.staticrank;
-                extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, 
+           recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+            extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+            extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+            extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, 
                                         recordAttr->staticrank);
-                zh->records_updated++;
-            }
+            zh->records_updated++;
         }
        zebra_rec_keys_close(delkeys);
 #if NATTR
@@ -1131,10 +1135,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
     /* update sort keys */
     xfree (rec->info[recInfo_sortKeys]);
 
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     /* save file size of original record */
     zebraExplain_recordBytesIncrement (zh->reg->zei,
@@ -1233,7 +1243,7 @@ int explain_extract (void *handle, Record rec, data1_node *n)
        zebra_rec_keys_t delkeys = zebra_rec_keys_open();
        
 #if NATTR
-       zebra_rec_keys_t sortkeys = zzebra_rec_keys_open();
+       zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
 #else
        struct sortKeys sortkeys;
 #endif
@@ -1269,10 +1279,16 @@ int explain_extract (void *handle, Record rec, data1_node *n)
                           &rec->size[recInfo_delKeys]);
 
     xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                          &rec->info[recInfo_sortKeys],
+                          &rec->size[recInfo_sortKeys]);
+#else
     rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
     rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
     zh->reg->sortKeys.buf = NULL;
     zh->reg->sortKeys.buf_max = 0;
+#endif
 
     return 0;
 }
@@ -1495,14 +1511,6 @@ void extract_flushWriteKeys (ZebraHandle zh, int final)
     zh->reg->key_buf_used = 0;
 }
 
-void extract_add_it_key (ZebraHandle zh,
-                        zebra_rec_keys_t *keys,
-                        int reg_type,
-                        const char *str, int slen, struct it_key *key)
-{
-    zebra_rec_keys_write(*keys, reg_type, str, slen, key);
-}
-
 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
                                  zebra_rec_keys_t reckeys,
                                  zebra_snippets *snippets)
@@ -1598,16 +1606,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length)
     key.mem[3] = p->seqno;
 
 #if 0
-    /* just for debugging .. */
-    yaz_log(YLOG_LOG, "add: set=%d use=%d "
-           "record_id=%lld section_id=%lld seqno=%lld",
-           p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno);
+    if (1)
+    {
+       char strz[80];
+       int i;
+
+       strz[0] = 0;
+       for (i = 0; i<length && i < 20; i++)
+           sprintf(strz+strlen(strz), "%02X", str[i] & 0xff);
+       /* just for debugging .. */
+       yaz_log(YLOG_LOG, "add: set=%d use=%d "
+               "record_id=%lld section_id=%lld seqno=%lld %s",
+               p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno,
+               strz);
+    }
 #endif
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->keys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->keys, str, length, &key);
 }
 
 #if NATTR
@@ -1635,10 +1650,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length)
     key.mem[2] = p->section_id;
     key.mem[3] = p->seqno;
 
-    extract_add_it_key(p->extractCtrl->handle, 
-                      &zh->reg->sortKeys,
-                      p->index_type, str,
-                      length, &key);
+    zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
 }
 #else
 static void extract_add_sort_string (RecWord *p, const char *str, int length)