Allow the same document to be modified multiple times within one transaction.
[idzebra-moved-to-github.git] / index / extract.c
index 25a20fc..dcd4206 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.188 2005-08-05 10:40:13 adam Exp $
+/* $Id: extract.c,v 1.195 2005-09-16 09:58:38 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -43,6 +43,8 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 
 #define USE_SHELLSORT 0
 
+#define REC_MULTI_SKIP 0
+
 #if USE_SHELLSORT
 static void shellsort(void *ar, int r, size_t s,
                       int (*cmp)(const void *a, const void *b))
@@ -403,6 +405,7 @@ static int file_extract_record(ZebraHandle zh,
     SYSNO sysnotmp;
     Record rec;
     off_t recordOffset = 0;
+    struct recExtractCtrl extractCtrl;
     
     /* announce database */
     if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
@@ -414,8 +417,6 @@ static int file_extract_record(ZebraHandle zh,
 
     if (fi->fd != -1)
     {
-       struct recExtractCtrl extractCtrl;
-
         /* we are going to read from a file, so prepare the extraction */
        create_rec_keys_codec(&zh->reg->keys);
 #if NATTR
@@ -436,6 +437,8 @@ static int file_extract_record(ZebraHandle zh,
        extractCtrl.schemaAdd = extract_schema_add;
        extractCtrl.dh = zh->reg->dh;
        extractCtrl.match_criteria[0] = '\0';
+       extractCtrl.staticrank = 0;
+       
        extractCtrl.first_record = fi->file_offset ? 0 : 1;
 
        extract_set_store_data_prepare(&extractCtrl);
@@ -480,7 +483,7 @@ static int file_extract_record(ZebraHandle zh,
             return 0;
         }
         if (extractCtrl.match_criteria[0])
-            matchStr = extractCtrl.match_criteria;
+            matchStr = extractCtrl.match_criteria;     
     }
 
     /* perform match if sysno not known and if match criteria is specified */
@@ -541,13 +544,15 @@ static int file_extract_record(ZebraHandle zh,
         *sysno = rec->sysno;
 
        recordAttr = rec_init_attr (zh->reg->zei, rec);
+       recordAttr->staticrank = extractCtrl.staticrank;
 
         if (matchStr)
         {
             dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
         }
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-        extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+        extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys,
+                                recordAttr->staticrank);
 
         zh->records_inserted++;
     }
@@ -566,6 +571,7 @@ static int file_extract_record(ZebraHandle zh,
        
        recordAttr = rec_init_attr (zh->reg->zei, rec);
 
+#if REC_MULTI_SKIP
        if (!force_update && recordAttr->runNumber ==
             zebraExplain_runNumberIncrement (zh->reg->zei, 0))
        {
@@ -578,6 +584,8 @@ static int file_extract_record(ZebraHandle zh,
            logRecord (zh);
            return 1;
        }
+#endif
+       /* flush old keys for sort&search etc. */
         delkeys.buf_used = rec->size[recInfo_delKeys];
        delkeys.buf = rec->info[recInfo_delKeys];
 
@@ -585,7 +593,8 @@ static int file_extract_record(ZebraHandle zh,
         sortKeys.buf = rec->info[recInfo_sortKeys];
 
        extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
-        extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
+        extract_flushRecordKeys (zh, *sysno, 0, &delkeys,
+                                recordAttr->staticrank); /* old values */  
         if (deleteFlag)
         {
             /* record going to be deleted */
@@ -620,11 +629,14 @@ static int file_extract_record(ZebraHandle zh,
             }
             else
             {
+               /* flush new keys for sort&search etc */
                 if (zh->records_processed < zh->m_file_verbose_limit)
                     yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
                         zh->m_record_type, fname, recordOffset);
+               recordAttr->staticrank = extractCtrl.staticrank;
                 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+                extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys,
+                                        recordAttr->staticrank);
                 zh->records_updated++;
             }
         }
@@ -920,6 +932,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
     extractCtrl.dh = zh->reg->dh;
     extractCtrl.handle = zh;
     extractCtrl.match_criteria[0] = '\0';
+    extractCtrl.staticrank = 0;
     
     init_extractCtrl(zh, &extractCtrl);
 
@@ -998,6 +1011,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
         *sysno = rec->sysno;
 
        recordAttr = rec_init_attr (zh->reg->zei, rec);
+       recordAttr->staticrank = extractCtrl.staticrank;
 
         if (matchStr)
         {
@@ -1005,7 +1019,8 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
                          sizeof(*sysno), sysno);
         }
        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-        extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+        extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys,
+                                recordAttr->staticrank);
 
         zh->records_inserted++;
     } 
@@ -1033,6 +1048,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
        
        recordAttr = rec_init_attr (zh->reg->zei, rec);
        
+#if REC_MULTI_SKIP
        if (!force_update) {
            if (recordAttr->runNumber ==
                zebraExplain_runNumberIncrement (zh->reg->zei, 0))
@@ -1046,6 +1062,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
                return ZEBRA_FAIL;
            }
        }
+#endif
 
         delkeys.buf_used = rec->size[recInfo_delKeys];
        delkeys.buf = rec->info[recInfo_delKeys];
@@ -1054,7 +1071,8 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
         sortKeys.buf = rec->info[recInfo_sortKeys];
 
        extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
-        extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
+        extract_flushRecordKeys (zh, *sysno, 0, &delkeys,
+                                recordAttr->staticrank);
         if (delete_flag)
         {
             /* record going to be deleted */
@@ -1099,8 +1117,10 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh,
                if (show_progress)
                    yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
                             pr_fname, (long) recordOffset);
+               recordAttr->staticrank = extractCtrl.staticrank;
                 extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
-                extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+                extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, 
+                                        recordAttr->staticrank);
                 zh->records_updated++;
             }
         }
@@ -1220,6 +1240,7 @@ int explain_extract (void *handle, Record rec, data1_node *n)
 
     extractCtrl.flagShowRecords = 0;
     extractCtrl.match_criteria[0] = '\0';
+    extractCtrl.staticrank = 0;
     extractCtrl.handle = handle;
     extractCtrl.first_record = 1;
     
@@ -1244,9 +1265,9 @@ int explain_extract (void *handle, Record rec, data1_node *n)
        sortkeys.buf = rec->info[recInfo_sortKeys];
 
        extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys);
-       extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys);
+       extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys, 0);
     }
-    extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys);
+    extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys, 0);
     extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
 
     xfree (rec->info[recInfo_delKeys]);
@@ -1265,7 +1286,8 @@ int explain_extract (void *handle, Record rec, data1_node *n)
 }
 
 void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
-                              int cmd, struct recKeys *reckeys)
+                              int cmd, struct recKeys *reckeys,
+                             zint staticrank)
 {
     void *decode_handle = iscz1_start();
     int off = 0;
@@ -1293,11 +1315,13 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
     while (off < reckeys->buf_used)
     {
         const char *src = reckeys->buf + off;
-        struct it_key key;
-       char *dst = (char*) &key;
+        struct it_key key_in;
+       struct it_key key_out;
+       char *dst = (char*) &key_in;
+       zint *keyp = key_out.mem;
 
        iscz1_decode(decode_handle, &dst, &src);
-       assert(key.len == 4);
+       assert(key_in.len == 4);
 
         if (zh->reg->key_buf_used + 1024 > 
             (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
@@ -1307,7 +1331,7 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
         (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
            (char*)zh->reg->key_buf + zh->reg->key_buf_used;
 
-       ch = (int) key.mem[0];  /* ordinal for field/use/attribute */
+       ch = (int) key_in.mem[0];  /* ordinal for field/use/attribute */
 
        zh->reg->key_buf_used +=
            key_SU_encode (ch,((char*)zh->reg->key_buf) +
@@ -1318,17 +1342,24 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
         ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0';
         ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd;
 
-        key.len = 3;
-       if (key.mem[1]) /* filter specified record ID */
-           key.mem[0] = key.mem[1];
+       if (zh->m_staticrank) /* rank config enabled ? */
+       {
+           *keyp++ = staticrank;
+           key_out.len = 4;
+       }
+       else
+           key_out.len = 3;
+
+       if (key_in.mem[1]) /* filter specified record ID */
+           *keyp++ = key_in.mem[1];
        else
-           key.mem[0] = sysno;
-       key.mem[1] = key.mem[2];  /* section_id */
-       key.mem[2] = key.mem[3];  /* sequence .. */
+           *keyp++ = sysno;
+       *keyp++ = key_in.mem[2];  /* section_id */
+       *keyp++ = key_in.mem[3];  /* sequence .. */
 
         memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used,
-               &key, sizeof(key));
-        (zh->reg->key_buf_used) += sizeof(key);
+               &key_out, sizeof(key_out));
+        (zh->reg->key_buf_used) += sizeof(key_out);
         off = src - reckeys->buf;
     }
     assert (off == reckeys->buf_used);
@@ -1501,13 +1532,11 @@ void extract_add_it_key (ZebraHandle zh,
 ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys,
                                  zebra_snippets *snippets)
 {
-   void *decode_handle = iscz1_start();
+    void *decode_handle = iscz1_start();
     int off = 0;
     int seqno = 0;
     NMEM nmem = nmem_create();
 
-    yaz_log(YLOG_LOG, "zebra_rec_keys_snippets buf=%p sz=%d", reckeys->buf,
-           reckeys->buf_used);
     assert(reckeys->buf);
     while (off < reckeys->buf_used)
     {
@@ -1516,14 +1545,20 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys,
        char *dst = (char*) &key;
        char dst_buf[IT_MAX_WORD];
        char *dst_term = dst_buf;
+       int index_type = 0, ord;
 
        iscz1_decode(decode_handle, &dst, &src);
        assert(key.len <= 4 && key.len > 2);
 
        seqno = (int) key.mem[key.len-1];
-
-       zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1);
-       zebra_snippets_append(snippets, seqno, src[0], key.mem[0], dst_term);
+       ord = key.mem[0];
+
+       zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
+                               0/* db */, 0/* set */, 0/* use */);
+       assert(index_type);
+       zebra_term_untrans_iconv(zh, nmem, index_type,
+                                &dst_term, src);
+       zebra_snippets_append(snippets, seqno, ord, dst_term);
         while (*src++)
            ;
         off = src - reckeys->buf;
@@ -1659,9 +1694,9 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length)
     {
         int set, use, slen;
 
-        off += key_SU_decode(&set, sk->buf + off);
-        off += key_SU_decode(&use, sk->buf + off);
-        off += key_SU_decode(&slen, sk->buf + off);
+        off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
+        off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
+        off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
         off += slen;
         if (p->attrSet == set && p->attrUse == use)
             return;
@@ -1905,9 +1940,9 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
     {
         int set, use, slen;
         
-        off += key_SU_decode(&set, sk->buf + off);
-        off += key_SU_decode(&use, sk->buf + off);
-        off += key_SU_decode(&slen, sk->buf + off);
+        off += key_SU_decode(&set, (unsigned char *) sk->buf + off);
+        off += key_SU_decode(&use, (unsigned char *) sk->buf + off);
+        off += key_SU_decode(&slen, (unsigned char *) sk->buf + off);
         
         sortIdx_type(sortIdx, use);
         if (cmd == 1)