On -V, display Expat version
[idzebra-moved-to-github.git] / index / extract.c
index fb6e228..557c2f2 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: extract.c,v 1.137 2003-02-26 12:30:54 pop Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: extract.c,v 1.146 2003-10-07 09:18:43 adam Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
    Index Data Aps
 
 This file is part of the Zebra server.
@@ -419,7 +419,8 @@ static int recordExtract (ZebraHandle zh,
                           SYSNO *sysno, const char *fname,
                           struct recordGroup *rGroup, int deleteFlag,
                           struct file_read_info *fi,
-                         RecType recType, char *subType, void *clientData)
+                         RecType recType, char *subType, void *clientData,
+                          int force_update)
 {
     RecordAttr *recordAttr;
     int r;
@@ -440,8 +441,6 @@ static int recordExtract (ZebraHandle zh,
        zh->reg->keys.prevAttrSet = -1;
        zh->reg->keys.prevSeqNo = 0;
        zh->reg->sortKeys.buf_used = 0;
-       zh->reg->sortKeys.buf_max = 0;
-       zh->reg->sortKeys.buf = 0;
        
        recordOffset = fi->file_moffset;
        extractCtrl.offset = fi->file_moffset;
@@ -582,7 +581,7 @@ static int recordExtract (ZebraHandle zh,
        
        recordAttr = rec_init_attr (zh->reg->zei, rec);
 
-       if (recordAttr->runNumber ==
+       if (!force_update && recordAttr->runNumber ==
             zebraExplain_runNumberIncrement (zh->reg->zei, 0))
        {
             yaz_log (LOG_LOG, "run number = %d", recordAttr->runNumber);
@@ -884,7 +883,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
     {
         file_begin (fi);
         r = recordExtract (zh, sysno, fname, rGroup, deleteFlag, fi,
-                           recType, subType, clientData);
+                           recType, subType, clientData, 1);
     } while (r && !sysno && fi->file_more);
     file_read_stop (fi);
     if (fd != -1)
@@ -919,7 +918,8 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType,
                                 recordType,
                                 sysno,
                                 match_criteria,
-                                "<no file>"));
+                                "<no file>",
+                                0,1));
 }
 /*
   If sysno is provided, then it's used to identify the reocord.
@@ -935,7 +935,9 @@ int bufferExtractRecord (ZebraHandle zh,
                         const char *recordType,
                         int *sysno,
                         const char *match_criteria,
-                        const char *fname)
+                        const char *fname,
+                        int force_update,
+                        int allow_update)
 
 {
     RecordAttr *recordAttr;
@@ -963,41 +965,45 @@ int bufferExtractRecord (ZebraHandle zh,
     extractCtrl.endf = zebra_record_int_end;
     extractCtrl.fh = &fc;
 
-    /* announce database */
-    if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName))
-    {
-      if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, 0))
-       return 0;
-    }
-
     zh->reg->keys.buf_used = 0;
     zh->reg->keys.prevAttrUse = -1;
     zh->reg->keys.prevAttrSet = -1;
     zh->reg->keys.prevSeqNo = 0;
     zh->reg->sortKeys.buf_used = 0;
-    zh->reg->sortKeys.buf_max = 0;
-    zh->reg->sortKeys.buf = 0;
 
+    /* announce database */
+
+    if (!(rGroup->databaseName)) {
+        logf (LOG_WARN, "Invalid record group, no database name given");
+       return 0;
+    }
+    
+    if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName))
+    {
+        if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, 0))
+            return 0;
+    }
+    
     if (*recordType) {
-      logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType);
-      recType = recType_byName (zh->reg->recTypes, recordType, subType,
-                               &clientData);
+        logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType);
+        recType = recType_byName (zh->reg->recTypes, recordType, subType,
+                                  &clientData);
     } else {
-      if (!(rGroup->recordType)) {
-        logf (LOG_WARN, "No such record type defined");
-        return 0;
-      }
-      logf (LOG_DEBUG, "Get record type from rgroup: %s",rGroup->recordType);
-      recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType,
-                               &clientData);
-      recordType = rGroup->recordType;
+        if (!(rGroup->recordType)) {
+            logf (LOG_WARN, "No such record type defined");
+            return 0;
+        }
+        logf (LOG_DEBUG, "Get record type from rgroup: %s",rGroup->recordType);
+        recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType,
+                                  &clientData);
+        recordType = rGroup->recordType;
     }
-
+    
     if (!recType) {
-      logf (LOG_WARN, "No such record type: %s", rGroup->recordType);
-      return 0;
+        logf (LOG_WARN, "No such record type: %s", rGroup->recordType);
+        return 0;
     }
-
+    
     extractCtrl.subType = subType;
     extractCtrl.init = extract_init;
     extractCtrl.tokenAdd = extract_token_add;
@@ -1043,25 +1049,24 @@ int bufferExtractRecord (ZebraHandle zh,
     /* match criteria */
     matchStr = NULL;
 
-    if (! *sysno) {
-      char *rinfo;
-      if (strlen(match_criteria) > 0) {
-       matchStr = (char *)match_criteria;
-      } else {
-       if (rGroup->recordId && *rGroup->recordId) {
-         matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, 
-                                  rGroup->recordId);
-       }
-      }
-      if (matchStr) {
-       rinfo = dict_lookup (zh->reg->matchDict, matchStr);
-       if (rinfo)
-         memcpy (sysno, rinfo+1, sizeof(*sysno));
-      } else {
-       logf (LOG_WARN, "Bad match criteria (recordID)");
-       return 0;
-      }
-
+    if (! *sysno && match_criteria) {
+        char *rinfo;
+        if (*match_criteria) {
+            matchStr = (char *)match_criteria;
+        } else {
+            if (rGroup->recordId && *rGroup->recordId) {
+                matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, 
+                                         rGroup->recordId);
+            }
+        }
+        if (matchStr) {
+            rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+            if (rinfo)
+                memcpy (sysno, rinfo+1, sizeof(*sysno));
+        } else {
+            logf (LOG_WARN, "Bad match criteria (recordID)");
+            return 0;
+        }
     }
 
     if (! *sysno)
@@ -1091,28 +1096,38 @@ int bufferExtractRecord (ZebraHandle zh,
         extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
 
         zh->records_inserted++;
-    }
+    } 
     else
     {
         /* record already exists */
         struct recKeys delkeys;
         struct sortKeys sortKeys;
 
+       if (!allow_update) {
+             logf (LOG_LOG, "skipped %s %s %ld", 
+                   recordType, fname, (long) recordOffset);
+             logRecord(zh);
+             return -1;
+       }
+
         rec = rec_get (zh->reg->records, *sysno);
         assert (rec);
        
        recordAttr = rec_init_attr (zh->reg->zei, rec);
 
-       if (recordAttr->runNumber ==
-           zebraExplain_runNumberIncrement (zh->reg->zei, 0))
-       {
-           logf (LOG_LOG, "skipped %s %s %ld", recordType,
-                 fname, (long) recordOffset);
-           extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys);
-           rec_rm (&rec);
-            logRecord(zh);
-           return 1;
+       if (!force_update) {
+         if (recordAttr->runNumber ==
+             zebraExplain_runNumberIncrement (zh->reg->zei, 0))
+           {
+             logf (LOG_LOG, "skipped %s %s %ld", recordType,
+                   fname, (long) recordOffset);
+             extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys);
+             rec_rm (&rec);
+             logRecord(zh);
+             return 1;
+           }
        }
+
         delkeys.buf_used = rec->size[recInfo_delKeys];
        delkeys.buf = rec->info[recInfo_delKeys];
 
@@ -1288,8 +1303,9 @@ int explain_extract (void *handle, Record rec, data1_node *n)
     extractCtrl.zebra_maps = zh->reg->zebra_maps;
     extractCtrl.flagShowRecords = 0;
     extractCtrl.handle = handle;
-    
-    grs_extract_tree(&extractCtrl, n);
+
+    if (n)
+       grs_extract_tree(&extractCtrl, n);
 
     if (rec->size[recInfo_delKeys])
     {
@@ -1735,9 +1751,9 @@ static void extract_add_complete_field (RecWord *p)
 void extract_token_add (RecWord *p)
 {
     WRBUF wrbuf;
-
 #if 0
-    yaz_log (LOG_LOG, "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s",
+    yaz_log (LOG_LOG, "token_add "
+            "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s",
              p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length,
              p->string);
 #endif
@@ -1819,6 +1835,7 @@ char *encode_key_int (int d, char *bp)
     }
     return bp;
 }
+#define OLDENCODE 1
 
 #ifdef OLDENCODE
 /* this is the old encode_key_write