Put local variables footer in all c, h files.
[idzebra-moved-to-github.git] / index / extract.c
index fbd37ac..3f21862 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.201 2006-02-08 13:45:44 adam Exp $
+/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -32,6 +32,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <fcntl.h>
 
 #include "index.h"
+#include "orddict.h"
 #include <direntz.h>
 #include <charmap.h>
 
@@ -378,13 +379,13 @@ static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
     ctrl->flagShowRecords = !zh->m_flag_rw;
 }
 
-static int file_extract_record(ZebraHandle zh,
-                              SYSNO *sysno, const char *fname,
-                              int deleteFlag,
-                              struct file_read_info *fi,
-                              int force_update,
-                              RecType recType,
-                              void *recTypeClientData)
+static ZEBRA_RES file_extract_record(ZebraHandle zh,
+                                    SYSNO *sysno, const char *fname,
+                                    int deleteFlag,
+                                    struct file_read_info *fi,
+                                    int force_update,
+                                    RecType recType,
+                                    void *recTypeClientData)
 {
     RecordAttr *recordAttr;
     int r;
@@ -399,7 +400,7 @@ static int file_extract_record(ZebraHandle zh,
     {
         if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
                                      zh->m_explain_database))
-           return 0;
+           return ZEBRA_FAIL;
     }
 
     if (fi->fd != -1)
@@ -446,7 +447,7 @@ static int file_extract_record(ZebraHandle zh,
 
         yaz_log_init_prefix2 (0);
        if (r == RECCTRL_EXTRACT_EOF)
-           return 0;
+           return ZEBRA_FAIL;
        else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
        {
             /* error occured during extraction ... */
@@ -456,7 +457,7 @@ static int file_extract_record(ZebraHandle zh,
                 yaz_log (YLOG_WARN, "fail %s %s " PRINTF_OFF_T, zh->m_record_type,
                       fname, recordOffset);
             }
-            return 0;
+            return ZEBRA_FAIL;
         }
        else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
        {
@@ -468,7 +469,7 @@ static int file_extract_record(ZebraHandle zh,
                       PRINTF_OFF_T, zh->m_record_type,
                       fname, recordOffset);
             }
-            return 0;
+            return ZEBRA_FAIL;
         }
         if (extractCtrl.match_criteria[0])
             matchStr = extractCtrl.match_criteria;     
@@ -487,12 +488,14 @@ static int file_extract_record(ZebraHandle zh,
            if (!matchStr)
            {
                yaz_log(YLOG_WARN, "Bad match criteria");
-               return 0;
+               return ZEBRA_FAIL;
            }
        }
        if (matchStr)
        {
-            char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+                                         matchStr);
            if (rinfo)
            {
                assert(*rinfo == sizeof(*sysno));
@@ -505,12 +508,12 @@ static int file_extract_record(ZebraHandle zh,
          /* the extraction process returned no information - the record
             is probably empty - unless flagShowRecords is in use */
          if (!zh->m_flag_rw)
-             return 1;
+             return ZEBRA_OK;
   
          if (zh->records_processed < zh->m_file_verbose_limit)
             yaz_log (YLOG_WARN, "empty %s %s " PRINTF_OFF_T, zh->m_record_type,
            fname, recordOffset);
-         return 1;
+         return ZEBRA_OK;
     }
 
     if (! *sysno)
@@ -521,22 +524,36 @@ static int file_extract_record(ZebraHandle zh,
            yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type,
                  fname, recordOffset);
             yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
-            return 1;
+            return ZEBRA_OK;
         }
-        if (zh->records_processed < zh->m_file_verbose_limit)
-            yaz_log (YLOG_LOG, "add %s %s " PRINTF_OFF_T, zh->m_record_type,
-                  fname, recordOffset);
-        rec = rec_new (zh->reg->records);
 
+        rec = rec_new (zh->reg->records);
+        
         *sysno = rec->sysno;
-
+        
+        if (zh->records_processed < zh->m_file_verbose_limit)
+          if (matchStr)
+            yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                    " " ZINT_FORMAT " %s" ,
+                    zh->m_record_type,
+                    fname, recordOffset, *sysno, matchStr);
+          else
+            yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T 
+                    " " ZINT_FORMAT , 
+                    zh->m_record_type,
+                    fname, recordOffset, *sysno);
+        
        recordAttr = rec_init_attr (zh->reg->zei, rec);
        recordAttr->staticrank = extractCtrl.staticrank;
 
         if (matchStr)
         {
-            dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+                           sizeof(*sysno), sysno);
         }
+
+
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
 #else
@@ -586,30 +603,54 @@ static int file_extract_record(ZebraHandle zh,
             /* record going to be deleted */
             if (zebra_rec_keys_empty(delkeys))
             {
-                yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
-                        zh->m_record_type, fname, recordOffset);
+                yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                         " " ZINT_FORMAT,
+                        zh->m_record_type, fname, recordOffset, *sysno);
                 yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)");
             }
             else
             {
                 if (zh->records_processed < zh->m_file_verbose_limit)
-                    yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T,
-                            zh->m_record_type, fname, recordOffset);
+                  if (matchStr)
+                    yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT " %s" ,
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno, matchStr);
+                  else
+                    yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT , 
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno);
+
+
+
                 zh->records_deleted++;
                 if (matchStr)
-                    dict_delete (zh->reg->matchDict, matchStr);
+               {
+                   int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+                    dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+               }
                 rec_del (zh->reg->records, &rec);
             }
            rec_rm (&rec);
             logRecord (zh);
-            return 1;
+            return ZEBRA_OK;
         }
         else
         {
            /* flush new keys for sort&search etc */
             if (zh->records_processed < zh->m_file_verbose_limit)
-                yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T,
-                      zh->m_record_type, fname, recordOffset);
+                  if (matchStr)
+                    yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT " %s" ,
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno, matchStr);
+                  else
+                    yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T 
+                            " " ZINT_FORMAT , 
+                            zh->m_record_type,
+                            fname, recordOffset, *sysno);
+
            recordAttr->staticrank = extractCtrl.staticrank;
 #if NATTR
             extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
@@ -719,13 +760,14 @@ static int file_extract_record(ZebraHandle zh,
     /* commit this record */
     rec_put (zh->reg->records, &rec);
     logRecord (zh);
-    return 1;
+    return ZEBRA_OK;
 }
 
-int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, 
-                int deleteFlag)
+ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, 
+                            int deleteFlag)
 {
-    int r, i, fd;
+    ZEBRA_RES r = ZEBRA_OK;
+    int i, fd;
     char gprefix[128];
     char ext[128];
     char ext_res[128];
@@ -776,7 +818,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
                          &recTypeClientData)))
     {
         yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
-        return 0;
+        return ZEBRA_FAIL;
     }
 
     switch(recType->version)
@@ -801,16 +843,15 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
         else
             strcpy (full_rep, fname);
         
-
         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
         {
             yaz_log (YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
            zh->m_record_type = original_record_type;
-            return 0;
+            return ZEBRA_FAIL;
         }
     }
     fi = file_read_start (fd);
-    do
+    while(1)
     {
        fi->file_moffset = fi->file_offset;
        fi->file_more = 0;  /* file_end not called (yet) */
@@ -821,8 +862,15 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname,
            fi->file_offset = fi->file_moffset;
            lseek(fi->fd, fi->file_moffset, SEEK_SET);
        }
+       if (r != ZEBRA_OK)
+       {
+           break;
+       }
+       if (sysno)
+       {
+           break;
+       }
     }
-    while (r && !sysno);
     file_read_stop (fi);
     if (fd != -1)
         close (fd);
@@ -913,7 +961,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
     
     if (!recType)
     {
-        yaz_log (YLOG_WARN, "No such record type: %s", zh->m_record_type);
+        yaz_log (YLOG_WARN, "No such record type: %s", recordType);
         return ZEBRA_FAIL;
     }
     
@@ -966,8 +1014,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                 }
             }
         }
-        if (matchStr) {
-           char *rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+        if (matchStr) 
+       {
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+           char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
+                                         matchStr);
             if (rinfo)
            {
                assert(*rinfo == sizeof(*sysno));
@@ -1005,9 +1056,12 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
 
         if (matchStr)
         {
-            dict_insert (zh->reg->matchDict, matchStr,
-                         sizeof(*sysno), sysno);
+           int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+            dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+                           sizeof(*sysno), sysno);
         }
+
+
 #if NATTR
        extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
 #else
@@ -1082,7 +1136,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh,
                             pr_fname, (long) recordOffset);
                 zh->records_deleted++;
                 if (matchStr)
-                    dict_delete (zh->reg->matchDict, matchStr);
+               {
+                   int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+                    dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+               }
                 rec_del (zh->reg->records, &rec);
             }
            rec_rm (&rec);
@@ -1357,6 +1414,12 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
 
            if (zh->m_staticrank) /* rank config enabled ? */
            {
+               if (staticrank < 0)
+               {
+                   yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
+                           (long) staticrank);
+                   staticrank = 0;
+               }
                *keyp++ = staticrank;
                key_out.len = 4;
            }
@@ -1532,7 +1595,8 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
            ord = key.mem[0];
            
            zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
-                                   0/* db */, 0/* set */, 0/* use */);
+                                   0/* db */, 0/* set */, 0/* use */,
+                                   0 /* string_index */);
            assert(index_type);
            zebra_term_untrans_iconv(zh, nmem, index_type,
                                     &dst_term, str);
@@ -1561,7 +1625,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
            assert(key.len <= 4 && key.len > 2);
 
            zebraExplain_lookup_ord(zh->reg->zei,
-                                   key.mem[0], &index_type, &db, 0, 0);
+                                   key.mem[0], &index_type, &db, 0, 0, 0);
            
            seqno = (int) key.mem[key.len-1];
            
@@ -1934,6 +1998,7 @@ void encode_key_init (struct encode_info *i)
     i->prevcmd=-1;
     i->keylen=0;
     i->encode_handle = iscz1_start();
+    i->decode_handle = iscz1_start();
 }
 
 #define OLDENCODE 1
@@ -1955,19 +2020,42 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf)
     /* and copy & align key so we can mangle */
     memcpy (&key, k+1, sizeof(struct it_key));  /* *k is insert/delete */
 
+#if 0
+    /* debugging */
+    key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d");
+#endif
+    assert(key.mem[0] >= 0);
+
     bp0 = bp++;
     iscz1_encode(i->encode_handle, &bp, &src);
+
     *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
     if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
     {
         yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
         exit (1);
     }
+
+#if 0
+    /* debugging */
+    if (1)
+    {
+       struct it_key key2;
+       const char *src = bp0+1;
+       char *dst = (char*) &key2;
+       iscz1_decode(i->decode_handle, &dst, &src);
+
+       key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d");
+
+       assert(key2.mem[1]);
+    }
+#endif
 }
 
 void encode_key_flush (struct encode_info *i, FILE *outf)
-{ /* dummy routine */
+{ 
     iscz1_stop(i->encode_handle);
+    iscz1_stop(i->decode_handle);
 }
 
 #else
@@ -2071,3 +2159,11 @@ void encode_key_flush (struct encode_info *i, FILE *outf)
     i->prevseq=0;
 }
 #endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+