nmake: align with pazpar2 WRT icu/libxslt
[idzebra-moved-to-github.git] / index / records.c
index 8498d3b..0fd33cf 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 1994-2009 Index Data
+   Copyright (C) Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -18,12 +18,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
 /*
- *  Format of first block
+ *  Format of first block (assumes a 512 block size)
  *      next       (8 bytes)
  *      ref_count  (2 bytes)
  *      block      (500 bytes)
  *
- *  Format of subsequent blocks 
+ *  Format of subsequent blocks
  *      next  (8 bytes)
  *      block (502 bytes)
  *
@@ -32,6 +32,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *      (length, data) - pairs
  *      length = 0 if same as previous
  */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -44,6 +47,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #if HAVE_BZLIB_H
 #include <bzlib.h>
 #endif
+#if HAVE_ZLIB_H
+#include <zlib.h>
+#endif
 
 #define REC_BLOCK_TYPES 2
 #define REC_HEAD_MAGIC "recindex"
@@ -183,7 +189,7 @@ static ZEBRA_RES rec_release_blocks(Records p, zint sysno)
             /* the list of blocks can all be removed (ref == 0) */
             first = 0;
        }
-       
+
         if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock),
                       &p->head.block_free[dst_type]))
         {
@@ -265,7 +271,7 @@ static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos)
             cptr = p->tmp_buf + no_written;
         }
         block_prev = block_free;
-        no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) 
+        no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type])
             - sizeof(zint);
         p->head.block_used[dst_type]++;
     }
@@ -277,6 +283,28 @@ static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos)
     return ZEBRA_OK;
 }
 
+/* Tells whether this build supports the given record compression method.
+ * Returns 1 for REC_COMPRESS_NONE, and for REC_COMPRESS_ZLIB/BZIP2 when
+ * HAVE_ZLIB_H / HAVE_BZLIB_H is set at compile time; returns 0 otherwise.
+ */
+int rec_check_compression_method(int compression_method)
+{
+    int supported = 0;
+
+    if (compression_method == REC_COMPRESS_NONE)
+        supported = 1;
+#if HAVE_ZLIB_H
+    else if (compression_method == REC_COMPRESS_ZLIB)
+        supported = 1;
+#endif
+#if HAVE_BZLIB_H
+    else if (compression_method == REC_COMPRESS_BZIP2)
+        supported = 1;
+#endif
+
+    return supported;
+}
+
 Records rec_open(BFiles bfs, int rw, int compression_method)
 {
     Records p;
@@ -288,7 +316,7 @@ Records rec_open(BFiles bfs, int rw, int compression_method)
     memset(&p->head, '\0', sizeof(p->head));
     p->compression_method = compression_method;
     p->rw = rw;
-    p->tmp_size = 1024;
+    p->tmp_size = 4096;
     p->tmp_buf = (char *) xmalloc(p->tmp_size);
     p->compression_chunk_size = 0;
     if (compression_method == REC_COMPRESS_BZIP2)
@@ -310,16 +338,16 @@ Records rec_open(BFiles bfs, int rw, int compression_method)
             p->head.block_last[i] = 1;
             p->head.block_used[i] = 0;
         }
-        p->head.block_size[0] = 128;
+        p->head.block_size[0] = 256;
         p->head.block_move[0] = 0;
         for (i = 1; i<REC_BLOCK_TYPES; i++)
         {
-            p->head.block_size[i] = p->head.block_size[i-1] * 4;
-            p->head.block_move[i] = p->head.block_size[i] * 12;
+            p->head.block_size[i] = p->head.block_size[i-1] * 8;
+            p->head.block_move[i] = p->head.block_size[i] * 2;
         }
         if (rw)
        {
-            if (recindex_write_head(p->recindex, 
+            if (recindex_write_head(p->recindex,
                                     &p->head, sizeof(p->head)) != ZEBRA_OK)
                ret = ZEBRA_FAIL;
        }
@@ -452,7 +480,7 @@ static void rec_cache_flush_block1(Records p, Record rec, Record last_rec,
        }
        if (i == 0)
        {
-           rec_encode_zint(rec_sysno_to_int(rec->sysno), 
+           rec_encode_zint(rec_sysno_to_int(rec->sysno),
                            (unsigned char *) *out_buf + *out_offset, &len);
            (*out_offset) += len;
        }
@@ -492,14 +520,40 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
        char compression_method = p->compression_method;
        switch (compression_method)
        {
+        case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+           csize = out_offset + (out_offset >> 6) + 620;
+            while (1)
+            {
+                int r;
+                uLongf destLen = csize;
+                rec_tmp_expand(p, csize);
+                r = compress((Bytef *) p->tmp_buf+sizeof(zint)+sizeof(short)+
+                             sizeof(char),
+                             &destLen, (const Bytef *) out_buf, out_offset);
+                csize = destLen;
+                if (r == Z_OK)
+                {
+                    break;
+                }
+                if (r != Z_MEM_ERROR)
+                {
+                    yaz_log(YLOG_WARN, "compress error: %d", r);
+                    csize = 0;
+                    break;
+                }
+                csize = csize * 2;
+            }
+#endif
+            break;
        case REC_COMPRESS_BZIP2:
-#if HAVE_BZLIB_H       
+#if HAVE_BZLIB_H
            csize = out_offset + (out_offset >> 6) + 620;
            rec_tmp_expand(p, csize);
 #ifdef BZ_CONFIG_ERROR
-           i = BZ2_bzBuffToBuffCompress 
+           i = BZ2_bzBuffToBuffCompress
 #else
-           i = bzBuffToBuffCompress 
+           i = bzBuffToBuffCompress
 #endif
                                    (p->tmp_buf+sizeof(zint)+sizeof(short)+
                                      sizeof(char),
@@ -509,14 +563,12 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
                yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
                csize = 0;
            }
-           yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
-                 csize);
 #endif
            break;
        case REC_COMPRESS_NONE:
            break;
        }
-       if (!csize)  
+       if (!csize)
        {
            /* either no compression or compression not supported ... */
            csize = out_offset;
@@ -529,7 +581,7 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
        memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count));
        memcpy(p->tmp_buf + sizeof(zint)+sizeof(short),
                &compression_method, sizeof(compression_method));
-               
+
        /* -------- compression */
        if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos)
            != ZEBRA_OK)
@@ -609,7 +661,7 @@ static ZEBRA_RES rec_cache_flush(Records p, int saveCount)
     {
         struct record_cache_entry *e = p->record_cache + i;
         rec_free(&e->rec);
-    } 
+    }
     /* i still being used ... */
     for (j = 0; j<saveCount; j++, i++)
         memcpy(p->record_cache+j, p->record_cache+i,
@@ -708,9 +760,6 @@ static Record rec_get_int(Records p, zint sysno)
     char *nptr, *cptr;
     char *in_buf = 0;
     char *bz_buf = 0;
-#if HAVE_BZLIB_H
-    unsigned int bz_size;
-#endif
     char compression_method;
 
     assert(sysno > 0);
@@ -730,7 +779,7 @@ static Record rec_get_int(Records p, zint sysno)
     freeblock = entry.next / 8;
 
     assert(freeblock > 0);
-    
+
     rec_tmp_expand(p, entry.size);
 
     cptr = p->tmp_buf;
@@ -744,7 +793,7 @@ static Record rec_get_int(Records p, zint sysno)
         zint tmp;
 
         cptr += p->head.block_size[dst_type] - sizeof(freeblock);
-        
+
         memcpy(&tmp, cptr, sizeof(tmp));
         r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
        if (r < 0)
@@ -761,27 +810,57 @@ static Record rec_get_int(Records p, zint sysno)
     in_size = entry.size - sizeof(short) - sizeof(char);
     switch (compression_method)
     {
+    case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+        if (1)
+        {
+            unsigned int bz_size = entry.size * 20 + 100;
+            while (1)
+            {
+                uLongf destLen = bz_size;
+                bz_buf = (char *) xmalloc(bz_size);
+                i = uncompress((Bytef *) bz_buf, &destLen,
+                               (const Bytef *) in_buf, in_size);
+                if (i == Z_OK)
+                {
+                    bz_size = destLen;
+                    break;
+                }
+                yaz_log(YLOG_LOG, "failed");
+                xfree(bz_buf);
+                bz_size *= 2;
+            }
+            in_buf = bz_buf;
+            in_size = bz_size;
+        }
+#else
+        yaz_log(YLOG_FATAL, "cannot decompress record(s) in ZLIB format");
+        return 0;
+#endif
+        break;
     case REC_COMPRESS_BZIP2:
 #if HAVE_BZLIB_H
-       bz_size = entry.size * 20 + 100;
-       while (1)
-       {
-           bz_buf = (char *) xmalloc(bz_size);
+        if (1)
+        {
+            unsigned int bz_size = entry.size * 20 + 100;
+            while (1)
+            {
+                bz_buf = (char *) xmalloc(bz_size);
 #ifdef BZ_CONFIG_ERROR
-           i = BZ2_bzBuffToBuffDecompress
+                i = BZ2_bzBuffToBuffDecompress
 #else
-           i = bzBuffToBuffDecompress
+                    i = bzBuffToBuffDecompress
 #endif
-                (bz_buf, &bz_size, in_buf, in_size, 0, 0);
-           yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
-           if (i == BZ_OK)
-               break;
-           yaz_log(YLOG_LOG, "failed");
-           xfree(bz_buf);
-            bz_size *= 2;
-       }
-       in_buf = bz_buf;
-       in_size = bz_size;
+                    (bz_buf, &bz_size, in_buf, in_size, 0, 0);
+                if (i == BZ_OK)
+                    break;
+                yaz_log(YLOG_LOG, "failed");
+                xfree(bz_buf);
+                bz_size *= 2;
+            }
+            in_buf = bz_buf;
+            in_size = bz_size;
+        }
 #else
        yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format");
        return 0;
@@ -858,6 +937,21 @@ Record rec_get_root(Records p)
     return rec_get(p, rec_sysno_to_ext(1));
 }
 
+/* Returns the first record after rec (by internal sysno) for which rec_get()
+ * yields non-zero, or 0 once p->head.index_last is reached. */
+Record rec_get_next(Records p, Record rec)
+{
+    zint next_sysno_int = rec_sysno_to_int(rec->sysno);
+    /* "<" rather than "!=": terminates even if rec is at/past index_last */
+    while (++next_sysno_int < p->head.index_last)
+    {
+        Record next = rec_get(p, rec_sysno_to_ext(next_sysno_int));
+        if (next)
+            return next;
+    }
+    return 0;
+}
+
 static Record rec_new_int(Records p)
 {
     int i;
@@ -999,7 +1093,7 @@ void rec_prstat(Records records, int verbose)
 {
     int i;
     zint total_bytes = 0;
-    
+
     yaz_log (YLOG_LOG,
           "Total records                        %8" ZINT_FORMAT0,
           records->head.no_records);