Allow records to be zlib-compressed
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 6 Jul 2009 11:15:09 +0000 (13:15 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 6 Jul 2009 11:15:09 +0000 (13:15 +0200)
Configure now looks for zlib development headers + libs. When present,
zlib compression may be enabled with recordCompression: zlib in zebra.cfg.

configure.ac
debian/control
idzebra.spec.in
index/recindex.h
index/records.c
index/zebraapi.c

index 936f76a..d996cd2 100644 (file)
@@ -174,6 +174,11 @@ else
     fi
 fi
 dnl
+dnl ------- zlib
+AC_CHECK_LIB([z],[compress2])
+if test "$ac_cv_lib_z_compress2" = "yes"; then
+    AC_CHECK_HEADERS([zlib.h])
+fi
 dnl ------ -lm
 AC_CHECK_LIB(m,sqrt)
 dnl ------ -ldl
index 5e98673..61afbb6 100644 (file)
@@ -2,7 +2,7 @@ Source: idzebra
 Priority: optional
 Maintainer: Adam Dickmeiss <adam@indexdata.dk>
 Standards-Version: 3.6.0
-Build-Depends: debhelper (>= 4.0.0), libyaz3-dev (>= 3.0.30), libexpat1-dev, tcl8.4-dev, libbz2-dev, libxslt1-dev
+Build-Depends: debhelper (>= 4.0.0), libyaz3-dev (>= 3.0.30), libexpat1-dev, tcl8.4-dev, libbz2-dev, libxslt1-dev, zlib1g-dev
 
 Package: idzebra-2.0
 Section: text
index 1355f00..46cf0d2 100644 (file)
@@ -9,7 +9,7 @@ Source: idzebra-%{version}.tar.gz
 BuildRoot: %{_tmppath}/idzebra-%{version}-root
 Packager: Adam Dickmeiss <adam@indexdata.dk>
 URL: http://www.indexdata.dk/zebra/
-BuildRequires: libyaz3-devel expat-devel bzip2-devel tcl
+BuildRequires: libyaz3-devel expat-devel bzip2-devel tcl zlib-devel
 Summary: High-performance, structured text indexing and retrival engine.
 
 %description
index a3078c1..3320929 100644 (file)
@@ -93,6 +93,7 @@ zint rec_sysno_to_int(zint sysno);
 /** \brief compression types */
 #define REC_COMPRESS_NONE   0
 #define REC_COMPRESS_BZIP2  1
+#define REC_COMPRESS_ZLIB   2
 
 enum { 
     recInfo_fileType, 
index ac05fa8..60908db 100644 (file)
@@ -44,6 +44,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #if HAVE_BZLIB_H
 #include <bzlib.h>
 #endif
+#if HAVE_ZLIB_H
+#include <zlib.h>
+#endif
 
 #define REC_BLOCK_TYPES 2
 #define REC_HEAD_MAGIC "recindex"
@@ -492,6 +495,34 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
        char compression_method = p->compression_method;
        switch (compression_method)
        {
+        case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+           csize = out_offset + (out_offset >> 6) + 620;
+            while (1)
+            {
+                int r;
+                uLongf destLen = csize;
+                rec_tmp_expand(p, csize);
+                r = compress((Bytef *) p->tmp_buf+sizeof(zint)+sizeof(short)+
+                             sizeof(char),
+                             &destLen, (const Bytef *) out_buf, out_offset);
+                csize = destLen;
+                if (r == Z_OK)
+                {
+                    yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count,
+                            out_offset, csize);
+                    break;
+                }
+                if (r != Z_MEM_ERROR)
+                {
+                    yaz_log(YLOG_WARN, "compress error: %d", r);
+                    csize = 0;
+                    break;
+                }
+                csize = csize * 2;
+            }
+#endif
+            break;
        case REC_COMPRESS_BZIP2:
 #if HAVE_BZLIB_H       
            csize = out_offset + (out_offset >> 6) + 620;
@@ -509,8 +540,8 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
                yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
                csize = 0;
            }
-           yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
-                 csize);
+           yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count,
+                    out_offset, csize);
 #endif
            break;
        case REC_COMPRESS_NONE:
@@ -761,6 +792,32 @@ static Record rec_get_int(Records p, zint sysno)
     in_size = entry.size - sizeof(short) - sizeof(char);
     switch (compression_method)
     {
+    case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+       bz_size = entry.size * 20 + 100;
+       while (1)
+       {
+            uLongf destLen = bz_size;
+           bz_buf = (char *) xmalloc(bz_size);
+           i = uncompress((Bytef *) bz_buf, &destLen,
+                           (const Bytef *) in_buf, in_size);
+           if (i == Z_OK)
+            {
+                yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
+                bz_size = destLen; 
+               break;
+            }
+           yaz_log(YLOG_LOG, "failed");
+           xfree(bz_buf);
+            bz_size *= 2;
+       }
+       in_buf = bz_buf;
+       in_size = bz_size;
+#else
+       yaz_log(YLOG_FATAL, "cannot decompress record(s) in ZLIB format");
+       return 0;
+#endif
+        break;
     case REC_COMPRESS_BZIP2:
 #if HAVE_BZLIB_H
        bz_size = entry.size * 20 + 100;
index 2b69a2d..65b9176 100644 (file)
@@ -334,7 +334,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 {
     struct zebra_register *reg;
     int record_compression = REC_COMPRESS_NONE;
-    const char *recordCompression = 0;
+    const char *compression_str = 0;
     const char *profilePath;
     int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
@@ -420,11 +420,18 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     zebraRankInstall(reg, rank_similarity_class);
     zebraRankInstall(reg, rank_static_class);
 
-    recordCompression = res_get_def(res, "recordCompression", "none");
-    if (!strcmp(recordCompression, "none"))
+    compression_str = res_get_def(res, "recordCompression", "none");
+    if (!strcmp(compression_str, "none"))
        record_compression = REC_COMPRESS_NONE;
-    if (!strcmp(recordCompression, "bzip2"))
+    else if (!strcmp(compression_str, "bzip2"))
        record_compression = REC_COMPRESS_BZIP2;
+    else if (!strcmp(compression_str, "zlib"))
+       record_compression = REC_COMPRESS_ZLIB;
+    else
+    {
+        yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str);
+        ret = ZEBRA_FAIL;
+    }
 
     {
        const char *index_fname = res_get_def(res, "index", "default.idx");