From d71149439012fadf092321e3b507d23711715743 Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Mon, 6 Jul 2009 13:15:09 +0200
Subject: [PATCH] Allow records to be zlib-compressed

Configure now looks for zlib development headers + libs. When present,
zlib compression may be enabled with recordCompression: zlib in
zebra.cfg.

Compression retries with a larger output buffer only when zlib reports
Z_BUF_ERROR. Decompression likewise retries only on Z_BUF_ERROR and
gives up on any other zlib error instead of growing the buffer forever.
---
 configure.ac     |    5 +++++
 debian/control   |    2 +-
 idzebra.spec.in  |    2 +-
 index/recindex.h |    1 +
 index/records.c  |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 index/zebraapi.c |   15 ++++++++++----
 6 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/configure.ac b/configure.ac
index 936f76a..d996cd2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -174,6 +174,11 @@ else
     fi
 fi
 dnl
+dnl ------- zlib
+AC_CHECK_LIB([z],[compress2])
+if test "$ac_cv_lib_z_compress2" = "yes"; then
+    AC_CHECK_HEADERS([zlib.h])
+fi
 dnl ------ -lm
 AC_CHECK_LIB(m,sqrt)
 dnl ------ -ldl
diff --git a/debian/control b/debian/control
index 5e98673..61afbb6 100644
--- a/debian/control
+++ b/debian/control
@@ -2,7 +2,7 @@ Source: idzebra
 Priority: optional
 Maintainer: Adam Dickmeiss
 Standards-Version: 3.6.0
-Build-Depends: debhelper (>= 4.0.0), libyaz3-dev (>= 3.0.30), libexpat1-dev, tcl8.4-dev, libbz2-dev, libxslt1-dev
+Build-Depends: debhelper (>= 4.0.0), libyaz3-dev (>= 3.0.30), libexpat1-dev, tcl8.4-dev, libbz2-dev, libxslt1-dev, zlib1g-dev
 
 Package: idzebra-2.0
 Section: text
diff --git a/idzebra.spec.in b/idzebra.spec.in
index 1355f00..46cf0d2 100644
--- a/idzebra.spec.in
+++ b/idzebra.spec.in
@@ -9,7 +9,7 @@ Source: idzebra-%{version}.tar.gz
 BuildRoot: %{_tmppath}/idzebra-%{version}-root
 Packager: Adam Dickmeiss
 URL: http://www.indexdata.dk/zebra/
-BuildRequires: libyaz3-devel expat-devel bzip2-devel tcl
+BuildRequires: libyaz3-devel expat-devel bzip2-devel tcl zlib-devel
 Summary: High-performance, structured text indexing and retrival engine.
 
 %description
diff --git a/index/recindex.h b/index/recindex.h
index a3078c1..3320929 100644
--- a/index/recindex.h
+++ b/index/recindex.h
@@ -93,6 +93,7 @@ zint rec_sysno_to_int(zint sysno);
 /** \brief compression types */
 #define REC_COMPRESS_NONE   0
 #define REC_COMPRESS_BZIP2  1
+#define REC_COMPRESS_ZLIB   2
 
 enum {
     recInfo_fileType,
diff --git a/index/records.c b/index/records.c
index ac05fa8..60908db 100644
--- a/index/records.c
+++ b/index/records.c
@@ -44,6 +44,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 #if HAVE_BZLIB_H
 #include <bzlib.h>
 #endif
+#if HAVE_ZLIB_H
+#include <zlib.h>
+#endif
 
 #define REC_BLOCK_TYPES 2
 #define REC_HEAD_MAGIC "recindex"
@@ -492,6 +495,35 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
         char compression_method = p->compression_method;
         switch (compression_method)
         {
+        case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+            csize = out_offset + (out_offset >> 6) + 620;
+            while (1)
+            {
+                int r;
+                uLongf destLen = csize;
+                rec_tmp_expand(p, csize);
+                r = compress((Bytef *) p->tmp_buf+sizeof(zint)+sizeof(short)+
+                             sizeof(char),
+                             &destLen, (const Bytef *) out_buf, out_offset);
+                if (r == Z_OK)
+                {
+                    csize = destLen;
+                    yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count,
+                            out_offset, csize);
+                    break;
+                }
+                if (r != Z_BUF_ERROR)
+                {
+                    yaz_log(YLOG_WARN, "compress error: %d", r);
+                    csize = 0;
+                    break;
+                }
+                /* Z_BUF_ERROR: output buffer too small; retry with more room */
+                csize = csize * 2;
+            }
+#endif
+            break;
         case REC_COMPRESS_BZIP2:
 #if HAVE_BZLIB_H
             csize = out_offset + (out_offset >> 6) + 620;
@@ -509,8 +541,8 @@ static ZEBRA_RES rec_flush_shared(Records p, short ref_count, zint *sysnos,
                 yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
                 csize = 0;
             }
-            yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
-                    csize);
+            yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count,
+                    out_offset, csize);
 #endif
             break;
         case REC_COMPRESS_NONE:
@@ -761,6 +793,39 @@ static Record rec_get_int(Records p, zint sysno)
     in_size = entry.size - sizeof(short) - sizeof(char);
     switch (compression_method)
     {
+    case REC_COMPRESS_ZLIB:
+#if HAVE_ZLIB_H
+        bz_size = entry.size * 20 + 100;
+        while (1)
+        {
+            uLongf destLen = bz_size;
+            bz_buf = (char *) xmalloc(bz_size);
+            i = uncompress((Bytef *) bz_buf, &destLen,
+                           (const Bytef *) in_buf, in_size);
+            if (i == Z_OK)
+            {
+                yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
+                bz_size = destLen;
+                break;
+            }
+            xfree(bz_buf);
+            if (i != Z_BUF_ERROR)
+            {
+                /* corrupt input or out of memory: a bigger buffer cannot help */
+                yaz_log(YLOG_FATAL, "uncompress error: %d", i);
+                return 0;
+            }
+            /* Z_BUF_ERROR: output buffer too small; retry with twice the size */
+            yaz_log(YLOG_LOG, "failed");
+            bz_size *= 2;
+        }
+        in_buf = bz_buf;
+        in_size = bz_size;
+#else
+        yaz_log(YLOG_FATAL, "cannot decompress record(s) in ZLIB format");
+        return 0;
+#endif
+        break;
     case REC_COMPRESS_BZIP2:
 #if HAVE_BZLIB_H
         bz_size = entry.size * 20 + 100;
diff --git a/index/zebraapi.c b/index/zebraapi.c
index 2b69a2d..65b9176 100644
--- a/index/zebraapi.c
+++ b/index/zebraapi.c
@@ -334,7 +334,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 {
     struct zebra_register *reg;
     int record_compression = REC_COMPRESS_NONE;
-    const char *recordCompression = 0;
+    const char *compression_str = 0;
     const char *profilePath;
     int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
@@ -420,11 +420,18 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     zebraRankInstall(reg, rank_similarity_class);
     zebraRankInstall(reg, rank_static_class);
 
-    recordCompression = res_get_def(res, "recordCompression", "none");
-    if (!strcmp(recordCompression, "none"))
+    compression_str = res_get_def(res, "recordCompression", "none");
+    if (!strcmp(compression_str, "none"))
         record_compression = REC_COMPRESS_NONE;
-    if (!strcmp(recordCompression, "bzip2"))
+    else if (!strcmp(compression_str, "bzip2"))
         record_compression = REC_COMPRESS_BZIP2;
+    else if (!strcmp(compression_str, "zlib"))
+        record_compression = REC_COMPRESS_ZLIB;
+    else
+    {
+        yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str);
+        ret = ZEBRA_FAIL;
+    }
 
     {
         const char *index_fname = res_get_def(res, "index", "default.idx");
-- 
1.7.10.4