From 764b78fb481cf1084a5a583ffefbe8a0f32af28c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 16 Apr 2002 22:31:42 +0000 Subject: [PATCH] isamb work --- include/Makefile.am | 2 +- include/isamb.h | 18 +- include/rsisamb.h | 33 ++++ index/index.h | 4 +- index/kinput.c | 174 +++++++++++------ index/trunc.c | 192 +++++++++---------- index/zebraapi.c | 22 ++- index/zsets.c | 3 +- isamb/isamb.c | 533 ++++++++++++++++++++++++++++++++++++++++++--------- rset/Makefile.am | 4 +- rset/rsisamb.c | 157 +++++++++++++++ 11 files changed, 884 insertions(+), 258 deletions(-) create mode 100644 include/rsisamb.h create mode 100644 rset/rsisamb.c diff --git a/include/Makefile.am b/include/Makefile.am index 2e9ccdd..a753b84 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -3,4 +3,4 @@ noinst_HEADERS = bfile.h bset.h charmap.h dfa.h dict.h direntz.h isam.h \ isamc.h isamd.h isams.h mfile.h passwddb.h recctrl.h res.h rsbool.h rset.h \ rsisam.h rsisamc.h rsisams.h rsisamd.h rsm_or.h rsnull.h rstemp.h set.h \ sortidx.h str.h zebra-lock.h zebramap.h zebrautl.h zebraver.h isamb.h \ - rsbetween.h + rsbetween.h rsisamb.h diff --git a/include/isamb.h b/include/isamb.h index 355c7a4..e89e8cc 100644 --- a/include/isamb.h +++ b/include/isamb.h @@ -1,5 +1,7 @@ /* - * $Id: isamb.h,v 1.1 2000-10-17 12:37:09 adam Exp $ + * Copyright (C) 2000-2002, Index Data + * All rights reserved. 
+ * $Id: isamb.h,v 1.2 2002-04-16 22:31:42 adam Exp $ */ #ifndef ISAMB_H @@ -9,8 +11,20 @@ #include typedef struct ISAMB_s *ISAMB; +typedef struct ISAMB_PP_s *ISAMB_PP; +typedef ISAMC_P ISAMB_P; -ISAMB isamb_open (BFiles bfs, const char *name, ISAMC_M method); +ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method); void isamb_close (ISAMB isamb); +ISAMB_P isamb_merge (ISAMB b, ISAMB_P pos, ISAMC_I data); + +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos); + +int isamb_pp_read (ISAMB_PP pp, void *buf); + +void isamb_pp_close (ISAMB_PP pp); + +int isamb_pp_num (ISAMB_PP pp); + #endif diff --git a/include/rsisamb.h b/include/rsisamb.h new file mode 100644 index 0000000..01f2dd3 --- /dev/null +++ b/include/rsisamb.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2001-2002, Index Data + * All rights reserved. + * + * $Id: rsisamb.h,v 1.1 2002-04-16 22:31:42 adam Exp $ + */ + +#ifndef RSET_ISAMB_H +#define RSET_ISAMB_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern const struct rset_control *rset_kind_isamb; + +typedef struct rset_isamb_parms +{ + int (*cmp)(const void *p1, const void *p2); + int key_size; + ISAMB is; + ISAMB_P pos; + RSET_TERM rset_term; +} rset_isamb_parms; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/index/index.h b/index/index.h index 12703d2..fcd8eb0 100644 --- a/index/index.h +++ b/index/index.h @@ -2,7 +2,7 @@ * Copyright (C) 1995-2002, Index Data * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss, Heikki Levanto - * $Id: index.h,v 1.81 2002-04-15 14:05:43 adam Exp $ + * $Id: index.h,v 1.82 2002-04-16 22:31:42 adam Exp $ */ #ifndef INDEX_H @@ -23,6 +23,7 @@ #include #include #include +#include #define ISAM_DEFAULT "c" #include #include @@ -203,6 +204,7 @@ struct zebra_register { ISAM isam; ISAMC isamc; ISAMD isamd; + ISAMB isamb; Dict dict; Dict matchDict; SortIdx sortIdx; diff --git a/index/kinput.c b/index/kinput.c index b1fb8dd..e8d154a 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * - * $Id: kinput.c,v 1.48 2002-04-05 08:46:26 adam Exp $ + * $Id: kinput.c,v 1.49 2002-04-16 22:31:42 adam Exp $ * * Bugs * - Allocates a lot of memory for the merge process, but never releases it. @@ -29,11 +29,6 @@ #define INP_BUF_START 60000 #define INP_BUF_ADD 400000 -static int no_diffs = 0; -static int no_updates = 0; -static int no_deletions = 0; -static int no_insertions = 0; -static int no_iterations = 0; struct key_file { int no; /* file no */ @@ -229,11 +224,13 @@ struct heap_info { int heapnum; int *ptr; int (*cmp)(const void *p1, const void *p2); - Dict dict; - ISAMS isams; - ISAM isam; - ISAMC isamc; - ISAMD isamd; + struct zebra_register *reg; + + int no_diffs; + int no_updates; + int no_deletions; + int no_insertions; + int no_iterations; }; struct heap_info *key_heap_init (int nkeys, @@ -254,6 +251,12 @@ struct heap_info *key_heap_init (int nkeys, hi->ptr[i] = i; hi->info.buf[i] = (char *) xmalloc (INP_NAME_MAX); } + hi->no_diffs = 0; + hi->no_diffs = 0; + hi->no_updates = 0; + hi->no_deletions = 0; + hi->no_insertions = 0; + hi->no_iterations = 0; return hi; } @@ -340,7 +343,7 @@ static int heap_read_one (struct heap_info *hi, char *name, char *key) key_heap_delete (hi); if ((r = key_file_read (kf, rbuf))) key_heap_insert (hi, rbuf, r, kf); - no_iterations++; + hi->no_iterations++; return 1; } @@ -401,30 +404,82 @@ int heap_inpc 
(struct heap_info *hi) strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); - no_diffs++; - if ((dict_info = dict_lookup (hi->dict, hci.cur_name))) + hi->no_diffs++; + if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) + { + memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P)); + isamc_p2 = isc_merge (hi->reg->isamc, isamc_p, isamc_i); + if (!isamc_p2) + { + hi->no_deletions++; + if (!dict_delete (hi->reg->dict, this_name)) + abort(); + } + else + { + hi->no_updates++; + if (isamc_p2 != isamc_p) + dict_insert (hi->reg->dict, this_name, + sizeof(ISAMC_P), &isamc_p2); + } + } + else + { + isamc_p = isc_merge (hi->reg->isamc, 0, isamc_i); + hi->no_insertions++; + dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p); + } + } + xfree (isamc_i); + xfree (hci.key); + return 0; +} + +int heap_inpb (struct heap_info *hi) +{ + struct heap_cread_info hci; + ISAMC_I isamc_i = (ISAMC_I) xmalloc (sizeof(*isamc_i)); + + hci.key = (char *) xmalloc (KEY_SIZE); + hci.mode = 1; + hci.hi = hi; + hci.more = heap_read_one (hi, hci.cur_name, hci.key); + + isamc_i->clientData = &hci; + isamc_i->read_item = heap_cread_item; + + while (hci.more) + { + char this_name[INP_NAME_MAX]; + ISAMC_P isamc_p, isamc_p2; + char *dict_info; + + strcpy (this_name, hci.cur_name); + assert (hci.cur_name[1]); + hi->no_diffs++; + if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { memcpy (&isamc_p, dict_info+1, sizeof(ISAMC_P)); - isamc_p2 = isc_merge (hi->isamc, isamc_p, isamc_i); + isamc_p2 = isamb_merge (hi->reg->isamb, isamc_p, isamc_i); if (!isamc_p2) { - no_deletions++; - if (!dict_delete (hi->dict, this_name)) + hi->no_deletions++; + if (!dict_delete (hi->reg->dict, this_name)) abort(); } else { - no_updates++; + hi->no_updates++; if (isamc_p2 != isamc_p) - dict_insert (hi->dict, this_name, + dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p2); } } else { - isamc_p = isc_merge (hi->isamc, 0, isamc_i); - no_insertions++; - dict_insert (hi->dict, 
this_name, sizeof(ISAMC_P), &isamc_p); + isamc_p = isamb_merge (hi->reg->isamb, 0, isamc_i); + hi->no_insertions++; + dict_insert (hi->reg->dict, this_name, sizeof(ISAMC_P), &isamc_p); } } xfree (isamc_i); @@ -453,30 +508,30 @@ int heap_inpd (struct heap_info *hi) strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); - no_diffs++; - if ((dict_info = dict_lookup (hi->dict, hci.cur_name))) + hi->no_diffs++; + if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { memcpy (&isamd_p, dict_info+1, sizeof(ISAMD_P)); - isamd_p2 = isamd_append (hi->isamd, isamd_p, isamd_i); + isamd_p2 = isamd_append (hi->reg->isamd, isamd_p, isamd_i); if (!isamd_p2) { - no_deletions++; - if (!dict_delete (hi->dict, this_name)) + hi->no_deletions++; + if (!dict_delete (hi->reg->dict, this_name)) abort(); } else { - no_updates++; + hi->no_updates++; if (isamd_p2 != isamd_p) - dict_insert (hi->dict, this_name, + dict_insert (hi->reg->dict, this_name, sizeof(ISAMD_P), &isamd_p2); } } else { - isamd_p = isamd_append (hi->isamd, 0, isamd_i); - no_insertions++; - dict_insert (hi->dict, this_name, sizeof(ISAMD_P), &isamd_p); + isamd_p = isamd_append (hi->reg->isamd, 0, isamd_i); + hi->no_insertions++; + dict_insert (hi->reg->dict, this_name, sizeof(ISAMD_P), &isamd_p); } } xfree (isamd_i); @@ -519,33 +574,34 @@ int heap_inp (struct heap_info *hi) key_buf = new_key_buf; } } - no_diffs++; + hi->no_diffs++; nmemb = key_buf_ptr / KEY_SIZE; assert (nmemb * (int) KEY_SIZE == key_buf_ptr); - if ((info = dict_lookup (hi->dict, cur_name))) + if ((info = dict_lookup (hi->reg->dict, cur_name))) { ISAM_P isam_p, isam_p2; memcpy (&isam_p, info+1, sizeof(ISAM_P)); - isam_p2 = is_merge (hi->isam, isam_p, nmemb, key_buf); + isam_p2 = is_merge (hi->reg->isam, isam_p, nmemb, key_buf); if (!isam_p2) { - no_deletions++; - if (!dict_delete (hi->dict, cur_name)) + hi->no_deletions++; + if (!dict_delete (hi->reg->dict, cur_name)) abort (); } else { - no_updates++; + hi->no_updates++; if (isam_p2 != isam_p) 
- dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p2); + dict_insert (hi->reg->dict, cur_name, + sizeof(ISAM_P), &isam_p2); } } else { ISAM_P isam_p; - no_insertions++; - isam_p = is_merge (hi->isam, 0, nmemb, key_buf); - dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p); + hi->no_insertions++; + isam_p = is_merge (hi->reg->isam, 0, nmemb, key_buf); + dict_insert (hi->reg->dict, cur_name, sizeof(ISAM_P), &isam_p); } memcpy (key_buf, next_key, KEY_SIZE); strcpy (cur_name, next_name); @@ -574,12 +630,12 @@ int heap_inps (struct heap_info *hi) strcpy (this_name, hci.cur_name); assert (hci.cur_name[1]); - no_diffs++; - if (!(dict_info = dict_lookup (hi->dict, hci.cur_name))) + hi->no_diffs++; + if (!(dict_info = dict_lookup (hi->reg->dict, hci.cur_name))) { - isams_p = isams_merge (hi->isams, isams_i); - no_insertions++; - dict_insert (hi->dict, this_name, sizeof(ISAMS_P), &isams_p); + isams_p = isams_merge (hi->reg->isams, isams_i); + hi->no_insertions++; + dict_insert (hi->reg->dict, this_name, sizeof(ISAMS_P), &isams_p); } else { @@ -663,34 +719,32 @@ void zebra_index_merge (ZebraHandle zh) progressInfo.totalOffset += kf[i]->buf_size; } hi = key_heap_init (nkeys, key_qsort_compare); - hi->dict = zh->reg->dict; - hi->isams = zh->reg->isams; - hi->isam = zh->reg->isam; - hi->isamc = zh->reg->isamc; - hi->isamd = zh->reg->isamd; + hi->reg = zh->reg; for (i = 1; i<=nkeys; i++) if ((r = key_file_read (kf[i], rbuf))) key_heap_insert (hi, rbuf, r, kf[i]); if (zh->reg->isams) heap_inps (hi); - else if (zh->reg->isamc) + if (zh->reg->isamc) heap_inpc (hi); - else if (zh->reg->isam) + if (zh->reg->isam) heap_inp (hi); - else if (zh->reg->isamd) + if (zh->reg->isamd) heap_inpd (hi); + if (zh->reg->isamb) + heap_inpb (hi); for (i = 1; i<=nkeys; i++) { extract_get_fname_tmp (zh, rbuf, i); unlink (rbuf); } - logf (LOG_LOG, "Iterations . . .%7d", no_iterations); - logf (LOG_LOG, "Distinct words .%7d", no_diffs); - logf (LOG_LOG, "Updates. . . . 
.%7d", no_updates); - logf (LOG_LOG, "Deletions. . . .%7d", no_deletions); - logf (LOG_LOG, "Insertions . . .%7d", no_insertions); + logf (LOG_LOG, "Iterations . . .%7d", hi->no_iterations); + logf (LOG_LOG, "Distinct words .%7d", hi->no_diffs); + logf (LOG_LOG, "Updates. . . . .%7d", hi->no_updates); + logf (LOG_LOG, "Deletions. . . .%7d", hi->no_deletions); + logf (LOG_LOG, "Insertions . . .%7d", hi->no_insertions); zh->reg->key_file_no = 0; key_heap_destroy (hi, nkeys); diff --git a/index/trunc.c b/index/trunc.c index 40c0e50..8f4a42b 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,83 +1,9 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss + * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * - * $Log: trunc.c,v $ - * Revision 1.23 2002-04-12 14:40:42 adam - * Work on XPATH - * - * Revision 1.22 2002/04/05 08:46:26 adam - * Zebra with full functionality - * - * Revision 1.21 2002/04/04 14:14:13 adam - * Multiple registers (alpha early) - * - * Revision 1.20 2002/03/20 20:24:29 adam - * Hits per term. Returned in SearchResult-1 - * - * Revision 1.19 2001/01/16 16:56:15 heikki - * Searching in my isam-d - * - * Revision 1.18 2000/05/18 12:01:36 adam - * System call times(2) used again. More 64-bit fixes. - * - * Revision 1.17 2000/03/15 15:00:30 adam - * First work on threaded version. - * - * Revision 1.16 1999/11/30 13:48:03 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.15 1999/07/20 13:59:18 adam - * Fixed bug that occurred when phrases had 0 hits. - * - * Revision 1.14 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.13 1999/05/12 13:08:06 adam - * First version of ISAMS. - * - * Revision 1.12 1999/02/02 14:51:10 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.11 1998/03/25 13:48:02 adam - * Fixed bug in rset_trunc_r. 
- * - * Revision 1.10 1998/03/05 08:45:13 adam - * New result set model and modular ranking system. Moved towards - * descent server API. System information stored as "SGML" records. - * - * Revision 1.9 1998/01/12 15:04:09 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.8 1997/10/31 12:34:27 adam - * Bug fix: memory leak. - * - * Revision 1.7 1997/09/29 09:07:29 adam - * Minor change. - * - * Revision 1.6 1997/09/22 12:39:06 adam - * Added get_pos method for the ranked result sets. - * - * Revision 1.5 1997/09/17 12:19:17 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.4 1996/12/23 15:30:44 adam - * Work on truncation. - * Bug fix: result sets weren't deleted after server shut down. - * - * Revision 1.3 1996/12/20 11:07:14 adam - * Multi-or result set. - * - * Revision 1.2 1996/11/08 11:10:28 adam - * Buffers used during file match got bigger. - * Compressed ISAM support everywhere. - * Bug fixes regarding masking characters in queries. - * Redesigned Regexp-2 queries. - * - * Revision 1.1 1996/11/04 14:07:40 adam - * Moved truncation code to trunc.c. 
+ * $Id: trunc.c,v 1.24 2002-04-16 22:31:42 adam Exp $ * */ #include @@ -92,6 +18,7 @@ #include #include #include +#include #if NEW_TRUNC #include #endif @@ -294,31 +221,34 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); -#if 1 + if (preserve_position) + { /* section that preserve all keys */ - heap_delete (ti); - if (is_readkey (ispt[n], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, n); + heap_delete (ti); + if (is_readkey (ispt[n], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, n); + else + is_pt_free (ispt[n]); + } else - is_pt_free (ispt[n]); -#else -/* section that preserve all keys with unique sysnos */ - while (1) { - if (!is_readkey (ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - is_pt_free (ispt[n]); - break; - } - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) +/* section that preserve all keys with unique sysnos */ + while (1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); - break; + if (!is_readkey (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + is_pt_free (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } } } -#endif } heap_close (ti); xfree (ispt); @@ -429,7 +359,6 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, heap_close (ti); xfree (ispt); } - else if (zi->reg->isams) { ISAMS_PP *ispt; @@ -472,6 +401,60 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, heap_close (ti); xfree (ispt); } + else if (zi->reg->isamb) + { + ISAMB_PP *ispt; + int i; + struct trunc_info *ti; + + ispt = (ISAMB_PP *) xmalloc (sizeof(*ispt) * (to-from)); + + ti = heap_init (to-from, sizeof(struct it_key), + key_compare_it); + for (i = to-from; --i >= 0; ) + { + ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i]); + if (isamb_pp_read (ispt[i], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, i); + else 
+ isamb_pp_close (ispt[i]); + } + while (ti->heapnum) + { + int n = ti->indx[ti->ptr[1]]; + + rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + + if (preserve_position) + { + heap_delete (ti); + if (isamb_pp_read (ispt[n], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, n); + else + isamb_pp_close (ispt[n]); + } + else + { + while (1) + { + if (!isamb_pp_read (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + isamb_pp_close (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } + } + } + } + heap_close (ti); + xfree (ispt); + } else logf (LOG_WARN, "Unknown isam set in rset_trunc_r"); @@ -618,6 +601,21 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, #endif qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp); } + else if (zi->reg->isamb) + { + if (no == 1) + { + rset_isamb_parms parms; + + parms.key_size = sizeof(struct it_key); + parms.cmp = key_compare_it; + parms.pos = *isam_p; + parms.is = zi->reg->isamb; + parms.rset_term = rset_term_create (term, length, flags); + return rset_create (rset_kind_isamb, &parms); + } + qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp); + } else { logf (LOG_WARN, "Unknown isam set in rset_trunc"); diff --git a/index/zebraapi.c b/index/zebraapi.c index ecc2b32..0cf60f3 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2002, Index Data * All rights reserved. 
* - * $Id: zebraapi.c,v 1.56 2002-04-15 14:05:43 adam Exp $ + * $Id: zebraapi.c,v 1.57 2002-04-16 22:31:42 adam Exp $ */ #include @@ -187,6 +187,7 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, reg->isam = 0; reg->isamc = 0; reg->isamd = 0; + reg->isamb = 0; reg->zei = 0; reg->matchDict = 0; @@ -227,7 +228,7 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, return 0; } } - else if (res_get_match (res, "isam", "i", ISAM_DEFAULT)) + if (res_get_match (res, "isam", "i", ISAM_DEFAULT)) { if (!(reg->isam = is_open (reg->bfs, FNAME_ISAM, key_compare, rw, sizeof (struct it_key), res))) @@ -236,7 +237,7 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, return 0; } } - else if (res_get_match (res, "isam", "c", ISAM_DEFAULT)) + if (res_get_match (res, "isam", "c", ISAM_DEFAULT)) { struct ISAMC_M_s isamc_m; if (!(reg->isamc = isc_open (reg->bfs, FNAME_ISAMC, @@ -246,7 +247,7 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, return 0; } } - else if (res_get_match (res, "isam", "d", ISAM_DEFAULT)) + if (res_get_match (res, "isam", "d", ISAM_DEFAULT)) { struct ISAMD_M_s isamd_m; @@ -257,6 +258,17 @@ struct zebra_register *zebra_register_open (ZebraService zs, const char *name, return 0; } } + if (res_get_match (res, "isam", "b", ISAM_DEFAULT)) + { + struct ISAMC_M_s isamc_m; + + if (!(reg->isamb = isamb_open (reg->bfs, "isamb", + rw, key_isamc_m(res, &isamc_m)))) + { + logf (LOG_WARN, "isamb_open"); + return 0; + } + } reg->zei = zebraExplain_open (reg->records, reg->dh, res, rw, reg, explain_extract); @@ -305,6 +317,8 @@ static void zebra_register_close (ZebraService zs, struct zebra_register *reg) isc_close (reg->isamc); if (reg->isamd) isamd_close (reg->isamd); + if (reg->isamb) + isamb_close (reg->isamb); rec_close (®->records); } diff --git a/index/zsets.c b/index/zsets.c index b9c30ec..7297d02 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ 
-3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Id: zsets.c,v 1.34 2002-04-04 14:14:13 adam Exp $ + * $Id: zsets.c,v 1.35 2002-04-16 22:31:42 adam Exp $ */ #include #include @@ -679,6 +679,7 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) do { kno++; + yaz_log (LOG_LOG, "%d:%d", key.sysno, key.seqno); if (key.sysno != psysno) { score = (*rc->calc) (handle, psysno); diff --git a/isamb/isamb.c b/isamb/isamb.c index 15a825e..c0940ad 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,142 +1,495 @@ #include +#include #include +#include + +struct ISAMB_head { + int first_block; + int last_block; + int block_size; +}; + +#define ISAMB_DATA_OFFSET 3 struct ISAMB_s { BFiles bfs; + BFile bf; ISAMC_M method; + int head_dirty; + + struct ISAMB_head head; +}; + +struct ISAMB_block { + int pos; + int size; + int leaf; + int dirty; + int offset; + unsigned char *bytes; + void *decodeClientData; +}; + +struct ISAMB_PP_s { + ISAMB isamb; + int level; + struct ISAMB_block **block; }; -typedef unsigned char *Bpage; +void encode_ptr (char **dst, int pos) +{ + memcpy (*dst, &pos, sizeof(pos)); + (*dst) += sizeof(pos); +} -ISAMB isamb_open (BFiles bfs, const char *name, ISAMC_M method) +void decode_ptr (char **src, int *pos) +{ + memcpy (pos, *src, sizeof(*pos)); + (*src) += sizeof(*pos); +} + +ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method) { ISAMB isamb = xmalloc (sizeof(*isamb)); isamb->bfs = bfs; isamb->method = (ISAMC_M) xmalloc (sizeof(*method)); memcpy (isamb->method, method, sizeof(*method)); + + isamb->head.first_block = 1; + isamb->head.last_block = 1; + isamb->head.block_size = 1024; + isamb->head_dirty = 0; + + isamb->bf = bf_open (bfs, name, isamb->head.block_size, writeflag); + + bf_read (isamb->bf, 0, 0, sizeof(struct ISAMB_head), + &isamb->head); return isamb; } void isamb_close (ISAMB isamb) { + if (isamb->head_dirty) + bf_write (isamb->bf, 0, 0, sizeof(struct ISAMB_head), &isamb->head); 
xfree (isamb->method); xfree (isamb); } -#if 0 -/* read page at pos */ -void isamb_get_block (ISAMB is, ISAMB_pos pos, Bpage *page) +struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos) { + struct ISAMB_block *p; + if (!pos) + return 0; + p = xmalloc (sizeof(*p)); + p->pos = pos; + p->bytes = xmalloc (b->head.block_size); + bf_read (b->bf, pos, 0, 0, p->bytes); + p->leaf = p->bytes[0]; + p->size = p->bytes[1] + 256 * p->bytes[2]; + p->offset = ISAMB_DATA_OFFSET; + p->dirty = 0; + p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); + return p; } -/* alloc page */ -ISAMB_pos isamb_alloc_block (ISAMB is, int block_size, Bpage *page) +struct ISAMB_block *new_block (ISAMB b, int leaf) { + struct ISAMB_block *p; + + p = xmalloc (sizeof(*p)); + p->pos = b->head.last_block++; + b->head_dirty = 1; + p->bytes = xmalloc (b->head.block_size); + memset (p->bytes, 0, b->head.block_size); + p->leaf = leaf; + p->size = ISAMB_DATA_OFFSET; + p->dirty = 1; + p->offset = ISAMB_DATA_OFFSET; + p->decodeClientData = (*b->method->code_start)(ISAMC_DECODE); + return p; } -#define isamb_page_set_leaf (p) 0[p] = 1 -#define isamb_page_set_noleaf (p) 0[p] = 0 -#define isamb_page_datalist (4+p) - -static void isamb_page_set_no(Bpage page, int no) +void close_block (ISAMB b, struct ISAMB_block *p) { - page[1] = no & 255; - page[2] = (no >> 8) & 255; - page[3] = (no >> 16) & 255; + if (!p) + return; + if (p->dirty) + { + p->bytes[0] = p->leaf; + p->bytes[1] = p->size & 255; + p->bytes[2] = p->size >> 8; + bf_write (b->bf, p->pos, 0, 0, p->bytes); + } + (*b->method->code_stop)(ISAMC_DECODE, p->decodeClientData); + xfree (p->bytes); + xfree (p); } -static int isamb_page_get_no(Bpage page) +void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item, + struct ISAMB_block **sp, + void *sub_item, int *sub_size); + +void insert_leaf (ISAMB b, struct ISAMB_block *p, const void *new_item, + struct ISAMB_block **sp, + void *sub_item, int *sub_size) { - return page[1] + 256*page[2] + 
65536*page[3]; + char dst_buf[2048]; + char *dst = dst_buf; + char *src = p->bytes + ISAMB_DATA_OFFSET; + char *endp = p->bytes + p->size; + void *c1 = (*b->method->code_start)(ISAMC_DECODE); + void *c2 = (*b->method->code_start)(ISAMC_ENCODE); + char *half1 = 0; + char *half2 = 0; + char *cut = dst_buf + p->size / 2; + char cut_item_buf[256]; + int cut_item_size = 0; + + while (src != endp) + { + char file_item_buf[256]; + char *file_item = file_item_buf; + + (*b->method->code_item)(ISAMC_DECODE, c1, &file_item, &src); + if (new_item) + { + int d = (*b->method->compare_item)(file_item_buf, new_item); + if (d > 0) + { + char *item_ptr = (char*) new_item; + (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr); + new_item = 0; + p->dirty = 1; + } + else if (d == 0) + { + new_item = 0; + } + } + + if (!half1 && dst > cut) + { + half1 = dst; /* candidate for splitting */ + + file_item = file_item_buf; + (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item); + + cut_item_size = file_item - file_item_buf; + memcpy (cut_item_buf, file_item_buf, cut_item_size); + + half2 = dst; + } + else + { + file_item = file_item_buf; + (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &file_item); + } + } + if (new_item) + { + char *item_ptr = (char*) new_item; + (*b->method->code_item)(ISAMC_ENCODE, c2, &dst, &item_ptr); + new_item = 0; + p->dirty = 1; + } + p->size = dst - dst_buf + ISAMB_DATA_OFFSET; + if (p->size > b->head.block_size) + { + char *first_dst; + char *cut_item = cut_item_buf; + + /* first half */ + p->size = half1 - dst_buf + ISAMB_DATA_OFFSET; + memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, half1 - dst_buf); + + /* second half */ + *sp = new_block (b, 1); + + (*b->method->code_reset)(c2); + + first_dst = (*sp)->bytes + ISAMB_DATA_OFFSET; + + (*b->method->code_item)(ISAMC_ENCODE, c2, &first_dst, &cut_item); + + memcpy (first_dst, half2, dst - half2); + + (*sp)->size = (first_dst - (char*) (*sp)->bytes) + (dst - half2); + (*sp)->dirty = 1; + p->dirty = 1; + 
memcpy (sub_item, cut_item_buf, cut_item_size); + *sub_size = cut_item_size; + + yaz_log (LOG_LOG, "l split %d / %d", p->size, (*sp)->size); + + } + else + { + assert (p->size > ISAMB_DATA_OFFSET); + assert (p->size <= b->head.block_size); + memcpy (p->bytes+ISAMB_DATA_OFFSET, dst_buf, dst - dst_buf); + *sp = 0; + } + (*b->method->code_stop)(ISAMC_DECODE, c1); + (*b->method->code_stop)(ISAMC_ENCODE, c2); } -void isamb_insert_sub(ISAMB is, ISAMB_pos *pos, const void *data) +void insert_int (ISAMB b, struct ISAMB_block *p, const void *new_item, + struct ISAMB_block **sp, + void *split_item, int *split_size) { - const char *src; - char dst[200]; - int no, i; - - isamb_get_block (is, *pos, &page); - if (!isamb_page_isleaf (page)) + char *startp = p->bytes + ISAMB_DATA_OFFSET; + char *src = startp; + char *endp = p->bytes + p->size; + int pos; + struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0; + char sub_item[256]; + int sub_size; + + *sp = 0; + + decode_ptr (&src, &pos); + while (src != endp) + { + int item_len; + int d; + decode_ptr (&src, &item_len); + d = (*b->method->compare_item)(src, new_item); + if (d > 0) + { + sub_p1 = open_block (b, pos); + assert (sub_p1); + insert_sub (b, sub_p1, new_item, &sub_p2, + sub_item, &sub_size); + break; + } + src += item_len; + decode_ptr (&src, &pos); + } + if (!sub_p1) { - ISAMB_pos subptr; - src = isamb_page_datalist (page); - no = isamb_page_get_no (page); - decodeClientData = (*is->method->code_start)(ISAMC_DECODE); - - isamb_read_subptr (&subptr, &src); - for (i = 0; imethod->code_item)(ISAMC_DECODE, decodeClientData, - dst, &src); - if ((*is->method->compare_item)(data, dst) < 0) - break; - - isamb_read_subptr (&subptr, src); - } - isamb_insert_sub (is, subptr, data); - *pos = subptr; - (*is->method->code_stop)(ISAMC_DECODE, decodeClientData); + sub_p1 = open_block (b, pos); + assert (sub_p1); + insert_sub (b, sub_p1, new_item, &sub_p2, + sub_item, &sub_size); } + if (sub_p2) + { + char dst_buf[2048]; + char *dst = dst_buf; + 
+ assert (sub_size < 20); + + memcpy (dst, startp, src - startp); + + dst += src - startp; + + encode_ptr (&dst, sub_size); /* sub length and item */ + memcpy (dst, sub_item, sub_size); + dst += sub_size; + + encode_ptr (&dst, sub_p2->pos); /* pos */ + + if (endp - src) /* remaining data */ + { + memcpy (dst, src, endp - src); + dst += endp - src; + } + p->size = dst - dst_buf + ISAMB_DATA_OFFSET; + if (p->size <= b->head.block_size) + { + memcpy (startp, dst_buf, dst - dst_buf); + } + else + { + int p_new_size; + char *half; + src = dst_buf; + endp = dst; + + half = src + b->head.block_size/2; + decode_ptr (&src, &pos); + while (src <= half) + { + decode_ptr (&src, split_size); + src += *split_size; + decode_ptr (&src, &pos); + } + p_new_size = src - dst_buf; + memcpy (p->bytes + ISAMB_DATA_OFFSET, dst_buf, p_new_size); + p_new_size += ISAMB_DATA_OFFSET; + + decode_ptr (&src, split_size); + memcpy (split_item, src, *split_size); + src += *split_size; + + *sp = new_block (b, 0); + (*sp)->size = endp - src; + memcpy ((*sp)->bytes+ISAMB_DATA_OFFSET, src, (*sp)->size); + (*sp)->size += ISAMB_DATA_OFFSET; + + yaz_log (LOG_LOG, "i split %d -> %d %d", + p->size, p_new_size, (*sp)->size); + p->size = p_new_size; + } + p->dirty = 1; + close_block (b, sub_p2); + } + close_block (b, sub_p1); +} + +void insert_sub (ISAMB b, struct ISAMB_block *p, const void *new_item, + struct ISAMB_block **sp, + void *sub_item, int *sub_size) +{ + if (p->leaf) + insert_leaf (b, p, new_item, sp, sub_item, sub_size); + else + insert_int (b, p, new_item, sp, sub_item, sub_size); +} + +int isamb_insert_one (ISAMB b, const void *item, ISAMC_P pos) +{ + struct ISAMB_block *p, *sp = 0; + char sub_item[256]; + int sub_size; + + if (!pos) + p = new_block (b, 1); else + p = open_block (b, pos); + if (!p) + return -1; + + insert_sub (b, p, item, &sp, sub_item, &sub_size); + if (sp) + { /* increase level of tree by one */ + struct ISAMB_block *p2 = new_block (b, 0); + char *dst = p2->bytes + p2->size; + 
+ encode_ptr (&dst, p->pos); + assert (sub_size < 20); + encode_ptr (&dst, sub_size); + memcpy (dst, sub_item, sub_size); + dst += sub_size; + encode_ptr (&dst, sp->pos); + + p2->size = dst - (char*) p2->bytes; + pos = p2->pos; /* return new super page */ + close_block (b, sp); + close_block (b, p2); + } + else + pos = p->pos; /* return current one (again) */ + close_block (b, p); + return pos; +} + +ISAMB_P isamb_merge (ISAMB b, ISAMB_P pos, ISAMC_I data) +{ + int i_mode; + char item_buf[256]; + char *item_ptr = item_buf; + while ((*data->read_item)(data->clientData, &item_ptr, &i_mode)) { - src = isamb_page_datalist (page); - no = isamb_page_get_no (page); - decodeClientData = (*is->method->code_start)(ISAMC_DECODE); - diff = -1; - for (i = 0; imethod->code_item)(ISAMC_DECODE, decodeClientData, - dst, &src); - diff = (*is->method->compare_item)(data, dst); - if (diff <= 0) - break; - } - if (diff < 0) - { - int j; - src = isamb_page_datalist (page); - page2 = isamb_page_dup (is, page); - dst2 = isamb_page_datalist (page2); - src2 = data; - for (j = 0; j <= no; j++) - { - if ( i == j) - (*is->method->code_item)(ISAMC_ENCODE, encodeClientData, - &dst2, &src2); - if (j < no) - { - char *dst0 = dst; - (*is->method->code_item)(ISAMC_DECODE, decodeClientData, - &dst, &src); - (*is->method->code_item)(ISAMC_ENCODE, encodeClientData, - &dst2, &dst0); - } - } - } + item_ptr = item_buf; + pos = isamb_insert_one (b, item_buf, pos); } + return pos; } -/* insert data(input) in table is(input) at pos(input/output) */ -int isamb_insert (ISAMB is, ISAMB_pos *pos, const void *data) + +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos) { - void *decodeClientData; + ISAMB_PP pp = xmalloc (sizeof(*pp)); - Bpage page; - if (*pos == 0) + pp->isamb = isamb; + pp->block = xmalloc (10 * sizeof(*pp->block)); + + pp->level = 0; + while (1) { - *pos = isamb_alloc_block (is, 1024, &page); - isamb_page_set_leaf (page); - isamb_page_set_no (page, 0); + struct ISAMB_block *p = open_block (isamb, 
pos); + char *src = p->bytes + p->offset; + pp->block[pp->level] = p; + + if (p->bytes[0]) /* leaf */ + break; + + decode_ptr (&src, &pos); + p->offset = src - (char*) p->bytes; + pp->level++; } - else /* find leaf ... */ + pp->block[pp->level+1] = 0; + return pp; +} + +void isamb_pp_close (ISAMB_PP pp) +{ + int i; + if (!pp) + return; + for (i = 0; i <= pp->level; i++) + close_block (pp->isamb, pp->block[i]); + xfree (pp->block); + xfree (pp); +} + +int isamb_pp_read (ISAMB_PP pp, void *buf) +{ + char *dst = buf; + char *src; + struct ISAMB_block *p = pp->block[pp->level]; + if (!p) + return 0; + + while (p->offset == p->size) { - isamb_insert_sub (is, pos, const void *data); + int pos, item_len; + while (p->offset == p->size) + { + if (pp->level == 0) + return 0; + close_block (pp->isamb, pp->block[pp->level]); + pp->block[pp->level] = 0; + (pp->level)--; + p = pp->block[pp->level]; + assert (p->bytes[0] == 0); /* must be int */ + } + src = p->bytes + p->offset; + + decode_ptr (&src, &item_len); + src += item_len; + decode_ptr (&src, &pos); + + p->offset = src - (char*) p->bytes; + ++(pp->level); + + while (1) + { + pp->block[pp->level] = p = open_block (pp->isamb, pos); + + if (p->bytes[0]) /* leaf */ + { + break; + } + src = p->bytes + p->offset; + decode_ptr (&src, &pos); + p->offset = src - (char*) p->bytes; + pp->level++; + } } + assert (p->offset < p->size); + assert (p->bytes[0]); + src = p->bytes + p->offset; + (*pp->isamb->method->code_item)(ISAMC_DECODE, p->decodeClientData, + &dst, &src); + p->offset = src - (char*) p->bytes; + return 1; +} + +int isamb_pp_num (ISAMB_PP pp) +{ + return 1; } -#endif diff --git a/rset/Makefile.am b/rset/Makefile.am index f35adce..820bb94 100644 --- a/rset/Makefile.am +++ b/rset/Makefile.am @@ -1,8 +1,8 @@ -## $Id: Makefile.am,v 1.3 2002-04-09 15:24:13 heikki Exp $ +## $Id: Makefile.am,v 1.4 2002-04-16 22:31:42 adam Exp $ noinst_LIBRARIES = librset.a librset_a_SOURCES = rset.c rstemp.c rsisam.c rsnull.c rsbool.c 
rsbetween.c \ - rsisamc.c rsm_or.c rsisams.c rsisamd.c + rsisamc.c rsm_or.c rsisams.c rsisamd.c rsisamb.c INCLUDES = -I$(srcdir)/../include @YAZINC@ diff --git a/rset/rsisamb.c b/rset/rsisamb.c new file mode 100644 index 0000000..7466c4a --- /dev/null +++ b/rset/rsisamb.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) 1994-2002, Index Data + * All rights reserved. + * + * $Id: rsisamb.c,v 1.1 2002-04-16 22:31:42 adam Exp $ + */ + + +/* NOTE(review): the header names of the four include lines that follow are missing from this copy of the patch */ #include +#include +#include +#include + +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); +static RSFD r_open (RSET ct, int flag); +static void r_close (RSFD rfd); +static void r_delete (RSET ct); +static void r_rewind (RSFD rfd); +static int r_count (RSET ct); +static int r_read (RSFD rfd, void *buf, int *term_index); +static int r_write (RSFD rfd, const void *buf); + +/* Function table exported through rset_kind_isamb. NOTE(review): the leading string reads "isamc" although this file implements the isamb kind; possibly copied from the isamc rset, confirm before relying on it. */ static const struct rset_control control = +{ + "isamc", + r_create, + r_open, + r_close, + r_delete, + r_rewind, + r_count, + r_read, + r_write, +}; + +const struct rset_control *rset_kind_isamb = &control; + +/* State of one open handle (the RSFD returned by r_open): the isamb cursor, the link to the next open handle, the owning rset info, a pointer to the term's counter and a scratch key of key_size bytes. */ struct rset_pp_info { + ISAMB_PP pt; + struct rset_pp_info *next; + struct rset_isamb_info *info; + int *countp; + void *buf; +}; + +/* State of one rset (returned by r_create): values copied from rset_isamb_parms plus the list of currently open handles. */ struct rset_isamb_info { + ISAMB is; + ISAMB_P pos; + int key_size; + int (*cmp)(const void *p1, const void *p2); + struct rset_pp_info *ispt_list; +}; + +/* r_create: treats parms as rset_isamb_parms, copies is, pos, key_size and cmp into a newly allocated rset_isamb_info with an empty handle list, sets RSET_FLAG_VOLATILE on ct, and installs parms' rset_term as the only entry of ct->rset_terms. Returns the new info. The sel argument is not used. */ static void *r_create(RSET ct, const struct rset_control *sel, void *parms) +{ + rset_isamb_parms *pt = (rset_isamb_parms *) parms; + struct rset_isamb_info *info; + + ct->flags |= RSET_FLAG_VOLATILE; + info = (struct rset_isamb_info *) xmalloc (sizeof(*info)); + info->is = pt->is; + info->pos = pt->pos; + info->key_size = pt->key_size; + info->cmp = pt->cmp; + info->ispt_list = NULL; + ct->no_rset_terms = 1; + ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); + ct->rset_terms[0] = pt->rset_term; + return info; +} + +/* r_open: opens a read handle on the rset. Logs and returns NULL when flag contains RSETF_WRITE. Otherwise allocates a rset_pp_info, pushes it on the front of info->ispt_list, opens an isamb cursor at info->pos, sets the term's nn from isamb_pp_num when nn is negative, resets the term's count to 0 and allocates a key_size scratch buffer. Returns the new handle. */ RSFD r_open (RSET ct, int flag) +{ + struct rset_isamb_info *info = (struct rset_isamb_info *) ct->buf; + struct rset_pp_info 
*ptinfo; + + logf (LOG_DEBUG, "risamb_open"); + if (flag & RSETF_WRITE) + { + logf (LOG_FATAL, "ISAMB set type is read-only"); + return NULL; + } + ptinfo = (struct rset_pp_info *) xmalloc (sizeof(*ptinfo)); + ptinfo->next = info->ispt_list; + info->ispt_list = ptinfo; + ptinfo->pt = isamb_pp_open (info->is, info->pos); + ptinfo->info = info; + if (ct->rset_terms[0]->nn < 0) + ct->rset_terms[0]->nn = isamb_pp_num (ptinfo->pt); + ct->rset_terms[0]->count = 0; + ptinfo->countp = &ct->rset_terms[0]->count; /* r_read increments the term's count through this pointer */ + ptinfo->buf = xmalloc (info->key_size); + return ptinfo; +} + +/* r_close: searches the owning rset's handle list for rfd; when found, frees its scratch buffer, closes its isamb cursor, unlinks it from the list and frees the handle. When rfd is not in the list, logs a fatal message and hits assert (0). */ static void r_close (RSFD rfd) +{ + struct rset_isamb_info *info = ((struct rset_pp_info*) rfd)->info; + struct rset_pp_info **ptinfop; + + for (ptinfop = &info->ispt_list; *ptinfop; ptinfop = &(*ptinfop)->next) + if (*ptinfop == rfd) + { + xfree ((*ptinfop)->buf); + isamb_pp_close ((*ptinfop)->pt); + *ptinfop = (*ptinfop)->next; + xfree (rfd); + return; + } + logf (LOG_FATAL, "r_close but no rfd match!"); + assert (0); +} + +/* r_delete: releases the rset. Asserts that no handle is still open, destroys the single term, then frees the term array and the info. */ static void r_delete (RSET ct) +{ + struct rset_isamb_info *info = (struct rset_isamb_info *) ct->buf; + + logf (LOG_DEBUG, "rsisamb_delete"); + assert (info->ispt_list == NULL); + rset_term_destroy (ct->rset_terms[0]); + xfree (ct->rset_terms); + xfree (info); +} + +/* r_rewind: not implemented; logs a debug line and calls abort (). */ static void r_rewind (RSFD rfd) +{ + logf (LOG_DEBUG, "rsisamb_rewind"); + abort (); +} + +/* r_count: always returns 0. */ static int r_count (RSET ct) +{ + return 0; +} + +/* r_read: reads the next key from the handle's isamb cursor into buf, sets *term_index to 0 and returns the result of isamb_pp_read. On a successful read the term counter is incremented, and buf is remembered in the scratch key, only when the counter is still 0 or cmp (buf, last remembered key) returns more than 1. */ static int r_read (RSFD rfd, void *buf, int *term_index) +{ + struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd; + int r; + *term_index = 0; + r = isamb_pp_read(pinfo->pt, buf); + if (r > 0) + { + if (*pinfo->countp == 0 || (*pinfo->info->cmp)(buf, pinfo->buf) > 1) /* NOTE(review): threshold is 1, not 0; presumably cmp returns larger values for coarser differences, verify against the comparison function supplied by the caller */ + { + memcpy (pinfo->buf, buf, pinfo->info->key_size); + (*pinfo->countp)++; + } + } + return r; +} + +/* r_write: writing is not supported; logs a fatal message and returns -1. */ static int r_write (RSFD rfd, const void *buf) +{ + logf (LOG_FATAL, "ISAMB set type is read-only"); + return -1; +} -- 1.7.10.4