X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Ftrunc.c;h=ba3f18f50bc6631ddc3f1e557b6eeef3f0e5cce8;hb=47ea1fc957c7b97bb30a26698f072109cae275e4;hp=167c562ee5135953dd0f84dba817909d9046e526;hpb=0912d7cd382d578f045ef8f2cd1e092b3747d6e5;p=idzebra-moved-to-github.git diff --git a/index/trunc.c b/index/trunc.c index 167c562..ba3f18f 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,71 +1,9 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss + * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto * - * $Log: trunc.c,v $ - * Revision 1.19 2001-01-16 16:56:15 heikki - * Searching in my isam-d - * - * Revision 1.18 2000/05/18 12:01:36 adam - * System call times(2) used again. More 64-bit fixes. - * - * Revision 1.17 2000/03/15 15:00:30 adam - * First work on threaded version. - * - * Revision 1.16 1999/11/30 13:48:03 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.15 1999/07/20 13:59:18 adam - * Fixed bug that occurred when phrases had 0 hits. - * - * Revision 1.14 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.13 1999/05/12 13:08:06 adam - * First version of ISAMS. - * - * Revision 1.12 1999/02/02 14:51:10 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.11 1998/03/25 13:48:02 adam - * Fixed bug in rset_trunc_r. - * - * Revision 1.10 1998/03/05 08:45:13 adam - * New result set model and modular ranking system. Moved towards - * descent server API. System information stored as "SGML" records. - * - * Revision 1.9 1998/01/12 15:04:09 adam - * The test option (-s) only uses read-lock (and not write lock). - * - * Revision 1.8 1997/10/31 12:34:27 adam - * Bug fix: memory leak. - * - * Revision 1.7 1997/09/29 09:07:29 adam - * Minor change. - * - * Revision 1.6 1997/09/22 12:39:06 adam - * Added get_pos method for the ranked result sets. - * - * Revision 1.5 1997/09/17 12:19:17 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.4 1996/12/23 15:30:44 adam - * Work on truncation. - * Bug fix: result sets weren't deleted after server shut down. - * - * Revision 1.3 1996/12/20 11:07:14 adam - * Multi-or result set. - * - * Revision 1.2 1996/11/08 11:10:28 adam - * Buffers used during file match got bigger. - * Compressed ISAM support everywhere. - * Bug fixes regarding masking characters in queries. - * Redesigned Regexp-2 queries. - * - * Revision 1.1 1996/11/04 14:07:40 adam - * Moved truncation code to trunc.c. + * $Id: trunc.c,v 1.26 2002-07-25 13:06:43 adam Exp $ * */ #include @@ -73,18 +11,17 @@ #define NEW_TRUNC 1 -#include "zserver.h" +#include "index.h" #include #include #include -#if ZMBOL #include #include #include +#include #if NEW_TRUNC #include #endif -#endif struct trunc_info { int *ptr; @@ -183,16 +120,18 @@ static void heap_close (struct trunc_info *ti) } static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, - const char *flags, ISAMS_P *isam_p, int from, int to, - int merge_chunk) + const char *flags, ISAMS_P *isam_p, int from, int to, + int merge_chunk, int preserve_position, + int term_type) { RSET result; RSFD result_rsfd; rset_temp_parms parms; + parms.cmp = key_compare_it; parms.key_size = sizeof(struct it_key); - parms.temp_path = res_get (zi->service->res, "setTmpDir"); - parms.rset_term = rset_term_create (term, length, flags); + parms.temp_path = res_get (zi->res, "setTmpDir"); + parms.rset_term = rset_term_create (term, length, flags, term_type); result = rset_create (rset_kind_temp, &parms); result_rsfd = rset_open (result, RSETF_WRITE); @@ -213,10 +152,14 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, { if (i_add <= to - i) rset[rscur] = rset_trunc_r (zi, term, length, flags, - isam_p, i, i+i_add, merge_chunk); + isam_p, i, i+i_add, + merge_chunk, preserve_position, + term_type); else rset[rscur] = rset_trunc_r (zi, term, length, flags, - isam_p, i, to, merge_chunk); + isam_p, i, to, + merge_chunk, preserve_position, + term_type); rscur++; } ti = heap_init (rscur, sizeof(struct it_key), key_compare_it); @@ -258,8 +201,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, xfree (rsfd); heap_close (ti); } -#if ZMBOL - else if (zi->service->isam) + else if (zi->reg->isam) { ISPT *ispt; int i; @@ -271,7 +213,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = is_position (zi->service->isam, isam_p[from+i]); + ispt[i] = is_position (zi->reg->isam, isam_p[from+i]); if (is_readkey (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else @@ -282,36 +224,39 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); -#if 1 + if (preserve_position) + { /* section that preserve all keys */ - heap_delete (ti); - if (is_readkey (ispt[n], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, n); + heap_delete (ti); + if (is_readkey (ispt[n], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, n); + else + is_pt_free (ispt[n]); + } else - is_pt_free (ispt[n]); -#else -/* section that preserve all keys with unique sysnos */ - while (1) { - if (!is_readkey (ispt[n], ti->tmpbuf)) - { - heap_delete (ti); - is_pt_free (ispt[n]); - break; - } - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) +/* section that preserve all keys with unique sysnos */ + while (1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); - break; + if (!is_readkey (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + is_pt_free (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } } } -#endif } heap_close (ti); xfree (ispt); } - else if (zi->service->isamc) + else if (zi->reg->isamc) { ISAMC_PP *ispt; int i; @@ -323,7 +268,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isc_pp_open (zi->service->isamc, isam_p[from+i]); + ispt[i] = isc_pp_open (zi->reg->isamc, isam_p[from+i]); if (isc_pp_read (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else @@ -334,37 +279,38 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, int n = ti->indx[ti->ptr[1]]; rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); -#if 0 -/* section that preserve all keys */ - heap_delete (ti); - if (isc_pp_read (ispt[n], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, n); - else - isc_pp_close (ispt[n]); -#else -/* section that preserve all keys with unique sysnos */ - while (1) + if (preserve_position) { - if (!isc_pp_read (ispt[n], ti->tmpbuf)) - { - heap_delete (ti); + heap_delete (ti); + if (isc_pp_read (ispt[n], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, n); + else isc_pp_close (ispt[n]); - break; - } - if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + } + else + { + while (1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); - break; + if (!isc_pp_read (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + isc_pp_close (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } } } -#endif } heap_close (ti); xfree (ispt); } - else if (zi->service->isamd) + else if (zi->reg->isamd) { ISAMD_PP *ispt; int i; @@ -376,7 +322,9 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isamd_pp_open (zi->service->isamd, isam_p[from+i]); + logf(LOG_FATAL, "isam_d does not (currently) support truncs"); + abort(); + /*ispt[i] = isamd_pp_open (zi->reg->isamd, isam_p[from+i]); */ if (isamd_pp_read (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else @@ -416,9 +364,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, heap_close (ti); xfree (ispt); } - -#endif - else if (zi->service->isams) + else if (zi->reg->isams) { ISAMS_PP *ispt; int i; @@ -430,7 +376,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isams_pp_open (zi->service->isams, isam_p[from+i]); + ispt[i] = isams_pp_open (zi->reg->isams, isam_p[from+i]); if (isams_pp_read (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); else @@ -460,6 +406,60 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, heap_close (ti); xfree (ispt); } + else if (zi->reg->isamb) + { + ISAMB_PP *ispt; + int i; + struct trunc_info *ti; + + ispt = (ISAMB_PP *) xmalloc (sizeof(*ispt) * (to-from)); + + ti = heap_init (to-from, sizeof(struct it_key), + key_compare_it); + for (i = to-from; --i >= 0; ) + { + ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i]); + if (isamb_pp_read (ispt[i], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, i); + else + isamb_pp_close (ispt[i]); + } + while (ti->heapnum) + { + int n = ti->indx[ti->ptr[1]]; + + rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + + if (preserve_position) + { + heap_delete (ti); + if (isamb_pp_read (ispt[n], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, n); + else + isamb_pp_close (ispt[n]); + } + else + { + while (1) + { + if (!isamb_pp_read (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + isamb_pp_close (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } + } + } + } + heap_close (ti); + xfree (ispt); + } else logf (LOG_WARN, "Unknown isam set in rset_trunc_r"); @@ -475,7 +475,6 @@ static int isams_trunc_cmp (const void *p1, const void *p2) return i1 - i2; } -#if ZMBOL static int isam_trunc_cmp (const void *p1, const void *p2) { ISAM_P i1 = *(ISAM_P*) p1; @@ -510,54 +509,58 @@ static int isamd_trunc_cmp (const void *p1, const void *p2) return d; return isamd_block (i1) - isamd_block (i2); } -#endif RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, - const char *term, int length, const char *flags) + const char *term, int length, const char *flags, + int preserve_position, int term_type) { logf (LOG_DEBUG, "rset_trunc no=%d", no); if (no < 1) { rset_null_parms parms; - parms.rset_term = rset_term_create (term, length, flags); + parms.rset_term = rset_term_create (term, length, flags, term_type); return rset_create (rset_kind_null, &parms); } - if (zi->service->isams) + if (zi->reg->isams) { if (no == 1) { rset_isams_parms parms; parms.pos = *isam_p; - parms.is = zi->service->isams; - parms.rset_term = rset_term_create (term, length, flags); + parms.is = zi->reg->isams; + parms.rset_term = rset_term_create (term, length, flags, + term_type); return rset_create (rset_kind_isams, &parms); } qsort (isam_p, no, sizeof(*isam_p), isams_trunc_cmp); } -#if ZMBOL - else if (zi->service->isam) + else if (zi->reg->isam) { if (no == 1) { rset_isam_parms parms; parms.pos = *isam_p; - parms.is = zi->service->isam; - parms.rset_term = rset_term_create (term, length, flags); + parms.is = zi->reg->isam; + parms.rset_term = rset_term_create (term, length, flags, + term_type); return rset_create (rset_kind_isam, &parms); } qsort (isam_p, no, sizeof(*isam_p), isam_trunc_cmp); } - else if (zi->service->isamc) + else if (zi->reg->isamc) { if (no == 1) { rset_isamc_parms parms; + parms.key_size = sizeof(struct it_key); + parms.cmp = key_compare_it; parms.pos = *isam_p; - parms.is = zi->service->isamc; - parms.rset_term = rset_term_create (term, length, flags); + parms.is = zi->reg->isamc; + parms.rset_term = rset_term_create (term, length, flags, + term_type); return rset_create (rset_kind_isamc, &parms); } #if NEW_TRUNC @@ -567,25 +570,29 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; - parms.isc = zi->service->isamc; + parms.isc = zi->reg->isamc; parms.isam_positions = isam_p; parms.no_isam_positions = no; parms.no_save_positions = 100000; - parms.rset_term = rset_term_create (term, length, flags); + parms.rset_term = rset_term_create (term, length, flags, + term_type); return rset_create (rset_kind_m_or, &parms); } #endif qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); } - else if (zi->service->isamd) + else if (zi->reg->isamd) { if (no == 1) { rset_isamd_parms parms; - parms.pos = *isam_p; - parms.is = zi->service->isamd; - parms.rset_term = rset_term_create (term, length, flags); + logf(LOG_FATAL, "isam_d does not (currently) support truncs"); + abort(); + /* parms.pos = *isam_p; */ + parms.is = zi->reg->isamd; + parms.rset_term = rset_term_create (term, length, flags, + term_type); return rset_create (rset_kind_isamd, &parms); } #if NEW_TRUNC_NOT_DONE_FOR_ISAM_D @@ -596,7 +603,7 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; parms.isc = 0; - parms.isamd=zi->service->isamd; + parms.isamd=zi->reg->isamd; parms.isam_positions = isam_p; parms.no_isam_positions = no; parms.no_save_positions = 100000; @@ -606,12 +613,28 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, #endif qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp); } -#endif + else if (zi->reg->isamb) + { + if (no == 1) + { + rset_isamb_parms parms; + + parms.key_size = sizeof(struct it_key); + parms.cmp = key_compare_it; + parms.pos = *isam_p; + parms.is = zi->reg->isamb; + parms.rset_term = rset_term_create (term, length, flags, + term_type); + return rset_create (rset_kind_isamb, &parms); + } + qsort (isam_p, no, sizeof(*isam_p), isamd_trunc_cmp); + } else { logf (LOG_WARN, "Unknown isam set in rset_trunc"); return rset_create (rset_kind_null, NULL); } - return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100); + return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100, + preserve_position, term_type); }