From 32411af8c3defde4eaa0cbf6e1abad4eff12fbc3 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 20 Apr 2005 08:32:36 +0000 Subject: [PATCH] WS cleanups. Added setting trunclimit and truncchunk to adjust the truncation code limits. --- index/trunc.c | 229 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 113 insertions(+), 116 deletions(-) diff --git a/index/trunc.c b/index/trunc.c index 9dc70af..683460c 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.52 2005-04-13 13:03:47 adam Exp $ +/* $Id: trunc.c,v 1.53 2005-04-20 08:32:36 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -39,7 +39,7 @@ struct trunc_info { char *buf; }; -static void heap_swap (struct trunc_info *ti, int i1, int i2) +static void heap_swap(struct trunc_info *ti, int i1, int i2) { int swap; @@ -48,11 +48,11 @@ static void heap_swap (struct trunc_info *ti, int i1, int i2) ti->ptr[i2] = swap; } -static void heap_delete (struct trunc_info *ti) +static void heap_delete(struct trunc_info *ti) { int cur = 1, child = 2; - heap_swap (ti, 1, ti->heapnum--); + heap_swap(ti, 1, ti->heapnum--); while (child <= ti->heapnum) { if (child < ti->heapnum && (*ti->cmp)(ti->heap[ti->ptr[child]], @@ -61,7 +61,7 @@ static void heap_delete (struct trunc_info *ti) if ((*ti->cmp)(ti->heap[ti->ptr[cur]], ti->heap[ti->ptr[child]]) > 0) { - heap_swap (ti, cur, child); + heap_swap(ti, cur, child); cur = child; child = 2*cur; } @@ -70,26 +70,26 @@ static void heap_delete (struct trunc_info *ti) } } -static void heap_insert (struct trunc_info *ti, const char *buf, int indx) +static void heap_insert(struct trunc_info *ti, const char *buf, int indx) { int cur, parent; cur = ++(ti->heapnum); - memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize); + memcpy(ti->heap[ti->ptr[cur]], buf, ti->keysize); ti->indx[ti->ptr[cur]] = indx; parent = cur/2; while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]], ti->heap[ti->ptr[cur]]) > 0) { - heap_swap (ti, cur, parent); + 
heap_swap(ti, cur, parent); cur = parent; parent = cur/2; } } -static struct trunc_info *heap_init (int size, int key_size, - int (*cmp)(const void *p1, - const void *p2)) +static struct trunc_info *heap_init(int size, int key_size, + int (*cmp)(const void *p1, + const void *p2)) { struct trunc_info *ti = (struct trunc_info *) xmalloc(sizeof(*ti)); int i; @@ -112,7 +112,7 @@ static struct trunc_info *heap_init (int size, int key_size, return ti; } -static void heap_close (struct trunc_info *ti) +static void heap_close(struct trunc_info *ti) { xfree(ti->ptr); xfree(ti->indx); @@ -134,16 +134,9 @@ static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, RSFD result_rsfd; int nn = 0; - /* - rset_temp_parms parms; - parms.cmp = key_compare_it; - parms.key_size = sizeof(struct it_key); - parms.temp_path = res_get (zi->res, "setTmpDir"); - result = rset_create (rset_kind_temp, &parms); - */ result = rstemp_create( rset_nmem,kctrl, scope, - res_get (zi->res, "setTmpDir"), termid); - result_rsfd = rset_open (result, RSETF_WRITE); + res_get(zi->res, "setTmpDir"), termid); + result_rsfd = rset_open(result, RSETF_WRITE); if (to - from > merge_chunk) { @@ -160,58 +153,58 @@ static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, for (i = from; i < to; i += i_add) { if (i_add <= to - i) - rset[rscur] = rset_trunc_r (zi, term, length, flags, - isam_p, i, i+i_add, - merge_chunk, preserve_position, - term_type, rset_nmem, - kctrl, scope,termid); + rset[rscur] = rset_trunc_r(zi, term, length, flags, + isam_p, i, i+i_add, + merge_chunk, preserve_position, + term_type, rset_nmem, + kctrl, scope,termid); else - rset[rscur] = rset_trunc_r (zi, term, length, flags, - isam_p, i, to, - merge_chunk, preserve_position, - term_type, rset_nmem, - kctrl, scope,termid); + rset[rscur] = rset_trunc_r(zi, term, length, flags, + isam_p, i, to, + merge_chunk, preserve_position, + term_type, rset_nmem, + kctrl, scope,termid); rscur++; } ti = heap_init (rscur, 
sizeof(struct it_key), key_compare_it); for (i = rscur; --i >= 0; ) { - rsfd[i] = rset_open (rset[i], RSETF_READ); + rsfd[i] = rset_open(rset[i], RSETF_READ); if (rset_read(rsfd[i], ti->tmpbuf,0)) - heap_insert (ti, ti->tmpbuf, i); + heap_insert(ti, ti->tmpbuf, i); else { - rset_close (rsfd[i]); - rset_delete (rset[i]); + rset_close(rsfd[i]); + rset_delete(rset[i]); } } while (ti->heapnum) { int n = ti->indx[ti->ptr[1]]; - rset_write (result_rsfd, ti->heap[ti->ptr[1]]); + rset_write(result_rsfd, ti->heap[ti->ptr[1]]); nn++; while (1) { - if (!rset_read (rsfd[n], ti->tmpbuf,0)) + if(!rset_read (rsfd[n], ti->tmpbuf,0)) { - heap_delete (ti); - rset_close (rsfd[n]); - rset_delete (rset[n]); + heap_delete(ti); + rset_close(rsfd[n]); + rset_delete(rset[n]); break; } if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + heap_insert(ti, ti->tmpbuf, n); break; } } } xfree(rset); xfree(rsfd); - heap_close (ti); + heap_close(ti); } else if (zi->reg->isamc) { @@ -221,50 +214,50 @@ static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, ispt = (ISAMC_PP *) xmalloc(sizeof(*ispt) * (to-from)); - ti = heap_init (to-from, sizeof(struct it_key), - key_compare_it); + ti = heap_init(to-from, sizeof(struct it_key), + key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isamc_pp_open (zi->reg->isamc, isam_p[from+i]); - if (isamc_pp_read (ispt[i], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, i); + ispt[i] = isamc_pp_open(zi->reg->isamc, isam_p[from+i]); + if (isamc_pp_read(ispt[i], ti->tmpbuf)) + heap_insert(ti, ti->tmpbuf, i); else - isamc_pp_close (ispt[i]); + isamc_pp_close(ispt[i]); } while (ti->heapnum) { int n = ti->indx[ti->ptr[1]]; - rset_write (result_rsfd, ti->heap[ti->ptr[1]]); + rset_write(result_rsfd, ti->heap[ti->ptr[1]]); nn++; if (preserve_position) { - heap_delete (ti); - if (isamc_pp_read (ispt[n], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + if 
(isamc_pp_read(ispt[n], ti->tmpbuf)) + heap_insert(ti, ti->tmpbuf, n); else - isamc_pp_close (ispt[n]); + isamc_pp_close(ispt[n]); } else { while (1) { - if (!isamc_pp_read (ispt[n], ti->tmpbuf)) + if (!isamc_pp_read(ispt[n], ti->tmpbuf)) { - heap_delete (ti); - isamc_pp_close (ispt[n]); + heap_delete(ti); + isamc_pp_close(ispt[n]); break; } if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + heap_insert(ti, ti->tmpbuf, n); break; } } } } - heap_close (ti); + heap_close(ti); xfree(ispt); } else if (zi->reg->isams) @@ -276,39 +269,39 @@ static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, ispt = (ISAMS_PP *) xmalloc(sizeof(*ispt) * (to-from)); - ti = heap_init (to-from, sizeof(struct it_key), - key_compare_it); + ti = heap_init(to-from, sizeof(struct it_key), + key_compare_it); for (i = to-from; --i >= 0; ) { - ispt[i] = isams_pp_open (zi->reg->isams, isam_p[from+i]); - if (isams_pp_read (ispt[i], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, i); + ispt[i] = isams_pp_open(zi->reg->isams, isam_p[from+i]); + if (isams_pp_read(ispt[i], ti->tmpbuf)) + heap_insert(ti, ti->tmpbuf, i); else - isams_pp_close (ispt[i]); + isams_pp_close(ispt[i]); } while (ti->heapnum) { int n = ti->indx[ti->ptr[1]]; - rset_write (result_rsfd, ti->heap[ti->ptr[1]]); + rset_write(result_rsfd, ti->heap[ti->ptr[1]]); nn++; while (1) { - if (!isams_pp_read (ispt[n], ti->tmpbuf)) + if (!isams_pp_read(ispt[n], ti->tmpbuf)) { - heap_delete (ti); - isams_pp_close (ispt[n]); + heap_delete(ti); + isams_pp_close(ispt[n]); break; } if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + heap_insert(ti, ti->tmpbuf, n); break; } } } - heap_close (ti); + heap_close(ti); xfree(ispt); } else if (zi->reg->isamb) @@ -319,63 +312,63 @@ static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length, ispt = (ISAMB_PP *) 
xmalloc(sizeof(*ispt) * (to-from)); - ti = heap_init (to-from, sizeof(struct it_key), - key_compare_it); + ti = heap_init(to-from, sizeof(struct it_key), + key_compare_it); for (i = to-from; --i >= 0; ) { if (isam_p[from+i]) { - ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i], scope); - if (isamb_pp_read (ispt[i], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, i); + ispt[i] = isamb_pp_open(zi->reg->isamb, isam_p[from+i], scope); + if (isamb_pp_read(ispt[i], ti->tmpbuf)) + heap_insert(ti, ti->tmpbuf, i); else - isamb_pp_close (ispt[i]); + isamb_pp_close(ispt[i]); } } while (ti->heapnum) { int n = ti->indx[ti->ptr[1]]; - rset_write (result_rsfd, ti->heap[ti->ptr[1]]); + rset_write(result_rsfd, ti->heap[ti->ptr[1]]); nn++; if (preserve_position) { - heap_delete (ti); - if (isamb_pp_read (ispt[n], ti->tmpbuf)) - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + if (isamb_pp_read(ispt[n], ti->tmpbuf)) + heap_insert(ti, ti->tmpbuf, n); else - isamb_pp_close (ispt[n]); + isamb_pp_close(ispt[n]); } else { while (1) { - if (!isamb_pp_read (ispt[n], ti->tmpbuf)) + if (!isamb_pp_read(ispt[n], ti->tmpbuf)) { - heap_delete (ti); - isamb_pp_close (ispt[n]); + heap_delete(ti); + isamb_pp_close(ispt[n]); break; } if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) { - heap_delete (ti); - heap_insert (ti, ti->tmpbuf, n); + heap_delete(ti); + heap_insert(ti, ti->tmpbuf, n); break; } } } } - heap_close (ti); + heap_close(ti); xfree(ispt); } else - yaz_log (YLOG_WARN, "Unknown isam set in rset_trunc_r"); + yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc_r"); - rset_close (result_rsfd); + rset_close(result_rsfd); return result; } -static int isams_trunc_cmp (const void *p1, const void *p2) +static int isams_trunc_cmp(const void *p1, const void *p2) { ISAM_P i1 = *(ISAM_P*) p1; ISAM_P i2 = *(ISAM_P*) p2; @@ -387,15 +380,15 @@ static int isams_trunc_cmp (const void *p1, const void *p2) return 0; } -static int isamc_trunc_cmp (const void *p1, const void *p2) +static int 
isamc_trunc_cmp(const void *p1, const void *p2) { ISAM_P i1 = *(ISAM_P*) p1; ISAM_P i2 = *(ISAM_P*) p2; zint d; - d = (isamc_type (i1) - isamc_type (i2)); + d = (isamc_type(i1) - isamc_type(i2)); if (d == 0) - d = isamc_block (i1) - isamc_block (i2); + d = isamc_block(i1) - isamc_block(i2); if (d > 0) return 1; else if (d < 0) @@ -403,56 +396,60 @@ static int isamc_trunc_cmp (const void *p1, const void *p2) return 0; } -RSET rset_trunc (ZebraHandle zi, ISAM_P *isam_p, int no, - const char *term, int length, const char *flags, - int preserve_position, int term_type, NMEM rset_nmem, - const struct key_control *kctrl, int scope) +RSET rset_trunc(ZebraHandle zi, ISAM_P *isam_p, int no, + const char *term, int length, const char *flags, + int preserve_position, int term_type, NMEM rset_nmem, + const struct key_control *kctrl, int scope) { TERMID termid; - yaz_log (YLOG_DEBUG, "rset_trunc no=%d", no); + int trunc_chunk; + yaz_log(YLOG_DEBUG, "rset_trunc no=%d", no); if (no < 1) - return rsnull_create (rset_nmem,kctrl); + return rsnull_create(rset_nmem,kctrl); termid = rset_term_create(term, length, flags, term_type,rset_nmem); if (zi->reg->isams) { if (no == 1) return rsisams_create(rset_nmem, kctrl, scope, - zi->reg->isams, *isam_p, termid); - qsort (isam_p, no, sizeof(*isam_p), isams_trunc_cmp); + zi->reg->isams, *isam_p, termid); + qsort(isam_p, no, sizeof(*isam_p), isams_trunc_cmp); } else if (zi->reg->isamc) { if (no == 1) return rsisamc_create(rset_nmem, kctrl, scope, - zi->reg->isamc, *isam_p, termid); - qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); + zi->reg->isamc, *isam_p, termid); + qsort(isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); } else if (zi->reg->isamb) { + int trunc_limit = atoi(res_get_def(zi->res, "trunclimit", "10000")); if (no == 1) return rsisamb_create(rset_nmem,kctrl, scope, - zi->reg->isamb, *isam_p, termid); - else if (no <10000 ) /* FIXME - hardcoded number */ + zi->reg->isamb, *isam_p, termid); + else if (no < trunc_limit) { RSET r; 
RSET *rsets = xmalloc(no*sizeof(RSET)); /* use nmem! */ int i; for (i = 0; i<no; i++) - rsets[i] = rsisamb_create(rset_nmem, kctrl, scope, - zi->reg->isamb, isam_p[i], termid); + rsets[i] = rsisamb_create(rset_nmem, kctrl, scope, + zi->reg->isamb, isam_p[i], termid); r = rsmulti_or_create( rset_nmem, kctrl, scope, no, rsets); xfree(rsets); return r; } - qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); + fprintf(stderr, "Using rset_trunc_r limit=%d\n", trunc_limit); + qsort(isam_p, no, sizeof(*isam_p), isamc_trunc_cmp); } else { - yaz_log (YLOG_WARN, "Unknown isam set in rset_trunc"); - return rsnull_create (rset_nmem, kctrl); + yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc"); + return rsnull_create(rset_nmem, kctrl); } - return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100, - preserve_position, term_type, rset_nmem,kctrl,scope, - termid); + trunc_chunk = atoi(res_get_def(zi->res, "truncchunk", "100")); + return rset_trunc_r(zi, term, length, flags, isam_p, 0, no, trunc_chunk, + preserve_position, term_type, rset_nmem, kctrl, scope, + termid); } -- 1.7.10.4