X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=fb9ad2e5a1167620429d3ac6a14c16b4c7b92b41;hb=2c7c2ca460fee75f3bebc0479b9787f0c401db03;hp=5fbaca4fb54fb83a96e15ce091484e90024f33d7;hpb=817315213374bf6b9a484f7e74fd4913d99c0755;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 5fbaca4..fb9ad2e 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,16 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.26 1995-10-12 12:40:54 adam + * Revision 1.29 1995-10-13 16:01:49 adam + * Work on relations. + * + * Revision 1.28 1995/10/13 12:26:43 adam + * Optimization of truncation. + * + * Revision 1.27 1995/10/12 17:07:22 adam + * Truncation works. + * + * Revision 1.26 1995/10/12 12:40:54 adam * Bug fixes in rpn_prox. * * Revision 1.25 1995/10/10 13:59:24 adam @@ -216,6 +225,7 @@ static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, } struct trunc_info { + int *ptr; int *indx; char **heap; int heapnum; @@ -230,27 +240,23 @@ static void heap_swap (struct trunc_info *ti, int i1, int i2) { int swap; - memcpy (ti->swapbuf, ti->heap[i1], ti->keysize); - memcpy (ti->heap[i1], ti->heap[i2], ti->keysize); - memcpy (ti->heap[i2], ti->swapbuf, ti->keysize); - - swap = ti->indx[i1]; - ti->indx[i1] = ti->indx[i2]; - ti->indx[i2] = swap; + swap = ti->ptr[i1]; + ti->ptr[i1] = ti->ptr[i2]; + ti->ptr[i2] = swap; } static void heap_delete (struct trunc_info *ti) { int cur = 1, child = 2; - assert (ti->heapnum > 0); - memcpy (ti->heap[1], ti->heap[ti->heapnum], ti->keysize); - ti->indx[1] = ti->indx[ti->heapnum--]; + heap_swap (ti, 1, ti->heapnum--); while (child <= ti->heapnum) { if (child < ti->heapnum && - (*ti->cmp)(ti->heap[child], ti->heap[1+child]) > 0) + (*ti->cmp)(ti->heap[ti->ptr[child]], + ti->heap[ti->ptr[1+child]]) > 0) child++; - if ((*ti->cmp)(ti->heap[cur], ti->heap[child]) > 0) + if ((*ti->cmp)(ti->heap[ti->ptr[cur]], + ti->heap[ti->ptr[child]]) > 0) { heap_swap (ti, cur, child); cur = child; @@ -266,10 +272,11 @@ static void heap_insert (struct trunc_info *ti, const char *buf, int indx) int cur, parent; cur = ++(ti->heapnum); - memcpy (ti->heap[cur], buf, ti->keysize); - ti->indx[cur] = indx; + memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize); + ti->indx[ti->ptr[cur]] = indx; parent = cur/2; - while (parent && (*ti->cmp)(ti->heap[parent], ti->heap[cur]) > 0) + while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]], + ti->heap[ti->ptr[cur]]) > 0) { heap_swap (ti, cur, parent); cur = parent; @@ -290,16 +297,21 @@ struct trunc_info *heap_init (int size, int key_size, ti->cmp = cmp; ti->indx = xmalloc (size * sizeof(*ti->indx)); ti->heap = xmalloc (size * sizeof(*ti->heap)); + ti->ptr = xmalloc (size * sizeof(*ti->ptr)); ti->swapbuf = xmalloc (ti->keysize); ti->tmpbuf = xmalloc (ti->keysize); ti->buf = xmalloc (size * ti->keysize); for (i = size; --i >= 0; ) + { + ti->ptr[i] = i; ti->heap[i] = ti->buf + ti->keysize * i; + } return ti; } static void heap_close (struct trunc_info *ti) { + xfree (ti->ptr); xfree (ti->indx); xfree (ti->heap); xfree (ti->swapbuf); @@ -307,27 +319,85 @@ static void heap_close (struct trunc_info *ti) xfree (ti); } -static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int from, int to, - int merge_chunk) +static RSET rset_trunc_r (ISAM isam, ISAM_P *isam_p, int from, int to, + int merge_chunk) { - logf (LOG_DEBUG, "rset_trunc, range=%d-%d", from, to-1); + RSET result; + RSFD result_rsfd; + rset_temp_parms parms; + + parms.key_size = sizeof(struct it_key); + result = rset_create (rset_kind_temp, &parms); + result_rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); + if (to - from > merge_chunk) { - return NULL; + RSFD *rsfd; + RSET *rset; + int i, i_add = (to-from)/merge_chunk + 1; + struct trunc_info *ti; + int rscur = 0; + int rsmax = (to-from)/i_add + 1; + + rset = xmalloc (sizeof(*rset) * rsmax); + rsfd = xmalloc (sizeof(*rsfd) * rsmax); + + for (i = from; i < to; i += i_add) + { + if (i_add <= to - i) + rset[rscur] = rset_trunc_r (isam, isam_p, i, i+i_add, + merge_chunk); + else + rset[rscur] = rset_trunc_r (isam, isam_p, i, to, + merge_chunk); + rscur++; + } + ti = heap_init (rscur, sizeof(struct it_key), key_compare); + for (i = rscur; --i >= 0; ) + { + rsfd[i] = rset_open (rset[i], RSETF_READ|RSETF_SORT_SYSNO); + if (rset_read (rset[i], rsfd[i], ti->tmpbuf)) + heap_insert (ti, ti->tmpbuf, i); + else + { + rset_close (rset[i], rsfd[i]); + rset_delete (rset[i]); + } + } + while (ti->heapnum) + { + int n = ti->indx[ti->ptr[1]]; + + rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); + + while (1) + { + if (!rset_read (rset[n], rsfd[n], ti->tmpbuf)) + { + heap_delete (ti); + rset_close (rset[n], rsfd[n]); + rset_delete (rset[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } + } + } + xfree (rset); + xfree (rsfd); + heap_close (ti); } else { ISPT *ispt; int i; struct trunc_info *ti; - RSET result; - RSFD rsfd; - rset_temp_parms parms; ispt = xmalloc (sizeof(*ispt) * (to-from)); - parms.key_size = sizeof (struct it_key); - result = rset_create (rset_kind_temp, &parms); - rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); ti = heap_init (to-from, sizeof(struct it_key), key_compare); @@ -336,23 +406,62 @@ static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int from, int to, ispt[i] = is_position (isam, isam_p[from+i]); if (is_readkey (ispt[i], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, i); + else + is_pt_free (ispt[i]); } while (ti->heapnum) { - int n = ti->indx[1]; + int n = ti->indx[ti->ptr[1]]; - rset_write (result, rsfd, ti->heap[1]); + rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]); +#if 0 heap_delete (ti); if (is_readkey (ispt[n], ti->tmpbuf)) heap_insert (ti, ti->tmpbuf, n); + else + is_pt_free (ispt[n]); +#else + while (1) + { + if (!is_readkey (ispt[n], ti->tmpbuf)) + { + heap_delete (ti); + is_pt_free (ispt[n]); + break; + } + if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1) + { + heap_delete (ti); + heap_insert (ti, ti->tmpbuf, n); + break; + } + } +#endif } - for (i = to-from; --i >= 0; ) - is_pt_free (ispt[i]); - rset_close (result, rsfd); heap_close (ti); xfree (ispt); - return result; } + rset_close (result, result_rsfd); + return result; +} + +static int isam_trunc_cmp (const void *p1, const void *p2) +{ + ISAM_P i1 = *(ISAM_P*) p1; + ISAM_P i2 = *(ISAM_P*) p2; + int d; + + d = is_type (i1) - is_type (i2); + if (d) + return d; + return is_block (i1) - is_block (i2); +} + +static RSET rset_trunc (ISAM isam, ISAM_P *isam_p, int no) +{ + + qsort (isam_p, no, sizeof(*isam_p), isam_trunc_cmp); + return rset_trunc_r (isam, isam_p, 0, no, 100); } struct grep_info { @@ -390,17 +499,60 @@ static int grep_handle (Dict_char *name, const char *info, void *p) return 0; } +static void gen_regular_ge (char *dst, int val) +{ + int dst_p = 0; + int w = 1; + int d; + int pos = 0; + int i; + + if (val < 0) + val = 0; + while ((d=(val % (w*10))/w)) + { + sprintf (dst + dst_p, "%d", val); + + dst_p = strlen(dst) - pos - 1; + + dst[dst_p++] = '['; + dst[dst_p++] = d +'1'; + dst[dst_p++] = '-'; + dst[dst_p++] = '9'; + dst[dst_p++] = ']'; + + for (i = 0; ierrCode = 114; return -1; } - + switch (relation_value) + { + case 1: + case 2: + break; + case 4: + logf (LOG_LOG, "Relation ge"); + gen_regular_ge (term_dict + strlen(term_dict), atoi(term_sub)); + logf (LOG_LOG, "dict_lookup_grep: %s", term_dict); + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, grep_handle); + logf (LOG_LOG, "%d positions", grep_info->isam_p_indx); + return 0; + case 5: + logf (LOG_LOG, "Relation gt"); + gen_regular_ge (term_dict + strlen(term_dict), atoi(term_sub)+1); + logf (LOG_LOG, "dict_lookup_grep: %s", term_dict); + dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, grep_handle); + logf (LOG_LOG, "%d positions", grep_info->isam_p_indx); + return 0; + } switch (truncation_value) { case -1: /* not specified */ @@ -456,7 +630,7 @@ static int trunc_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, dict_lookup_grep (zi->wordDict, term_dict, 0, grep_info, grep_handle); break; } - logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); + logf (LOG_LOG, "%d positions", grep_info->isam_p_indx); return 0; } @@ -557,8 +731,8 @@ static RSET rpn_search_APT_word (ZServerInfo *zi, result = rset_create (rset_kind_isam, &parms); } else - result = rset_trunc (zi->wordIsam, grep_info.isam_p_buf, 0, - grep_info.isam_p_indx, 400); + result = rset_trunc (zi->wordIsam, grep_info.isam_p_buf, + grep_info.isam_p_indx); xfree (grep_info.isam_p_buf); return result; } @@ -687,8 +861,8 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, rset[rset_no] = rset_create (rset_kind_null, NULL); else if (grep_info.isam_p_indx > 1) rset[rset_no] = rset_trunc (zi->wordIsam, - grep_info.isam_p_buf, 0, - grep_info.isam_p_indx, 400); + grep_info.isam_p_buf, + grep_info.isam_p_indx); else { rset_isam_parms parms; @@ -745,7 +919,6 @@ static RSET rpn_search_APT_local (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return result; } - static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, oid_value attributeSet) {