X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=3a2491a4e09774694f0f977ffaab227c79f54c91;hb=ce20a8a823a2df86ace4862008684c71a4f06cd2;hp=e82d230c923c459ca1445b77a935cab55e23eb0f;hpb=b879b04a092d5b00cc866cf16f755e55053d2e89;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index e82d230..3a2491a 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,10 +1,37 @@ /* - * Copyright (C) 1995, Index Data I/S + * Copyright (C) 1994-1995, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.2 1995-09-04 12:33:43 adam + * Revision 1.10 1995-09-11 15:23:26 adam + * More work on relevance search. + * + * Revision 1.9 1995/09/11 13:09:35 adam + * More work on relevance feedback. + * + * Revision 1.8 1995/09/08 14:52:27 adam + * Minor changes. Dictionary is lower case now. + * + * Revision 1.7 1995/09/07 13:58:36 adam + * New parameter: result-set file descriptor (RSFD) to support multiple + * positions within the same result-set. + * Boolean operators: and, or, not implemented. + * Result-set references. + * + * Revision 1.6 1995/09/06 16:11:18 adam + * Option: only one word key per file. + * + * Revision 1.5 1995/09/06 10:33:04 adam + * More work on present. Some log messages removed. + * + * Revision 1.4 1995/09/05 15:28:40 adam + * More work on search engine. + * + * Revision 1.3 1995/09/04 15:20:22 adam + * Minor changes. + * + * Revision 1.2 1995/09/04 12:33:43 adam * Various cleanup. YAZ util used instead. * * Revision 1.1 1995/09/04 09:10:40 adam @@ -17,122 +44,157 @@ #include #include -#include -#include -#include +#include "zserver.h" + #include #include +#include +#include +#include -#include - -#include "index.h" - -static Dict dict; -static ISAM isam; - -static RSET rpn_search_APT (Z_AttributesPlusTerm *zapt) +int split_term (ZServerInfo *zi, Z_Term *term, ISAM_P **isam_ps, int *no) { - struct rset_isam_parms parms; + static ISAM_P isam_p[16]; + int isam_p_indx = 0; + char termz[IT_MAX_WORD+1]; + char term_sub[IT_MAX_WORD+1]; + int sizez, i; + char *p0, *p1; const char *info; - Z_Term *term = zapt->term; - + if (term->which != Z_Term_general) - return NULL; - if (!(info = dict_lookup (dict, term->u.general->buf))) - return NULL; - assert (*info == sizeof(parms.pos)); - memcpy (&parms.pos, info+1, sizeof(parms.pos)); - parms.is = isam; - return rset_create (rset_kind_isam, &parms); -} + return 0; + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD) + sizez = IT_MAX_WORD; + for (i = 0; iu.general->buf[i]); + termz[i] = '\0'; -static RSET rpn_search_and (RSET r_l, RSET r_r) -{ - struct it_key k1, k2; - RSET r_dst; - int i1, i2; - rset_open (r_l, 0); - rset_open (r_r, 0); - r_dst = rset_create (rset_kind_temp, NULL); - rset_open (r_dst, 1); - - i1 = rset_read (r_l, &k1); - i2 = rset_read (r_r, &k2); - while (i1 && i2) + p0 = termz; + while (1) { - if (k1.sysno > k2.sysno) - i2 = rset_read (r_r, &k2); - else if (k1.sysno < k2.sysno) - i1 = rset_read (r_l, &k1); - else if (!(i1 = key_compare_x (&k1, &k2))) - { - rset_write (r_dst, &k1); - i1 = rset_read (r_l, &k1); - i2 = rset_read (r_r, &k2); - } - else if (i1 > 0) + if ((p1 = strchr (p0, ' '))) { - rset_write (r_dst, &k2); - i2 = rset_read (r_r, &k2); + memcpy (term_sub, p0, p1-p0); + term_sub[p1-p0] = '\0'; } else + strcpy (term_sub, p0); + logf (LOG_DEBUG, "dict_lookup: %s", term_sub); + if ((info = dict_lookup (zi->wordDict, term_sub))) { - rset_write (r_dst, &k1); - i1 = rset_read (r_l, &k1); + logf (LOG_DEBUG, " found"); + assert (*info == sizeof(*isam_p)); + memcpy (isam_p + isam_p_indx, info+1, sizeof(*isam_p)); + isam_p_indx++; } - } - rset_close (r_dst); - return r_dst; + if (!p1) + break; + p0 = p1+1; + } + *isam_ps = isam_p; + *no = isam_p_indx; + logf (LOG_DEBUG, "%d positions", *no); + return 1; } -static RSET rpn_search_or (RSET r_l, RSET r_r) +static RSET rpn_search_APT_relevance (ZServerInfo *zi, + Z_AttributesPlusTerm *zapt) { - return r_l; + rset_relevance_parms parms; + + parms.key_size = sizeof(struct it_key); + parms.max_rec = 10; + parms.cmp = key_compare; + parms.is = zi->wordIsam; + split_term (zi, zapt->term, &parms.isam_positions, + &parms.no_isam_positions); + if (parms.no_isam_positions > 0) + return rset_create (rset_kind_relevance, &parms); + else + return rset_create (rset_kind_null, NULL); } -static RSET rpn_search_not (RSET r_l, RSET r_r) +static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt) { - return r_l; +#if 0 + Z_Term *term = zapt->term; + char termz[IT_MAX_WORD+1]; + size_t sizez; + struct rset_isam_parms parms; + const char *info; + int i; + + if (term->which != Z_Term_general) + return NULL; + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD) + sizez = IT_MAX_WORD; + for (i = 0; iu.general->buf[i]); + termz[i] = '\0'; + logf (LOG_DEBUG, "dict_lookup: %s", termz); + if (!(info = dict_lookup (zi->wordDict, termz))) + return rset_create (rset_kind_null, NULL); + assert (*info == sizeof(parms.pos)); + memcpy (&parms.pos, info+1, sizeof(parms.pos)); + parms.is = zi->wordIsam; + logf (LOG_DEBUG, "rset_create isam"); + return rset_create (rset_kind_isam, &parms); +#else + return rpn_search_APT_relevance (zi, zapt); +#endif } -static RSET rpn_search_ref (Z_ResultSetId *resultSetId) +static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId) { - return NULL; + ZServerSet *s; + + if (!(s = resultSetGet (zi, resultSetId))) + return rset_create (rset_kind_null, NULL); + return s->rset; } -static RSET rpn_search_structure (Z_RPNStructure *zs) +static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs) { - RSET r; + RSET r = NULL; if (zs->which == Z_RPNStructure_complex) { - RSET r_l, r_r; + rset_bool_parms bool_parms; - r_l = rpn_search_structure (zs->u.complex->s1); - r_r = rpn_search_structure (zs->u.complex->s2); + bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1); + bool_parms.rset_r = rpn_search_structure (zi, zs->u.complex->s2); + bool_parms.key_size = sizeof(struct it_key); + bool_parms.cmp = key_compare; switch (zs->u.complex->operator->which) { case Z_Operator_and: - r = rpn_search_and (r_l, r_r); + r = rset_create (rset_kind_and, &bool_parms); break; case Z_Operator_or: - r = rpn_search_or (r_l, r_r); + r = rset_create (rset_kind_or, &bool_parms); break; case Z_Operator_and_not: - r = rpn_search_not (r_l, r_r); + r = rset_create (rset_kind_not, &bool_parms); break; default: assert (0); } - rset_delete (r_l); - rset_delete (r_r); } else if (zs->which == Z_RPNStructure_simple) { if (zs->u.simple->which == Z_Operand_APT) - r = rpn_search_APT (zs->u.simple->u.attributesPlusTerm); + { + logf (LOG_DEBUG, "rpn_search_APT"); + r = rpn_search_APT (zi, zs->u.simple->u.attributesPlusTerm); + } else if (zs->u.simple->which == Z_Operand_resultSetId) - r = rpn_search_ref (zs->u.simple->u.resultSetId); + { + logf (LOG_DEBUG, "rpn_search_ref"); + r = rpn_search_ref (zi, zs->u.simple->u.resultSetId); + } else { assert (0); @@ -144,3 +206,62 @@ static RSET rpn_search_structure (Z_RPNStructure *zs) } return r; } + +static RSET rpn_save_set (RSET r, int *count) +{ +#if 0 + RSET d; + rset_temp_parms parms; +#endif + int psysno = 0; + struct it_key key; + RSFD rfd; + + logf (LOG_DEBUG, "rpn_save_set"); + *count = 0; +#if 0 + parms.key_size = sizeof(struct it_key); + d = rset_create (rset_kind_temp, &parms); + rset_open (d, 1); +#endif + + rfd = rset_open (r, 0); + while (rset_read (r, rfd, &key)) + { + if (key.sysno != psysno) + { + psysno = key.sysno; + (*count)++; + } +#if 0 + rset_write (d, &key); +#endif + } + rset_close (r, rfd); +#if 0 + rset_close (d); +#endif + logf (LOG_DEBUG, "%d distinct sysnos", *count); +#if 0 + return d; +#endif +} + +int rpn_search (ZServerInfo *zi, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname, int *hits) +{ + RSET rset, result_rset; + + rset = rpn_search_structure (zi, rpn->RPNStructure); + if (!rset) + return 0; + result_rset = rpn_save_set (rset, hits); +#if 0 + rset_delete (result_rset); +#endif + + resultSetAdd (zi, setname, 1, rset); + return 0; +} +