X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=ccl%2Fcclfind.c;h=8153a0e3a1e4c2060538b3b42194f4c987af2705;hb=6b3cf0738c53080781fedd852e26b299224af3c3;hp=4e0bdb49a48614ef455b14e71e475d215e7401e4;hpb=448e7f2a48f6485a8d452c75420524fb37453bc1;p=yaz-moved-to-github.git diff --git a/ccl/cclfind.c b/ccl/cclfind.c index 4e0bdb4..8153a0e 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -44,57 +44,9 @@ /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Log: cclfind.c,v $ - * Revision 1.19 2000-11-16 09:58:02 adam - * Implemented local AttributeSet setting for CCL field maps. + * $Id: cclfind.c,v 1.33 2003-02-14 18:49:22 adam Exp $ * - * Revision 1.18 2000/10/17 19:50:28 adam - * Implemented and-list and or-list for CCL module. - * - * Revision 1.17 2000/05/01 09:36:50 adam - * Range operator only treated in ordered ranges so that minus (-) can be - * used for, say, the and-not operator. - * - * Revision 1.16 2000/03/14 09:06:11 adam - * Added POSIX threads support for frontend server. - * - * Revision 1.15 2000/02/24 23:49:13 adam - * Fixed memory allocation problem. - * - * Revision 1.14 2000/01/31 13:15:21 adam - * Removed uses of assert(3). Cleanup of ODR. CCL parser update so - * that some characters are not surrounded by spaces in resulting term. - * ILL-code updates. - * - * Revision 1.13 1999/12/22 13:13:32 adam - * Search terms may include "operators" without causing error. - * - * Revision 1.12 1999/11/30 13:47:11 adam - * Improved installation. Moved header files to include/yaz. - * - * Revision 1.11 1999/03/31 11:15:37 adam - * Fixed memory leaks in ccl_find_str and ccl_qual_rm. - * - * Revision 1.10 1998/02/11 11:53:33 adam - * Changed code so that it compiles as C++. - * - * Revision 1.9 1997/09/29 08:56:37 adam - * Changed CCL parser to be thread safe. New type, CCL_parser, declared - * and a create/destructers ccl_parser_create/ccl_parser/destory has - * been added. - * - * Revision 1.8 1997/09/01 08:48:11 adam - * New windows NT/95 port using MSV5.0. Only a few changes made - * to avoid warnings. - * - * Revision 1.7 1997/05/14 06:53:26 adam - * C++ support. - * - * Revision 1.6 1997/04/30 08:52:06 quinn - * Null - * - * Revision 1.5 1996/10/11 15:00:24 adam - * CCL parser from Europagate Email gateway 1.0. + * Old Europagate log: * * Revision 1.16 1996/01/08 08:41:13 adam * Removed unused function. @@ -208,7 +160,7 @@ static void strxcat (char *n, const char *src, int len) */ static char *copy_token_name (struct ccl_token *tp) { - char *str = (char *)malloc (tp->len + 1); + char *str = (char *)xmalloc (tp->len + 1); ccl_assert (str); memcpy (str, tp->name, tp->len); str[tp->len] = '\0'; @@ -223,7 +175,7 @@ static char *copy_token_name (struct ccl_token *tp) static struct ccl_rpn_node *mk_node (int kind) { struct ccl_rpn_node *p; - p = (struct ccl_rpn_node *)malloc (sizeof(*p)); + p = (struct ccl_rpn_node *)xmalloc (sizeof(*p)); ccl_assert (p); p->kind = kind; return p; @@ -247,24 +199,24 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) ccl_rpn_delete (rpn->u.p[1]); break; case CCL_RPN_TERM: - free (rpn->u.t.term); + xfree (rpn->u.t.term); for (attr = rpn->u.t.attr_list; attr; attr = attr1) { attr1 = attr->next; if (attr->set) - free (attr->set); - free (attr); + xfree (attr->set); + xfree (attr); } break; case CCL_RPN_SET: - free (rpn->u.setname); + xfree (rpn->u.setname); break; case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; } - free (rpn); + xfree (rpn); } static struct ccl_rpn_node *find_spec (CCL_parser cclp, @@ -293,11 +245,11 @@ static void add_attr (struct ccl_rpn_node *p, const char *set, { struct ccl_rpn_attr *n; - n = (struct ccl_rpn_attr *)malloc (sizeof(*n)); + n = (struct ccl_rpn_attr *)xmalloc (sizeof(*n)); ccl_assert (n); if (set) { - n->set = malloc (strlen(set)+1); + n->set = (char*) xmalloc (strlen(set)+1); strcpy (n->set, set); } else @@ -312,28 +264,26 @@ static void add_attr (struct ccl_rpn_node *p, const char *set, * search_term: Parse CCL search term. * cclp: CCL Parser * qa: Qualifier attributes already applied. + * term_list: tokens we accept as terms in context + * multi: whether we accept "multiple" tokens * return: pointer to node(s); NULL on error. */ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, struct ccl_rpn_attr **qa, - int *term_list) + int *term_list, int multi) { - struct ccl_rpn_attr *qa_tmp[2]; struct ccl_rpn_node *p_top = 0; struct ccl_token *lookahead = cclp->look_token; int and_list = 0; int or_list = 0; char *attset; + const char *truncation_aliases; + + truncation_aliases = + ccl_qual_search_special(cclp->bibset, "truncation"); + if (!truncation_aliases) + truncation_aliases = "?"; - if (!qa) - { - /* no qualifier(s) applied. Use 'term' if it is defined */ - - qa = qa_tmp; - ccl_assert (qa); - qa[0] = ccl_qual_search (cclp, "term", 4); - qa[1] = NULL; - } if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0)) and_list = 1; if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0)) @@ -342,6 +292,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, { struct ccl_rpn_node *p; size_t no, i; + int no_spaces = 0; int left_trunc = 0; int right_trunc = 0; int mid_trunc = 0; @@ -351,17 +302,26 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, int truncation_value = -1; int completeness_value = -1; int len = 0; - int max = 200; - if (and_list || or_list) + size_t max = 200; + if (and_list || or_list || !multi) max = 1; - + + /* ignore commas when dealing with and-lists .. */ + if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA) + { + lookahead = lookahead->next; + ADVANCE; + continue; + } /* go through each TERM token. If no truncation attribute is yet met, then look for left/right truncation markers (?) and set left_trunc/right_trunc/mid_trunc accordingly */ for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++) { for (i = 0; ilen; i++) - if (truncation_value == -1 && lookahead->name[i] == '?') + if (lookahead->name[i] == ' ') + no_spaces++; + else if (strchr(truncation_aliases, lookahead->name[i])) { if (no == 0 && i == 0 && lookahead->len >= 1) left_trunc = 1; @@ -378,6 +338,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, if (len == 0) break; /* no more terms . stop . */ + if (p_top) { if (or_list) @@ -449,14 +410,14 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset)) { /* no structure attribute met. Apply either structure attribute WORD or PHRASE depending on number of CCL tokens */ - if (no == 1) + if (no == 1 && no_spaces == 0) add_attr (p, attset, CCL_BIB1_STR, 2); else add_attr (p, attset, CCL_BIB1_STR, 1); } /* make the RPN token */ - p->u.t.term = (char *)malloc (len); + p->u.t.term = (char *)xmalloc (len); ccl_assert (p->u.t.term); p->u.t.term[0] = '\0'; for (i = 0; ierror_code = CCL_ERR_TERM_EXPECTED; @@ -533,57 +496,15 @@ static struct ccl_rpn_node *search_term (CCL_parser cclp, struct ccl_rpn_attr **qa) { static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1}; - return search_term_x(cclp, qa, list); + return search_term_x(cclp, qa, list, 0); } -/* - * qualifiers: Parse CCL qualifiers and search terms. - * cclp: CCL Parser - * la: Token pointer to RELATION token. - * qa: Qualifier attributes already applied. - * return: pointer to node(s); NULL on error. - */ -static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, - struct ccl_rpn_attr **qa) +static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, + struct ccl_rpn_attr **ap) { - struct ccl_token *lookahead = cclp->look_token; - struct ccl_rpn_attr **ap; - int no = 0; - int i, rel; char *attset; -#if 0 - if (qa) - { - cclp->error_code = CCL_ERR_DOUBLE_QUAL; - return NULL; - } -#endif - for (lookahead = cclp->look_token; lookahead != la; - lookahead=lookahead->next) - no++; - if (qa) - for (i=0; qa[i]; i++) - no++; - ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap)); - ccl_assert (ap); - for (i = 0; cclp->look_token != la; i++) - { - ap[i] = ccl_qual_search (cclp, cclp->look_token->name, - cclp->look_token->len); - if (!ap[i]) - { - cclp->error_code = CCL_ERR_UNKNOWN_QUAL; - free (ap); - return NULL; - } - ADVANCE; - if (KIND == CCL_TOK_COMMA) - ADVANCE; - } - if (qa) - while (*qa) - ap[i++] = *qa++; - ap[i] = NULL; + int rel; + if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)) { /* unordered relation */ @@ -591,7 +512,6 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, if (KIND != CCL_TOK_EQ) { cclp->error_code = CCL_ERR_EQ_EXPECTED; - free (ap); return NULL; } ADVANCE; @@ -600,21 +520,18 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, ADVANCE; if (!(p = find_spec (cclp, ap))) { - free (ap); return NULL; } if (KIND != CCL_TOK_RP) { cclp->error_code = CCL_ERR_RP_EXPECTED; ccl_rpn_delete (p); - free (ap); return NULL; } ADVANCE; } else p = search_terms (cclp, ap); - free (ap); return p; } /* ordered relation ... */ @@ -642,18 +559,15 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, else { struct ccl_rpn_node *p; - + ADVANCE; /* skip relation */ if (KIND == CCL_TOK_TERM && - cclp->look_token->next->len == 1 && + cclp->look_token->next && cclp->look_token->next->len == 1 && cclp->look_token->next->name[0] == '-') { struct ccl_rpn_node *p1; if (!(p1 = search_term (cclp, ap))) - { - free (ap); return NULL; - } ADVANCE; /* skip '-' */ if (KIND == CCL_TOK_TERM) /* = term - term ? */ { @@ -662,7 +576,6 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, if (!(p2 = search_term (cclp, ap))) { ccl_rpn_delete (p1); - free (ap); return NULL; } p = mk_node (CCL_RPN_AND); @@ -670,66 +583,202 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, add_attr (p1, attset, CCL_BIB1_REL, 4); p->u.p[1] = p2; add_attr (p2, attset, CCL_BIB1_REL, 2); - free (ap); return p; } else /* = term - */ { add_attr (p1, attset, CCL_BIB1_REL, 4); - free (ap); return p1; } } else if (cclp->look_token->len == 1 && - cclp->look_token->name[0] == '"') /* = - term ? */ + cclp->look_token->name[0] == '-') /* = - term ? */ { ADVANCE; if (!(p = search_term (cclp, ap))) - { - free (ap); return NULL; - } add_attr (p, attset, CCL_BIB1_REL, 2); - free (ap); return p; } else if (KIND == CCL_TOK_LP) { ADVANCE; if (!(p = find_spec (cclp, ap))) - { - free (ap); return NULL; - } if (KIND != CCL_TOK_RP) { cclp->error_code = CCL_ERR_RP_EXPECTED; ccl_rpn_delete (p); - free (ap); return NULL; } ADVANCE; - free (ap); return p; } else { if (!(p = search_terms (cclp, ap))) - { - free (ap); return NULL; - } add_attr (p, attset, CCL_BIB1_REL, rel); - free (ap); return p; } cclp->error_code = CCL_ERR_TERM_EXPECTED; } - free (ap); return NULL; } /* + * qualifiers1: Parse CCL qualifiers and search terms. + * cclp: CCL Parser + * la: Token pointer to RELATION token. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ +static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la, + struct ccl_rpn_attr **qa) +{ + struct ccl_token *lookahead = cclp->look_token; + struct ccl_token *look_start = cclp->look_token; + struct ccl_rpn_attr **ap; + struct ccl_rpn_node *node = 0; + const char *field_str; + int no = 0; + int seq = 0; + int i; + int mode_merge = 1; +#if 0 + if (qa) + { + cclp->error_code = CCL_ERR_DOUBLE_QUAL; + return NULL; + } +#endif + for (lookahead = cclp->look_token; lookahead != la; + lookahead=lookahead->next) + no++; + if (qa) + for (i=0; qa[i]; i++) + no++; + ap = (struct ccl_rpn_attr **)xmalloc ((no ? (no+1) : 2) * sizeof(*ap)); + ccl_assert (ap); + + field_str = ccl_qual_search_special(cclp->bibset, "field"); + if (field_str) + { + if (!strcmp (field_str, "or")) + mode_merge = 0; + else if (!strcmp (field_str, "merge")) + mode_merge = 1; + } + if (!mode_merge) + { + /* consider each field separately and OR */ + lookahead = look_start; + while (lookahead != la) + { + ap[1] = 0; + seq = 0; + while ((ap[0] = ccl_qual_search (cclp, lookahead->name, + lookahead->len, seq)) != 0) + { + struct ccl_rpn_node *node_sub; + cclp->look_token = la; + + node_sub = qualifiers2(cclp, ap); + if (!node_sub) + { + ccl_rpn_delete (node); + xfree (ap); + return 0; + } + if (node) + { + struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + node_this->u.p[0] = node; + node_this->u.p[1] = node_sub; + node = node_this; + } + else + node = node_sub; + seq++; + } + if (seq == 0) + { + cclp->look_token = lookahead; + cclp->error_code = CCL_ERR_UNKNOWN_QUAL; + xfree (ap); + return NULL; + } + lookahead = lookahead->next; + if (lookahead->kind == CCL_TOK_COMMA) + lookahead = lookahead->next; + } + } + else + { + /* merge attributes from ALL fields - including inherited ones */ + while (1) + { + struct ccl_rpn_node *node_sub; + int found = 0; + lookahead = look_start; + for (i = 0; lookahead != la; i++) + { + ap[i] = ccl_qual_search (cclp, lookahead->name, + lookahead->len, seq); + if (ap[i]) + found++; + if (!ap[i] && seq > 0) + ap[i] = ccl_qual_search (cclp, lookahead->name, + lookahead->len, 0); + if (!ap[i]) + { + cclp->look_token = lookahead; + cclp->error_code = CCL_ERR_UNKNOWN_QUAL; + xfree (ap); + return NULL; + } + lookahead = lookahead->next; + if (lookahead->kind == CCL_TOK_COMMA) + lookahead = lookahead->next; + } + if (qa) + { + struct ccl_rpn_attr **qa0 = qa; + + while (*qa0) + ap[i++] = *qa0++; + } + ap[i] = NULL; + + if (!found) + break; + + cclp->look_token = lookahead; + + node_sub = qualifiers2(cclp, ap); + if (!node_sub) + { + ccl_rpn_delete (node); + break; + } + if (node) + { + struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + node_this->u.p[0] = node; + node_this->u.p[1] = node_sub; + node = node_this; + } + else + node = node_sub; + seq++; + } + } + xfree (ap); + return node; +} + + +/* * search_terms: Parse CCL search terms - including proximity. * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -739,17 +788,27 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa) { static int list[] = { - CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, -1}; + CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1}; struct ccl_rpn_node *p1, *p2, *pn; - p1 = search_term_x (cclp, qa, list); + p1 = search_term_x (cclp, qa, list, 1); if (!p1) return NULL; while (1) { if (KIND == CCL_TOK_PROX) { + struct ccl_rpn_node *p_prox = 0; + /* ! word order specified */ + /* % word order not specified */ + p_prox = mk_node(CCL_RPN_TERM); + p_prox->u.t.term = (char *) xmalloc(cclp->look_token->len); + memcpy(p_prox->u.t.term, cclp->look_token->name, + cclp->look_token->len); + p_prox->u.t.term[cclp->look_token->len] = 0; + p_prox->u.t.attr_list = 0; + ADVANCE; - p2 = search_term_x (cclp, qa, list); + p2 = search_term_x (cclp, qa, list, 1); if (!p2) { ccl_rpn_delete (p1); @@ -758,11 +817,12 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; + pn->u.p[2] = p_prox; p1 = pn; } else if (is_term_ok(KIND, list)) { - p2 = search_term_x (cclp, qa, list); + p2 = search_term_x (cclp, qa, list, 1); if (!p2) { ccl_rpn_delete (p1); @@ -771,6 +831,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; + pn->u.p[2] = 0; p1 = pn; } else @@ -826,12 +887,51 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp, { lookahead = lookahead->next; if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ) - return qualifiers (cclp, lookahead, qa); + return qualifiers1 (cclp, lookahead, qa); if (lookahead->kind != CCL_TOK_COMMA) break; lookahead = lookahead->next; } - return search_terms (cclp, qa); + if (qa) + return search_terms (cclp, qa); + else + { + struct ccl_rpn_attr *qa[2]; + struct ccl_rpn_node *node = 0; + int seq; + lookahead = cclp->look_token; + + qa[1] = 0; + for(seq = 0; ;seq++) + { + struct ccl_rpn_node *node_sub; + qa[0] = ccl_qual_search(cclp, "term", 4, seq); + if (!qa[0]) + break; + + cclp->look_token = lookahead; + + node_sub = search_terms (cclp, qa); + if (!node_sub) + { + ccl_rpn_delete (node); + return 0; + } + if (node) + { + struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + node_this->u.p[0] = node; + node_this->u.p[1] = node_sub; + node_this->u.p[2] = 0; + node = node_this; + } + else + node = node_sub; + } + if (!node) + node = search_terms (cclp, 0); + return node; + } } /* @@ -861,6 +961,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, pn = mk_node (CCL_RPN_AND); pn->u.p[0] = p1; pn->u.p[1] = p2; + pn->u.p[2] = 0; p1 = pn; continue; case CCL_TOK_OR: @@ -874,6 +975,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, pn = mk_node (CCL_RPN_OR); pn->u.p[0] = p1; pn->u.p[1] = p2; + pn->u.p[2] = 0; p1 = pn; continue; case CCL_TOK_NOT: @@ -887,6 +989,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, pn = mk_node (CCL_RPN_NOT); pn->u.p[0] = p1; pn->u.p[1] = p2; + pn->u.p[2] = 0; p1 = pn; continue; } @@ -899,6 +1002,8 @@ struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list) { struct ccl_rpn_node *p; + + cclp->look_token = list; p = find_spec (cclp, NULL); if (p && KIND != CCL_TOK_EOL)