X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fcclfind.c;h=72f1aa6fd2627104c4cc79d9aea9c7b560af4c10;hb=be821514c869d68186361b5aab6bbfd1aa60e087;hp=08a4f79440fcd2c0ba2d35d302d77021bc654400;hpb=6e8a8f144dfc8225d07a56d6a7f18056cb18ee22;p=yaz-moved-to-github.git diff --git a/src/cclfind.c b/src/cclfind.c index 08a4f79..72f1aa6 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -41,10 +41,22 @@ * USE OR PERFORMANCE OF THIS SOFTWARE. * */ + +/** + * \file cclfind.c + * \brief Implements parsing of a CCL FIND query. + * + * This source file implements parsing of a CCL Query (ISO8777). + * The parser uses predictive parsing, but it does several tokens + * of lookahead in the handling of relational operations.. So + * it's not really pure. + */ + + /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Id: cclfind.c,v 1.2 2004-08-11 09:02:48 adam Exp $ + * $Id: cclfind.c,v 1.8 2005-06-25 15:46:03 adam Exp $ * * Old Europagate log: * @@ -109,7 +121,7 @@ /* move one token forward */ #define ADVANCE cclp->look_token = cclp->look_token->next -/* +/** * qual_val_type: test for existance of attribute type/value pair. * qa: Attribute array * type: Type of attribute to search for @@ -128,7 +140,7 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value, while (q) { if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC && - q->value.numeric == value) + q->value.numeric == value) { if (attset) *attset = q->set; @@ -139,7 +151,7 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value, return 0; } -/* +/** * strxcat: concatenate strings. * n: Null-terminated Destination string * src: Source string to be appended (not null-terminated) @@ -154,7 +166,7 @@ static void strxcat (char *n, const char *src, int len) *n = '\0'; } -/* +/** * copy_token_name: Return copy of CCL token name * tp: Pointer to token info. * return: malloc(3) allocated copy of token name. @@ -168,7 +180,7 @@ static char *copy_token_name (struct ccl_token *tp) return str; } -/* +/** * mk_node: Create RPN node. * kind: Type of node. * return: pointer to allocated node. @@ -182,7 +194,7 @@ static struct ccl_rpn_node *mk_node (int kind) return p; } -/* +/** * ccl_rpn_delete: Delete RPN tree. * rpn: Pointer to tree. */ @@ -204,8 +216,8 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) for (attr = rpn->u.t.attr_list; attr; attr = attr1) { attr1 = attr->next; - if (attr->kind == CCL_RPN_ATTR_STRING) - xfree(attr->value.str); + if (attr->kind == CCL_RPN_ATTR_STRING) + xfree(attr->value.str); if (attr->set) xfree (attr->set); xfree (attr); @@ -217,6 +229,7 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); + ccl_rpn_delete (rpn->u.p[2]); break; } xfree (rpn); @@ -237,7 +250,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa); static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, - const char *set, int type) + const char *set, int type) { struct ccl_rpn_attr *n; @@ -259,7 +272,7 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, return n; } -/* +/** * add_attr_numeric: Add attribute (type/value) to RPN term node. * p: RPN node of type term. * type: Type of attribute @@ -267,7 +280,7 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, * set: Attribute set name */ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, - int type, int value) + int type, int value) { struct ccl_rpn_attr *n; @@ -277,7 +290,7 @@ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, } static void add_attr_string (struct ccl_rpn_node *p, const char *set, - int type, char *value) + int type, char *value) { struct ccl_rpn_attr *n; @@ -287,7 +300,7 @@ static void add_attr_string (struct ccl_rpn_node *p, const char *set, } -/* +/** * search_term: Parse CCL search term. * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -307,9 +320,9 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, const char *truncation_aliases; truncation_aliases = - ccl_qual_search_special(cclp->bibset, "truncation"); + ccl_qual_search_special(cclp->bibset, "truncation"); if (!truncation_aliases) - truncation_aliases = "?"; + truncation_aliases = "?"; if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0)) and_list = 1; @@ -332,13 +345,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, size_t max = 200; if (and_list || or_list || !multi) max = 1; - - /* ignore commas when dealing with and-lists .. */ + + /* ignore commas when dealing with and-lists .. */ if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA) { - lookahead = lookahead->next; + lookahead = lookahead->next; ADVANCE; - continue; + continue; } /* go through each TERM token. If no truncation attribute is yet met, then look for left/right truncation markers (?) and @@ -347,8 +360,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, { for (i = 0; ilen; i++) if (lookahead->name[i] == ' ') - no_spaces++; - else if (strchr(truncation_aliases, lookahead->name[i])) + no_spaces++; + else if (strchr(truncation_aliases, lookahead->name[i])) { if (no == 0 && i == 0 && lookahead->len >= 1) left_trunc = 1; @@ -358,7 +371,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, else mid_trunc = 1; } - len += 1+lookahead->len; + len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } @@ -396,48 +409,48 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, struct ccl_rpn_attr *attr; for (attr = qa[i]; attr; attr = attr->next) - switch(attr->kind) - { - case CCL_RPN_ATTR_STRING: - add_attr_string(p, attr->set, attr->type, - attr->value.str); - break; - case CCL_RPN_ATTR_NUMERIC: - if (attr->value.numeric > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) - { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = attr->value.numeric; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value.numeric; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value.numeric; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value.numeric; - left_trunc = right_trunc = mid_trunc = 0; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value.numeric; - break; - } - add_attr_numeric(p, attr->set, attr->type, - attr->value.numeric); - } - } + switch(attr->kind) + { + case CCL_RPN_ATTR_STRING: + add_attr_string(p, attr->set, attr->type, + attr->value.str); + break; + case CCL_RPN_ATTR_NUMERIC: + if (attr->value.numeric > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value.numeric; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value.numeric; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value.numeric; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value.numeric; + left_trunc = right_trunc = mid_trunc = 0; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value.numeric; + break; + } + add_attr_numeric(p, attr->set, attr->type, + attr->value.numeric); + } + } } /* len now holds the number of characters in the RPN term */ /* no holds the number of CCL tokens (1 or more) */ @@ -451,7 +464,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, else add_attr_numeric (p, attset, CCL_BIB1_STR, 1); } - + /* make the RPN token */ p->u.t.term = (char *)xmalloc (len); ccl_assert (p->u.t.term); @@ -466,17 +479,14 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, src_len--; src_str++; } - else if (i == no-1 && right_trunc) + if (i == no-1 && right_trunc) src_len--; - if (src_len) + if (i && cclp->look_token->ws_prefix_len) { - int len = strlen(p->u.t.term); - if (len && - !strchr("-+", *src_str) && - !strchr("-+", p->u.t.term[len-1])) - { - strcat (p->u.t.term, " "); - } + size_t len = strlen(p->u.t.term); + memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf, + cclp->look_token->ws_prefix_len); + p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0'; } strxcat (p->u.t.term, src_str, src_len); ADVANCE; @@ -535,43 +545,13 @@ static struct ccl_rpn_node *search_term (CCL_parser cclp, return search_term_x(cclp, qa, list, 0); } -static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, - struct ccl_rpn_attr **ap) +static +struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, + struct ccl_rpn_attr **ap, char *attset) { - char *attset; - int rel; + int rel = 0; + struct ccl_rpn_node *p; - if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)) - { - /* unordered relation */ - struct ccl_rpn_node *p; - if (KIND != CCL_TOK_EQ) - { - cclp->error_code = CCL_ERR_EQ_EXPECTED; - return NULL; - } - ADVANCE; - if (KIND == CCL_TOK_LP) - { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - { - return NULL; - } - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); - return NULL; - } - ADVANCE; - } - else - p = search_terms (cclp, ap); - return p; - } - /* ordered relation ... */ - rel = 0; if (cclp->look_token->len == 1) { if (cclp->look_token->name[0] == '<') @@ -591,78 +571,185 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, rel = 6; } if (!rel) + { cclp->error_code = CCL_ERR_BAD_RELATION; - else + return NULL; + } + ADVANCE; /* skip relation */ + if (rel == 3 && + qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, 0)) { - struct ccl_rpn_node *p; - - ADVANCE; /* skip relation */ - if (KIND == CCL_TOK_TERM && - cclp->look_token->next && cclp->look_token->next->len == 1 && - cclp->look_token->next->name[0] == '-') + /* allow - inside term and treat it as range _always_ */ + /* relation is =. Extract "embedded" - to separate terms */ + if (KIND == CCL_TOK_TERM) { - struct ccl_rpn_node *p1; - if (!(p1 = search_term (cclp, ap))) - return NULL; - ADVANCE; /* skip '-' */ - if (KIND == CCL_TOK_TERM) /* = term - term ? */ + size_t i; + for (i = 0; ilook_token->len; i++) { - struct ccl_rpn_node *p2; - - if (!(p2 = search_term (cclp, ap))) - { - ccl_rpn_delete (p1); - return NULL; - } - p = mk_node (CCL_RPN_AND); - p->u.p[0] = p1; - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - p->u.p[1] = p2; - add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); - return p; + if (cclp->look_token->name[i] == '-') + break; } - else /* = term - */ - { - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - return p1; + + if (cclp->look_token->len > 1 && i == 0) + { /* -xx*/ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = cclp->look_token->name + 1; + ntoken->len = cclp->look_token->len - 1; + + cclp->look_token->len = 1; + cclp->look_token->name = "-"; + } + else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1) + { /* xx- */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = "-"; + ntoken->len = 1; + + (cclp->look_token->len)--; + } + else if (cclp->look_token->len > 2 && i < cclp->look_token->len) + { /* xx-yy */ + struct ccl_token *ntoken1 = ccl_token_add (cclp->look_token); + struct ccl_token *ntoken2 = ccl_token_add (ntoken1); + + ntoken1->kind = CCL_TOK_TERM; /* generate - */ + ntoken1->name = "-"; + ntoken1->len = 1; + + ntoken2->kind = CCL_TOK_TERM; /* generate yy */ + ntoken2->name = cclp->look_token->name + (i+1); + ntoken2->len = cclp->look_token->len - (i+1); + + cclp->look_token->len = i; /* adjust xx */ + } + else if (i == cclp->look_token->len && + cclp->look_token->next && + cclp->look_token->next->kind == CCL_TOK_TERM && + cclp->look_token->next->len > 1 && + cclp->look_token->next->name[0] == '-') + + { /* xx -yy */ + /* we _know_ that xx does not have - in it */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; /* generate - */ + ntoken->name = "-"; + ntoken->len = 1; + + (ntoken->next->name)++; /* adjust yy */ + (ntoken->next->len)--; } } - else if (cclp->look_token->len == 1 && - cclp->look_token->name[0] == '-') /* = - term ? */ - { - ADVANCE; - if (!(p = search_term (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, 2); - return p; - } - else if (KIND == CCL_TOK_LP) + } + + if (rel == 3 && + KIND == CCL_TOK_TERM && + cclp->look_token->next && cclp->look_token->next->len == 1 && + cclp->look_token->next->name[0] == '-') + { + struct ccl_rpn_node *p1; + if (!(p1 = search_term (cclp, ap))) + return NULL; + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - return NULL; - if (KIND != CCL_TOK_RP) + struct ccl_rpn_node *p2; + + if (!(p2 = search_term (cclp, ap))) { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); + ccl_rpn_delete (p1); return NULL; } - ADVANCE; + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); return p; } - else + else /* = term - */ { - if (!(p = search_terms (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, rel); - return p; + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + return p1; } - cclp->error_code = CCL_ERR_TERM_EXPECTED; } + else if (rel == 3 && + cclp->look_token->len == 1 && + cclp->look_token->name[0] == '-') /* = - term ? */ + { + ADVANCE; + if (!(p = search_term (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, 2); + return p; + } + else if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (cclp, ap))) + return NULL; + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; + return p; + } + else + { + if (!(p = search_terms (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, rel); + return p; + } + cclp->error_code = CCL_ERR_TERM_EXPECTED; return NULL; } -/* +static +struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, struct ccl_rpn_attr **ap) +{ + char *attset; + struct ccl_rpn_node *p; + + if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset) + || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) + return qualifiers_order(cclp, ap, attset); + + /* unordered relation */ + if (KIND != CCL_TOK_EQ) + { + cclp->error_code = CCL_ERR_EQ_EXPECTED; + return NULL; + } + ADVANCE; + if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (cclp, ap))) + { + return NULL; + } + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; + } + else + p = search_terms (cclp, ap); + return p; +} + +/** * qualifiers1: Parse CCL qualifiers and search terms. * cclp: CCL Parser * la: Token pointer to RELATION token. @@ -814,7 +901,7 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la, } -/* +/** * search_terms: Parse CCL search terms - including proximity. * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -876,7 +963,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, return p1; } -/* +/** * search_elements: Parse CCL search elements * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -970,7 +1057,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp, } } -/* +/** * find_spec: Parse CCL find specification * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -1038,8 +1125,6 @@ struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list) { struct ccl_rpn_node *p; - - cclp->look_token = list; p = find_spec (cclp, NULL); if (p && KIND != CCL_TOK_EOL) @@ -1059,7 +1144,7 @@ struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list) return p; } -/* +/** * ccl_find: Parse CCL find - token representation * bibset: Bibset to be used for the parsing * list: List of tokens @@ -1085,7 +1170,7 @@ struct ccl_rpn_node *ccl_find (CCL_bibset bibset, struct ccl_token *list, return p; } -/* +/** * ccl_find_str: Parse CCL find - string representation * bibset: Bibset to be used for the parsing * str: String to be parsed @@ -1112,3 +1197,11 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, ccl_token_del (list); return p; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +