X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=72f1aa6fd2627104c4cc79d9aea9c7b560af4c10;hp=0883235617aea7dd7cf7cdf6822ba97f8a02f514;hb=10f391e000230e5a93c768f541a24a379809489e;hpb=e4baade57ba02d625ecd6452b0f4383b24c25f2d diff --git a/src/cclfind.c b/src/cclfind.c index 0883235..72f1aa6 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -41,10 +41,22 @@ * USE OR PERFORMANCE OF THIS SOFTWARE. * */ + +/** + * \file cclfind.c + * \brief Implements parsing of a CCL FIND query. + * + * This source file implements parsing of a CCL Query (ISO8777). + * The parser uses predictive parsing, but it does several tokens + * of lookahead in the handling of relational operations.. So + * it's not really pure. + */ + + /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Id: cclfind.c,v 1.4 2004-09-29 20:37:50 adam Exp $ + * $Id: cclfind.c,v 1.8 2005-06-25 15:46:03 adam Exp $ * * Old Europagate log: * @@ -109,7 +121,7 @@ /* move one token forward */ #define ADVANCE cclp->look_token = cclp->look_token->next -/* +/** * qual_val_type: test for existance of attribute type/value pair. * qa: Attribute array * type: Type of attribute to search for @@ -128,7 +140,7 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value, while (q) { if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC && - q->value.numeric == value) + q->value.numeric == value) { if (attset) *attset = q->set; @@ -139,7 +151,7 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value, return 0; } -/* +/** * strxcat: concatenate strings. * n: Null-terminated Destination string * src: Source string to be appended (not null-terminated) @@ -154,7 +166,7 @@ static void strxcat (char *n, const char *src, int len) *n = '\0'; } -/* +/** * copy_token_name: Return copy of CCL token name * tp: Pointer to token info. * return: malloc(3) allocated copy of token name. 
@@ -168,7 +180,7 @@ static char *copy_token_name (struct ccl_token *tp) return str; } -/* +/** * mk_node: Create RPN node. * kind: Type of node. * return: pointer to allocated node. @@ -182,7 +194,7 @@ static struct ccl_rpn_node *mk_node (int kind) return p; } -/* +/** * ccl_rpn_delete: Delete RPN tree. * rpn: Pointer to tree. */ @@ -204,8 +216,8 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) for (attr = rpn->u.t.attr_list; attr; attr = attr1) { attr1 = attr->next; - if (attr->kind == CCL_RPN_ATTR_STRING) - xfree(attr->value.str); + if (attr->kind == CCL_RPN_ATTR_STRING) + xfree(attr->value.str); if (attr->set) xfree (attr->set); xfree (attr); @@ -238,7 +250,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa); static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, - const char *set, int type) + const char *set, int type) { struct ccl_rpn_attr *n; @@ -260,7 +272,7 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, return n; } -/* +/** * add_attr_numeric: Add attribute (type/value) to RPN term node. * p: RPN node of type term. * type: Type of attribute @@ -268,7 +280,7 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, * set: Attribute set name */ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, - int type, int value) + int type, int value) { struct ccl_rpn_attr *n; @@ -278,7 +290,7 @@ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, } static void add_attr_string (struct ccl_rpn_node *p, const char *set, - int type, char *value) + int type, char *value) { struct ccl_rpn_attr *n; @@ -288,7 +300,7 @@ static void add_attr_string (struct ccl_rpn_node *p, const char *set, } -/* +/** * search_term: Parse CCL search term. * cclp: CCL Parser * qa: Qualifier attributes already applied. 
@@ -308,9 +320,9 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, const char *truncation_aliases; truncation_aliases = - ccl_qual_search_special(cclp->bibset, "truncation"); + ccl_qual_search_special(cclp->bibset, "truncation"); if (!truncation_aliases) - truncation_aliases = "?"; + truncation_aliases = "?"; if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0)) and_list = 1; @@ -333,13 +345,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, size_t max = 200; if (and_list || or_list || !multi) max = 1; - - /* ignore commas when dealing with and-lists .. */ + + /* ignore commas when dealing with and-lists .. */ if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA) { - lookahead = lookahead->next; + lookahead = lookahead->next; ADVANCE; - continue; + continue; } /* go through each TERM token. If no truncation attribute is yet met, then look for left/right truncation markers (?) and @@ -348,8 +360,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, { for (i = 0; i < lookahead->len; i++) if (lookahead->name[i] == ' ') - no_spaces++; - else if (strchr(truncation_aliases, lookahead->name[i])) + no_spaces++; + else if (strchr(truncation_aliases, lookahead->name[i])) { if (no == 0 && i == 0 && lookahead->len >= 1) left_trunc = 1; @@ -359,7 +371,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, else mid_trunc = 1; } - len += 1+lookahead->len; + len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } @@ -397,48 +409,48 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, struct ccl_rpn_attr *attr; for (attr = qa[i]; attr; attr = attr->next) - switch(attr->kind) - { - case CCL_RPN_ATTR_STRING: - add_attr_string(p, attr->set, attr->type, - attr->value.str); - break; - case CCL_RPN_ATTR_NUMERIC: - if (attr->value.numeric > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) - { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = 
attr->value.numeric; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value.numeric; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value.numeric; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value.numeric; - left_trunc = right_trunc = mid_trunc = 0; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value.numeric; - break; - } - add_attr_numeric(p, attr->set, attr->type, - attr->value.numeric); - } - } + switch(attr->kind) + { + case CCL_RPN_ATTR_STRING: + add_attr_string(p, attr->set, attr->type, + attr->value.str); + break; + case CCL_RPN_ATTR_NUMERIC: + if (attr->value.numeric > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value.numeric; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value.numeric; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value.numeric; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value.numeric; + left_trunc = right_trunc = mid_trunc = 0; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value.numeric; + break; + } + add_attr_numeric(p, attr->set, attr->type, + attr->value.numeric); + } + } } /* len now holds the number of characters in the RPN term */ /* no holds the number of CCL tokens (1 or more) */ @@ -469,15 +481,12 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, } if (i == no-1 && right_trunc) src_len--; - if (src_len) + if (i && cclp->look_token->ws_prefix_len) { - int len = strlen(p->u.t.term); - if (len && - !strchr("-+", *src_str) && - !strchr("-+", p->u.t.term[len-1])) - { - strcat 
(p->u.t.term, " "); - } + size_t len = strlen(p->u.t.term); + memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf, + cclp->look_token->ws_prefix_len); + p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0'; } strxcat (p->u.t.term, src_str, src_len); ADVANCE; @@ -538,7 +547,7 @@ static struct ccl_rpn_node *search_term (CCL_parser cclp, static struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, - struct ccl_rpn_attr **ap, char *attset) + struct ccl_rpn_attr **ap, char *attset) { int rel = 0; struct ccl_rpn_node *p; @@ -564,140 +573,140 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, if (!rel) { cclp->error_code = CCL_ERR_BAD_RELATION; - return NULL; + return NULL; } ADVANCE; /* skip relation */ if (rel == 3 && - qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, 0)) + qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, 0)) { - /* allow - inside term and treat it as range _always_ */ - /* relation is =. Extract "embedded" - to separate terms */ - if (KIND == CCL_TOK_TERM) - { - int i; - for (i = 0; i < cclp->look_token->len; i++) - { - if (cclp->look_token->name[i] == '-') - break; - } - - if (cclp->look_token->len > 1 && i == 0) - { /* -xx*/ - struct ccl_token *ntoken = ccl_token_add (cclp->look_token); - - ntoken->kind = CCL_TOK_TERM; - ntoken->name = cclp->look_token->name + 1; - ntoken->len = cclp->look_token->len - 1; - - cclp->look_token->len = 1; - cclp->look_token->name = "-"; - } - else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1) - { /* xx- */ - struct ccl_token *ntoken = ccl_token_add (cclp->look_token); - - ntoken->kind = CCL_TOK_TERM; - ntoken->name = "-"; - ntoken->len = 1; - - (cclp->look_token->len)--; - } - else if (cclp->look_token->len > 2 && i < cclp->look_token->len) - { /* xx-yy */ - struct ccl_token *ntoken1 = ccl_token_add (cclp->look_token); - struct ccl_token *ntoken2 = ccl_token_add (ntoken1); - - ntoken1->kind = CCL_TOK_TERM; /* generate - */ - ntoken1->name = "-"; - ntoken1->len = 1; - - ntoken2->kind 
= CCL_TOK_TERM; /* generate yy */ - ntoken2->name = cclp->look_token->name + (i+1); - ntoken2->len = cclp->look_token->len - (i+1); - - cclp->look_token->len = i; /* adjust xx */ - } - else if (i == cclp->look_token->len && - cclp->look_token->next && - cclp->look_token->next->kind == CCL_TOK_TERM && - cclp->look_token->next->len > 1 && - cclp->look_token->next->name[0] == '-') - - { /* xx -yy */ - /* we _know_ that xx does not have - in it */ - struct ccl_token *ntoken = ccl_token_add (cclp->look_token); - - ntoken->kind = CCL_TOK_TERM; /* generate - */ - ntoken->name = "-"; - ntoken->len = 1; - - (ntoken->next->name)++; /* adjust yy */ - (ntoken->next->len)--; - } - } + /* allow - inside term and treat it as range _always_ */ + /* relation is =. Extract "embedded" - to separate terms */ + if (KIND == CCL_TOK_TERM) + { + size_t i; + for (i = 0; i < cclp->look_token->len; i++) + { + if (cclp->look_token->name[i] == '-') + break; + } + + if (cclp->look_token->len > 1 && i == 0) + { /* -xx*/ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = cclp->look_token->name + 1; + ntoken->len = cclp->look_token->len - 1; + + cclp->look_token->len = 1; + cclp->look_token->name = "-"; + } + else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1) + { /* xx- */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = "-"; + ntoken->len = 1; + + (cclp->look_token->len)--; + } + else if (cclp->look_token->len > 2 && i < cclp->look_token->len) + { /* xx-yy */ + struct ccl_token *ntoken1 = ccl_token_add (cclp->look_token); + struct ccl_token *ntoken2 = ccl_token_add (ntoken1); + + ntoken1->kind = CCL_TOK_TERM; /* generate - */ + ntoken1->name = "-"; + ntoken1->len = 1; + + ntoken2->kind = CCL_TOK_TERM; /* generate yy */ + ntoken2->name = cclp->look_token->name + (i+1); + ntoken2->len = cclp->look_token->len - (i+1); + + cclp->look_token->len = i; /* adjust xx */ + } 
+ else if (i == cclp->look_token->len && + cclp->look_token->next && + cclp->look_token->next->kind == CCL_TOK_TERM && + cclp->look_token->next->len > 1 && + cclp->look_token->next->name[0] == '-') + + { /* xx -yy */ + /* we _know_ that xx does not have - in it */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; /* generate - */ + ntoken->name = "-"; + ntoken->len = 1; + + (ntoken->next->name)++; /* adjust yy */ + (ntoken->next->len)--; + } + } } if (rel == 3 && - KIND == CCL_TOK_TERM && - cclp->look_token->next && cclp->look_token->next->len == 1 && - cclp->look_token->next->name[0] == '-') + KIND == CCL_TOK_TERM && + cclp->look_token->next && cclp->look_token->next->len == 1 && + cclp->look_token->next->name[0] == '-') { - struct ccl_rpn_node *p1; - if (!(p1 = search_term (cclp, ap))) - return NULL; - ADVANCE; /* skip '-' */ - if (KIND == CCL_TOK_TERM) /* = term - term ? */ - { - struct ccl_rpn_node *p2; - - if (!(p2 = search_term (cclp, ap))) - { - ccl_rpn_delete (p1); - return NULL; - } - p = mk_node (CCL_RPN_AND); - p->u.p[0] = p1; - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - p->u.p[1] = p2; - add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); - return p; - } - else /* = term - */ - { - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - return p1; - } + struct ccl_rpn_node *p1; + if (!(p1 = search_term (cclp, ap))) + return NULL; + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ + { + struct ccl_rpn_node *p2; + + if (!(p2 = search_term (cclp, ap))) + { + ccl_rpn_delete (p1); + return NULL; + } + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); + return p; + } + else /* = term - */ + { + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + return p1; + } } else if (rel == 3 && - cclp->look_token->len == 1 && - cclp->look_token->name[0] == '-') /* = - term ? 
*/ + cclp->look_token->len == 1 && + cclp->look_token->name[0] == '-') /* = - term ? */ { - ADVANCE; - if (!(p = search_term (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, 2); - return p; + ADVANCE; + if (!(p = search_term (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, 2); + return p; } else if (KIND == CCL_TOK_LP) { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - return NULL; - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); - return NULL; - } - ADVANCE; - return p; + ADVANCE; + if (!(p = find_spec (cclp, ap))) + return NULL; + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; + return p; } else { - if (!(p = search_terms (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, rel); - return p; + if (!(p = search_terms (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, rel); + return p; } cclp->error_code = CCL_ERR_TERM_EXPECTED; return NULL; @@ -710,37 +719,37 @@ struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, struct ccl_rpn_attr **ap) struct ccl_rpn_node *p; if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset) - || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) - return qualifiers_order(cclp, ap, attset); + || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) + return qualifiers_order(cclp, ap, attset); /* unordered relation */ if (KIND != CCL_TOK_EQ) { - cclp->error_code = CCL_ERR_EQ_EXPECTED; - return NULL; + cclp->error_code = CCL_ERR_EQ_EXPECTED; + return NULL; } ADVANCE; if (KIND == CCL_TOK_LP) { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - { - return NULL; - } - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); - return NULL; - } - ADVANCE; + ADVANCE; + if (!(p = find_spec (cclp, ap))) + { + return NULL; + } + if (KIND != CCL_TOK_RP) + { + cclp->error_code = 
CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; } else - p = search_terms (cclp, ap); + p = search_terms (cclp, ap); return p; } -/* +/** * qualifiers1: Parse CCL qualifiers and search terms. * cclp: CCL Parser * la: Token pointer to RELATION token. @@ -892,7 +901,7 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la, } -/* +/** * search_terms: Parse CCL search terms - including proximity. * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -954,7 +963,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, return p1; } -/* +/** * search_elements: Parse CCL search elements * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -1048,7 +1057,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp, } } -/* +/** * find_spec: Parse CCL find specification * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -1135,7 +1144,7 @@ struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list) return p; } -/* +/** * ccl_find: Parse CCL find - token representation * bibset: Bibset to be used for the parsing * list: List of tokens @@ -1161,7 +1170,7 @@ struct ccl_rpn_node *ccl_find (CCL_bibset bibset, struct ccl_token *list, return p; } -/* +/** * ccl_find_str: Parse CCL find - string representation * bibset: Bibset to be used for the parsing * str: String to be parsed @@ -1188,3 +1197,11 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, ccl_token_del (list); return p; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +