X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=c86015fe126a215e764bb37954b1702c7ad4e5a4;hp=f7903fe387ef7736684c84a1c8167449bbfe20e2;hb=8cb8947e3a7bff4dbf8f124871cb4905df1adce7;hpb=3e9991341defe84826dbb663400c2b434ef03641 diff --git a/src/cclfind.c b/src/cclfind.c index f7903fe..c86015f 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -1,8 +1,8 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2012 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ -/** +/** * \file cclfind.c * \brief Implements parsing of a CCL FIND query. * @@ -61,7 +61,7 @@ static int qual_val_type(ccl_qualifier_t *qa, int type, int value, /** * strxcat: concatenate strings. - * n: Null-terminated Destination string + * n: Null-terminated Destination string * src: Source string to be appended (not null-terminated) * len: Length of source string. */ @@ -171,7 +171,7 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, const char *set, int type) { struct ccl_rpn_attr *n; - + n = (struct ccl_rpn_attr *)xmalloc(sizeof(*n)); ccl_assert(n); if (set) @@ -181,7 +181,7 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, n->type = type; n->next = p->u.t.attr_list; p->u.t.attr_list = n; - + return n; } @@ -229,9 +229,38 @@ static size_t cmp_operator(const char **aliases, const char *input) #define REGEX_CHARS "^[]{}()|.*+?!$" #define CCL_CHARS "#?\\" + +static int has_ccl_masking(const char *src_str, + size_t src_len, + const char **truncation_aliases, + const char **mask_aliases) +{ + size_t j; + int quote_mode = 0; + + for (j = 0; j < src_len; j++) + { + size_t op_size; + if (j > 0 && src_str[j-1] == '\\') + ; + else if (src_str[j] == '"') + quote_mode = !quote_mode; + else if (!quote_mode && + (op_size = cmp_operator(truncation_aliases, + src_str + j))) + return 1; + else if (!quote_mode && + (op_size = cmp_operator(mask_aliases, + src_str + j))) + return 1; + } + return 0; 
+} + static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, - char *dst_term, int *regex_trunc, int *z3958_trunc, + char *dst_term, int regex_trunc, int z3958_trunc, const char **truncation_aliases, + const char **mask_aliases, int is_first, int is_last, int *left_trunc, int *right_trunc) { @@ -243,16 +272,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, size_t op_size; if (j > 0 && src_str[j-1] == '\\') { - if (*regex_trunc && strchr(REGEX_CHARS "\\", src_str[j])) - { - *regex_trunc = 2; + if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j])) strcat(dst_term, "\\"); - } - else if (*z3958_trunc && strchr(CCL_CHARS "\\", src_str[j])) - { - *z3958_trunc = 2; + else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j])) strcat(dst_term, "\\"); - } strxcat(dst_term, src_str + j, 1); } else if (src_str[j] == '"') @@ -263,16 +286,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, ) { j += (op_size - 1); /* j++ in for loop */ - if (*regex_trunc) - { + if (regex_trunc) strcat(dst_term, ".*"); - *regex_trunc = 2; /* regex trunc is really needed */ - } - else if (*z3958_trunc) - { + else if (z3958_trunc) strcat(dst_term, "?"); - *z3958_trunc = 2; - } else if (is_first && j == 0) *left_trunc = 1; else if (is_last && j == src_len - 1) @@ -283,18 +300,14 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, return -1; } } - else if (!quote_mode && src_str[j] == '#') + else if (!quote_mode && + (op_size = cmp_operator(mask_aliases, src_str + j))) { - if (*regex_trunc) - { + j += (op_size - 1); /* j++ in for loop */ + if (regex_trunc) strcat(dst_term, "."); - *regex_trunc = 2; /* regex trunc is really needed */ - } - else if (*z3958_trunc) - { + else if (z3958_trunc) strcat(dst_term, "#"); - *z3958_trunc = 2; - } else { cclp->error_code = CCL_ERR_TRUNC_NOT_SINGLE; @@ -303,24 +316,203 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, } 
else if (src_str[j] != '\\') { - if (*regex_trunc && strchr(REGEX_CHARS, src_str[j])) - { - *regex_trunc = 2; + if (regex_trunc && strchr(REGEX_CHARS, src_str[j])) strcat(dst_term, "\\"); - } - else if (*z3958_trunc && strchr(CCL_CHARS, src_str[j])) - { - *z3958_trunc = 2; + else if (z3958_trunc && strchr(CCL_CHARS, src_str[j])) strcat(dst_term, "\\"); - } - strxcat(dst_term, src_str + j, 1); + strxcat(dst_term, src_str + j, 1); } } return 0; } + +#if YAZ_781 +static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, + struct ccl_rpn_attr *attr_use, + ccl_qualifier_t *qa, + size_t no, int term_len, + const char **truncation_aliases, + const char **mask_aliases, + int is_phrase, + int is_ccl_masked, + int auto_group) +{ + struct ccl_rpn_node *p; + size_t i; + int relation_value = -1; + int position_value = -1; + int structure_value = -1; + int truncation_value = -1; + int completeness_value = -1; + + int left_trunc = 0; + int right_trunc = 0; + int regex_trunc = 0; + int z3958_trunc = 0; + char *attset; + struct ccl_token *lookahead = cclp->look_token; + + p = ccl_rpn_node_create(CCL_RPN_TERM); + p->u.t.attr_list = NULL; + p->u.t.term = NULL; + if (qa && qa[0]) + { + const char *n = ccl_qual_get_name(qa[0]); + if (n) + p->u.t.qual = xstrdup(n); + } + /* go through all attributes and add them to the attribute list */ + for (i = 0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) + if (attr->type != 1 || attr == attr_use) + { + switch (attr->kind) + { + case CCL_RPN_ATTR_STRING: + ccl_add_attr_string(p, attr->set, attr->type, + attr->value.str); + break; + case CCL_RPN_ATTR_NUMERIC: + if (attr->value.numeric > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value.numeric; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value.numeric; + 
break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value.numeric; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value.numeric; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value.numeric; + break; + } + ccl_add_attr_numeric(p, attr->set, attr->type, + attr->value.numeric); + } + } + } + } + attset = 0; + if (structure_value == -1 && ( + auto_group || + qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset)) + ) + { + if (!is_phrase) + ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2); + else + ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1); + } + if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX, + &attset)) + { + if (is_ccl_masked) + regex_trunc = 1; /* regex trunc (102) allowed */ + } + else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958, + &attset)) + { + if (is_ccl_masked) + z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */ + } + /* make the RPN token */ + p->u.t.term = (char *)xmalloc(term_len * 2 + 2); + ccl_assert(p->u.t.term); + p->u.t.term[0] = '\0'; + + for (i = 0; i < no; i++) + { + const char *src_str = lookahead->name; + size_t src_len = lookahead->len; + + if (p->u.t.term[0] && lookahead->ws_prefix_len) + { + strxcat(p->u.t.term, lookahead->ws_prefix_buf, + lookahead->ws_prefix_len); + } + if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc, + z3958_trunc, truncation_aliases, mask_aliases, + i == 0, i == no - 1, + &left_trunc, &right_trunc)) + { + ccl_rpn_delete(p); + return NULL; + } + lookahead = lookahead->next; + } + if (left_trunc && right_trunc) + { + if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH, + &attset)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; + ccl_rpn_delete(p); + return NULL; + } + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3); + } + else if (right_trunc) + { + if (!qual_val_type(qa, CCL_BIB1_TRU, 
CCL_BIB1_TRU_CAN_RIGHT, + &attset)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; + ccl_rpn_delete(p); + return NULL; + } + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1); + } + else if (left_trunc) + { + if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT, + &attset)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; + ccl_rpn_delete(p); + return NULL; + } + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2); + } + else if (regex_trunc) + { + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102); + } + else if (z3958_trunc) + { + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104); + } + else + { + if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE, + &attset)) + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100); + } + return p; +} +#endif + /** - * search_term: Parse CCL search term. + * search_term: Parse CCL search term. * cclp: CCL Parser * qa: Qualifier attributes already applied. * term_list: tokens we accept as terms in context @@ -336,9 +528,10 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, int and_list = 0; int auto_group = 0; int or_list = 0; - char *attset; const char **truncation_aliases; const char *t_default[2]; + const char **mask_aliases; + const char *m_default[2]; truncation_aliases = ccl_qual_search_special(cclp->bibset, "truncation"); @@ -349,6 +542,16 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, t_default[1] = 0; } + mask_aliases = + ccl_qual_search_special(cclp->bibset, "mask"); + if (!mask_aliases) + { + mask_aliases = m_default; + m_default[0] = "#"; + m_default[1] = 0; + } + + if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0)) and_list = 1; if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AUTO_GROUP, 0)) @@ -357,23 +560,28 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, or_list = 1; while (1) { - struct ccl_rpn_node *p; + struct ccl_rpn_node *p = 0; size_t no, i; + int len = 0; int is_phrase = 0; + int is_ccl_masked = 0; +#if YAZ_781 +#else + char *attset; int 
relation_value = -1; int position_value = -1; int structure_value = -1; int truncation_value = -1; int completeness_value = -1; - int len = 0; int left_trunc = 0; int right_trunc = 0; int regex_trunc = 0; int z3958_trunc = 0; +#endif size_t max = 200; if (and_list || or_list || !multi) max = 1; - + /* ignore commas when dealing with and-lists .. */ if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA) { @@ -388,6 +596,11 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (lookahead->name[i] == ' ') this_is_phrase = 1; + if (has_ccl_masking(lookahead->name, lookahead->len, + truncation_aliases, + mask_aliases)) + is_ccl_masked = 1; + if (auto_group) { if (no > 0 && (is_phrase || is_phrase != this_is_phrase)) @@ -402,7 +615,49 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (len == 0) break; /* no more terms . stop . */ - + +#if YAZ_781 + /* go through all attributes and add them to the attribute list */ + for (i = 0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) + if (attr->type == 1) + { + struct ccl_rpn_node *tmp2; + tmp2 = ccl_term_one_use(cclp, attr, qa, no, len, + truncation_aliases, mask_aliases, + is_phrase, is_ccl_masked, + auto_group); + if (!tmp2) + { + ccl_rpn_delete(p); + return 0; + } + if (!p) + p = tmp2; + else + { + struct ccl_rpn_node *tmp1; + tmp1 = ccl_rpn_node_create(CCL_RPN_OR); + tmp1->u.p[0] = p; + tmp1->u.p[1] = tmp2; + p = tmp1; + } + } + } + if (!p) + { + p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len, + truncation_aliases, mask_aliases, + is_phrase, is_ccl_masked, auto_group); + if (!p) + return 0; + } + for (i = 0; i < no; i++) + ADVANCE; +#else /* create the term node, but wait a moment before adding the term */ p = ccl_rpn_node_create(CCL_RPN_TERM); p->u.t.attr_list = NULL; @@ -418,7 +673,7 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, for (i=0; qa && qa[i]; i++) { struct ccl_rpn_attr *attr; - + 
for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) switch(attr->kind) { @@ -462,8 +717,9 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } } } + attset = 0; if (structure_value == -1 && ( - auto_group || + auto_group || qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset)) ) { @@ -476,12 +732,14 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX, &attset)) { - regex_trunc = 1; /* regex trunc (102) allowed */ + if (is_ccl_masked) + regex_trunc = 1; /* regex trunc (102) allowed */ } else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958, &attset)) { - z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */ + if (is_ccl_masked) + z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */ } /* make the RPN token */ @@ -498,8 +756,9 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf, cclp->look_token->ws_prefix_len); } - if (append_term(cclp, src_str, src_len, p->u.t.term, &regex_trunc, - &z3958_trunc, truncation_aliases, i == 0, i == no - 1, + if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc, + z3958_trunc, truncation_aliases, mask_aliases, + i == 0, i == no - 1, &left_trunc, &right_trunc)) { ccl_rpn_delete(p); @@ -507,26 +766,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } ADVANCE; } - /* make the top node point to us.. 
*/ - if (p_top) - { - struct ccl_rpn_node *tmp; - - if (or_list) - tmp = ccl_rpn_node_create(CCL_RPN_OR); - else if (and_list) - tmp = ccl_rpn_node_create(CCL_RPN_AND); - else - tmp = ccl_rpn_node_create(CCL_RPN_AND); - tmp->u.p[0] = p_top; - tmp->u.p[1] = p; - - p_top = tmp; - } - else - p_top = p; - - if (left_trunc && right_trunc) { if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH, @@ -560,11 +799,11 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2); } - else if (regex_trunc == 2) + else if (regex_trunc) { ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102); } - else if (z3958_trunc == 2) + else if (z3958_trunc) { ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104); } @@ -574,6 +813,26 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, &attset)) ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100); } +#endif + /* make the top node point to us.. */ + if (p_top) + { + struct ccl_rpn_node *tmp; + + if (or_list) + tmp = ccl_rpn_node_create(CCL_RPN_OR); + else if (and_list) + tmp = ccl_rpn_node_create(CCL_RPN_AND); + else + tmp = ccl_rpn_node_create(CCL_RPN_AND); + tmp->u.p[0] = p_top; + tmp->u.p[1] = p; + + p_top = tmp; + } + else + p_top = p; + if (!multi) break; } @@ -612,7 +871,7 @@ static struct ccl_rpn_node *search_terms2(CCL_parser cclp, static int list[] = { CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1}; - + return search_term_x(cclp, qa, list, 1); } } @@ -658,12 +917,17 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, if (KIND == CCL_TOK_TERM) { size_t i; + int quote_mode = 0; for (i = 0; i<cclp->look_token->len; i++) { - if (cclp->look_token->name[i] == '-') + if (i > 0 && cclp->look_token->name[i] == '\\') + ; + else if (cclp->look_token->name[i] == '"') + quote_mode = !quote_mode; + else if (cclp->look_token->name[i] == '-' && !quote_mode) break; } - + if (cclp->look_token->len > 1 && i == 0) { /* -xx*/ struct ccl_token *ntoken = 
ccl_token_add(cclp->look_token); @@ -705,7 +969,7 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, cclp->look_token->next->kind == CCL_TOK_TERM && cclp->look_token->next->len > 1 && cclp->look_token->next->name[0] == '-') - + { /* xx -yy */ /* we _know_ that xx does not have - in it */ struct ccl_token *ntoken = ccl_token_add(cclp->look_token); @@ -715,11 +979,11 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, ntoken->len = 1; (ntoken->next->name)++; /* adjust yy */ - (ntoken->next->len)--; + (ntoken->next->len)--; } } } - + if (rel == 3 && KIND == CCL_TOK_TERM && cclp->look_token->next && cclp->look_token->next->len == 1 && @@ -732,7 +996,7 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, if (KIND == CCL_TOK_TERM) /* = term - term ? */ { struct ccl_rpn_node *p2; - + if (!(p2 = search_term(cclp, ap))) { ccl_rpn_delete(p1); @@ -765,10 +1029,11 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, { if (!(p = search_terms(cclp, ap))) return NULL; - ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel); + if (rel != 3 || + !qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_OMIT_EQUALS, 0)) + ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel); return p; } - cclp->error_code = CCL_ERR_TERM_EXPECTED; return NULL; } @@ -776,7 +1041,7 @@ static struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap) { char *attset; - + if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset) || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) return qualifiers_order(cclp, ap, attset); @@ -792,13 +1057,13 @@ struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap) } /** - * qualifier_list: Parse CCL qualifiers and search terms. + * qualifier_list: Parse CCL qualifiers and search terms. * cclp: CCL Parser * la: Token pointer to RELATION token. * qa: Qualifier attributes already applied. * return: pointer to node(s); NULL on error. 
*/ -static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, +static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, struct ccl_token *la, ccl_qualifier_t *qa) { @@ -848,7 +1113,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, { struct ccl_rpn_node *node_sub; cclp->look_token = la; - + node_sub = qualifier_relation(cclp, ap); if (!node_sub) { @@ -858,7 +1123,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, } if (node) { - struct ccl_rpn_node *node_this = + struct ccl_rpn_node *node_this = ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; @@ -911,17 +1176,17 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, if (qa) { ccl_qualifier_t *qa0 = qa; - + while (*qa0) ap[i++] = *qa0++; } ap[i] = NULL; - + if (!found) break; - + cclp->look_token = lookahead; - + node_sub = qualifier_relation(cclp, ap); if (!node_sub) { @@ -930,7 +1195,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, } if (node) { - struct ccl_rpn_node *node_this = + struct ccl_rpn_node *node_this = ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; @@ -1072,7 +1337,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, } if (node) { - struct ccl_rpn_node *node_this = + struct ccl_rpn_node *node_this = ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; @@ -1160,7 +1425,7 @@ struct ccl_rpn_node *ccl_parser_find_str(CCL_parser cclp, const char *str) return p; } -struct ccl_rpn_node *ccl_parser_find_token(CCL_parser cclp, +struct ccl_rpn_node *ccl_parser_find_token(CCL_parser cclp, struct ccl_token *list) { struct ccl_rpn_node *p;