X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=072fdabc0aa86d23c5a5e4459ddbf53568c835a9;hp=6207b55700f7d443598cbfaa9cef8a4ed8017719;hb=0e7fdbc857d4905e67a7bdf8fe5b6c773ee47a7b;hpb=379504a233e3e2cc85bca1e7b6d864f1395aec7c diff --git a/src/cclfind.c b/src/cclfind.c index 6207b55..072fdab 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2009 Index Data + * Copyright (C) 1995-2012 Index Data * See the file LICENSE for details. */ /** @@ -11,9 +11,13 @@ * of lookahead in the handling of relational operations.. So * it's not really pure. */ +#if HAVE_CONFIG_H +#include +#endif #include #include +#include #include "cclp.h" @@ -208,7 +212,22 @@ void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, n->value.str = xstrdup(value); } +static size_t cmp_operator(const char **aliases, const char *input) +{ + for (; *aliases; aliases++) + { + const char *cp = *aliases; + size_t i; + for (i = 0; *cp && *cp == input[i]; i++, cp++) + ; + if (*cp == '\0') + return i; + } + return 0; +} +#define REGEX_CHARS "^[]{}()|.*+?!$" +#define CCL_CHARS "#?\\" /** * search_term: Parse CCL search term. * cclp: CCL Parser @@ -247,15 +266,16 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, struct ccl_rpn_node *p; size_t no, i; int no_spaces = 0; - int left_trunc = 0; - int right_trunc = 0; - int mid_trunc = 0; int relation_value = -1; int position_value = -1; int structure_value = -1; int truncation_value = -1; int completeness_value = -1; int len = 0; + int left_trunc = 0; + int right_trunc = 0; + int regex_trunc = 0; + int z3958_trunc = 0; size_t max = 200; if (and_list || or_list || !multi) max = 1; @@ -275,16 +295,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, for (i = 0; ilen; i++) if (lookahead->name[i] == ' ') no_spaces++; - else if (strchr(truncation_aliases[0], lookahead->name[i])) - { - if (no == 0 && i == 0 && lookahead->len >= 1) - left_trunc = 1; - else if (!is_term_ok(lookahead->next->kind, term_list) && - i == lookahead->len-1 && i >= 1) - right_trunc = 1; - else - mid_trunc = 1; - } len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } @@ -339,7 +349,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (truncation_value != -1) continue; truncation_value = attr->value.numeric; - left_trunc = right_trunc = mid_trunc = 0; break; case CCL_BIB1_COM: if (completeness_value != -1) @@ -365,30 +374,113 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1); } + if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX, + &attset)) + { + regex_trunc = 1; /* regex trunc (102) allowed */ + } + else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958, + &attset)) + { + z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */ + } + /* make the RPN token */ - p->u.t.term = (char *)xmalloc(len); + p->u.t.term = (char *)xmalloc(len * 2 + 2); ccl_assert(p->u.t.term); p->u.t.term[0] = '\0'; for (i = 0; ilook_token->name; size_t src_len = cclp->look_token->len; - - if (i == 0 && left_trunc) + int j; + int quote_mode = 0; + + if (p->u.t.term[0] && cclp->look_token->ws_prefix_len) { - src_len--; - src_str++; + strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf, + cclp->look_token->ws_prefix_len); } - if (i == no-1 && right_trunc) - src_len--; - if (p->u.t.term[0] && cclp->look_token->ws_prefix_len) + for (j = 0; j < src_len; j++) { - size_t len = strlen(p->u.t.term); - memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf, - cclp->look_token->ws_prefix_len); - p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0'; + size_t op_size; + if (j > 0 && src_str[j-1] == '\\') + { + if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j])) + { + regex_trunc = 2; + strcat(p->u.t.term, "\\"); + } + else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j])) + { + z3958_trunc = 2; + strcat(p->u.t.term, "\\"); + } + strxcat(p->u.t.term, src_str + j, 1); + } + else if (src_str[j] == '"') + quote_mode = !quote_mode; + else if (!quote_mode && + (op_size = cmp_operator(truncation_aliases, + src_str + j)) + ) + { + j += (op_size - 1); /* j++ in for loop */ + if (regex_trunc) + { + strcat(p->u.t.term, ".*"); + regex_trunc = 2; /* regex trunc is really needed */ + } + else if (z3958_trunc) + { + strcat(p->u.t.term, "?"); + z3958_trunc = 2; + } + else if (i == 0 && j == 0) + left_trunc = 1; + else if (i == no - 1 && j == src_len - 1) + right_trunc = 1; + else + { + cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; + ccl_rpn_delete(p); + return NULL; + } + } + else if (!quote_mode && src_str[j] == '#') + { + if (regex_trunc) + { + strcat(p->u.t.term, "."); + regex_trunc = 2; /* regex trunc is really needed */ + } + else if (z3958_trunc) + { + strcat(p->u.t.term, "#"); + z3958_trunc = 2; + } + else + { + cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; + ccl_rpn_delete(p); + return NULL; + } + } + else if (src_str[j] != '\\') + { + if (regex_trunc && strchr(REGEX_CHARS, src_str[j])) + { + regex_trunc = 2; + strcat(p->u.t.term, "\\"); + } + else if (z3958_trunc && strchr(CCL_CHARS, src_str[j])) + { + z3958_trunc = 2; + strcat(p->u.t.term, "\\"); + } + strxcat(p->u.t.term, src_str + j, 1); + } } - strxcat(p->u.t.term, src_str, src_len); ADVANCE; } @@ -445,6 +537,14 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2); } + else if (regex_trunc == 2) + { + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102); + } + else if (z3958_trunc == 2) + { + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104); + } else { if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE, @@ -465,6 +565,37 @@ static struct ccl_rpn_node *search_term(CCL_parser cclp, ccl_qualifier_t *qa) return search_term_x(cclp, qa, list, 0); } + +static struct ccl_rpn_node *search_terms2(CCL_parser cclp, + ccl_qualifier_t *qa) +{ + if (KIND == CCL_TOK_LP) + { + struct ccl_rpn_node *p; + ADVANCE; + if (!(p = find_spec(cclp, qa))) + return NULL; + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete(p); + return NULL; + } + ADVANCE; + return p; + } + else + { + static int list[] = { + CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, + CCL_TOK_REL, CCL_TOK_SET, -1}; + + return search_term_x(cclp, qa, list, 1); + } +} + + + static struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, ccl_qualifier_t *ap, char *attset) @@ -607,20 +738,6 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2); return p; } - else if (KIND == CCL_TOK_LP) - { - ADVANCE; - if (!(p = find_spec(cclp, ap))) - return NULL; - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete(p); - return NULL; - } - ADVANCE; - return p; - } else { if (!(p = search_terms(cclp, ap))) @@ -636,7 +753,6 @@ static struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap) { char *attset; - struct ccl_rpn_node *p; if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset) || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) @@ -649,24 +765,7 @@ struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap) return NULL; } ADVANCE; - if (KIND == CCL_TOK_LP) - { - ADVANCE; - if (!(p = find_spec(cclp, ap))) - { - return NULL; - } - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete(p); - return NULL; - } - ADVANCE; - } - else - p = search_terms(cclp, ap); - return p; + return search_terms(cclp, ap); } /** @@ -833,9 +932,10 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa) { static int list[] = { - CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1}; + CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, + CCL_TOK_REL, CCL_TOK_SET, -1}; struct ccl_rpn_node *p1, *p2, *pn; - p1 = search_term_x(cclp, qa, list, 1); + p1 = search_terms2(cclp, qa); if (!p1) return NULL; while (1) @@ -853,7 +953,7 @@ static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa) p_prox->u.t.attr_list = 0; ADVANCE; - p2 = search_term_x(cclp, qa, list, 1); + p2 = search_terms2(cclp, qa); if (!p2) { ccl_rpn_delete(p1); @@ -867,7 +967,7 @@ static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa) } else if (is_term_ok(KIND, list)) { - p2 = search_term_x(cclp, qa, list, 1); + p2 = search_terms2(cclp, qa); if (!p2) { ccl_rpn_delete(p1); @@ -896,22 +996,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, { struct ccl_rpn_node *p1; struct ccl_token *lookahead; - if (KIND == CCL_TOK_LP) - { - ADVANCE; - p1 = find_spec(cclp, qa); - if (!p1) - return NULL; - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete(p1); - return NULL; - } - ADVANCE; - return p1; - } - else if (KIND == CCL_TOK_SET) + if (KIND == CCL_TOK_SET) { ADVANCE; if (KIND == CCL_TOK_EQ) @@ -937,7 +1022,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, break; lookahead = lookahead->next; } - if (qa) + if (qa || lookahead->kind == CCL_TOK_LP) return search_terms(cclp, qa); else {