From: Adam Dickmeiss Date: Fri, 9 Jul 2010 09:55:46 +0000 (+0200) Subject: Work on fixing CCL quote problem, bug #3539. X-Git-Tag: v4.0.11~3 X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=commitdiff_plain;h=89d7ade063c6c1ecd3aba012204bb6d9ec5b8792 Work on fixing CCL quote problem, bug #3539. --- diff --git a/src/cclfind.c b/src/cclfind.c index eb27800..87a216c 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -247,15 +247,14 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, struct ccl_rpn_node *p; size_t no, i; int no_spaces = 0; - int left_trunc = 0; - int right_trunc = 0; - int mid_trunc = 0; int relation_value = -1; int position_value = -1; int structure_value = -1; int truncation_value = -1; int completeness_value = -1; int len = 0; + int left_trunc = 0; + int right_trunc = 0; size_t max = 200; if (and_list || or_list || !multi) max = 1; @@ -275,17 +274,9 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, for (i = 0; i<lookahead->len; i++) if (lookahead->name[i] == ' ') no_spaces++; - else if (strchr(truncation_aliases[0], lookahead->name[i])) - { - if (no == 0 && i == 0 && lookahead->len >= 1) - left_trunc = 1; - else if (!is_term_ok(lookahead->next->kind, term_list) && - i == lookahead->len-1 && i >= 1) - right_trunc = 1; - else - mid_trunc = 1; - } len += 1+lookahead->len+lookahead->ws_prefix_len; + left_trunc = lookahead->left_trunc; + right_trunc = lookahead->right_trunc; lookahead = lookahead->next; } @@ -339,7 +330,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (truncation_value != -1) continue; truncation_value = attr->value.numeric; - left_trunc = right_trunc = mid_trunc = 0; break; case CCL_BIB1_COM: if (completeness_value != -1) @@ -374,13 +364,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, const char *src_str = cclp->look_token->name; size_t src_len = cclp->look_token->len; - if (i == 0 && left_trunc) - { - src_len--; - src_str++; - } - if (i == no-1 && right_trunc) - src_len--; if
(p->u.t.term[0] && cclp->look_token->ws_prefix_len) { size_t len = strlen(p->u.t.term); diff --git a/src/cclp.h b/src/cclp.h index ab9b510..dbfc741 100644 --- a/src/cclp.h +++ b/src/cclp.h @@ -55,6 +55,8 @@ struct ccl_token { struct ccl_token *prev; const char *ws_prefix_buf; /* leading white space buf */ size_t ws_prefix_len; /* leading white space len */ + int left_trunc; /* left truncated */ + int right_trunc; /* right truncated */ }; /** CCL parser structure */ diff --git a/src/ccltoken.c b/src/ccltoken.c index 79d6efc..ed45762 100644 --- a/src/ccltoken.c +++ b/src/ccltoken.c @@ -79,6 +79,7 @@ struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command) last->next->prev = last; last = last->next; } + last->left_trunc = last->right_trunc = 0; last->ws_prefix_buf = (const char *) cp0; last->ws_prefix_len = cp - cp0; last->next = NULL; @@ -121,53 +122,66 @@ struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command) else last->kind = CCL_TOK_REL; break; - case '\"': - last->kind = CCL_TOK_TERM; - last->name = (const char *) cp; - last->len = 0; - while (*cp && *cp != '\"') + default: + --cp; + --last->len; + if (*cp == '?') { + last->left_trunc = 1; cp++; - ++ last->len; } - if (*cp == '\"') - cp++; - break; - default: - if (!strchr("(),%!><= \t\n\r", cp[-1])) + if (*cp == '"') { - while (*cp && !strchr("(),%!><= \t\n\r", *cp)) + cp++; + last->kind = CCL_TOK_TERM; + last->name = (const char *) cp; + while (*cp && *cp != '"') { cp++; ++ last->len; } + if (*cp) + cp++; + } + else + { + last->kind = CCL_TOK_TERM; + last->name = (const char *) cp; + while (*cp && !strchr("(),%!><=? 
\t\n\r", *cp)) + { + ++ last->len; + cp++; + } + aliases = ccl_qual_search_special(cclp->bibset, "and"); + if (!aliases) + aliases = cclp->ccl_token_and; + if (token_cmp(cclp, aliases, last)) + last->kind = CCL_TOK_AND; + + aliases = ccl_qual_search_special(cclp->bibset, "or"); + if (!aliases) + aliases = cclp->ccl_token_or; + if (token_cmp(cclp, aliases, last)) + last->kind = CCL_TOK_OR; + + aliases = ccl_qual_search_special(cclp->bibset, "not"); + if (!aliases) + aliases = cclp->ccl_token_not; + if (token_cmp(cclp, aliases, last)) + last->kind = CCL_TOK_NOT; + + aliases = ccl_qual_search_special(cclp->bibset, "set"); + if (!aliases) + aliases = cclp->ccl_token_set; + + if (token_cmp(cclp, aliases, last)) + last->kind = CCL_TOK_SET; + } + if (*cp == '?') + { + last->right_trunc = 1; + cp++; } - last->kind = CCL_TOK_TERM; - - aliases = ccl_qual_search_special(cclp->bibset, "and"); - if (!aliases) - aliases = cclp->ccl_token_and; - if (token_cmp(cclp, aliases, last)) - last->kind = CCL_TOK_AND; - - aliases = ccl_qual_search_special(cclp->bibset, "or"); - if (!aliases) - aliases = cclp->ccl_token_or; - if (token_cmp(cclp, aliases, last)) - last->kind = CCL_TOK_OR; - - aliases = ccl_qual_search_special(cclp->bibset, "not"); - if (!aliases) - aliases = cclp->ccl_token_not; - if (token_cmp(cclp, aliases, last)) - last->kind = CCL_TOK_NOT; - - aliases = ccl_qual_search_special(cclp->bibset, "set"); - if (!aliases) - aliases = cclp->ccl_token_set; - - if (token_cmp(cclp, aliases, last)) - last->kind = CCL_TOK_SET; } } return first;