X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=beec79328e2e8ce1069e09552776d8ab496f7eb6;hp=52ea57c7d2636ca1adb5f5ca62e951c13b47ddb0;hb=4d1450cc691292cef5bcfdd41cefc030e4dabbf6;hpb=b1df5f9013d82510f6250d93623a0126ec19265f diff --git a/src/cclfind.c b/src/cclfind.c index 52ea57c..beec793 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -100,7 +100,7 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) ccl_assert(p); p->kind = kind; - switch(kind) + switch (kind) { case CCL_RPN_TERM: p->u.t.attr_list = 0; @@ -113,6 +113,54 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) return p; } +static struct ccl_rpn_node *ccl_rpn_dup(struct ccl_rpn_node *rpn) +{ + struct ccl_rpn_node *n; + struct ccl_rpn_attr *attr, **attrp; + if (!rpn) + return 0; + n = ccl_rpn_node_create(rpn->kind); + switch (rpn->kind) + { + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: + n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); + n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); + break; + case CCL_RPN_TERM: + n->u.t.term = xstrdup(rpn->u.t.term); + n->u.t.qual = rpn->u.t.qual ? xstrdup(rpn->u.t.qual) : 0; + attrp = &n->u.t.attr_list; + for (attr = rpn->u.t.attr_list; attr; attr = attr->next) + { + *attrp = (struct ccl_rpn_attr *) xmalloc(sizeof(**attrp)); + (*attrp)->kind = attr->kind; + (*attrp)->type = attr->type; + if (attr->kind == CCL_RPN_ATTR_STRING) + (*attrp)->value.str = xstrdup(attr->value.str); + else + (*attrp)->value.numeric = attr->value.numeric; + if (attr->set) + (*attrp)->set = xstrdup(attr->set); + else + (*attrp)->set = 0; + attrp = &(*attrp)->next; + } + *attrp = 0; + break; + case CCL_RPN_SET: + n->u.setname = xstrdup(rpn->u.setname); + break; + case CCL_RPN_PROX: + n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); + n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); + n->u.p[2] = ccl_rpn_dup(rpn->u.p[2]); + break; + } + return n; +} + /** * ccl_rpn_delete: Delete RPN tree. * rpn: Pointer to tree. @@ -159,7 +207,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa); static int is_term_ok(int look, int *list) { - for (;*list >= 0; list++) + for (; *list >= 0; list++) if (look == *list) return 1; return 0; @@ -170,9 +218,7 @@ static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa); static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, const char *set, int type) { - struct ccl_rpn_attr *n; - - n = (struct ccl_rpn_attr *)xmalloc(sizeof(*n)); + struct ccl_rpn_attr *n = (struct ccl_rpn_attr *) xmalloc(sizeof(*n)); ccl_assert(n); if (set) n->set = xstrdup(set); @@ -181,7 +227,6 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, n->type = type; n->next = p->u.t.attr_list; p->u.t.attr_list = n; - return n; } @@ -195,9 +240,7 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, int type, int value) { - struct ccl_rpn_attr *n; - - n = add_attr_node(p, set, type); + struct ccl_rpn_attr *n = add_attr_node(p, set, type); n->kind = CCL_RPN_ATTR_NUMERIC; n->value.numeric = value; } @@ -205,9 +248,7 @@ void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, int type, char *value) { - struct ccl_rpn_attr *n; - - n = add_attr_node(p, set, type); + struct ccl_rpn_attr *n = add_attr_node(p, set, type); n->kind = CCL_RPN_ATTR_STRING; n->value.str = xstrdup(value); } @@ -231,7 +272,7 @@ static size_t cmp_operator(const char **aliases, const char *input) #define CCL_CHARS "#?\\" static int has_ccl_masking(const char *src_str, - int src_len, + size_t src_len, const char **truncation_aliases, const char **mask_aliases) { @@ -327,15 +368,12 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, } -#if YAZ_781 static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, + struct ccl_token *lookahead0, struct ccl_rpn_attr *attr_use, ccl_qualifier_t *qa, - int no, int term_len, - const char **truncation_aliases, - const char **mask_aliases, + size_t no, int is_phrase, - int is_ccl_masked, int auto_group) { struct ccl_rpn_node *p; @@ -350,8 +388,42 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, int right_trunc = 0; int regex_trunc = 0; int z3958_trunc = 0; + int is_ccl_masked = 0; char *attset; - struct ccl_token *lookahead = cclp->look_token; + struct ccl_token *lookahead = lookahead0; + const char **truncation_aliases; + const char *t_default[2]; + const char **mask_aliases; + const char *m_default[2]; + int term_len = 0; + + truncation_aliases = + ccl_qual_search_special(cclp->bibset, "truncation"); + if (!truncation_aliases) + { + truncation_aliases = t_default; + t_default[0] = "?"; + t_default[1] = 0; + } + mask_aliases = + ccl_qual_search_special(cclp->bibset, "mask"); + if (!mask_aliases) + { + mask_aliases = m_default; + m_default[0] = "#"; + m_default[1] = 0; + } + for (i = 0; i < no; i++) + { + if (has_ccl_masking(lookahead->name, lookahead->len, + truncation_aliases, + mask_aliases)) + is_ccl_masked = 1; + + term_len += 1 + lookahead->len + lookahead->ws_prefix_len; + lookahead = lookahead->next; + } + lookahead = lookahead0; p = ccl_rpn_node_create(CCL_RPN_TERM); p->u.t.attr_list = NULL; @@ -509,7 +581,77 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, } return p; } -#endif + +static struct ccl_rpn_node *split_recur(CCL_parser cclp, ccl_qualifier_t *qa, + struct ccl_rpn_node *parent, + struct ccl_token **ar, size_t sz) +{ + size_t l; + struct ccl_rpn_node *p_top = 0; + assert(sz > 0); + for (l = 1; l <= sz; l++) + { + struct ccl_rpn_node *p1; + struct ccl_rpn_node *p2 = ccl_term_one_use(cclp, ar[0], + /* attr_use */0, + qa, l, + l > 1, + /* auto_group */0); + if (!p2) + return 0; + if (parent) + { + struct ccl_rpn_node *tmp = ccl_rpn_node_create(CCL_RPN_AND); + tmp->u.p[0] = l > 1 ? ccl_rpn_dup(parent) : parent; + tmp->u.p[1] = p2; + p2 = tmp; + } + if (sz > l) + p1 = split_recur(cclp, qa, p2, ar + l, sz - l); + else + p1 = p2; + if (p_top) + { + struct ccl_rpn_node *tmp = ccl_rpn_node_create(CCL_RPN_OR); + tmp->u.p[0] = p_top; + tmp->u.p[1] = p1; + p_top = tmp; + } + else + p_top = p1; + } + assert(p_top); + return p_top; +} + +static struct ccl_rpn_node *search_term_split_list(CCL_parser cclp, + ccl_qualifier_t *qa, + int *term_list, int multi) +{ + struct ccl_rpn_node *p; + struct ccl_token **ar; + struct ccl_token *lookahead = cclp->look_token; + size_t i, sz; + for (sz = 0; is_term_ok(lookahead->kind, term_list); sz++) + lookahead = lookahead->next; + if (sz == 0) + { + cclp->error_code = CCL_ERR_TERM_EXPECTED; + return 0; + } + ar = (struct ccl_token **) xmalloc(sizeof(*lookahead) * sz); + lookahead = cclp->look_token; + for (i = 0; is_term_ok(lookahead->kind, term_list); i++) + { + ar[i] = lookahead; + lookahead = lookahead->next; + } + p = split_recur(cclp, qa, 0, ar, sz); + xfree(ar); + for (i = 0; i < sz; i++) + ADVANCE; + return p; +} /** * search_term: Parse CCL search term. @@ -528,29 +670,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, int and_list = 0; int auto_group = 0; int or_list = 0; - const char **truncation_aliases; - const char *t_default[2]; - const char **mask_aliases; - const char *m_default[2]; - - truncation_aliases = - ccl_qual_search_special(cclp->bibset, "truncation"); - if (!truncation_aliases) - { - truncation_aliases = t_default; - t_default[0] = "?"; - t_default[1] = 0; - } - - mask_aliases = - ccl_qual_search_special(cclp->bibset, "mask"); - if (!mask_aliases) - { - mask_aliases = m_default; - m_default[0] = "#"; - m_default[1] = 0; - } - if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0)) and_list = 1; @@ -558,26 +677,15 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, auto_group = 1; if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0)) or_list = 1; + if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_SPLIT_LIST, 0)) + { + return search_term_split_list(cclp, qa, term_list, multi); + } while (1) { struct ccl_rpn_node *p = 0; size_t no, i; - int len = 0; int is_phrase = 0; - int is_ccl_masked = 0; -#if YAZ_781 -#else - char *attset; - int relation_value = -1; - int position_value = -1; - int structure_value = -1; - int truncation_value = -1; - int completeness_value = -1; - int left_trunc = 0; - int right_trunc = 0; - int regex_trunc = 0; - int z3958_trunc = 0; -#endif size_t max = 200; if (and_list || or_list || !multi) max = 1; @@ -595,12 +703,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, for (i = 0; ilen; i++) if (lookahead->name[i] == ' ') this_is_phrase = 1; - - if (has_ccl_masking(lookahead->name, lookahead->len, - truncation_aliases, - mask_aliases)) - is_ccl_masked = 1; - if (auto_group) { if (no > 0 && (is_phrase || is_phrase != this_is_phrase)) @@ -609,14 +711,12 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } else if (this_is_phrase || no > 0) is_phrase = 1; - len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } - if (len == 0) + if (no == 0) break; /* no more terms . stop . */ -#if YAZ_781 /* go through all attributes and add them to the attribute list */ for (i = 0; qa && qa[i]; i++) { @@ -626,10 +726,9 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (attr->type == 1) { struct ccl_rpn_node *tmp2; - tmp2 = ccl_term_one_use(cclp, attr, qa, no, len, - truncation_aliases, mask_aliases, - is_phrase, is_ccl_masked, - auto_group); + tmp2 = ccl_term_one_use(cclp, cclp->look_token, + attr, qa, no, + is_phrase, auto_group); if (!tmp2) { ccl_rpn_delete(p); @@ -648,172 +747,13 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } } if (!p) - { - p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len, - truncation_aliases, mask_aliases, - is_phrase, is_ccl_masked, auto_group); - if (!p) - return 0; - } + p = ccl_term_one_use(cclp, cclp->look_token, + 0 /* attr: no use */, qa, no, + is_phrase, auto_group); for (i = 0; i < no; i++) ADVANCE; -#else - /* create the term node, but wait a moment before adding the term */ - p = ccl_rpn_node_create(CCL_RPN_TERM); - p->u.t.attr_list = NULL; - p->u.t.term = NULL; - if (qa && qa[0]) - { - const char *n = ccl_qual_get_name(qa[0]); - if (n) - p->u.t.qual = xstrdup(n); - } - - /* go through all attributes and add them to the attribute list */ - for (i=0; qa && qa[i]; i++) - { - struct ccl_rpn_attr *attr; - - for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) - switch(attr->kind) - { - case CCL_RPN_ATTR_STRING: - ccl_add_attr_string(p, attr->set, attr->type, - attr->value.str); - break; - case CCL_RPN_ATTR_NUMERIC: - if (attr->value.numeric > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) - { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = attr->value.numeric; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value.numeric; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value.numeric; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value.numeric; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value.numeric; - break; - } - ccl_add_attr_numeric(p, attr->set, attr->type, - attr->value.numeric); - } - } - } - attset = 0; - if (structure_value == -1 && ( - auto_group || - qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset)) - ) - { - if (!is_phrase) - ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2); - else - ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1); - } - - if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX, - &attset)) - { - if (is_ccl_masked) - regex_trunc = 1; /* regex trunc (102) allowed */ - } - else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958, - &attset)) - { - if (is_ccl_masked) - z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */ - } - - /* make the RPN token */ - p->u.t.term = (char *)xmalloc(len * 2 + 2); - ccl_assert(p->u.t.term); - p->u.t.term[0] = '\0'; - for (i = 0; ilook_token->name; - size_t src_len = cclp->look_token->len; - - if (p->u.t.term[0] && cclp->look_token->ws_prefix_len) - { - strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf, - cclp->look_token->ws_prefix_len); - } - if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc, - z3958_trunc, truncation_aliases, mask_aliases, - i == 0, i == no - 1, - &left_trunc, &right_trunc)) - { - ccl_rpn_delete(p); - return NULL; - } - ADVANCE; - } - if (left_trunc && right_trunc) - { - if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH, - &attset)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; - ccl_rpn_delete(p); - return NULL; - } - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3); - } - else if (right_trunc) - { - if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT, - &attset)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; - ccl_rpn_delete(p); - return NULL; - } - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1); - } - else if (left_trunc) - { - if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT, - &attset)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; - ccl_rpn_delete(p); - return NULL; - } - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2); - } - else if (regex_trunc) - { - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102); - } - else if (z3958_trunc) - { - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104); - } - else - { - if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE, - &attset)) - ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100); - } -#endif + if (!p) + return 0; /* make the top node point to us.. */ if (p_top) { @@ -877,7 +817,6 @@ static struct ccl_rpn_node *search_terms2(CCL_parser cclp, } - static struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, ccl_qualifier_t *ap, char *attset)