X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=9403634d1b4816c25d0fa7fbcc0edcc2a0a07a88;hp=b108b7411fa28ae783ab38c6da76b33753ead5b4;hb=e484d47e2e06b836878786d9772d01cb764913c7;hpb=203519066271f3449d1c4401ed7408f9da5e58ec diff --git a/src/cclfind.c b/src/cclfind.c index b108b74..9403634 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -100,7 +100,7 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) ccl_assert(p); p->kind = kind; - switch(kind) + switch (kind) { case CCL_RPN_TERM: p->u.t.attr_list = 0; @@ -113,6 +113,71 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) return p; } +static struct ccl_rpn_node *ccl_rpn_node_mkbool(struct ccl_rpn_node *l, + struct ccl_rpn_node *r, + enum ccl_rpn_kind op) +{ + if (l && r) + { + struct ccl_rpn_node *tmp = ccl_rpn_node_create(op); + tmp->u.p[0] = l; + tmp->u.p[1] = r; + tmp->u.p[2] = 0; + return tmp; + } + else if (r) + return r; + return l; +} + +static struct ccl_rpn_node *ccl_rpn_dup(struct ccl_rpn_node *rpn) +{ + struct ccl_rpn_node *n; + struct ccl_rpn_attr *attr, **attrp; + if (!rpn) + return 0; + n = ccl_rpn_node_create(rpn->kind); + switch (rpn->kind) + { + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: + n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); + n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); + break; + case CCL_RPN_TERM: + n->u.t.term = xstrdup(rpn->u.t.term); + n->u.t.qual = rpn->u.t.qual ? xstrdup(rpn->u.t.qual) : 0; + attrp = &n->u.t.attr_list; + for (attr = rpn->u.t.attr_list; attr; attr = attr->next) + { + *attrp = (struct ccl_rpn_attr *) xmalloc(sizeof(**attrp)); + (*attrp)->kind = attr->kind; + (*attrp)->type = attr->type; + if (attr->kind == CCL_RPN_ATTR_STRING) + (*attrp)->value.str = xstrdup(attr->value.str); + else + (*attrp)->value.numeric = attr->value.numeric; + if (attr->set) + (*attrp)->set = xstrdup(attr->set); + else + (*attrp)->set = 0; + attrp = &(*attrp)->next; + } + *attrp = 0; + break; + case CCL_RPN_SET: + n->u.setname = xstrdup(rpn->u.setname); + break; + case CCL_RPN_PROX: + n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); + n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); + n->u.p[2] = ccl_rpn_dup(rpn->u.p[2]); + break; + } + return n; +} + /** * ccl_rpn_delete: Delete RPN tree. * rpn: Pointer to tree. @@ -159,7 +224,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa); static int is_term_ok(int look, int *list) { - for (;*list >= 0; list++) + for (; *list >= 0; list++) if (look == *list) return 1; return 0; @@ -170,9 +235,7 @@ static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa); static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, const char *set, int type) { - struct ccl_rpn_attr *n; - - n = (struct ccl_rpn_attr *)xmalloc(sizeof(*n)); + struct ccl_rpn_attr *n = (struct ccl_rpn_attr *) xmalloc(sizeof(*n)); ccl_assert(n); if (set) n->set = xstrdup(set); @@ -181,7 +244,6 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, n->type = type; n->next = p->u.t.attr_list; p->u.t.attr_list = n; - return n; } @@ -195,9 +257,7 @@ static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p, void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, int type, int value) { - struct ccl_rpn_attr *n; - - n = add_attr_node(p, set, type); + struct ccl_rpn_attr *n = add_attr_node(p, set, type); n->kind = CCL_RPN_ATTR_NUMERIC; n->value.numeric = value; } @@ -205,9 +265,7 @@ void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, int type, char *value) { - struct ccl_rpn_attr *n; - - n = add_attr_node(p, set, type); + struct ccl_rpn_attr *n = add_attr_node(p, set, type); n->kind = CCL_RPN_ATTR_STRING; n->value.str = xstrdup(value); } @@ -328,9 +386,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, + struct ccl_token *lookahead0, struct ccl_rpn_attr *attr_use, ccl_qualifier_t *qa, - size_t no, int term_len, + size_t no, int is_phrase, int auto_group) { @@ -348,11 +407,12 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, int z3958_trunc = 0; int is_ccl_masked = 0; char *attset; - struct ccl_token *lookahead = cclp->look_token; + struct ccl_token *lookahead = lookahead0; const char **truncation_aliases; const char *t_default[2]; const char **mask_aliases; const char *m_default[2]; + int term_len = 0; truncation_aliases = ccl_qual_search_special(cclp->bibset, "truncation"); @@ -362,7 +422,6 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, t_default[0] = "?"; t_default[1] = 0; } - mask_aliases = ccl_qual_search_special(cclp->bibset, "mask"); if (!mask_aliases) @@ -371,17 +430,17 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, m_default[0] = "#"; m_default[1] = 0; } - - for (i = 0; i < no; i++) { if (has_ccl_masking(lookahead->name, lookahead->len, truncation_aliases, mask_aliases)) is_ccl_masked = 1; + + term_len += 1 + lookahead->len + lookahead->ws_prefix_len; lookahead = lookahead->next; } - lookahead = cclp->look_token; + lookahead = lookahead0; p = ccl_rpn_node_create(CCL_RPN_TERM); p->u.t.attr_list = NULL; @@ -397,7 +456,7 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, { struct ccl_rpn_attr *attr; for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) - if (attr->type != 1 || attr == attr_use) + if (attr->type != 1 || !attr_use || attr == attr_use) { switch (attr->kind) { @@ -540,6 +599,102 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, return p; } +static struct ccl_rpn_node *ccl_term_multi_use(CCL_parser cclp, + struct ccl_token *lookahead0, + ccl_qualifier_t *qa, + size_t no, + int is_phrase, + int auto_group) +{ + struct ccl_rpn_node *p = 0; + int i; + for (i = 0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) + if (attr->type == 1 && i == 0) + { + struct ccl_rpn_node *tmp2; + tmp2 = ccl_term_one_use(cclp, lookahead0, + attr, qa, no, + is_phrase, auto_group); + if (!tmp2) + { + ccl_rpn_delete(p); + return 0; + } + p = ccl_rpn_node_mkbool(p, tmp2, CCL_RPN_OR); + } + } + if (!p) + p = ccl_term_one_use(cclp, lookahead0, + 0 /* attr: no use */, qa, no, + is_phrase, auto_group); + return p; +} + +static struct ccl_rpn_node *split_recur(CCL_parser cclp, ccl_qualifier_t *qa, + struct ccl_rpn_node *parent, + struct ccl_token **ar, size_t sz) +{ + size_t l; + struct ccl_rpn_node *p_top = 0; + assert(sz > 0); + for (l = 1; l <= sz; l++) + { + struct ccl_rpn_node *p1; + struct ccl_rpn_node *p2 = ccl_term_multi_use(cclp, ar[0], + qa, l, + l > 1, + /* auto_group */0); + if (!p2) + return 0; + if (parent) + { + struct ccl_rpn_node *tmp = ccl_rpn_node_create(CCL_RPN_AND); + tmp->u.p[0] = l > 1 ? ccl_rpn_dup(parent) : parent; + tmp->u.p[1] = p2; + p2 = tmp; + } + if (sz > l) + p1 = split_recur(cclp, qa, p2, ar + l, sz - l); + else + p1 = p2; + p_top = ccl_rpn_node_mkbool(p_top, p1, CCL_RPN_OR); + } + assert(p_top); + return p_top; +} + +static struct ccl_rpn_node *search_term_split_list(CCL_parser cclp, + ccl_qualifier_t *qa, + int *term_list, int multi) +{ + struct ccl_rpn_node *p; + struct ccl_token **ar; + struct ccl_token *lookahead = cclp->look_token; + size_t i, sz; + for (sz = 0; is_term_ok(lookahead->kind, term_list); sz++) + lookahead = lookahead->next; + if (sz == 0) + { + cclp->error_code = CCL_ERR_TERM_EXPECTED; + return 0; + } + ar = (struct ccl_token **) xmalloc(sizeof(*lookahead) * sz); + lookahead = cclp->look_token; + for (i = 0; is_term_ok(lookahead->kind, term_list); i++) + { + ar[i] = lookahead; + lookahead = lookahead->next; + } + p = split_recur(cclp, qa, 0, ar, sz); + xfree(ar); + for (i = 0; i < sz; i++) + ADVANCE; + return p; +} + /** * search_term: Parse CCL search term. * cclp: CCL Parser @@ -564,11 +719,14 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, auto_group = 1; if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0)) or_list = 1; + if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_SPLIT_LIST, 0)) + { + return search_term_split_list(cclp, qa, term_list, multi); + } while (1) { struct ccl_rpn_node *p = 0; size_t no, i; - int len = 0; int is_phrase = 0; size_t max = 200; if (and_list || or_list || !multi) @@ -595,70 +753,18 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } else if (this_is_phrase || no > 0) is_phrase = 1; - len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } - if (len == 0) + if (no == 0) break; /* no more terms . stop . */ - - /* go through all attributes and add them to the attribute list */ - for (i = 0; qa && qa[i]; i++) - { - struct ccl_rpn_attr *attr; - - for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next) - if (attr->type == 1) - { - struct ccl_rpn_node *tmp2; - tmp2 = ccl_term_one_use(cclp, attr, qa, no, len, - is_phrase, - auto_group); - if (!tmp2) - { - ccl_rpn_delete(p); - return 0; - } - if (!p) - p = tmp2; - else - { - struct ccl_rpn_node *tmp1; - tmp1 = ccl_rpn_node_create(CCL_RPN_OR); - tmp1->u.p[0] = p; - tmp1->u.p[1] = tmp2; - p = tmp1; - } - } - } - if (!p) - { - p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len, - is_phrase, auto_group); - if (!p) - return 0; - } + p = ccl_term_multi_use(cclp, cclp->look_token, qa, no, + is_phrase, auto_group); for (i = 0; i < no; i++) ADVANCE; - /* make the top node point to us.. */ - if (p_top) - { - struct ccl_rpn_node *tmp; - - if (or_list) - tmp = ccl_rpn_node_create(CCL_RPN_OR); - else if (and_list) - tmp = ccl_rpn_node_create(CCL_RPN_AND); - else - tmp = ccl_rpn_node_create(CCL_RPN_AND); - tmp->u.p[0] = p_top; - tmp->u.p[1] = p; - - p_top = tmp; - } - else - p_top = p; - + if (!p) + return 0; + p_top = ccl_rpn_node_mkbool(p_top, p, or_list ? CCL_RPN_OR : CCL_RPN_AND); if (!multi) break; } @@ -703,7 +809,6 @@ static struct ccl_rpn_node *search_terms2(CCL_parser cclp, } - static struct ccl_rpn_node *qualifiers_order(CCL_parser cclp, ccl_qualifier_t *ap, char *attset) @@ -947,16 +1052,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, xfree(ap); return 0; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); seq++; } if (seq == 0) @@ -1019,16 +1115,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, ccl_rpn_delete(node); break; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); seq++; } } @@ -1161,17 +1248,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, ccl_rpn_delete(node); return 0; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node_this->u.p[2] = 0; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); } if (!node) node = search_terms(cclp, 0); @@ -1187,7 +1264,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, */ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) { - struct ccl_rpn_node *p1, *p2, *pn; + struct ccl_rpn_node *p1, *p2; if (!(p1 = search_elements(cclp, qa))) return NULL; while (1) @@ -1202,11 +1279,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_AND); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_AND); continue; case CCL_TOK_OR: ADVANCE; @@ -1216,11 +1289,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_OR); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_OR); continue; case CCL_TOK_NOT: ADVANCE; @@ -1230,11 +1299,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_NOT); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_NOT); continue; } break;