X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcclfind.c;h=752dd85bb8c8c0b5b55b1c94b795630124a8b170;hp=13714d66598665bf5d8fb6d227ec37c656985625;hb=4db60a9b1f537de4c0f04587b44be32d0701a64f;hpb=5921175c5859c16c2ba411999831b8aaf64917b4 diff --git a/src/cclfind.c b/src/cclfind.c index 13714d6..752dd85 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -113,52 +113,21 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) return p; } -static struct ccl_rpn_node *ccl_rpn_dup(struct ccl_rpn_node *rpn) +static struct ccl_rpn_node *ccl_rpn_node_mkbool(struct ccl_rpn_node *l, + struct ccl_rpn_node *r, + enum ccl_rpn_kind op) { - struct ccl_rpn_node *n; - struct ccl_rpn_attr *attr, **attrp; - if (!rpn) - return 0; - n = ccl_rpn_node_create(rpn->kind); - switch (rpn->kind) + if (l && r) { - case CCL_RPN_AND: - case CCL_RPN_OR: - case CCL_RPN_NOT: - n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); - n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); - break; - case CCL_RPN_TERM: - n->u.t.term = xstrdup(rpn->u.t.term); - n->u.t.qual = rpn->u.t.qual ? xstrdup(rpn->u.t.qual) : 0; - attrp = &n->u.t.attr_list; - for (attr = rpn->u.t.attr_list; attr; attr = attr->next) - { - *attrp = (struct ccl_rpn_attr *) xmalloc(sizeof(**attrp)); - (*attrp)->kind = attr->kind; - (*attrp)->type = attr->type; - if (attr->kind == CCL_RPN_ATTR_STRING) - (*attrp)->value.str = xstrdup(attr->value.str); - else - (*attrp)->value.numeric = attr->value.numeric; - if (attr->set) - (*attrp)->set = xstrdup(attr->set); - else - (*attrp)->set = 0; - attrp = &(*attrp)->next; - } - *attrp = 0; - break; - case CCL_RPN_SET: - n->u.setname = xstrdup(rpn->u.setname); - break; - case CCL_RPN_PROX: - n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]); - n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]); - n->u.p[2] = ccl_rpn_dup(rpn->u.p[2]); - break; + struct ccl_rpn_node *tmp = ccl_rpn_node_create(op); + tmp->u.p[0] = l; + tmp->u.p[1] = r; + tmp->u.p[2] = 0; + return tmp; } - return n; + else if (r) + return r; + return l; } /** @@ -606,16 +575,7 @@ static struct ccl_rpn_node *ccl_term_multi_use(CCL_parser cclp, ccl_rpn_delete(p); return 0; } - if (!p) - p = tmp2; - else - { - struct ccl_rpn_node *tmp1; - tmp1 = ccl_rpn_node_create(CCL_RPN_OR); - tmp1->u.p[0] = p; - tmp1->u.p[1] = tmp2; - p = tmp1; - } + p = ccl_rpn_node_mkbool(p, tmp2, CCL_RPN_OR); } } if (!p) @@ -626,41 +586,35 @@ static struct ccl_rpn_node *ccl_term_multi_use(CCL_parser cclp, } static struct ccl_rpn_node *split_recur(CCL_parser cclp, ccl_qualifier_t *qa, - struct ccl_rpn_node *parent, - struct ccl_token **ar, size_t sz) + struct ccl_token **ar, size_t sz, + size_t sub_len) { size_t l; struct ccl_rpn_node *p_top = 0; assert(sz > 0); - for (l = 1; l <= sz; l++) + for (l = 1; l <= sz && l <= sub_len; l++) { - struct ccl_rpn_node *p1; struct ccl_rpn_node *p2 = ccl_term_multi_use(cclp, ar[0], qa, l, l > 1, /* auto_group */0); if (!p2) - return 0; - if (parent) { - struct ccl_rpn_node *tmp = ccl_rpn_node_create(CCL_RPN_AND); - tmp->u.p[0] = l > 1 ? ccl_rpn_dup(parent) : parent; - tmp->u.p[1] = p2; - p2 = tmp; + ccl_rpn_delete(p_top); + return 0; } if (sz > l) - p1 = split_recur(cclp, qa, p2, ar + l, sz - l); - else - p1 = p2; - if (p_top) { - struct ccl_rpn_node *tmp = ccl_rpn_node_create(CCL_RPN_OR); - tmp->u.p[0] = p_top; - tmp->u.p[1] = p1; - p_top = tmp; + struct ccl_rpn_node *p1 = split_recur(cclp, qa, ar + l, sz - l, + sub_len); + if (!p1) + { + ccl_rpn_delete(p2); + return 0; + } + p2 = ccl_rpn_node_mkbool(p2, p1, CCL_RPN_AND); } - else - p_top = p1; + p_top = ccl_rpn_node_mkbool(p_top, p2, CCL_RPN_OR); } assert(p_top); return p_top; @@ -673,7 +627,7 @@ static struct ccl_rpn_node *search_term_split_list(CCL_parser cclp, struct ccl_rpn_node *p; struct ccl_token **ar; struct ccl_token *lookahead = cclp->look_token; - size_t i, sz; + size_t i, sz, sub_len; for (sz = 0; is_term_ok(lookahead->kind, term_list); sz++) lookahead = lookahead->next; if (sz == 0) @@ -688,7 +642,14 @@ static struct ccl_rpn_node *search_term_split_list(CCL_parser cclp, ar[i] = lookahead; lookahead = lookahead->next; } - p = split_recur(cclp, qa, 0, ar, sz); + /* choose sub phrase carefully to avoid huge expansions */ + if (sz >= 7) + sub_len = 1; + else if (sz >= 5) + sub_len = 2; + else + sub_len = 3; + p = split_recur(cclp, qa, ar, sz, sub_len); xfree(ar); for (i = 0; i < sz; i++) ADVANCE; @@ -764,25 +725,7 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, ADVANCE; if (!p) return 0; - /* make the top node point to us.. */ - if (p_top) - { - struct ccl_rpn_node *tmp; - - if (or_list) - tmp = ccl_rpn_node_create(CCL_RPN_OR); - else if (and_list) - tmp = ccl_rpn_node_create(CCL_RPN_AND); - else - tmp = ccl_rpn_node_create(CCL_RPN_AND); - tmp->u.p[0] = p_top; - tmp->u.p[1] = p; - - p_top = tmp; - } - else - p_top = p; - + p_top = ccl_rpn_node_mkbool(p_top, p, or_list ? CCL_RPN_OR : CCL_RPN_AND); if (!multi) break; } @@ -1070,16 +1013,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, xfree(ap); return 0; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); seq++; } if (seq == 0) @@ -1142,16 +1076,7 @@ static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, ccl_rpn_delete(node); break; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); seq++; } } @@ -1284,17 +1209,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, ccl_rpn_delete(node); return 0; } - if (node) - { - struct ccl_rpn_node *node_this = - ccl_rpn_node_create(CCL_RPN_OR); - node_this->u.p[0] = node; - node_this->u.p[1] = node_sub; - node_this->u.p[2] = 0; - node = node_this; - } - else - node = node_sub; + node = ccl_rpn_node_mkbool(node, node_sub, CCL_RPN_OR); } if (!node) node = search_terms(cclp, 0); @@ -1310,7 +1225,7 @@ static struct ccl_rpn_node *search_elements(CCL_parser cclp, */ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) { - struct ccl_rpn_node *p1, *p2, *pn; + struct ccl_rpn_node *p1, *p2; if (!(p1 = search_elements(cclp, qa))) return NULL; while (1) @@ -1325,11 +1240,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_AND); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_AND); continue; case CCL_TOK_OR: ADVANCE; @@ -1339,11 +1250,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_OR); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_OR); continue; case CCL_TOK_NOT: ADVANCE; @@ -1353,11 +1260,7 @@ static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa) ccl_rpn_delete(p1); return NULL; } - pn = ccl_rpn_node_create(CCL_RPN_NOT); - pn->u.p[0] = p1; - pn->u.p[1] = p2; - pn->u.p[2] = 0; - p1 = pn; + p1 = ccl_rpn_node_mkbool(p1, p2, CCL_RPN_NOT); continue; } break;