From c95d4ba97774c6feff8579544c36bf22b5d35976 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 4 Jun 2015 17:16:27 +0200 Subject: [PATCH] CCL: And-list / sub-phrase split-out YAZ-844 Special structure config s=sl enables this. --- doc/book.xml | 6 +++ include/yaz/ccl.h | 1 + src/cclfind.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++--- src/cclqfile.c | 2 + test/test_ccl.c | 17 +++++++ util/bib1 | 1 + 6 files changed, 162 insertions(+), 8 deletions(-) diff --git a/doc/book.xml b/doc/book.xml index e6e839f..ecebd1d 100644 --- a/doc/book.xml +++ b/doc/book.xml @@ -5836,6 +5836,12 @@ typedef struct { in YAZ 4.2.38. + s=sl + + Tokens are split into sub-phrases of all combinations - in order. + This facility appeared in YAZ 5.14.0. + + r=o Allows ranges and the operators greather-than, less-than, ... diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index d701095..b7f4e44 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -356,6 +356,7 @@ struct ccl_rpn_attr *ccl_parser_qual_search(CCL_parser cclp, const char *name, #define CCL_BIB1_STR_AND_LIST (-2) #define CCL_BIB1_STR_OR_LIST (-3) #define CCL_BIB1_STR_AUTO_GROUP (-4) +#define CCL_BIB1_STR_SPLIT_LIST (-5) #define CCL_BIB1_REL_ORDER (-1) #define CCL_BIB1_REL_PORDER (-2) #define CCL_BIB1_REL_OMIT_EQUALS (-3) diff --git a/src/cclfind.c b/src/cclfind.c index 5d7f821..beec793 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -113,6 +113,54 @@ struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) return p; }
+static struct ccl_rpn_node *ccl_rpn_dup(struct ccl_rpn_node *rpn)
+{   /* Recursively copies the tree at rpn, duplicating every string and attribute; a null rpn gives 0 */
+    struct ccl_rpn_node *n;
+    struct ccl_rpn_attr *attr, **attrp; /* attrp: the link that receives the next copied attribute */
+    if (!rpn)
+        return 0;
+    n = ccl_rpn_node_create(rpn->kind);
+    switch (rpn->kind)
+    {
+    case CCL_RPN_AND:
+    case CCL_RPN_OR:
+    case CCL_RPN_NOT: /* these kinds copy the children p[0] and p[1] */
+        n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]);
+        n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]);
+        break;
+    case CCL_RPN_TERM: /* copies the term text, the qualifier if present, and the attribute list */
+        n->u.t.term = xstrdup(rpn->u.t.term);
+        n->u.t.qual = rpn->u.t.qual ? xstrdup(rpn->u.t.qual) : 0;
+        attrp = &n->u.t.attr_list;
+        for (attr = rpn->u.t.attr_list; attr; attr = attr->next)
+        {   /* append a copy of attr so the new list keeps the source order */
+            *attrp = (struct ccl_rpn_attr *) xmalloc(sizeof(**attrp));
+            (*attrp)->kind = attr->kind;
+            (*attrp)->type = attr->type;
+            if (attr->kind == CCL_RPN_ATTR_STRING) /* string values get their own copy */
+                (*attrp)->value.str = xstrdup(attr->value.str);
+            else
+                (*attrp)->value.numeric = attr->value.numeric;
+            if (attr->set)
+                (*attrp)->set = xstrdup(attr->set);
+            else
+                (*attrp)->set = 0;
+            attrp = &(*attrp)->next;
+        }
+        *attrp = 0; /* terminate the copied list */
+        break;
+    case CCL_RPN_SET:
+        n->u.setname = xstrdup(rpn->u.setname);
+        break;
+    case CCL_RPN_PROX: /* this kind also copies a third child, p[2] */
+        n->u.p[0] = ccl_rpn_dup(rpn->u.p[0]);
+        n->u.p[1] = ccl_rpn_dup(rpn->u.p[1]);
+        n->u.p[2] = ccl_rpn_dup(rpn->u.p[2]);
+        break;
+    }
+    return n;
+}
+
/** * ccl_rpn_delete: Delete RPN tree. * rpn: Pointer to tree. @@ -321,9 +369,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len, static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, + struct ccl_token *lookahead0, struct ccl_rpn_attr *attr_use, ccl_qualifier_t *qa, - size_t no, int term_len, + size_t no, int is_phrase, int auto_group) { @@ -341,11 +390,12 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, int z3958_trunc = 0; int is_ccl_masked = 0; char *attset; - struct ccl_token *lookahead = cclp->look_token; + struct ccl_token *lookahead = lookahead0; const char **truncation_aliases; const char *t_default[2]; const char **mask_aliases; const char *m_default[2]; + int term_len = 0; truncation_aliases = ccl_qual_search_special(cclp->bibset, "truncation"); @@ -369,9 +419,11 @@ static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp, truncation_aliases, mask_aliases)) is_ccl_masked = 1; + + term_len += 1 + lookahead->len + lookahead->ws_prefix_len; lookahead = lookahead->next; } - lookahead = cclp->look_token; + lookahead = lookahead0; p = ccl_rpn_node_create(CCL_RPN_TERM); p->u.t.attr_list = NULL; @@ -530,6 +582,77 @@ static struct ccl_rpn_node
*ccl_term_one_use(CCL_parser cclp, return p; }
+static struct ccl_rpn_node *split_recur(CCL_parser cclp, ccl_qualifier_t *qa,
+                                        struct ccl_rpn_node *parent,
+                                        struct ccl_token **ar, size_t sz)
+{   /* ORs every in-order split of ar[0..sz-1] into terms; takes over parent (may be 0); 0 on failure */
+    size_t l;
+    struct ccl_rpn_node *p_top = 0;
+    assert(sz > 0);
+    for (l = 1; l <= sz; l++) /* l: how many leading tokens form the first term of this split */
+    {
+        struct ccl_rpn_node *p1, *p2, *tmp;
+        p2 = ccl_term_one_use(cclp, ar[0], /* attr_use */0, qa, l, l > 1,
+                              /* auto_group */0);
+        if (!p2)
+        {   /* parent is still unconsumed only while l == 1; after that it lives inside p_top */
+            p1 = l == 1 ? parent : p_top;
+            if (p1)
+                ccl_rpn_delete(p1);
+            return 0;
+        }
+        if (parent)
+        {   /* the first split moves parent itself into the tree, later splits use a copy */
+            tmp = ccl_rpn_node_create(CCL_RPN_AND);
+            tmp->u.p[0] = l > 1 ? ccl_rpn_dup(parent) : parent;
+            tmp->u.p[1] = p2;
+            p2 = tmp;
+        }
+        p1 = sz > l ? split_recur(cclp, qa, p2, ar + l, sz - l) : p2;
+        if (!p1)
+        {   /* the failed recursive call has already released p2 */
+            if (p_top)
+                ccl_rpn_delete(p_top);
+            return 0;
+        }
+        if (p_top)
+        {   /* chain this alternative onto the ones built so far */
+            tmp = ccl_rpn_node_create(CCL_RPN_OR);
+            tmp->u.p[0] = p_top;
+            tmp->u.p[1] = p1;
+            p1 = tmp;
+        }
+        p_top = p1;
+    }
+    assert(p_top);
+    return p_top;
+}
+
+static struct ccl_rpn_node *search_term_split_list(CCL_parser cclp,
+                                                   ccl_qualifier_t *qa,
+                                                   int *term_list, int multi)
+{   /* Turns the run of term tokens at look_token into a split-list tree; multi is never read */
+    struct ccl_rpn_node *p;
+    struct ccl_token **ar, *lookahead = cclp->look_token;
+    size_t i, sz; /* sz: number of consecutive tokens accepted by is_term_ok */
+    for (sz = 0; is_term_ok(lookahead->kind, term_list); sz++)
+        lookahead = lookahead->next;
+    if (sz == 0)
+    {
+        cclp->error_code = CCL_ERR_TERM_EXPECTED;
+        return 0;
+    }
+    ar = (struct ccl_token **) xmalloc(sizeof(*ar) * sz); /* sz token pointers, not sz tokens */
+    lookahead = cclp->look_token;
+    for (i = 0; i < sz; i++, lookahead = lookahead->next)
+        ar[i] = lookahead;
+    p = split_recur(cclp, qa, 0, ar, sz);
+    xfree(ar);
+    for (i = 0; i < sz; i++) /* ADVANCE once per collected token, also when p is 0 */
+        ADVANCE;
+    return p;
+}
+
/** * search_term: Parse CCL search term.
* cclp: CCL Parser @@ -554,11 +677,14 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, auto_group = 1; if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0)) or_list = 1; + if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_SPLIT_LIST, 0)) + { + return search_term_split_list(cclp, qa, term_list, multi); + } while (1) { struct ccl_rpn_node *p = 0; size_t no, i; - int len = 0; int is_phrase = 0; size_t max = 200; if (and_list || or_list || !multi) @@ -585,11 +711,10 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } else if (this_is_phrase || no > 0) is_phrase = 1; - len += 1+lookahead->len+lookahead->ws_prefix_len; lookahead = lookahead->next; } - if (len == 0) + if (no == 0) break; /* no more terms . stop . */ /* go through all attributes and add them to the attribute list */ @@ -601,7 +726,8 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, if (attr->type == 1) { struct ccl_rpn_node *tmp2; - tmp2 = ccl_term_one_use(cclp, attr, qa, no, len, + tmp2 = ccl_term_one_use(cclp, cclp->look_token, + attr, qa, no, is_phrase, auto_group); if (!tmp2) { @@ -621,7 +747,8 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp, } } if (!p) - p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len, + p = ccl_term_one_use(cclp, cclp->look_token, + 0 /* attr: no use */, qa, no, is_phrase, auto_group); for (i = 0; i < no; i++) ADVANCE; diff --git a/src/cclqfile.c b/src/cclqfile.c index df98f33..2cc73bd 100644 --- a/src/cclqfile.c +++ b/src/cclqfile.c @@ -150,6 +150,8 @@ int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name, value = CCL_BIB1_STR_OR_LIST; if (!ccl_stricmp (value_str, "ag")) value = CCL_BIB1_STR_AUTO_GROUP; + if (!ccl_stricmp (value_str, "sl")) + value = CCL_BIB1_STR_SPLIT_LIST; break; case 't': case 'T': diff --git a/test/test_ccl.c b/test/test_ccl.c index bcd753c..853a05a 100644 --- a/test/test_ccl.c +++ b/test/test_ccl.c @@ -87,6 +87,7 @@ void tst1(int pass) ccl_qual_fitem(bibset, "r=o", 
"x"); ccl_qual_fitem(bibset, "dc.title", "title"); ccl_qual_fitem(bibset, "s=ag", "ag"); + ccl_qual_fitem(bibset, "s=sl", "splitlist"); break; case 1: strcpy(tstline, "ti u=4 s=pw t=l,r"); @@ -118,6 +119,9 @@ void tst1(int pass) strcpy(tstline, "ag s=ag"); ccl_qual_line(bibset, tstline); + + strcpy(tstline, "splitlist s=sl"); + ccl_qual_line(bibset, tstline); break; case 2: ccl_qual_buf(bibset, "ti u=4 s=pw t=l,r\n" @@ -131,6 +135,7 @@ void tst1(int pass) "title dc.title\n" "comb term dc.title\n" "ag s=ag\n" + "splitlist s=sl\n" ); break; case 3: @@ -177,6 +182,9 @@ void tst1(int pass) " \n" " \n" " \n" + " \n" + " \n" + " \n" "\n"; doc = xmlParseMemory(xml_str, strlen(xml_str)); @@ -428,6 +436,15 @@ void tst1(int pass) YAZ_CHECK(tst_ccl_query(bibset, "ag=\"a b c\" \"d e\"", "@and @attr 4=1 \"a b c\" @attr 4=1 \"d e\" ")); + + YAZ_CHECK(tst_ccl_query(bibset, "splitlist=a", "a ")); + YAZ_CHECK(tst_ccl_query(bibset, "splitlist=a b", "@or " + "@and a b \"a b\" ")); + YAZ_CHECK(tst_ccl_query(bibset, "splitlist=a b c", "@or @or @or " + "@and @and a b c " + "@and a \"b c\" " + "@and \"a b\" c " + "\"a b c\" ")); ccl_qual_rm(&bibset); } diff --git a/util/bib1 b/util/bib1 index b2a8352..0de6ab8 100644 --- a/util/bib1 +++ b/util/bib1 @@ -5,3 +5,4 @@ date 1=31 r=r,omiteq x 1=x r=o title dc.title comb term dc.title +splitlist s=sl,pw -- 1.7.10.4