From e4baade57ba02d625ecd6452b0f4383b24c25f2d Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 29 Sep 2004 20:37:50 +0000 Subject: [PATCH] Added CCL facility r=r "range" which is similar to r=o "ordered" but does not require white-space before and after the dash, e.g. x=-1990 is equivalent to x <= 1990 iff r=r, but equivalent to x= -1990 iff r=o. Added CCL tests for ranges. Fixed memory leak in CCL parser that occurred when proximity was used. --- NEWS | 5 + client/default.bib | 8 +- include/yaz/ccl.h | 6 +- src/cclfind.c | 280 +++++++++++++++++++++++++++++++++------------------- src/cclqfile.c | 4 +- src/ccltoken.c | 18 +++- test/tstccl.c | 51 ++++++++-- 7 files changed, 253 insertions(+), 119 deletions(-) diff --git a/NEWS b/NEWS index e7740e2..9b9c19d 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Possible compatibility problems with earlier versions marked with '*'. +Added CCL facility r=r "range" which is similar to r=o "ordered" but +does not require white-space before and after the dash in a range, e.g. +x=-1990 is equivalent to x <= 1990 iff r=r, but equivalent to x= -1990 +iff r=o. + Fixed a few AC_TRY_LINK configure tests that did not operate properly due to new GCC removing "redundant" code. 
diff --git a/client/default.bib b/client/default.bib index 4d2046f..6fcac76 100644 --- a/client/default.bib +++ b/client/default.bib @@ -1,5 +1,5 @@ # CCL field mappings -# $Id: default.bib,v 1.8 2001-10-28 23:10:03 adam Exp $ +# $Id: default.bib,v 1.9 2004-09-29 20:37:50 adam Exp $ # # The rule below is used when no fields are specified term t=l,r s=al @@ -14,9 +14,9 @@ isbn u=7 issn u=8 cc u=20 su u=21 s=pw -date u=30 r=o -dp u=31 r=o -da u=32 r=o +date u=30 r=r +dp u=31 r=r +da u=32 r=r la u=54 s=pw ab u=62 s=pw note u=63 s=pw diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index a52829b..7724ace 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -45,7 +45,7 @@ /* * CCL - header file * - * $Id: ccl.h,v 1.17 2004-09-22 12:17:24 adam Exp $ + * $Id: ccl.h,v 1.18 2004-09-29 20:37:50 adam Exp $ * * Old Europagate Log: * @@ -182,6 +182,7 @@ typedef struct ccl_qualifiers *CCL_bibset; #define CCL_BIB1_STR_AND_LIST (-2) #define CCL_BIB1_STR_OR_LIST (-3) #define CCL_BIB1_REL_ORDER (-1) +#define CCL_BIB1_REL_PORDER (-2) #define CCL_BIB1_TRU_CAN_LEFT (-1) #define CCL_BIB1_TRU_CAN_RIGHT (-2) @@ -251,6 +252,9 @@ YAZ_EXPORT struct ccl_token *ccl_token_simple (const char *command); /* Delete token list */ YAZ_EXPORT void ccl_token_del (struct ccl_token *list); +/* Add single token after node at */ +YAZ_EXPORT struct ccl_token *ccl_token_add (struct ccl_token *at); + /* Parse CCL Find command - NULL-terminated string */ YAZ_EXPORT struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, int *error, int *pos); diff --git a/src/cclfind.c b/src/cclfind.c index 82dd726..0883235 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -44,7 +44,7 @@ /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Id: cclfind.c,v 1.3 2004-08-11 20:13:36 adam Exp $ + * $Id: cclfind.c,v 1.4 2004-09-29 20:37:50 adam Exp $ * * Old Europagate log: * @@ -217,6 +217,7 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete 
(rpn->u.p[1]); + ccl_rpn_delete (rpn->u.p[2]); break; } xfree (rpn); @@ -535,43 +536,13 @@ static struct ccl_rpn_node *search_term (CCL_parser cclp, return search_term_x(cclp, qa, list, 0); } -static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, - struct ccl_rpn_attr **ap) +static +struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, + struct ccl_rpn_attr **ap, char *attset) { - char *attset; - int rel; + int rel = 0; + struct ccl_rpn_node *p; - if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)) - { - /* unordered relation */ - struct ccl_rpn_node *p; - if (KIND != CCL_TOK_EQ) - { - cclp->error_code = CCL_ERR_EQ_EXPECTED; - return NULL; - } - ADVANCE; - if (KIND == CCL_TOK_LP) - { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - { - return NULL; - } - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); - return NULL; - } - ADVANCE; - } - else - p = search_terms (cclp, ap); - return p; - } - /* ordered relation ... */ - rel = 0; if (cclp->look_token->len == 1) { if (cclp->look_token->name[0] == '<') @@ -591,77 +562,184 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, rel = 6; } if (!rel) + { cclp->error_code = CCL_ERR_BAD_RELATION; - else + return NULL; + } + ADVANCE; /* skip relation */ + if (rel == 3 && + qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, 0)) { - struct ccl_rpn_node *p; + /* allow - inside term and treat it as range _always_ */ + /* relation is =. 
Extract "embedded" - to separate terms */ + if (KIND == CCL_TOK_TERM) + { + int i; + for (i = 0; i<cclp->look_token->len; i++) + { + if (cclp->look_token->name[i] == '-') + break; + } + + if (cclp->look_token->len > 1 && i == 0) + { /* -xx*/ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = cclp->look_token->name + 1; + ntoken->len = cclp->look_token->len - 1; + + cclp->look_token->len = 1; + cclp->look_token->name = "-"; + } + else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1) + { /* xx- */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; + ntoken->name = "-"; + ntoken->len = 1; + + (cclp->look_token->len)--; + } + else if (cclp->look_token->len > 2 && i < cclp->look_token->len) + { /* xx-yy */ + struct ccl_token *ntoken1 = ccl_token_add (cclp->look_token); + struct ccl_token *ntoken2 = ccl_token_add (ntoken1); + + ntoken1->kind = CCL_TOK_TERM; /* generate - */ + ntoken1->name = "-"; + ntoken1->len = 1; + + ntoken2->kind = CCL_TOK_TERM; /* generate yy */ + ntoken2->name = cclp->look_token->name + (i+1); + ntoken2->len = cclp->look_token->len - (i+1); + + cclp->look_token->len = i; /* adjust xx */ + } + else if (i == cclp->look_token->len && + cclp->look_token->next && + cclp->look_token->next->kind == CCL_TOK_TERM && + cclp->look_token->next->len > 1 && + cclp->look_token->next->name[0] == '-') + + { /* xx -yy */ + /* we _know_ that xx does not have - in it */ + struct ccl_token *ntoken = ccl_token_add (cclp->look_token); + + ntoken->kind = CCL_TOK_TERM; /* generate - */ + ntoken->name = "-"; + ntoken->len = 1; + + (ntoken->next->name)++; /* adjust yy */ + (ntoken->next->len)--; + } + } + } - ADVANCE; /* skip relation */ - if (KIND == CCL_TOK_TERM && - cclp->look_token->next && cclp->look_token->next->len == 1 && - cclp->look_token->next->name[0] == '-') - { - struct ccl_rpn_node *p1; - if (!(p1 = search_term (cclp, ap))) - return NULL; - 
ADVANCE; /* skip '-' */ - if (KIND == CCL_TOK_TERM) /* = term - term ? */ - { - struct ccl_rpn_node *p2; - - if (!(p2 = search_term (cclp, ap))) - { - ccl_rpn_delete (p1); - return NULL; - } - p = mk_node (CCL_RPN_AND); - p->u.p[0] = p1; - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - p->u.p[1] = p2; - add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); - return p; - } - else /* = term - */ - { - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); - return p1; - } - } - else if (cclp->look_token->len == 1 && - cclp->look_token->name[0] == '-') /* = - term ? */ - { - ADVANCE; - if (!(p = search_term (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, 2); - return p; - } - else if (KIND == CCL_TOK_LP) - { - ADVANCE; - if (!(p = find_spec (cclp, ap))) - return NULL; - if (KIND != CCL_TOK_RP) - { - cclp->error_code = CCL_ERR_RP_EXPECTED; - ccl_rpn_delete (p); - return NULL; - } - ADVANCE; - return p; - } - else - { - if (!(p = search_terms (cclp, ap))) - return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, rel); - return p; - } - cclp->error_code = CCL_ERR_TERM_EXPECTED; + if (rel == 3 && + KIND == CCL_TOK_TERM && + cclp->look_token->next && cclp->look_token->next->len == 1 && + cclp->look_token->next->name[0] == '-') + { + struct ccl_rpn_node *p1; + if (!(p1 = search_term (cclp, ap))) + return NULL; + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ + { + struct ccl_rpn_node *p2; + + if (!(p2 = search_term (cclp, ap))) + { + ccl_rpn_delete (p1); + return NULL; + } + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); + return p; + } + else /* = term - */ + { + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + return p1; + } + } + else if (rel == 3 && + cclp->look_token->len == 1 && + cclp->look_token->name[0] == '-') /* = - term ? 
*/ + { + ADVANCE; + if (!(p = search_term (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, 2); + return p; + } + else if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (cclp, ap))) + return NULL; + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; + return p; + } + else + { + if (!(p = search_terms (cclp, ap))) + return NULL; + add_attr_numeric (p, attset, CCL_BIB1_REL, rel); + return p; } + cclp->error_code = CCL_ERR_TERM_EXPECTED; return NULL; } +static +struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, struct ccl_rpn_attr **ap) +{ + char *attset; + struct ccl_rpn_node *p; + + if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset) + || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset)) + return qualifiers_order(cclp, ap, attset); + + /* unordered relation */ + if (KIND != CCL_TOK_EQ) + { + cclp->error_code = CCL_ERR_EQ_EXPECTED; + return NULL; + } + ADVANCE; + if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (cclp, ap))) + { + return NULL; + } + if (KIND != CCL_TOK_RP) + { + cclp->error_code = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + return NULL; + } + ADVANCE; + } + else + p = search_terms (cclp, ap); + return p; +} + /* * qualifiers1: Parse CCL qualifiers and search terms. 
* cclp: CCL Parser @@ -1038,8 +1116,6 @@ struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list) { struct ccl_rpn_node *p; - - cclp->look_token = list; p = find_spec (cclp, NULL); if (p && KIND != CCL_TOK_EOL) diff --git a/src/cclqfile.c b/src/cclqfile.c index d46b730..0a618cb 100644 --- a/src/cclqfile.c +++ b/src/cclqfile.c @@ -44,7 +44,7 @@ /* CCL qualifiers * Europagate, 1995 * - * $Id: cclqfile.c,v 1.3 2004-09-22 11:21:51 adam Exp $ + * $Id: cclqfile.c,v 1.4 2004-09-29 20:37:50 adam Exp $ * * Old Europagate Log: * @@ -132,6 +132,8 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) type = CCL_BIB1_REL; if (!ccl_stricmp (qual_value, "o")) value = CCL_BIB1_REL_ORDER; + else if (!ccl_stricmp (qual_value, "r")) + value = CCL_BIB1_REL_PORDER; break; case 'p': case 'P': diff --git a/src/ccltoken.c b/src/ccltoken.c index ae8b4c0..06ff2a7 100644 --- a/src/ccltoken.c +++ b/src/ccltoken.c @@ -44,7 +44,7 @@ /* CCL - lexical analysis * Europagate, 1995 * - * $Id: ccltoken.c,v 1.2 2004-08-11 11:44:30 adam Exp $ + * $Id: ccltoken.c,v 1.3 2004-09-29 20:37:50 adam Exp $ * * Old Europagate Log: * @@ -317,6 +317,22 @@ struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command) return first; } +struct ccl_token *ccl_token_add (struct ccl_token *at) +{ + struct ccl_token *n = (struct ccl_token *)xmalloc (sizeof(*n)); + ccl_assert(n); + n->next = at->next; + n->prev = at; + at->next = n; + if (n->next) + n->next->prev = n; + + n->kind = CCL_TOK_TERM; + n->name = 0; + n->len = 0; + return n; +} + struct ccl_token *ccl_tokenize (const char *command) { CCL_parser cclp = ccl_parser_create (); diff --git a/test/tstccl.c b/test/tstccl.c index 9f8bb1b..37cf0c9 100644 --- a/test/tstccl.c +++ b/test/tstccl.c @@ -1,12 +1,13 @@ /* - * Copyright (c) 2002-2003, Index Data + * Copyright (c) 2002-2004, Index Data * See the file LICENSE for details. 
* - * $Id: tstccl.c,v 1.3 2004-09-22 11:21:51 adam Exp $ + * $Id: tstccl.c,v 1.4 2004-09-29 20:37:51 adam Exp $ */ /* CCL test */ +#include #include struct ccl_tst { @@ -23,10 +24,22 @@ static struct ccl_tst query_str[] = { { "x1 and", 0}, { "tix=x5", 0}, { "spid%æserne", "@prox 0 1 0 2 k 2 @attr 4=2 @attr 1=1016 spid @attr 4=2 @attr 1=1016 æserne "}, + { "date=1980", "@attr 2=3 1980 "}, + { "date=234-1990", "@and @attr 2=4 234 @attr 2=2 1990 "}, + { "date=234- 1990", "@and @attr 2=4 234 @attr 2=2 1990 "}, + { "date=234 -1990", "@and @attr 2=4 234 @attr 2=2 1990 "}, + { "date=234 - 1990", "@and @attr 2=4 234 @attr 2=2 1990 "}, + { "date=-1980", "@attr 2=2 1980 "}, + { "date=- 1980", "@attr 2=2 1980 "}, + { "x=-1980", "@attr 2=3 -1980 "}, + { "x=- 1980", "@attr 2=2 1980 "}, + { "x= -1980", "@attr 2=3 -1980 "}, + { "x=234-1990", "@attr 2=3 234-1990 "}, + { "x=234 - 1990", "@and @attr 2=4 234 @attr 2=2 1990 "}, {0, 0} }; -void tst1(int pass) +void tst1(int pass, int *number_of_errors) { CCL_parser parser = ccl_parser_create (); CCL_bibset bibset = ccl_qual_mk(); @@ -39,6 +52,8 @@ void tst1(int pass) ccl_qual_fitem(bibset, "u=4 s=pw t=l,r", "ti"); ccl_qual_fitem(bibset, "1=1016 s=al,pw", "term"); ccl_qual_fitem(bibset, "1=/my/title", "dc.title"); + ccl_qual_fitem(bibset, "r=r", "date"); + ccl_qual_fitem(bibset, "r=o", "x"); break; case 1: strcpy(tstline, "ti u=4 s=pw t=l,r"); @@ -49,12 +64,21 @@ void tst1(int pass) strcpy(tstline, "dc.title 1=/my/title"); ccl_qual_line(bibset, tstline); + + strcpy(tstline, "date r=r # ordered relation"); + ccl_qual_line(bibset, tstline); + + strcpy(tstline, "x r=o # ordered relation"); + ccl_qual_line(bibset, tstline); break; case 2: ccl_qual_buf(bibset, "ti u=4 s=pw t=l,r\n" "term 1=1016 s=al,pw\r\n" "\n" - "dc.title 1=/my/title\n"); + "dc.title 1=/my/title\n" + "date r=r\n" + "x r=o\n" + ); break; default: exit(23); @@ -78,14 +102,14 @@ void tst1(int pass) printf ("Failed %s\n", query_str[i].query); printf (" got:%s:\n", 
wrbuf_buf(wrbuf)); printf (" expected failure\n"); - exit(3); + (*number_of_errors)++; } else if (strcmp(wrbuf_buf(wrbuf), query_str[i].result)) { printf ("Failed %s\n", query_str[i].query); printf (" got:%s:\n", wrbuf_buf(wrbuf)); printf (" expected:%s:\n", query_str[i].result); - exit(2); + (*number_of_errors)++; } ccl_rpn_delete(rpn); wrbuf_free(wrbuf, 1); @@ -95,7 +119,7 @@ void tst1(int pass) printf ("Failed %s\n", query_str[i].query); printf (" got failure\n"); printf (" expected:%s:\n", query_str[i].result); - exit(4); + (*number_of_errors)++; } } ccl_parser_destroy (parser); @@ -104,8 +128,15 @@ void tst1(int pass) int main(int argc, char **argv) { - tst1(0); - tst1(1); - tst1(2); + int number_of_errors = 0; + tst1(0, &number_of_errors); + if (number_of_errors) + exit(1); + tst1(1, &number_of_errors); + if (number_of_errors) + exit(1); + tst1(2, &number_of_errors); + if (number_of_errors) + exit(1); exit(0); } -- 1.7.10.4