From 79135288365437a3b1ee89ba36059dcec9f52a70 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 23 Jun 2003 10:22:21 +0000 Subject: [PATCH] String attributes for CCL parser --- CHANGELOG | 2 + ccl/Makefile.am | 11 +++- ccl/bib1 | 5 +- ccl/cclfind.c | 150 +++++++++++++++++++++++++++++++++-------------------- ccl/cclptree.c | 23 +++++--- ccl/cclqfile.c | 44 +++++++++++++--- ccl/cclqual.c | 27 +++++++--- ccl/tstccl.c | 56 ++++++++++++++++++++ include/yaz/ccl.h | 13 +++-- util/tstwrbuf.c | 4 +- zutil/yaz-ccl.c | 16 ++++-- 11 files changed, 266 insertions(+), 85 deletions(-) create mode 100644 ccl/tstccl.c diff --git a/CHANGELOG b/CHANGELOG index 947c176..b1ee42a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ Possible compatibility problems with earlier versions marked with '*'. +String value attributes for CCL parser. + --- 2.0.3 2003/06/20 Fix CCL directive @case handling so that it affects string match for diff --git a/ccl/Makefile.am b/ccl/Makefile.am index d6bd565..13beca9 100644 --- a/ccl/Makefile.am +++ b/ccl/Makefile.am @@ -1,14 +1,23 @@ -## $Id: Makefile.am,v 1.9 2002-06-06 13:02:01 adam Exp $ +## $Id: Makefile.am,v 1.10 2003-06-23 10:22:21 adam Exp $ noinst_LTLIBRARIES = libccl.la noinst_PROGRAMS = cclsh +check_PROGRAMS = tstccl + +TESTS = $(check_PROGRAMS) + EXTRA_DIST = bib1 AM_CPPFLAGS=-I$(top_srcdir)/include libccl_la_SOURCES = cclfind.c ccltoken.c cclerrms.c cclqual.c cclptree.c \ cclqfile.c cclstr.c + cclsh_SOURCES=cclsh.c cclsh_LDADD = libccl.la ../util/libutil.la $(READLINE_LIBS) + +tstccl_SOURCES = tstccl.c +tstccl_LDADD = libccl.la ../util/libutil.la + diff --git a/ccl/bib1 b/ccl/bib1 index afa776a..1dc9dca 100644 --- a/ccl/bib1 +++ b/ccl/bib1 @@ -1,4 +1,4 @@ -# $Id: bib1,v 1.12 2002-10-14 19:45:36 adam Exp $ +# $Id: bib1,v 1.13 2003-06-23 10:22:21 adam Exp $ # CCL qualifiers and their mappings # # Each line takes the form: @@ -114,3 +114,6 @@ exp:category exp1,1=1 forfatter au # tiau=x is equivalent to ti=x or ti=x tiau ti au +# +# string atributes +dc.title 1=/my/title diff --git a/ccl/cclfind.c b/ccl/cclfind.c index 8153a0e..9fdcf13 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -44,7 +44,7 @@ /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Id: cclfind.c,v 1.33 2003-02-14 18:49:22 adam Exp $ + * $Id: cclfind.c,v 1.34 2003-06-23 10:22:21 adam Exp $ * * Old Europagate log: * @@ -127,7 +127,8 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value, for (i = 0; (q=qa[i]); i++) while (q) { - if (q->type == type && q->value == value) + if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC && + q->value.numeric == value) { if (attset) *attset = q->set; @@ -203,6 +204,8 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) for (attr = rpn->u.t.attr_list; attr; attr = attr1) { attr1 = attr->next; + if (attr->kind == CCL_RPN_ATTR_STRING) + xfree(attr->value.str); if (attr->set) xfree (attr->set); xfree (attr); @@ -233,18 +236,11 @@ static int is_term_ok (int look, int *list) static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa); -/* - * add_attr: Add attribute (type/value) to RPN term node. - * p: RPN node of type term. - * type: Type of attribute - * value: Value of attribute - * set: Attribute set name - */ -static void add_attr (struct ccl_rpn_node *p, const char *set, - int type, int value) +static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, + const char *set, int type) { struct ccl_rpn_attr *n; - + n = (struct ccl_rpn_attr *)xmalloc (sizeof(*n)); ccl_assert (n); if (set) @@ -255,12 +251,43 @@ static void add_attr (struct ccl_rpn_node *p, const char *set, else n->set = 0; n->type = type; - n->value = value; n->next = p->u.t.attr_list; p->u.t.attr_list = n; + + n->kind = CCL_RPN_ATTR_NUMERIC; + n->value.numeric = 0; + return n; } /* + * add_attr_numeric: Add attribute (type/value) to RPN term node. + * p: RPN node of type term. + * type: Type of attribute + * value: Value of attribute + * set: Attribute set name + */ +static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, + int type, int value) +{ + struct ccl_rpn_attr *n; + + n = add_attr_node(p, set, type); + n->kind = CCL_RPN_ATTR_NUMERIC; + n->value.numeric = value; +} + +static void add_attr_string (struct ccl_rpn_node *p, const char *set, + int type, char *value) +{ + struct ccl_rpn_attr *n; + + n = add_attr_node(p, set, type); + n->kind = CCL_RPN_ATTR_STRING; + n->value.str = xstrdup(value); +} + + +/* * search_term: Parse CCL search term. * cclp: CCL Parser * qa: Qualifier attributes already applied. @@ -369,39 +396,48 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, struct ccl_rpn_attr *attr; for (attr = qa[i]; attr; attr = attr->next) - if (attr->value > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) - { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = attr->value; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value; - left_trunc = right_trunc = mid_trunc = 0; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value; - break; - } - add_attr (p, attr->set, attr->type, attr->value); - } + switch(attr->kind) + { + case CCL_RPN_ATTR_STRING: + add_attr_string(p, attr->set, attr->type, + attr->value.str); + break; + case CCL_RPN_ATTR_NUMERIC: + if (attr->value.numeric > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value.numeric; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value.numeric; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value.numeric; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value.numeric; + left_trunc = right_trunc = mid_trunc = 0; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value.numeric; + break; + } + add_attr_numeric(p, attr->set, attr->type, + attr->value.numeric); + } + } } /* len now holds the number of characters in the RPN term */ /* no holds the number of CCL tokens (1 or more) */ @@ -411,9 +447,9 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, { /* no structure attribute met. Apply either structure attribute WORD or PHRASE depending on number of CCL tokens */ if (no == 1 && no_spaces == 0) - add_attr (p, attset, CCL_BIB1_STR, 2); + add_attr_numeric (p, attset, CCL_BIB1_STR, 2); else - add_attr (p, attset, CCL_BIB1_STR, 1); + add_attr_numeric (p, attset, CCL_BIB1_STR, 1); } /* make the RPN token */ @@ -454,7 +490,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr (p, attset, CCL_BIB1_TRU, 3); + add_attr_numeric (p, attset, CCL_BIB1_TRU, 3); } else if (right_trunc) { @@ -465,7 +501,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr (p, attset, CCL_BIB1_TRU, 1); + add_attr_numeric (p, attset, CCL_BIB1_TRU, 1); } else if (left_trunc) { @@ -476,13 +512,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr (p, attset, CCL_BIB1_TRU, 2); + add_attr_numeric (p, attset, CCL_BIB1_TRU, 2); } else { if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE, &attset)) - add_attr (p, attset, CCL_BIB1_TRU, 100); + add_attr_numeric (p, attset, CCL_BIB1_TRU, 100); } if (!multi) break; @@ -580,14 +616,14 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, } p = mk_node (CCL_RPN_AND); p->u.p[0] = p1; - add_attr (p1, attset, CCL_BIB1_REL, 4); + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); p->u.p[1] = p2; - add_attr (p2, attset, CCL_BIB1_REL, 2); + add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); return p; } else /* = term - */ { - add_attr (p1, attset, CCL_BIB1_REL, 4); + add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); return p1; } } @@ -597,7 +633,7 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, ADVANCE; if (!(p = search_term (cclp, ap))) return NULL; - add_attr (p, attset, CCL_BIB1_REL, 2); + add_attr_numeric (p, attset, CCL_BIB1_REL, 2); return p; } else if (KIND == CCL_TOK_LP) @@ -618,7 +654,7 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp, { if (!(p = search_terms (cclp, ap))) return NULL; - add_attr (p, attset, CCL_BIB1_REL, rel); + add_attr_numeric (p, attset, CCL_BIB1_REL, rel); return p; } cclp->error_code = CCL_ERR_TERM_EXPECTED; diff --git a/ccl/cclptree.c b/ccl/cclptree.c index 924971f..d1c7d64 100644 --- a/ccl/cclptree.c +++ b/ccl/cclptree.c @@ -44,7 +44,7 @@ /* CCL print rpn tree - infix notation * Europagate, 1995 * - * $Id: cclptree.c,v 1.12 2003-02-14 18:49:23 adam Exp $ + * $Id: cclptree.c,v 1.13 2003-06-23 10:22:21 adam Exp $ * * Old Europagate Log: * @@ -92,11 +92,22 @@ void ccl_pr_tree_as_qrpn(struct ccl_rpn_node *rpn, FILE *fd_out, int indent) { struct ccl_rpn_attr *attr; for (attr = rpn->u.t.attr_list; attr; attr = attr->next) - if (attr->set) - fprintf (fd_out, "@attr %s %d=%d ", attr->set, attr->type, - attr->value); - else - fprintf (fd_out, "@attr %d=%d ", attr->type, attr->value); + { + if (attr->set) + fprintf(fd_out, "@attr %s", attr->set); + else + fprintf(fd_out, "@attr "); + switch(attr->kind) + { + case CCL_RPN_ATTR_NUMERIC: + fprintf (fd_out, "%d=%d ", attr->type, + attr->value.numeric); + break; + case CCL_RPN_ATTR_STRING: + fprintf (fd_out, "%d=%s ", attr->type, + attr->value.str); + } + } } fprintf (fd_out, "\"%s\"\n", rpn->u.t.term); break; diff --git a/ccl/cclqfile.c b/ccl/cclqfile.c index b4aab41..61cd463 100644 --- a/ccl/cclqfile.c +++ b/ccl/cclqfile.c @@ -44,7 +44,7 @@ /* CCL qualifiers * Europagate, 1995 * - * $Id: cclqfile.c,v 1.13 2002-06-06 12:54:24 adam Exp $ + * $Id: cclqfile.c,v 1.14 2003-06-23 10:22:21 adam Exp $ * * Old Europagate Log: * @@ -70,7 +70,9 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) { char qual_spec[128]; - int pair[256]; + int type_ar[128]; + int value_ar[128]; + char *svalue_ar[128]; char *attsets[128]; int pair_no = 0; @@ -85,6 +87,7 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) if (!(split = strchr (qual_spec, '='))) { + /* alias specification .. */ if (pair_no == 0) { ccl_qual_add_combi (bibset, qual_name, cp); @@ -92,6 +95,7 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) } break; } + /* [set,]type=value ... */ cp += no_scan; *split++ = '\0'; @@ -99,11 +103,15 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) setp = strchr (qual_spec, ','); if (setp) { + /* set,type=value ... */ *setp++ = '\0'; qual_type = setp; } else + { + /* type=value ... */ qual_type = qual_spec; + } while (pair_no < 128) { int type, value; @@ -111,7 +119,8 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) qual_value = split; if ((split = strchr (qual_value, ','))) *split++ = '\0'; - value = atoi (qual_value); + + value = 0; switch (qual_type[0]) { case 'u': @@ -157,8 +166,30 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) default: type = atoi (qual_type); } - pair[pair_no*2] = type; - pair[pair_no*2+1] = value; + + type_ar[pair_no] = type; + + if (value) + { + value_ar[pair_no] = value; + svalue_ar[pair_no] = 0; + } + else if (*qual_value >= '0' && *qual_value <= '9') + { + value_ar[pair_no] = atoi (qual_value); + svalue_ar[pair_no] = 0; + } + else + { + size_t len; + if (split) + len = split - qual_value; + else + len = strlen(qual_value); + svalue_ar[pair_no] = xmalloc(len+1); + memcpy(svalue_ar[pair_no], qual_value, len); + svalue_ar[pair_no][len] = '\0'; + } if (setp) { attsets[pair_no] = (char*) xmalloc (strlen(qual_spec)+1); @@ -171,7 +202,8 @@ void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) break; } } - ccl_qual_add_set (bibset, qual_name, pair_no, pair, attsets); + ccl_qual_add_set (bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar, + attsets); } void ccl_qual_fitem (CCL_bibset bibset, const char *cp, const char *qual_name) diff --git a/ccl/cclqual.c b/ccl/cclqual.c index 577c330..b0ec3f2 100644 --- a/ccl/cclqual.c +++ b/ccl/cclqual.c @@ -44,7 +44,7 @@ /* CCL qualifiers * Europagate, 1995 * - * $Id: cclqual.c,v 1.18 2003-06-19 19:51:40 adam Exp $ + * $Id: cclqual.c,v 1.19 2003-06-23 10:22:21 adam Exp $ * * Old Europagate Log: * @@ -193,7 +193,9 @@ void ccl_qual_add_combi (CCL_bibset b, const char *n, const char *names) * pairs: Attributes. pairs[0] first type, pair[1] first value, * ... pair[2*no-2] last type, pair[2*no-1] last value. */ -void ccl_qual_add_set (CCL_bibset b, const char *name, int no, int *pairs, + +void ccl_qual_add_set (CCL_bibset b, const char *name, int no, + int *type_ar, int *value_ar, char **svalue_ar, char **attsets) { struct ccl_qualifier *q; @@ -220,7 +222,7 @@ void ccl_qual_add_set (CCL_bibset b, const char *name, int no, int *pairs, } else { - if (q->sub) + if (q->sub) /* suspect.. */ xfree (q->sub); attrp = &q->attr_list; while (*attrp) @@ -233,8 +235,19 @@ void ccl_qual_add_set (CCL_bibset b, const char *name, int no, int *pairs, attr = (struct ccl_rpn_attr *)xmalloc (sizeof(*attr)); ccl_assert (attr); attr->set = *attsets++; - attr->type = *pairs++; - attr->value = *pairs++; + attr->type = *type_ar++; + if (*svalue_ar) + { + attr->kind = CCL_RPN_ATTR_STRING; + attr->value.str = *svalue_ar; + } + else + { + attr->kind = CCL_RPN_ATTR_NUMERIC; + attr->value.numeric = *value_ar; + } + svalue_ar++; + value_ar++; *attrp = attr; attrp = &attr->next; } @@ -273,7 +286,9 @@ void ccl_qual_rm (CCL_bibset *b) { attr1 = attr->next; if (attr->set) - xfree (attr->set); + xfree(attr->set); + if (attr->kind == CCL_RPN_ATTR_STRING) + xfree(attr->value.str); xfree (attr); } q1 = q->next; diff --git a/ccl/tstccl.c b/ccl/tstccl.c new file mode 100644 index 0000000..e46cad2 --- /dev/null +++ b/ccl/tstccl.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2002-2003, Index Data + * See the file LICENSE for details. + * + * $Id: tstccl.c,v 1.1 2003-06-23 10:22:21 adam Exp $ + */ + +/* CCL test */ + +#include + +static char *query_str[] = { + "x1", + "x1 and x2", + "ti=x3", + "dc.title=x4", + 0 +}; + +void tst1(void) +{ + CCL_parser parser = ccl_parser_create (); + CCL_bibset bibset = ccl_qual_mk(); + int i; + + ccl_qual_fitem(bibset, "u=4 s=pw t=l,r", "ti"); + ccl_qual_fitem(bibset, "1=1016 s=al,pw", "term"); + ccl_qual_fitem(bibset, "1=/my/title", "dc.title"); + + parser->bibset = bibset; + + for (i = 0; query_str[i]; i++) + { + struct ccl_token *token_list = + ccl_parser_tokenize(parser, query_str[i]); + struct ccl_rpn_node *rpn = ccl_parser_find(parser, token_list); + ccl_token_del (token_list); + if (rpn) + { + ccl_rpn_delete(rpn); + } + else + { + printf ("failed %s\n", query_str[i]); + exit(1+i); + } + } + ccl_parser_destroy (parser); + ccl_qual_rm(&bibset); +} + +int main(int argc, char **argv) +{ + tst1(); + exit(0); +} diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index 076b899..175a37b 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -45,7 +45,7 @@ /* * CCL - header file * - * $Id: ccl.h,v 1.13 2002-12-28 12:13:03 adam Exp $ + * $Id: ccl.h,v 1.14 2003-06-23 10:22:21 adam Exp $ * * Old Europagate Log: * @@ -111,7 +111,13 @@ struct ccl_rpn_attr { struct ccl_rpn_attr *next; char *set; int type; - int value; + int kind; +#define CCL_RPN_ATTR_NUMERIC 1 +#define CCL_RPN_ATTR_STRING 2 + union { + int numeric; + char *str; + } value; }; #define CCL_RPN_AND 1 @@ -277,7 +283,8 @@ YAZ_EXPORT void ccl_qual_add (CCL_bibset b, const char *name, int no, int *attr); YAZ_EXPORT void ccl_qual_add_set (CCL_bibset b, const char *name, int no, - int *attr, char **attsets); + int *type, int *value, char **svalue, + char **attsets); YAZ_EXPORT void ccl_qual_add_special (CCL_bibset bibset, const char *n, const char *v); diff --git a/util/tstwrbuf.c b/util/tstwrbuf.c index c1c3f4a..f141c6f 100644 --- a/util/tstwrbuf.c +++ b/util/tstwrbuf.c @@ -2,7 +2,7 @@ * Copyright (c) 2002-2003, Index Data * See the file LICENSE for details. * - * $Id: tstwrbuf.c,v 1.1 2003-05-06 10:07:33 adam Exp $ + * $Id: tstwrbuf.c,v 1.2 2003-06-23 10:22:21 adam Exp $ */ #include @@ -45,7 +45,7 @@ int main (int argc, char **argv) { if (cp[k] != i+1) { - printf ("tstwrbuf 2 %d k=%d\n", k); + printf ("tstwrbuf 2 %d k=%d\n", step, k); exit(1); } k++; diff --git a/zutil/yaz-ccl.c b/zutil/yaz-ccl.c index 4e67499..5b47e79 100644 --- a/zutil/yaz-ccl.c +++ b/zutil/yaz-ccl.c @@ -2,7 +2,7 @@ * Copyright (c) 1996-2003, Index Data. * See the file LICENSE for details. * - * $Id: yaz-ccl.c,v 1.19 2003-02-12 15:06:44 adam Exp $ + * $Id: yaz-ccl.c,v 1.20 2003-06-23 10:22:21 adam Exp $ */ #include @@ -117,8 +117,18 @@ void ccl_pquery (WRBUF w, struct ccl_rpn_node *p) wrbuf_puts (w, att->set); wrbuf_puts (w, " "); } - sprintf(tmpattr, "%d=%d ", att->type, att->value); - wrbuf_puts (w, tmpattr); + switch(att->kind) + { + case CCL_RPN_ATTR_NUMERIC: + sprintf(tmpattr, "%d=%d ", att->type, att->value.numeric); + wrbuf_puts (w, tmpattr); + break; + case CCL_RPN_ATTR_STRING: + sprintf(tmpattr, "%d ", att->type); + wrbuf_puts (w, tmpattr); + wrbuf_puts(w, att->value.str); + break; + } } for (cp = p->u.t.term; *cp; cp++) { -- 1.7.10.4