From 1d8e2adb8e5cfeaf77a84f5f3277785bb24cdd56 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 26 Apr 2007 21:45:16 +0000 Subject: [PATCH] Added functions to create CCL RPN nodes. Added small tokenizer utility (tokenizer.[ch]). CCL qualifier spec parsed using tokenizer which allows more flexible notation in terms of white-space etc. --- include/yaz/Makefile.am | 4 +- include/yaz/ccl.h | 14 ++- include/yaz/tokenizer.h | 75 ++++++++++++++ src/Makefile.am | 4 +- src/cclfind.c | 87 +++++++++------- src/cclqfile.c | 255 ++++++++++++++++++++++++++++------------------- src/tokenizer.c | 157 +++++++++++++++++++++++++++++ 7 files changed, 452 insertions(+), 144 deletions(-) create mode 100644 include/yaz/tokenizer.h create mode 100644 src/tokenizer.c diff --git a/include/yaz/Makefile.am b/include/yaz/Makefile.am index 19f5c43..6aea0da 100644 --- a/include/yaz/Makefile.am +++ b/include/yaz/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.44 2007-04-18 07:34:35 adam Exp $ +## $Id: Makefile.am,v 1.45 2007-04-26 21:45:16 adam Exp $ pkginclude_HEADERS= backend.h ccl.h ccl_xml.h cql.h comstack.h \ diagbib1.h diagsrw.h diagsru_update.h sortspec.h log.h logrpn.h marcdisp.h \ @@ -8,7 +8,7 @@ pkginclude_HEADERS= backend.h ccl.h ccl_xml.h cql.h comstack.h \ tcpip.h test.h timing.h unix.h tpath.h wrbuf.h xmalloc.h \ yaz-ccl.h yaz-iconv.h yaz-util.h yaz-version.h yconfig.h proto.h \ xmlquery.h libxml2_error.h xmltypes.h snprintf.h query-charset.h \ - mutex.h oid_db.h oid_util.h oid_std.h \ + mutex.h oid_db.h oid_util.h oid_std.h tokenizer.h \ \ ill.h ill-core.h item-req.h oclc-ill-req-ext.h z-accdes1.h z-accform1.h \ z-acckrb1.h z-core.h z-date.h z-diag1.h z-espec1.h z-estask.h z-exp.h \ diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index 78e90c3..7772cff 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -49,7 +49,7 @@ /* * CCL - header file * - * $Id: ccl.h,v 1.25 2007-04-26 09:11:56 adam Exp $ + * $Id: ccl.h,v 1.26 2007-04-26 21:45:16 adam Exp $ * * Old 
Europagate Log: * @@ -297,6 +297,18 @@ void ccl_pquery(WRBUF w, struct ccl_rpn_node *p); YAZ_EXPORT int ccl_parser_get_error(CCL_parser cclp, int *pos); +YAZ_EXPORT +struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind); + +YAZ_EXPORT +void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, + int type, int value); + +YAZ_EXPORT +void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, + int type, char *value); + + #ifndef ccl_assert #define ccl_assert(x) ; #endif diff --git a/include/yaz/tokenizer.h b/include/yaz/tokenizer.h new file mode 100644 index 0000000..02cd195 --- /dev/null +++ b/include/yaz/tokenizer.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 1995-2007, Index Data + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Index Data nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* $Id: tokenizer.h,v 1.1 2007-04-26 21:45:16 adam Exp $ */ + +/** \file tokenizer.h + \brief Header with public definitions about YAZ' tokenizer +*/ + +#ifndef YAZ_TOKENIZER +#define YAZ_TOKENIZER +#include <yaz/yconfig.h> + +YAZ_BEGIN_CDECL + +#define YAZ_TOKENIZER_EOF 0 +#define YAZ_TOKENIZER_ERROR (-1) +#define YAZ_TOKENIZER_STRING (-2) +#define YAZ_TOKENIZER_QSTRING (-3) + +typedef struct yaz_tokenizer *yaz_tokenizer_t; + +YAZ_EXPORT +yaz_tokenizer_t yaz_tokenizer_create(void); + +YAZ_EXPORT +void yaz_tokenizer_destroy(yaz_tokenizer_t t); + +YAZ_EXPORT +void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf); + +YAZ_EXPORT +int yaz_tokenizer_move(yaz_tokenizer_t t); + +YAZ_EXPORT +const char *yaz_tokenizer_string(yaz_tokenizer_t t); + +YAZ_EXPORT +void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple); + +YAZ_END_CDECL + +#endif +/* YAZ_TOKENIZER */ +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/Makefile.am b/src/Makefile.am index 780d0b5..33112c2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,6 @@ ## This file is part of the YAZ toolkit. ## Copyright (C) 1995-2007, Index Data, All rights reserved.
-## $Id: Makefile.am,v 1.64 2007-04-25 20:52:19 adam Exp $ +## $Id: Makefile.am,v 1.65 2007-04-26 21:45:17 adam Exp $ YAZ_VERSION_INFO=3:0:0 @@ -92,7 +92,7 @@ libyaz_la_SOURCES=version.c options.c log.c \ tcpdchk.c \ test.c timing.c \ xmlquery.c http.c \ - mime.c mime.h oid_util.c \ + mime.c mime.h oid_util.c tokenizer.c \ record_conv.c retrieval.c elementset.c snprintf.c query-charset.c libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO) diff --git a/src/cclfind.c b/src/cclfind.c index 3d81f2d..fc67b80 100644 --- a/src/cclfind.c +++ b/src/cclfind.c @@ -56,7 +56,7 @@ /* CCL find (to rpn conversion) * Europagate, 1995 * - * $Id: cclfind.c,v 1.10 2007-04-26 09:11:56 adam Exp $ + * $Id: cclfind.c,v 1.11 2007-04-26 21:45:17 adam Exp $ * * Old Europagate log: * @@ -185,12 +185,22 @@ static char *copy_token_name (struct ccl_token *tp) * kind: Type of node. * return: pointer to allocated node. */ -static struct ccl_rpn_node *mk_node (int kind) +struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind) { struct ccl_rpn_node *p; p = (struct ccl_rpn_node *)xmalloc (sizeof(*p)); ccl_assert (p); p->kind = kind; + + switch(kind) + { + case CCL_RPN_TERM: + p->u.t.attr_list = 0; + p->u.t.term = 0; + break; + default: + break; + } return p; } @@ -264,8 +274,6 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, n->next = p->u.t.attr_list; p->u.t.attr_list = n; - n->kind = CCL_RPN_ATTR_NUMERIC; - n->value.numeric = 0; return n; } @@ -276,8 +284,8 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p, * value: Value of attribute * set: Attribute set name */ -static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, - int type, int value) +void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, + int type, int value) { struct ccl_rpn_attr *n; @@ -286,8 +294,8 @@ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set, n->value.numeric = value; } -static void add_attr_string (struct ccl_rpn_node *p, const 
char *set, - int type, char *value) +void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, + int type, char *value) { struct ccl_rpn_attr *n; @@ -379,17 +387,17 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, if (p_top) { if (or_list) - p = mk_node (CCL_RPN_OR); + p = ccl_rpn_node_create(CCL_RPN_OR); else if (and_list) - p = mk_node (CCL_RPN_AND); + p = ccl_rpn_node_create(CCL_RPN_AND); else - p = mk_node (CCL_RPN_AND); + p = ccl_rpn_node_create(CCL_RPN_AND); p->u.p[0] = p_top; p_top = p; } /* create the term node, but wait a moment before adding the term */ - p = mk_node (CCL_RPN_TERM); + p = ccl_rpn_node_create(CCL_RPN_TERM); p->u.t.attr_list = NULL; p->u.t.term = NULL; @@ -409,8 +417,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, switch(attr->kind) { case CCL_RPN_ATTR_STRING: - add_attr_string(p, attr->set, attr->type, - attr->value.str); + ccl_add_attr_string(p, attr->set, attr->type, + attr->value.str); break; case CCL_RPN_ATTR_NUMERIC: if (attr->value.numeric > 0) @@ -444,8 +452,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, completeness_value = attr->value.numeric; break; } - add_attr_numeric(p, attr->set, attr->type, - attr->value.numeric); + ccl_add_attr_numeric(p, attr->set, attr->type, + attr->value.numeric); } } } @@ -457,9 +465,9 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, { /* no structure attribute met. 
Apply either structure attribute WORD or PHRASE depending on number of CCL tokens */ if (no == 1 && no_spaces == 0) - add_attr_numeric (p, attset, CCL_BIB1_STR, 2); + ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2); else - add_attr_numeric (p, attset, CCL_BIB1_STR, 1); + ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1); } /* make the RPN token */ @@ -497,7 +505,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr_numeric (p, attset, CCL_BIB1_TRU, 3); + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3); } else if (right_trunc) { @@ -508,7 +516,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr_numeric (p, attset, CCL_BIB1_TRU, 1); + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1); } else if (left_trunc) { @@ -519,13 +527,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, ccl_rpn_delete (p); return NULL; } - add_attr_numeric (p, attset, CCL_BIB1_TRU, 2); + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2); } else { if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE, &attset)) - add_attr_numeric (p, attset, CCL_BIB1_TRU, 100); + ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100); } if (!multi) break; @@ -661,16 +669,16 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - p = mk_node (CCL_RPN_AND); + p = ccl_rpn_node_create(CCL_RPN_AND); p->u.p[0] = p1; - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4); p->u.p[1] = p2; - add_attr_numeric (p2, attset, CCL_BIB1_REL, 2); + ccl_add_attr_numeric(p2, attset, CCL_BIB1_REL, 2); return p; } else /* = term - */ { - add_attr_numeric (p1, attset, CCL_BIB1_REL, 4); + ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4); return p1; } } @@ -681,7 +689,7 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, ADVANCE; if (!(p = search_term (cclp, ap))) return NULL; - add_attr_numeric (p, attset, 
CCL_BIB1_REL, 2); + ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2); return p; } else if (KIND == CCL_TOK_LP) @@ -702,7 +710,7 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp, { if (!(p = search_terms (cclp, ap))) return NULL; - add_attr_numeric (p, attset, CCL_BIB1_REL, rel); + ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel); return p; } cclp->error_code = CCL_ERR_TERM_EXPECTED; @@ -812,7 +820,8 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la, } if (node) { - struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + struct ccl_rpn_node *node_this = + ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; node = node_this; @@ -883,7 +892,8 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la, } if (node) { - struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + struct ccl_rpn_node *node_this = + ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; node = node_this; @@ -920,7 +930,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_node *p_prox = 0; /* ! 
word order specified */ /* % word order not specified */ - p_prox = mk_node(CCL_RPN_TERM); + p_prox = ccl_rpn_node_create(CCL_RPN_TERM); p_prox->u.t.term = (char *) xmalloc(1 + cclp->look_token->len); memcpy(p_prox->u.t.term, cclp->look_token->name, cclp->look_token->len); @@ -934,7 +944,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - pn = mk_node (CCL_RPN_PROX); + pn = ccl_rpn_node_create(CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; pn->u.p[2] = p_prox; @@ -948,7 +958,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - pn = mk_node (CCL_RPN_PROX); + pn = ccl_rpn_node_create(CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; pn->u.p[2] = 0; @@ -996,7 +1006,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp, cclp->error_code = CCL_ERR_SETNAME_EXPECTED; return NULL; } - p1 = mk_node (CCL_RPN_SET); + p1 = ccl_rpn_node_create(CCL_RPN_SET); p1->u.setname = copy_token_name (cclp->look_token); ADVANCE; return p1; @@ -1039,7 +1049,8 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp, } if (node) { - struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR); + struct ccl_rpn_node *node_this = + ccl_rpn_node_create(CCL_RPN_OR); node_this->u.p[0] = node; node_this->u.p[1] = node_sub; node_this->u.p[2] = 0; @@ -1078,7 +1089,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - pn = mk_node (CCL_RPN_AND); + pn = ccl_rpn_node_create(CCL_RPN_AND); pn->u.p[0] = p1; pn->u.p[1] = p2; pn->u.p[2] = 0; @@ -1092,7 +1103,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - pn = mk_node (CCL_RPN_OR); + pn = ccl_rpn_node_create(CCL_RPN_OR); pn->u.p[0] = p1; pn->u.p[1] = p2; pn->u.p[2] = 0; @@ -1106,7 +1117,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp, ccl_rpn_delete (p1); return NULL; } - pn = mk_node (CCL_RPN_NOT); + pn = ccl_rpn_node_create(CCL_RPN_NOT); 
pn->u.p[0] = p1; pn->u.p[1] = p2; pn->u.p[2] = 0; diff --git a/src/cclqfile.c b/src/cclqfile.c index 9d34229..b3abfd8 100644 --- a/src/cclqfile.c +++ b/src/cclqfile.c @@ -48,7 +48,7 @@ /* CCL qualifiers * Europagate, 1995 * - * $Id: cclqfile.c,v 1.8 2007-04-25 20:52:19 adam Exp $ + * $Id: cclqfile.c,v 1.9 2007-04-26 21:45:17 adam Exp $ * * Old Europagate Log: * @@ -69,148 +69,201 @@ #include #include +#include #include +#include #define MAX_QUAL 128 -void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name) +int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name, + const char **addinfo) { - char qual_spec[128]; + yaz_tokenizer_t yt = yaz_tokenizer_create(); + int type_ar[MAX_QUAL]; int value_ar[MAX_QUAL]; char *svalue_ar[MAX_QUAL]; char *attsets[MAX_QUAL]; int pair_no = 0; + char *type_str = 0; + int t; - while (pair_no < MAX_QUAL) + yaz_tokenizer_single_tokens(yt, ",="); + yaz_tokenizer_read_buf(yt, cp); + *addinfo = 0; + + t = yaz_tokenizer_move(yt); + while (t == YAZ_TOKENIZER_STRING) { - char *qual_value, *qual_type; - char *split, *setp; - int no_scan = 0; - - if (sscanf (cp, "%100s%n", qual_spec, &no_scan) < 1) - break; + /* we don't know what lead is yet */ + char *lead_str = xstrdup(yaz_tokenizer_string(yt)); + const char *value_str = 0; + int type = 0, value = 0; /* indicates attribute value UNSET */ - if (!(split = strchr (qual_spec, '='))) + t = yaz_tokenizer_move(yt); + if (t == ',') { - /* alias specification .. */ - if (pair_no == 0) + /* full attribute spec: set, type = value */ + /* lead is attribute set */ + attsets[pair_no] = lead_str; + t = yaz_tokenizer_move(yt); + if (t != YAZ_TOKENIZER_STRING) { - ccl_qual_add_combi (bibset, qual_name, cp); - return; + *addinfo = "token expected"; + goto out; + } + xfree(type_str); + type_str = xstrdup(yaz_tokenizer_string(yt)); + if (yaz_tokenizer_move(yt) != '=') + { + *addinfo = "= expected"; + goto out; } - break; } - /* [set,]type=value ... 
*/ - cp += no_scan; - - *split++ = '\0'; - - setp = strchr (qual_spec, ','); - if (setp) + else if (t == '=') { - /* set,type=value ... */ - *setp++ = '\0'; - qual_type = setp; + /* lead is attribute type */ + /* attribute set omitted: type = value */ + attsets[pair_no] = 0; + xfree(type_str); + type_str = lead_str; } else { - /* type=value ... */ - qual_type = qual_spec; + /* lead is first of a list of qualifier aliases */ + /* qualifier alias: q1 q2 ... */ + xfree(lead_str); + yaz_tokenizer_destroy(yt); + ccl_qual_add_combi (bibset, qual_name, cp); + return 0; } - while (pair_no < MAX_QUAL) + while (1) /* comma separated attribute value list */ { - int type, value; - - qual_value = split; - if ((split = strchr (qual_value, ','))) - *split++ = '\0'; - - value = 0; - switch (qual_type[0]) + t = yaz_tokenizer_move(yt); + /* must have a value now */ + if (t != YAZ_TOKENIZER_STRING) { - case 'u': - case 'U': - type = CCL_BIB1_USE; - break; - case 'r': - case 'R': - type = CCL_BIB1_REL; - if (!ccl_stricmp (qual_value, "o")) - value = CCL_BIB1_REL_ORDER; - else if (!ccl_stricmp (qual_value, "r")) - value = CCL_BIB1_REL_PORDER; - break; - case 'p': - case 'P': - type = CCL_BIB1_POS; - break; - case 's': - case 'S': - type = CCL_BIB1_STR; - if (!ccl_stricmp (qual_value, "pw")) - value = CCL_BIB1_STR_WP; - if (!ccl_stricmp (qual_value, "al")) - value = CCL_BIB1_STR_AND_LIST; - if (!ccl_stricmp (qual_value, "ol")) - value = CCL_BIB1_STR_OR_LIST; - break; - case 't': - case 'T': - type = CCL_BIB1_TRU; - if (!ccl_stricmp (qual_value, "l")) - value = CCL_BIB1_TRU_CAN_LEFT; - else if (!ccl_stricmp (qual_value, "r")) - value = CCL_BIB1_TRU_CAN_RIGHT; - else if (!ccl_stricmp (qual_value, "b")) - value = CCL_BIB1_TRU_CAN_BOTH; - else if (!ccl_stricmp (qual_value, "n")) - value = CCL_BIB1_TRU_CAN_NONE; - break; - case 'c': - case 'C': - type = CCL_BIB1_COM; - break; - default: - type = atoi (qual_type); + *addinfo = "value token expected"; + goto out; + } + value_str =
yaz_tokenizer_string(yt); + + if (sscanf(type_str, "%d", &type) == 1) + ; + else if (strlen(type_str) != 1) + { + *addinfo = "bad attribute type"; + goto out; + } + else + { + switch (*type_str) + { + case 'u': + case 'U': + type = CCL_BIB1_USE; + break; + case 'r': + case 'R': + type = CCL_BIB1_REL; + if (!ccl_stricmp (value_str, "o")) + value = CCL_BIB1_REL_ORDER; + else if (!ccl_stricmp (value_str, "r")) + value = CCL_BIB1_REL_PORDER; + break; + case 'p': + case 'P': + type = CCL_BIB1_POS; + break; + case 's': + case 'S': + type = CCL_BIB1_STR; + if (!ccl_stricmp (value_str, "pw")) + value = CCL_BIB1_STR_WP; + if (!ccl_stricmp (value_str, "al")) + value = CCL_BIB1_STR_AND_LIST; + if (!ccl_stricmp (value_str, "ol")) + value = CCL_BIB1_STR_OR_LIST; + break; + case 't': + case 'T': + type = CCL_BIB1_TRU; + if (!ccl_stricmp (value_str, "l")) + value = CCL_BIB1_TRU_CAN_LEFT; + else if (!ccl_stricmp (value_str, "r")) + value = CCL_BIB1_TRU_CAN_RIGHT; + else if (!ccl_stricmp (value_str, "b")) + value = CCL_BIB1_TRU_CAN_BOTH; + else if (!ccl_stricmp (value_str, "n")) + value = CCL_BIB1_TRU_CAN_NONE; + break; + case 'c': + case 'C': + type = CCL_BIB1_COM; + break; + } + } + if (type == 0) + { + /* type was not set in switch above */ + *addinfo = "bad attribute type"; + goto out; } - type_ar[pair_no] = type; - if (value) { value_ar[pair_no] = value; svalue_ar[pair_no] = 0; } - else if (*qual_value >= '0' && *qual_value <= '9') + else if (*value_str >= '0' && *value_str <= '9') { - value_ar[pair_no] = atoi (qual_value); + value_ar[pair_no] = atoi (value_str); svalue_ar[pair_no] = 0; } else { - size_t len; - if (split) - len = split - qual_value; - else - len = strlen(qual_value); - svalue_ar[pair_no] = (char *) xmalloc(len+1); - memcpy(svalue_ar[pair_no], qual_value, len); - svalue_ar[pair_no][len] = '\0'; + value_ar[pair_no] = 0; + svalue_ar[pair_no] = xstrdup(value_str); } - if (setp) + pair_no++; + if (pair_no == MAX_QUAL) { - attsets[pair_no] = xstrdup (qual_spec); + 
*addinfo = "too many attribute values"; + goto out; } - else - attsets[pair_no] = 0; - pair_no++; - if (!split) + t = yaz_tokenizer_move(yt); + if (t != ',') break; + attsets[pair_no] = attsets[pair_no-1]; } } - ccl_qual_add_set (bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar, - attsets); + out: + xfree(type_str); + type_str = 0; + + yaz_tokenizer_destroy(yt); + + if (*addinfo) + { + int i; + for (i = 0; i +#include +#include +#include +#include +#include +#include + +struct yaz_tokenizer { + int (*get_byte_func)(const void **vp); + const void *get_byte_data; + + int unget_byte; + char *white_space; + char *single_tokens; + char *quote_tokens_begin; + char *quote_tokens_end; + WRBUF wr_string; + int look; +}; + +void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple) +{ + xfree(t->single_tokens); + t->single_tokens = xstrdup(simple); +} + +yaz_tokenizer_t yaz_tokenizer_create(void) +{ + yaz_tokenizer_t t = xmalloc(sizeof(*t)); + t->white_space = xstrdup(" \t\r\n"); + t->single_tokens = xstrdup(""); + t->quote_tokens_begin = xstrdup("\""); + t->quote_tokens_end = xstrdup("\""); + t->get_byte_func = 0; + t->get_byte_data = 0; + t->wr_string = wrbuf_alloc(); + t->look = YAZ_TOKENIZER_ERROR; + t->unget_byte = 0; + return t; +} + +void yaz_tokenizer_destroy(yaz_tokenizer_t t) +{ + xfree(t->white_space); + xfree(t->single_tokens); + xfree(t->quote_tokens_begin); + xfree(t->quote_tokens_end); + wrbuf_destroy(t->wr_string); + xfree(t); +} + +static int read_buf(const void **vp) +{ + const char *cp = *(const char **) vp; + int ch = *cp; + if (ch) + { + cp++; + *(const char **)vp = cp; + } + return ch; +} + +static int get_byte(yaz_tokenizer_t t) +{ + int ch = t->unget_byte; + assert(t->get_byte_func); + if (ch) + t->unget_byte = 0; + else + ch = t->get_byte_func(&t->get_byte_data); + return ch; +} + +static void unget_byte(yaz_tokenizer_t t, int ch) +{ + t->unget_byte = ch; +} + +void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf) +{ + 
assert(t); + t->get_byte_func = read_buf; + t->get_byte_data = buf; +} + +int yaz_tokenizer_move(yaz_tokenizer_t t) +{ + const char *cp; + int ch = get_byte(t); + + /* skip white space */ + while (ch && strchr(t->white_space, ch)) + ch = get_byte(t); + if (!ch) + { + ch = YAZ_TOKENIZER_EOF; + } + else if ((cp = strchr(t->single_tokens, ch))) + ch = *cp; /* single token match */ + else if ((cp = strchr(t->quote_tokens_begin, ch))) + { /* quoted string */ + int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin]; + ch = get_byte(t); + wrbuf_rewind(t->wr_string); + for (; ch && ch != end_ch; ch = get_byte(t)) /* advance input, else loop never ends */ + wrbuf_putc(t->wr_string, ch); + if (!ch) + ch = YAZ_TOKENIZER_ERROR; + else + ch = YAZ_TOKENIZER_QSTRING; + } + else + { /* unquoted string */ + wrbuf_rewind(t->wr_string); + while (ch && !strchr(t->white_space, ch) + && !strchr(t->single_tokens, ch)) + { + wrbuf_putc(t->wr_string, ch); + ch = get_byte(t); + } + unget_byte(t, ch); + ch = YAZ_TOKENIZER_STRING; + } + t->look = ch; + yaz_log(YLOG_LOG, "tokenizer returns %d (%s)", ch, + wrbuf_cstr(t->wr_string)); + + return ch; +} + +const char *yaz_tokenizer_string(yaz_tokenizer_t t) +{ + return wrbuf_cstr(t->wr_string); +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + -- 1.7.10.4