Added functions to create CCL RPN nodes. Added small tokenizer
authorAdam Dickmeiss <adam@indexdata.dk>
Thu, 26 Apr 2007 21:45:16 +0000 (21:45 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Thu, 26 Apr 2007 21:45:16 +0000 (21:45 +0000)
utility (tokenizer.[ch]). CCL qualifier spec parsed using tokenizer
which allows more flexible notation in terms of white-space etc.

include/yaz/Makefile.am
include/yaz/ccl.h
include/yaz/tokenizer.h [new file with mode: 0644]
src/Makefile.am
src/cclfind.c
src/cclqfile.c
src/tokenizer.c [new file with mode: 0644]

index 19f5c43..6aea0da 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.44 2007-04-18 07:34:35 adam Exp $
+## $Id: Makefile.am,v 1.45 2007-04-26 21:45:16 adam Exp $
 
 pkginclude_HEADERS= backend.h ccl.h ccl_xml.h cql.h comstack.h \
  diagbib1.h diagsrw.h diagsru_update.h sortspec.h log.h logrpn.h marcdisp.h \
@@ -8,7 +8,7 @@ pkginclude_HEADERS= backend.h ccl.h ccl_xml.h cql.h comstack.h \
  tcpip.h test.h timing.h unix.h tpath.h wrbuf.h xmalloc.h \
  yaz-ccl.h yaz-iconv.h yaz-util.h yaz-version.h yconfig.h proto.h \
  xmlquery.h libxml2_error.h xmltypes.h snprintf.h query-charset.h \
- mutex.h oid_db.h oid_util.h oid_std.h \
+ mutex.h oid_db.h oid_util.h oid_std.h tokenizer.h \
  \
  ill.h ill-core.h item-req.h oclc-ill-req-ext.h z-accdes1.h z-accform1.h \
  z-acckrb1.h z-core.h z-date.h z-diag1.h z-espec1.h z-estask.h z-exp.h \
index 78e90c3..7772cff 100644 (file)
@@ -49,7 +49,7 @@
 /*
  * CCL - header file
  *
- * $Id: ccl.h,v 1.25 2007-04-26 09:11:56 adam Exp $
+ * $Id: ccl.h,v 1.26 2007-04-26 21:45:16 adam Exp $
  *
  * Old Europagate Log:
  *
@@ -297,6 +297,18 @@ void ccl_pquery(WRBUF w, struct ccl_rpn_node *p);
 YAZ_EXPORT
 int ccl_parser_get_error(CCL_parser cclp, int *pos);
 
+YAZ_EXPORT
+struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind);
+
+YAZ_EXPORT
+void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
+                          int type, int value);
+
+YAZ_EXPORT
+void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
+                         int type, char *value);
+
+
 #ifndef ccl_assert
 #define ccl_assert(x) ;
 #endif
diff --git a/include/yaz/tokenizer.h b/include/yaz/tokenizer.h
new file mode 100644 (file)
index 0000000..02cd195
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1995-2007, Index Data
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Index Data nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* $Id: tokenizer.h,v 1.1 2007-04-26 21:45:16 adam Exp $ */
+
+/** \file tokenizer.h
+    \brief Header with public definitions about YAZ' tokenizer
+*/
+
+#ifndef YAZ_TOKENIZER
+#define YAZ_TOKENIZER
+#include <yaz/nmem.h>
+
+YAZ_BEGIN_CDECL
+
+/* Token codes returned by yaz_tokenizer_move(). A configured single-character
+   token is returned as the character itself. */
+/** end of input reached (the input buffer's terminating NUL) */
+#define YAZ_TOKENIZER_EOF 0
+/** malformed input: input ended before the closing quote of a quoted string */
+#define YAZ_TOKENIZER_ERROR (-1)
+/** unquoted string; text available from yaz_tokenizer_string() */
+#define YAZ_TOKENIZER_STRING (-2)
+/** quoted string; text (without the quotes) from yaz_tokenizer_string() */
+#define YAZ_TOKENIZER_QSTRING (-3)
+
+/** opaque tokenizer handle */
+typedef struct yaz_tokenizer *yaz_tokenizer_t;
+
+/** \brief creates a tokenizer
+    Defaults: white space is " \t\r\n", no single-character tokens, and
+    the double quote character both opens and closes a quoted string.
+    An input must be attached with yaz_tokenizer_read_buf() before moving. */
+YAZ_EXPORT
+yaz_tokenizer_t yaz_tokenizer_create(void);
+
+/** \brief destroys a tokenizer and the strings and buffer it owns */
+YAZ_EXPORT
+void yaz_tokenizer_destroy(yaz_tokenizer_t t);
+
+/** \brief attaches a NUL-terminated buffer as the tokenizer's input
+    Only the pointer is stored; the buffer is not copied, so it has to stay
+    valid for as long as tokens are read from it. */
+YAZ_EXPORT
+void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf);
+
+/** \brief reads the next token
+    \returns one of the YAZ_TOKENIZER_ codes above, or the character itself
+    for a character listed with yaz_tokenizer_single_tokens(). */
+YAZ_EXPORT
+int yaz_tokenizer_move(yaz_tokenizer_t t);
+
+/** \brief returns the text of the most recent string token
+    The pointer refers to the tokenizer's internal buffer, which later calls
+    to yaz_tokenizer_move() rewrite. */
+YAZ_EXPORT
+const char *yaz_tokenizer_string(yaz_tokenizer_t t);
+
+/** \brief sets the characters that each form a token on their own
+    The tokenizer keeps its own copy of \a simple. */
+YAZ_EXPORT
+void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple);
+
+YAZ_END_CDECL
+
+#endif
+/* YAZ_TOKENIZER */
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index 780d0b5..33112c2 100644 (file)
@@ -1,6 +1,6 @@
 ## This file is part of the YAZ toolkit.
 ## Copyright (C) 1995-2007, Index Data, All rights reserved.
-## $Id: Makefile.am,v 1.64 2007-04-25 20:52:19 adam Exp $
+## $Id: Makefile.am,v 1.65 2007-04-26 21:45:17 adam Exp $
 
 YAZ_VERSION_INFO=3:0:0
 
@@ -92,7 +92,7 @@ libyaz_la_SOURCES=version.c options.c log.c \
   tcpdchk.c \
   test.c timing.c \
   xmlquery.c http.c \
-  mime.c mime.h oid_util.c \
+  mime.c mime.h oid_util.c tokenizer.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c
 
 libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
index 3d81f2d..fc67b80 100644 (file)
@@ -56,7 +56,7 @@
 /* CCL find (to rpn conversion)
  * Europagate, 1995
  *
- * $Id: cclfind.c,v 1.10 2007-04-26 09:11:56 adam Exp $
+ * $Id: cclfind.c,v 1.11 2007-04-26 21:45:17 adam Exp $
  *
  * Old Europagate log:
  *
@@ -185,12 +185,22 @@ static char *copy_token_name (struct ccl_token *tp)
  * kind:   Type of node.
  * return: pointer to allocated node.
  */
-static struct ccl_rpn_node *mk_node (int kind)
+struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind)
 {
     struct ccl_rpn_node *p;
     p = (struct ccl_rpn_node *)xmalloc (sizeof(*p));
     ccl_assert (p);
     p->kind = kind;
+
+    /* Only term nodes are initialised here: they start with an empty
+       attribute list and no term text. For every other kind the union
+       members are left unset and must be filled in by the caller. */
+    switch(kind)
+    {
+    case CCL_RPN_TERM:
+        p->u.t.attr_list = 0;
+        p->u.t.term = 0;
+        break;
+    default:
+        break;
+    }
     return p;
 }
 
@@ -264,8 +274,6 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p,
     n->next = p->u.t.attr_list;
     p->u.t.attr_list = n;
     
-    n->kind = CCL_RPN_ATTR_NUMERIC;
-    n->value.numeric = 0;
     return n;
 }
 
@@ -276,8 +284,8 @@ static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p,
  * value: Value of attribute
  * set: Attribute set name
  */
-static void add_attr_numeric (struct ccl_rpn_node *p, const char *set,
-                              int type, int value)
+void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
+                          int type, int value)
 {
     struct ccl_rpn_attr *n;
 
@@ -286,8 +294,8 @@ static void add_attr_numeric (struct ccl_rpn_node *p, const char *set,
     n->value.numeric = value;
 }
 
-static void add_attr_string (struct ccl_rpn_node *p, const char *set,
-                             int type, char *value)
+void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
+                         int type, char *value)
 {
     struct ccl_rpn_attr *n;
 
@@ -379,17 +387,17 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
         if (p_top)
         {
             if (or_list)
-                p = mk_node (CCL_RPN_OR);
+                p = ccl_rpn_node_create(CCL_RPN_OR);
             else if (and_list)
-                p = mk_node (CCL_RPN_AND);
+                p = ccl_rpn_node_create(CCL_RPN_AND);
             else
-                p = mk_node (CCL_RPN_AND);
+                p = ccl_rpn_node_create(CCL_RPN_AND);
             p->u.p[0] = p_top;
             p_top = p;
         }
                 
         /* create the term node, but wait a moment before adding the term */
-        p = mk_node (CCL_RPN_TERM);
+        p = ccl_rpn_node_create(CCL_RPN_TERM);
         p->u.t.attr_list = NULL;
         p->u.t.term = NULL;
 
@@ -409,8 +417,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 switch(attr->kind)
                 {
                 case CCL_RPN_ATTR_STRING:
-                    add_attr_string(p, attr->set, attr->type,
-                                    attr->value.str);
+                    ccl_add_attr_string(p, attr->set, attr->type,
+                                        attr->value.str);
                     break;
                 case CCL_RPN_ATTR_NUMERIC:
                     if (attr->value.numeric > 0)
@@ -444,8 +452,8 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                             completeness_value = attr->value.numeric;
                             break;
                         }
-                        add_attr_numeric(p, attr->set, attr->type,
-                                         attr->value.numeric);
+                        ccl_add_attr_numeric(p, attr->set, attr->type,
+                                             attr->value.numeric);
                     }
                 }
         }
@@ -457,9 +465,9 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
         {   /* no structure attribute met. Apply either structure attribute 
                WORD or PHRASE depending on number of CCL tokens */
             if (no == 1 && no_spaces == 0)
-                add_attr_numeric (p, attset, CCL_BIB1_STR, 2);
+                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
             else
-                add_attr_numeric (p, attset, CCL_BIB1_STR, 1);
+                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
         }
 
         /* make the RPN token */
@@ -497,7 +505,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr_numeric (p, attset, CCL_BIB1_TRU, 3);
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
         }
         else if (right_trunc)
         {
@@ -508,7 +516,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr_numeric (p, attset, CCL_BIB1_TRU, 1);
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
         }
         else if (left_trunc)
         {
@@ -519,13 +527,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr_numeric (p, attset, CCL_BIB1_TRU, 2);
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
         }
         else
         {
             if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
                                &attset))
-                add_attr_numeric (p, attset, CCL_BIB1_TRU, 100);
+                ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
         }
         if (!multi)
             break;
@@ -661,16 +669,16 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            p = mk_node (CCL_RPN_AND);
+            p = ccl_rpn_node_create(CCL_RPN_AND);
             p->u.p[0] = p1;
-            add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
+            ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
             p->u.p[1] = p2;
-            add_attr_numeric (p2, attset, CCL_BIB1_REL, 2);
+            ccl_add_attr_numeric(p2, attset, CCL_BIB1_REL, 2);
             return p;
         }
         else                       /* = term -    */
         {
-            add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
+            ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
             return p1;
         }
     }
@@ -681,7 +689,7 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp,
         ADVANCE;
         if (!(p = search_term (cclp, ap)))
             return NULL;
-        add_attr_numeric (p, attset, CCL_BIB1_REL, 2);
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2);
         return p;
     }
     else if (KIND == CCL_TOK_LP)
@@ -702,7 +710,7 @@ struct ccl_rpn_node *qualifiers_order (CCL_parser cclp,
     {
         if (!(p = search_terms (cclp, ap)))
             return NULL;
-        add_attr_numeric (p, attset, CCL_BIB1_REL, rel);
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
         return p;
     }
     cclp->error_code = CCL_ERR_TERM_EXPECTED;
@@ -812,7 +820,8 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,
                 }
                 if (node)
                 {
-                    struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                    struct ccl_rpn_node *node_this = 
+                        ccl_rpn_node_create(CCL_RPN_OR);
                     node_this->u.p[0] = node;
                     node_this->u.p[1] = node_sub;
                     node = node_this;
@@ -883,7 +892,8 @@ static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,
             }
             if (node)
             {
-                struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                struct ccl_rpn_node *node_this = 
+                    ccl_rpn_node_create(CCL_RPN_OR);
                 node_this->u.p[0] = node;
                 node_this->u.p[1] = node_sub;
                 node = node_this;
@@ -920,7 +930,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
             struct ccl_rpn_node *p_prox = 0;
             /* ! word order specified */
             /* % word order not specified */
-            p_prox = mk_node(CCL_RPN_TERM);
+            p_prox = ccl_rpn_node_create(CCL_RPN_TERM);
             p_prox->u.t.term = (char *) xmalloc(1 + cclp->look_token->len);
             memcpy(p_prox->u.t.term, cclp->look_token->name,
                    cclp->look_token->len);
@@ -934,7 +944,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            pn = mk_node (CCL_RPN_PROX);
+            pn = ccl_rpn_node_create(CCL_RPN_PROX);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
             pn->u.p[2] = p_prox;
@@ -948,7 +958,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            pn = mk_node (CCL_RPN_PROX);
+            pn = ccl_rpn_node_create(CCL_RPN_PROX);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
             pn->u.p[2] = 0;
@@ -996,7 +1006,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp,
             cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
             return NULL;
         }
-        p1 = mk_node (CCL_RPN_SET);
+        p1 = ccl_rpn_node_create(CCL_RPN_SET);
         p1->u.setname = copy_token_name (cclp->look_token);
         ADVANCE;
         return p1;
@@ -1039,7 +1049,8 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp,
             }
             if (node)
             {
-                struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                struct ccl_rpn_node *node_this = 
+                    ccl_rpn_node_create(CCL_RPN_OR);
                 node_this->u.p[0] = node;
                 node_this->u.p[1] = node_sub;
                 node_this->u.p[2] = 0;
@@ -1078,7 +1089,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            pn = mk_node (CCL_RPN_AND);
+            pn = ccl_rpn_node_create(CCL_RPN_AND);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
             pn->u.p[2] = 0;
@@ -1092,7 +1103,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            pn = mk_node (CCL_RPN_OR);
+            pn = ccl_rpn_node_create(CCL_RPN_OR);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
             pn->u.p[2] = 0;
@@ -1106,7 +1117,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
                 ccl_rpn_delete (p1);
                 return NULL;
             }
-            pn = mk_node (CCL_RPN_NOT);
+            pn = ccl_rpn_node_create(CCL_RPN_NOT);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
             pn->u.p[2] = 0;
index 9d34229..b3abfd8 100644 (file)
@@ -48,7 +48,7 @@
 /* CCL qualifiers
  * Europagate, 1995
  *
- * $Id: cclqfile.c,v 1.8 2007-04-25 20:52:19 adam Exp $
+ * $Id: cclqfile.c,v 1.9 2007-04-26 21:45:17 adam Exp $
  *
  * Old Europagate Log:
  *
 #include <stdlib.h>
 #include <string.h>
 
+#include <yaz/tokenizer.h>
 #include <yaz/ccl.h>
+#include <yaz/log.h>
 
 #define MAX_QUAL 128
 
-void ccl_qual_field (CCL_bibset bibset, const char *cp, const char *qual_name)
+/**
+ * Parses one qualifier specification and registers it in a bibset.
+ *
+ * cp is either a list of attribute specs, each of the form
+ *     [set ,] type = value { , value }
+ * or a list of qualifier aliases (q1 q2 ...). type is an integer or one
+ * of the letters u, r, p, s, t, c (either case).
+ *
+ * bibset:    bibset that receives the qualifier
+ * cp:        specification text
+ * qual_name: name of the qualifier being defined
+ * addinfo:   set to 0 on success, or to a static error message on failure
+ * return:    0 on success, -1 on error (nothing is added to bibset then)
+ */
+int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name,
+                    const char **addinfo)
 {
-    char qual_spec[128];
+    yaz_tokenizer_t yt = yaz_tokenizer_create();
+
     int type_ar[MAX_QUAL];
     int value_ar[MAX_QUAL];
     char *svalue_ar[MAX_QUAL];
     char *attsets[MAX_QUAL];
     int pair_no = 0;
+    char *type_str = 0;
+    char *set_str = 0;  /* attribute set of the current spec; owned here */
+    int is_alias = 0;   /* cp turned out to be a list of aliases */
+    int t;
 
-    while (pair_no < MAX_QUAL)
+    yaz_tokenizer_single_tokens(yt, ",=");
+    yaz_tokenizer_read_buf(yt, cp);
+    *addinfo = 0;
+
+    t = yaz_tokenizer_move(yt);
+    while (t == YAZ_TOKENIZER_STRING)
     {
-        char *qual_value, *qual_type;
-        char *split, *setp;
-        int no_scan = 0;
-        
-        if (sscanf (cp, "%100s%n", qual_spec, &no_scan) < 1)
-            break;
+        /* we don't know what lead is yet */
+        char *lead_str = xstrdup(yaz_tokenizer_string(yt));
+        const char *value_str = 0;
+        int type = 0, value = 0; /* indicates attribute value UNSET  */
 
-        if (!(split = strchr (qual_spec, '=')))
+        t = yaz_tokenizer_move(yt);
+        if (t == ',')
         {
-            /* alias specification .. */
-            if (pair_no == 0)
+            /* full attribute spec: set, type = value */
+            /* lead is attribute set; keep it in set_str so that it is
+               released on every exit path */
+            xfree(set_str);
+            set_str = lead_str;
+            t = yaz_tokenizer_move(yt);
+            if (t != YAZ_TOKENIZER_STRING)
             {
-                ccl_qual_add_combi (bibset, qual_name, cp);
-                return;
+                *addinfo = "token expected";
+                goto out;
+            }
+            xfree(type_str);
+            type_str = xstrdup(yaz_tokenizer_string(yt));
+            if (yaz_tokenizer_move(yt) != '=')
+            {
+                *addinfo = "= expected";
+                goto out;
             }
-            break;
         }
-        /* [set,]type=value ... */
-        cp += no_scan;
-        
-        *split++ = '\0';
-
-        setp = strchr (qual_spec, ',');
-        if (setp)
+        else if (t == '=')
         {
-            /* set,type=value ... */
-            *setp++ = '\0';
-            qual_type = setp;
+            /* lead is attribute type */
+            /* attribute set omitted: type = value */
+            xfree(set_str);
+            set_str = 0;
+            xfree(type_str);
+            type_str = lead_str;
         }
         else
         {
-            /* type=value ... */
-            qual_type = qual_spec;
+            /* lead is first of a list of qualifier aliases */
+            /* qualifier alias: q1 q2 ... */
+            xfree(lead_str);
+            is_alias = 1;
+            goto out; /* shared cleanup, then the alias is registered */
         }
-        while (pair_no < MAX_QUAL)
+        while (1) /* comma separated attribute value list */
         {
-            int type, value;
-
-            qual_value = split;
-            if ((split = strchr (qual_value, ',')))
-                *split++ = '\0';
-
-            value = 0;
-            switch (qual_type[0])
+            /* a symbolic value found for the previous list item must not
+               carry over to this one */
+            value = 0;
+            t = yaz_tokenizer_move(yt);
+            /* must have a value now */
+            if (t != YAZ_TOKENIZER_STRING)
             {
-            case 'u':
-            case 'U':
-                type = CCL_BIB1_USE;
-                break;
-            case 'r':
-            case 'R':
-                type = CCL_BIB1_REL;
-                if (!ccl_stricmp (qual_value, "o"))
-                    value = CCL_BIB1_REL_ORDER;
-                else if (!ccl_stricmp (qual_value, "r"))
-                    value = CCL_BIB1_REL_PORDER;
-                break;                
-            case 'p':
-            case 'P':
-                type = CCL_BIB1_POS;
-                break;
-            case 's':
-            case 'S':
-                type = CCL_BIB1_STR;
-                if (!ccl_stricmp (qual_value, "pw"))
-                    value = CCL_BIB1_STR_WP;
-                if (!ccl_stricmp (qual_value, "al"))
-                    value = CCL_BIB1_STR_AND_LIST;
-                if (!ccl_stricmp (qual_value, "ol"))
-                    value = CCL_BIB1_STR_OR_LIST;
-                break;                
-            case 't':
-            case 'T':
-                type = CCL_BIB1_TRU;
-                if (!ccl_stricmp (qual_value, "l"))
-                    value = CCL_BIB1_TRU_CAN_LEFT;
-                else if (!ccl_stricmp (qual_value, "r"))
-                    value = CCL_BIB1_TRU_CAN_RIGHT;
-                else if (!ccl_stricmp (qual_value, "b"))
-                    value = CCL_BIB1_TRU_CAN_BOTH;
-                else if (!ccl_stricmp (qual_value, "n"))
-                    value = CCL_BIB1_TRU_CAN_NONE;
-                break;                
-            case 'c':
-            case 'C':
-                type = CCL_BIB1_COM;
-                break;
-            default:
-                type = atoi (qual_type);
+                *addinfo = "value token expected";
+                goto out;
+            }
+            value_str = yaz_tokenizer_string(yt);
+
+            if (sscanf(type_str, "%d", &type) == 1)
+                ;
+            else if (strlen(type_str) != 1)
+            {
+                *addinfo = "bad attribute type";
+                goto out;
+            }
+            else
+            {
+                switch (*type_str)
+                {
+                case 'u':
+                case 'U':
+                    type = CCL_BIB1_USE;
+                    break;
+                case 'r':
+                case 'R':
+                    type = CCL_BIB1_REL;
+                    if (!ccl_stricmp (value_str, "o"))
+                        value = CCL_BIB1_REL_ORDER;
+                    else if (!ccl_stricmp (value_str, "r"))
+                        value = CCL_BIB1_REL_PORDER;
+                    break;
+                case 'p':
+                case 'P':
+                    type = CCL_BIB1_POS;
+                    break;
+                case 's':
+                case 'S':
+                    type = CCL_BIB1_STR;
+                    if (!ccl_stricmp (value_str, "pw"))
+                        value = CCL_BIB1_STR_WP;
+                    if (!ccl_stricmp (value_str, "al"))
+                        value = CCL_BIB1_STR_AND_LIST;
+                    if (!ccl_stricmp (value_str, "ol"))
+                        value = CCL_BIB1_STR_OR_LIST;
+                    break;
+                case 't':
+                case 'T':
+                    type = CCL_BIB1_TRU;
+                    if (!ccl_stricmp (value_str, "l"))
+                        value = CCL_BIB1_TRU_CAN_LEFT;
+                    else if (!ccl_stricmp (value_str, "r"))
+                        value = CCL_BIB1_TRU_CAN_RIGHT;
+                    else if (!ccl_stricmp (value_str, "b"))
+                        value = CCL_BIB1_TRU_CAN_BOTH;
+                    else if (!ccl_stricmp (value_str, "n"))
+                        value = CCL_BIB1_TRU_CAN_NONE;
+                    break;
+                case 'c':
+                case 'C':
+                    type = CCL_BIB1_COM;
+                    break;
+                default:
+                    /* unknown letter: type stays 0 and is rejected below */
+                    break;
+                }
+            }
+            if (type == 0)
+            {
+                /* type was not set in switch above */
+                *addinfo = "bad attribute type";
+                goto out;
             }
-
             type_ar[pair_no] = type;
-
             if (value)
             {
                 value_ar[pair_no] = value;
                 svalue_ar[pair_no] = 0;
             }
-            else if (*qual_value >= '0' && *qual_value <= '9')
+            else if (*value_str >= '0' && *value_str <= '9')
             {
-                value_ar[pair_no] = atoi (qual_value);
+                value_ar[pair_no] = atoi (value_str);
                 svalue_ar[pair_no] = 0;
             }
             else
             {
-                size_t len;
-                if (split)
-                    len = split - qual_value;
-                else
-                    len = strlen(qual_value);
-                svalue_ar[pair_no] = (char *) xmalloc(len+1);
-                memcpy(svalue_ar[pair_no], qual_value, len);
-                svalue_ar[pair_no][len] = '\0';
+                value_ar[pair_no] = 0;
+                svalue_ar[pair_no] = xstrdup(value_str);
             }
-            if (setp)
+            /* every pair gets its own copy of the set name, so that no
+               two entries share a pointer that would be freed twice */
+            attsets[pair_no] = set_str ? xstrdup(set_str) : 0;
+            pair_no++;
+            if (pair_no == MAX_QUAL)
             {
-                attsets[pair_no] = xstrdup (qual_spec);
+                *addinfo = "too many attribute values";
+                goto out;
             }
-            else
-                attsets[pair_no] = 0;
-            pair_no++;
-            if (!split)
+            t = yaz_tokenizer_move(yt);
+            if (t != ',')
                 break;
         }
     }
-    ccl_qual_add_set (bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar,
-                      attsets);
+ out:
+    xfree(type_str);
+    type_str = 0;
+    xfree(set_str);
+    set_str = 0;
+
+    yaz_tokenizer_destroy(yt);
+
+    if (*addinfo || is_alias)
+    {
+        /* the collected pairs are not handed to the bibset: release them */
+        int i;
+        for (i = 0; i<pair_no; i++)
+        {
+            xfree(attsets[i]);
+            xfree(svalue_ar[i]);
+        }
+    }
+    if (*addinfo)
+        return -1;
+    if (is_alias)
+    {
+        ccl_qual_add_combi (bibset, qual_name, cp);
+        return 0;
+    }
+    ccl_qual_add_set(bibset, qual_name, pair_no, type_ar, value_ar, svalue_ar,
+                     attsets);
+    return 0;
+}
+
+/* Compatibility wrapper around ccl_qual_field2: same parsing, but a
+   failure is only reported as a warning in the log. */
+void ccl_qual_field(CCL_bibset bibset, const char *cp, const char *qual_name)
+{
+    const char *errmsg = 0;
+
+    ccl_qual_field2(bibset, cp, qual_name, &errmsg);
+    if (!errmsg)
+        return;
+    yaz_log(YLOG_WARN, "ccl_qual_field2 fail: %s", errmsg);
 }
 
 void ccl_qual_fitem (CCL_bibset bibset, const char *cp, const char *qual_name)
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644 (file)
index 0000000..622e6ce
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 1995-2007, Index Data ApS
+ * See the file LICENSE for details.
+ *
+ * $Id: tokenizer.c,v 1.1 2007-04-26 21:45:17 adam Exp $
+ */
+
+/**
+ * \file tokenizer.c
+ * \brief Implements a small tokenizer (white space, single-character tokens, quoted strings)
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <yaz/log.h>
+#include <yaz/wrbuf.h>
+#include <yaz/tokenizer.h>
+
+/** Tokenizer state; opaque to users of tokenizer.h. */
+struct yaz_tokenizer {
+    /* input: callback yielding the next byte (0 at end) and its cursor */
+    const void *get_byte_data;
+    int (*get_byte_func)(const void **vp);
+    int unget_byte;            /* single pushed-back byte; 0 means none */
+
+    /* character classes, each a heap-allocated NUL-terminated string */
+    char *white_space;         /* bytes skipped between tokens */
+    char *single_tokens;       /* bytes that are tokens on their own */
+    char *quote_tokens_begin;  /* opening quote characters */
+    char *quote_tokens_end;    /* closing quote for the opener at same index */
+
+    WRBUF wr_string;           /* text of the current string token */
+    int look;                  /* token code returned by the last move */
+};
+
+/** Replaces the set of single-character tokens with a copy of \a simple. */
+void yaz_tokenizer_single_tokens(yaz_tokenizer_t t, const char *simple)
+{
+    char *replacement = xstrdup(simple);
+
+    xfree(t->single_tokens);
+    t->single_tokens = replacement;
+}
+
+/**
+ * Allocates a tokenizer with default settings: white space " \t\r\n",
+ * no single-character tokens, '"' as opening and closing quote, and no
+ * input attached yet.
+ */
+yaz_tokenizer_t yaz_tokenizer_create(void)
+{
+    struct yaz_tokenizer *tok = xmalloc(sizeof *tok);
+
+    /* no input source until yaz_tokenizer_read_buf is called */
+    tok->get_byte_func = 0;
+    tok->get_byte_data = 0;
+    tok->unget_byte = 0;
+
+    /* default character classes */
+    tok->white_space = xstrdup(" \t\r\n");
+    tok->single_tokens = xstrdup("");
+    tok->quote_tokens_begin = xstrdup("\"");
+    tok->quote_tokens_end = xstrdup("\"");
+
+    tok->wr_string = wrbuf_alloc();
+    tok->look = YAZ_TOKENIZER_ERROR;
+    return tok;
+}
+
+/**
+ * Releases a tokenizer together with the strings and buffer it owns.
+ * A null handle is accepted and ignored, as with free().
+ */
+void yaz_tokenizer_destroy(yaz_tokenizer_t t)
+{
+    if (!t)
+        return;
+    xfree(t->white_space);
+    xfree(t->single_tokens);
+    xfree(t->quote_tokens_begin);
+    xfree(t->quote_tokens_end);
+    wrbuf_destroy(t->wr_string);
+    xfree(t);
+}
+
+/**
+ * Byte source for NUL-terminated buffers: returns the byte at the cursor
+ * *vp and steps the cursor past it. At the terminating NUL it returns 0
+ * and leaves the cursor in place, so further calls keep returning 0.
+ */
+static int read_buf(const void **vp)
+{
+    const char **cursor = (const char **) vp;
+    int ch = **cursor;
+
+    if (ch)
+        ++*cursor;
+    return ch;
+}
+
+/**
+ * Returns the next input byte: the pushed-back byte if there is one
+ * (clearing it), otherwise a fresh byte from the input callback.
+ */
+static int get_byte(yaz_tokenizer_t t)
+{
+    int pending = t->unget_byte;
+
+    assert(t->get_byte_func);
+    if (!pending)
+        return t->get_byte_func(&t->get_byte_data);
+    t->unget_byte = 0;
+    return pending;
+}
+
+/* Pushes one byte back so that the next get_byte returns it. Only a single
+   byte is remembered, and 0 cannot be pushed back: get_byte treats 0 as
+   "nothing pending" and reads from the input instead. */
+static void unget_byte(yaz_tokenizer_t t, int ch)
+{
+    t->unget_byte = ch;
+}
+
+/**
+ * Attaches the NUL-terminated string \a buf as input. Only the pointer is
+ * stored, so the buffer must outlive the tokenizing.
+ */
+void yaz_tokenizer_read_buf(yaz_tokenizer_t t, const char *buf)
+{
+    assert(t);
+    t->get_byte_data = buf;
+    t->get_byte_func = read_buf;
+}
+
+/**
+ * Reads the next token from the attached input.
+ *
+ * Leading white space is skipped. The result is
+ *   YAZ_TOKENIZER_EOF      at end of input,
+ *   the character itself   for a configured single-character token,
+ *   YAZ_TOKENIZER_QSTRING  for a quoted string (text without the quotes),
+ *   YAZ_TOKENIZER_ERROR    if input ends inside a quoted string,
+ *   YAZ_TOKENIZER_STRING   for an unquoted string.
+ * For the two string tokens the text is available from
+ * yaz_tokenizer_string().
+ */
+int yaz_tokenizer_move(yaz_tokenizer_t t)
+{
+    const char *cp;
+    int ch = get_byte(t);
+
+    /* skip white space */
+    while (ch && strchr(t->white_space, ch))
+        ch = get_byte(t);
+    if (!ch)
+    {
+        ch = YAZ_TOKENIZER_EOF;
+    }
+    else if ((cp = strchr(t->single_tokens, ch)))
+        ch = *cp;  /* single token match */
+    else if ((cp = strchr(t->quote_tokens_begin, ch)))
+    {   /* quoted string */
+        /* the closing quote sits at the same index as the opening one */
+        int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
+        ch = get_byte(t);
+        wrbuf_rewind(t->wr_string);
+        while (ch && ch != end_ch)
+        {
+            wrbuf_putc(t->wr_string, ch);
+            /* advance; without this the loop would never terminate */
+            ch = get_byte(t);
+        }
+        if (!ch)
+            ch = YAZ_TOKENIZER_ERROR;   /* input ended before end quote */
+        else
+            ch = YAZ_TOKENIZER_QSTRING; /* end quote consumed */
+    }
+    else
+    {  /* unquoted string */
+        wrbuf_rewind(t->wr_string);
+        while (ch && !strchr(t->white_space, ch)
+               && !strchr(t->single_tokens, ch))
+        {
+            wrbuf_putc(t->wr_string, ch);
+            ch = get_byte(t);
+        }
+        /* the byte that ended the string belongs to the next token */
+        unget_byte(t, ch);
+        ch = YAZ_TOKENIZER_STRING;
+    }
+    t->look = ch;
+    yaz_log(YLOG_LOG, "tokenizer returns %d (%s)", ch, 
+            wrbuf_cstr(t->wr_string));
+    
+    return ch;
+}
+
+/* Returns the text collected for the most recent string token. The pointer
+   refers to the tokenizer's own buffer, which yaz_tokenizer_move rewinds
+   and refills when it reads another string token. */
+const char *yaz_tokenizer_string(yaz_tokenizer_t t)
+{
+    return wrbuf_cstr(t->wr_string);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+