Update CQL parser to use CQL 1.1 modifiers for booleans and
[yaz-moved-to-github.git] / src / cqltransform.c
index 7008af4..52cb21f 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: cqltransform.c,v 1.2 2003-12-18 16:42:52 mike Exp $
+/* $Id: cqltransform.c,v 1.7 2004-03-10 16:34:29 adam Exp $
    Copyright (C) 2002-2003
    Index Data Aps
 
@@ -172,11 +172,35 @@ int cql_pr_attr(cql_transform_t ct, const char *category,
     if (errcode && !ct->error)
     {
         ct->error = errcode;
-        ct->addinfo = strdup(val);
+       if (val)
+           ct->addinfo = strdup(val);
+       else
+           ct->addinfo = 0;
     }
     return 0;
 }
 
+
+/* Returns location of the first wildcard character ('*' or '?') in the
+ * `length' characters starting at `term', or a null pointer if there are
+ * none -- like memchr(), so `length' must not be negative.
+ */
+static char *wcchar(const char *term, int length)
+{
+    char *best = 0;    /* earliest wildcard position found so far */
+    char *current;    /* first occurrence of the wildcard being tried */
+    char *whichp;    /* walks the list of wildcard characters */
+
+    for (whichp = "*?"; *whichp != '\0'; whichp++) {
+       current = memchr(term, *whichp, length);
+       if (current != 0 && (best == 0 || current < best))
+           best = current;    /* keep whichever match lies closest to `term' */
+    }
+
+    return best;
+}
+
+
 void emit_term(cql_transform_t ct,
                const char *term, int length,
                void (*pr)(const char *buf, void *client_data),
@@ -197,6 +221,7 @@ void emit_term(cql_transform_t ct,
             cql_pr_attr(ct, "position.", "first", 0,
                         pr, client_data, 32);
             term++;
+           length--;
         }
         else if (term[length-1] == '^')
         {
@@ -210,6 +235,73 @@ void emit_term(cql_transform_t ct,
                         pr, client_data, 32);
         }
     }
+
+    if (length > 0)
+    {
+       /* Check for well-known globbing patterns that represent
+        * simple truncation attributes as expected by, for example,
+        * Bath-compliant server.  If we find such a pattern but
+        * there's no mapping for it, that's fine: we just use a
+        * general pattern-matching attribute.
+        */
+        if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
+           wcchar(term+1, length-2) == 0 &&
+            cql_pr_attr(ct, "truncation.", "both", 0,
+                       pr, client_data, 0)) {
+           term++;
+           length -= 2;
+        }
+        else if (term[0] == '*' &&
+                wcchar(term+1, length-1) == 0 &&
+                cql_pr_attr(ct, "truncation.", "left", 0,
+                            pr, client_data, 0)) {
+           term++;
+           length--;
+        }
+        else if (term[length-1] == '*' &&
+                wcchar(term, length-1) == 0 &&
+                cql_pr_attr(ct, "truncation.", "right", 0,
+                            pr, client_data, 0)) {
+           length--;
+        }
+        else if (wcchar(term, length))
+        {
+           /* We have one or more wildcard characters, but not in a
+            * way that can be dealt with using only the standard
+            * left-, right- and both-truncation attributes.  We need
+            * to translate the pattern into a Z39.58-type pattern,
+            * which has been supported in BIB-1 since 1996.  If
+            * there's no configuration element for "truncation.z3958"
+            * we indicate this as error 28 "Masking character not
+            * supported".
+            */
+           int i;
+           char *mem;
+            cql_pr_attr(ct, "truncation.", "z3958", 0,
+                        pr, client_data, 28);
+           mem = malloc(length+1);
+            for (i = 0; i < length; i++) {
+               if (term[i] == '*')      mem[i] = '?';
+               else if (term[i] == '?') mem[i] = '#';
+               else                     mem[i] = term[i];
+           }
+           mem[length] = '\0';
+           term = mem;
+        }
+        else {
+           /* No masking characters.  If there's no "truncation.none"
+            * configuration element, that's an error which we
+            * indicate (rather tangentially) as 30 "Too many masking
+            * characters in term".  28 would be equally meaningful
+            * (or meaningless) but using a different value allows us
+            * to differentiate between this case and the previous
+            * one.
+            */
+            cql_pr_attr(ct, "truncation.", "none", 0,
+                        pr, client_data, 30);
+        }
+    }
+
     (*pr)("\"", client_data);
     for (i = 0; i<length; i++)
     {
@@ -280,17 +372,17 @@ static const char *cql_get_ns(cql_transform_t ct,
     for (i = prefix_level; !ns && --i >= 0; )
     {
         struct cql_node *cn_prefix = prefix_ar[i];
-        for (; cn_prefix; cn_prefix = cn_prefix->u.mod.next)
+        for (; cn_prefix; cn_prefix = cn_prefix->u.st.modifiers)
         {
-            if (*prefix && cn_prefix->u.mod.name &&
-                !strcmp(prefix, cn_prefix->u.mod.name))
+            if (*prefix && cn_prefix->u.st.index &&
+                !strcmp(prefix, cn_prefix->u.st.index))
             {
-                ns = cn_prefix->u.mod.value;
+                ns = cn_prefix->u.st.term;
                 break;
             }
-            else if (!*prefix && !cn_prefix->u.mod.name)
+            else if (!*prefix && !cn_prefix->u.st.index)
             {
-                ns = cn_prefix->u.mod.value;
+                ns = cn_prefix->u.st.term;
                 break;
             }
         }
@@ -315,7 +407,7 @@ static const char *cql_get_ns(cql_transform_t ct,
         }
         return 0;
     }
-    /* 4. lookup qualifier.prefix. */
+    /* 4. lookup index.prefix. */
     
     cp = cn->u.st.index;
     cp_dot = strchr(cp, '.');
@@ -345,7 +437,8 @@ void cql_transform_r(cql_transform_t ct,
             char n_full[64];
             sprintf (n_full, "%.20s.%.40s", n_prefix, n_suffix);
         
-            if (!strcmp(ns, "http://www.loc.gov/zing/cql/srw-indexes/v1.0/")
+            if ((!strcmp(ns, "http://www.loc.gov/zing/cql/context-sets/cql/v1.1/") ||
+                !strcmp(ns, "http://www.loc.gov/zing/cql/srw-indexes/v1.0/"))
                 && !strcmp(n_suffix, "resultSet"))
             {
                 (*pr)("@set \"", client_data);
@@ -353,11 +446,13 @@ void cql_transform_r(cql_transform_t ct,
                 (*pr)("\" ", client_data);
                 return ;
             }
-           if (!cql_pr_attr(ct, "index.", n_full, "srw.serverChoice",
+           /* ### It would be nice if this could fall back to whichever 
+              of cql.serverChoice and srw.serverChoice is defined */
+           if (!cql_pr_attr(ct, "index.", n_full, "cql.serverChoice",
                             pr, client_data, 16)) {
                /* No index.foo; reset error and fall back to qualifier.foo */
                if (ct->error == 16) ct->error = 0;
-               cql_pr_attr(ct, "qualifier.", n_full, "srw.serverChoice",
+               cql_pr_attr(ct, "qualifier.", n_full, "cql.serverChoice",
                            pr, client_data, 16);
            }
         }
@@ -377,9 +472,9 @@ void cql_transform_r(cql_transform_t ct,
         if (cn->u.st.modifiers)
         {
             struct cql_node *mod = cn->u.st.modifiers;
-            for (; mod; mod = mod->u.mod.next)
+            for (; mod; mod = mod->u.st.modifiers)
             {
-                cql_pr_attr(ct, "relationModifier.", mod->u.mod.value, 0,
+                cql_pr_attr(ct, "relationModifier.", mod->u.st.term, 0,
                             pr, client_data, 20);
             }
         }
@@ -432,13 +527,13 @@ int cql_transform(cql_transform_t ct,
     {
         if (!memcmp(e->pattern, "set.", 4))
         {
-            *pp = cql_node_mk_mod(e->pattern+4, e->value);
-            pp = &(*pp)->u.mod.next;
+            *pp = cql_node_mk_sc(e->pattern+4, "=", e->value);
+            pp = &(*pp)->u.st.modifiers;
         }
         else if (!strcmp(e->pattern, "set"))
         {
-            *pp = cql_node_mk_mod(0, e->value);
-            pp = &(*pp)->u.mod.next;
+            *pp = cql_node_mk_sc(e->value, 0, 0);
+            pp = &(*pp)->u.st.modifiers;
         }
     }
     cql_transform_r (ct, cn, pr, client_data, prefix_ar, 1);