Reinsert initialiser for __UNUSED_loglevel
[yaz-moved-to-github.git] / src / cqltransform.c
index 00305f7..74ad961 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: cqltransform.c,v 1.9 2004-05-25 14:06:15 adam Exp $
-   Copyright (C) 2002-2004
+/* $Id: cqltransform.c,v 1.21 2006-03-20 14:56:40 mike Exp $
+   Copyright (C) 1995-2005, Index Data ApS
    Index Data Aps
 
 This file is part of the YAZ toolkit.
@@ -7,6 +7,11 @@ This file is part of the YAZ toolkit.
 See the file LICENSE.
 */
 
+/**
+ * \file cqltransform.c
+ * \brief Implements CQL transform (CQL to RPN conversion).
+ */
+
 #include <stdlib.h>
 #include <string.h>
 #include <yaz/cql.h>
@@ -104,7 +109,7 @@ cql_transform_t cql_transform_open_fname(const char *fname)
 
 static const char *cql_lookup_property(cql_transform_t ct,
                                        const char *pat1, const char *pat2,
-                                      const char *pat3)
+                                       const char *pat3)
 {
     char pattern[120];
     struct cql_prop_entry *e;
@@ -118,21 +123,21 @@ static const char *cql_lookup_property(cql_transform_t ct,
     else if (pat1)
         sprintf (pattern, "%.39s", pat1);
     else
-       return 0;
+        return 0;
     
     for (e = ct->entry; e; e = e->next)
     {
-        if (!strcmp(e->pattern, pattern))
+        if (!cql_strcmp(e->pattern, pattern))
             return e->value;
     }
     return 0;
 }
 
 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
-                  const char *uri, const char *val, const char *default_val,
-                  void (*pr)(const char *buf, void *client_data),
-                  void *client_data,
-                  int errcode)
+                   const char *uri, const char *val, const char *default_val,
+                   void (*pr)(const char *buf, void *client_data),
+                   void *client_data,
+                   int errcode)
 {
     const char *res = 0;
     const char *eval = val ? val : default_val;
@@ -140,24 +145,24 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category,
     
     if (uri)
     {
-       struct cql_prop_entry *e;
-       
-       for (e = ct->entry; e; e = e->next)
-           if (!memcmp(e->pattern, "set.", 4) && e->value &&
-               !strcmp(e->value, uri))
-           {
-               prefix = e->pattern+4;
-               break;
-           }
-       /* must have a prefix now - if not it's an error */
+        struct cql_prop_entry *e;
+        
+        for (e = ct->entry; e; e = e->next)
+            if (!memcmp(e->pattern, "set.", 4) && e->value &&
+                !strcmp(e->value, uri))
+            {
+                prefix = e->pattern+4;
+                break;
+            }
+        /* must have a prefix now - if not it's an error */
     }
 
     if (!uri || prefix)
     {
-       if (!res)
-           res = cql_lookup_property(ct, category, prefix, eval);
-       if (!res)
-           res = cql_lookup_property(ct, category, prefix, "*");
+        if (!res)
+            res = cql_lookup_property(ct, category, prefix, eval);
+        if (!res)
+            res = cql_lookup_property(ct, category, prefix, "*");
     }
     if (res)
     {
@@ -185,39 +190,126 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category,
     if (errcode && !ct->error)
     {
         ct->error = errcode;
-       if (val)
-           ct->addinfo = xstrdup(val);
-       else
-           ct->addinfo = 0;
+        if (val)
+            ct->addinfo = xstrdup(val);
+        else
+            ct->addinfo = 0;
     }
     return 0;
 }
 
 int cql_pr_attr(cql_transform_t ct, const char *category,
-               const char *val, const char *default_val,
-               void (*pr)(const char *buf, void *client_data),
-               void *client_data,
-               int errcode)
+                const char *val, const char *default_val,
+                void (*pr)(const char *buf, void *client_data),
+                void *client_data,
+                int errcode)
 {
     return cql_pr_attr_uri(ct, category, 0 /* uri */,
-                          val, default_val, pr, client_data, errcode);
+                           val, default_val, pr, client_data, errcode);
 }
 
 
+static void cql_pr_int (int val,
+                        void (*pr)(const char *buf, void *client_data),
+                        void *client_data)
+{   /* Emit val as decimal text, then a single space, through pr. */
+    char buf[21];              /* fits 2^64 (20 digits) plus the NUL */
+    sprintf(buf, "%d", val);
+    (*pr)(buf, client_data);   /* client_data is handed to pr unchanged */
+    (*pr)(" ", client_data);   /* every value is followed by one space */
+}
+
+/* Emit the numeric arguments of a "prox" operator from modifier list mods. */
+static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
+                       void (*pr)(const char *buf, void *client_data),
+                       void *client_data)
+{   /* Returns 1 on success, 0 after storing a code in ct->error. */
+    int exclusion = 0;          /* never modified below: always emitted as 0 */
+    int distance;               /* to be filled in later depending on unit */
+    int distance_defined = 0;   /* set once a "distance" modifier is seen */
+    int ordered = 0;            /* default if no ordered/unordered modifier */
+    int proxrel = 2;            /* less than or equal */
+    int unit = 2;               /* word */
+
+    while (mods != 0) {         /* one pass per modifier in the chain */
+        char *name = mods->u.st.index;
+        char *term = mods->u.st.term;         /* assumed non-null -- TODO confirm */
+        char *relation = mods->u.st.relation; /* assumed non-null -- TODO confirm */
+
+        if (!strcmp(name, "distance")) {
+            distance = strtol(term, (char**) 0, 0); /* base 0; long cut to int */
+            distance_defined = 1;
+            if (!strcmp(relation, "=")) {
+                proxrel = 3;
+            } else if (!strcmp(relation, ">")) {
+                proxrel = 5;
+            } else if (!strcmp(relation, "<")) {
+                proxrel = 1;
+            } else if (!strcmp(relation, ">=")) {
+                proxrel = 4;
+            } else if (!strcmp(relation, "<=")) {
+                proxrel = 2;
+            } else if (!strcmp(relation, "<>")) {
+                proxrel = 6;
+            } else {
+                ct->error = 40; /* Unsupported proximity relation */
+                ct->addinfo = xstrdup(relation); /* NOTE(review): see below */
+                return 0;       /* this function has not called pr yet */
+            }
+        } else if (!strcmp(name, "ordered")) {
+            ordered = 1;
+        } else if (!strcmp(name, "unordered")) {
+            ordered = 0;
+        } else if (!strcmp(name, "unit")) {
+            if (!strcmp(term, "word")) {
+                unit = 2;
+            } else if (!strcmp(term, "sentence")) {
+                unit = 3;
+            } else if (!strcmp(term, "paragraph")) {
+                unit = 4;
+            } else if (!strcmp(term, "element")) {
+                unit = 8;
+            } else {
+                ct->error = 42; /* Unsupported proximity unit */
+                ct->addinfo = xstrdup(term);
+                return 0;
+            }
+        } else {
+            ct->error = 46;     /* Unsupported boolean modifier */
+            ct->addinfo = xstrdup(name);
+            return 0;   /* NOTE(review): all three error exits overwrite any */
+        }               /* earlier ct->error/addinfo unchecked -- confirm OK */
+
+        mods = mods->u.st.modifiers;    /* next modifier, or null at the end */
+    }
+
+    if (!distance_defined)
+        distance = (unit == 2) ? 1 : 0; /* default: 1 for unit "word", else 0 */
+
+    cql_pr_int(exclusion, pr, client_data);
+    cql_pr_int(distance, pr, client_data);
+    cql_pr_int(ordered, pr, client_data);
+    cql_pr_int(proxrel, pr, client_data);
+    (*pr)("k ", client_data);   /* NOTE(review): presumably PQF "known" flag */
+    cql_pr_int(unit, pr, client_data);
+
+    return 1;
+}
+
 /* Returns location of first wildcard character in the `length'
  * characters starting at `term', or a null pointer of there are
  * none -- like memchr().
  */
-static char *wcchar(const char *term, int length)
+static const char *wcchar(const char *term, int length)
 {
-    char *best = 0;
-    char *current;
+    const char *best = 0;
+    const char *current;
     char *whichp;
 
     for (whichp = "*?"; *whichp != '\0'; whichp++) {
-       current = memchr(term, *whichp, length);
-       if (current != 0 && (best == 0 || current < best))
-           best = current;
+        current = (const char *) memchr(term, *whichp, length);
+        if (current != 0 && (best == 0 || current < best))
+            best = current;
     }
 
     return best;
@@ -244,7 +336,7 @@ void emit_term(cql_transform_t ct,
             cql_pr_attr(ct, "position", "first", 0,
                         pr, client_data, 32);
             term++;
-           length--;
+            length--;
         }
         else if (term[length-1] == '^')
         {
@@ -261,58 +353,58 @@ void emit_term(cql_transform_t ct,
 
     if (length > 0)
     {
-       /* Check for well-known globbing patterns that represent
-        * simple truncation attributes as expected by, for example,
-        * Bath-compliant server.  If we find such a pattern but
-        * there's no mapping for it, that's fine: we just use a
-        * general pattern-matching attribute.
-        */
+        /* Check for well-known globbing patterns that represent
+         * simple truncation attributes as expected by, for example,
+         * Bath-compliant server.  If we find such a pattern but
+         * there's no mapping for it, that's fine: we just use a
+         * general pattern-matching attribute.
+         */
         if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
-           wcchar(term+1, length-2) == 0 &&
+            wcchar(term+1, length-2) == 0 &&
             cql_pr_attr(ct, "truncation", "both", 0,
-                       pr, client_data, 0)) {
-           term++;
-           length -= 2;
+                        pr, client_data, 0)) {
+            term++;
+            length -= 2;
         }
         else if (term[0] == '*' &&
-                wcchar(term+1, length-1) == 0 &&
-                cql_pr_attr(ct, "truncation", "left", 0,
-                            pr, client_data, 0)) {
-           term++;
-           length--;
+                 wcchar(term+1, length-1) == 0 &&
+                 cql_pr_attr(ct, "truncation", "left", 0,
+                             pr, client_data, 0)) {
+            term++;
+            length--;
         }
         else if (term[length-1] == '*' &&
-                wcchar(term, length-1) == 0 &&
-                cql_pr_attr(ct, "truncation", "right", 0,
-                            pr, client_data, 0)) {
-           length--;
+                 wcchar(term, length-1) == 0 &&
+                 cql_pr_attr(ct, "truncation", "right", 0,
+                             pr, client_data, 0)) {
+            length--;
         }
         else if (wcchar(term, length))
         {
-           /* We have one or more wildcard characters, but not in a
-            * way that can be dealt with using only the standard
-            * left-, right- and both-truncation attributes.  We need
-            * to translate the pattern into a Z39.58-type pattern,
-            * which has been supported in BIB-1 since 1996.  If
-            * there's no configuration element for "truncation.z3958"
-            * we indicate this as error 28 "Masking character not
-            * supported".
-            */
-           int i;
-           char *mem;
+            /* We have one or more wildcard characters, but not in a
+             * way that can be dealt with using only the standard
+             * left-, right- and both-truncation attributes.  We need
+             * to translate the pattern into a Z39.58-type pattern,
+             * which has been supported in BIB-1 since 1996.  If
+             * there's no configuration element for "truncation.z3958"
+             * we indicate this as error 28 "Masking character not
+             * supported".
+             */
+            int i;
+            char *mem;
             cql_pr_attr(ct, "truncation", "z3958", 0,
                         pr, client_data, 28);
-           mem = xmalloc(length+1);
+            mem = (char *) xmalloc(length+1);
             for (i = 0; i < length; i++) {
-               if (term[i] == '*')      mem[i] = '?';
-               else if (term[i] == '?') mem[i] = '#';
-               else                     mem[i] = term[i];
-           }
-           mem[length] = '\0';
-           term = mem;
+                if (term[i] == '*')      mem[i] = '?';
+                else if (term[i] == '?') mem[i] = '#';
+                else                     mem[i] = term[i];
+            }
+            mem[length] = '\0';
+            term = mem;
         }
         else {
-           /* No masking characters.  Use "truncation.none" if given. */
+            /* No masking characters.  Use "truncation.none" if given. */
             cql_pr_attr(ct, "truncation", "none", 0,
                         pr, client_data, 0);
         }
@@ -368,42 +460,41 @@ void cql_transform_r(cql_transform_t ct,
                      void *client_data)
 {
     const char *ns;
+    struct cql_node *mods;
 
     if (!cn)
         return;
     switch (cn->which)
     {
     case CQL_NODE_ST:
-       ns = cn->u.st.index_uri;
+        ns = cn->u.st.index_uri;
         if (ns)
         {
             if (!strcmp(ns, cql_uri())
-               && cn->u.st.index && !strcmp(cn->u.st.index, "resultSet"))
+                && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
             {
                 (*pr)("@set \"", client_data);
                 (*pr)(cn->u.st.term, client_data);
                 (*pr)("\" ", client_data);
                 return ;
             }
-           cql_pr_attr_uri(ct, "index", ns,
-                           cn->u.st.index, "serverChoice",
-                           pr, client_data, 16);
         }
-       else
-       {
-           if (!ct->error)
-           {
-               ct->error = 15;
-               ct->addinfo = 0;
-           }
-       }
-        if (cn->u.st.relation && !strcmp(cn->u.st.relation, "="))
+        else
+        {
+            if (!ct->error)
+            {
+                ct->error = 15;
+                ct->addinfo = 0;
+            }
+        }
+        cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
+        if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
             cql_pr_attr(ct, "relation", "eq", "scr",
                         pr, client_data, 19);
-        else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "<="))
+        else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
             cql_pr_attr(ct, "relation", "le", "scr",
                         pr, client_data, 19);
-        else if (cn->u.st.relation && !strcmp(cn->u.st.relation, ">="))
+        else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
             cql_pr_attr(ct, "relation", "ge", "scr",
                         pr, client_data, 19);
         else
@@ -414,17 +505,22 @@ void cql_transform_r(cql_transform_t ct,
             struct cql_node *mod = cn->u.st.modifiers;
             for (; mod; mod = mod->u.st.modifiers)
             {
-                cql_pr_attr(ct, "relationModifier", mod->u.st.term, 0,
+                cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
                             pr, client_data, 20);
             }
         }
         cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
                     pr, client_data, 24);
-        if (cn->u.st.relation && !strcmp(cn->u.st.relation, "all"))
+        if (ns) {
+            cql_pr_attr_uri(ct, "index", ns,
+                            cn->u.st.index, "serverChoice",
+                            pr, client_data, 16);
+        }
+        if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
         {
             emit_wordlist(ct, cn, pr, client_data, "and");
         }
-        else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "any"))
+        else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
         {
             emit_wordlist(ct, cn, pr, client_data, "or");
         }
@@ -438,9 +534,24 @@ void cql_transform_r(cql_transform_t ct,
         (*pr)("@", client_data);
         (*pr)(cn->u.boolean.value, client_data);
         (*pr)(" ", client_data);
+        mods = cn->u.boolean.modifiers;
+        if (!strcmp(cn->u.boolean.value, "prox")) {
+            if (!cql_pr_prox(ct, mods, pr, client_data))
+                return;
+        } else if (mods) {
+            /* Boolean modifiers other than on proximity not supported */
+            ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
+            ct->addinfo = xstrdup(mods->u.st.index);
+            return;
+        }
 
         cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
         cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
+        break;
+
+    default:
+        fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
+        abort();
     }
 }
 
@@ -450,6 +561,7 @@ int cql_transform(cql_transform_t ct,
                   void *client_data)
 {
     struct cql_prop_entry *e;
+    NMEM nmem = nmem_create();
 
     ct->error = 0;
     if (ct->addinfo)
@@ -458,12 +570,13 @@ int cql_transform(cql_transform_t ct,
 
     for (e = ct->entry; e ; e = e->next)
     {
-        if (!memcmp(e->pattern, "set.", 4))
-           cql_apply_prefix(cn, e->pattern+4, e->value);
-        else if (!strcmp(e->pattern, "set"))
-           cql_apply_prefix(cn, 0, e->value);
+        if (!cql_strncmp(e->pattern, "set.", 4))
+            cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
+        else if (!cql_strcmp(e->pattern, "set"))
+            cql_apply_prefix(nmem, cn, 0, e->value);
     }
     cql_transform_r (ct, cn, pr, client_data);
+    nmem_destroy(nmem);
     return ct->error;
 }
 
@@ -493,3 +606,11 @@ int cql_transform_error(cql_transform_t ct, const char **addinfo)
     *addinfo = ct->addinfo;
     return ct->error;
 }
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+