Allow HTTP protocol on unix local socket
[yaz-moved-to-github.git] / src / cqltransform.c
index 3163775..ef93ca8 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2011 Index Data
+ * Copyright (C) 1995-2012 Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -71,7 +71,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
     int ret = 0; /* 0=OK, != 0 FAIL */
     int t;
     t = yaz_tok_move(tp);
-    
+
     while (t == YAZ_TOK_STRING && ae_num < 20)
     {
         WRBUF type_str = wrbuf_alloc();
@@ -79,7 +79,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
         Z_AttributeElement *elem = 0;
         const char *value_str = 0;
         /* attset type=value  OR  type=value */
-        
+
         elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
         elem->attributeSet = 0;
         ae[ae_num] = elem;
@@ -90,19 +90,19 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
         {
             wrbuf_destroy(type_str);
             if (set_str)
-                wrbuf_destroy(set_str);                
+                wrbuf_destroy(set_str);
             break;
         }
-        if (t == YAZ_TOK_STRING)  
-        {  
+        if (t == YAZ_TOK_STRING)
+        {
             wrbuf_puts(ct->w, " ");
             wrbuf_puts(ct->w, yaz_tok_parse_string(tp));
             set_str = type_str;
-            
+
             elem->attributeSet =
                 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
                                        wrbuf_cstr(set_str), ct->nmem);
-            
+
             type_str = wrbuf_alloc();
             wrbuf_puts(type_str, yaz_tok_parse_string(tp));
             t = yaz_tok_move(tp);
@@ -113,7 +113,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
         {
             wrbuf_destroy(type_str);
             if (set_str)
-                wrbuf_destroy(set_str);                
+                wrbuf_destroy(set_str);
             yaz_log(YLOG_WARN, "Expected numeric attribute type");
             ret = -1;
             break;
@@ -121,8 +121,8 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
 
         wrbuf_destroy(type_str);
         if (set_str)
-            wrbuf_destroy(set_str);                
-        
+            wrbuf_destroy(set_str);
+
         if (t != '=')
         {
             yaz_log(YLOG_WARN, "Expected = after after attribute type");
@@ -182,7 +182,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct,
             (*pp)->attr_list.attributes = (Z_AttributeElement **)
                 nmem_malloc(ct->nmem,
                             ae_num * sizeof(Z_AttributeElement *));
-            memcpy((*pp)->attr_list.attributes, ae, 
+            memcpy((*pp)->attr_list.attributes, ae,
                    ae_num * sizeof(Z_AttributeElement *));
         }
         (*pp)->next = 0;
@@ -210,7 +210,7 @@ int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
     yaz_tok_parse_destroy(tp);
     return r;
 }
-    
+
 cql_transform_t cql_transform_open_FILE(FILE *f)
 {
     cql_transform_t ct = cql_transform_create();
@@ -309,7 +309,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
 
     z_AttributeElement(odr_a, &a, 0, 0);
     z_AttributeElement(odr_b, &b, 0, 0);
-    
+
     buf_a = odr_getbuf(odr_a, &len_a, 0);
     buf_b = odr_getbuf(odr_b, &len_b, 0);
 
@@ -320,7 +320,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
     return ret;
 }
 
-const char *cql_lookup_reverse(cql_transform_t ct, 
+const char *cql_lookup_reverse(cql_transform_t ct,
                                const char *category,
                                Z_AttributeList *attributes)
 {
@@ -348,7 +348,7 @@ const char *cql_lookup_reverse(cql_transform_t ct,
                 }
                 if (j == attributes->num_attributes)
                     break; /* i was not found at all.. try next pattern */
-                    
+
             }
             if (i == e->attr_list.num_attributes)
                 return e->pattern + clen;
@@ -356,7 +356,7 @@ const char *cql_lookup_reverse(cql_transform_t ct,
     }
     return 0;
 }
-                                      
+
 static const char *cql_lookup_property(cql_transform_t ct,
                                        const char *pat1, const char *pat2,
                                        const char *pat3)
@@ -374,7 +374,7 @@ static const char *cql_lookup_property(cql_transform_t ct,
         sprintf(pattern, "%.39s", pat1);
     else
         return 0;
-    
+
     for (e = ct->entry; e; e = e->next)
     {
         if (!cql_strcmp(e->pattern, pattern))
@@ -392,11 +392,11 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category,
     const char *res = 0;
     const char *eval = val ? val : default_val;
     const char *prefix = 0;
-    
+
     if (uri)
     {
         struct cql_prop_entry *e;
-        
+
         for (e = ct->entry; e; e = e->next)
             if (!memcmp(e->pattern, "set.", 4) && e->value &&
                 !strcmp(e->value, uri))
@@ -500,8 +500,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
                        void *client_data)
 {
     int exclusion = 0;
-    int distance;               /* to be filled in later depending on unit */
-    int distance_defined = 0;
+    int distance = -1;
     int ordered = 0;
     int proxrel = 2;            /* less than or equal */
     int unit = 2;               /* word */
@@ -514,26 +513,25 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
 
         if (!strcmp(name, "distance")) {
             distance = strtol(term, (char**) 0, 0);
-            distance_defined = 1;
             if (!strcmp(relation, "="))
                 proxrel = 3;
             else if (!strcmp(relation, ">"))
                 proxrel = 5;
             else if (!strcmp(relation, "<"))
                 proxrel = 1;
-            else if (!strcmp(relation, ">=")) 
+            else if (!strcmp(relation, ">="))
                 proxrel = 4;
             else if (!strcmp(relation, "<="))
                 proxrel = 2;
             else if (!strcmp(relation, "<>"))
                 proxrel = 6;
-            else 
+            else
             {
                 ct->error = YAZ_SRW_UNSUPP_PROX_RELATION;
                 ct->addinfo = xstrdup(relation);
                 return 0;
             }
-        } 
+        }
         else if (!strcmp(name, "ordered"))
             ordered = 1;
         else if (!strcmp(name, "unordered"))
@@ -548,14 +546,14 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
                 unit = 4;
             else if (!strcmp(term, "element"))
                 unit = 8;
-            else 
+            else
             {
                 ct->error = YAZ_SRW_UNSUPP_PROX_UNIT;
                 ct->addinfo = xstrdup(term);
                 return 0;
             }
-        } 
-        else 
+        }
+        else
         {
             ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
             ct->addinfo = xstrdup(name);
@@ -564,7 +562,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
         mods = mods->u.st.modifiers;
     }
 
-    if (!distance_defined)
+    if (distance == -1)
         distance = (unit == 2) ? 1 : 0;
 
     cql_pr_int(exclusion, pr, client_data);
@@ -616,9 +614,17 @@ static void emit_term(cql_transform_t ct,
 {
     int i;
     const char *ns = cn->u.st.index_uri;
-    int process_term = !has_modifier(cn, "regexp");
-    char *z3958_mem = 0;
+    int z3958_mode = 0;
+    int process_term = 1;
 
+    if (has_modifier(cn, "regexp"))
+        process_term = 0;
+    else if (cql_lookup_property(ct, "truncation", 0, "cql"))
+    {
+        process_term = 0;
+        cql_pr_attr(ct, "truncation", "cql", 0,
+                    pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
+    }
     assert(cn->which == CQL_NODE_ST);
 
     if (process_term && length > 0)
@@ -662,9 +668,9 @@ static void emit_term(cql_transform_t ct,
          * there's no mapping for it, that's fine: we just use a
          * general pattern-matching attribute.
          */
-        if (first_wc == term && second_wc == term + length-1 
-            && *first_wc == '*' && *second_wc == '*' 
-            && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) 
+        if (first_wc == term && second_wc == term + length-1
+            && *first_wc == '*' && *second_wc == '*'
+            && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
         {
             term++;
             length -= 2;
@@ -678,41 +684,19 @@ static void emit_term(cql_transform_t ct,
         }
         else if (first_wc == term + length-1 && second_wc == 0
                  && *first_wc == '*'
-                 && cql_pr_attr(ct, "truncation", "right", 0, 
+                 && cql_pr_attr(ct, "truncation", "right", 0,
                                 pr, client_data, 0))
         {
             length--;
         }
         else if (first_wc)
         {
-            /* We have one or more wildcard characters, but not in a
-             * way that can be dealt with using only the standard
-             * left-, right- and both-truncation attributes.  We need
-             * to translate the pattern into a Z39.58-type pattern,
-             * which has been supported in BIB-1 since 1996.  If
-             * there's no configuration element for "truncation.z3958"
-             * we indicate this as error 28 "Masking character not
-             * supported".
-             */
-            int i;
+            z3958_mode = 1;
             cql_pr_attr(ct, "truncation", "z3958", 0,
                         pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
-            z3958_mem = (char *) xmalloc(length+1);
-            for (i = 0; i < length; i++)
-            {
-                if (i > 0 && term[i-1] == '\\')
-                    z3958_mem[i] = term[i];
-                else if (term[i] == '*')
-                    z3958_mem[i] = '?';
-                else if (term[i] == '?')
-                    z3958_mem[i] = '#';
-                else
-                    z3958_mem[i] = term[i];
-            }
-            z3958_mem[length] = '\0';
-            term = z3958_mem;
         }
-        else {
+        else
+        {
             /* No masking characters.  Use "truncation.none" if given. */
             cql_pr_attr(ct, "truncation", "none", 0,
                         pr, client_data, 0);
@@ -733,20 +717,47 @@ static void emit_term(cql_transform_t ct,
         }
     }
 
+    /* produce \-sequences only if:
+       1) the output is a Z39.58-trunc reserved character
+       2) the output is a PQF reserved character (\\, \")
+    */
     (*pr)("\"", client_data);
     for (i = 0; i < length; i++)
     {
-        /* pr(int) each character */
-        /* we do not need to deal with \-sequences because the
-           CQL and PQF terms have same \-format, bug #1988 */
-        char buf[2];
-
-        buf[0] = term[i];
-        buf[1] = '\0';
-        (*pr)(buf, client_data);
+        char x[3]; /* temp buffer */
+        if (i > 0 && term[i-1] == '\\')
+        {
+            if (term[i] == '\"' || term[i] == '\\')
+                pr("\\", client_data);
+            if (z3958_mode && strchr("#?", term[i]))
+                pr("\\\\", client_data); /* double \\ to survive PQF parse */
+            x[0] = term[i];
+            x[1] = '\0';
+            pr(x, client_data);
+        }
+        else if (z3958_mode && term[i] == '*')
+        {
+            pr("?", client_data);
+            /* avoid emitting ?n sequences (n=[0-9]) because they have
+               different semantics than a single ? in Z39.58
+            */
+            if (i < length - 1 && yaz_isdigit(term[i+1]))
+                pr("\\\\", client_data); /* double \\ to survive PQF parse */
+        }
+        else if (z3958_mode && term[i] == '?')
+            pr("#", client_data);
+        else if (term[i] != '\\')
+        {
+            if (term[i] == '\"')
+                pr("\\", client_data);
+            if (z3958_mode && strchr("#?", term[i]))
+                pr("\\\\", client_data); /* double \\ to survive PQF parse */
+            x[0] = term[i];
+            x[1] = '\0';
+            pr(x, client_data);
+        }
     }
     (*pr)("\" ", client_data);
-    xfree(z3958_mem);
 }
 
 static void emit_terms(cql_transform_t ct,
@@ -771,7 +782,7 @@ static void emit_terms(cql_transform_t ct,
             (*pr)("@", client_data);
             (*pr)(op, client_data);
             (*pr)(" ", client_data);
-        }            
+        }
         emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term),
                   pr, client_data);
     }
@@ -860,11 +871,11 @@ void cql_transform_r(cql_transform_t ct,
         (*pr)(cn->u.boolean.value, client_data);
         (*pr)(" ", client_data);
         mods = cn->u.boolean.modifiers;
-        if (!strcmp(cn->u.boolean.value, "prox")) 
+        if (!strcmp(cn->u.boolean.value, "prox"))
         {
             if (!cql_pr_prox(ct, mods, pr, client_data))
                 return;
-        } 
+        }
         else if (mods)
         {
             /* Boolean modifiers other than on proximity not supported */