Avoid mixed stmt/var declare
[yaz-moved-to-github.git] / src / rpn2solr.c
index 11abe87..c72b11a 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2011 Index Data
+ * Copyright (C) 1995-2013 Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -48,7 +48,7 @@ static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
         }
     }
     if (server_choice)
-        return "solr.serverChoice";
+        return "cql.serverChoice";
     return 0;
 }
 
@@ -68,27 +68,27 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
                 switch (*relation)
                 {
                     /* Unsure on whether this is the relation attribute constants? */
-                case Z_ProximityOperator_Prox_lessThan: 
+                case Z_ProximityOperator_Prox_lessThan:
                     return 0;
-                case Z_ProximityOperator_Prox_lessThanOrEqual: 
+                case Z_ProximityOperator_Prox_lessThanOrEqual:
                     return 0;
-                case Z_ProximityOperator_Prox_equal: 
+                case Z_ProximityOperator_Prox_equal:
                     return ":";
-                case Z_ProximityOperator_Prox_greaterThanOrEqual: 
+                case Z_ProximityOperator_Prox_greaterThanOrEqual:
                     return 0;
-                case Z_ProximityOperator_Prox_greaterThan: 
+                case Z_ProximityOperator_Prox_greaterThan:
                     return 0;
-                case Z_ProximityOperator_Prox_notEqual: 
+                case Z_ProximityOperator_Prox_notEqual:
                     return 0;
-                case 100: 
-                    /* phonetic is not implemented*/
-                    return 0; 
-                case 101: 
+                case 100:
+                    /* phonetic is not implemented */
+                    return 0;
+                case 101:
                     /* stem is not not implemented */
-                    return 0; 
-                case 102: 
+                    return 0;
+                case 102:
                     /* relevance is supported in SOLR, but not implemented yet */
-                    return 0; 
+                    return 0;
                 default:
                     /* Invalid relation */
                     return 0;
@@ -104,74 +104,95 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
     return ":";
 }
 
+struct solr_attr {
+    const char *index; 
+    const char *relation;
+    const char *term;
+    int  is_range;
+    const char *begin;
+    const char *close;
+};
+
 static int rpn2solr_attr(solr_transform_t ct,
-                        Z_AttributeList *attributes, WRBUF w)
+                         Z_AttributeList *attributes, WRBUF w, struct solr_attr *solr_attr)
 {
-    const char *relation = solr_lookup_reverse(ct, "relation.", attributes);
-    const char *index = solr_lookup_reverse(ct, "index.", attributes);
+    const char *relation  = solr_lookup_reverse(ct, "relation.", attributes);
+    const char *index     = solr_lookup_reverse(ct, "index.", attributes);
     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
+    /* Assume this is not a range */
+    solr_attr->is_range = 0;
 
     /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */
     if (!index)
         index = lookup_index_from_string_attr(attributes);
 
     /* Attempt to fix bug #2978: Look for a relation attribute */
-    if (!relation) 
+    if (!relation)
         relation = lookup_relation_index_from_attr(attributes);
 
     if (!index)
     {
-        solr_transform_set_error(ct,
-                                YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
+        solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
         return -1;
     }
     /* for serverChoice we omit index+relation+structure */
-    if (strcmp(index, "solr.serverChoice"))
+    if (strcmp(index, "cql.serverChoice"))
     {
-        wrbuf_puts(w, index);
+        solr_attr->index = index;
         if (relation)
         {
-            if (!strcmp(relation, "exact"))
-                /* TODO Verify if a exact  SOLR exists */
+            if (!strcmp(relation, "exact")) {
+                /* TODO Exact match does not exist in SOLR. It needs a specific field type. */
                 relation = ":";
-            else if (!strcmp(relation, "eq"))
+            }
+            else if (!strcmp(relation, "eq")) {
                 relation = ":";
+            }
+            else if (!strcmp(relation, "<")) {
+                solr_attr->is_range = 1;
+                solr_attr->begin = "[* TO ";
+                solr_attr->close = "}";
+            }
             else if (!strcmp(relation, "le")) {
-                /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = ":[ * to ";
-                close_range = "]"
-                */
+                solr_attr->is_range = 2;
+                solr_attr->begin = "[* TO ";
+                solr_attr->close = "]";
             }
             else if (!strcmp(relation, "ge")) {
-                /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = "[";
-                relation = ":[ * to ";
-                close_range = "]"
-                */
+                solr_attr->is_range = 4;
+                solr_attr->begin = "[";
+                solr_attr->close = " TO *]";
             }
-            /* Missing mapping of not equal, phonetic, stem and relevance */
-            wrbuf_puts(w, relation);
+            else if (!strcmp(relation, ">")) {
+                solr_attr->is_range = 5;
+                solr_attr->begin = "{";
+                solr_attr->close = " TO *]";
+            }
+            solr_attr->relation = relation;
         }
-        else
-            wrbuf_puts(w, ":");
-
+        // TODO is this valid for Solr? 
+        solr_attr->term = 0;
         if (structure)
         {
             if (strcmp(structure, "*"))
             {
-                wrbuf_puts(w, "/");
-                wrbuf_puts(w, structure);
-                wrbuf_puts(w, " ");
+               wrbuf_puts(w, "/");
+               wrbuf_puts(w, structure);
+               wrbuf_puts(w, " ");
+               solr_attr->index = 0;  
             }
+
         }
     }
+    else 
+        solr_attr->index = 0;
     return 0;
 }
 
-/* Bug 2878: Currently only support left and right truncation. Specific check for this */
-static int checkForTruncation(int flag, Z_AttributeList *attributes)
+static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
 {
     int j;
+    Z_AttributeList *attributes = apt->attributes;
     for (j = 0; j < attributes->num_attributes; j++)
     {
         Z_AttributeElement *ae = attributes->attributes[j];
@@ -179,33 +200,23 @@ static int checkForTruncation(int flag, Z_AttributeList *attributes)
         {
             if (ae->which == Z_AttributeValue_numeric)
             {
-                Odr_int truncation = *(ae->value.numeric);
-                /* This logic only works for Left, right and both. eg. 1,2,3 */
-               if (truncation <= 3)
-                    return ((int) truncation & flag);
+                return *(ae->value.numeric);
             }
             else if (ae->which == Z_AttributeValue_complex) {
+                ;
                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
                 /* Complex: Shouldn't happen */
             }
         }
     }
-    /* No truncation or unsupported */
+    /* No truncation given */
     return 0;
-};
-
-static int checkForLeftTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(2, attributes);
 }
 
-static int checkForRightTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(1, attributes);
-};
+#define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
 
 static int rpn2solr_simple(solr_transform_t ct,
-                          void (*pr)(const char *buf, void *client_data),
-                          void *client_data,
-                          Z_Operand *q, WRBUF w)
+                           Z_Operand *q, WRBUF w, struct solr_attr *solr_attr)
 {
     int ret = 0;
     if (q->which != Z_Operand_APT)
@@ -219,11 +230,20 @@ static int rpn2solr_simple(solr_transform_t ct,
         Z_Term *term = apt->term;
         const char *sterm = 0;
         size_t lterm = 0;
+        Odr_int trunc = get_truncation(apt);
 
         wrbuf_rewind(w);
-        ret = rpn2solr_attr(ct, apt->attributes, w);
+        
+        ret = rpn2solr_attr(ct, apt->attributes, w, solr_attr);
 
-        switch(term->which)
+        if (trunc == 0 || trunc == 1 || trunc == 100 || trunc == 104)
+            ;
+        else
+        {
+            solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
+            return -1;
+        }
+        switch (term->which)
         {
         case Z_Term_general:
             lterm = term->u.general->len;
@@ -241,7 +261,7 @@ static int rpn2solr_simple(solr_transform_t ct,
             solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
         }
 
-        if (term)
+        if (sterm)
         {
             size_t i;
             int must_quote = 0;
@@ -251,87 +271,197 @@ static int rpn2solr_simple(solr_transform_t ct,
                     must_quote = 1;
             if (must_quote)
                 wrbuf_puts(w, "\"");
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForLeftTruncation(apt->attributes))
-                wrbuf_puts(w, "*");
-                       /* BUG 4415: Escape : (as \:) in string terms */
-                       for (i = 0 ; i < lterm; i++) {
-                           if (sterm[i] == ':') {
-                              wrbuf_putc(w, '\\');
-                           }
-                           wrbuf_putc(w, sterm[i]);
-                       }
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForRightTruncation(apt->attributes))
+            for (i = 0 ; i < lterm; i++)
+            {
+                if (sterm[i] == '\\' && i < lterm - 1)
+                {
+                    i++;
+                    if (strchr(SOLR_SPECIAL, sterm[i]))
+                        wrbuf_putc(w, '\\');
+                    wrbuf_putc(w, sterm[i]);
+                }
+                else if (sterm[i] == '?' && trunc == 104)
+                {
+                    wrbuf_putc(w, '*');
+                }
+                else if (sterm[i] == '#' && trunc == 104)
+                {
+                    wrbuf_putc(w, '?');
+                }
+                else if (strchr(SOLR_SPECIAL, sterm[i]))
+                {
+                    wrbuf_putc(w, '\\');
+                    wrbuf_putc(w, sterm[i]);
+                }
+                else
+                    wrbuf_putc(w, sterm[i]);
+            }
+            if (trunc == 1)
                 wrbuf_puts(w, "*");
             if (must_quote)
                 wrbuf_puts(w, "\"");
         }
-        if (ret == 0)
-            pr(wrbuf_cstr(w), client_data);
+        if (ret == 0) { 
+            solr_attr->term = wrbuf_cstr(w);
+        }
+        
     }
     return ret;
+};
+
+static int solr_write_range(void (*pr)(const char *buf, void *client_data),
+                            void *client_data,
+                            struct solr_attr *solr_attr_left, 
+                            struct solr_attr *solr_attr_right)
+{
+    pr(solr_attr_left->index, client_data);
+    pr(":", client_data);
+    pr(solr_attr_left->begin, client_data);
+    pr(solr_attr_left->term,  client_data);
+    pr(" TO ", client_data);
+    pr(solr_attr_right->term,  client_data);
+    pr(solr_attr_right->close, client_data);
+    return 0;
+}; 
+
+static int solr_write_structure(void (*pr)(const char *buf, void *client_data),
+                            void *client_data,
+                            struct solr_attr *solr_attr)
+{
+    if (solr_attr->index) {
+        pr(solr_attr->index, client_data);
+        pr(":", client_data);
+    }
+    if (solr_attr->is_range) {
+        pr(solr_attr->begin, client_data);
+        pr(solr_attr->term,  client_data);
+        pr(solr_attr->close, client_data);
+    }
+    else if (solr_attr->term) 
+        pr(solr_attr->term,  client_data);
+    return 0;
+}; 
+
+
+
+static int solr_write_and_or_range(void (*pr)(const char *buf, void *client_data),
+                             void *client_data,
+                             struct solr_attr *solr_attr_left, 
+                             struct solr_attr *solr_attr_right)
+{
+    if (solr_attr_left->is_range && 
+        solr_attr_right->is_range && 
+        !strcmp(solr_attr_left->index, solr_attr_right->index)) 
+    {
+        if (solr_attr_left->is_range > 3 && solr_attr_right->is_range < 3)
+            return solr_write_range(pr, client_data, solr_attr_left, solr_attr_right); 
+        else if (solr_attr_left->is_range < 3 && solr_attr_right->is_range > 3)
+            return solr_write_range(pr, client_data, solr_attr_right, solr_attr_left); 
+    }
+    solr_write_structure(pr, client_data, solr_attr_left);
+    pr(" AND ", client_data);
+    solr_write_structure(pr, client_data, solr_attr_right);
+    return 0;
+}
+
+static void solr_attr_init(struct solr_attr *solr_attr) {
+    solr_attr->index = 0; 
+    solr_attr->relation = 0;
+    solr_attr->is_range = 0; 
+    solr_attr->term = 0; 
 }
 
 
 static int rpn2solr_structure(solr_transform_t ct,
-                             void (*pr)(const char *buf, void *client_data),
-                             void *client_data,
-                             Z_RPNStructure *q, int nested,
-                             WRBUF w)
+                              void (*pr)(const char *buf, void *client_data),
+                              void *client_data,
+                              Z_RPNStructure *q, int nested,
+                              WRBUF wa, struct solr_attr *solr_attr)
 {
-    if (q->which == Z_RPNStructure_simple)
-        return rpn2solr_simple(ct, pr, client_data, q->u.simple, w);
+    if (q->which == Z_RPNStructure_simple) {
+        solr_attr_init(solr_attr);
+        return rpn2solr_simple(ct, q->u.simple, wa, solr_attr);
+    }
     else
     {
         Z_Operator *op = q->u.complex->roperator;
         int r;
+        struct solr_attr solr_attr_left, solr_attr_right;
+        WRBUF w_left = wrbuf_alloc();
+        WRBUF w_right = wrbuf_alloc();
 
         if (nested)
             pr("(", client_data);
 
-        r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
-        if (r)
+        solr_attr_init(&solr_attr_left);
+        r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w_left, &solr_attr_left);
+
+
+        if (r) {
+            wrbuf_destroy(w_left);
+            return r;
+        }
+        solr_attr_init(&solr_attr_right);
+
+        r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w_right, &solr_attr_right);
+        if (r) {
+            wrbuf_destroy(w_left);
+            wrbuf_destroy(w_right);
             return r;
+        }
+
         switch(op->which)
         {
         case  Z_Operator_and:
-            pr(" AND ", client_data);
+            solr_write_and_or_range(pr, client_data, &solr_attr_left, &solr_attr_right);
             break;
         case  Z_Operator_or:
+            solr_write_structure(pr, client_data, &solr_attr_left);
             pr(" OR ", client_data);
+            solr_write_structure(pr, client_data, &solr_attr_right);
             break;
         case  Z_Operator_and_not:
+            solr_write_structure(pr, client_data, &solr_attr_left);
             pr(" AND NOT ", client_data);
+            solr_write_structure(pr, client_data, &solr_attr_right);
             break;
         case  Z_Operator_prox:
             solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0);
+            wrbuf_destroy(w_left);
+            wrbuf_destroy(w_right);
             return -1;
         }
-        r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
+
         if (nested)
             pr(")", client_data);
+        
+        solr_attr_init(solr_attr);
+        wrbuf_destroy(w_left);
+        wrbuf_destroy(w_right);
         return r;
     }
 }
 
 int solr_transform_rpn2solr_stream(solr_transform_t ct,
-                                 void (*pr)(const char *buf, void *client_data),
-                                 void *client_data,
-                                 Z_RPNQuery *q)
+                                   void (*pr)(const char *buf, void *client_data),
+                                   void *client_data,
+                                   Z_RPNQuery *q)
 {
     int r;
     WRBUF w = wrbuf_alloc();
+    struct solr_attr solr_attr;
     solr_transform_set_error(ct, 0, 0);
-    r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w);
+    solr_attr_init(&solr_attr);
+    r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w, &solr_attr);
+    solr_write_structure(pr, client_data, &solr_attr);
     wrbuf_destroy(w);
     return r;
 }
 
 
 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
-                                WRBUF w,
-                                Z_RPNQuery *q)
+                                  WRBUF w,
+                                  Z_RPNQuery *q)
 {
     return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q);
 }