rpn2solr: support truncation left(2), left&right(3) YAZ-718
[yaz-moved-to-github.git] / src / rpn2solr.c
index 11abe87..c9143c7 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2011 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -48,7 +48,7 @@ static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
         }
     }
     if (server_choice)
-        return "solr.serverChoice";
+        return "cql.serverChoice";
     return 0;
 }
 
@@ -68,27 +68,27 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
                 switch (*relation)
                 {
                     /* Unsure on whether this is the relation attribute constants? */
-                case Z_ProximityOperator_Prox_lessThan: 
-                    return 0;
-                case Z_ProximityOperator_Prox_lessThanOrEqual: 
-                    return 0;
-                case Z_ProximityOperator_Prox_equal: 
+                case Z_ProximityOperator_Prox_lessThan:
+                    return "<";
+                case Z_ProximityOperator_Prox_lessThanOrEqual:
+                    return "le";
+                case Z_ProximityOperator_Prox_equal:
                     return ":";
-                case Z_ProximityOperator_Prox_greaterThanOrEqual: 
+                case Z_ProximityOperator_Prox_greaterThanOrEqual:
+                    return "ge";
+                case Z_ProximityOperator_Prox_greaterThan:
+                    return ">";
+                case Z_ProximityOperator_Prox_notEqual:
                     return 0;
-                case Z_ProximityOperator_Prox_greaterThan: 
+                case 100:
+                    /* phonetic is not implemented */
                     return 0;
-                case Z_ProximityOperator_Prox_notEqual: 
-                    return 0;
-                case 100: 
-                    /* phonetic is not implemented*/
-                    return 0; 
-                case 101: 
+                case 101:
                     /* stem is not not implemented */
-                    return 0; 
-                case 102: 
+                    return 0;
+                case 102:
                     /* relevance is supported in SOLR, but not implemented yet */
-                    return 0; 
+                    return 0;
                 default:
                     /* Invalid relation */
                     return 0;
@@ -104,57 +104,74 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
     return ":";
 }
 
+/* Detects a range written as "lower AND upper": two APT operands on the same
+ * index, one whose relation starts with '>'/'g', the other with '<'/'l'.
+ * Returns 1 with *p_apt1 = lower bound, *p_apt2 = upper bound; otherwise 0. */
+static int check_range(solr_transform_t ct, Z_Complex *q,
+                       Z_AttributesPlusTerm **p_apt1,
+                       Z_AttributesPlusTerm **p_apt2)
+{
+    Z_AttributesPlusTerm *a, *b;
+    const char *idx_a, *idx_b, *rel_a, *rel_b;
+    int a_lo, a_hi, b_lo, b_hi;
+
+    if (q->roperator->which != Z_Operator_and)
+        return 0;
+    if (q->s1->which != Z_RPNStructure_simple ||
+        q->s2->which != Z_RPNStructure_simple)
+        return 0;
+    if (q->s1->u.simple->which != Z_Operand_APT ||
+        q->s2->u.simple->which != Z_Operand_APT)
+        return 0;
+    a = q->s1->u.simple->u.attributesPlusTerm;
+    b = q->s2->u.simple->u.attributesPlusTerm;
+
+    idx_a = solr_lookup_reverse(ct, "index.", a->attributes);
+    idx_b = solr_lookup_reverse(ct, "index.", b->attributes);
+    rel_a = solr_lookup_reverse(ct, "relation.", a->attributes);
+    rel_b = solr_lookup_reverse(ct, "relation.", b->attributes);
+    /* nothing from the transform: try the local attribute lookups */
+    if (!rel_a)
+        rel_a = lookup_relation_index_from_attr(a->attributes);
+    if (!rel_b)
+        rel_b = lookup_relation_index_from_attr(b->attributes);
+    if (!idx_a)
+        idx_a = lookup_index_from_string_attr(a->attributes);
+    if (!idx_b)
+        idx_b = lookup_index_from_string_attr(b->attributes);
+    if (!idx_a || !idx_b || strcmp(idx_a, idx_b) || !rel_a || !rel_b)
+        return 0;
+    a_lo = rel_a[0] == '>' || rel_a[0] == 'g';
+    a_hi = rel_a[0] == '<' || rel_a[0] == 'l';
+    b_lo = rel_b[0] == '>' || rel_b[0] == 'g';
+    b_hi = rel_b[0] == '<' || rel_b[0] == 'l';
+    if (!(a_lo && b_hi) && !(b_lo && a_hi))
+        return 0;
+    *p_apt1 = a_lo ? a : b;
+    *p_apt2 = a_lo ? b : a;
+    return 1;
+}
+
 static int rpn2solr_attr(solr_transform_t ct,
-                        Z_AttributeList *attributes, WRBUF w)
+                         Z_AttributeList *attributes, WRBUF w)
 {
-    const char *relation = solr_lookup_reverse(ct, "relation.", attributes);
     const char *index = solr_lookup_reverse(ct, "index.", attributes);
     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
 
     /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */
     if (!index)
         index = lookup_index_from_string_attr(attributes);
-
-    /* Attempt to fix bug #2978: Look for a relation attribute */
-    if (!relation) 
-        relation = lookup_relation_index_from_attr(attributes);
-
     if (!index)
     {
         solr_transform_set_error(ct,
-                                YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
+                                 YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
         return -1;
     }
     /* for serverChoice we omit index+relation+structure */
-    if (strcmp(index, "solr.serverChoice"))
+    if (strcmp(index, "cql.serverChoice"))
     {
         wrbuf_puts(w, index);
-        if (relation)
-        {
-            if (!strcmp(relation, "exact"))
-                /* TODO Verify if a exact  SOLR exists */
-                relation = ":";
-            else if (!strcmp(relation, "eq"))
-                relation = ":";
-            else if (!strcmp(relation, "le")) {
-                /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = ":[ * to ";
-                close_range = "]"
-                */
-            }
-            else if (!strcmp(relation, "ge")) {
-                /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = "[";
-                relation = ":[ * to ";
-                close_range = "]"
-                */
-            }
-            /* Missing mapping of not equal, phonetic, stem and relevance */
-            wrbuf_puts(w, relation);
-        }
-        else
-            wrbuf_puts(w, ":");
-
+        wrbuf_puts(w, ":");
         if (structure)
         {
             if (strcmp(structure, "*"))
@@ -168,10 +185,10 @@ static int rpn2solr_attr(solr_transform_t ct,
     return 0;
 }
 
-/* Bug 2878: Currently only support left and right truncation. Specific check for this */
-static int checkForTruncation(int flag, Z_AttributeList *attributes)
+static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
 {
     int j;
+    Z_AttributeList *attributes = apt->attributes;
     for (j = 0; j < attributes->num_attributes; j++)
     {
         Z_AttributeElement *ae = attributes->attributes[j];
@@ -179,114 +196,192 @@ static int checkForTruncation(int flag, Z_AttributeList *attributes)
         {
             if (ae->which == Z_AttributeValue_numeric)
             {
-                Odr_int truncation = *(ae->value.numeric);
-                /* This logic only works for Left, right and both. eg. 1,2,3 */
-               if (truncation <= 3)
-                    return ((int) truncation & flag);
+                return *(ae->value.numeric);
             }
             else if (ae->which == Z_AttributeValue_complex) {
+                ;
                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
                 /* Complex: Shouldn't happen */
             }
         }
     }
-    /* No truncation or unsupported */
+    /* No truncation given */
     return 0;
-};
-
-static int checkForLeftTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(2, attributes);
 }
 
-static int checkForRightTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(1, attributes);
-};
+#define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
 
-static int rpn2solr_simple(solr_transform_t ct,
-                          void (*pr)(const char *buf, void *client_data),
-                          void *client_data,
-                          Z_Operand *q, WRBUF w)
+static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc)
 {
-    int ret = 0;
-    if (q->which != Z_Operand_APT)
+    size_t lterm = 0;
+    const char *sterm = 0;
+    switch (term->which)
     {
-        ret = -1;
-        solr_transform_set_error(ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0);
+    case Z_Term_general:
+        lterm = term->u.general->len;
+        sterm = (const char *) term->u.general->buf;
+        break;
+    case Z_Term_numeric:
+        wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
+        break;
+    case Z_Term_characterString:
+        sterm = term->u.characterString;
+        lterm = strlen(sterm);
+        break;
+    default:
+        solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
+        return -1;
     }
-    else
-    {
-        Z_AttributesPlusTerm *apt = q->u.attributesPlusTerm;
-        Z_Term *term = apt->term;
-        const char *sterm = 0;
-        size_t lterm = 0;
-
-        wrbuf_rewind(w);
-        ret = rpn2solr_attr(ct, apt->attributes, w);
 
-        switch(term->which)
-        {
-        case Z_Term_general:
-            lterm = term->u.general->len;
-            sterm = (const char *) term->u.general->buf;
-            break;
-        case Z_Term_numeric:
-            wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
-            break;
-        case Z_Term_characterString:
-            sterm = term->u.characterString;
-            lterm = strlen(sterm);
-            break;
-        default:
-            ret = -1;
-            solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
-        }
+    if (sterm)
+    {
+        size_t i;
+        int must_quote = 0;
 
-        if (term)
+        for (i = 0 ; i < lterm; i++)
+            if (sterm[i] == ' ')
+                must_quote = 1;
+        if (must_quote)
+            wrbuf_puts(w, "\"");
+        if (trunc == 2 || trunc == 3)
+            wrbuf_puts(w, "*");
+        for (i = 0 ; i < lterm; i++)
         {
-            size_t i;
-            int must_quote = 0;
-
-            for (i = 0 ; i < lterm; i++)
-                if (sterm[i] == ' ')
-                    must_quote = 1;
-            if (must_quote)
-                wrbuf_puts(w, "\"");
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForLeftTruncation(apt->attributes))
-                wrbuf_puts(w, "*");
-                       /* BUG 4415: Escape : (as \:) in string terms */
-                       for (i = 0 ; i < lterm; i++) {
-                           if (sterm[i] == ':') {
-                              wrbuf_putc(w, '\\');
-                           }
-                           wrbuf_putc(w, sterm[i]);
-                       }
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForRightTruncation(apt->attributes))
-                wrbuf_puts(w, "*");
-            if (must_quote)
-                wrbuf_puts(w, "\"");
+            if (sterm[i] == '\\' && i < lterm - 1)
+            {
+                i++;
+                if (strchr(SOLR_SPECIAL, sterm[i]))
+                    wrbuf_putc(w, '\\');
+                wrbuf_putc(w, sterm[i]);
+            }
+            else if (sterm[i] == '?' && trunc == 104)
+            {
+                wrbuf_putc(w, '*');
+            }
+            else if (sterm[i] == '#' && trunc == 104)
+            {
+                wrbuf_putc(w, '?');
+            }
+            else if (strchr(SOLR_SPECIAL, sterm[i]))
+            {
+                wrbuf_putc(w, '\\');
+                wrbuf_putc(w, sterm[i]);
+            }
+            else
+                wrbuf_putc(w, sterm[i]);
         }
-        if (ret == 0)
-            pr(wrbuf_cstr(w), client_data);
+        if (trunc == 1 || trunc == 3)
+            wrbuf_puts(w, "*");
+        if (must_quote)
+            wrbuf_puts(w, "\"");
     }
-    return ret;
+    return 0;
 }
 
+/**
+ * Emits one clause via pr: "index:term", or a range for ordering relations
+ * (apt2, if non-NULL, is the upper bound). Returns 0 if OK, else non-zero.
+ */
+static int rpn2solr_simple(solr_transform_t ct,
+                           void (*pr)(const char *buf, void *client_data),
+                           void *client_data,
+                           Z_AttributesPlusTerm *apt, WRBUF w,
+                           Z_AttributesPlusTerm *apt2)
+{
+    int ret;
+    Odr_int trunc = get_truncation(apt);
+    const char *relation2 = 0;
+    const char *relation1 = solr_lookup_reverse(ct, "relation.",
+                                                apt->attributes);
+    /* Attempt to fix bug #2978: Look for a relation attribute */
+    if (!relation1)
+        relation1 = lookup_relation_index_from_attr(apt->attributes);
+    if (!relation1)
+    {
+        solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, 0);
+        return -1;
+    }
+    if (apt2)
+    {
+        relation2 = solr_lookup_reverse(ct, "relation.", apt2->attributes);
+        if (!relation2)
+            relation2 = lookup_relation_index_from_attr(apt2->attributes);
+    }
+    wrbuf_rewind(w);
+    ret = rpn2solr_attr(ct, apt->attributes, w);
+    if (ret)
+        return ret;
+    if (!((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104))
+    {
+        solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
+        return -1;
+    }
+
+    if (relation1[0] == '<' || relation1[0] == 'l')
+    {
+        wrbuf_puts(w, "[* TO ");
+        ret = emit_term(ct, w, apt->term, trunc);
+        if (!strcmp(relation1, "le") || !strcmp(relation1, "<="))
+            wrbuf_puts(w, "]");
+        else
+            wrbuf_puts(w, "}");
+    }
+    else if (relation1[0] == '>' || relation1[0] == 'g')
+    {
+        if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge"))
+            wrbuf_puts(w, "[");
+        else
+            wrbuf_puts(w, "{");
+        ret = emit_term(ct, w, apt->term, trunc);
+        wrbuf_puts(w, " TO ");
+        if (!apt2)
+            wrbuf_puts(w, "*]");
+        else
+        {
+            /* a bad upper-bound term must fail the clause, not be dropped */
+            if (emit_term(ct, w, apt2->term, 0))
+                ret = -1;
+            if (!relation2 || !strcmp(relation2, "<=") ||
+                !strcmp(relation2, "le"))
+                wrbuf_puts(w, "]");
+            else
+                wrbuf_puts(w, "}");
+        }
+    }
+    else
+        ret = emit_term(ct, w, apt->term, trunc);
+    if (ret == 0)
+        pr(wrbuf_cstr(w), client_data);
+    return ret;
+}
+
 
 static int rpn2solr_structure(solr_transform_t ct,
-                             void (*pr)(const char *buf, void *client_data),
-                             void *client_data,
-                             Z_RPNStructure *q, int nested,
-                             WRBUF w)
+                              void (*pr)(const char *buf, void *client_data),
+                               void *client_data,
+                              Z_RPNStructure *q, int nested,
+                              WRBUF w)
 {
     if (q->which == Z_RPNStructure_simple)
-        return rpn2solr_simple(ct, pr, client_data, q->u.simple, w);
+    {
+        if (q->u.simple->which != Z_Operand_APT)
+        {
+            solr_transform_set_error(
+                ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0);
+            return -1;
+        }
+        else
+            return rpn2solr_simple(ct, pr, client_data,
+                                   q->u.simple->u.attributesPlusTerm, w, 0);
+    }
     else
     {
         Z_Operator *op = q->u.complex->roperator;
+        Z_AttributesPlusTerm *apt1, *apt2;
         int r;
 
+        if (check_range(ct, q->u.complex, &apt1, &apt2))
+            return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2);
         if (nested)
             pr("(", client_data);
 
@@ -316,9 +411,9 @@ static int rpn2solr_structure(solr_transform_t ct,
 }
 
 int solr_transform_rpn2solr_stream(solr_transform_t ct,
-                                 void (*pr)(const char *buf, void *client_data),
-                                 void *client_data,
-                                 Z_RPNQuery *q)
+                                   void (*pr)(const char *buf, void *client_data),
+                                   void *client_data,
+                                   Z_RPNQuery *q)
 {
     int r;
     WRBUF w = wrbuf_alloc();
@@ -330,8 +425,8 @@ int solr_transform_rpn2solr_stream(solr_transform_t ct,
 
 
 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
-                                WRBUF w,
-                                Z_RPNQuery *q)
+                                  WRBUF w,
+                                  Z_RPNQuery *q)
 {
     return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q);
 }