rpn2solr supports Z39.58 truncation (104)
authorAdam Dickmeiss <adam@indexdata.dk>
Mon, 5 Sep 2011 08:34:14 +0000 (10:34 +0200)
committerAdam Dickmeiss <adam@indexdata.dk>
Mon, 5 Sep 2011 08:34:14 +0000 (10:34 +0200)
Thus, CCL truncation mode t=z will make it possible to use both
single-character mask (#) and wildcard (?) in CCL . Throw diagnostic
for left truncation because that is not supported by SOLR.

src/rpn2solr.c
test/test_rpn2solr.c

index 70b2f6c..664ce2d 100644 (file)
@@ -105,7 +105,7 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
 }
 
 static int rpn2solr_attr(solr_transform_t ct,
-                        Z_AttributeList *attributes, WRBUF w)
+                         Z_AttributeList *attributes, WRBUF w)
 {
     const char *relation = solr_lookup_reverse(ct, "relation.", attributes);
     const char *index = solr_lookup_reverse(ct, "index.", attributes);
@@ -122,7 +122,7 @@ static int rpn2solr_attr(solr_transform_t ct,
     if (!index)
     {
         solr_transform_set_error(ct,
-                                YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
+                                 YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
         return -1;
     }
     /* for serverChoice we omit index+relation+structure */
@@ -138,15 +138,15 @@ static int rpn2solr_attr(solr_transform_t ct,
                 relation = ":";
             else if (!strcmp(relation, "le")) {
                 /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = ":[ * to ";
-                close_range = "]"
+                   relation = ":[ * to ";
+                   close_range = "]"
                 */
             }
             else if (!strcmp(relation, "ge")) {
                 /* TODO Not support as such, but could perhaps be transformed into a range
-                relation = "[";
-                relation = ":[ * to ";
-                close_range = "]"
+                   relation = "[";
+                   relation = ":[ * to ";
+                   close_range = "]"
                 */
             }
             /* Missing mapping of not equal, phonetic, stem and relevance */
@@ -168,10 +168,10 @@ static int rpn2solr_attr(solr_transform_t ct,
     return 0;
 }
 
-/* Bug 2878: Currently only support left and right truncation. Specific check for this */
-static int checkForTruncation(int flag, Z_AttributeList *attributes)
+static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
 {
     int j;
+    Z_AttributeList *attributes = apt->attributes;
     for (j = 0; j < attributes->num_attributes; j++)
     {
         Z_AttributeElement *ae = attributes->attributes[j];
@@ -179,33 +179,25 @@ static int checkForTruncation(int flag, Z_AttributeList *attributes)
         {
             if (ae->which == Z_AttributeValue_numeric)
             {
-                Odr_int truncation = *(ae->value.numeric);
-                /* This logic only works for Left, right and both. eg. 1,2,3 */
-               if (truncation <= 3)
-                    return ((int) truncation & flag);
+                return *(ae->value.numeric);
             }
             else if (ae->which == Z_AttributeValue_complex) {
+                ;
                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
                 /* Complex: Shouldn't happen */
             }
         }
     }
-    /* No truncation or unsupported */
+    /* No truncation given */
     return 0;
-};
-
-static int checkForLeftTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(2, attributes);
 }
 
-static int checkForRightTruncation(Z_AttributeList *attributes) {
-       return checkForTruncation(1, attributes);
-};
+#define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
 
 static int rpn2solr_simple(solr_transform_t ct,
-                          void (*pr)(const char *buf, void *client_data),
-                          void *client_data,
-                          Z_Operand *q, WRBUF w)
+                           void (*pr)(const char *buf, void *client_data),
+                           void *client_data,
+                           Z_Operand *q, WRBUF w)
 {
     int ret = 0;
     if (q->which != Z_Operand_APT)
@@ -219,11 +211,19 @@ static int rpn2solr_simple(solr_transform_t ct,
         Z_Term *term = apt->term;
         const char *sterm = 0;
         size_t lterm = 0;
+        Odr_int trunc = get_truncation(apt);
 
         wrbuf_rewind(w);
         ret = rpn2solr_attr(ct, apt->attributes, w);
 
-        switch(term->which)
+        if (trunc == 0 || trunc == 1 || trunc == 100 || trunc == 104)
+            ;
+        else
+        {
+            solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
+            return -1;
+        }
+        switch (term->which)
         {
         case Z_Term_general:
             lterm = term->u.general->len;
@@ -241,7 +241,7 @@ static int rpn2solr_simple(solr_transform_t ct,
             solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
         }
 
-        if (term)
+        if (sterm)
         {
             size_t i;
             int must_quote = 0;
@@ -251,18 +251,32 @@ static int rpn2solr_simple(solr_transform_t ct,
                     must_quote = 1;
             if (must_quote)
                 wrbuf_puts(w, "\"");
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForLeftTruncation(apt->attributes))
-                wrbuf_puts(w, "*");
-                       for (i = 0 ; i < lterm; i++) {
-                /* BUG 4415: Escape special characters in string terms */
-                           if (strchr("+-&|!(){}[]^\"~*?:\\", sterm[i])) {
-                              wrbuf_putc(w, '\\');
-                           }
-                           wrbuf_putc(w, sterm[i]);
-                       }
-            /* Bug 2878: Check and add Truncation */
-                       if (checkForRightTruncation(apt->attributes))
+            for (i = 0 ; i < lterm; i++)
+            {
+                if (sterm[i] == '\\' && i < lterm - 1)
+                {
+                    i++;
+                    if (strchr(SOLR_SPECIAL, sterm[i]))
+                        wrbuf_putc(w, '\\');
+                    wrbuf_putc(w, sterm[i]);                    
+                }
+                else if (sterm[i] == '?' && trunc == 104)
+                {
+                    wrbuf_putc(w, '*');
+                }
+                else if (sterm[i] == '#' && trunc == 104)
+                {
+                    wrbuf_putc(w, '?');
+                }
+                else if (strchr(SOLR_SPECIAL, sterm[i]))
+                {
+                    wrbuf_putc(w, '\\');
+                    wrbuf_putc(w, sterm[i]);
+                }
+                else
+                    wrbuf_putc(w, sterm[i]);
+            }
+            if (trunc == 1)
                 wrbuf_puts(w, "*");
             if (must_quote)
                 wrbuf_puts(w, "\"");
@@ -275,10 +289,10 @@ static int rpn2solr_simple(solr_transform_t ct,
 
 
 static int rpn2solr_structure(solr_transform_t ct,
-                             void (*pr)(const char *buf, void *client_data),
-                             void *client_data,
-                             Z_RPNStructure *q, int nested,
-                             WRBUF w)
+                              void (*pr)(const char *buf, void *client_data),
+                              void *client_data,
+                              Z_RPNStructure *q, int nested,
+                              WRBUF w)
 {
     if (q->which == Z_RPNStructure_simple)
         return rpn2solr_simple(ct, pr, client_data, q->u.simple, w);
@@ -316,9 +330,9 @@ static int rpn2solr_structure(solr_transform_t ct,
 }
 
 int solr_transform_rpn2solr_stream(solr_transform_t ct,
-                                 void (*pr)(const char *buf, void *client_data),
-                                 void *client_data,
-                                 Z_RPNQuery *q)
+                                   void (*pr)(const char *buf, void *client_data),
+                                   void *client_data,
+                                   Z_RPNQuery *q)
 {
     int r;
     WRBUF w = wrbuf_alloc();
@@ -330,8 +344,8 @@ int solr_transform_rpn2solr_stream(solr_transform_t ct,
 
 
 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
-                                WRBUF w,
-                                Z_RPNQuery *q)
+                                  WRBUF w,
+                                  Z_RPNQuery *q)
 {
     return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q);
 }
index 1fc1695..ed2d291 100644 (file)
@@ -53,10 +53,6 @@ static void tst1(void)
 
     YAZ_CHECK(compare(ct, "abc", "abc"));
     YAZ_CHECK(compare(ct, "\"a b c\"", "\"a b c\""));
-#if 0
-/* Invalid PQF, so this will never work */
-    YAZ_CHECK(compare(ct, "a b", "a b"));
-#endif
     YAZ_CHECK(compare(ct, "@not a b", "a AND NOT b"));
     YAZ_CHECK(compare(ct, "@and @or a b c", "(a OR b) AND c"));
     YAZ_CHECK(compare(ct, "@and a b", "a AND b"));
@@ -73,12 +69,27 @@ static void tst1(void)
 
     /* Truncation */
     YAZ_CHECK(compare(ct, "@attr 5=1 water", "water*"));
-    YAZ_CHECK(compare(ct, "@attr 5=2 water", "*water"));
-    YAZ_CHECK(compare(ct, "@attr 5=3 water", "*water*"));
-
-    /*
-    YAZ_CHECK(compare(ct, "@or @attr 1=1016 water @attr 7=1 @attr 1=4 0", "any:water rank:??");
-     */
+    YAZ_CHECK(compare(ct, "@attr 5=2 water", 0));
+    YAZ_CHECK(compare(ct, "@attr 5=3 water", 0));
+    YAZ_CHECK(compare(ct, "@attr 5=100 water", "water"));
+    YAZ_CHECK(compare(ct, "@attr 5=101 water", 0));
+    YAZ_CHECK(compare(ct, "@attr 5=104 w#ter", "w?ter"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 w#t?r", "w?t*r"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 w#te?", "w?te*"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 w\\#te?", "w?te*")); /* PQF eats # */
+    YAZ_CHECK(compare(ct, "@attr 5=104 w\\\\#te?", "w#te*"));
+
+    /* reserved characters */
+    YAZ_CHECK(compare(ct, "@attr 5=104 \\\"\\\\\\\\",
+                      "\\\"" "\\\\"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 \\\"\\\\\\\\\\\"",
+                      "\\\"" "\\\\" "\\\""));
+    YAZ_CHECK(compare(ct, "@attr 5=104 \\\"\\\\", "\\\"\\\\"));
+
+    YAZ_CHECK(compare(ct, "@attr 5=104 \\{:\\}", "\\{\\:\\}"));
+
+    YAZ_CHECK(compare(ct, "@attr 5=104 \\\"\\\\\\\\\\\"",
+                      "\\\"" "\\\\" "\\\""));
 
     solr_transform_close(ct);
 }