CQL/CCL/PQF conversion fixes. CCL support for Z39.58 trunc
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 29 Aug 2011 13:57:05 +0000 (15:57 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 29 Aug 2011 13:57:05 +0000 (15:57 +0200)
Lots of fixes for the conversions from CQL to CCL, CCL to RPN, and
RPN to CQL. The term parameter of yaz_encode_pqf_term is now properly
escaped into a PQF term. The CCL truncation configuration t=z
enables Z39.58 truncation, which is in fact CCL truncation, but there's
more to it than one might think anyway. The RPN to CQL conversion
handles Z39.58 truncation as well (5=104).

doc/tools.xml
include/yaz/ccl.h
src/cclfind.c
src/cclqfile.c
src/querytowrbuf.c
src/rpn2cql.c
test/test_ccl.c
test/test_rpn2cql.c

index cc3e3c5..80a9756 100644 (file)
           </entry>
          </row>
 
           </entry>
          </row>
 
+         <row><entry><literal>t=z</literal></entry><entry>
+           Allows masking anywhere in a term, thus fully supporting
+           # (mask one character) and ? (zero or more of any character).
+           If masking is used, truncation is set to 104 (Z39.58 in term)
+           and the term is converted accordingly to a Z39.58 masking term -
+           actually the same truncation as CCL itself.
+          </entry>
+         </row>
+
         </tbody>
        </tgroup>
        </table>
         </tbody>
        </tgroup>
        </table>
index d3e3032..f65674a 100644 (file)
@@ -353,7 +353,7 @@ int ccl_stop_words_info(ccl_stop_words_t csw, int idx,
 #define CCL_BIB1_TRU_CAN_BOTH  (-3)
 #define CCL_BIB1_TRU_CAN_NONE  (-4)
 #define CCL_BIB1_TRU_CAN_REGEX (-5)
 #define CCL_BIB1_TRU_CAN_BOTH  (-3)
 #define CCL_BIB1_TRU_CAN_NONE  (-4)
 #define CCL_BIB1_TRU_CAN_REGEX (-5)
-
+#define CCL_BIB1_TRU_CAN_Z3958 (-6)
 
 
 YAZ_END_CDECL
 
 
 YAZ_END_CDECL
index 6fd3144..7932ecb 100644 (file)
@@ -213,7 +213,8 @@ void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
 }
 
 
 }
 
 
-#define REGEX_CHARS "^[]{}()|.*+?!\"$"
+#define REGEX_CHARS "^[]{}()|.*+?!$"
+#define CCL_CHARS "#?\\"
 /**
  * search_term: Parse CCL search term. 
  * cclp:   CCL Parser
 /**
  * search_term: Parse CCL search term. 
  * cclp:   CCL Parser
@@ -261,6 +262,7 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         int left_trunc = 0;
         int right_trunc = 0;
         int regex_trunc = 0;
         int left_trunc = 0;
         int right_trunc = 0;
         int regex_trunc = 0;
+        int z3958_trunc = 0;
         size_t max = 200;
         if (and_list || or_list || !multi)
             max = 1;
         size_t max = 200;
         if (and_list || or_list || !multi)
             max = 1;
@@ -364,6 +366,11 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         {
             regex_trunc = 1; /* regex trunc (102) allowed */
         }
         {
             regex_trunc = 1; /* regex trunc (102) allowed */
         }
+        else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+                          &attset))
+        {
+            z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+        }
 
         /* make the RPN token */
         p->u.t.term = (char *)xmalloc(len * 2 + 2);
 
         /* make the RPN token */
         p->u.t.term = (char *)xmalloc(len * 2 + 2);
@@ -390,10 +397,13 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                     if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
                     {
                         regex_trunc = 2;
                     if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
                     {
                         regex_trunc = 2;
-                        strcat(p->u.t.term, "\\\\");
+                        strcat(p->u.t.term, "\\");
                     }
                     }
-                    if (src_str[j] == '\\')
+                    else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
+                    {
+                        z3958_trunc = 2;
                         strcat(p->u.t.term, "\\");
                         strcat(p->u.t.term, "\\");
+                    }
                     strxcat(p->u.t.term, src_str + j, 1);
                 }
                 else if (src_str[j] == '"')
                     strxcat(p->u.t.term, src_str + j, 1);
                 }
                 else if (src_str[j] == '"')
@@ -405,6 +415,11 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                         strcat(p->u.t.term, ".*");
                         regex_trunc = 2; /* regex trunc is really needed */
                     }
                         strcat(p->u.t.term, ".*");
                         regex_trunc = 2; /* regex trunc is really needed */
                     }
+                    else if (z3958_trunc)
+                    {
+                        strcat(p->u.t.term, "?");
+                        z3958_trunc = 2;
+                    }
                     else if (i == 0 && j == 0)
                         left_trunc = 1;
                     else if (i == no - 1 && j == src_len - 1)
                     else if (i == 0 && j == 0)
                         left_trunc = 1;
                     else if (i == no - 1 && j == src_len - 1)
@@ -423,6 +438,11 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                         strcat(p->u.t.term, ".");
                         regex_trunc = 2; /* regex trunc is really needed */
                     }
                         strcat(p->u.t.term, ".");
                         regex_trunc = 2; /* regex trunc is really needed */
                     }
+                    else if (z3958_trunc)
+                    {
+                        strcat(p->u.t.term, "#");
+                        z3958_trunc = 2;
+                    }
                     else
                     {
                         cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
                     else
                     {
                         cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
@@ -435,7 +455,12 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                     if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
                     {
                         regex_trunc = 2;
                     if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
                     {
                         regex_trunc = 2;
-                        strcat(p->u.t.term, "\\\\");
+                        strcat(p->u.t.term, "\\");
+                    }
+                    else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
+                    {
+                        z3958_trunc = 2;
+                        strcat(p->u.t.term, "\\");
                     }
                     strxcat(p->u.t.term, src_str + j, 1);                    
                 }
                     }
                     strxcat(p->u.t.term, src_str + j, 1);                    
                 }
@@ -500,6 +525,10 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         {
             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
         }
         {
             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
         }
+        else if (z3958_trunc == 2)
+        {
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+        }
         else
         {
             if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
         else
         {
             if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
index 16e658f..d2ca1c9 100644 (file)
@@ -160,6 +160,8 @@ int ccl_qual_field2(CCL_bibset bibset, const char *cp, const char *qual_name,
                         value = CCL_BIB1_TRU_CAN_NONE;
                     else if (!ccl_stricmp (value_str, "x"))
                         value = CCL_BIB1_TRU_CAN_REGEX;
                         value = CCL_BIB1_TRU_CAN_NONE;
                     else if (!ccl_stricmp (value_str, "x"))
                         value = CCL_BIB1_TRU_CAN_REGEX;
+                    else if (!ccl_stricmp (value_str, "z"))
+                        value = CCL_BIB1_TRU_CAN_Z3958;
                     break;                
                 case 'c':
                 case 'C':
                     break;                
                 case 'c':
                 case 'C':
index d52e37f..8ca78a2 100644 (file)
@@ -22,14 +22,21 @@ void yaz_encode_pqf_term(WRBUF b, const char *term, int len)
     for (i = 0; i < len; i++)
         if (strchr(" \"{", term[i]))
             break;
     for (i = 0; i < len; i++)
         if (strchr(" \"{", term[i]))
             break;
-    if (i == len && i)
-        wrbuf_write(b, term, len);
+    if (len > 0 && i == len)
+    {
+        for (i = 0; i<len; i++)
+        {
+            if (term[i] == '\\')
+                wrbuf_putc(b, '\\');
+            wrbuf_putc(b, term[i]);
+        }
+    }
     else
     {
         wrbuf_putc(b, '"');
         for (i = 0; i<len; i++)
         {
     else
     {
         wrbuf_putc(b, '"');
         for (i = 0; i<len; i++)
         {
-            if (term[i] == '"')
+            if (term[i] == '"' || term[i] == '\\')
                 wrbuf_putc(b, '\\');
             wrbuf_putc(b, term[i]);
         }
                 wrbuf_putc(b, '\\');
             wrbuf_putc(b, term[i]);
         }
index aef1cfd..9fedba6 100644 (file)
@@ -192,11 +192,14 @@ static int rpn2cql_simple(cql_transform_t ct,
         Z_Term *term = apt->term;
         const char *sterm = 0;
         size_t lterm = 0;
         Z_Term *term = apt->term;
         const char *sterm = 0;
         size_t lterm = 0;
+        Odr_int trunc = lookup_truncation(apt->attributes);
+        size_t i;
+        int must_quote = 0;
 
         wrbuf_rewind(w);
         ret = rpn2cql_attr(ct, apt->attributes, w);
 
 
         wrbuf_rewind(w);
         ret = rpn2cql_attr(ct, apt->attributes, w);
 
-        switch(term->which)
+        switch (term->which)
         {
         case Z_Term_general:
             lterm = term->u.general->len;
         {
         case Z_Term_general:
             lterm = term->u.general->len;
@@ -210,25 +213,17 @@ static int rpn2cql_simple(cql_transform_t ct,
             lterm = strlen(sterm);
             break;
         default:
             lterm = strlen(sterm);
             break;
         default:
-            ret = -1;
             cql_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
             cql_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
+            return -1;
         }
 
         }
 
-        if (term)
+        if (trunc <= 3 || trunc == 100 || trunc == 102 || trunc == 104)
         {
         {
-            size_t i;
-            int must_quote = 0;
-            Odr_int trunc = lookup_truncation(apt->attributes);
-
-            if (trunc > 3 && trunc != 100 && trunc != 102)
-            {
-                cql_transform_set_error(
-                    ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
-                ret = -1;
-            }
             for (i = 0 ; i < lterm; i++)
                 if (strchr(" ()=></", sterm[i]))
             for (i = 0 ; i < lterm; i++)
                 if (strchr(" ()=></", sterm[i]))
-                    must_quote = 1;
+                    break;
+            if (i < lterm || lterm == 0)
+                must_quote = 1;
             if (must_quote)
                 wrbuf_puts(w, "\"");
             if (trunc == 2 || trunc == 3)
             if (must_quote)
                 wrbuf_puts(w, "\"");
             if (trunc == 2 || trunc == 3)
@@ -249,6 +244,10 @@ static int rpn2cql_simple(cql_transform_t ct,
                 }
                 else if (trunc == 102 && sterm[i] == '.')
                     wrbuf_putc(w, '?');
                 }
                 else if (trunc == 102 && sterm[i] == '.')
                     wrbuf_putc(w, '?');
+                else if (trunc == 104 && sterm[i] == '?')
+                    wrbuf_putc(w, '*');
+                else if (trunc == 104 && sterm[i] == '#')
+                    wrbuf_putc(w, '?');
                 else if (strchr("*?\"", sterm[i]))
                 {
                     wrbuf_putc(w, '\\');
                 else if (strchr("*?\"", sterm[i]))
                 {
                     wrbuf_putc(w, '\\');
@@ -262,6 +261,12 @@ static int rpn2cql_simple(cql_transform_t ct,
             if (must_quote)
                 wrbuf_puts(w, "\"");
         }
             if (must_quote)
                 wrbuf_puts(w, "\"");
         }
+        else
+        {
+            cql_transform_set_error(
+                ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
+            ret = -1;
+        }
         if (ret == 0)
             pr(wrbuf_cstr(w), client_data);
     }
         if (ret == 0)
             pr(wrbuf_cstr(w), client_data);
     }
@@ -280,6 +285,7 @@ static int rpn2cql_structure(cql_transform_t ct,
     else
     {
         Z_Operator *op = q->u.complex->roperator;
     else
     {
         Z_Operator *op = q->u.complex->roperator;
+        Z_ProximityOperator *prox;
         int r;
 
         if (nested)
         int r;
 
         if (nested)
@@ -301,7 +307,7 @@ static int rpn2cql_structure(cql_transform_t ct,
             break;
         case  Z_Operator_prox: {
             pr(" prox", client_data);
             break;
         case  Z_Operator_prox: {
             pr(" prox", client_data);
-            Z_ProximityOperator *prox = op->u.prox;
+            prox = op->u.prox;
             /* No way to express Odr_bool *exclusion -- ignore it */
             if (prox->distance) {
                 char buf[21]; /* Enough for any 64-bit int */
             /* No way to express Odr_bool *exclusion -- ignore it */
             if (prox->distance) {
                 char buf[21]; /* Enough for any 64-bit int */
index 76d76ba..bb54243 100644 (file)
@@ -79,7 +79,9 @@ void tst1(int pass)
     case 0:
         ccl_qual_fitem(bibset, "u=4    s=pw t=l,r", "ti");
         ccl_qual_fitem(bibset, "1=1016 s=al,pw t=r",    "term");
     case 0:
         ccl_qual_fitem(bibset, "u=4    s=pw t=l,r", "ti");
         ccl_qual_fitem(bibset, "1=1016 s=al,pw t=r",    "term");
-        ccl_qual_fitem(bibset, "1=/my/title t=x",       "dc.title");
+        ccl_qual_fitem(bibset, "t=x", "reg");
+        ccl_qual_fitem(bibset, "t=z", "z");
+        ccl_qual_fitem(bibset, "1=/my/title",       "dc.title");
         ccl_qual_fitem(bibset, "r=r",         "date");
         ccl_qual_fitem(bibset, "r=o",         "x");
         ccl_qual_fitem(bibset, "dc.title", "title");
         ccl_qual_fitem(bibset, "r=r",         "date");
         ccl_qual_fitem(bibset, "r=o",         "x");
         ccl_qual_fitem(bibset, "dc.title", "title");
@@ -92,7 +94,13 @@ void tst1(int pass)
         strcpy(tstline, "term 1=1016 s=al,pw t=r  # default term");
         ccl_qual_line(bibset, tstline);
 
         strcpy(tstline, "term 1=1016 s=al,pw t=r  # default term");
         ccl_qual_line(bibset, tstline);
 
-        strcpy(tstline, "dc.title 1=/my/title t=x");
+        strcpy(tstline, "reg t=x");
+        ccl_qual_line(bibset, tstline);
+
+        strcpy(tstline, "z t=z");
+        ccl_qual_line(bibset, tstline);
+
+        strcpy(tstline, "dc.title 1=/my/title");
         ccl_qual_line(bibset, tstline);
 
         strcpy(tstline, "date r=r # ordered relation");
         ccl_qual_line(bibset, tstline);
 
         strcpy(tstline, "date r=r # ordered relation");
@@ -111,7 +119,9 @@ void tst1(int pass)
         ccl_qual_buf(bibset, "ti u=4    s=pw t=l,r\n"
                      "term 1=1016 s=al,pw t=r\r\n"
                      "\n"
         ccl_qual_buf(bibset, "ti u=4    s=pw t=l,r\n"
                      "term 1=1016 s=al,pw t=r\r\n"
                      "\n"
-                     "dc.title 1=/my/title t=x\n"
+                     "reg t=x\r\n"
+                     "z t=z\r\n"
+                     "dc.title 1=/my/title\n"
                      "date r=r\n" 
                      "x r=o\n"
                      "title dc.title\n"
                      "date r=r\n" 
                      "x r=o\n"
                      "title dc.title\n"
@@ -137,9 +147,14 @@ void tst1(int pass)
                 "   <attr type=\"s\" value=\"al,pw\"/>\n"
                 "   <attr type=\"t\" value=\"r\"/>\n"
                 " </qual>\n"
                 "   <attr type=\"s\" value=\"al,pw\"/>\n"
                 "   <attr type=\"t\" value=\"r\"/>\n"
                 " </qual>\n"
+                " <qual name=\"reg\">\n"
+                "   <attr type=\"t\" value=\"x\"/>\n"
+                " </qual>\n"
+                " <qual name=\"z\">\n"
+                "   <attr type=\"t\" value=\"z\"/>\n"
+                " </qual>\n"
                 " <qual name=\"dc.title\">\n"
                 "   <attr type=\"1\" value=\"/my/title\"/>\n"
                 " <qual name=\"dc.title\">\n"
                 "   <attr type=\"1\" value=\"/my/title\"/>\n"
-                "   <attr type=\"t\" value=\"x\"/>\n"
                 " </qual>\n"
                 " <qual name=\"date\">\n"
                 "   <attr type=\"r\" value=\"r\"/>\n"
                 " </qual>\n"
                 " <qual name=\"date\">\n"
                 "   <attr type=\"r\" value=\"r\"/>\n"
@@ -222,24 +237,41 @@ void tst1(int pass)
                   "@attr 4=2 @attr 1=1016 a "
                   "@attr 4=2 @attr 1=1016 b "));
 
                   "@attr 4=2 @attr 1=1016 a "
                   "@attr 4=2 @attr 1=1016 b "));
 
-    YAZ_CHECK(tst_ccl_query(bibset, "date=1980", "@attr 2=3 1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=234-1990", "@and @attr 2=4 234 @attr 2=2 1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=234- 1990", "@and @attr 2=4 234 @attr 2=2 1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=234 -1990", "@and @attr 2=4 234 @attr 2=2 1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=234 - 1990", "@and @attr 2=4 234 @attr 2=2 1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=-1980", "@attr 2=2 1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "date=- 1980", "@attr 2=2 1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "x=-1980", "@attr 2=3 -1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "x=- 1980", "@attr 2=2 1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "x= -1980", "@attr 2=3 -1980 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "x=234-1990", "@attr 2=3 234-1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "x=234 - 1990", "@and @attr 2=4 234 @attr 2=2 1990 "));
-    YAZ_CHECK(tst_ccl_query(bibset, "ti=a,b", "@attr 4=1 @attr 1=4 a,b "));
-    YAZ_CHECK(tst_ccl_query(bibset, "ti=a, b", "@attr 4=1 @attr 1=4 \"a, b\" "));
-    YAZ_CHECK(tst_ccl_query(bibset, "ti=a-b", "@attr 4=2 @attr 1=4 a-b "));
-    YAZ_CHECK(tst_ccl_query(bibset, "ti=a - b", "@attr 4=1 @attr 1=4 \"a - b\" "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "a?", "@attr 5=1 @attr 4=2 @attr 1=1016 a "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=1980",
+                            "@attr 2=3 1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=234-1990",
+                            "@and @attr 2=4 234 @attr 2=2 1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=234- 1990",
+                            "@and @attr 2=4 234 @attr 2=2 1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=234 -1990",
+                            "@and @attr 2=4 234 @attr 2=2 1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=234 - 1990",
+                            "@and @attr 2=4 234 @attr 2=2 1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=-1980",
+                            "@attr 2=2 1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "date=- 1980",
+                            "@attr 2=2 1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "x=-1980",
+                            "@attr 2=3 -1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "x=- 1980",
+                            "@attr 2=2 1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "x= -1980",
+                            "@attr 2=3 -1980 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "x=234-1990",
+                            "@attr 2=3 234-1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "x=234 - 1990",
+                            "@and @attr 2=4 234 @attr 2=2 1990 "));
+    YAZ_CHECK(tst_ccl_query(bibset, "ti=a,b",
+                            "@attr 4=1 @attr 1=4 a,b "));
+    YAZ_CHECK(tst_ccl_query(bibset, "ti=a, b",
+                            "@attr 4=1 @attr 1=4 \"a, b\" "));
+    YAZ_CHECK(tst_ccl_query(bibset, "ti=a-b",
+                            "@attr 4=2 @attr 1=4 a-b "));
+    YAZ_CHECK(tst_ccl_query(bibset, "ti=a - b",
+                            "@attr 4=1 @attr 1=4 \"a - b\" "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "a?",
+                            "@attr 5=1 @attr 4=2 @attr 1=1016 a "));
     YAZ_CHECK(tst_ccl_query(bibset, "a b", 
                             "@and @attr 4=2 @attr 1=1016 a "
                             "@attr 4=2 @attr 1=1016 b "));
     YAZ_CHECK(tst_ccl_query(bibset, "a b", 
                             "@and @attr 4=2 @attr 1=1016 a "
                             "@attr 4=2 @attr 1=1016 b "));
@@ -251,29 +283,45 @@ void tst1(int pass)
     YAZ_CHECK(tst_ccl_query(bibset, "title=a", 
                             "@attr 1=/my/title a "));
 
     YAZ_CHECK(tst_ccl_query(bibset, "title=a", 
                             "@attr 1=/my/title a "));
 
-    YAZ_CHECK(tst_ccl_query(bibset, "title=a?b#\"c?\"", 
-                            "@attr 5=102 @attr 1=/my/title a.*b.c\\\\? "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=\\(", 
-                            "@attr 5=102 @attr 1=/my/title \\\\( "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=.", 
-                            "@attr 5=102 @attr 1=/my/title \\\\. "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=.", 
-                            "@attr 5=102 @attr 1=/my/title \\\\. "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=\".\"", 
-                            "@attr 5=102 @attr 1=/my/title \\\\. "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=?\\?", 
-                            "@attr 5=102 @attr 1=/my/title .*\\\\? "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=\"?\\?\"", 
-                            "@attr 5=102 @attr 1=/my/title \\\\?\\\\? "));
-
-    YAZ_CHECK(tst_ccl_query(bibset, "title=\\\\", 
-                            "@attr 5=102 @attr 1=/my/title \\\\\\\\ "));
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=a?b#\"c?\"", 
+                            "@attr 5=102 a.*b.c\\\\? "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=a?b#\"c?\"", 
+                            "@attr 5=104 a?b#c\\\\? "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=\\(", 
+                            "@attr 5=102 \\\\( "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=\\(", 
+                            "( "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=\\\"", 
+                            "\"\\\"\" "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=\\\"", 
+                            "\"\\\"\" "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=.",
+                            "@attr 5=102 \\\\. "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=.",
+                            ". "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=\".\"", 
+                            "@attr 5=102 \\\\. "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=\".\"", 
+                            ". "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=?\\?",
+                            "@attr 5=102 .*\\\\? "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=?\\?",
+                            "@attr 5=104 ?\\\\? "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=\"?\\?\"",
+                            "@attr 5=102 \\\\?\\\\? "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=\"?\\?\"",
+                            "@attr 5=104 \\\\?\\\\? "));
+
+    YAZ_CHECK(tst_ccl_query(bibset, "reg=\\\\",
+                            "@attr 5=102 \\\\\\\\ "));
+    YAZ_CHECK(tst_ccl_query(bibset, "z=\\\\",
+                            "@attr 5=104 \\\\\\\\ "));
 
     YAZ_CHECK(tst_ccl_query(bibset, "\\\\", 
                             "@attr 4=2 @attr 1=1016 \\\\ "));
 
     YAZ_CHECK(tst_ccl_query(bibset, "\\\\", 
                             "@attr 4=2 @attr 1=1016 \\\\ "));
index bdb4972..fd0e5a6 100644 (file)
@@ -40,6 +40,11 @@ static int compare(cql_transform_t ct, const char *pqf, const char *cql)
             {
                 ret = 1;
             }
             {
                 ret = 1;
             }
+            else
+            {
+                yaz_log(YLOG_WARN, " expected: %s", cql ? cql : "null");
+                yaz_log(YLOG_WARN, " got:      %s", wrbuf_cstr(w));
+            }
         }
     }
     wrbuf_destroy(w);
         }
     }
     wrbuf_destroy(w);
@@ -90,10 +95,54 @@ static void tst2(void)
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=5 1980", "dc.date>1980"));
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=2 1980", "dc.date<=1980"));
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=4 1980", "dc.date>=1980"));
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=5 1980", "dc.date>1980"));
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=2 1980", "dc.date<=1980"));
     YAZ_CHECK(compare(ct, "@attr 1=30 @attr 2=4 1980", "dc.date>=1980"));
+
     /* Truncation */
     YAZ_CHECK(compare(ct, "@attr 5=1 water", "water*"));
     YAZ_CHECK(compare(ct, "@attr 5=2 water", "*water"));
     YAZ_CHECK(compare(ct, "@attr 5=3 water", "*water*"));
     /* Truncation */
     YAZ_CHECK(compare(ct, "@attr 5=1 water", "water*"));
     YAZ_CHECK(compare(ct, "@attr 5=2 water", "*water"));
     YAZ_CHECK(compare(ct, "@attr 5=3 water", "*water*"));
+    YAZ_CHECK(compare(ct, "@attr 5=100 water", "water"));
+    YAZ_CHECK(compare(ct, "@attr 5=102 water", "water"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 water", "water"));
+
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat.*er", "wat*er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat?er", "wat*er"));
+
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat.er", "wat?er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat#er", "wat?er"));
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat?er", "wat\\?er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat*er", "wat\\*er"));
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat#er", "wat#er"));
+
+    /* \. is 'eaten' by PQF parser */
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat\\.er", "wat?er"));
+
+    /* Escape sequences */
+    /* note: escape sequences that survive after PQF parse below */
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat\\\\?er", "wat\\?er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat\\\\?er", "wat\\?er"));
+
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat\\\\*er", "wat\\*er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat\\\\*er", "wat\\*er"));
+
+    YAZ_CHECK(compare(ct, "wat\\\\#er", "wat#er"));
+    YAZ_CHECK(compare(ct, "@attr 5=100 wat\\\\#er", "wat#er"));
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat\\\\#er", "wat#er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat\\\\#er", "wat#er"));
+    YAZ_CHECK(compare(ct, "@attr 5=102 wat\\\\.er", "wat.er"));
+    YAZ_CHECK(compare(ct, "@attr 5=104 wat\\\\.er", "wat.er"));
+
+    /* Quoting */
+    YAZ_CHECK(compare(ct, "@attr 5=100 \"\"", "\"\""));
+    YAZ_CHECK(compare(ct, "@attr 5=1 \"\"", "\"*\""));
+    YAZ_CHECK(compare(ct, "@attr 5=2 \"\"", "\"*\""));
+    YAZ_CHECK(compare(ct, "@attr 5=3 \"\"", "\"**\""));
+    YAZ_CHECK(compare(ct, "@attr 5=102 \"\"", "\"\""));
+    YAZ_CHECK(compare(ct, "@attr 5=104 \"\"", "\"\""));
+
+    YAZ_CHECK(compare(ct, "@attr 5=1 \"water basket\"", "\"water basket*\""));
+    YAZ_CHECK(compare(ct, "@attr 5=2 \"water basket\"", "\"*water basket\""));
+    YAZ_CHECK(compare(ct, "@attr 5=3 \"water basket\"", "\"*water basket*\""));
+
     /* Other */
     YAZ_CHECK(compare(ct, "@attr 2=103 @attr 1=_ALLRECORDS 1", "cql.allRecords=1"));
     YAZ_CHECK(compare(ct, "@attr 1=500 abc", 0));
     /* Other */
     YAZ_CHECK(compare(ct, "@attr 2=103 @attr 1=_ALLRECORDS 1", "cql.allRecords=1"));
     YAZ_CHECK(compare(ct, "@attr 1=500 abc", 0));