ccl2rpn: Conversion to regexp-1 terms (trunc=102).
[yaz-moved-to-github.git] / src / cclfind.c
index efc094c..f242169 100644 (file)
@@ -1,8 +1,6 @@
-/*
- * Copyright (C) 1995-2008, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
  * See the file LICENSE for details.
- *
- * $Id: cclfind.c,v 1.15 2008-01-09 21:32:27 adam Exp $
  */
 /** 
  * \file cclfind.c
  * of lookahead in the handling of relational operations. So
  * it's not really pure.
  */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <stdlib.h>
 #include <string.h>
+#include <assert.h>
 
 #include "cclp.h"
 
@@ -249,15 +251,15 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         struct ccl_rpn_node *p;
         size_t no, i;
         int no_spaces = 0;
-        int left_trunc = 0;
-        int right_trunc = 0;
-        int mid_trunc = 0;
         int relation_value = -1;
         int position_value = -1;
         int structure_value = -1;
         int truncation_value = -1;
         int completeness_value = -1;
         int len = 0;
+        int left_trunc = 0;
+        int right_trunc = 0;
+        int regex_trunc = 0;
         size_t max = 200;
         if (and_list || or_list || !multi)
             max = 1;
@@ -277,16 +279,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
             for (i = 0; i<lookahead->len; i++)
                 if (lookahead->name[i] == ' ')
                     no_spaces++;
-                else if (strchr(truncation_aliases[0], lookahead->name[i]))
-                {
-                    if (no == 0 && i == 0 && lookahead->len >= 1)
-                        left_trunc = 1;
-                    else if (!is_term_ok(lookahead->next->kind, term_list) &&
-                             i == lookahead->len-1 && i >= 1)
-                        right_trunc = 1;
-                    else
-                        mid_trunc = 1;
-                }
             len += 1+lookahead->len+lookahead->ws_prefix_len;
             lookahead = lookahead->next;
         }
@@ -341,7 +333,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                             if (truncation_value != -1)
                                 continue;
                             truncation_value = attr->value.numeric;
-                            left_trunc = right_trunc = mid_trunc = 0;
                             break;
                         case CCL_BIB1_COM:
                             if (completeness_value != -1)
@@ -367,22 +358,23 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                 ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
         }
 
+        if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+                          &attset))
+        {
+            regex_trunc = 1; /* regex trunc (102) allowed */
+        }
+
         /* make the RPN token */
-        p->u.t.term = (char *)xmalloc(len);
+        p->u.t.term = (char *)xmalloc(len * 2 + 2);
         ccl_assert(p->u.t.term);
         p->u.t.term[0] = '\0';
         for (i = 0; i<no; i++)
         {
             const char *src_str = cclp->look_token->name;
             size_t src_len = cclp->look_token->len;
-            
-            if (i == 0 && left_trunc)
-            {
-                src_len--;
-                src_str++;
-            }
-            if (i == no-1 && right_trunc)
-                src_len--;
+            int j;
+            int quote_mode = 0;
+
             if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
             {
                 size_t len = strlen(p->u.t.term);
@@ -390,7 +382,61 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                        cclp->look_token->ws_prefix_len);
                 p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
             }
-            strxcat(p->u.t.term, src_str, src_len);
+            for (j = 0; j < src_len; j++)
+            {
+                if (j > 0 && src_str[j-1] == '\\')
+                {
+                    if (regex_trunc && strchr("()[]?*.", src_str[j]))
+                    {
+                        regex_trunc = 2;
+                        strcat(p->u.t.term, "\\\\");
+                    }
+                    strxcat(p->u.t.term, src_str + j, 1);
+                }
+                else if (src_str[j] == '"')
+                    quote_mode = !quote_mode;
+                else if (!quote_mode && src_str[j] == '?')
+                {
+                    if (regex_trunc)
+                    {
+                        strcat(p->u.t.term, ".*");
+                        regex_trunc = 2; /* regex trunc is really needed */
+                    }
+                    else if (i == 0 && j == 0)
+                        left_trunc = 1;
+                    else if (i == no - 1 && j == src_len - 1)
+                        right_trunc = 1;
+                    else
+                    {
+                        cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+                        ccl_rpn_delete(p);
+                        return NULL;
+                    }
+                }
+                else if (!quote_mode && src_str[j] == '#')
+                {
+                    if (regex_trunc)
+                    {
+                        strcat(p->u.t.term, ".");
+                        regex_trunc = 2; /* regex trunc is really needed */
+                    }
+                    else
+                    {
+                        cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+                        ccl_rpn_delete(p);
+                        return NULL;
+                    }
+                }
+                else if (src_str[j] != '\\')
+                {
+                    if (regex_trunc && strchr("()[]?*.", src_str[j]))
+                    {
+                        regex_trunc = 2;
+                        strcat(p->u.t.term, "\\\\");
+                    }
+                    strxcat(p->u.t.term, src_str + j, 1);                    
+                }
+            }
             ADVANCE;
         }
 
@@ -447,6 +493,10 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
             }
             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
         }
+        else if (regex_trunc == 2)
+        {
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+        }
         else
         {
             if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
@@ -1107,6 +1157,7 @@ struct ccl_rpn_node *ccl_find_str(CCL_bibset bibset, const char *str,
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab