CCL: slightly different point of error return
[yaz-moved-to-github.git] / src / cclfind.c
index 93ea05d..f4dbaa4 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2013 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -229,9 +229,38 @@ static size_t cmp_operator(const char **aliases, const char *input)
 
 #define REGEX_CHARS "^[]{}()|.*+?!$"
 #define CCL_CHARS "#?\\"
+/* Return 1 if src_str has an unquoted, unescaped truncation/mask operator. */
+static int has_ccl_masking(const char *src_str, /* term text to scan */
+                           size_t src_len, /* number of chars of src_str examined */
+                           const char **truncation_aliases, /* passed to cmp_operator */
+                           const char **mask_aliases) /* passed to cmp_operator */
+{
+    size_t j;
+    int quote_mode = 0; /* flipped by each unescaped '"'; operators ignored while set */
+
+    for (j = 0; j < src_len; j++)
+    {
+        size_t op_size; /* only tested for non-zero here; presumably the match length */
+        if (j > 0 && src_str[j-1] == '\\')
+            ; /* previous char is a backslash: src_str[j] is taken literally */
+        else if (src_str[j] == '"')
+            quote_mode = !quote_mode;
+        else if (!quote_mode &&
+                 (op_size = cmp_operator(truncation_aliases,
+                                         src_str + j)))
+            return 1; /* non-zero cmp_operator result for a truncation alias */
+        else if (!quote_mode &&
+                 (op_size = cmp_operator(mask_aliases,
+                                          src_str + j)))
+            return 1; /* non-zero cmp_operator result for a mask alias */
+    }
+    return 0; /* no operator seen outside quotes/escapes */
+}
+
 static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
-                       char *dst_term, int *regex_trunc, int *z3958_trunc,
+                       char *dst_term, int regex_trunc, int z3958_trunc,
                        const char **truncation_aliases,
+                       const char **mask_aliases,
                        int is_first, int is_last,
                        int *left_trunc, int *right_trunc)
 {
@@ -243,16 +272,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
         size_t op_size;
         if (j > 0 && src_str[j-1] == '\\')
         {
-            if (*regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
-            {
-                *regex_trunc = 2;
+            if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
                 strcat(dst_term, "\\");
-            }
-            else if (*z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
-            {
-                *z3958_trunc = 2;
+            else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
                 strcat(dst_term, "\\");
-            }
             strxcat(dst_term, src_str + j, 1);
         }
         else if (src_str[j] == '"')
@@ -263,16 +286,10 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
             )
         {
             j += (op_size - 1);  /* j++ in for loop */
-            if (*regex_trunc)
-            {
+            if (regex_trunc)
                 strcat(dst_term, ".*");
-                *regex_trunc = 2; /* regex trunc is really needed */
-            }
-            else if (*z3958_trunc)
-            {
+            else if (z3958_trunc)
                 strcat(dst_term, "?");
-                *z3958_trunc = 2;
-            }
             else if (is_first && j == 0)
                 *left_trunc = 1;
             else if (is_last && j == src_len - 1)
@@ -283,18 +300,14 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
                 return -1;
             }
         }
-        else if (!quote_mode && src_str[j] == '#')
+        else if (!quote_mode &&
+                 (op_size = cmp_operator(mask_aliases, src_str + j)))
         {
-            if (*regex_trunc)
-            {
+            j += (op_size - 1);  /* j++ in for loop */
+            if (regex_trunc)
                 strcat(dst_term, ".");
-                *regex_trunc = 2; /* regex trunc is really needed */
-            }
-            else if (*z3958_trunc)
-            {
+            else if (z3958_trunc)
                 strcat(dst_term, "#");
-                *z3958_trunc = 2;
-            }
             else
             {
                 cclp->error_code = CCL_ERR_TRUNC_NOT_SINGLE;
@@ -303,42 +316,43 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
         }
         else if (src_str[j] != '\\')
         {
-            if (*regex_trunc && strchr(REGEX_CHARS, src_str[j]))
-            {
-                *regex_trunc = 2;
+            if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
                 strcat(dst_term, "\\");
-            }
-            else if (*z3958_trunc && strchr(CCL_CHARS, src_str[j]))
-            {
-                *z3958_trunc = 2;
+            else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
                 strcat(dst_term, "\\");
-            }
             strxcat(dst_term, src_str + j, 1);
         }
     }
     return 0;
 }
 
-/**
- * search_term: Parse CCL search term.
- * cclp:   CCL Parser
- * qa:     Qualifier attributes already applied.
- * term_list: tokens we accept as terms in context
- * multi:  whether we accept "multiple" tokens
- * return: pointer to node(s); NULL on error.
- */
-static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
-                                          ccl_qualifier_t *qa,
-                                          int *term_list, int multi)
+
+static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp,
+                                             struct ccl_rpn_attr *attr_use,
+                                             ccl_qualifier_t *qa,
+                                             size_t no, int term_len,
+                                             int is_phrase,
+                                             int auto_group)
 {
-    struct ccl_rpn_node *p_top = 0;
-    struct ccl_token *lookahead = cclp->look_token;
-    int and_list = 0;
-    int auto_group = 0;
-    int or_list = 0;
+    struct ccl_rpn_node *p;
+    size_t i;
+    int relation_value = -1;
+    int position_value = -1;
+    int structure_value = -1;
+    int truncation_value = -1;
+    int completeness_value = -1;
+
+    int left_trunc = 0;
+    int right_trunc = 0;
+    int regex_trunc = 0;
+    int z3958_trunc = 0;
+    int is_ccl_masked = 0;
     char *attset;
+    struct ccl_token *lookahead = cclp->look_token;
     const char **truncation_aliases;
     const char *t_default[2];
+    const char **mask_aliases;
+    const char *m_default[2];
 
     truncation_aliases =
         ccl_qual_search_special(cclp->bibset, "truncation");
@@ -349,78 +363,43 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         t_default[1] = 0;
     }
 
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
-        and_list = 1;
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AUTO_GROUP, 0))
-        auto_group = 1;
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
-        or_list = 1;
-    while (1)
+    mask_aliases =
+        ccl_qual_search_special(cclp->bibset, "mask");
+    if (!mask_aliases)
     {
-        struct ccl_rpn_node *p;
-        size_t no, i;
-        int is_phrase = 0;
-        int relation_value = -1;
-        int position_value = -1;
-        int structure_value = -1;
-        int truncation_value = -1;
-        int completeness_value = -1;
-        int len = 0;
-        int left_trunc = 0;
-        int right_trunc = 0;
-        int regex_trunc = 0;
-        int z3958_trunc = 0;
-        size_t max = 200;
-        if (and_list || or_list || !multi)
-            max = 1;
-
-        /* ignore commas when dealing with and-lists .. */
-        if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
-        {
-            lookahead = lookahead->next;
-            ADVANCE;
-            continue;
-        }
-        for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
-        {
-            int this_is_phrase = 0;
-            for (i = 0; i<lookahead->len; i++)
-                if (lookahead->name[i] == ' ')
-                    this_is_phrase = 1;
-
-            if (auto_group)
-            {
-                if (no > 0 && (is_phrase || is_phrase != this_is_phrase))
-                    break;
-                is_phrase = this_is_phrase;
-            }
-            else if (this_is_phrase || no > 0)
-                is_phrase = 1;
-            len += 1+lookahead->len+lookahead->ws_prefix_len;
-            lookahead = lookahead->next;
-        }
-
-        if (len == 0)
-            break;      /* no more terms . stop . */
+        mask_aliases = m_default;
+        m_default[0] = "#";
+        m_default[1] = 0;
+    }
 
-        /* create the term node, but wait a moment before adding the term */
-        p = ccl_rpn_node_create(CCL_RPN_TERM);
-        p->u.t.attr_list = NULL;
-        p->u.t.term = NULL;
-        if (qa && qa[0])
-        {
-            const char *n = ccl_qual_get_name(qa[0]);
-            if (n)
-                p->u.t.qual = xstrdup(n);
-        }
 
-        /* go through all attributes and add them to the attribute list */
-        for (i=0; qa && qa[i]; i++)
-        {
-            struct ccl_rpn_attr *attr;
+    for (i = 0; i < no; i++)
+    {
+        if (has_ccl_masking(lookahead->name, lookahead->len,
+                            truncation_aliases,
+                            mask_aliases))
+            is_ccl_masked = 1;
+        lookahead = lookahead->next;
+    }
+    lookahead = cclp->look_token;
 
-            for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
-                switch(attr->kind)
+    p = ccl_rpn_node_create(CCL_RPN_TERM);
+    p->u.t.attr_list = NULL;
+    p->u.t.term = NULL;
+    if (qa && qa[0])
+    {
+        const char *n = ccl_qual_get_name(qa[0]);
+        if (n)
+            p->u.t.qual = xstrdup(n);
+    }
+    /* go through all attributes and add them to the attribute list */
+    for (i = 0; qa && qa[i]; i++)
+    {
+        struct ccl_rpn_attr *attr;
+        for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
+            if (attr->type != 1 || attr == attr_use)
+            {
+                switch (attr->kind)
                 {
                 case CCL_RPN_ATTR_STRING:
                     ccl_add_attr_string(p, attr->set, attr->type,
@@ -461,53 +440,204 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                                              attr->value.numeric);
                     }
                 }
+            }
+    }
+    attset = 0;
+    if (structure_value == -1 && (
+            auto_group ||
+            qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
+        )
+    {
+        if (!is_phrase)
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
+        else
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
+    }
+    if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+                      &attset))
+    {
+        if (is_ccl_masked)
+            regex_trunc = 1; /* regex trunc (102) allowed */
+    }
+    else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+                           &attset))
+    {
+        if (is_ccl_masked)
+            z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+    }
+    /* make the RPN token */
+    p->u.t.term = (char *)xmalloc(term_len * 2 + 2);
+    ccl_assert(p->u.t.term);
+    p->u.t.term[0] = '\0';
+
+    for (i = 0; i < no; i++)
+    {
+        const char *src_str = lookahead->name;
+        size_t src_len = lookahead->len;
+
+        if (p->u.t.term[0] && lookahead->ws_prefix_len)
+        {
+            strxcat(p->u.t.term, lookahead->ws_prefix_buf,
+                    lookahead->ws_prefix_len);
         }
-        attset = 0;
-        if (structure_value == -1 && (
-                auto_group ||
-                qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
-            )
+        if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc,
+                        z3958_trunc, truncation_aliases, mask_aliases,
+                        i == 0, i == no - 1,
+                        &left_trunc, &right_trunc))
         {
-            if (!is_phrase)
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
-            else
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
+            ccl_rpn_delete(p);
+            return NULL;
         }
-
-        if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
-                          &attset))
+        lookahead = lookahead->next;
+    }
+    if (left_trunc && right_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
+                           &attset))
         {
-            regex_trunc = 1; /* regex trunc (102) allowed */
+            cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+            ccl_rpn_delete(p);
+            return NULL;
         }
-        else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
-                          &attset))
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
+    }
+    else if (right_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
+                           &attset))
         {
-            z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+            cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
+            ccl_rpn_delete(p);
+            return NULL;
         }
-
-        /* make the RPN token */
-        p->u.t.term = (char *)xmalloc(len * 2 + 2);
-        ccl_assert(p->u.t.term);
-        p->u.t.term[0] = '\0';
-        for (i = 0; i<no; i++)
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
+    }
+    else if (left_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
+                           &attset))
         {
-            const char *src_str = cclp->look_token->name;
-            size_t src_len = cclp->look_token->len;
+            cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
+            ccl_rpn_delete(p);
+            return NULL;
+        }
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
+    }
+    else if (regex_trunc)
+    {
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+    }
+    else if (z3958_trunc)
+    {
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+    }
+    else
+    {
+        if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
+                          &attset))
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
+    }
+    return p;
+}
 
-            if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
-            {
-                strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf,
-                        cclp->look_token->ws_prefix_len);
-            }
-            if (append_term(cclp, src_str, src_len, p->u.t.term, &regex_trunc,
-                            &z3958_trunc, truncation_aliases, i == 0, i == no - 1,
-                            &left_trunc, &right_trunc))
+/**
+ * search_term_x: Parse CCL search term.
+ * cclp:   CCL Parser
+ * qa:     Qualifier attributes already applied.
+ * term_list: tokens we accept as terms in context
+ * multi:  whether we accept "multiple" tokens
+ * return: pointer to node(s); NULL on error.
+ */
+static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
+                                          ccl_qualifier_t *qa,
+                                          int *term_list, int multi)
+{
+    struct ccl_rpn_node *p_top = 0;
+    struct ccl_token *lookahead = cclp->look_token;
+    int and_list = 0;
+    int auto_group = 0;
+    int or_list = 0;
+
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
+        and_list = 1;
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AUTO_GROUP, 0))
+        auto_group = 1;
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
+        or_list = 1;
+    while (1)
+    {
+        struct ccl_rpn_node *p = 0;
+        size_t no, i;
+        int len = 0;
+        int is_phrase = 0;
+        size_t max = 200;
+        if (and_list || or_list || !multi)
+            max = 1;
+
+        /* ignore commas when dealing with and-lists .. */
+        if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
+        {
+            lookahead = lookahead->next;
+            ADVANCE;
+            continue;
+        }
+        for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
+        {
+            int this_is_phrase = 0;
+            for (i = 0; i<lookahead->len; i++)
+                if (lookahead->name[i] == ' ')
+                    this_is_phrase = 1;
+            if (auto_group)
             {
-                ccl_rpn_delete(p);
-                return NULL;
+                if (no > 0 && (is_phrase || is_phrase != this_is_phrase))
+                    break;
+                is_phrase = this_is_phrase;
             }
-            ADVANCE;
+            else if (this_is_phrase || no > 0)
+                is_phrase = 1;
+            len += 1+lookahead->len+lookahead->ws_prefix_len;
+            lookahead = lookahead->next;
         }
+
+        if (len == 0)
+            break;      /* no more terms . stop . */
+
+        /* go through all attributes and add them to the attribute list */
+        for (i = 0; qa && qa[i]; i++)
+        {
+            struct ccl_rpn_attr *attr;
+
+            for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
+                if (attr->type == 1)
+                {
+                    struct ccl_rpn_node *tmp2;
+                    tmp2 = ccl_term_one_use(cclp, attr, qa, no, len,
+                                            is_phrase,
+                                            auto_group);
+                    if (!tmp2)
+                    {
+                        ccl_rpn_delete(p);
+                        return 0;
+                    }
+                    if (!p)
+                        p = tmp2;
+                    else
+                    {
+                        struct ccl_rpn_node *tmp1;
+                        tmp1 = ccl_rpn_node_create(CCL_RPN_OR);
+                        tmp1->u.p[0] = p;
+                        tmp1->u.p[1] = tmp2;
+                        p = tmp1;
+                    }
+                }
+        }
+        if (!p)
+            p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len,
+                                 is_phrase, auto_group);
+        for (i = 0; i < no; i++)
+            ADVANCE;
+        if (!p)
+            return 0;
         /* make the top node point to us.. */
         if (p_top)
         {
@@ -527,54 +657,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         else
             p_top = p;
 
-
-        if (left_trunc && right_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
-                                &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
-        }
-        else if (right_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
-                                 &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
-        }
-        else if (left_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
-                                &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
-        }
-        else if (regex_trunc == 2)
-        {
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
-        }
-        else if (z3958_trunc == 2)
-        {
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
-        }
-        else
-        {
-            if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
-                               &attset))
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
-        }
         if (!multi)
             break;
     }
@@ -659,9 +741,14 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
         if (KIND == CCL_TOK_TERM)
         {
             size_t i;
+            int quote_mode = 0;
             for (i = 0; i<cclp->look_token->len; i++)
             {
-                if (cclp->look_token->name[i] == '-')
+                if (i > 0 && cclp->look_token->name[i] == '\\')
+                    ;
+                else if (cclp->look_token->name[i] == '"')
+                    quote_mode = !quote_mode;
+                else if (cclp->look_token->name[i] == '-' && !quote_mode)
                     break;
             }
 
@@ -766,10 +853,11 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
     {
         if (!(p = search_terms(cclp, ap)))
             return NULL;
-        ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
+        if (rel != 3 ||
+            !qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_OMIT_EQUALS, 0))
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
         return p;
     }
-    cclp->error_code = CCL_ERR_TERM_EXPECTED;
     return NULL;
 }