CCL: slightly different point of error return
[yaz-moved-to-github.git] / src / cclfind.c
index 202e2a3..f4dbaa4 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2013 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
@@ -231,7 +231,7 @@ static size_t cmp_operator(const char **aliases, const char *input)
 #define CCL_CHARS "#?\\"
 
 static int has_ccl_masking(const char *src_str,
-                           int src_len,
+                           size_t src_len,
                            const char **truncation_aliases,
                            const char **mask_aliases)
 {
@@ -326,24 +326,29 @@ static int append_term(CCL_parser cclp, const char *src_str, size_t src_len,
     return 0;
 }
 
-/**
- * search_term: Parse CCL search term.
- * cclp:   CCL Parser
- * qa:     Qualifier attributes already applied.
- * term_list: tokens we accept as terms in context
- * multi:  whether we accept "multiple" tokens
- * return: pointer to node(s); NULL on error.
- */
-static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
-                                          ccl_qualifier_t *qa,
-                                          int *term_list, int multi)
+
+static struct ccl_rpn_node *ccl_term_one_use(CCL_parser cclp,
+                                             struct ccl_rpn_attr *attr_use,
+                                             ccl_qualifier_t *qa,
+                                             size_t no, int term_len,
+                                             int is_phrase,
+                                             int auto_group)
 {
-    struct ccl_rpn_node *p_top = 0;
-    struct ccl_token *lookahead = cclp->look_token;
-    int and_list = 0;
-    int auto_group = 0;
-    int or_list = 0;
+    struct ccl_rpn_node *p;
+    size_t i;
+    int relation_value = -1;
+    int position_value = -1;
+    int structure_value = -1;
+    int truncation_value = -1;
+    int completeness_value = -1;
+
+    int left_trunc = 0;
+    int right_trunc = 0;
+    int regex_trunc = 0;
+    int z3958_trunc = 0;
+    int is_ccl_masked = 0;
     char *attset;
+    struct ccl_token *lookahead = cclp->look_token;
     const char **truncation_aliases;
     const char *t_default[2];
     const char **mask_aliases;
@@ -368,84 +373,33 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
     }
 
 
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
-        and_list = 1;
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AUTO_GROUP, 0))
-        auto_group = 1;
-    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
-        or_list = 1;
-    while (1)
+    for (i = 0; i < no; i++)
     {
-        struct ccl_rpn_node *p;
-        size_t no, i;
-        int is_phrase = 0;
-        int is_ccl_masked = 0;
-        int relation_value = -1;
-        int position_value = -1;
-        int structure_value = -1;
-        int truncation_value = -1;
-        int completeness_value = -1;
-        int len = 0;
-        int left_trunc = 0;
-        int right_trunc = 0;
-        int regex_trunc = 0;
-        int z3958_trunc = 0;
-        size_t max = 200;
-        if (and_list || or_list || !multi)
-            max = 1;
-
-        /* ignore commas when dealing with and-lists .. */
-        if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
-        {
-            lookahead = lookahead->next;
-            ADVANCE;
-            continue;
-        }
-        for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
-        {
-            int this_is_phrase = 0;
-            for (i = 0; i<lookahead->len; i++)
-                if (lookahead->name[i] == ' ')
-                    this_is_phrase = 1;
-
-            if (has_ccl_masking(lookahead->name, lookahead->len,
-                                truncation_aliases,
-                                mask_aliases))
-                is_ccl_masked = 1;
+        if (has_ccl_masking(lookahead->name, lookahead->len,
+                            truncation_aliases,
+                            mask_aliases))
+            is_ccl_masked = 1;
+        lookahead = lookahead->next;
+    }
+    lookahead = cclp->look_token;
 
-            if (auto_group)
+    p = ccl_rpn_node_create(CCL_RPN_TERM);
+    p->u.t.attr_list = NULL;
+    p->u.t.term = NULL;
+    if (qa && qa[0])
+    {
+        const char *n = ccl_qual_get_name(qa[0]);
+        if (n)
+            p->u.t.qual = xstrdup(n);
+    }
+    /* go through all attributes and add them to the attribute list */
+    for (i = 0; qa && qa[i]; i++)
+    {
+        struct ccl_rpn_attr *attr;
+        for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
+            if (attr->type != 1 || attr == attr_use)
             {
-                if (no > 0 && (is_phrase || is_phrase != this_is_phrase))
-                    break;
-                is_phrase = this_is_phrase;
-            }
-            else if (this_is_phrase || no > 0)
-                is_phrase = 1;
-            len += 1+lookahead->len+lookahead->ws_prefix_len;
-            lookahead = lookahead->next;
-        }
-
-        if (len == 0)
-            break;      /* no more terms . stop . */
-
-        /* create the term node, but wait a moment before adding the term */
-        p = ccl_rpn_node_create(CCL_RPN_TERM);
-        p->u.t.attr_list = NULL;
-        p->u.t.term = NULL;
-        if (qa && qa[0])
-        {
-            const char *n = ccl_qual_get_name(qa[0]);
-            if (n)
-                p->u.t.qual = xstrdup(n);
-        }
-
-        /* go through all attributes and add them to the attribute list */
-        for (i=0; qa && qa[i]; i++)
-        {
-            struct ccl_rpn_attr *attr;
-
-            for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
-                switch(attr->kind)
+                switch (attr->kind)
                 {
                 case CCL_RPN_ATTR_STRING:
                     ccl_add_attr_string(p, attr->set, attr->type,
@@ -486,56 +440,204 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
                                              attr->value.numeric);
                     }
                 }
+            }
+    }
+    attset = 0;
+    if (structure_value == -1 && (
+            auto_group ||
+            qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
+        )
+    {
+        if (!is_phrase)
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
+        else
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
+    }
+    if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+                      &attset))
+    {
+        if (is_ccl_masked)
+            regex_trunc = 1; /* regex trunc (102) allowed */
+    }
+    else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+                           &attset))
+    {
+        if (is_ccl_masked)
+            z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+    }
+    /* make the RPN token */
+    p->u.t.term = (char *)xmalloc(term_len * 2 + 2);
+    ccl_assert(p->u.t.term);
+    p->u.t.term[0] = '\0';
+
+    for (i = 0; i < no; i++)
+    {
+        const char *src_str = lookahead->name;
+        size_t src_len = lookahead->len;
+
+        if (p->u.t.term[0] && lookahead->ws_prefix_len)
+        {
+            strxcat(p->u.t.term, lookahead->ws_prefix_buf,
+                    lookahead->ws_prefix_len);
         }
-        attset = 0;
-        if (structure_value == -1 && (
-                auto_group ||
-                qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
-            )
+        if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc,
+                        z3958_trunc, truncation_aliases, mask_aliases,
+                        i == 0, i == no - 1,
+                        &left_trunc, &right_trunc))
         {
-            if (!is_phrase)
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
-            else
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
+            ccl_rpn_delete(p);
+            return NULL;
         }
-
-        if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
-                          &attset))
+        lookahead = lookahead->next;
+    }
+    if (left_trunc && right_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
+                           &attset))
         {
-            if (is_ccl_masked)
-                regex_trunc = 1; /* regex trunc (102) allowed */
+            cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+            ccl_rpn_delete(p);
+            return NULL;
         }
-        else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
-                          &attset))
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
+    }
+    else if (right_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
+                           &attset))
         {
-            if (is_ccl_masked)
-                z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+            cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
+            ccl_rpn_delete(p);
+            return NULL;
         }
-
-        /* make the RPN token */
-        p->u.t.term = (char *)xmalloc(len * 2 + 2);
-        ccl_assert(p->u.t.term);
-        p->u.t.term[0] = '\0';
-        for (i = 0; i<no; i++)
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
+    }
+    else if (left_trunc)
+    {
+        if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
+                           &attset))
         {
-            const char *src_str = cclp->look_token->name;
-            size_t src_len = cclp->look_token->len;
+            cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
+            ccl_rpn_delete(p);
+            return NULL;
+        }
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
+    }
+    else if (regex_trunc)
+    {
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+    }
+    else if (z3958_trunc)
+    {
+        ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+    }
+    else
+    {
+        if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
+                          &attset))
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
+    }
+    return p;
+}
 
-            if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
-            {
-                strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf,
-                        cclp->look_token->ws_prefix_len);
-            }
-            if (append_term(cclp, src_str, src_len, p->u.t.term, regex_trunc,
-                            z3958_trunc, truncation_aliases, mask_aliases,
-                            i == 0, i == no - 1,
-                            &left_trunc, &right_trunc))
+/**
+ * search_term_x: Parse one or more CCL search terms.
+ * cclp:   CCL parser; terms are read starting at cclp->look_token
+ * qa:     Qualifier attributes already applied.
+ * term_list: token kinds we accept as terms in this context
+ * multi:  non-zero to accept multiple tokens; zero stops after one term
+ * return: pointer to node(s); NULL on error.
+ */
+static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
+                                          ccl_qualifier_t *qa,
+                                          int *term_list, int multi)
+{
+    struct ccl_rpn_node *p_top = 0;
+    struct ccl_token *lookahead = cclp->look_token;
+    int and_list = 0;
+    int auto_group = 0;
+    int or_list = 0;
+
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
+        and_list = 1;
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AUTO_GROUP, 0))
+        auto_group = 1;
+    if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
+        or_list = 1;
+    while (1)
+    {
+        struct ccl_rpn_node *p = 0;
+        size_t no, i;
+        int len = 0;
+        int is_phrase = 0;
+        size_t max = 200;
+        if (and_list || or_list || !multi)
+            max = 1;
+
+        /* ignore commas when dealing with and-lists .. */
+        if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
+        {
+            lookahead = lookahead->next;
+            ADVANCE;
+            continue;
+        }
+        for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
+        {
+            int this_is_phrase = 0;
+            for (i = 0; i<lookahead->len; i++)
+                if (lookahead->name[i] == ' ')
+                    this_is_phrase = 1;
+            if (auto_group)
             {
-                ccl_rpn_delete(p);
-                return NULL;
+                if (no > 0 && (is_phrase || is_phrase != this_is_phrase))
+                    break;
+                is_phrase = this_is_phrase;
             }
-            ADVANCE;
+            else if (this_is_phrase || no > 0)
+                is_phrase = 1;
+            len += 1+lookahead->len+lookahead->ws_prefix_len;
+            lookahead = lookahead->next;
+        }
+
+        if (len == 0)
+            break;      /* no more terms . stop . */
+
+        /* build one term node per type-1 (use) attribute; OR them together */
+        for (i = 0; qa && qa[i]; i++)
+        {
+            struct ccl_rpn_attr *attr;
+
+            for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
+                if (attr->type == 1)
+                {
+                    struct ccl_rpn_node *tmp2;
+                    tmp2 = ccl_term_one_use(cclp, attr, qa, no, len,
+                                            is_phrase,
+                                            auto_group);
+                    if (!tmp2)
+                    {
+                        ccl_rpn_delete(p);
+                        return 0;
+                    }
+                    if (!p)
+                        p = tmp2;
+                    else
+                    {
+                        struct ccl_rpn_node *tmp1;
+                        tmp1 = ccl_rpn_node_create(CCL_RPN_OR);
+                        tmp1->u.p[0] = p;
+                        tmp1->u.p[1] = tmp2;
+                        p = tmp1;
+                    }
+                }
         }
+        if (!p)
+            p = ccl_term_one_use(cclp, 0 /* attr: no use */, qa, no, len,
+                                 is_phrase, auto_group);
+        for (i = 0; i < no; i++)
+            ADVANCE;
+        if (!p)
+            return 0;
         /* make the top node point to us.. */
         if (p_top)
         {
@@ -555,54 +657,6 @@ static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
         else
             p_top = p;
 
-
-        if (left_trunc && right_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
-                                &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
-        }
-        else if (right_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
-                                 &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
-        }
-        else if (left_trunc)
-        {
-            if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
-                                &attset))
-            {
-                cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
-                ccl_rpn_delete(p);
-                return NULL;
-            }
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
-        }
-        else if (regex_trunc)
-        {
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
-        }
-        else if (z3958_trunc)
-        {
-            ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
-        }
-        else
-        {
-            if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
-                               &attset))
-                ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
-        }
         if (!multi)
             break;
     }
@@ -687,9 +741,14 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
         if (KIND == CCL_TOK_TERM)
         {
             size_t i;
+            int quote_mode = 0;
             for (i = 0; i<cclp->look_token->len; i++)
             {
-                if (cclp->look_token->name[i] == '-')
+                if (i > 0 && cclp->look_token->name[i] == '\\')
+                    ;
+                else if (cclp->look_token->name[i] == '"')
+                    quote_mode = !quote_mode;
+                else if (cclp->look_token->name[i] == '-' && !quote_mode)
                     break;
             }
 
@@ -794,10 +853,11 @@ struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
     {
         if (!(p = search_terms(cclp, ap)))
             return NULL;
-        ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
+        if (rel != 3 ||
+            !qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_OMIT_EQUALS, 0))
+            ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
         return p;
     }
-    cclp->error_code = CCL_ERR_TERM_EXPECTED;
     return NULL;
 }