Fix sample PQF
[yaz-moved-to-github.git] / ccl / cclfind.c
index 579491e..9fdcf13 100644 (file)
@@ -44,7 +44,7 @@
 /* CCL find (to rpn conversion)
  * Europagate, 1995
  *
- * $Id: cclfind.c,v 1.29 2002-03-18 18:14:34 adam Exp $
+ * $Id: cclfind.c,v 1.34 2003-06-23 10:22:21 adam Exp $
  *
  * Old Europagate log:
  *
@@ -127,7 +127,8 @@ static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value,
     for (i = 0;  (q=qa[i]); i++)
         while (q)
         {
-            if (q->type == type && q->value == value)
+            if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC &&
+               q->value.numeric == value)
             {
                 if (attset)
                     *attset = q->set;
@@ -160,7 +161,7 @@ static void strxcat (char *n, const char *src, int len)
  */
 static char *copy_token_name (struct ccl_token *tp)
 {
-    char *str = (char *)malloc (tp->len + 1);
+    char *str = (char *)xmalloc (tp->len + 1);
     ccl_assert (str);
     memcpy (str, tp->name, tp->len);
     str[tp->len] = '\0';
@@ -175,7 +176,7 @@ static char *copy_token_name (struct ccl_token *tp)
 static struct ccl_rpn_node *mk_node (int kind)
 {
     struct ccl_rpn_node *p;
-    p = (struct ccl_rpn_node *)malloc (sizeof(*p));
+    p = (struct ccl_rpn_node *)xmalloc (sizeof(*p));
     ccl_assert (p);
     p->kind = kind;
     return p;
@@ -199,24 +200,29 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn)
         ccl_rpn_delete (rpn->u.p[1]);
         break;
     case CCL_RPN_TERM:
-        free (rpn->u.t.term);
+        xfree (rpn->u.t.term);
         for (attr = rpn->u.t.attr_list; attr; attr = attr1)
         {
             attr1 = attr->next;
+           if (attr->kind == CCL_RPN_ATTR_STRING)
+               xfree(attr->value.str);
             if (attr->set)
-                free (attr->set);
-            free (attr);
+                xfree (attr->set);
+            xfree (attr);
         }
         break;
     case CCL_RPN_SET:
-        free (rpn->u.setname);
+        xfree (rpn->u.setname);
         break;
     case CCL_RPN_PROX:
         ccl_rpn_delete (rpn->u.p[0]);
         ccl_rpn_delete (rpn->u.p[1]);
+        /* u.p[2]: proximity-operator term set by search_terms, or 0 */
+        if (rpn->u.p[2])
+            ccl_rpn_delete (rpn->u.p[2]);
         break;
     }
-    free (rpn);
+    xfree (rpn);
 }
 
 static struct ccl_rpn_node *find_spec (CCL_parser cclp,
@@ -233,34 +236,58 @@ static int is_term_ok (int look, int *list)
 static struct ccl_rpn_node *search_terms (CCL_parser cclp,
                                           struct ccl_rpn_attr **qa);
 
-/*
- * add_attr: Add attribute (type/value) to RPN term node.
- * p:     RPN node of type term.
- * type:  Type of attribute
- * value: Value of attribute
- * set: Attribute set name
- */
-static void add_attr (struct ccl_rpn_node *p, const char *set,
-                      int type, int value)
+static struct ccl_rpn_attr *add_attr_node (struct ccl_rpn_node *p,
+                                          const char *set, int type)
 {
     struct ccl_rpn_attr *n;
-
-    n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
+    
+    n = (struct ccl_rpn_attr *)xmalloc (sizeof(*n));
     ccl_assert (n);
     if (set)
     {
-        n->set = (char*) malloc (strlen(set)+1);
+        n->set = (char*) xmalloc (strlen(set)+1);
         strcpy (n->set, set);
     }
     else
         n->set = 0;
     n->type = type;
-    n->value = value;
     n->next = p->u.t.attr_list;
     p->u.t.attr_list = n;
+    
+    n->kind = CCL_RPN_ATTR_NUMERIC;
+    n->value.numeric = 0;
+    return n;
 }
 
 /*
+ * add_attr_numeric: Add attribute (type/value) to RPN term node.
+ * p:     RPN node of type term.
+ * type:  Type of attribute
+ * value: Value of attribute
+ * set: Attribute set name
+ */
+static void add_attr_numeric (struct ccl_rpn_node *p, const char *set,
+                             int type, int value)
+{
+    struct ccl_rpn_attr *n;
+
+    n = add_attr_node(p, set, type);
+    n->kind = CCL_RPN_ATTR_NUMERIC;
+    n->value.numeric = value;
+}
+
+static void add_attr_string (struct ccl_rpn_node *p, const char *set,
+                            int type, char *value)
+{
+    struct ccl_rpn_attr *n;
+
+    n = add_attr_node(p, set, type);
+    n->kind = CCL_RPN_ATTR_STRING;
+    n->value.str = xstrdup(value);
+}
+
+
+/*
  * search_term: Parse CCL search term. 
  * cclp:   CCL Parser
  * qa:     Qualifier attributes already applied.
@@ -369,39 +396,48 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
             struct ccl_rpn_attr *attr;
             
             for (attr = qa[i]; attr; attr = attr->next)
-                if (attr->value > 0)
-                {   /* deal only with REAL attributes (positive) */
-                    switch (attr->type)
-                    {
-                    case CCL_BIB1_REL:
-                        if (relation_value != -1)
-                            continue;
-                        relation_value = attr->value;
-                        break;
-                    case CCL_BIB1_POS:
-                        if (position_value != -1)
-                            continue;
-                        position_value = attr->value;
-                        break;
-                    case CCL_BIB1_STR:
-                        if (structure_value != -1)
-                            continue;
-                        structure_value = attr->value;
-                        break;
-                    case CCL_BIB1_TRU:
-                        if (truncation_value != -1)
-                            continue;
-                        truncation_value = attr->value;
-                        left_trunc = right_trunc = mid_trunc = 0;
-                        break;
-                    case CCL_BIB1_COM:
-                        if (completeness_value != -1)
-                            continue;
-                        completeness_value = attr->value;
-                        break;
-                    }
-                    add_attr (p, attr->set, attr->type, attr->value);
-            }
+               switch(attr->kind)
+               {
+               case CCL_RPN_ATTR_STRING:
+                   add_attr_string(p, attr->set, attr->type,
+                                   attr->value.str);
+                   break;
+               case CCL_RPN_ATTR_NUMERIC:
+                   if (attr->value.numeric > 0)
+                   {   /* deal only with REAL attributes (positive) */
+                       switch (attr->type)
+                       {
+                       case CCL_BIB1_REL:
+                           if (relation_value != -1)
+                               continue;
+                           relation_value = attr->value.numeric;
+                           break;
+                       case CCL_BIB1_POS:
+                           if (position_value != -1)
+                               continue;
+                           position_value = attr->value.numeric;
+                           break;
+                       case CCL_BIB1_STR:
+                           if (structure_value != -1)
+                               continue;
+                           structure_value = attr->value.numeric;
+                           break;
+                       case CCL_BIB1_TRU:
+                           if (truncation_value != -1)
+                               continue;
+                           truncation_value = attr->value.numeric;
+                           left_trunc = right_trunc = mid_trunc = 0;
+                           break;
+                       case CCL_BIB1_COM:
+                           if (completeness_value != -1)
+                               continue;
+                           completeness_value = attr->value.numeric;
+                           break;
+                       }
+                       add_attr_numeric(p, attr->set, attr->type,
+                                        attr->value.numeric);
+                   }
+               }
         }
         /* len now holds the number of characters in the RPN term */
         /* no holds the number of CCL tokens (1 or more) */
@@ -411,13 +447,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
         {   /* no structure attribute met. Apply either structure attribute 
                WORD or PHRASE depending on number of CCL tokens */
             if (no == 1 && no_spaces == 0)
-                add_attr (p, attset, CCL_BIB1_STR, 2);
+                add_attr_numeric (p, attset, CCL_BIB1_STR, 2);
             else
-                add_attr (p, attset, CCL_BIB1_STR, 1);
+                add_attr_numeric (p, attset, CCL_BIB1_STR, 1);
         }
         
         /* make the RPN token */
-        p->u.t.term = (char *)malloc (len);
+        p->u.t.term = (char *)xmalloc (len);
         ccl_assert (p->u.t.term);
         p->u.t.term[0] = '\0';
         for (i = 0; i<no; i++)
@@ -454,7 +490,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr (p, attset, CCL_BIB1_TRU, 3);
+            add_attr_numeric (p, attset, CCL_BIB1_TRU, 3);
         }
         else if (right_trunc)
         {
@@ -465,7 +501,7 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr (p, attset, CCL_BIB1_TRU, 1);
+            add_attr_numeric (p, attset, CCL_BIB1_TRU, 1);
         }
         else if (left_trunc)
         {
@@ -476,13 +512,13 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
                 ccl_rpn_delete (p);
                 return NULL;
             }
-            add_attr (p, attset, CCL_BIB1_TRU, 2);
+            add_attr_numeric (p, attset, CCL_BIB1_TRU, 2);
         }
         else
         {
             if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
                                &attset))
-                add_attr (p, attset, CCL_BIB1_TRU, 100);
+                add_attr_numeric (p, attset, CCL_BIB1_TRU, 100);
         }
         if (!multi)
             break;
@@ -580,14 +616,14 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
                 }
                 p = mk_node (CCL_RPN_AND);
                 p->u.p[0] = p1;
-                add_attr (p1, attset, CCL_BIB1_REL, 4);
+                add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
                 p->u.p[1] = p2;
-                add_attr (p2, attset, CCL_BIB1_REL, 2);
+                add_attr_numeric (p2, attset, CCL_BIB1_REL, 2);
                 return p;
             }
             else                       /* = term -    */
             {
-                add_attr (p1, attset, CCL_BIB1_REL, 4);
+                add_attr_numeric (p1, attset, CCL_BIB1_REL, 4);
                 return p1;
             }
         }
@@ -597,7 +633,7 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
             ADVANCE;
             if (!(p = search_term (cclp, ap)))
                 return NULL;
-            add_attr (p, attset, CCL_BIB1_REL, 2);
+            add_attr_numeric (p, attset, CCL_BIB1_REL, 2);
             return p;
         }
         else if (KIND == CCL_TOK_LP)
@@ -618,7 +654,7 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
         {
             if (!(p = search_terms (cclp, ap)))
                 return NULL;
-            add_attr (p, attset, CCL_BIB1_REL, rel);
+            add_attr_numeric (p, attset, CCL_BIB1_REL, rel);
             return p;
         }
         cclp->error_code = CCL_ERR_TERM_EXPECTED;
@@ -627,22 +663,24 @@ static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
 }
 
 /*
- * qualifiers: Parse CCL qualifiers and search terms. 
+ * qualifiers1: Parse CCL qualifiers and search terms. 
  * cclp:   CCL Parser
  * la:     Token pointer to RELATION token.
  * qa:     Qualifier attributes already applied.
  * return: pointer to node(s); NULL on error.
  */
-static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la,
-                                        struct ccl_rpn_attr **qa)
+static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,
+                                         struct ccl_rpn_attr **qa)
 {
     struct ccl_token *lookahead = cclp->look_token;
     struct ccl_token *look_start = cclp->look_token;
     struct ccl_rpn_attr **ap;
     struct ccl_rpn_node *node = 0;
+    const char *field_str;
     int no = 0;
     int seq = 0;
     int i;
+    int mode_merge = 1;
 #if 0
     if (qa)
     {
@@ -656,62 +694,122 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la,
     if (qa)
         for (i=0; qa[i]; i++)
             no++;
-    ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap));
+    ap = (struct ccl_rpn_attr **)xmalloc ((no ? (no+1) : 2) * sizeof(*ap));
     ccl_assert (ap);
 
-    while (1)
+    field_str = ccl_qual_search_special(cclp->bibset, "field");
+    if (field_str)
+    {
+        if (!strcmp (field_str, "or"))
+            mode_merge = 0;
+        else if (!strcmp (field_str, "merge"))
+            mode_merge = 1;
+    }
+    if (!mode_merge)
     {
-        struct ccl_rpn_node *node_sub;
-        int found = 0;
+        /* consider each field separately and OR */
         lookahead = look_start;
-        for (i = 0; lookahead != la; i++)
+        while (lookahead != la)
         {
-            ap[i] = ccl_qual_search (cclp, lookahead->name,
-                                     lookahead->len, seq);
-            if (ap[i])
-                found++;
-            if (!ap[i] && seq > 0)
-                ap[i] = ccl_qual_search (cclp, lookahead->name,
-                                         lookahead->len, 0);
-            if (!ap[i])
+            ap[1] = 0;
+            seq = 0;
+            while ((ap[0] = ccl_qual_search (cclp, lookahead->name,
+                                             lookahead->len, seq)) != 0)
+            {
+                struct ccl_rpn_node *node_sub;
+                cclp->look_token = la;
+                
+                node_sub = qualifiers2(cclp, ap);
+                if (!node_sub)
+                {
+                    ccl_rpn_delete (node);
+                    xfree (ap);
+                    return 0;
+                }
+                if (node)
+                {
+                    struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                    node_this->u.p[0] = node;
+                    node_this->u.p[1] = node_sub;
+                    node = node_this;
+                }
+                else
+                    node = node_sub;
+                seq++;
+            }
+            if (seq == 0)
             {
                 cclp->look_token = lookahead;
                 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
-                free (ap);
+                xfree (ap);
                 return NULL;
             }
             lookahead = lookahead->next;
             if (lookahead->kind == CCL_TOK_COMMA)
                 lookahead = lookahead->next;
         }
-        if (qa)
-            while (*qa)
-                ap[i++] = *qa++;
-        ap[i] = NULL;
-
-        if (!found)
-            break;
-
-        cclp->look_token = lookahead;
-
-        node_sub = qualifiers2(cclp, ap);
-        if (!node_sub)
-        {
-            ccl_rpn_delete (node);
-            break;
-        }
-        if (node)
+    }
+    else
+    {
+        /* merge attributes from ALL fields - including inherited ones */
+        while (1)
         {
-            struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
-            node_this->u.p[0] = node;
-            node_this->u.p[1] = node_sub;
-            node = node_this;
+            struct ccl_rpn_node *node_sub;
+            int found = 0;
+            lookahead = look_start;
+            for (i = 0; lookahead != la; i++)
+            {
+                ap[i] = ccl_qual_search (cclp, lookahead->name,
+                                         lookahead->len, seq);
+                if (ap[i])
+                    found++;
+                if (!ap[i] && seq > 0)
+                    ap[i] = ccl_qual_search (cclp, lookahead->name,
+                                             lookahead->len, 0);
+                if (!ap[i])
+                {
+                    cclp->look_token = lookahead;
+                    cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
+                    xfree (ap);
+                    return NULL;
+                }
+                lookahead = lookahead->next;
+                if (lookahead->kind == CCL_TOK_COMMA)
+                    lookahead = lookahead->next;
+            }
+            if (qa)
+            {
+                struct ccl_rpn_attr **qa0 = qa;
+                
+                while (*qa0)
+                    ap[i++] = *qa0++;
+            }
+            ap[i] = NULL;
+            
+            if (!found)
+                break;
+            
+            cclp->look_token = lookahead;
+            node_sub = qualifiers2(cclp, ap);
+            if (!node_sub)
+            {   /* node was just freed: clear it so NULL is returned */
+                ccl_rpn_delete (node);
+                node = 0;
+                break;
+            }
+            if (node)
+            {
+                struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                node_this->u.p[0] = node;
+                node_this->u.p[1] = node_sub;
+                node = node_this;
+            }
+            else
+                node = node_sub;
+            seq++;
         }
-        else
-            node = node_sub;
-        seq++;
     }
-    free (ap);
+    xfree (ap);
     return node;
 }
 
@@ -735,6 +833,16 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
     {
         if (KIND == CCL_TOK_PROX)
         {
+            struct ccl_rpn_node *p_prox = 0;
+            /* keep the operator token text: ! = word order specified,
+               % = word order not specified; +1 byte for the NUL below */
+            p_prox = mk_node(CCL_RPN_TERM);
+            p_prox->u.t.term = (char *) xmalloc(cclp->look_token->len + 1);
+            memcpy(p_prox->u.t.term, cclp->look_token->name,
+                   cclp->look_token->len);
+            p_prox->u.t.term[cclp->look_token->len] = 0;
+            p_prox->u.t.attr_list = 0;
+
             ADVANCE;
             p2 = search_term_x (cclp, qa, list, 1);
             if (!p2)
@@ -745,6 +853,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
             pn = mk_node (CCL_RPN_PROX);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
+            pn->u.p[2] = p_prox;
             p1 = pn;
         }
         else if (is_term_ok(KIND, list))
@@ -758,6 +867,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp,
             pn = mk_node (CCL_RPN_PROX);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
+            pn->u.p[2] = 0;
             p1 = pn;
         }
         else
@@ -813,7 +923,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp,
     {
         lookahead = lookahead->next;
         if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
-            return qualifiers (cclp, lookahead, qa);
+            return qualifiers1 (cclp, lookahead, qa);
         if (lookahead->kind != CCL_TOK_COMMA)
             break;
         lookahead = lookahead->next;
@@ -848,6 +958,7 @@ static struct ccl_rpn_node *search_elements (CCL_parser cclp,
                 struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
                 node_this->u.p[0] = node;
                 node_this->u.p[1] = node_sub;
+                node_this->u.p[2] = 0;
                 node = node_this;
             }
             else
@@ -886,6 +997,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
             pn = mk_node (CCL_RPN_AND);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
+            pn->u.p[2] = 0;
             p1 = pn;
             continue;
         case CCL_TOK_OR:
@@ -899,6 +1011,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
             pn = mk_node (CCL_RPN_OR);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
+            pn->u.p[2] = 0;
             p1 = pn;
             continue;
         case CCL_TOK_NOT:
@@ -912,6 +1025,7 @@ static struct ccl_rpn_node *find_spec (CCL_parser cclp,
             pn = mk_node (CCL_RPN_NOT);
             pn->u.p[0] = p1;
             pn->u.p[1] = p2;
+            pn->u.p[2] = 0;
             p1 = pn;
             continue;
         }