7932ecbd099a257d1c02cd69d9dc9b0f74550b7d
[yaz-moved-to-github.git] / src / cclfind.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2011 Index Data
3  * See the file LICENSE for details.
4  */
5 /** 
6  * \file cclfind.c
7  * \brief Implements parsing of a CCL FIND query.
8  *
9  * This source file implements parsing of a CCL Query (ISO8777).
10  * The parser uses predictive parsing, but it does several tokens
11  * of lookahead in the handling of relational operations, so
12  * it's not really pure.
13  */
14 #if HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <assert.h>
21
22 #include "cclp.h"
23
/* Kind of the current lookahead token. Expects a parser variable named
   cclp to be in scope at the point of use. */
#define KIND (cclp->look_token->kind)

/* Move the lookahead one token forward. The expansion is parenthesized so
   that it remains a single expression wherever the macro is used. */
#define ADVANCE (cclp->look_token = cclp->look_token->next)
29
30 /**
31  * qual_val_type: test for existance of attribute type/value pair.
32  * qa:     Attribute array
33  * type:   Type of attribute to search for
34  * value:  Value of attribute to seach for
35  * return: 1 if found; 0 otherwise.
36  */
37 static int qual_val_type(ccl_qualifier_t *qa, int type, int value,
38                          char **attset)
39 {
40     int i;
41
42     if (!qa)
43         return 0;
44     for (i = 0; qa[i]; i++)
45     {
46         struct ccl_rpn_attr *q = ccl_qual_get_attr(qa[i]);
47         while (q)
48         {
49             if (q->type == type && q->kind == CCL_RPN_ATTR_NUMERIC &&
50                 q->value.numeric == value)
51             {
52                 if (attset)
53                     *attset = q->set;
54                 return 1;
55             }
56             q = q->next;
57         }
58     }
59     return 0;
60 }
61
/**
 * strxcat: append a counted run of characters to a string.
 * n:      Null-terminated destination; must have room for len more
 *         characters plus the terminator
 * src:    Characters to append (need not be null-terminated)
 * len:    Number of characters to take from src; nothing is appended
 *         when len is zero or negative.
 */
static void strxcat(char *n, const char *src, int len)
{
    char *end = n + strlen(n);
    int k;

    for (k = 0; k < len; k++)
        *end++ = src[k];
    *end = '\0';
}
76
77 /**
78  * copy_token_name: Return copy of CCL token name
79  * tp:      Pointer to token info.
80  * return:  malloc(3) allocated copy of token name.
81  */
82 static char *copy_token_name(struct ccl_token *tp)
83 {
84     char *str = (char *)xmalloc(tp->len + 1);
85     ccl_assert(str);
86     memcpy(str, tp->name, tp->len);
87     str[tp->len] = '\0';
88     return str;
89 }
90
91 /**
92  * mk_node: Create RPN node.
93  * kind:   Type of node.
94  * return: pointer to allocated node.
95  */
96 struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind)
97 {
98     struct ccl_rpn_node *p;
99     p = (struct ccl_rpn_node *)xmalloc(sizeof(*p));
100     ccl_assert(p);
101     p->kind = kind;
102
103     switch(kind)
104     {
105     case CCL_RPN_TERM:
106         p->u.t.attr_list = 0;
107         p->u.t.term = 0;
108         p->u.t.qual = 0;
109         break;
110     default:
111         break;
112     }
113     return p;
114 }
115
116 /**
117  * ccl_rpn_delete: Delete RPN tree.
118  * rpn:   Pointer to tree.
119  */
120 void ccl_rpn_delete(struct ccl_rpn_node *rpn)
121 {
122     struct ccl_rpn_attr *attr, *attr1;
123     if (!rpn)
124         return;
125     switch (rpn->kind)
126     {
127     case CCL_RPN_AND:
128     case CCL_RPN_OR:
129     case CCL_RPN_NOT:
130         ccl_rpn_delete(rpn->u.p[0]);
131         ccl_rpn_delete(rpn->u.p[1]);
132         break;
133     case CCL_RPN_TERM:
134         xfree(rpn->u.t.term);
135         xfree(rpn->u.t.qual);
136         for (attr = rpn->u.t.attr_list; attr; attr = attr1)
137         {
138             attr1 = attr->next;
139             if (attr->kind == CCL_RPN_ATTR_STRING)
140                 xfree(attr->value.str);
141             if (attr->set)
142                 xfree(attr->set);
143             xfree(attr);
144         }
145         break;
146     case CCL_RPN_SET:
147         xfree(rpn->u.setname);
148         break;
149     case CCL_RPN_PROX:
150         ccl_rpn_delete(rpn->u.p[0]);
151         ccl_rpn_delete(rpn->u.p[1]);
152         ccl_rpn_delete(rpn->u.p[2]);
153         break;
154     }
155     xfree(rpn);
156 }
157
158 static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa);
159
/* Return 1 if token kind look occurs in list, 0 otherwise. The list is
   terminated by its first negative entry, which is never matched. */
static int is_term_ok(int look, int *list)
{
    int k = 0;

    while (list[k] >= 0)
    {
        if (list[k] == look)
            return 1;
        k++;
    }
    return 0;
}
167
168 static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa);
169
170 static struct ccl_rpn_attr *add_attr_node(struct ccl_rpn_node *p,
171                                            const char *set, int type)
172 {
173     struct ccl_rpn_attr *n;
174     
175     n = (struct ccl_rpn_attr *)xmalloc(sizeof(*n));
176     ccl_assert(n);
177     if (set)
178         n->set = xstrdup(set);
179     else
180         n->set = 0;
181     n->type = type;
182     n->next = p->u.t.attr_list;
183     p->u.t.attr_list = n;
184     
185     return n;
186 }
187
188 /**
189  * add_attr_numeric: Add attribute (type/value) to RPN term node.
190  * p:     RPN node of type term.
191  * type:  Type of attribute
192  * value: Value of attribute
193  * set: Attribute set name
194  */
195 void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set,
196                           int type, int value)
197 {
198     struct ccl_rpn_attr *n;
199
200     n = add_attr_node(p, set, type);
201     n->kind = CCL_RPN_ATTR_NUMERIC;
202     n->value.numeric = value;
203 }
204
205 void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set,
206                          int type, char *value)
207 {
208     struct ccl_rpn_attr *n;
209
210     n = add_attr_node(p, set, type);
211     n->kind = CCL_RPN_ATTR_STRING;
212     n->value.str = xstrdup(value);
213 }
214
215
216 #define REGEX_CHARS "^[]{}()|.*+?!$"
217 #define CCL_CHARS "#?\\"
218 /**
219  * search_term: Parse CCL search term. 
220  * cclp:   CCL Parser
221  * qa:     Qualifier attributes already applied.
222  * term_list: tokens we accept as terms in context
223  * multi:  whether we accept "multiple" tokens
224  * return: pointer to node(s); NULL on error.
225  */
226 static struct ccl_rpn_node *search_term_x(CCL_parser cclp,
227                                           ccl_qualifier_t *qa,
228                                           int *term_list, int multi)
229 {
230     struct ccl_rpn_node *p_top = 0;
231     struct ccl_token *lookahead = cclp->look_token;
232     int and_list = 0;
233     int or_list = 0;
234     char *attset;
235     const char **truncation_aliases;
236     const char *t_default[2];
237
238     truncation_aliases =
239         ccl_qual_search_special(cclp->bibset, "truncation");
240     if (!truncation_aliases)
241     {
242         truncation_aliases = t_default;
243         t_default[0] = "?";
244         t_default[1] = 0;
245     }
246
247     if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
248         and_list = 1;
249     if (qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
250         or_list = 1;
251     while (1)
252     {
253         struct ccl_rpn_node *p;
254         size_t no, i;
255         int no_spaces = 0;
256         int relation_value = -1;
257         int position_value = -1;
258         int structure_value = -1;
259         int truncation_value = -1;
260         int completeness_value = -1;
261         int len = 0;
262         int left_trunc = 0;
263         int right_trunc = 0;
264         int regex_trunc = 0;
265         int z3958_trunc = 0;
266         size_t max = 200;
267         if (and_list || or_list || !multi)
268             max = 1;
269         
270         /* ignore commas when dealing with and-lists .. */
271         if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
272         {
273             lookahead = lookahead->next;
274             ADVANCE;
275             continue;
276         }
277         /* go through each TERM token. If no truncation attribute is yet
278            met, then look for left/right truncation markers (?) and
279            set left_trunc/right_trunc/mid_trunc accordingly */
280         for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
281         {
282             for (i = 0; i<lookahead->len; i++)
283                 if (lookahead->name[i] == ' ')
284                     no_spaces++;
285             len += 1+lookahead->len+lookahead->ws_prefix_len;
286             lookahead = lookahead->next;
287         }
288
289         if (len == 0)
290             break;      /* no more terms . stop . */
291                 
292         /* create the term node, but wait a moment before adding the term */
293         p = ccl_rpn_node_create(CCL_RPN_TERM);
294         p->u.t.attr_list = NULL;
295         p->u.t.term = NULL;
296         if (qa && qa[0])
297         {
298             const char *n = ccl_qual_get_name(qa[0]);
299             if (n)
300                 p->u.t.qual = xstrdup(n);
301         }
302
303         /* go through all attributes and add them to the attribute list */
304         for (i=0; qa && qa[i]; i++)
305         {
306             struct ccl_rpn_attr *attr;
307             
308             for (attr = ccl_qual_get_attr(qa[i]); attr; attr = attr->next)
309                 switch(attr->kind)
310                 {
311                 case CCL_RPN_ATTR_STRING:
312                     ccl_add_attr_string(p, attr->set, attr->type,
313                                         attr->value.str);
314                     break;
315                 case CCL_RPN_ATTR_NUMERIC:
316                     if (attr->value.numeric > 0)
317                     {   /* deal only with REAL attributes (positive) */
318                         switch (attr->type)
319                         {
320                         case CCL_BIB1_REL:
321                             if (relation_value != -1)
322                                 continue;
323                             relation_value = attr->value.numeric;
324                             break;
325                         case CCL_BIB1_POS:
326                             if (position_value != -1)
327                                 continue;
328                             position_value = attr->value.numeric;
329                             break;
330                         case CCL_BIB1_STR:
331                             if (structure_value != -1)
332                                 continue;
333                             structure_value = attr->value.numeric;
334                             break;
335                         case CCL_BIB1_TRU:
336                             if (truncation_value != -1)
337                                 continue;
338                             truncation_value = attr->value.numeric;
339                             break;
340                         case CCL_BIB1_COM:
341                             if (completeness_value != -1)
342                                 continue;
343                             completeness_value = attr->value.numeric;
344                             break;
345                         }
346                         ccl_add_attr_numeric(p, attr->set, attr->type,
347                                              attr->value.numeric);
348                     }
349                 }
350         }
351         /* len now holds the number of characters in the RPN term */
352         /* no holds the number of CCL tokens (1 or more) */
353         
354         if (structure_value == -1 && 
355             qual_val_type(qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
356         {   /* no structure attribute met. Apply either structure attribute 
357                WORD or PHRASE depending on number of CCL tokens */
358             if (no == 1 && no_spaces == 0)
359                 ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 2);
360             else
361                 ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
362         }
363
364         if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
365                           &attset))
366         {
367             regex_trunc = 1; /* regex trunc (102) allowed */
368         }
369         else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
370                           &attset))
371         {
372             z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
373         }
374
375         /* make the RPN token */
376         p->u.t.term = (char *)xmalloc(len * 2 + 2);
377         ccl_assert(p->u.t.term);
378         p->u.t.term[0] = '\0';
379         for (i = 0; i<no; i++)
380         {
381             const char *src_str = cclp->look_token->name;
382             size_t src_len = cclp->look_token->len;
383             int j;
384             int quote_mode = 0;
385
386             if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
387             {
388                 size_t len = strlen(p->u.t.term);
389                 memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
390                        cclp->look_token->ws_prefix_len);
391                 p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
392             }
393             for (j = 0; j < src_len; j++)
394             {
395                 if (j > 0 && src_str[j-1] == '\\')
396                 {
397                     if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
398                     {
399                         regex_trunc = 2;
400                         strcat(p->u.t.term, "\\");
401                     }
402                     else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
403                     {
404                         z3958_trunc = 2;
405                         strcat(p->u.t.term, "\\");
406                     }
407                     strxcat(p->u.t.term, src_str + j, 1);
408                 }
409                 else if (src_str[j] == '"')
410                     quote_mode = !quote_mode;
411                 else if (!quote_mode && src_str[j] == '?')
412                 {
413                     if (regex_trunc)
414                     {
415                         strcat(p->u.t.term, ".*");
416                         regex_trunc = 2; /* regex trunc is really needed */
417                     }
418                     else if (z3958_trunc)
419                     {
420                         strcat(p->u.t.term, "?");
421                         z3958_trunc = 2;
422                     }
423                     else if (i == 0 && j == 0)
424                         left_trunc = 1;
425                     else if (i == no - 1 && j == src_len - 1)
426                         right_trunc = 1;
427                     else
428                     {
429                         cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
430                         ccl_rpn_delete(p);
431                         return NULL;
432                     }
433                 }
434                 else if (!quote_mode && src_str[j] == '#')
435                 {
436                     if (regex_trunc)
437                     {
438                         strcat(p->u.t.term, ".");
439                         regex_trunc = 2; /* regex trunc is really needed */
440                     }
441                     else if (z3958_trunc)
442                     {
443                         strcat(p->u.t.term, "#");
444                         z3958_trunc = 2;
445                     }
446                     else
447                     {
448                         cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
449                         ccl_rpn_delete(p);
450                         return NULL;
451                     }
452                 }
453                 else if (src_str[j] != '\\')
454                 {
455                     if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
456                     {
457                         regex_trunc = 2;
458                         strcat(p->u.t.term, "\\");
459                     }
460                     else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
461                     {
462                         z3958_trunc = 2;
463                         strcat(p->u.t.term, "\\");
464                     }
465                     strxcat(p->u.t.term, src_str + j, 1);                    
466                 }
467             }
468             ADVANCE;
469         }
470
471         /* make the top node point to us.. */
472         if (p_top)
473         {
474             struct ccl_rpn_node *tmp;
475
476             if (or_list)
477                 tmp = ccl_rpn_node_create(CCL_RPN_OR);
478             else if (and_list)
479                 tmp = ccl_rpn_node_create(CCL_RPN_AND);
480             else
481                 tmp = ccl_rpn_node_create(CCL_RPN_AND);
482             tmp->u.p[0] = p_top;
483             tmp->u.p[1] = p;
484
485             p_top = tmp;
486         }
487         else
488             p_top = p;
489
490
491         if (left_trunc && right_trunc)
492         {
493             if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH,
494                                 &attset))
495             {
496                 cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
497                 ccl_rpn_delete(p);
498                 return NULL;
499             }
500             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 3);
501         }
502         else if (right_trunc)
503         {
504             if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT,
505                                  &attset))
506             {
507                 cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
508                 ccl_rpn_delete(p);
509                 return NULL;
510             }
511             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 1);
512         }
513         else if (left_trunc)
514         {
515             if (!qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT,
516                                 &attset))
517             {
518                 cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
519                 ccl_rpn_delete(p);
520                 return NULL;
521             }
522             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
523         }
524         else if (regex_trunc == 2)
525         {
526             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
527         }
528         else if (z3958_trunc == 2)
529         {
530             ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
531         }
532         else
533         {
534             if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
535                                &attset))
536                 ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 100);
537         }
538         if (!multi)
539             break;
540     }
541     if (!p_top)
542         cclp->error_code = CCL_ERR_TERM_EXPECTED;
543     return p_top;
544 }
545
546 static struct ccl_rpn_node *search_term(CCL_parser cclp, ccl_qualifier_t *qa)
547 {
548     static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
549     return search_term_x(cclp, qa, list, 0);
550 }
551
552 static
553 struct ccl_rpn_node *qualifiers_order(CCL_parser cclp,
554                                       ccl_qualifier_t *ap, char *attset)
555 {
556     int rel = 0;
557     struct ccl_rpn_node *p;
558
559     if (cclp->look_token->len == 1)
560     {
561         if (cclp->look_token->name[0] == '<')
562             rel = 1;
563         else if (cclp->look_token->name[0] == '=')
564             rel = 3;
565         else if (cclp->look_token->name[0] == '>')
566             rel = 5;
567     }
568     else if (cclp->look_token->len == 2)
569     {
570         if (!memcmp(cclp->look_token->name, "<=", 2))
571             rel = 2;
572         else if (!memcmp(cclp->look_token->name, ">=", 2))
573             rel = 4;
574         else if (!memcmp(cclp->look_token->name, "<>", 2))
575             rel = 6;
576     }
577     if (!rel)
578     {
579         cclp->error_code = CCL_ERR_BAD_RELATION;
580         return NULL;
581     }
582     ADVANCE;  /* skip relation */
583     if (rel == 3 &&
584         qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, 0))
585     {
586         /* allow - inside term and treat it as range _always_ */
587         /* relation is =. Extract "embedded" - to separate terms */
588         if (KIND == CCL_TOK_TERM)
589         {
590             size_t i;
591             for (i = 0; i<cclp->look_token->len; i++)
592             {
593                 if (cclp->look_token->name[i] == '-')
594                     break;
595             }
596             
597             if (cclp->look_token->len > 1 && i == 0)
598             {   /*  -xx*/
599                 struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
600
601                 ntoken->kind = CCL_TOK_TERM;
602                 ntoken->name = cclp->look_token->name + 1;
603                 ntoken->len = cclp->look_token->len - 1;
604
605                 cclp->look_token->len = 1;
606                 cclp->look_token->name = "-";
607             }
608             else if (cclp->look_token->len > 1 && i == cclp->look_token->len-1)
609             {   /* xx- */
610                 struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
611
612                 ntoken->kind = CCL_TOK_TERM;
613                 ntoken->name = "-";
614                 ntoken->len = 1;
615
616                 (cclp->look_token->len)--;
617             }
618             else if (cclp->look_token->len > 2 && i < cclp->look_token->len)
619             {   /* xx-yy */
620                 struct ccl_token *ntoken1 = ccl_token_add(cclp->look_token);
621                 struct ccl_token *ntoken2 = ccl_token_add(ntoken1);
622
623                 ntoken1->kind = CCL_TOK_TERM;  /* generate - */
624                 ntoken1->name = "-";
625                 ntoken1->len = 1;
626
627                 ntoken2->kind = CCL_TOK_TERM;  /* generate yy */
628                 ntoken2->name = cclp->look_token->name + (i+1);
629                 ntoken2->len = cclp->look_token->len - (i+1);
630
631                 cclp->look_token->len = i;     /* adjust xx */
632             }
633             else if (i == cclp->look_token->len &&
634                      cclp->look_token->next &&
635                      cclp->look_token->next->kind == CCL_TOK_TERM &&
636                      cclp->look_token->next->len > 1 &&
637                      cclp->look_token->next->name[0] == '-')
638                      
639             {   /* xx -yy */
640                 /* we _know_ that xx does not have - in it */
641                 struct ccl_token *ntoken = ccl_token_add(cclp->look_token);
642
643                 ntoken->kind = CCL_TOK_TERM;    /* generate - */
644                 ntoken->name = "-";
645                 ntoken->len = 1;
646
647                 (ntoken->next->name)++;        /* adjust yy */
648                 (ntoken->next->len)--; 
649             }
650         }
651     }
652         
653     if (rel == 3 &&
654         KIND == CCL_TOK_TERM &&
655         cclp->look_token->next && cclp->look_token->next->len == 1 &&
656         cclp->look_token->next->name[0] == '-')
657     {
658         struct ccl_rpn_node *p1;
659         if (!(p1 = search_term(cclp, ap)))
660             return NULL;
661         ADVANCE;                   /* skip '-' */
662         if (KIND == CCL_TOK_TERM)  /* = term - term  ? */
663         {
664             struct ccl_rpn_node *p2;
665             
666             if (!(p2 = search_term(cclp, ap)))
667             {
668                 ccl_rpn_delete(p1);
669                 return NULL;
670             }
671             p = ccl_rpn_node_create(CCL_RPN_AND);
672             p->u.p[0] = p1;
673             ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
674             p->u.p[1] = p2;
675             ccl_add_attr_numeric(p2, attset, CCL_BIB1_REL, 2);
676             return p;
677         }
678         else                       /* = term -    */
679         {
680             ccl_add_attr_numeric(p1, attset, CCL_BIB1_REL, 4);
681             return p1;
682         }
683     }
684     else if (rel == 3 &&
685              cclp->look_token->len == 1 &&
686              cclp->look_token->name[0] == '-')   /* = - term  ? */
687     {
688         ADVANCE;
689         if (!(p = search_term(cclp, ap)))
690             return NULL;
691         ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, 2);
692         return p;
693     }
694     else if (KIND == CCL_TOK_LP)
695     {
696         ADVANCE;
697         if (!(p = find_spec(cclp, ap)))
698             return NULL;
699         if (KIND != CCL_TOK_RP)
700         {
701             cclp->error_code = CCL_ERR_RP_EXPECTED;
702             ccl_rpn_delete(p);
703             return NULL;
704         }
705         ADVANCE;
706         return p;
707     }
708     else
709     {
710         if (!(p = search_terms(cclp, ap)))
711             return NULL;
712         ccl_add_attr_numeric(p, attset, CCL_BIB1_REL, rel);
713         return p;
714     }
715     cclp->error_code = CCL_ERR_TERM_EXPECTED;
716     return NULL;
717 }
718
719 static
720 struct ccl_rpn_node *qualifier_relation(CCL_parser cclp, ccl_qualifier_t *ap)
721 {
722     char *attset;
723     struct ccl_rpn_node *p;
724     
725     if (qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset)
726         || qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_PORDER, &attset))
727         return qualifiers_order(cclp, ap, attset);
728
729     /* unordered relation */
730     if (KIND != CCL_TOK_EQ)
731     {
732         cclp->error_code = CCL_ERR_EQ_EXPECTED;
733         return NULL;
734     }
735     ADVANCE;
736     if (KIND == CCL_TOK_LP)
737     {
738         ADVANCE;
739         if (!(p = find_spec(cclp, ap)))
740         {
741             return NULL;
742         }
743         if (KIND != CCL_TOK_RP)
744         {
745             cclp->error_code = CCL_ERR_RP_EXPECTED;
746             ccl_rpn_delete(p);
747             return NULL;
748         }
749         ADVANCE;
750     }
751     else
752         p = search_terms(cclp, ap);
753     return p;
754 }
755
756 /**
757  * qualifier_list: Parse CCL qualifiers and search terms. 
758  * cclp:   CCL Parser
759  * la:     Token pointer to RELATION token.
760  * qa:     Qualifier attributes already applied.
761  * return: pointer to node(s); NULL on error.
762  */
763 static struct ccl_rpn_node *qualifier_list(CCL_parser cclp, 
764                                            struct ccl_token *la,
765                                            ccl_qualifier_t *qa)
766 {
767     struct ccl_token *lookahead = cclp->look_token;
768     struct ccl_token *look_start = cclp->look_token;
769     ccl_qualifier_t *ap;
770     struct ccl_rpn_node *node = 0;
771     const char **field_str;
772     int no = 0;
773     int seq = 0;
774     int i;
775     int mode_merge = 1;
776 #if 0
777     if (qa)
778     {
779         cclp->error_code = CCL_ERR_DOUBLE_QUAL;
780         return NULL;
781     }
782 #endif
783     for (lookahead = cclp->look_token; lookahead != la;
784          lookahead=lookahead->next)
785         no++;
786     if (qa)
787         for (i=0; qa[i]; i++)
788             no++;
789     ap = (ccl_qualifier_t *)xmalloc((no ? (no+1) : 2) * sizeof(*ap));
790     ccl_assert(ap);
791
792     field_str = ccl_qual_search_special(cclp->bibset, "field");
793     if (field_str)
794     {
795         if (!strcmp(field_str[0], "or"))
796             mode_merge = 0;
797         else if (!strcmp(field_str[0], "merge"))
798             mode_merge = 1;
799     }
800     if (!mode_merge)
801     {
802         /* consider each field separately and OR */
803         lookahead = look_start;
804         while (lookahead != la)
805         {
806             ap[1] = 0;
807             seq = 0;
808             while ((ap[0] = ccl_qual_search(cclp, lookahead->name,
809                                             lookahead->len, seq)) != 0)
810             {
811                 struct ccl_rpn_node *node_sub;
812                 cclp->look_token = la;
813                 
814                 node_sub = qualifier_relation(cclp, ap);
815                 if (!node_sub)
816                 {
817                     ccl_rpn_delete(node);
818                     xfree(ap);
819                     return 0;
820                 }
821                 if (node)
822                 {
823                     struct ccl_rpn_node *node_this = 
824                         ccl_rpn_node_create(CCL_RPN_OR);
825                     node_this->u.p[0] = node;
826                     node_this->u.p[1] = node_sub;
827                     node = node_this;
828                 }
829                 else
830                     node = node_sub;
831                 seq++;
832             }
833             if (seq == 0)
834             {
835                 cclp->look_token = lookahead;
836                 cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
837                 xfree(ap);
838                 return NULL;
839             }
840             lookahead = lookahead->next;
841             if (lookahead->kind == CCL_TOK_COMMA)
842                 lookahead = lookahead->next;
843         }
844     }
845     else
846     {
847         /* merge attributes from ALL fields - including inherited ones */
848         while (1)
849         {
850             struct ccl_rpn_node *node_sub;
851             int found = 0;
852             lookahead = look_start;
853             for (i = 0; lookahead != la; i++)
854             {
855                 ap[i] = ccl_qual_search(cclp, lookahead->name,
856                                          lookahead->len, seq);
857                 if (ap[i])
858                     found++;
859                 if (!ap[i] && seq > 0)
860                     ap[i] = ccl_qual_search(cclp, lookahead->name,
861                                              lookahead->len, 0);
862                 if (!ap[i])
863                 {
864                     cclp->look_token = lookahead;
865                     cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
866                     xfree(ap);
867                     return NULL;
868                 }
869                 lookahead = lookahead->next;
870                 if (lookahead->kind == CCL_TOK_COMMA)
871                     lookahead = lookahead->next;
872             }
873             if (qa)
874             {
875                 ccl_qualifier_t *qa0 = qa;
876                 
877                 while (*qa0)
878                     ap[i++] = *qa0++;
879             }
880             ap[i] = NULL;
881             
882             if (!found)
883                 break;
884             
885             cclp->look_token = lookahead;
886             
887             node_sub = qualifier_relation(cclp, ap);
888             if (!node_sub)
889             {
890                 ccl_rpn_delete(node);
891                 break;
892             }
893             if (node)
894             {
895                 struct ccl_rpn_node *node_this = 
896                     ccl_rpn_node_create(CCL_RPN_OR);
897                 node_this->u.p[0] = node;
898                 node_this->u.p[1] = node_sub;
899                 node = node_this;
900             }
901             else
902                 node = node_sub;
903             seq++;
904         }
905     }
906     xfree(ap);
907     return node;
908 }
909
910
911 /**
912  * search_terms: Parse CCL search terms - including proximity.
913  * cclp:   CCL Parser
914  * qa:     Qualifier attributes already applied.
915  * return: pointer to node(s); NULL on error.
916  */
917 static struct ccl_rpn_node *search_terms(CCL_parser cclp, ccl_qualifier_t *qa)
918 {
919     static int list[] = {
920         CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1};
921     struct ccl_rpn_node *p1, *p2, *pn;
922     p1 = search_term_x(cclp, qa, list, 1);
923     if (!p1)
924         return NULL;
925     while (1)
926     {
927         if (KIND == CCL_TOK_PROX)
928         {
929             struct ccl_rpn_node *p_prox = 0;
930             /* ! word order specified */
931             /* % word order not specified */
932             p_prox = ccl_rpn_node_create(CCL_RPN_TERM);
933             p_prox->u.t.term = (char *) xmalloc(1 + cclp->look_token->len);
934             memcpy(p_prox->u.t.term, cclp->look_token->name,
935                    cclp->look_token->len);
936             p_prox->u.t.term[cclp->look_token->len] = 0;
937             p_prox->u.t.attr_list = 0;
938
939             ADVANCE;
940             p2 = search_term_x(cclp, qa, list, 1);
941             if (!p2)
942             {
943                 ccl_rpn_delete(p1);
944                 return NULL;
945             }
946             pn = ccl_rpn_node_create(CCL_RPN_PROX);
947             pn->u.p[0] = p1;
948             pn->u.p[1] = p2;
949             pn->u.p[2] = p_prox;
950             p1 = pn;
951         }
952         else if (is_term_ok(KIND, list))
953         {
954             p2 = search_term_x(cclp, qa, list, 1);
955             if (!p2)
956             {
957                 ccl_rpn_delete(p1);
958                 return NULL;
959             }
960             pn = ccl_rpn_node_create(CCL_RPN_PROX);
961             pn->u.p[0] = p1;
962             pn->u.p[1] = p2;
963             pn->u.p[2] = 0;
964             p1 = pn;
965         }
966         else
967             break;
968     }
969     return p1;
970 }
971
972 /**
973  * search_elements: Parse CCL search elements
974  * cclp:   CCL Parser
975  * qa:     Qualifier attributes already applied.
976  * return: pointer to node(s); NULL on error.
977  */
978 static struct ccl_rpn_node *search_elements(CCL_parser cclp,
979                                             ccl_qualifier_t *qa)
980 {
981     struct ccl_rpn_node *p1;
982     struct ccl_token *lookahead;
983     if (KIND == CCL_TOK_LP)
984     {
985         ADVANCE;
986         p1 = find_spec(cclp, qa);
987         if (!p1)
988             return NULL;
989         if (KIND != CCL_TOK_RP)
990         {
991             cclp->error_code = CCL_ERR_RP_EXPECTED;
992             ccl_rpn_delete(p1);
993             return NULL;
994         }
995         ADVANCE;
996         return p1;
997     }
998     else if (KIND == CCL_TOK_SET)
999     {
1000         ADVANCE;
1001         if (KIND == CCL_TOK_EQ)
1002             ADVANCE;
1003         if (KIND != CCL_TOK_TERM)
1004         {
1005             cclp->error_code = CCL_ERR_SETNAME_EXPECTED;
1006             return NULL;
1007         }
1008         p1 = ccl_rpn_node_create(CCL_RPN_SET);
1009         p1->u.setname = copy_token_name(cclp->look_token);
1010         ADVANCE;
1011         return p1;
1012     }
1013     lookahead = cclp->look_token;
1014
1015     while (lookahead->kind==CCL_TOK_TERM)
1016     {
1017         lookahead = lookahead->next;
1018         if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
1019             return qualifier_list(cclp, lookahead, qa);
1020         if (lookahead->kind != CCL_TOK_COMMA)
1021             break;
1022         lookahead = lookahead->next;
1023     }
1024     if (qa)
1025         return search_terms(cclp, qa);
1026     else
1027     {
1028         ccl_qualifier_t qa[2];
1029         struct ccl_rpn_node *node = 0;
1030         int seq;
1031         lookahead = cclp->look_token;
1032
1033         qa[1] = 0;
1034         for(seq = 0; ;seq++)
1035         {
1036             struct ccl_rpn_node *node_sub;
1037             qa[0] = ccl_qual_search(cclp, "term", 4, seq);
1038             if (!qa[0])
1039                 break;
1040
1041             cclp->look_token = lookahead;
1042
1043             node_sub = search_terms(cclp, qa);
1044             if (!node_sub)
1045             {
1046                 ccl_rpn_delete(node);
1047                 return 0;
1048             }
1049             if (node)
1050             {
1051                 struct ccl_rpn_node *node_this = 
1052                     ccl_rpn_node_create(CCL_RPN_OR);
1053                 node_this->u.p[0] = node;
1054                 node_this->u.p[1] = node_sub;
1055                 node_this->u.p[2] = 0;
1056                 node = node_this;
1057             }
1058             else
1059                 node = node_sub;
1060         }
1061         if (!node)
1062             node = search_terms(cclp, 0);
1063         return node;
1064     }
1065 }
1066
1067 /**
1068  * find_spec: Parse CCL find specification
1069  * cclp:   CCL Parser
1070  * qa:     Qualifier attributes already applied.
1071  * return: pointer to node(s); NULL on error.
1072  */
1073 static struct ccl_rpn_node *find_spec(CCL_parser cclp, ccl_qualifier_t *qa)
1074 {
1075     struct ccl_rpn_node *p1, *p2, *pn;
1076     if (!(p1 = search_elements(cclp, qa)))
1077         return NULL;
1078     while (1)
1079     {
1080         switch (KIND)
1081         {
1082         case CCL_TOK_AND:
1083             ADVANCE;
1084             p2 = search_elements(cclp, qa);
1085             if (!p2)
1086             {
1087                 ccl_rpn_delete(p1);
1088                 return NULL;
1089             }
1090             pn = ccl_rpn_node_create(CCL_RPN_AND);
1091             pn->u.p[0] = p1;
1092             pn->u.p[1] = p2;
1093             pn->u.p[2] = 0;
1094             p1 = pn;
1095             continue;
1096         case CCL_TOK_OR:
1097             ADVANCE;
1098             p2 = search_elements(cclp, qa);
1099             if (!p2)
1100             {
1101                 ccl_rpn_delete(p1);
1102                 return NULL;
1103             }
1104             pn = ccl_rpn_node_create(CCL_RPN_OR);
1105             pn->u.p[0] = p1;
1106             pn->u.p[1] = p2;
1107             pn->u.p[2] = 0;
1108             p1 = pn;
1109             continue;
1110         case CCL_TOK_NOT:
1111             ADVANCE;
1112             p2 = search_elements(cclp, qa);
1113             if (!p2)
1114             {
1115                 ccl_rpn_delete(p1);
1116                 return NULL;
1117             }
1118             pn = ccl_rpn_node_create(CCL_RPN_NOT);
1119             pn->u.p[0] = p1;
1120             pn->u.p[1] = p2;
1121             pn->u.p[2] = 0;
1122             p1 = pn;
1123             continue;
1124         }
1125         break;
1126     }
1127     return p1;
1128 }
1129
1130 struct ccl_rpn_node *ccl_parser_find_str(CCL_parser cclp, const char *str)
1131 {
1132     struct ccl_rpn_node *p;
1133     struct ccl_token *list = ccl_parser_tokenize(cclp, str);
1134     p = ccl_parser_find_token(cclp, list);
1135     ccl_token_del(list);
1136     return p;
1137 }
1138
1139 struct ccl_rpn_node *ccl_parser_find_token(CCL_parser cclp, 
1140                                            struct ccl_token *list)
1141 {
1142     struct ccl_rpn_node *p;
1143
1144     cclp->look_token = list;
1145     p = find_spec(cclp, NULL);
1146     if (p && KIND != CCL_TOK_EOL)
1147     {
1148         if (KIND == CCL_TOK_RP)
1149             cclp->error_code = CCL_ERR_BAD_RP;
1150         else
1151             cclp->error_code = CCL_ERR_OP_EXPECTED;
1152         ccl_rpn_delete(p);
1153         p = NULL;
1154     }
1155     cclp->error_pos = cclp->look_token->name;
1156     if (p)
1157         cclp->error_code = CCL_ERR_OK;
1158     else
1159         cclp->error_code = cclp->error_code;
1160     return p;
1161 }
1162
1163 /**
1164  * ccl_find_str: Parse CCL find - string representation
1165  * bibset:  Bibset to be used for the parsing
1166  * str:     String to be parsed
1167  * error:   Pointer to integer. Holds error no. on completion.
1168  * pos:     Pointer to char position. Holds approximate error position.
1169  * return:  RPN tree on successful completion; NULL otherwise.
1170  */
1171 struct ccl_rpn_node *ccl_find_str(CCL_bibset bibset, const char *str,
1172                                   int *error, int *pos)
1173 {
1174     CCL_parser cclp = ccl_parser_create(bibset);
1175     struct ccl_token *list;
1176     struct ccl_rpn_node *p;
1177
1178     list = ccl_parser_tokenize(cclp, str);
1179     p = ccl_parser_find_token(cclp, list);
1180
1181     *error = cclp->error_code;
1182     if (*error)
1183         *pos = cclp->error_pos - str;
1184     ccl_parser_destroy(cclp);
1185     ccl_token_del(list);
1186     return p;
1187 }
1188
1189 /*
1190  * Local variables:
1191  * c-basic-offset: 4
1192  * c-file-style: "Stroustrup"
1193  * indent-tabs-mode: nil
1194  * End:
1195  * vim: shiftwidth=4 tabstop=8 expandtab
1196  */
1197