1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief Implements CQL transform (CQL to RPN conversion).
9 * Evaluation order of rules:
26 #include <yaz/rpn2cql.h>
27 #include <yaz/xmalloc.h>
28 #include <yaz/diagsrw.h>
29 #include <yaz/tokenizer.h>
30 #include <yaz/wrbuf.h>
31 #include <yaz/z-core.h>
32 #include <yaz/matchstr.h>
33 #include <yaz/oid_db.h>
/* One configured rule, kept as a node of a singly linked list (`next`).
 * `attr_list` holds the attribute list parsed from the rule's value.
 * Other code in this file also accesses `pattern` and `value` members;
 * their declarations are not visible in this excerpt. */
36 struct cql_prop_entry {
39 Z_AttributeList attr_list;
40 struct cql_prop_entry *next;
/* State behind a cql_transform_t handle.
 * `entry` is the head of the rule list; `tok_cfg` is the tokenizer
 * configuration used when rule values are parsed.
 * Other code in this file also uses `addinfo` and `nmem` members; their
 * declarations are not visible in this excerpt. */
43 struct cql_transform_t_ {
44 struct cql_prop_entry *entry;
45 yaz_tok_cfg_t tok_cfg;
/* Creates a new transform handle.
 * The handle itself comes from xmalloc and receives its own tokenizer
 * configuration, addinfo WRBUF and NMEM; these are the same three resources
 * that cql_transform_close() destroys. */
52 cql_transform_t cql_transform_create(void)
54 cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct));
55 ct->tok_cfg = yaz_tok_cfg_create();
57 ct->addinfo = wrbuf_alloc();
59 ct->nmem = nmem_create();
/* Parses the value part of one rule from the tokenizer `tp` and, when
 * parsing succeeded (ret == 0), stores a new cql_prop_entry for `pattern`.
 *
 * The value is a sequence of "attset type=value" or "type=value" items.
 * Each item becomes one Z_AttributeElement; at most 20 are collected (the
 * size of ae[]).  The token text is also accumulated in `w`, which becomes
 * the entry's `value` string.
 *
 * ret follows the convention noted on its declaration: 0 = OK, non-zero =
 * failure.  Syntax problems are reported with yaz_log(YLOG_WARN, ...). */
63 static int cql_transform_parse_tok_line(cql_transform_t ct,
68 Z_AttributeElement *ae[20];
69 int ret = 0; /* 0=OK, != 0 FAIL */
71 WRBUF w = wrbuf_alloc();
/* One iteration per attribute item; the ae_num bound keeps ae[] in range. */
75 while (t == YAZ_TOK_STRING && ae_num < 20)
77 WRBUF type_str = wrbuf_alloc();
79 Z_AttributeElement *elem = 0;
80 const char *value_str = 0;
81 /* attset type=value OR type=value */
/* Elements are allocated from ct->nmem, the pool that
 * cql_transform_close() destroys. */
83 elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem));
84 elem->attributeSet = 0;
86 wrbuf_puts(w, yaz_tok_parse_string(tp));
87 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
91 wrbuf_destroy(type_str);
93 wrbuf_destroy(set_str);
96 if (t == YAZ_TOK_STRING)
99 wrbuf_puts(w, yaz_tok_parse_string(tp));
/* The attribute set name is resolved to an OID of class CLASS_ATTSET,
 * allocated from ct->nmem. */
103 yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET,
104 wrbuf_cstr(set_str), ct->nmem);
106 type_str = wrbuf_alloc();
107 wrbuf_puts(type_str, yaz_tok_parse_string(tp));
108 t = yaz_tok_move(tp);
/* attributeType is pre-allocated as 0 and then filled in by sscanf. */
110 elem->attributeType = nmem_intdup(ct->nmem, 0);
111 if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType)
114 wrbuf_destroy(type_str);
116 wrbuf_destroy(set_str);
117 yaz_log(YLOG_WARN, "Expected numeric attribute type");
122 wrbuf_destroy(type_str);
124 wrbuf_destroy(set_str);
/* NOTE(review): the message below repeats the word "after". */
128 yaz_log(YLOG_WARN, "Expected = after after attribute type");
132 t = yaz_tok_move(tp);
133 if (t != YAZ_TOK_STRING) /* value */
135 yaz_log(YLOG_WARN, "Missing attribute value");
139 value_str = yaz_tok_parse_string(tp);
/* A value whose first character is a digit is stored as a numeric
 * attribute; anything else becomes a complex attribute holding a single
 * string.
 * NOTE(review): only the first character is checked and atoi() reports
 * neither trailing garbage nor overflow - confirm that is acceptable. */
140 if (yaz_isdigit(*value_str))
142 elem->which = Z_AttributeValue_numeric;
143 elem->value.numeric =
144 nmem_intdup(ct->nmem, atoi(value_str));
148 Z_ComplexAttribute *ca = (Z_ComplexAttribute *)
149 nmem_malloc(ct->nmem, sizeof(*ca));
150 elem->which = Z_AttributeValue_complex;
151 elem->value.complex = ca;
/* One-element list containing a copy of the value string. */
153 ca->list = (Z_StringOrNumeric **)
154 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *));
155 ca->list[0] = (Z_StringOrNumeric *)
156 nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric));
157 ca->list[0]->which = Z_StringOrNumeric_string;
158 ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str);
159 ca->num_semanticAction = 0;
160 ca->semanticAction = 0;
163 wrbuf_puts(w, yaz_tok_parse_string(tp));
164 t = yaz_tok_move(tp);
168 if (ret == 0) /* OK? */
/* pp starts at the list head; any walk to the list tail happens in lines
 * not visible in this excerpt. */
170 struct cql_prop_entry **pp = &ct->entry;
/* The entry and its two strings are xmalloc/xstrdup allocations, whereas
 * the attribute pointer array below is taken from ct->nmem. */
173 *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp));
174 (*pp)->pattern = xstrdup(pattern);
175 (*pp)->value = xstrdup(wrbuf_cstr(w));
177 (*pp)->attr_list.num_attributes = ae_num;
179 (*pp)->attr_list.attributes = 0;
182 (*pp)->attr_list.attributes = (Z_AttributeElement **)
183 nmem_malloc(ct->nmem,
184 ae_num * sizeof(Z_AttributeElement *));
185 memcpy((*pp)->attr_list.attributes, ae,
186 ae_num * sizeof(Z_AttributeElement *));
/* Prints the new attribute list through an ODR_PRINT stream bound to the
 * yaz log file. */
192 ODR pr = odr_createmem(ODR_PRINT);
193 Z_AttributeList *alp = &(*pp)->attr_list;
194 odr_setprint_noclose(pr, yaz_log_file());
195 z_AttributeList(pr, &alp, 0, 0);
/* Defines a single rule: tokenizes `value` and passes the parser together
 * with `pattern` to cql_transform_parse_tok_line(); r receives that
 * function's result.  The parser is destroyed afterwards.
 *
 * NOTE(review): the parser is created from ct->tok_cfg before "=" is
 * registered as a single-character token, whereas
 * cql_transform_open_FILE() registers it first - confirm the parser picks
 * up configuration changes made after yaz_tok_parse_buf(). */
203 int cql_transform_define_pattern(cql_transform_t ct, const char *pattern,
207 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value);
208 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
209 r = cql_transform_parse_tok_line(ct, pattern, tp);
210 yaz_tok_parse_destroy(tp);
/* Builds a transform handle from an already opened rules file.
 *
 * The file is read line by line.  Each line gets its own tokenizer; when
 * the first token is a string it is taken as the rule pattern and the rest
 * of the line is handed to cql_transform_parse_tok_line().  On each visible
 * error path the parser is destroyed and the half-built handle is closed.
 * The values returned on success and failure are in lines not visible in
 * this excerpt. */
214 cql_transform_t cql_transform_open_FILE(FILE *f)
216 cql_transform_t ct = cql_transform_create();
/* "=" is made a token of its own so "type=value" splits into three. */
219 yaz_tok_cfg_single_tokens(ct->tok_cfg, "=");
/* NOTE(review): fgets() already stores at most size-1 characters plus the
 * terminating NUL, so the "-1" only leaves one byte of the buffer unused
 * (assuming `line` is a char array - its declaration is not visible). */
221 while (fgets(line, sizeof(line)-1, f))
223 yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line);
225 t = yaz_tok_move(tp);
226 if (t == YAZ_TOK_STRING)
/* NOTE(review): pattern is an xstrdup'ed copy; its release is not visible
 * in this excerpt - verify it is freed on every path, including the error
 * returns below. */
228 char * pattern = xstrdup(yaz_tok_parse_string(tp));
229 t = yaz_tok_move(tp);
232 yaz_tok_parse_destroy(tp);
233 cql_transform_close(ct);
/* Non-zero from the line parser means the rule value was malformed. */
236 if (cql_transform_parse_tok_line(ct, pattern, tp))
238 yaz_tok_parse_destroy(tp);
239 cql_transform_close(ct);
/* A line that starts with neither a string token nor EOF is an error. */
244 else if (t != YAZ_TOK_EOF)
246 yaz_tok_parse_destroy(tp);
247 cql_transform_close(ct);
250 yaz_tok_parse_destroy(tp);
/* Releases a transform handle.
 * The rule list is walked with the successor saved in pe_next before the
 * current node is dealt with (the per-node release is in lines not visible
 * in this excerpt).  Afterwards the addinfo WRBUF, the tokenizer
 * configuration and the NMEM pool are destroyed. */
255 void cql_transform_close(cql_transform_t ct)
257 struct cql_prop_entry *pe;
263 struct cql_prop_entry *pe_next = pe->next;
269 wrbuf_destroy(ct->addinfo);
270 yaz_tok_cfg_destroy(ct->tok_cfg);
271 nmem_destroy(ct->nmem);
/* Opens the rules file `fname` for reading and builds a transform handle
 * from it via cql_transform_open_FILE().
 * Handling of a failed fopen() and the matching fclose() are in lines not
 * visible in this excerpt. */
275 cql_transform_t cql_transform_open_fname(const char *fname)
278 FILE *f = fopen(fname, "r");
281 ct = cql_transform_open_FILE(f);
/* NOTE(review): partial listing of Z_AttributeElement.  The code in this
 * file uses the members attributeSet, attributeType, which and value
 * (numeric / complex).  This looks like a reference copy of a type declared
 * in a YAZ header - confirm whether it is compiled at all. */
287 struct Z_AttributeElement {
288 Z_AttributeSetId *attributeSet; /* OPT */
293 Z_ComplexAttribute *complex;
294 #define Z_AttributeValue_numeric 1
295 #define Z_AttributeValue_complex 2
/* Compares two attribute elements by value.
 * Each element is serialized into its own ODR_ENCODE stream and the two
 * encoded buffers are compared with yaz_memcmp(); the result is kept in
 * ret.  cql_lookup_reverse() treats a zero result as "equal". */
300 static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b)
302 ODR odr_a = odr_createmem(ODR_ENCODE);
303 ODR odr_b = odr_createmem(ODR_ENCODE);
308 z_AttributeElement(odr_a, &a, 0, 0);
309 z_AttributeElement(odr_b, &b, 0, 0);
311 buf_a = odr_getbuf(odr_a, &len_a, 0);
312 buf_b = odr_getbuf(odr_b, &len_b, 0);
314 ret = yaz_memcmp(buf_a, buf_b, len_a, len_b);
/* Reverse lookup: finds a rule whose pattern begins with `category` and
 * whose attributes all occur in `attributes`.
 *
 * Entries are tried in list order.  For the first entry that matches, the
 * part of its pattern following the `category` prefix is returned.  The
 * value returned when nothing matches is in lines not visible in this
 * excerpt. */
321 const char *cql_lookup_reverse(cql_transform_t ct,
322 const char *category,
323 Z_AttributeList *attributes)
325 struct cql_prop_entry *e;
326 size_t clen = strlen(category);
327 for (e = ct->entry; e; e = e->next)
/* Prefix test: only the first clen characters of the pattern are compared. */
329 if (!strncmp(e->pattern, category, clen))
331 /* category matches.. See if attributes in pattern value
332 are all listed in actual attributes */
334 for (i = 0; i < e->attr_list.num_attributes; i++)
336 /* entry attribute */
337 Z_AttributeElement *e_ae = e->attr_list.attributes[i];
339 for (j = 0; j < attributes->num_attributes; j++)
341 /* actual attribute */
/* a_ae is a struct copy, so clearing its attributeSet below does not
 * modify the caller's attribute list. */
342 Z_AttributeElement a_ae = *attributes->attributes[j];
/* An explicit Bib-1 attribute set on the actual attribute is dropped
 * before the comparison.
 * NOTE(review): `&e_ae->attributeSet` is the address of a struct member
 * and therefore always non-null; confirm whether a test on
 * e_ae->attributeSet itself was intended. */
343 if (a_ae.attributeSet && &e_ae->attributeSet &&
344 !oid_oidcmp(a_ae.attributeSet, yaz_oid_attset_bib_1))
345 a_ae.attributeSet = 0;
346 if (!compare_attr(e_ae, &a_ae))
349 if (j == attributes->num_attributes)
350 break; /* i was not found at all.. try next pattern */
/* i reaching the end means no entry attribute was left unmatched. */
353 if (i == e->attr_list.num_attributes)
354 return e->pattern + clen;
/* Looks up a rule by a dotted key assembled from pat1, pat2 and pat3.
 *
 * The key is "pat1.pat2.pat3", "pat1.pat2", "pat1.pat3" or just "pat1",
 * depending on which parts are non-NULL; each part is truncated to 39
 * characters.  The rule list is then searched with cql_strcmp() for an
 * entry whose pattern equals the key.  What is returned for a hit or a miss
 * is in lines not visible in this excerpt; callers use the result as a
 * string that is NULL when nothing was found.
 *
 * NOTE(review): the longest key is 3*39 characters + 2 dots + NUL = 120
 * bytes; confirm `pattern` (declaration not visible) is at least that
 * large, since sprintf() is unbounded. */
360 static const char *cql_lookup_property(cql_transform_t ct,
361 const char *pat1, const char *pat2,
365 struct cql_prop_entry *e;
367 if (pat1 && pat2 && pat3)
368 sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
369 else if (pat1 && pat2)
370 sprintf(pattern, "%.39s.%.39s", pat1, pat2);
371 else if (pat1 && pat3)
372 sprintf(pattern, "%.39s.%.39s", pat1, pat3);
374 sprintf(pattern, "%.39s", pat1);
378 for (e = ct->entry; e; e = e->next)
380 if (!cql_strcmp(e->pattern, pattern))
/* Emits the "@attr ..." items configured for category / uri / value.
 *
 * ct           transform handle holding the rules
 * addinfo      receives `val` on the final error path
 * category     rule category, e.g. "index", "relation", "truncation"
 * uri          context set URI; matched against the value of the "set.*"
 *              rules to find the prefix
 * val          value to look up; default_val is used when val is NULL
 * pr           output callback, called with client_data
 *
 * Lookup order as far as visible: the exact value, then (for category
 * "relation" without prefix) a few operator aliases, then the wildcard
 * value "*".  A visible early exit returns 1 to signal an error without
 * touching addinfo. */
386 int cql_pr_attr_uri(cql_transform_t ct, WRBUF addinfo, const char *category,
387 const char *uri, const char *val, const char *default_val,
388 void (*pr)(const char *buf, void *client_data),
393 const char *eval = val ? val : default_val;
394 const char *prefix = 0;
398 struct cql_prop_entry *e;
/* Find the "set.<prefix>" rule whose value equals uri.
 * NOTE(review): memcmp() always reads 4 bytes, which over-reads a pattern
 * shorter than 3 characters; cql_transform_r() performs the same test with
 * cql_strncmp(). */
400 for (e = ct->entry; e; e = e->next)
401 if (!memcmp(e->pattern, "set.", 4) && e->value &&
402 !strcmp(e->value, uri))
404 prefix = e->pattern+4;
407 /* must have a prefix now - if not it's an error */
413 res = cql_lookup_property(ct, category, prefix, eval);
414 /* we have some aliases for some relations unfortunately.. */
415 if (!res && !prefix && !strcmp(category, "relation"))
/* NOTE(review): these tests use val rather than eval; val may be NULL when
 * only default_val was supplied - confirm a guard exists in the lines not
 * visible here. */
417 if (!strcmp(val, "=="))
418 res = cql_lookup_property(ct, category, prefix, "exact");
419 if (!strcmp(val, "="))
420 res = cql_lookup_property(ct, category, prefix, "eq");
421 if (!strcmp(val, "<="))
422 res = cql_lookup_property(ct, category, prefix, "le");
423 if (!strcmp(val, ">="))
424 res = cql_lookup_property(ct, category, prefix, "ge");
/* Wildcard rule as a further fallback. */
427 res = cql_lookup_property(ct, category, prefix, "*");
/* res is scanned one "...=..." item at a time: each item runs up to the
 * next space, is copied into buf (subject to the size check below) and is
 * emitted after an "@attr " prefix. */
433 const char *cp0 = res, *cp1;
434 while ((cp1 = strchr(cp0, '=')))
437 while (*cp1 && *cp1 != ' ')
439 if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf))
441 memcpy(buf, cp0, cp1 - cp0);
443 (*pr)("@attr ", client_data);
445 for (i = 0; buf[i]; i++)
448 (*pr)(eval, client_data);
454 (*pr)(tmp, client_data);
457 (*pr)(" ", client_data);
466 return 1; /* signal error, but do not set addinfo */
468 wrbuf_puts(addinfo, val);
/* Convenience wrapper around cql_pr_attr_uri() for lookups that are not
 * tied to a context set: it passes 0 as the uri and forwards all other
 * arguments unchanged. */
472 int cql_pr_attr(cql_transform_t ct, WRBUF addinfo, const char *category,
473 const char *val, const char *default_val,
474 void (*pr)(const char *buf, void *client_data),
478 return cql_pr_attr_uri(ct, addinfo, category, 0 /* uri */,
479 val, default_val, pr, client_data, errcode);
/* Writes `val` in decimal through the output callback `pr`, followed by a
 * single space as a separator. */
483 static void cql_pr_int(int val,
484 void (*pr)(const char *buf, void *client_data),
487 char buf[21]; /* enough characters to 2^64 */
488 sprintf(buf, "%d", val);
489 (*pr)(buf, client_data);
490 (*pr)(" ", client_data);
/* Emits the parameters of a proximity operator from its modifier chain.
 *
 * The chain is followed through u.st.modifiers.  Recognized modifier names
 * are "distance", "ordered", "unordered" and "unit".  Anything else, an
 * unknown distance relation or an unknown unit, puts the offending text in
 * addinfo and returns the matching YAZ_SRW_UNSUPP_* diagnostic.
 *
 * Afterwards the values are written in this order: exclusion, distance,
 * ordered, relation, the literal "k", unit. */
494 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
496 void (*pr)(const char *buf, void *client_data),
502 int proxrel = 2; /* less than or equal */
503 int unit = 2; /* word */
507 const char *name = mods->u.st.index;
508 const char *term = mods->u.st.term;
509 const char *relation = mods->u.st.relation;
511 if (!strcmp(name, "distance")) {
/* NOTE(review): base 0 also accepts octal and hex input, and with a null
 * end pointer a non-numeric term is not detected (strtol then yields 0). */
512 distance = strtol(term, (char**) 0, 0);
513 if (!strcmp(relation, "="))
515 else if (!strcmp(relation, ">"))
517 else if (!strcmp(relation, "<"))
519 else if (!strcmp(relation, ">="))
521 else if (!strcmp(relation, "<="))
523 else if (!strcmp(relation, "<>"))
527 wrbuf_puts(addinfo, relation);
528 return YAZ_SRW_UNSUPP_PROX_RELATION;
531 else if (!strcmp(name, "ordered"))
533 else if (!strcmp(name, "unordered"))
535 else if (!strcmp(name, "unit"))
537 if (!strcmp(term, "word"))
539 else if (!strcmp(term, "sentence"))
541 else if (!strcmp(term, "paragraph"))
543 else if (!strcmp(term, "element"))
547 wrbuf_puts(addinfo, term);
548 return YAZ_SRW_UNSUPP_PROX_UNIT;
553 wrbuf_puts(addinfo, name);
554 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
556 mods = mods->u.st.modifiers;
/* Distance fallback: 1 for unit 2 ("word" per the declaration above), else
 * 0.  The condition guarding this assignment is not visible here. */
560 distance = (unit == 2) ? 1 : 0;
562 cql_pr_int(exclusion, pr, client_data);
563 cql_pr_int(distance, pr, client_data);
564 cql_pr_int(ordered, pr, client_data);
565 cql_pr_int(proxrel, pr, client_data);
566 (*pr)("k ", client_data);
567 cql_pr_int(unit, pr, client_data);
572 /* ### checks for CQL relation-name rather than Type-1 attribute */
/* Walks the modifier chain of `cn` looking for a modifier whose index is
 * exactly `name` (case-sensitive strcmp).  The returned values are in lines
 * not visible in this excerpt; emit_term() uses the result as a boolean. */
573 static int has_modifier(struct cql_node *cn, const char *name) {
574 struct cql_node *mod;
575 for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) {
576 if (!strcmp(mod->u.st.index, name))
/* Emits one search term: its attributes first, then the quoted term text.
 *
 * ct, cn       transform handle and the search-term node supplying index,
 *              index URI and relation modifiers
 * term,length  term text and the number of characters of it to use (the
 *              text need not be NUL-terminated at `length`)
 * pr           output callback, called with client_data
 *
 * Steps as far as visible: choose how masking characters are handled
 * ("regexp" / "unmasked" modifiers, or a configured "truncation.cql" rule),
 * emit a "position" attribute depending on anchoring, emit a "truncation"
 * attribute, emit the "index" attribute (default "serverChoice"), emit one
 * "relationModifier" attribute per modifier, and finally write the term
 * between double quotes with escaping. */
583 static int emit_term(cql_transform_t ct,
584 struct cql_node *cn, WRBUF addinfo,
585 const char *term, int length,
586 void (*pr)(const char *buf, void *client_data),
590 const char *ns = cn->u.st.index_uri;
592 int process_term = 1;
594 if (has_modifier(cn, "regexp"))
596 else if (has_modifier(cn, "unmasked"))
/* A configured "truncation.cql" rule is emitted directly. */
598 else if (cql_lookup_property(ct, "truncation", 0, "cql"))
601 r = cql_pr_attr(ct, addinfo, "truncation", "cql", 0,
602 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
606 assert(cn->which == CQL_NODE_ST);
609 { /* convert term via truncation.things */
/* Scan of the term; a backslash that is not the last character is treated
 * specially (handling not visible in this excerpt). */
612 for (i = 0; i < length; i++)
614 if (term[i] == '\\' && i < length - 1)
623 else if (i == length - 1)
629 else if (i == length - 1)
/* The anchoring found during the scan selects the "position" rule:
 * firstAndLast, first (anchor == 1), last (anchor == 2) or any. */
642 r = cql_pr_attr(ct, addinfo, "position", "firstAndLast", 0,
644 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
650 else if (anchor == 1)
652 r = cql_pr_attr(ct, addinfo, "position", "first", 0,
654 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
660 else if (anchor == 2)
662 r = cql_pr_attr(ct, addinfo, "position", "last", 0,
664 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
671 r = cql_pr_attr(ct, addinfo, "position", "any", 0,
673 YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION);
/* Truncation rules both / left / right are tried with errcode 0, so a
 * missing rule is not reported through addinfo at this point. */
679 if (trunc == 3 && !cql_pr_attr(ct, addinfo, "truncation",
680 "both", 0, pr, client_data, 0))
685 else if (trunc == 1 && !cql_pr_attr(ct, addinfo, "truncation",
686 "left", 0, pr, client_data, 0))
691 else if (trunc == 2 && !cql_pr_attr(ct, addinfo, "truncation",
692 "right", 0, pr, client_data, 0))
699 cql_pr_attr(ct, addinfo, "truncation", "none", 0,
/* Z39.58-style masking rule; a missing rule is reported as
 * YAZ_SRW_MASKING_CHAR_UNSUPP. */
704 r = cql_pr_attr(ct, addinfo, "truncation", "z3958", 0,
705 pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP);
/* Index attribute, resolved through the index's context-set URI. */
712 r = cql_pr_attr_uri(ct, addinfo, "index", ns,
713 cn->u.st.index, "serverChoice",
714 pr, client_data, YAZ_SRW_UNSUPP_INDEX);
718 if (cn->u.st.modifiers)
720 struct cql_node *mod = cn->u.st.modifiers;
721 for (; mod; mod = mod->u.st.modifiers)
723 r = cql_pr_attr(ct, addinfo,
724 "relationModifier", mod->u.st.index, 0,
725 pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER);
/* Opening quote of the term. */
730 (*pr)("\"", client_data);
732 for (i = 0; i < length; i++)
734 char x[2]; /* temp buffer */
735 if (term[i] == '\\' && i < length - 1)
/* Double quote and backslash get a backslash in the output; in z3958 mode
 * a '#' or '?' additionally gets a doubled backslash. */
738 if (strchr("\"\\", term[i]))
739 pr("\\", client_data);
740 if (z3958_mode && strchr("#?", term[i]))
741 pr("\\\\", client_data); /* double \\ to survive PQF parse */
/* In z3958 mode an unescaped '*' is written as '?', followed by a doubled
 * backslash when the next character is a digit. */
746 else if (z3958_mode && term[i] == '*')
748 pr("?", client_data);
749 if (i < length - 1 && yaz_isdigit(term[i+1]))
750 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
/* In z3958 mode an unescaped '?' is written as '#'. */
752 else if (z3958_mode && term[i] == '?')
754 pr("#", client_data);
759 pr("\\", client_data);
760 if (z3958_mode && strchr("#?", term[i]))
761 pr("\\\\", client_data); /* dbl \\ to survive PQF parse */
769 for (i = 0; i < length; i++)
/* Closing quote plus a separating space. */
777 (*pr)("\" ", client_data);
/* Emits the term of `cn` followed by its chain of extra terms, combined
 * with the operator `op` written in prefix form ("@op ").
 *
 * An operator is written ahead of a term while further terms follow (the
 * condition for the first one is not visible in this excerpt).  Every term,
 * including the extra ones, is emitted with `cn` as its node, so all of
 * them take index and modifiers from cn.  The loop stops at the first
 * non-zero result from emit_term(). */
781 static int emit_terms(cql_transform_t ct, struct cql_node *cn,
783 void (*pr)(const char *buf, void *client_data),
787 struct cql_node *ne = cn->u.st.extra_terms;
791 (*pr)("@", client_data);
792 (*pr)(op, client_data);
793 (*pr)(" ", client_data);
795 r = emit_term(ct, cn, addinfo, cn->u.st.term, strlen(cn->u.st.term),
797 for (; !r && ne; ne = ne->u.st.extra_terms)
799 if (ne->u.st.extra_terms)
801 (*pr)("@", client_data);
802 (*pr)(op, client_data);
803 (*pr)(" ", client_data);
805 r = emit_term(ct, cn, addinfo, ne->u.st.term, strlen(ne->u.st.term),
/* Splits the term of `cn` at spaces and emits each word as its own term,
 * combined with the operator `op` in prefix form ("@op ").
 *
 * The word most recently found is remembered in last_term / last_length
 * and is emitted once it is known that another word follows (so the
 * operator can be written first); the final word is emitted after the
 * loop. */
811 static int emit_wordlist(cql_transform_t ct, struct cql_node *cn,
813 void (*pr)(const char *buf, void *client_data),
818 const char *cp0 = cn->u.st.term;
820 const char *last_term = 0;
826 cp1 = strchr(cp0, ' ');
829 (*pr)("@", client_data);
830 (*pr)(op, client_data);
831 (*pr)(" ", client_data);
832 r = emit_term(ct, cn, addinfo, last_term, last_length,
/* Word length: up to the next space, or the rest of the string. */
837 last_length = cp1 - cp0;
839 last_length = strlen(cp0);
843 r = emit_term(ct, cn, addinfo, last_term, last_length, pr, client_data);
/* Recursively emits the query tree rooted at `cn` through `pr`.
 *
 * Search-term nodes: an index "resultSet" in the CQL context set becomes
 * "@set "term""; otherwise the "always", "relation" and "structure"
 * attributes are emitted followed by the term(s).  Relation "all" yields an
 * and-combined word list, "any" an or-combined one, anything else goes
 * through emit_terms() with "and".
 * Boolean nodes: "@<operator> " is written first, then proximity parameters
 * for "prox"; modifiers on any other operator are rejected with
 * YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER.  Left and right operands follow.
 * Sort nodes: only the wrapped search is emitted.
 * Any other node type is reported on stderr. */
847 static int emit_node(cql_transform_t ct, struct cql_node *cn,
849 void (*pr)(const char *buf, void *client_data),
854 struct cql_node *mods;
861 ns = cn->u.st.index_uri;
864 if (!strcmp(ns, cql_uri())
865 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
/* NOTE(review): the result-set name is written verbatim between quotes; no
 * escaping of embedded quotes is visible here. */
867 (*pr)("@set \"", client_data);
868 (*pr)(cn->u.st.term, client_data);
869 (*pr)("\" ", client_data);
875 return YAZ_SRW_UNSUPP_CONTEXT_SET;
/* "always" is best effort: errcode 0 and the result is ignored. */
877 cql_pr_attr(ct, addinfo, "always", 0, 0, pr, client_data, 0);
878 r = cql_pr_attr(ct, addinfo, "relation", cn->u.st.relation, 0,
879 pr, client_data, YAZ_SRW_UNSUPP_RELATION);
/* The structure attribute is also keyed by the relation name. */
882 r = cql_pr_attr(ct, addinfo, "structure", cn->u.st.relation, 0,
884 YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM);
887 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
888 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "and");
889 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
890 r = emit_wordlist(ct, cn, addinfo, pr, client_data, "or");
892 r = emit_terms(ct, cn, addinfo, pr, client_data, "and");
/* Boolean node: operator first (prefix notation), operands afterwards. */
895 (*pr)("@", client_data);
896 (*pr)(cn->u.boolean.value, client_data);
897 (*pr)(" ", client_data);
898 mods = cn->u.boolean.modifiers;
899 if (!strcmp(cn->u.boolean.value, "prox"))
901 r = cql_pr_prox(ct, mods, addinfo, pr, client_data);
907 /* Boolean modifiers other than on proximity not supported */
908 wrbuf_puts(addinfo, mods->u.st.index);
909 return YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER;
912 r = emit_node(ct, cn->u.boolean.left, addinfo, pr, client_data);
915 r = emit_node(ct, cn->u.boolean.right, addinfo, pr, client_data);
/* Sort node: the sort criteria themselves are not emitted here. */
920 r = emit_node(ct, cn->u.sort.search, addinfo, pr, client_data);
923 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Transforms the CQL tree `cn`, writing output through `pr` and any
 * diagnostic detail into the caller-supplied `addinfo`.
 *
 * Before emitting, every "set.<prefix>" rule is applied to the tree with
 * cql_apply_prefix(); a plain "set" rule is applied with a null prefix.
 * r holds the result of emit_node().
 *
 * NOTE(review): the release of the local NMEM is not visible in this
 * excerpt - verify it is destroyed before returning. */
929 int cql_transform_r(cql_transform_t ct, struct cql_node *cn,
931 void (*pr)(const char *buf, void *client_data),
934 struct cql_prop_entry *e;
935 NMEM nmem = nmem_create();
938 for (e = ct->entry; e ; e = e->next)
940 if (!cql_strncmp(e->pattern, "set.", 4))
941 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
942 else if (!cql_strcmp(e->pattern, "set"))
943 cql_apply_prefix(nmem, cn, 0, e->value);
945 r = emit_node(ct, cn, addinfo, pr, client_data);
/* Transforms `cn` via cql_transform_r() using a temporary addinfo buffer,
 * then records the result code and addinfo text on the handle with
 * cql_transform_set_error().  The temporary buffer is destroyed before the
 * function finishes. */
950 int cql_transform(cql_transform_t ct, struct cql_node *cn,
951 void (*pr)(const char *buf, void *client_data),
954 WRBUF addinfo = wrbuf_alloc();
955 int r = cql_transform_r(ct, cn, addinfo, pr, client_data);
956 cql_transform_set_error(ct, r, wrbuf_cstr(addinfo));
957 wrbuf_destroy(addinfo);
/* Transforms `cn` and writes the result to the stdio stream `f`, using
 * cql_fputs as the output callback.  Returns whatever cql_transform()
 * returns. */
961 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
963 return cql_transform(ct, cn, cql_fputs, f);
/* Transforms `cn` into a caller-supplied character buffer.
 * Output is collected through cql_buf_write_handler into `info`.  On the
 * overflow path the error is replaced by YAZ_SRW_TOO_MANY_CHARS_IN_QUERY
 * with the buffer limit (info.max) as addinfo; otherwise the collected
 * text is NUL-terminated at info.off.  The conditions selecting between
 * the two paths are in lines not visible in this excerpt. */
966 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
969 struct cql_buf_write_info info;
975 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
977 /* Attempt to write past end of buffer. For some reason, this
978 SRW diagnostic is deprecated, but it's so perfect for our
979 purposes that it would be stupid not to use it. */
981 sprintf(numbuf, "%ld", (long) info.max);
982 cql_transform_set_error(ct, YAZ_SRW_TOO_MANY_CHARS_IN_QUERY, numbuf);
986 info.buf[info.off] = '\0';
/* Reports the diagnostic recorded by the last transform.
 * *addinfo is set to the handle's addinfo text, or to a null pointer when
 * that text is empty.  The string belongs to the handle's WRBUF, so it is
 * only valid until that buffer is changed or destroyed.  The return value
 * is in a line not visible in this excerpt. */
990 int cql_transform_error(cql_transform_t ct, const char **addinfo)
992 *addinfo = wrbuf_len(ct->addinfo) ? wrbuf_cstr(ct->addinfo) : 0;
/* Records a diagnostic on the handle.
 * The handle's addinfo buffer is rewound (emptied) and then receives a
 * copy of `addinfo`.  Storing `error` and any guard for a null `addinfo`
 * are in lines not visible in this excerpt. */
996 void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo)
998 wrbuf_rewind(ct->addinfo);
1000 wrbuf_puts(ct->addinfo, addinfo);
1007 * c-file-style: "Stroustrup"
1008 * indent-tabs-mode: nil
1010 * vim: shiftwidth=4 tabstop=8 expandtab