1 /* $Id: cqltransform.c,v 1.21 2006-03-20 14:56:40 mike Exp $
2 Copyright (C) 1995-2005, Index Data ApS
5 This file is part of the YAZ toolkit.
11 * \file cqltransform.c
12 * \brief Implements CQL transform (CQL to RPN conversion).
18 #include <yaz/xmalloc.h>
/* One pattern/value pair held by a transform handle.  Entries are
 * created by cql_transform_open_FILE(), which also fills the `pattern'
 * and `value' strings, and are chained into a singly linked list. */
20 struct cql_prop_entry {
/* Next entry in the list; the list walks in this file stop when this
 * pointer is null. */
23 struct cql_prop_entry *next;
/* State of a CQL transform.  Presumably the structure behind the
 * cql_transform_t handle type (the typedef is not visible here); the
 * functions in this file reach `entry', `error' and `addinfo' through
 * such a handle. */
26 struct cql_transform_t_ {
/* Head of the list of pattern/value entries. */
27 struct cql_prop_entry *entry;
/* Creates a transform handle from the text stream `f'.
 *
 * The stream is read line by line with fgets().  For every line that is
 * turned into an entry, two NUL-terminated strings are allocated with
 * xmalloc(): the pattern, copied from the start of the line up to
 * cp_pattern_end, and the value, copied from cp_value_start up to
 * cp_value_end.  The value stops at the first '#' found after its
 * start, or at the end of the line when there is no '#'.
 *
 * NOTE(review): only part of this function is visible in this listing.
 * The pointer-advancing statements, the handling of lines without a
 * usable pattern or '=', the advancing of `pp' to the next link and the
 * return statement could not be checked here.
 */
32 cql_transform_t cql_transform_open_FILE(FILE *f)
35 cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct));
/* pp addresses the link that receives the next allocated entry; it
 * starts out at the head of the list. */
36 struct cql_prop_entry **pp = &ct->entry;
/* fgets() is handed one byte less than the size of `line'. */
40 while (fgets(line, sizeof(line)-1, f))
42 const char *cp_value_start;
43 const char *cp_value_end;
44 const char *cp_pattern_end;
45 const char *cp = line;
/* Loops while cp is on a pattern character, i.e. anything other than
 * blank, tab, '=', CR, LF or '#'. */
46 while (*cp && !strchr(" \t=\r\n#", *cp))
/* Loops while cp is on white space. */
51 while (*cp && strchr(" \t\r\n", *cp))
/* Loops while cp is on white space (second scan of this kind). */
56 while (*cp && strchr(" \t\r\n", *cp))
/* Value end: the first '#' found from cp on, else the end of the line. */
59 if (!(cp_value_end = strchr(cp, '#')))
60 cp_value_end = strlen(line) + line;
/* True when the value is non-empty and its last character is white
 * space - presumably used to trim trailing white space (the statement
 * it guards is not visible here). */
62 if (cp_value_end != cp_value_start &&
63 strchr(" \t\r\n", cp_value_end[-1]))
/* New list entry, stored through pp. */
65 *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp));
/* Private copy of the pattern text [line, cp_pattern_end). */
66 (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1);
67 memcpy ((*pp)->pattern, line, cp_pattern_end - line);
68 (*pp)->pattern[cp_pattern_end-line] = 0;
/* Private copy of the value text [cp_value_start, cp_value_end);
 * memcpy() is skipped for an empty value, which still receives its
 * terminating NUL. */
70 (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1);
71 if (cp_value_start != cp_value_end)
72 memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start);
73 (*pp)->value[cp_value_end - cp_value_start] = 0;
/* Closes the transform handle `ct'.
 *
 * The visible lines walk the entry list with `pe' and remember each
 * entry's successor in `pe_next' - presumably so that the entry can be
 * released before moving on.
 * NOTE(review): the statements that free memory are not visible in
 * this listing. */
80 void cql_transform_close(cql_transform_t ct)
82 struct cql_prop_entry *pe;
/* Successor saved up front, before anything else is done with pe. */
88 struct cql_prop_entry *pe_next = pe->next;
/* Creates a transform handle from the file named `fname'.
 *
 * The file is opened for reading and passed on to
 * cql_transform_open_FILE().
 * NOTE(review): the handling of an fopen() failure, the closing of the
 * stream and the return statement are not visible in this listing. */
99 cql_transform_t cql_transform_open_fname(const char *fname)
102 FILE *f = fopen(fname, "r");
105 ct = cql_transform_open_FILE(f);
/* Searches the entry list of `ct' for a property whose pattern equals a
 * dotted name built from up to three components.
 *
 * The name is assembled in `pattern' from whichever of pat1, pat2 and
 * pat3 are non-null, joined with '.'; at most 39 characters of each
 * component are used.  Entries are compared with cql_strcmp().
 *
 * NOTE(review): the return statements are not visible in this listing;
 * the result is a const char * that callers store in `res', presumably
 * the value of the matching entry or null when nothing matches.
 */
110 static const char *cql_lookup_property(cql_transform_t ct,
111 const char *pat1, const char *pat2,
115 struct cql_prop_entry *e;
/* Pick the name layout from the components that are present. */
117 if (pat1 && pat2 && pat3)
118 sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3);
119 else if (pat1 && pat2)
120 sprintf (pattern, "%.39s.%.39s", pat1, pat2);
121 else if (pat1 && pat3)
122 sprintf (pattern, "%.39s.%.39s", pat1, pat3);
124 sprintf (pattern, "%.39s", pat1);
/* Linear search over all entries. */
128 for (e = ct->entry; e; e = e->next)
130 if (!cql_strcmp(e->pattern, pattern))
/* Looks up the attribute specification configured for `category' and a
 * value, and passes it to `pr' as a series of "@attr <piece> " strings.
 *
 * ct           transform handle whose entry list is searched
 * category     first component of the property name that is looked up
 * uri          compared with the values of the "set.<prefix>" entries
 *              to find the prefix used in the lookup
 * val          value to look up; also copied to ct->addinfo on the
 *              error path
 * default_val  used in place of val when val is null
 * pr           output callback, called with a text fragment and
 *              client_data
 * errcode      when non-zero and no error is recorded in ct yet, the
 *              error path at the end of the function is entered
 *
 * The visible code contains a lookup with the effective value and a
 * lookup with "*" in its place.
 *
 * NOTE(review): parts of this function (remaining parameters, the
 * conditions between the visible statements, the return statements)
 * are not visible in this listing.  Callers in this file test the
 * result as a boolean, so it presumably is non-zero when a
 * specification was found - confirm against the full source.
 */
136 int cql_pr_attr_uri(cql_transform_t ct, const char *category,
137 const char *uri, const char *val, const char *default_val,
138 void (*pr)(const char *buf, void *client_data),
/* Effective value: val, or default_val when val is null. */
143 const char *eval = val ? val : default_val;
144 const char *prefix = 0;
148 struct cql_prop_entry *e;
/* Find the "set.<prefix>" entry whose value equals uri; the text after
 * "set." becomes the prefix. */
150 for (e = ct->entry; e; e = e->next)
151 if (!memcmp(e->pattern, "set.", 4) && e->value &&
152 !strcmp(e->value, uri))
154 prefix = e->pattern+4;
157 /* must have a prefix now - if not it's an error */
163 res = cql_lookup_property(ct, category, prefix, eval);
165 res = cql_lookup_property(ct, category, prefix, "*");
/* Walk the specification: as long as a '=' is found, the piece running
 * from cp0 to the next space (or the end of the string) is copied to
 * buf and written as "@attr <piece> ". */
171 const char *cp0 = res, *cp1;
172 while ((cp1 = strchr(cp0, '=')))
174 while (*cp1 && *cp1 != ' ')
/* Piece would not fit in buf (the action taken is not visible here). */
176 if (cp1 - cp0 >= sizeof(buf))
178 memcpy (buf, cp0, cp1 - cp0);
180 (*pr)("@attr ", client_data);
181 (*pr)(buf, client_data);
182 (*pr)(" ", client_data);
/* Error path: only entered for a non-zero errcode and when ct holds no
 * earlier error; a copy of val is kept as additional information. */
190 if (errcode && !ct->error)
194 ct->addinfo = xstrdup(val);
/* Convenience form of cql_pr_attr_uri() for callers that have no URI:
 * forwards all arguments unchanged and passes 0 for `uri'.  The result
 * of cql_pr_attr_uri() is returned as is. */
201 int cql_pr_attr(cql_transform_t ct, const char *category,
202 const char *val, const char *default_val,
203 void (*pr)(const char *buf, void *client_data),
207 return cql_pr_attr_uri(ct, category, 0 /* uri */,
208 val, default_val, pr, client_data, errcode);
/* Writes `val' in decimal notation, followed by a single space, to the
 * output callback `pr'. */
212 static void cql_pr_int (int val,
213 void (*pr)(const char *buf, void *client_data),
216 char buf[21]; /* enough characters to 2^64 */
217 sprintf(buf, "%d", val);
/* Number first, separator second - two separate callback calls. */
218 (*pr)(buf, client_data);
219 (*pr)(" ", client_data);
/* Emits the numeric arguments of a proximity operator, taking them from
 * the chain of modifier nodes that starts at `mods'.
 *
 * Modifiers are visited through mods->u.st.modifiers.  Recognised names
 * are "distance" (term parsed with strtol(); relation one of = > < >=
 * <= <>), "ordered", "unordered" and "unit" (word, sentence, paragraph
 * or element).  Anything else records an error in ct together with a
 * copy of the offending text: 40 for the relation, 42 for the unit and
 * 46 for the modifier name.
 *
 * Output order through pr: exclusion, distance, ordered, proxrel, the
 * literal "k ", then unit - every number followed by a space.
 *
 * NOTE(review): the assignments made in the individual branches and
 * the return statements are not visible in this listing.  The caller
 * in cql_transform_r() tests the result with `!', so zero presumably
 * signals failure.
 */
223 static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods,
224 void (*pr)(const char *buf, void *client_data),
228 int distance; /* to be filled in later depending on unit */
229 int distance_defined = 0;
231 int proxrel = 2; /* less than or equal */
232 int unit = 2; /* word */
/* The three strings of the current modifier node. */
235 char *name = mods->u.st.index;
236 char *term = mods->u.st.term;
237 char *relation = mods->u.st.relation;
239 if (!strcmp(name, "distance")) {
/* Base 0 lets strtol() accept decimal, octal and hexadecimal input. */
240 distance = strtol(term, (char**) 0, 0);
241 distance_defined = 1;
242 if (!strcmp(relation, "=")) {
244 } else if (!strcmp(relation, ">")) {
246 } else if (!strcmp(relation, "<")) {
248 } else if (!strcmp(relation, ">=")) {
250 } else if (!strcmp(relation, "<=")) {
252 } else if (!strcmp(relation, "<>")) {
255 ct->error = 40; /* Unsupported proximity relation */
256 ct->addinfo = xstrdup(relation);
259 } else if (!strcmp(name, "ordered")) {
261 } else if (!strcmp(name, "unordered")) {
263 } else if (!strcmp(name, "unit")) {
264 if (!strcmp(term, "word")) {
266 } else if (!strcmp(term, "sentence")) {
268 } else if (!strcmp(term, "paragraph")) {
270 } else if (!strcmp(term, "element")) {
273 ct->error = 42; /* Unsupported proximity unit */
274 ct->addinfo = xstrdup(term);
278 ct->error = 46; /* Unsupported boolean modifier */
279 ct->addinfo = xstrdup(name);
/* Step to the next modifier in the chain. */
283 mods = mods->u.st.modifiers;
/* No distance modifier seen: default to 1 when the unit is 2 (word),
 * otherwise to 0. */
286 if (!distance_defined)
287 distance = (unit == 2) ? 1 : 0;
289 cql_pr_int(exclusion, pr, client_data);
290 cql_pr_int(distance, pr, client_data);
291 cql_pr_int(ordered, pr, client_data);
292 cql_pr_int(proxrel, pr, client_data);
293 (*pr)("k ", client_data);
294 cql_pr_int(unit, pr, client_data);
299 /* Returns location of first wildcard character in the `length'
300 * characters starting at `term', or a null pointer if there are
301 * none -- like memchr().
303 static const char *wcchar(const char *term, int length)
/* Earliest wildcard position seen so far; null while none was found. */
305 const char *best = 0;
/* Try each wildcard character ('*', then '?') in turn. */
309 for (whichp = "*?"; *whichp != '\0'; whichp++) {
/* Only the first `length' bytes of term are searched. */
310 current = (const char *) memchr(term, *whichp, length);
/* A hit counts when it is the first one or lies before the best so far. */
311 if (current != 0 && (best == 0 || current < best))
/* Emits a single search term together with its position and truncation
 * attributes.
 *
 * ct            transform handle used for the attribute lookups
 * term, length  the term text and its length in bytes
 * pr            output callback, called with a text fragment and
 *               client_data
 *
 * Position: a '^' at both ends, at the start or at the end of the term
 * selects "firstAndLast", "first" or "last"; the remaining lookup uses
 * "any".  All four are looked up with error code 32.
 *
 * Truncation: a '*' at both ends, at the start or at the end - with no
 * further wildcard in the part checked by wcchar() - selects "both",
 * "left" or "right", looked up with error code 0.  A term in which
 * wcchar() still finds a wildcard selects "z3958" (error code 28) and
 * is copied with '*' replaced by '?' and '?' replaced by '#'.  The
 * remaining lookup uses "none".
 *
 * Finally the term is sent to pr between double quotes, followed by a
 * space.
 *
 * NOTE(review): the statements that adjust term and length after an
 * anchor or a '*' has been recognised, and the use made of the
 * translated copy, are not visible in this listing.
 */
319 void emit_term(cql_transform_t ct,
320 const char *term, int length,
321 void (*pr)(const char *buf, void *client_data),
/* Position attribute, chosen from the '^' anchors of the term. */
327 if (length > 1 && term[0] == '^' && term[length-1] == '^')
329 cql_pr_attr(ct, "position", "firstAndLast", 0,
330 pr, client_data, 32);
334 else if (term[0] == '^')
336 cql_pr_attr(ct, "position", "first", 0,
337 pr, client_data, 32);
341 else if (term[length-1] == '^')
343 cql_pr_attr(ct, "position", "last", 0,
344 pr, client_data, 32);
349 cql_pr_attr(ct, "position", "any", 0,
350 pr, client_data, 32);
356 /* Check for well-known globbing patterns that represent
357 * simple truncation attributes as expected by, for example,
358 * Bath-compliant server. If we find such a pattern but
359 * there's no mapping for it, that's fine: we just use a
360 * general pattern-matching attribute.
362 if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
363 wcchar(term+1, length-2) == 0 &&
364 cql_pr_attr(ct, "truncation", "both", 0,
365 pr, client_data, 0)) {
369 else if (term[0] == '*' &&
370 wcchar(term+1, length-1) == 0 &&
371 cql_pr_attr(ct, "truncation", "left", 0,
372 pr, client_data, 0)) {
376 else if (term[length-1] == '*' &&
377 wcchar(term, length-1) == 0 &&
378 cql_pr_attr(ct, "truncation", "right", 0,
379 pr, client_data, 0)) {
382 else if (wcchar(term, length))
384 /* We have one or more wildcard characters, but not in a
385 * way that can be dealt with using only the standard
386 * left-, right- and both-truncation attributes. We need
387 * to translate the pattern into a Z39.58-type pattern,
388 * which has been supported in BIB-1 since 1996. If
389 * there's no configuration element for "truncation.z3958"
390 * we indicate this as error 28 "Masking character not
395 cql_pr_attr(ct, "truncation", "z3958", 0,
396 pr, client_data, 28);
397 mem = (char *) xmalloc(length+1);
398 for (i = 0; i < length; i++) {
399 if (term[i] == '*') mem[i] = '?';
400 else if (term[i] == '?') mem[i] = '#';
401 else mem[i] = term[i];
407 /* No masking characters. Use "truncation.none" if given. */
408 cql_pr_attr(ct, "truncation", "none", 0,
/* The term itself: opening quote, the text passed on through buf
 * inside the loop, then closing quote and a space. */
413 (*pr)("\"", client_data);
414 for (i = 0; i<length; i++)
419 (*pr)(buf, client_data);
421 (*pr)("\" ", client_data);
/* Emits the space-separated words of cn->u.st.term as individual terms
 * combined with the operator `op'.
 *
 * The term string is split at spaces with strchr().  For a word that
 * is already pending, "@<op> " is written followed by that word (via
 * emit_term()); the word still pending at the end is emitted without
 * an operator in front of it.
 *
 * NOTE(review): the loop and branch statements around the visible
 * lines are only partly visible in this listing.
 */
424 void emit_wordlist(cql_transform_t ct,
426 void (*pr)(const char *buf, void *client_data),
430 const char *cp0 = cn->u.st.term;
/* Word found in the previous round that has not been emitted yet. */
432 const char *last_term = 0;
/* End of the current word: the next space, if there is one. */
438 cp1 = strchr(cp0, ' ');
441 (*pr)("@", client_data);
442 (*pr)(op, client_data);
443 (*pr)(" ", client_data);
444 emit_term(ct, last_term, last_length, pr, client_data);
/* Length up to the space found, or of the rest of the string. */
448 last_length = cp1 - cp0;
450 last_length = strlen(cp0);
/* The final pending word. */
454 emit_term(ct, last_term, last_length, pr, client_data);
/* Converts the CQL node `cn' and, for boolean nodes, its operands,
 * writing the result through `pr'.
 *
 * Search-term nodes (cn->u.st):
 *  - when the index URI equals cql_uri() and the index is "resultSet",
 *    the term is written as  @set "<term>"  ;
 *  - the attribute lookups visible below are: "always"; "relation"
 *    ("eq", "le" or "ge" for the relations =, <= and >=, otherwise the
 *    relation text itself; error code 19); "relationModifier" for every
 *    node in the modifier chain (error code 20); "structure" (error
 *    code 24) and "index" (default "serverChoice", error code 16);
 *  - the term is emitted by emit_wordlist() with "and" for relation
 *    "all" and with "or" for relation "any", and by emit_term()
 *    otherwise.
 *
 * Boolean nodes (cn->u.boolean): "@<value> " is written first.  For
 * "prox" the modifiers are handed to cql_pr_prox(); the visible code
 * also records error 46 with the modifier's index as additional
 * information.  Both operands are then converted recursively, left
 * before right.
 *
 * A node type outside these cases is reported on stderr.
 *
 * NOTE(review): the switch/branch statements that separate these cases
 * are not visible in this listing.
 */
457 void cql_transform_r(cql_transform_t ct,
459 void (*pr)(const char *buf, void *client_data),
463 struct cql_node *mods;
470 ns = cn->u.st.index_uri;
/* Reference to a result set rather than a real search term. */
473 if (!strcmp(ns, cql_uri())
474 && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet"))
476 (*pr)("@set \"", client_data);
477 (*pr)(cn->u.st.term, client_data);
478 (*pr)("\" ", client_data);
/* Attributes of a search term. */
490 cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0);
491 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "="))
492 cql_pr_attr(ct, "relation", "eq", "scr",
493 pr, client_data, 19);
494 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<="))
495 cql_pr_attr(ct, "relation", "le", "scr",
496 pr, client_data, 19);
497 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">="))
498 cql_pr_attr(ct, "relation", "ge", "scr",
499 pr, client_data, 19);
501 cql_pr_attr(ct, "relation", cn->u.st.relation, "eq",
502 pr, client_data, 19);
503 if (cn->u.st.modifiers)
505 struct cql_node *mod = cn->u.st.modifiers;
/* One lookup per node of the modifier chain. */
506 for (; mod; mod = mod->u.st.modifiers)
508 cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0,
509 pr, client_data, 20);
512 cql_pr_attr(ct, "structure", cn->u.st.relation, 0,
513 pr, client_data, 24);
515 cql_pr_attr_uri(ct, "index", ns,
516 cn->u.st.index, "serverChoice",
517 pr, client_data, 16);
/* The term: word list for "all" and "any", single term otherwise. */
519 if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all"))
521 emit_wordlist(ct, cn, pr, client_data, "and");
523 else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any"))
525 emit_wordlist(ct, cn, pr, client_data, "or");
529 emit_term(ct, cn->u.st.term, strlen(cn->u.st.term),
/* Boolean node: the operator is written before its operands. */
534 (*pr)("@", client_data);
535 (*pr)(cn->u.boolean.value, client_data);
536 (*pr)(" ", client_data);
537 mods = cn->u.boolean.modifiers;
538 if (!strcmp(cn->u.boolean.value, "prox")) {
539 if (!cql_pr_prox(ct, mods, pr, client_data))
542 /* Boolean modifiers other than on proximity not supported */
543 ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */
544 ct->addinfo = xstrdup(mods->u.st.index);
548 cql_transform_r(ct, cn->u.boolean.left, pr, client_data);
549 cql_transform_r(ct, cn->u.boolean.right, pr, client_data);
553 fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which);
/* Converts the CQL tree `cn', sending the output to `pr'.
 *
 * Before the conversion the entries of ct are scanned: every entry
 * named "set.<prefix>" is applied to the tree with cql_apply_prefix()
 * using that prefix and the entry's value, and an entry named just
 * "set" is applied with a null prefix.  The tree is then handed to
 * cql_transform_r().
 *
 * NOTE(review): the return statement and the release of `nmem' are not
 * visible in this listing.
 */
558 int cql_transform(cql_transform_t ct,
560 void (*pr)(const char *buf, void *client_data),
563 struct cql_prop_entry *e;
/* Memory handle passed to cql_apply_prefix(). */
564 NMEM nmem = nmem_create();
571 for (e = ct->entry; e ; e = e->next)
573 if (!cql_strncmp(e->pattern, "set.", 4))
574 cql_apply_prefix(nmem, cn, e->pattern+4, e->value);
575 else if (!cql_strcmp(e->pattern, "set"))
576 cql_apply_prefix(nmem, cn, 0, e->value);
578 cql_transform_r (ct, cn, pr, client_data);
/* Converts the CQL tree `cn' and writes the output to the stream `f':
 * calls cql_transform() with cql_fputs as output callback and `f' as
 * its client data, and returns that call's result. */
584 int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f)
586 return cql_transform(ct, cn, cql_fputs, f);
/* Converts the CQL tree `cn' into a memory buffer.
 *
 * The buffer is described by a cql_buf_write_info structure that is
 * passed to cql_transform() as client data, with
 * cql_buf_write_handler as the output callback.  Afterwards a NUL
 * terminator is stored at info.buf[info.off].
 *
 * NOTE(review): the remaining parameters, the set-up of `info', the
 * condition guarding the terminator and the return statement are not
 * visible in this listing.
 */
589 int cql_transform_buf(cql_transform_t ct, struct cql_node *cn,
592 struct cql_buf_write_info info;
598 r = cql_transform(ct, cn, cql_buf_write_handler, &info);
600 info.buf[info.off] = '\0';
/* Reports the additional error information kept in `ct'.
 *
 * *addinfo is set to ct->addinfo; the pointer refers to the string
 * held by the handle, no copy is made here.
 * NOTE(review): the return statement is not visible in this listing -
 * presumably the recorded error code is returned. */
604 int cql_transform_error(cql_transform_t ct, const char **addinfo)
606 *addinfo = ct->addinfo;
612 * indent-tabs-mode: nil
614 * vim: shiftwidth=4 tabstop=8 expandtab