X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcqltransform.c;h=133db946baeb2d465f676e2d461dc71a8c3d188c;hp=7008af4e7613c04f002707ee2e5a15ce66e595bc;hb=e4368af2ede9079d33e7a8fd280d29b2bde1d1ad;hpb=0e2fe400d9cb82150d83c48404bead5b5d447a16 diff --git a/src/cqltransform.c b/src/cqltransform.c index 7008af4..133db94 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -1,5 +1,5 @@ -/* $Id: cqltransform.c,v 1.2 2003-12-18 16:42:52 mike Exp $ - Copyright (C) 2002-2003 +/* $Id: cqltransform.c,v 1.25 2006-10-25 09:58:19 adam Exp $ + Copyright (C) 1995-2005, Index Data ApS Index Data Aps This file is part of the YAZ toolkit. @@ -7,9 +7,27 @@ This file is part of the YAZ toolkit. See the file LICENSE. */ +/** + * \file cqltransform.c + * \brief Implements CQL transform (CQL to RPN conversion). + * + * Evaluation order of rules: + * + * always + * relation + * structure + * position + * truncation + * index + * relationModifier + */ + +#include #include #include #include +#include +#include struct cql_prop_entry { char *pattern; @@ -26,7 +44,7 @@ struct cql_transform_t_ { cql_transform_t cql_transform_open_FILE(FILE *f) { char line[1024]; - cql_transform_t ct = (cql_transform_t) malloc (sizeof(*ct)); + cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct)); struct cql_prop_entry **pp = &ct->entry; ct->error = 0; @@ -35,36 +53,48 @@ cql_transform_t cql_transform_open_FILE(FILE *f) { const char *cp_value_start; const char *cp_value_end; + const char *cp_pattern_start; const char *cp_pattern_end; const char *cp = line; - while (*cp && !strchr(" \t=\r\n#", *cp)) + + while (*cp && strchr(" \t", *cp)) + cp++; + cp_pattern_start = cp; + + while (*cp && !strchr(" \t\r\n=#", *cp)) cp++; cp_pattern_end = cp; - if (cp == line) + if (cp == cp_pattern_start) continue; - while (*cp && strchr(" \t\r\n", *cp)) + while (*cp && strchr(" \t", *cp)) cp++; if (*cp != '=') - continue; + { + *pp = 0; + cql_transform_close(ct); + return 0; + } cp++; while (*cp && strchr(" \t\r\n", *cp)) cp++; cp_value_start = cp; - if (!(cp_value_end = strchr(cp, '#'))) + cp_value_end = strchr(cp, '#'); + if (!cp_value_end) cp_value_end = strlen(line) + line; if (cp_value_end != cp_value_start && strchr(" \t\r\n", cp_value_end[-1])) cp_value_end--; - *pp = (struct cql_prop_entry *) malloc (sizeof(**pp)); - (*pp)->pattern = (char *) malloc (cp_pattern_end - line + 1); - memcpy ((*pp)->pattern, line, cp_pattern_end - line); - (*pp)->pattern[cp_pattern_end-line] = 0; + *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp)); + (*pp)->pattern = (char *) xmalloc(cp_pattern_end-cp_pattern_start + 1); + memcpy ((*pp)->pattern, cp_pattern_start, + cp_pattern_end-cp_pattern_start); + (*pp)->pattern[cp_pattern_end-cp_pattern_start] = '\0'; - (*pp)->value = (char *) malloc (cp_value_end - cp_value_start + 1); + (*pp)->value = (char *) xmalloc (cp_value_end-cp_value_start + 1); if (cp_value_start != cp_value_end) memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start); - (*pp)->value[cp_value_end - cp_value_start] = 0; + (*pp)->value[cp_value_end - cp_value_start] = '\0'; pp = &(*pp)->next; } *pp = 0; @@ -80,14 +110,14 @@ void cql_transform_close(cql_transform_t ct) while (pe) { struct cql_prop_entry *pe_next = pe->next; - free (pe->pattern); - free (pe->value); - free (pe); + xfree (pe->pattern); + xfree (pe->value); + xfree (pe); pe = pe_next; } if (ct->addinfo) - free (ct->addinfo); - free (ct); + xfree (ct->addinfo); + xfree (ct); } cql_transform_t cql_transform_open_fname(const char *fname) @@ -102,50 +132,62 @@ cql_transform_t cql_transform_open_fname(const char *fname) } static const char *cql_lookup_property(cql_transform_t ct, - const char *pat1, const char *pat2) + const char *pat1, const char *pat2, + const char *pat3) { - char pattern[80]; + char pattern[120]; struct cql_prop_entry *e; - if (pat2) - sprintf (pattern, "%.39s%.39s", pat1, pat2); - else + if (pat1 && pat2 && pat3) + sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3); + else if (pat1 && pat2) + sprintf (pattern, "%.39s.%.39s", pat1, pat2); + else if (pat1 && pat3) + sprintf (pattern, "%.39s.%.39s", pat1, pat3); + else if (pat1) sprintf (pattern, "%.39s", pat1); + else + return 0; + for (e = ct->entry; e; e = e->next) { - if (!strcmp(e->pattern, pattern)) + if (!cql_strcmp(e->pattern, pattern)) return e->value; } return 0; } -static const char *cql_lookup_value(cql_transform_t ct, - const char *prefix, - const char *value) +int cql_pr_attr_uri(cql_transform_t ct, const char *category, + const char *uri, const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) { - struct cql_prop_entry *e; - int len = strlen(prefix); - - for (e = ct->entry; e; e = e->next) + const char *res = 0; + const char *eval = val ? val : default_val; + const char *prefix = 0; + + if (uri) { - if (!memcmp(e->pattern, prefix, len) && !strcmp(e->value, value)) - return e->pattern + len; + struct cql_prop_entry *e; + + for (e = ct->entry; e; e = e->next) + if (!memcmp(e->pattern, "set.", 4) && e->value && + !strcmp(e->value, uri)) + { + prefix = e->pattern+4; + break; + } + /* must have a prefix now - if not it's an error */ } - return 0; -} - -int cql_pr_attr(cql_transform_t ct, const char *category, - const char *val, - const char *default_val, - void (*pr)(const char *buf, void *client_data), - void *client_data, - int errcode) -{ - const char *res; - res = cql_lookup_property(ct, category, val ? val : default_val); - if (!res) - res = cql_lookup_property(ct, category, "*"); + if (!uri || prefix) + { + if (!res) + res = cql_lookup_property(ct, category, prefix, eval); + if (!res) + res = cql_lookup_property(ct, category, prefix, "*"); + } if (res) { char buf[64]; @@ -172,51 +214,263 @@ int cql_pr_attr(cql_transform_t ct, const char *category, if (errcode && !ct->error) { ct->error = errcode; - ct->addinfo = strdup(val); + if (val) + ct->addinfo = xstrdup(val); + else + ct->addinfo = 0; } return 0; } +int cql_pr_attr(cql_transform_t ct, const char *category, + const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) +{ + return cql_pr_attr_uri(ct, category, 0 /* uri */, + val, default_val, pr, client_data, errcode); +} + + +static void cql_pr_int (int val, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + char buf[21]; /* enough characters to 2^64 */ + sprintf(buf, "%d", val); + (*pr)(buf, client_data); + (*pr)(" ", client_data); +} + + +static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + int exclusion = 0; + int distance; /* to be filled in later depending on unit */ + int distance_defined = 0; + int ordered = 0; + int proxrel = 2; /* less than or equal */ + int unit = 2; /* word */ + + while (mods != 0) { + char *name = mods->u.st.index; + char *term = mods->u.st.term; + char *relation = mods->u.st.relation; + + if (!strcmp(name, "distance")) { + distance = strtol(term, (char**) 0, 0); + distance_defined = 1; + if (!strcmp(relation, "=")) { + proxrel = 3; + } else if (!strcmp(relation, ">")) { + proxrel = 5; + } else if (!strcmp(relation, "<")) { + proxrel = 1; + } else if (!strcmp(relation, ">=")) { + proxrel = 4; + } else if (!strcmp(relation, "<=")) { + proxrel = 2; + } else if (!strcmp(relation, "<>")) { + proxrel = 6; + } else { + ct->error = 40; /* Unsupported proximity relation */ + ct->addinfo = xstrdup(relation); + return 0; + } + } else if (!strcmp(name, "ordered")) { + ordered = 1; + } else if (!strcmp(name, "unordered")) { + ordered = 0; + } else if (!strcmp(name, "unit")) { + if (!strcmp(term, "word")) { + unit = 2; + } else if (!strcmp(term, "sentence")) { + unit = 3; + } else if (!strcmp(term, "paragraph")) { + unit = 4; + } else if (!strcmp(term, "element")) { + unit = 8; + } else { + ct->error = 42; /* Unsupported proximity unit */ + ct->addinfo = xstrdup(term); + return 0; + } + } else { + ct->error = 46; /* Unsupported boolean modifier */ + ct->addinfo = xstrdup(name); + return 0; + } + + mods = mods->u.st.modifiers; + } + + if (!distance_defined) + distance = (unit == 2) ? 1 : 0; + + cql_pr_int(exclusion, pr, client_data); + cql_pr_int(distance, pr, client_data); + cql_pr_int(ordered, pr, client_data); + cql_pr_int(proxrel, pr, client_data); + (*pr)("k ", client_data); + cql_pr_int(unit, pr, client_data); + + return 1; +} + +/* Returns location of first wildcard character in the `length' + * characters starting at `term', or a null pointer of there are + * none -- like memchr(). + */ +static const char *wcchar(const char *term, int length) +{ + const char *best = 0; + const char *current; + char *whichp; + + for (whichp = "*?"; *whichp != '\0'; whichp++) { + current = (const char *) memchr(term, *whichp, length); + if (current != 0 && (best == 0 || current < best)) + best = current; + } + + return best; +} + + void emit_term(cql_transform_t ct, + struct cql_node *cn, const char *term, int length, void (*pr)(const char *buf, void *client_data), void *client_data) { int i; + const char *ns = cn->u.st.index_uri; + + assert(cn->which == CQL_NODE_ST); + if (length > 0) { if (length > 1 && term[0] == '^' && term[length-1] == '^') { - cql_pr_attr(ct, "position.", "firstAndLast", 0, + cql_pr_attr(ct, "position", "firstAndLast", 0, pr, client_data, 32); term++; length -= 2; } else if (term[0] == '^') { - cql_pr_attr(ct, "position.", "first", 0, + cql_pr_attr(ct, "position", "first", 0, pr, client_data, 32); term++; + length--; } else if (term[length-1] == '^') { - cql_pr_attr(ct, "position.", "last", 0, + cql_pr_attr(ct, "position", "last", 0, pr, client_data, 32); length--; } else { - cql_pr_attr(ct, "position.", "any", 0, + cql_pr_attr(ct, "position", "any", 0, pr, client_data, 32); } } + + if (length > 0) + { + /* Check for well-known globbing patterns that represent + * simple truncation attributes as expected by, for example, + * Bath-compliant server. If we find such a pattern but + * there's no mapping for it, that's fine: we just use a + * general pattern-matching attribute. + */ + if (length > 1 && term[0] == '*' && term[length-1] == '*' && + wcchar(term+1, length-2) == 0 && + cql_pr_attr(ct, "truncation", "both", 0, + pr, client_data, 0)) { + term++; + length -= 2; + } + else if (term[0] == '*' && + wcchar(term+1, length-1) == 0 && + cql_pr_attr(ct, "truncation", "left", 0, + pr, client_data, 0)) { + term++; + length--; + } + else if (term[length-1] == '*' && + wcchar(term, length-1) == 0 && + cql_pr_attr(ct, "truncation", "right", 0, + pr, client_data, 0)) { + length--; + } + else if (wcchar(term, length)) + { + /* We have one or more wildcard characters, but not in a + * way that can be dealt with using only the standard + * left-, right- and both-truncation attributes. We need + * to translate the pattern into a Z39.58-type pattern, + * which has been supported in BIB-1 since 1996. If + * there's no configuration element for "truncation.z3958" + * we indicate this as error 28 "Masking character not + * supported". + */ + int i; + char *mem; + cql_pr_attr(ct, "truncation", "z3958", 0, + pr, client_data, 28); + mem = (char *) xmalloc(length+1); + for (i = 0; i < length; i++) { + if (term[i] == '*') mem[i] = '?'; + else if (term[i] == '?') mem[i] = '#'; + else mem[i] = term[i]; + } + mem[length] = '\0'; + term = mem; + } + else { + /* No masking characters. Use "truncation.none" if given. */ + cql_pr_attr(ct, "truncation", "none", 0, + pr, client_data, 0); + } + } + if (ns) { + cql_pr_attr_uri(ct, "index", ns, + cn->u.st.index, "serverChoice", + pr, client_data, 16); + } + if (cn->u.st.modifiers) + { + struct cql_node *mod = cn->u.st.modifiers; + for (; mod; mod = mod->u.st.modifiers) + { + cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0, + pr, client_data, 20); + } + } + (*pr)("\"", client_data); for (i = 0; iu.st.index; - const char *cp_dot = strchr(cp, '.'); - - /* strz current prefix (empty if not given) */ - if (cp_dot && cp_dot-cp < sizeof(prefix)) - { - memcpy (prefix, cp, cp_dot - cp); - prefix[cp_dot - cp] = 0; - } - else - *prefix = 0; - - /* 2. lookup in prefix_ar. and return NS */ - for (i = prefix_level; !ns && --i >= 0; ) - { - struct cql_node *cn_prefix = prefix_ar[i]; - for (; cn_prefix; cn_prefix = cn_prefix->u.mod.next) - { - if (*prefix && cn_prefix->u.mod.name && - !strcmp(prefix, cn_prefix->u.mod.name)) - { - ns = cn_prefix->u.mod.value; - break; - } - else if (!*prefix && !cn_prefix->u.mod.name) - { - ns = cn_prefix->u.mod.value; - break; - } - } - } - if (!ns) - { - if (!ct->error) - { - ct->error = 15; - ct->addinfo = strdup(prefix); - } - return 0; - } - /* 3. lookup in set.NS for new prefix */ - *n_prefix = cql_lookup_value(ct, "set.", ns); - if (!*n_prefix) - { - if (!ct->error) - { - ct->error = 15; - ct->addinfo = strdup(ns); - } - return 0; - } - /* 4. lookup qualifier.prefix. */ - - cp = cn->u.st.index; - cp_dot = strchr(cp, '.'); - - *n_suffix = cp_dot ? cp_dot+1 : cp; - return ns; + emit_term(ct, cn, last_term, last_length, pr, client_data); } void cql_transform_r(cql_transform_t ct, struct cql_node *cn, void (*pr)(const char *buf, void *client_data), - void *client_data, - struct cql_node **prefix_ar, int prefix_level) + void *client_data) { - const char *ns, *n_prefix, *n_suffix; + const char *ns; + struct cql_node *mods; if (!cn) return; switch (cn->which) { case CQL_NODE_ST: - if (cn->u.st.prefixes && prefix_level < 20) - prefix_ar[prefix_level++] = cn->u.st.prefixes; - ns = cql_get_ns(ct, cn, prefix_ar, prefix_level, &n_prefix, &n_suffix); + ns = cn->u.st.index_uri; if (ns) { - char n_full[64]; - sprintf (n_full, "%.20s.%.40s", n_prefix, n_suffix); - - if (!strcmp(ns, "http://www.loc.gov/zing/cql/srw-indexes/v1.0/") - && !strcmp(n_suffix, "resultSet")) + if (!strcmp(ns, cql_uri()) + && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet")) { (*pr)("@set \"", client_data); (*pr)(cn->u.st.term, client_data); (*pr)("\" ", client_data); return ; } - if (!cql_pr_attr(ct, "index.", n_full, "srw.serverChoice", - pr, client_data, 16)) { - /* No index.foo; reset error and fall back to qualifier.foo */ - if (ct->error == 16) ct->error = 0; - cql_pr_attr(ct, "qualifier.", n_full, "srw.serverChoice", - pr, client_data, 16); - } } - - if (cn->u.st.relation && !strcmp(cn->u.st.relation, "=")) - cql_pr_attr(ct, "relation.", "eq", "scr", - pr, client_data, 19); - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "<=")) - cql_pr_attr(ct, "relation.", "le", "scr", - pr, client_data, 19); - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, ">=")) - cql_pr_attr(ct, "relation.", "ge", "scr", - pr, client_data, 19); else - cql_pr_attr(ct, "relation.", cn->u.st.relation, "eq", - pr, client_data, 19); - if (cn->u.st.modifiers) { - struct cql_node *mod = cn->u.st.modifiers; - for (; mod; mod = mod->u.mod.next) + if (!ct->error) { - cql_pr_attr(ct, "relationModifier.", mod->u.mod.value, 0, - pr, client_data, 20); + ct->error = 15; + ct->addinfo = 0; } } - cql_pr_attr(ct, "structure.", cn->u.st.relation, 0, + cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0); + if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "=")) + cql_pr_attr(ct, "relation", "eq", "scr", + pr, client_data, 19); + else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "<=")) + cql_pr_attr(ct, "relation", "le", "scr", + pr, client_data, 19); + else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, ">=")) + cql_pr_attr(ct, "relation", "ge", "scr", + pr, client_data, 19); + else + cql_pr_attr(ct, "relation", cn->u.st.relation, "eq", + pr, client_data, 19); + cql_pr_attr(ct, "structure", cn->u.st.relation, 0, pr, client_data, 24); - if (cn->u.st.relation && !strcmp(cn->u.st.relation, "all")) + if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all")) { emit_wordlist(ct, cn, pr, client_data, "and"); } - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "any")) + else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any")) { emit_wordlist(ct, cn, pr, client_data, "or"); } else { - emit_term(ct, cn->u.st.term, strlen(cn->u.st.term), + emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term), pr, client_data); } break; case CQL_NODE_BOOL: - if (cn->u.boolean.prefixes && prefix_level < 20) - prefix_ar[prefix_level++] = cn->u.boolean.prefixes; (*pr)("@", client_data); (*pr)(cn->u.boolean.value, client_data); (*pr)(" ", client_data); + mods = cn->u.boolean.modifiers; + if (!strcmp(cn->u.boolean.value, "prox")) { + if (!cql_pr_prox(ct, mods, pr, client_data)) + return; + } else if (mods) { + /* Boolean modifiers other than on proximity not supported */ + ct->error = 46; /* SRW diag: "Unsupported boolean modifier" */ + ct->addinfo = xstrdup(mods->u.st.index); + return; + } - cql_transform_r(ct, cn->u.boolean.left, pr, client_data, - prefix_ar, prefix_level); - cql_transform_r(ct, cn->u.boolean.right, pr, client_data, - prefix_ar, prefix_level); + cql_transform_r(ct, cn->u.boolean.left, pr, client_data); + cql_transform_r(ct, cn->u.boolean.right, pr, client_data); + break; + + default: + fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which); + abort(); } } @@ -418,31 +600,23 @@ int cql_transform(cql_transform_t ct, void (*pr)(const char *buf, void *client_data), void *client_data) { - struct cql_node *prefix_ar[20], **pp; struct cql_prop_entry *e; + NMEM nmem = nmem_create(); ct->error = 0; if (ct->addinfo) - free (ct->addinfo); + xfree (ct->addinfo); ct->addinfo = 0; - prefix_ar[0] = 0; - pp = &prefix_ar[0]; for (e = ct->entry; e ; e = e->next) { - if (!memcmp(e->pattern, "set.", 4)) - { - *pp = cql_node_mk_mod(e->pattern+4, e->value); - pp = &(*pp)->u.mod.next; - } - else if (!strcmp(e->pattern, "set")) - { - *pp = cql_node_mk_mod(0, e->value); - pp = &(*pp)->u.mod.next; - } + if (!cql_strncmp(e->pattern, "set.", 4)) + cql_apply_prefix(nmem, cn, e->pattern+4, e->value); + else if (!cql_strcmp(e->pattern, "set")) + cql_apply_prefix(nmem, cn, 0, e->value); } - cql_transform_r (ct, cn, pr, client_data, prefix_ar, 1); - cql_node_destroy(prefix_ar[0]); + cql_transform_r (ct, cn, pr, client_data); + nmem_destroy(nmem); return ct->error; } @@ -462,6 +636,16 @@ int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, info.max = max; info.buf = out; r = cql_transform(ct, cn, cql_buf_write_handler, &info); + if (info.off < 0) { + /* Attempt to write past end of buffer. For some reason, this + SRW diagnostic is deprecated, but it's so perfect for our + purposes that it would be stupid not to use it. */ + char numbuf[30]; + ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY; + sprintf(numbuf, "%ld", (long) info.max); + ct->addinfo = xstrdup(numbuf); + return -1; + } if (info.off >= 0) info.buf[info.off] = '\0'; return r; @@ -472,3 +656,11 @@ int cql_transform_error(cql_transform_t ct, const char **addinfo) *addinfo = ct->addinfo; return ct->error; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +