X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fcqltransform.c;h=d3502424ebf5c09e420895a336f68741d9fec9aa;hb=138fbf3c4582f2738e5b30e440994c9b6d76e194;hp=559b2d4ef5b79eea81e2c913942d86f99c18d939;hpb=fb6d99a0c7e07d9cc4a315c447deaf6564a85505;p=yaz-moved-to-github.git diff --git a/src/cqltransform.c b/src/cqltransform.c index 559b2d4..d350242 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -1,21 +1,31 @@ -/* $Id: cqltransform.c,v 1.15 2005-06-25 15:46:03 adam Exp $ - Copyright (C) 1995-2005, Index Data ApS - Index Data Aps - -This file is part of the YAZ toolkit. - -See the file LICENSE. -*/ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2008 Index Data + * See the file LICENSE for details. + */ /** * \file cqltransform.c * \brief Implements CQL transform (CQL to RPN conversion). + * + * Evaluation order of rules: + * + * always + * relation + * structure + * position + * truncation + * index + * relationModifier */ +#include #include #include #include #include +#include +#include +#include struct cql_prop_entry { char *pattern; @@ -25,53 +35,93 @@ struct cql_prop_entry { struct cql_transform_t_ { struct cql_prop_entry *entry; + yaz_tok_cfg_t tok_cfg; int error; char *addinfo; + WRBUF w; }; + +cql_transform_t cql_transform_create(void) +{ + cql_transform_t ct = (cql_transform_t) xmalloc(sizeof(*ct)); + ct->tok_cfg = yaz_tok_cfg_create(); + ct->w = wrbuf_alloc(); + ct->error = 0; + ct->addinfo = 0; + ct->entry = 0; + return ct; +} + cql_transform_t cql_transform_open_FILE(FILE *f) { + cql_transform_t ct = cql_transform_create(); char line[1024]; - cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct)); struct cql_prop_entry **pp = &ct->entry; - ct->error = 0; - ct->addinfo = 0; + yaz_tok_cfg_single_tokens(ct->tok_cfg, "="); + while (fgets(line, sizeof(line)-1, f)) { - const char *cp_value_start; - const char *cp_value_end; - const char *cp_pattern_end; - const char *cp = line; - while (*cp && !strchr(" \t=\r\n#", *cp)) - cp++; - cp_pattern_end = cp; - if (cp == line) - continue; - while (*cp && strchr(" \t\r\n", *cp)) - cp++; - if (*cp != '=') - continue; - cp++; - while (*cp && strchr(" \t\r\n", *cp)) - cp++; - cp_value_start = cp; - if (!(cp_value_end = strchr(cp, '#'))) - cp_value_end = strlen(line) + line; - - if (cp_value_end != cp_value_start && - strchr(" \t\r\n", cp_value_end[-1])) - cp_value_end--; - *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp)); - (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1); - memcpy ((*pp)->pattern, line, cp_pattern_end - line); - (*pp)->pattern[cp_pattern_end-line] = 0; - - (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1); - if (cp_value_start != cp_value_end) - memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start); - (*pp)->value[cp_value_end - cp_value_start] = 0; - pp = &(*pp)->next; + yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line); + int t; + wrbuf_rewind(ct->w); + t = yaz_tok_move(tp); + if (t == YAZ_TOK_STRING) + { + char * pattern = xstrdup(yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + if (t != '=') + { + yaz_tok_parse_destroy(tp); + cql_transform_close(ct); + return 0; + } + t = yaz_tok_move(tp); + + while (t == YAZ_TOK_STRING) + { + /* attset type=value OR type=value */ + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + if (t == YAZ_TOK_EOF) + break; + if (t == YAZ_TOK_STRING) + { + wrbuf_puts(ct->w, " "); + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + } + if (t != '=') + { + yaz_tok_parse_destroy(tp); + cql_transform_close(ct); + return 0; + } + t = yaz_tok_move(tp); + if (t != YAZ_TOK_STRING) /* value */ + { + yaz_tok_parse_destroy(tp); + cql_transform_close(ct); + return 0; + } + wrbuf_puts(ct->w, "="); + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + wrbuf_puts(ct->w, " "); + } + *pp = (struct cql_prop_entry *) xmalloc(sizeof(**pp)); + (*pp)->pattern = pattern; + (*pp)->value = xstrdup(wrbuf_cstr(ct->w)); + pp = &(*pp)->next; + } + else if (t != YAZ_TOK_EOF) + { + yaz_tok_parse_destroy(tp); + cql_transform_close(ct); + return 0; + } + yaz_tok_parse_destroy(tp); } *pp = 0; return ct; @@ -86,14 +136,15 @@ void cql_transform_close(cql_transform_t ct) while (pe) { struct cql_prop_entry *pe_next = pe->next; - xfree (pe->pattern); - xfree (pe->value); - xfree (pe); + xfree(pe->pattern); + xfree(pe->value); + xfree(pe); pe = pe_next; } - if (ct->addinfo) - xfree (ct->addinfo); - xfree (ct); + xfree(ct->addinfo); + yaz_tok_cfg_destroy(ct->tok_cfg); + wrbuf_destroy(ct->w); + xfree(ct); } cql_transform_t cql_transform_open_fname(const char *fname) @@ -115,19 +166,19 @@ static const char *cql_lookup_property(cql_transform_t ct, struct cql_prop_entry *e; if (pat1 && pat2 && pat3) - sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3); + sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3); else if (pat1 && pat2) - sprintf (pattern, "%.39s.%.39s", pat1, pat2); + sprintf(pattern, "%.39s.%.39s", pat1, pat2); else if (pat1 && pat3) - sprintf (pattern, "%.39s.%.39s", pat1, pat3); + sprintf(pattern, "%.39s.%.39s", pat1, pat3); else if (pat1) - sprintf (pattern, "%.39s", pat1); + sprintf(pattern, "%.39s", pat1); else return 0; for (e = ct->entry; e; e = e->next) { - if (!strcmp(e->pattern, pattern)) + if (!cql_strcmp(e->pattern, pattern)) return e->value; } return 0; @@ -161,6 +212,18 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category, { if (!res) res = cql_lookup_property(ct, category, prefix, eval); + /* we have some aliases for some relations unfortunately.. */ + if (!res && !prefix && !strcmp(category, "relation")) + { + if (!strcmp(val, "==")) + res = cql_lookup_property(ct, category, prefix, "exact"); + if (!strcmp(val, "=")) + res = cql_lookup_property(ct, category, prefix, "eq"); + if (!strcmp(val, "<=")) + res = cql_lookup_property(ct, category, prefix, "le"); + if (!strcmp(val, ">=")) + res = cql_lookup_property(ct, category, prefix, "ge"); + } if (!res) res = cql_lookup_property(ct, category, prefix, "*"); } @@ -171,14 +234,27 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category, const char *cp0 = res, *cp1; while ((cp1 = strchr(cp0, '='))) { + int i; while (*cp1 && *cp1 != ' ') cp1++; if (cp1 - cp0 >= sizeof(buf)) break; - memcpy (buf, cp0, cp1 - cp0); + memcpy(buf, cp0, cp1 - cp0); buf[cp1-cp0] = 0; (*pr)("@attr ", client_data); - (*pr)(buf, client_data); + + for (i = 0; buf[i]; i++) + { + if (buf[i] == '*') + (*pr)(eval, client_data); + else + { + char tmp[2]; + tmp[0] = buf[i]; + tmp[1] = '\0'; + (*pr)(tmp, client_data); + } + } (*pr)(" ", client_data); cp0 = cp1; while (*cp0 == ' ') @@ -209,90 +285,206 @@ int cql_pr_attr(cql_transform_t ct, const char *category, } +static void cql_pr_int(int val, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + char buf[21]; /* enough characters to 2^64 */ + sprintf(buf, "%d", val); + (*pr)(buf, client_data); + (*pr)(" ", client_data); +} + + +static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + int exclusion = 0; + int distance; /* to be filled in later depending on unit */ + int distance_defined = 0; + int ordered = 0; + int proxrel = 2; /* less than or equal */ + int unit = 2; /* word */ + + while (mods) + { + const char *name = mods->u.st.index; + const char *term = mods->u.st.term; + const char *relation = mods->u.st.relation; + + if (!strcmp(name, "distance")) { + distance = strtol(term, (char**) 0, 0); + distance_defined = 1; + if (!strcmp(relation, "=")) + proxrel = 3; + else if (!strcmp(relation, ">")) + proxrel = 5; + else if (!strcmp(relation, "<")) + proxrel = 1; + else if (!strcmp(relation, ">=")) + proxrel = 4; + else if (!strcmp(relation, "<=")) + proxrel = 2; + else if (!strcmp(relation, "<>")) + proxrel = 6; + else + { + ct->error = YAZ_SRW_UNSUPP_PROX_RELATION; + ct->addinfo = xstrdup(relation); + return 0; + } + } + else if (!strcmp(name, "ordered")) + ordered = 1; + else if (!strcmp(name, "unordered")) + ordered = 0; + else if (!strcmp(name, "unit")) + { + if (!strcmp(term, "word")) + unit = 2; + else if (!strcmp(term, "sentence")) + unit = 3; + else if (!strcmp(term, "paragraph")) + unit = 4; + else if (!strcmp(term, "element")) + unit = 8; + else + { + ct->error = YAZ_SRW_UNSUPP_PROX_UNIT; + ct->addinfo = xstrdup(term); + return 0; + } + } + else + { + ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; + ct->addinfo = xstrdup(name); + return 0; + } + mods = mods->u.st.modifiers; + } + + if (!distance_defined) + distance = (unit == 2) ? 1 : 0; + + cql_pr_int(exclusion, pr, client_data); + cql_pr_int(distance, pr, client_data); + cql_pr_int(ordered, pr, client_data); + cql_pr_int(proxrel, pr, client_data); + (*pr)("k ", client_data); + cql_pr_int(unit, pr, client_data); + + return 1; +} + /* Returns location of first wildcard character in the `length' * characters starting at `term', or a null pointer of there are * none -- like memchr(). */ -static const char *wcchar(const char *term, int length) +static const char *wcchar(int start, const char *term, int length) { - const char *best = 0; - const char *current; - char *whichp; - - for (whichp = "*?"; *whichp != '\0'; whichp++) { - current = (const char *) memchr(term, *whichp, length); - if (current != 0 && (best == 0 || current < best)) - best = current; + while (length > 0) + { + if (start || term[-1] != '\\') + if (strchr("*?", *term)) + return term; + term++; + length--; + start = 0; } + return 0; +} + - return best; +/* ### checks for CQL relation-name rather than Type-1 attribute */ +static int has_modifier(struct cql_node *cn, const char *name) { + struct cql_node *mod; + for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) { + if (!strcmp(mod->u.st.index, name)) + return 1; + } + + return 0; } void emit_term(cql_transform_t ct, + struct cql_node *cn, const char *term, int length, void (*pr)(const char *buf, void *client_data), void *client_data) { int i; - if (length > 0) + const char *ns = cn->u.st.index_uri; + int process_term = !has_modifier(cn, "regexp"); + char *z3958_mem = 0; + + assert(cn->which == CQL_NODE_ST); + + if (process_term && length > 0) { if (length > 1 && term[0] == '^' && term[length-1] == '^') { cql_pr_attr(ct, "position", "firstAndLast", 0, - pr, client_data, 32); + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length -= 2; } else if (term[0] == '^') { cql_pr_attr(ct, "position", "first", 0, - pr, client_data, 32); + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length--; } else if (term[length-1] == '^') { cql_pr_attr(ct, "position", "last", 0, - pr, client_data, 32); + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); length--; } else { cql_pr_attr(ct, "position", "any", 0, - pr, client_data, 32); + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); } } - if (length > 0) + if (process_term && length > 0) { + const char *first_wc = wcchar(1, term, length); + const char *second_wc = first_wc ? + wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0; + /* Check for well-known globbing patterns that represent * simple truncation attributes as expected by, for example, * Bath-compliant server. If we find such a pattern but * there's no mapping for it, that's fine: we just use a * general pattern-matching attribute. */ - if (length > 1 && term[0] == '*' && term[length-1] == '*' && - wcchar(term+1, length-2) == 0 && - cql_pr_attr(ct, "truncation", "both", 0, - pr, client_data, 0)) { + if (first_wc == term && second_wc == term + length-1 + && *first_wc == '*' && *second_wc == '*' + && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) + { term++; length -= 2; } - else if (term[0] == '*' && - wcchar(term+1, length-1) == 0 && - cql_pr_attr(ct, "truncation", "left", 0, - pr, client_data, 0)) { + else if (first_wc == term && second_wc == 0 && *first_wc == '*' + && cql_pr_attr(ct, "truncation", "left", 0, + pr, client_data, 0)) + { term++; length--; } - else if (term[length-1] == '*' && - wcchar(term, length-1) == 0 && - cql_pr_attr(ct, "truncation", "right", 0, - pr, client_data, 0)) { + else if (first_wc == term + length-1 && second_wc == 0 + && *first_wc == '*' + && cql_pr_attr(ct, "truncation", "right", 0, + pr, client_data, 0)) + { length--; } - else if (wcchar(term, length)) + else if (first_wc) { /* We have one or more wildcard characters, but not in a * way that can be dealt with using only the standard @@ -304,17 +496,22 @@ void emit_term(cql_transform_t ct, * supported". */ int i; - char *mem; cql_pr_attr(ct, "truncation", "z3958", 0, - pr, client_data, 28); - mem = (char *) xmalloc(length+1); - for (i = 0; i < length; i++) { - if (term[i] == '*') mem[i] = '?'; - else if (term[i] == '?') mem[i] = '#'; - else mem[i] = term[i]; + pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); + z3958_mem = (char *) xmalloc(length+1); + for (i = 0; i < length; i++) + { + if (i > 0 && term[i-1] == '\\') + z3958_mem[i] = term[i]; + else if (term[i] == '*') + z3958_mem[i] = '?'; + else if (term[i] == '?') + z3958_mem[i] = '#'; + else + z3958_mem[i] = term[i]; } - mem[length] = '\0'; - term = mem; + z3958_mem[length] = '\0'; + term = z3958_mem; } else { /* No masking characters. Use "truncation.none" if given. */ @@ -322,16 +519,63 @@ void emit_term(cql_transform_t ct, pr, client_data, 0); } } + if (ns) { + cql_pr_attr_uri(ct, "index", ns, + cn->u.st.index, "serverChoice", + pr, client_data, YAZ_SRW_UNSUPP_INDEX); + } + if (cn->u.st.modifiers) + { + struct cql_node *mod = cn->u.st.modifiers; + for (; mod; mod = mod->u.st.modifiers) + { + cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0, + pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER); + } + } (*pr)("\"", client_data); for (i = 0; iu.st.extra_terms; + if (ne) + { + (*pr)("@", client_data); + (*pr)(op, client_data); + (*pr)(" ", client_data); + } + emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term), + pr, client_data); + for (; ne; ne = ne->u.st.extra_terms) + { + if (ne->u.st.extra_terms) + { + (*pr)("@", client_data); + (*pr)(op, client_data); + (*pr)(" ", client_data); + } + emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term), + pr, client_data); + } } void emit_wordlist(cql_transform_t ct, @@ -354,7 +598,7 @@ void emit_wordlist(cql_transform_t ct, (*pr)("@", client_data); (*pr)(op, client_data); (*pr)(" ", client_data); - emit_term(ct, last_term, last_length, pr, client_data); + emit_term(ct, cn, last_term, last_length, pr, client_data); } last_term = cp0; if (cp1) @@ -364,7 +608,7 @@ void emit_wordlist(cql_transform_t ct, cp0 = cp1; } if (last_term) - emit_term(ct, last_term, last_length, pr, client_data); + emit_term(ct, cn, last_term, last_length, pr, client_data); } void cql_transform_r(cql_transform_t ct, @@ -373,6 +617,7 @@ void cql_transform_r(cql_transform_t ct, void *client_data) { const char *ns; + struct cql_node *mods; if (!cn) return; @@ -383,74 +628,63 @@ void cql_transform_r(cql_transform_t ct, if (ns) { if (!strcmp(ns, cql_uri()) - && cn->u.st.index && !strcmp(cn->u.st.index, "resultSet")) + && cn->u.st.index && !cql_strcmp(cn->u.st.index, "resultSet")) { (*pr)("@set \"", client_data); (*pr)(cn->u.st.term, client_data); (*pr)("\" ", client_data); return ; } - cql_pr_attr_uri(ct, "index", ns, - cn->u.st.index, "serverChoice", - pr, client_data, 16); } else { if (!ct->error) { - ct->error = 15; + ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET; ct->addinfo = 0; } } - if (cn->u.st.relation && !strcmp(cn->u.st.relation, "=")) - cql_pr_attr(ct, "relation", "eq", "scr", - pr, client_data, 19); - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "<=")) - cql_pr_attr(ct, "relation", "le", "scr", - pr, client_data, 19); - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, ">=")) - cql_pr_attr(ct, "relation", "ge", "scr", - pr, client_data, 19); - else - cql_pr_attr(ct, "relation", cn->u.st.relation, "eq", - pr, client_data, 19); - if (cn->u.st.modifiers) - { - struct cql_node *mod = cn->u.st.modifiers; - for (; mod; mod = mod->u.st.modifiers) - { - cql_pr_attr(ct, "relationModifier", mod->u.st.index, 0, - pr, client_data, 20); - } - } + cql_pr_attr(ct, "always", 0, 0, pr, client_data, 0); + cql_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data, + YAZ_SRW_UNSUPP_RELATION); cql_pr_attr(ct, "structure", cn->u.st.relation, 0, - pr, client_data, 24); - if (cn->u.st.relation && !strcmp(cn->u.st.relation, "all")) - { + pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM); + if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "all")) emit_wordlist(ct, cn, pr, client_data, "and"); - } - else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "any")) - { + else if (cn->u.st.relation && !cql_strcmp(cn->u.st.relation, "any")) emit_wordlist(ct, cn, pr, client_data, "or"); - } else - { - emit_term(ct, cn->u.st.term, strlen(cn->u.st.term), - pr, client_data); - } + emit_terms(ct, cn, pr, client_data, "and"); break; case CQL_NODE_BOOL: (*pr)("@", client_data); (*pr)(cn->u.boolean.value, client_data); (*pr)(" ", client_data); + mods = cn->u.boolean.modifiers; + if (!strcmp(cn->u.boolean.value, "prox")) + { + if (!cql_pr_prox(ct, mods, pr, client_data)) + return; + } + else if (mods) + { + /* Boolean modifiers other than on proximity not supported */ + ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; + ct->addinfo = xstrdup(mods->u.st.index); + return; + } cql_transform_r(ct, cn->u.boolean.left, pr, client_data); cql_transform_r(ct, cn->u.boolean.right, pr, client_data); + break; + + default: + fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which); + abort(); } } -int cql_transform(cql_transform_t ct, - struct cql_node *cn, +int cql_transform(cql_transform_t ct, struct cql_node *cn, void (*pr)(const char *buf, void *client_data), void *client_data) { @@ -458,18 +692,17 @@ int cql_transform(cql_transform_t ct, NMEM nmem = nmem_create(); ct->error = 0; - if (ct->addinfo) - xfree (ct->addinfo); + xfree(ct->addinfo); ct->addinfo = 0; for (e = ct->entry; e ; e = e->next) { - if (!memcmp(e->pattern, "set.", 4)) + if (!cql_strncmp(e->pattern, "set.", 4)) cql_apply_prefix(nmem, cn, e->pattern+4, e->value); - else if (!strcmp(e->pattern, "set")) + else if (!cql_strcmp(e->pattern, "set")) cql_apply_prefix(nmem, cn, 0, e->value); } - cql_transform_r (ct, cn, pr, client_data); + cql_transform_r(ct, cn, pr, client_data); nmem_destroy(nmem); return ct->error; } @@ -480,8 +713,7 @@ int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f) return cql_transform(ct, cn, cql_fputs, f); } -int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, - char *out, int max) +int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max) { struct cql_buf_write_info info; int r; @@ -490,6 +722,16 @@ int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, info.max = max; info.buf = out; r = cql_transform(ct, cn, cql_buf_write_handler, &info); + if (info.off < 0) { + /* Attempt to write past end of buffer. For some reason, this + SRW diagnostic is deprecated, but it's so perfect for our + purposes that it would be stupid not to use it. */ + char numbuf[30]; + ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY; + sprintf(numbuf, "%ld", (long) info.max); + ct->addinfo = xstrdup(numbuf); + return -1; + } if (info.off >= 0) info.buf[info.off] = '\0'; return r; @@ -500,6 +742,14 @@ int cql_transform_error(cql_transform_t ct, const char **addinfo) *addinfo = ct->addinfo; return ct->error; } + +void cql_transform_set_error(cql_transform_t ct, int error, const char *addinfo) +{ + xfree(ct->addinfo); + ct->addinfo = addinfo ? xstrdup(addinfo) : 0; + ct->error = error; +} + /* * Local variables: * c-basic-offset: 4