X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcqltransform.c;h=f17af20bf8777e80452242656f322d5442b57ae0;hp=de777d025221ce04459dcbfa0b92f6c7f7451284;hb=b70c72a5a3914502cbdd17588e74e6c74bd282b6;hpb=ffe862e36b8b60b1197b223cec0b78482cbd7763 diff --git a/src/cqltransform.c b/src/cqltransform.c index de777d0..f17af20 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -1,8 +1,7 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2010 Index Data + * Copyright (C) 1995-2012 Index Data * See the file LICENSE for details. */ - /** * \file cqltransform.c * \brief Implements CQL transform (CQL to RPN conversion). @@ -17,11 +16,13 @@ * index * relationModifier */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include -#include #include #include #include @@ -70,7 +71,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, int ret = 0; /* 0=OK, != 0 FAIL */ int t; t = yaz_tok_move(tp); - + while (t == YAZ_TOK_STRING && ae_num < 20) { WRBUF type_str = wrbuf_alloc(); @@ -78,7 +79,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, Z_AttributeElement *elem = 0; const char *value_str = 0; /* attset type=value OR type=value */ - + elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem)); elem->attributeSet = 0; ae[ae_num] = elem; @@ -89,19 +90,19 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, { wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); + wrbuf_destroy(set_str); break; } - if (t == YAZ_TOK_STRING) - { + if (t == YAZ_TOK_STRING) + { wrbuf_puts(ct->w, " "); wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); set_str = type_str; - + elem->attributeSet = yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET, wrbuf_cstr(set_str), ct->nmem); - + type_str = wrbuf_alloc(); wrbuf_puts(type_str, yaz_tok_parse_string(tp)); t = yaz_tok_move(tp); @@ -112,7 +113,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, { wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); + wrbuf_destroy(set_str); yaz_log(YLOG_WARN, "Expected numeric attribute type"); ret = -1; break; @@ -120,8 +121,8 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); - + wrbuf_destroy(set_str); + if (t != '=') { yaz_log(YLOG_WARN, "Expected = after after attribute type"); @@ -136,7 +137,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, break; } value_str = yaz_tok_parse_string(tp); - if (isdigit(*value_str)) + if (yaz_isdigit(*value_str)) { elem->which = Z_AttributeValue_numeric; elem->value.numeric = @@ -181,7 +182,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, (*pp)->attr_list.attributes = (Z_AttributeElement **) nmem_malloc(ct->nmem, ae_num * sizeof(Z_AttributeElement *)); - memcpy((*pp)->attr_list.attributes, ae, + memcpy((*pp)->attr_list.attributes, ae, ae_num * sizeof(Z_AttributeElement *)); } (*pp)->next = 0; @@ -209,7 +210,7 @@ int cql_transform_define_pattern(cql_transform_t ct, const char *pattern, yaz_tok_parse_destroy(tp); return r; } - + cql_transform_t cql_transform_open_FILE(FILE *f) { cql_transform_t ct = cql_transform_create(); @@ -308,7 +309,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) z_AttributeElement(odr_a, &a, 0, 0); z_AttributeElement(odr_b, &b, 0, 0); - + buf_a = odr_getbuf(odr_a, &len_a, 0); buf_b = odr_getbuf(odr_b, &len_b, 0); @@ -319,7 +320,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) return ret; } -const char *cql_lookup_reverse(cql_transform_t ct, +const char *cql_lookup_reverse(cql_transform_t ct, const char *category, Z_AttributeList *attributes) { @@ -347,7 +348,7 @@ const char *cql_lookup_reverse(cql_transform_t ct, } if (j == attributes->num_attributes) break; /* i was not found at all.. try next pattern */ - + } if (i == e->attr_list.num_attributes) return e->pattern + clen; @@ -355,7 +356,7 @@ const char *cql_lookup_reverse(cql_transform_t ct, } return 0; } - + static const char *cql_lookup_property(cql_transform_t ct, const char *pat1, const char *pat2, const char *pat3) @@ -373,7 +374,7 @@ static const char *cql_lookup_property(cql_transform_t ct, sprintf(pattern, "%.39s", pat1); else return 0; - + for (e = ct->entry; e; e = e->next) { if (!cql_strcmp(e->pattern, pattern)) @@ -391,11 +392,11 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category, const char *res = 0; const char *eval = val ? val : default_val; const char *prefix = 0; - + if (uri) { struct cql_prop_entry *e; - + for (e = ct->entry; e; e = e->next) if (!memcmp(e->pattern, "set.", 4) && e->value && !strcmp(e->value, uri)) @@ -499,8 +500,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, void *client_data) { int exclusion = 0; - int distance; /* to be filled in later depending on unit */ - int distance_defined = 0; + int distance = -1; int ordered = 0; int proxrel = 2; /* less than or equal */ int unit = 2; /* word */ @@ -513,26 +513,25 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, if (!strcmp(name, "distance")) { distance = strtol(term, (char**) 0, 0); - distance_defined = 1; if (!strcmp(relation, "=")) proxrel = 3; else if (!strcmp(relation, ">")) proxrel = 5; else if (!strcmp(relation, "<")) proxrel = 1; - else if (!strcmp(relation, ">=")) + else if (!strcmp(relation, ">=")) proxrel = 4; else if (!strcmp(relation, "<=")) proxrel = 2; else if (!strcmp(relation, "<>")) proxrel = 6; - else + else { ct->error = YAZ_SRW_UNSUPP_PROX_RELATION; ct->addinfo = xstrdup(relation); return 0; } - } + } else if (!strcmp(name, "ordered")) ordered = 1; else if (!strcmp(name, "unordered")) @@ -547,14 +546,14 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, unit = 4; else if (!strcmp(term, "element")) unit = 8; - else + else { ct->error = YAZ_SRW_UNSUPP_PROX_UNIT; ct->addinfo = xstrdup(term); return 0; } - } - else + } + else { ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; ct->addinfo = xstrdup(name); @@ -563,7 +562,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, mods = mods->u.st.modifiers; } - if (!distance_defined) + if (distance == -1) distance = (unit == 2) ? 1 : 0; cql_pr_int(exclusion, pr, client_data); @@ -576,25 +575,6 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, return 1; } -/* Returns location of first wildcard character in the `length' - * characters starting at `term', or a null pointer of there are - * none -- like memchr(). - */ -static const char *wcchar(int start, const char *term, int length) -{ - while (length > 0) - { - if (start || term[-1] != '\\') - if (strchr("*?", *term)) - return term; - term++; - length--; - start = 0; - } - return 0; -} - - /* ### checks for CQL relation-name rather than Type-1 attribute */ static int has_modifier(struct cql_node *cn, const char *name) { struct cql_node *mod; @@ -607,36 +587,74 @@ static int has_modifier(struct cql_node *cn, const char *name) { } -void emit_term(cql_transform_t ct, - struct cql_node *cn, - const char *term, int length, - void (*pr)(const char *buf, void *client_data), - void *client_data) +static void emit_term(cql_transform_t ct, + struct cql_node *cn, + const char *term, int length, + void (*pr)(const char *buf, void *client_data), + void *client_data) { int i; const char *ns = cn->u.st.index_uri; - int process_term = !has_modifier(cn, "regexp"); - char *z3958_mem = 0; + int z3958_mode = 0; + int process_term = 1; + if (has_modifier(cn, "regexp")) + process_term = 0; + else if (cql_lookup_property(ct, "truncation", 0, "cql")) + { + process_term = 0; + cql_pr_attr(ct, "truncation", "cql", 0, + pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); + } assert(cn->which == CQL_NODE_ST); - if (process_term && length > 0) + if (process_term) { - if (length > 1 && term[0] == '^' && term[length-1] == '^') + unsigned anchor = 0; + unsigned trunc = 0; + for (i = 0; i < length; i++) + { + if (term[i] == '\\' && i < length - 1) + i++; + else + { + switch (term[i]) + { + case '^': + if (i == 0) + anchor |= 1; + else if (i == length - 1) + anchor |= 2; + break; + case '*': + if (i == 0) + trunc |= 1; + else if (i == length - 1) + trunc |= 2; + else + z3958_mode = 1; + break; + case '?': + z3958_mode = 1; + break; + } + } + } + if (anchor == 3) { cql_pr_attr(ct, "position", "firstAndLast", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length -= 2; } - else if (term[0] == '^') + else if (anchor == 1) { cql_pr_attr(ct, "position", "first", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length--; } - else if (term[length-1] == '^') + else if (anchor == 2) { cql_pr_attr(ct, "position", "last", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); @@ -647,75 +665,34 @@ void emit_term(cql_transform_t ct, cql_pr_attr(ct, "position", "any", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); } - } - - if (process_term && length > 0) - { - const char *first_wc = wcchar(1, term, length); - const char *second_wc = first_wc ? - wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0; - - /* Check for well-known globbing patterns that represent - * simple truncation attributes as expected by, for example, - * Bath-compliant server. If we find such a pattern but - * there's no mapping for it, that's fine: we just use a - * general pattern-matching attribute. - */ - if (first_wc == term && second_wc == term + length-1 - && *first_wc == '*' && *second_wc == '*' - && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) - { - term++; - length -= 2; - } - else if (first_wc == term && second_wc == 0 && *first_wc == '*' - && cql_pr_attr(ct, "truncation", "left", 0, - pr, client_data, 0)) + if (z3958_mode == 0) { - term++; - length--; - } - else if (first_wc == term + length-1 && second_wc == 0 - && *first_wc == '*' - && cql_pr_attr(ct, "truncation", "right", 0, - pr, client_data, 0)) - { - length--; - } - else if (first_wc) - { - /* We have one or more wildcard characters, but not in a - * way that can be dealt with using only the standard - * left-, right- and both-truncation attributes. We need - * to translate the pattern into a Z39.58-type pattern, - * which has been supported in BIB-1 since 1996. If - * there's no configuration element for "truncation.z3958" - * we indicate this as error 28 "Masking character not - * supported". - */ - int i; - cql_pr_attr(ct, "truncation", "z3958", 0, - pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); - z3958_mem = (char *) xmalloc(length+1); - for (i = 0; i < length; i++) + if (trunc == 3 && cql_pr_attr(ct, "truncation", + "both", 0, pr, client_data, 0)) { - if (i > 0 && term[i-1] == '\\') - z3958_mem[i] = term[i]; - else if (term[i] == '*') - z3958_mem[i] = '?'; - else if (term[i] == '?') - z3958_mem[i] = '#'; - else - z3958_mem[i] = term[i]; + term++; + length -= 2; } - z3958_mem[length] = '\0'; - term = z3958_mem; - } - else { - /* No masking characters. Use "truncation.none" if given. */ - cql_pr_attr(ct, "truncation", "none", 0, - pr, client_data, 0); + else if (trunc == 1 && cql_pr_attr(ct, "truncation", + "left", 0, pr, client_data, 0)) + { + term++; + length--; + } + else if (trunc == 2 && cql_pr_attr(ct, "truncation", "right", 0, + pr, client_data, 0)) + { + length--; + } + else if (trunc) + z3958_mode = 1; + else + cql_pr_attr(ct, "truncation", "none", 0, + pr, client_data, 0); } + if (z3958_mode) + cql_pr_attr(ct, "truncation", "z3958", 0, + pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); } if (ns) { cql_pr_attr_uri(ct, "index", ns, @@ -731,28 +708,61 @@ void emit_term(cql_transform_t ct, pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER); } } - (*pr)("\"", client_data); - for (i = 0; iu.st.extra_terms; if (ne) @@ -770,17 +780,17 @@ void emit_terms(cql_transform_t ct, (*pr)("@", client_data); (*pr)(op, client_data); (*pr)(" ", client_data); - } + } emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term), pr, client_data); } } -void emit_wordlist(cql_transform_t ct, - struct cql_node *cn, - void (*pr)(const char *buf, void *client_data), - void *client_data, - const char *op) +static void emit_wordlist(cql_transform_t ct, + struct cql_node *cn, + void (*pr)(const char *buf, void *client_data), + void *client_data, + const char *op) { const char *cp0 = cn->u.st.term; const char *cp1; @@ -859,11 +869,11 @@ void cql_transform_r(cql_transform_t ct, (*pr)(cn->u.boolean.value, client_data); (*pr)(" ", client_data); mods = cn->u.boolean.modifiers; - if (!strcmp(cn->u.boolean.value, "prox")) + if (!strcmp(cn->u.boolean.value, "prox")) { if (!cql_pr_prox(ct, mods, pr, client_data)) return; - } + } else if (mods) { /* Boolean modifiers other than on proximity not supported */ @@ -875,7 +885,9 @@ void cql_transform_r(cql_transform_t ct, cql_transform_r(ct, cn->u.boolean.left, pr, client_data); cql_transform_r(ct, cn->u.boolean.right, pr, client_data); break; - + case CQL_NODE_SORT: + cql_transform_r(ct, cn->u.sort.search, pr, client_data); + break; default: fprintf(stderr, "Fatal: impossible CQL node-type %d\n", cn->which); abort(); @@ -911,7 +923,8 @@ int cql_transform_FILE(cql_transform_t ct, struct cql_node *cn, FILE *f) return cql_transform(ct, cn, cql_fputs, f); } -int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, char *out, int max) +int cql_transform_buf(cql_transform_t ct, struct cql_node *cn, + char *out, int max) { struct cql_buf_write_info info; int r;