X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fcqltransform.c;h=f17af20bf8777e80452242656f322d5442b57ae0;hp=117ca9791ad1d638e5a5254f351bda357a81d7b0;hb=b70c72a5a3914502cbdd17588e74e6c74bd282b6;hpb=135a70856fc99547451ea919da242e7e7382acea diff --git a/src/cqltransform.c b/src/cqltransform.c index 117ca97..f17af20 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2011 Index Data + * Copyright (C) 1995-2012 Index Data * See the file LICENSE for details. */ /** @@ -71,7 +71,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, int ret = 0; /* 0=OK, != 0 FAIL */ int t; t = yaz_tok_move(tp); - + while (t == YAZ_TOK_STRING && ae_num < 20) { WRBUF type_str = wrbuf_alloc(); @@ -79,7 +79,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, Z_AttributeElement *elem = 0; const char *value_str = 0; /* attset type=value OR type=value */ - + elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem)); elem->attributeSet = 0; ae[ae_num] = elem; @@ -90,19 +90,19 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, { wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); + wrbuf_destroy(set_str); break; } - if (t == YAZ_TOK_STRING) - { + if (t == YAZ_TOK_STRING) + { wrbuf_puts(ct->w, " "); wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); set_str = type_str; - + elem->attributeSet = yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET, wrbuf_cstr(set_str), ct->nmem); - + type_str = wrbuf_alloc(); wrbuf_puts(type_str, yaz_tok_parse_string(tp)); t = yaz_tok_move(tp); @@ -113,7 +113,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, { wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); + wrbuf_destroy(set_str); yaz_log(YLOG_WARN, "Expected numeric attribute type"); ret = -1; break; @@ -121,8 +121,8 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, wrbuf_destroy(type_str); if (set_str) - wrbuf_destroy(set_str); - + wrbuf_destroy(set_str); + if (t != '=') { yaz_log(YLOG_WARN, "Expected = after after attribute type"); @@ -182,7 +182,7 @@ static int cql_transform_parse_tok_line(cql_transform_t ct, (*pp)->attr_list.attributes = (Z_AttributeElement **) nmem_malloc(ct->nmem, ae_num * sizeof(Z_AttributeElement *)); - memcpy((*pp)->attr_list.attributes, ae, + memcpy((*pp)->attr_list.attributes, ae, ae_num * sizeof(Z_AttributeElement *)); } (*pp)->next = 0; @@ -210,7 +210,7 @@ int cql_transform_define_pattern(cql_transform_t ct, const char *pattern, yaz_tok_parse_destroy(tp); return r; } - + cql_transform_t cql_transform_open_FILE(FILE *f) { cql_transform_t ct = cql_transform_create(); @@ -309,7 +309,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) z_AttributeElement(odr_a, &a, 0, 0); z_AttributeElement(odr_b, &b, 0, 0); - + buf_a = odr_getbuf(odr_a, &len_a, 0); buf_b = odr_getbuf(odr_b, &len_b, 0); @@ -320,7 +320,7 @@ static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) return ret; } -const char *cql_lookup_reverse(cql_transform_t ct, +const char *cql_lookup_reverse(cql_transform_t ct, const char *category, Z_AttributeList *attributes) { @@ -348,7 +348,7 @@ const char *cql_lookup_reverse(cql_transform_t ct, } if (j == attributes->num_attributes) break; /* i was not found at all.. try next pattern */ - + } if (i == e->attr_list.num_attributes) return e->pattern + clen; @@ -356,7 +356,7 @@ const char *cql_lookup_reverse(cql_transform_t ct, } return 0; } - + static const char *cql_lookup_property(cql_transform_t ct, const char *pat1, const char *pat2, const char *pat3) @@ -374,7 +374,7 @@ static const char *cql_lookup_property(cql_transform_t ct, sprintf(pattern, "%.39s", pat1); else return 0; - + for (e = ct->entry; e; e = e->next) { if (!cql_strcmp(e->pattern, pattern)) @@ -392,11 +392,11 @@ int cql_pr_attr_uri(cql_transform_t ct, const char *category, const char *res = 0; const char *eval = val ? val : default_val; const char *prefix = 0; - + if (uri) { struct cql_prop_entry *e; - + for (e = ct->entry; e; e = e->next) if (!memcmp(e->pattern, "set.", 4) && e->value && !strcmp(e->value, uri)) @@ -500,8 +500,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, void *client_data) { int exclusion = 0; - int distance; /* to be filled in later depending on unit */ - int distance_defined = 0; + int distance = -1; int ordered = 0; int proxrel = 2; /* less than or equal */ int unit = 2; /* word */ @@ -514,26 +513,25 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, if (!strcmp(name, "distance")) { distance = strtol(term, (char**) 0, 0); - distance_defined = 1; if (!strcmp(relation, "=")) proxrel = 3; else if (!strcmp(relation, ">")) proxrel = 5; else if (!strcmp(relation, "<")) proxrel = 1; - else if (!strcmp(relation, ">=")) + else if (!strcmp(relation, ">=")) proxrel = 4; else if (!strcmp(relation, "<=")) proxrel = 2; else if (!strcmp(relation, "<>")) proxrel = 6; - else + else { ct->error = YAZ_SRW_UNSUPP_PROX_RELATION; ct->addinfo = xstrdup(relation); return 0; } - } + } else if (!strcmp(name, "ordered")) ordered = 1; else if (!strcmp(name, "unordered")) @@ -548,14 +546,14 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, unit = 4; else if (!strcmp(term, "element")) unit = 8; - else + else { ct->error = YAZ_SRW_UNSUPP_PROX_UNIT; ct->addinfo = xstrdup(term); return 0; } - } - else + } + else { ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; ct->addinfo = xstrdup(name); @@ -564,7 +562,7 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, mods = mods->u.st.modifiers; } - if (!distance_defined) + if (distance == -1) distance = (unit == 2) ? 1 : 0; cql_pr_int(exclusion, pr, client_data); @@ -577,25 +575,6 @@ static int cql_pr_prox(cql_transform_t ct, struct cql_node *mods, return 1; } -/* Returns location of first wildcard character in the `length' - * characters starting at `term', or a null pointer of there are - * none -- like memchr(). - */ -static const char *wcchar(int start, const char *term, int length) -{ - while (length > 0) - { - if (start || term[-1] != '\\') - if (strchr("*?", *term)) - return term; - term++; - length--; - start = 0; - } - return 0; -} - - /* ### checks for CQL relation-name rather than Type-1 attribute */ static int has_modifier(struct cql_node *cn, const char *name) { struct cql_node *mod; @@ -616,28 +595,66 @@ static void emit_term(cql_transform_t ct, { int i; const char *ns = cn->u.st.index_uri; - int process_term = !has_modifier(cn, "regexp"); int z3958_mode = 0; + int process_term = 1; + if (has_modifier(cn, "regexp")) + process_term = 0; + else if (cql_lookup_property(ct, "truncation", 0, "cql")) + { + process_term = 0; + cql_pr_attr(ct, "truncation", "cql", 0, + pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); + } assert(cn->which == CQL_NODE_ST); - if (process_term && length > 0) + if (process_term) { - if (length > 1 && term[0] == '^' && term[length-1] == '^') + unsigned anchor = 0; + unsigned trunc = 0; + for (i = 0; i < length; i++) + { + if (term[i] == '\\' && i < length - 1) + i++; + else + { + switch (term[i]) + { + case '^': + if (i == 0) + anchor |= 1; + else if (i == length - 1) + anchor |= 2; + break; + case '*': + if (i == 0) + trunc |= 1; + else if (i == length - 1) + trunc |= 2; + else + z3958_mode = 1; + break; + case '?': + z3958_mode = 1; + break; + } + } + } + if (anchor == 3) { cql_pr_attr(ct, "position", "firstAndLast", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length -= 2; } - else if (term[0] == '^') + else if (anchor == 1) { cql_pr_attr(ct, "position", "first", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); term++; length--; } - else if (term[length-1] == '^') + else if (anchor == 2) { cql_pr_attr(ct, "position", "last", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); @@ -648,53 +665,34 @@ static void emit_term(cql_transform_t ct, cql_pr_attr(ct, "position", "any", 0, pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); } - } - - if (process_term && length > 0) - { - const char *first_wc = wcchar(1, term, length); - const char *second_wc = first_wc ? - wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0; - - /* Check for well-known globbing patterns that represent - * simple truncation attributes as expected by, for example, - * Bath-compliant server. If we find such a pattern but - * there's no mapping for it, that's fine: we just use a - * general pattern-matching attribute. - */ - if (first_wc == term && second_wc == term + length-1 - && *first_wc == '*' && *second_wc == '*' - && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) - { - term++; - length -= 2; - } - else if (first_wc == term && second_wc == 0 && *first_wc == '*' - && cql_pr_attr(ct, "truncation", "left", 0, - pr, client_data, 0)) - { - term++; - length--; - } - else if (first_wc == term + length-1 && second_wc == 0 - && *first_wc == '*' - && cql_pr_attr(ct, "truncation", "right", 0, - pr, client_data, 0)) + if (z3958_mode == 0) { - length--; + if (trunc == 3 && cql_pr_attr(ct, "truncation", + "both", 0, pr, client_data, 0)) + { + term++; + length -= 2; + } + else if (trunc == 1 && cql_pr_attr(ct, "truncation", + "left", 0, pr, client_data, 0)) + { + term++; + length--; + } + else if (trunc == 2 && cql_pr_attr(ct, "truncation", "right", 0, + pr, client_data, 0)) + { + length--; + } + else if (trunc) + z3958_mode = 1; + else + cql_pr_attr(ct, "truncation", "none", 0, + pr, client_data, 0); } - else if (first_wc) - { - z3958_mode = 1; + if (z3958_mode) cql_pr_attr(ct, "truncation", "z3958", 0, pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); - } - else - { - /* No masking characters. Use "truncation.none" if given. */ - cql_pr_attr(ct, "truncation", "none", 0, - pr, client_data, 0); - } } if (ns) { cql_pr_attr_uri(ct, "index", ns, @@ -710,42 +708,48 @@ static void emit_term(cql_transform_t ct, pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER); } } - - /* produce only \-sequences if: - 1) the output is a Z39.58-trunc reserved character - 2) the output is a PQF reserved character (\\, \") - */ (*pr)("\"", client_data); - for (i = 0; i < length; i++) - { - char x[3]; /* temp buffer */ - if (i > 0 && term[i-1] == '\\') + if (process_term) + for (i = 0; i < length; i++) { - if (term[i] == '\"' || term[i] == '\\') - pr("\\", client_data); - if (z3958_mode && strchr("#?", term[i])) - pr("\\\\", client_data); /* double \\ to survive PQF parse */ - x[0] = term[i]; - x[1] = '\0'; - pr(x, client_data); + char x[2]; /* temp buffer */ + if (term[i] == '\\' && i < length - 1) + { + i++; + if (strchr("\"\\", term[i])) + pr("\\", client_data); + if (z3958_mode && strchr("#?", term[i])) + pr("\\\\", client_data); /* double \\ to survive PQF parse */ + x[0] = term[i]; + x[1] = '\0'; + pr(x, client_data); + } + else if (z3958_mode && term[i] == '*') + { + pr("?", client_data); + if (i < length - 1 && yaz_isdigit(term[i+1])) + pr("\\\\", client_data); /* dbl \\ to survive PQF parse */ + } + else if (z3958_mode && term[i] == '?') + { + pr("#", client_data); + } + else + { + if (term[i] == '\"') + pr("\\", client_data); + if (z3958_mode && strchr("#?", term[i])) + pr("\\\\", client_data); /* dbl \\ to survive PQF parse */ + x[0] = term[i]; + x[1] = '\0'; + pr(x, client_data); + } } - else if (z3958_mode && term[i] == '*') - { - pr("?", client_data); - /* avoid ?n sequences output (n=[0-9]) because that has - different semantics than just a single ? in Z39.58 - */ - if (i < length - 1 && yaz_isdigit(term[i+1])) - pr("\\\\", client_data); /* double \\ to survive PQF parse */ - } - else if (z3958_mode && term[i] == '?') - pr("#", client_data); - else if (term[i] != '\\') + else + { + for (i = 0; i < length; i++) { - if (term[i] == '\"') - pr("\\", client_data); - if (z3958_mode && strchr("#?", term[i])) - pr("\\\\", client_data); /* double \\ to survive PQF parse */ + char x[2]; x[0] = term[i]; x[1] = '\0'; pr(x, client_data); @@ -776,7 +780,7 @@ static void emit_terms(cql_transform_t ct, (*pr)("@", client_data); (*pr)(op, client_data); (*pr)(" ", client_data); - } + } emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term), pr, client_data); } @@ -865,11 +869,11 @@ void cql_transform_r(cql_transform_t ct, (*pr)(cn->u.boolean.value, client_data); (*pr)(" ", client_data); mods = cn->u.boolean.modifiers; - if (!strcmp(cn->u.boolean.value, "prox")) + if (!strcmp(cn->u.boolean.value, "prox")) { if (!cql_pr_prox(ct, mods, pr, client_data)) return; - } + } else if (mods) { /* Boolean modifiers other than on proximity not supported */