The CQL parser now preserves terms completely: backslashes (\) are no
longer removed. This changes the CQL output for some test cases, so these
have been updated accordingly. The CQL to PQF transform only treats
* and ? as operators if they aren't escaped. A memory leak was also
fixed for the ANSI Z39.58 pattern case.
-/* $Id: cql.y,v 1.14 2007-06-28 07:58:07 adam Exp $
+/* $Id: cql.y,v 1.15 2007-10-31 21:58:07 adam Exp $
Copyright (C) 2002-2006
Index Data ApS
Copyright (C) 2002-2006
Index Data ApS
while ((c = cp->getbyte(cp->client_data)) != 0 && c != '"')
{
if (c == '\\')
while ((c = cp->getbyte(cp->client_data)) != 0 && c != '"')
{
if (c == '\\')
c = cp->getbyte(cp->client_data);
c = cp->getbyte(cp->client_data);
+ if (!c)
+ break;
+ }
+ putb(lval, cp, c);
while (c != 0 && !strchr(" \n()=<>/", c))
{
if (c == '\\')
while (c != 0 && !strchr(" \n()=<>/", c))
{
if (c == '\\')
c = cp->getbyte(cp->client_data);
c = cp->getbyte(cp->client_data);
putb(lval, cp, c);
c = cp->getbyte(cp->client_data);
}
putb(lval, cp, c);
c = cp->getbyte(cp->client_data);
}
#if YYDEBUG
printf ("got %s\n", lval->buf);
#endif
#if YYDEBUG
printf ("got %s\n", lval->buf);
#endif
-/* $Id: cqltransform.c,v 1.28 2007-03-29 11:14:11 mike Exp $
+/* $Id: cqltransform.c,v 1.29 2007-10-31 21:58:07 adam Exp $
Copyright (C) 1995-2007, Index Data ApS
Index Data Aps
Copyright (C) 1995-2007, Index Data ApS
Index Data Aps
* characters starting at `term', or a null pointer if there are
* none -- like memchr().
*/
* characters starting at `term', or a null pointer if there are
* none -- like memchr().
*/
-static const char *wcchar(const char *term, int length)
+static const char *wcchar(int start, const char *term, int length)
- const char *best = 0;
- const char *current;
- char *whichp;
-
- for (whichp = "*?"; *whichp != '\0'; whichp++) {
- current = (const char *) memchr(term, *whichp, length);
- if (current != 0 && (best == 0 || current < best))
- best = current;
+ while (length > 0)
+ {
+ if (start || term[-1] != '\\')
+ if (strchr("*?", *term))
+ return term;
+ term++;
+ length--;
+ start = 0;
int i;
const char *ns = cn->u.st.index_uri;
int process_term = !has_modifier(cn, "regexp");
int i;
const char *ns = cn->u.st.index_uri;
int process_term = !has_modifier(cn, "regexp");
assert(cn->which == CQL_NODE_ST);
assert(cn->which == CQL_NODE_ST);
if (process_term && length > 0)
{
if (process_term && length > 0)
{
+ const char *first_wc = wcchar(1, term, length);
+ const char *second_wc = first_wc ?
+ wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0;
+
/* Check for well-known globbing patterns that represent
* simple truncation attributes as expected by, for example,
* Bath-compliant server. If we find such a pattern but
* there's no mapping for it, that's fine: we just use a
* general pattern-matching attribute.
*/
/* Check for well-known globbing patterns that represent
* simple truncation attributes as expected by, for example,
* Bath-compliant server. If we find such a pattern but
* there's no mapping for it, that's fine: we just use a
* general pattern-matching attribute.
*/
- if (length > 1 && term[0] == '*' && term[length-1] == '*' &&
- wcchar(term+1, length-2) == 0 &&
- cql_pr_attr(ct, "truncation", "both", 0,
- pr, client_data, 0)) {
+ if (first_wc == term && second_wc == term + length-1
+ && *first_wc == '*' && *second_wc == '*'
+ && cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0))
+ {
- else if (term[0] == '*' &&
- wcchar(term+1, length-1) == 0 &&
- cql_pr_attr(ct, "truncation", "left", 0,
- pr, client_data, 0)) {
+ else if (first_wc == term && second_wc == 0 && *first_wc == '*'
+ && cql_pr_attr(ct, "truncation", "left", 0,
+ pr, client_data, 0))
+ {
- else if (term[length-1] == '*' &&
- wcchar(term, length-1) == 0 &&
- cql_pr_attr(ct, "truncation", "right", 0,
- pr, client_data, 0)) {
+ else if (first_wc == term + length-1 && second_wc == 0
+ && *first_wc == '*'
+ && cql_pr_attr(ct, "truncation", "right", 0,
+ pr, client_data, 0))
+ {
- else if (wcchar(term, length))
{
/* We have one or more wildcard characters, but not in a
* way that can be dealt with using only the standard
{
/* We have one or more wildcard characters, but not in a
* way that can be dealt with using only the standard
cql_pr_attr(ct, "truncation", "z3958", 0,
pr, client_data, 28);
cql_pr_attr(ct, "truncation", "z3958", 0,
pr, client_data, 28);
- mem = (char *) xmalloc(length+1);
- for (i = 0; i < length; i++) {
- if (term[i] == '*') mem[i] = '?';
- else if (term[i] == '?') mem[i] = '#';
- else mem[i] = term[i];
+ z3958_mem = (char *) xmalloc(length+1);
+ for (i = 0; i < length; i++)
+ {
+ if (i > 0 && term[i-1] == '\\')
+ z3958_mem[i] = term[i];
+ else if (term[i] == '*')
+ z3958_mem[i] = '?';
+ else if (term[i] == '?')
+ z3958_mem[i] = '#';
+ else
+ z3958_mem[i] = term[i];
- mem[length] = '\0';
- term = mem;
+ z3958_mem[length] = '\0';
+ term = z3958_mem;
}
else {
/* No masking characters. Use "truncation.none" if given. */
}
else {
/* No masking characters. Use "truncation.none" if given. */
(*pr)(cp, client_data);
}
(*pr)("\" ", client_data);
(*pr)(cp, client_data);
}
(*pr)("\" ", client_data);
}
void emit_wordlist(cql_transform_t ct,
}
void emit_wordlist(cql_transform_t ct,
<relation>
<value>scr</value>
</relation>
<relation>
<value>scr</value>
</relation>
- <term>^cat says "fish"</term>
+ <term>^cat says \"fish\"</term>
<relation>
<value>scr</value>
</relation>
<relation>
<value>scr</value>
</relation>
- <term>^cat*fishdog"horse?</term>
+ <term>^cat*fishdog\"horse?</term>