-/*
- * Copyright (c) 1995, the EUROPAGATE consortium (see below).
- *
- * The EUROPAGATE consortium members are:
- *
- * University College Dublin
- * Danmarks Teknologiske Videnscenter
- * An Chomhairle Leabharlanna
- * Consejo Superior de Investigaciones Cientificas
- *
- * Permission to use, copy, modify, distribute, and sell this software and
- * its documentation, in whole or in part, for any purpose, is hereby granted,
- * provided that:
- *
- * 1. This copyright and permission notice appear in all copies of the
- * software and its documentation. Notices of copyright or attribution
- * which appear at the beginning of any file must remain unchanged.
- *
- * 2. The names of EUROPAGATE or the project partners may not be used to
- * endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * 3. Users of this software (implementors and gateway operators) agree to
- * inform the EUROPAGATE consortium of their use of the software. This
- * information will be used to evaluate the EUROPAGATE project and the
- * software, and to plan further developments. The consortium may use
- * the information in later publications.
- *
- * 4. Users of this software agree to make their best efforts, when
- * documenting their use of the software, to acknowledge the EUROPAGATE
- * consortium, and the role played by the software in their work.
- *
- * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
- * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
- * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
- * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
- * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
- * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
- * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * USE OR PERFORMANCE OF THIS SOFTWARE.
- *
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
*/
-
/**
* \file cclfind.c
* \brief Implements parsing of a CCL FIND query.
* of lookahead in the handling of relational operations. So
* it's not really pure.
*/
-
-
-/* CCL find (to rpn conversion)
- * Europagate, 1995
- *
- * $Id: cclfind.c,v 1.14 2007-05-01 12:22:11 adam Exp $
- *
- * Old Europagate log:
- *
- * Revision 1.16 1996/01/08 08:41:13 adam
- * Removed unused function.
- *
- * Revision 1.15 1995/07/20 08:14:34 adam
- * Qualifiers were observed too often. Instead tokens are treated as
- * qualifiers only when separated by comma.
- *
- * Revision 1.14 1995/05/16 09:39:26 adam
- * LICENSE.
- *
- * Revision 1.13 1995/04/17 09:31:42 adam
- * Improved handling of qualifiers. Aliases or reserved words.
- *
- * Revision 1.12 1995/03/20 15:27:43 adam
- * Minor changes.
- *
- * Revision 1.11 1995/02/23 08:31:59 adam
- * Changed header.
- *
- * Revision 1.9 1995/02/16 13:20:06 adam
- * Spell fix.
- *
- * Revision 1.8 1995/02/14 19:59:42 adam
- * Removed a syntax error.
- *
- * Revision 1.7 1995/02/14 19:55:10 adam
- * Header files ccl.h/cclp.h are gone! They have been merged an
- * moved to ../include/ccl.h.
- * Node kind(s) in ccl_rpn_node have changed names.
- *
- * Revision 1.6 1995/02/14 16:20:55 adam
- * Qualifiers are read from a file now.
- *
- * Revision 1.5 1995/02/14 14:12:41 adam
- * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990).
- *
- * Revision 1.4 1995/02/14 13:16:29 adam
- * Left and/or right truncation implemented.
- *
- * Revision 1.3 1995/02/14 10:25:56 adam
- * The constructions 'qualifier rel term ...' implemented.
- *
- * Revision 1.2 1995/02/13 15:15:07 adam
- * Added handling of qualifiers. Not finished yet.
- *
- * Revision 1.1 1995/02/13 12:35:20 adam
- * First version of CCL. Qualifiers aren't handled yet.
- *
- */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "cclp.h"
}
+#define REGEX_CHARS "^[]{}()|.*+?!$"
+#define CCL_CHARS "#?\\"
/**
* search_term: Parse CCL search term.
* cclp: CCL Parser
struct ccl_rpn_node *p;
size_t no, i;
int no_spaces = 0;
- int left_trunc = 0;
- int right_trunc = 0;
- int mid_trunc = 0;
int relation_value = -1;
int position_value = -1;
int structure_value = -1;
int truncation_value = -1;
int completeness_value = -1;
int len = 0;
+ int left_trunc = 0;
+ int right_trunc = 0;
+ int regex_trunc = 0;
+ int z3958_trunc = 0;
size_t max = 200;
if (and_list || or_list || !multi)
max = 1;
for (i = 0; i<lookahead->len; i++)
if (lookahead->name[i] == ' ')
no_spaces++;
- else if (strchr(truncation_aliases[0], lookahead->name[i]))
- {
- if (no == 0 && i == 0 && lookahead->len >= 1)
- left_trunc = 1;
- else if (!is_term_ok(lookahead->next->kind, term_list) &&
- i == lookahead->len-1 && i >= 1)
- right_trunc = 1;
- else
- mid_trunc = 1;
- }
len += 1+lookahead->len+lookahead->ws_prefix_len;
lookahead = lookahead->next;
}
if (truncation_value != -1)
continue;
truncation_value = attr->value.numeric;
- left_trunc = right_trunc = mid_trunc = 0;
break;
case CCL_BIB1_COM:
if (completeness_value != -1)
ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
}
+ if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+ &attset))
+ {
+ regex_trunc = 1; /* regex trunc (102) allowed */
+ }
+ else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+ &attset))
+ {
+ z3958_trunc = 1; /* Z39.58 trunc (CCL) trunc allowed */
+ }
+
/* make the RPN token */
- p->u.t.term = (char *)xmalloc(len);
+ p->u.t.term = (char *)xmalloc(len * 2 + 2);
ccl_assert(p->u.t.term);
p->u.t.term[0] = '\0';
for (i = 0; i<no; i++)
{
const char *src_str = cclp->look_token->name;
size_t src_len = cclp->look_token->len;
-
- if (i == 0 && left_trunc)
- {
- src_len--;
- src_str++;
- }
- if (i == no-1 && right_trunc)
- src_len--;
+ int j;
+ int quote_mode = 0;
+
if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
{
size_t len = strlen(p->u.t.term);
cclp->look_token->ws_prefix_len);
p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
}
- strxcat(p->u.t.term, src_str, src_len);
+ for (j = 0; j < src_len; j++)
+ {
+ if (j > 0 && src_str[j-1] == '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
+ else if (src_str[j] == '"')
+ quote_mode = !quote_mode;
+ else if (!quote_mode && src_str[j] == '?')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".*");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "?");
+ z3958_trunc = 2;
+ }
+ else if (i == 0 && j == 0)
+ left_trunc = 1;
+ else if (i == no - 1 && j == src_len - 1)
+ right_trunc = 1;
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (!quote_mode && src_str[j] == '#')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "#");
+ z3958_trunc = 2;
+ }
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (src_str[j] != '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
+ }
ADVANCE;
}
}
ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
}
+ else if (regex_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+ }
+ else if (z3958_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+ }
else
{
if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab