/* CCL find (to rpn conversion)
* Europagate, 1995
*
- * $Log: cclfind.c,v $
- * Revision 1.21 2001-02-21 13:46:53 adam
- * C++ fixes.
+ * $Id: cclfind.c,v 1.31 2002-06-06 12:54:24 adam Exp $
*
- * Revision 1.20 2000/11/16 13:03:12 adam
- * Function ccl_rpn_query sets attributeSet to Bib-1.
- *
- * Revision 1.19 2000/11/16 09:58:02 adam
- * Implemented local AttributeSet setting for CCL field maps.
- *
- * Revision 1.18 2000/10/17 19:50:28 adam
- * Implemented and-list and or-list for CCL module.
- *
- * Revision 1.17 2000/05/01 09:36:50 adam
- * Range operator only treated in ordered ranges so that minus (-) can be
- * used for, say, the and-not operator.
- *
- * Revision 1.16 2000/03/14 09:06:11 adam
- * Added POSIX threads support for frontend server.
- *
- * Revision 1.15 2000/02/24 23:49:13 adam
- * Fixed memory allocation problem.
- *
- * Revision 1.14 2000/01/31 13:15:21 adam
- * Removed uses of assert(3). Cleanup of ODR. CCL parser update so
- * that some characters are not surrounded by spaces in resulting term.
- * ILL-code updates.
- *
- * Revision 1.13 1999/12/22 13:13:32 adam
- * Search terms may include "operators" without causing error.
- *
- * Revision 1.12 1999/11/30 13:47:11 adam
- * Improved installation. Moved header files to include/yaz.
- *
- * Revision 1.11 1999/03/31 11:15:37 adam
- * Fixed memory leaks in ccl_find_str and ccl_qual_rm.
- *
- * Revision 1.10 1998/02/11 11:53:33 adam
- * Changed code so that it compiles as C++.
- *
- * Revision 1.9 1997/09/29 08:56:37 adam
- * Changed CCL parser to be thread safe. New type, CCL_parser, declared
- * and a create/destructers ccl_parser_create/ccl_parser/destory has
- * been added.
- *
- * Revision 1.8 1997/09/01 08:48:11 adam
- * New windows NT/95 port using MSV5.0. Only a few changes made
- * to avoid warnings.
- *
- * Revision 1.7 1997/05/14 06:53:26 adam
- * C++ support.
- *
- * Revision 1.6 1997/04/30 08:52:06 quinn
- * Null
- *
- * Revision 1.5 1996/10/11 15:00:24 adam
- * CCL parser from Europagate Email gateway 1.0.
+ * Old Europagate log:
*
* Revision 1.16 1996/01/08 08:41:13 adam
* Removed unused function.
*/
static char *copy_token_name (struct ccl_token *tp)
{
- char *str = (char *)malloc (tp->len + 1);
+ char *str = (char *)xmalloc (tp->len + 1);
ccl_assert (str);
memcpy (str, tp->name, tp->len);
str[tp->len] = '\0';
static struct ccl_rpn_node *mk_node (int kind)
{
struct ccl_rpn_node *p;
- p = (struct ccl_rpn_node *)malloc (sizeof(*p));
+ p = (struct ccl_rpn_node *)xmalloc (sizeof(*p));
ccl_assert (p);
p->kind = kind;
return p;
ccl_rpn_delete (rpn->u.p[1]);
break;
case CCL_RPN_TERM:
- free (rpn->u.t.term);
+ xfree (rpn->u.t.term);
for (attr = rpn->u.t.attr_list; attr; attr = attr1)
{
attr1 = attr->next;
if (attr->set)
- free (attr->set);
- free (attr);
+ xfree (attr->set);
+ xfree (attr);
}
break;
case CCL_RPN_SET:
- free (rpn->u.setname);
+ xfree (rpn->u.setname);
break;
case CCL_RPN_PROX:
ccl_rpn_delete (rpn->u.p[0]);
ccl_rpn_delete (rpn->u.p[1]);
break;
}
- free (rpn);
+ xfree (rpn);
}
static struct ccl_rpn_node *find_spec (CCL_parser cclp,
{
struct ccl_rpn_attr *n;
- n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
+ n = (struct ccl_rpn_attr *)xmalloc (sizeof(*n));
ccl_assert (n);
if (set)
{
- n->set = (char*) malloc (strlen(set)+1);
+ n->set = (char*) xmalloc (strlen(set)+1);
strcpy (n->set, set);
}
else
* search_term: Parse CCL search term.
* cclp: CCL Parser
* qa: Qualifier attributes already applied.
+ * term_list: tokens we accept as terms in context
+ * multi: whether we accept "multiple" tokens
* return: pointer to node(s); NULL on error.
*/
static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
struct ccl_rpn_attr **qa,
- int *term_list)
+ int *term_list, int multi)
{
- struct ccl_rpn_attr *qa_tmp[2];
struct ccl_rpn_node *p_top = 0;
struct ccl_token *lookahead = cclp->look_token;
int and_list = 0;
int or_list = 0;
char *attset;
+ const char *truncation_aliases;
+
+ truncation_aliases =
+ ccl_qual_search_special(cclp->bibset, "truncation");
+ if (!truncation_aliases)
+ truncation_aliases = "?";
- if (!qa)
- {
- /* no qualifier(s) applied. Use 'term' if it is defined */
-
- qa = qa_tmp;
- ccl_assert (qa);
- qa[0] = ccl_qual_search (cclp, "term", 4);
- qa[1] = NULL;
- }
if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST, 0))
and_list = 1;
if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST, 0))
{
struct ccl_rpn_node *p;
size_t no, i;
+ int no_spaces = 0;
int left_trunc = 0;
int right_trunc = 0;
int mid_trunc = 0;
int completeness_value = -1;
int len = 0;
size_t max = 200;
- if (and_list || or_list)
+ if (and_list || or_list || !multi)
max = 1;
-
+
+ /* ignore commas when dealing with and-lists .. */
+ if (and_list && lookahead && lookahead->kind == CCL_TOK_COMMA)
+ {
+ lookahead = lookahead->next;
+ ADVANCE;
+ continue;
+ }
/* go through each TERM token. If no truncation attribute is yet
met, then look for left/right truncation markers (?) and
set left_trunc/right_trunc/mid_trunc accordingly */
for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
{
for (i = 0; i<lookahead->len; i++)
- if (truncation_value == -1 && lookahead->name[i] == '?')
+ if (lookahead->name[i] == ' ')
+ no_spaces++;
+ else if (strchr(truncation_aliases, lookahead->name[i]))
{
if (no == 0 && i == 0 && lookahead->len >= 1)
left_trunc = 1;
if (len == 0)
break; /* no more terms . stop . */
+
if (p_top)
{
if (or_list)
qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP, &attset))
{ /* no structure attribute met. Apply either structure attribute
WORD or PHRASE depending on number of CCL tokens */
- if (no == 1)
+ if (no == 1 && no_spaces == 0)
add_attr (p, attset, CCL_BIB1_STR, 2);
else
add_attr (p, attset, CCL_BIB1_STR, 1);
}
/* make the RPN token */
- p->u.t.term = (char *)malloc (len);
+ p->u.t.term = (char *)xmalloc (len);
ccl_assert (p->u.t.term);
p->u.t.term[0] = '\0';
for (i = 0; i<no; i++)
&attset))
add_attr (p, attset, CCL_BIB1_TRU, 100);
}
+ if (!multi)
+ break;
}
if (!p_top)
cclp->error_code = CCL_ERR_TERM_EXPECTED;
struct ccl_rpn_attr **qa)
{
static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
- return search_term_x(cclp, qa, list);
+ return search_term_x(cclp, qa, list, 0);
}
-/*
- * qualifiers: Parse CCL qualifiers and search terms.
- * cclp: CCL Parser
- * la: Token pointer to RELATION token.
- * qa: Qualifier attributes already applied.
- * return: pointer to node(s); NULL on error.
- */
-static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la,
- struct ccl_rpn_attr **qa)
+static struct ccl_rpn_node *qualifiers2 (CCL_parser cclp,
+ struct ccl_rpn_attr **ap)
{
- struct ccl_token *lookahead = cclp->look_token;
- struct ccl_rpn_attr **ap;
- int no = 0;
- int i, rel;
char *attset;
-#if 0
- if (qa)
- {
- cclp->error_code = CCL_ERR_DOUBLE_QUAL;
- return NULL;
- }
-#endif
- for (lookahead = cclp->look_token; lookahead != la;
- lookahead=lookahead->next)
- no++;
- if (qa)
- for (i=0; qa[i]; i++)
- no++;
- ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap));
- ccl_assert (ap);
- for (i = 0; cclp->look_token != la; i++)
- {
- ap[i] = ccl_qual_search (cclp, cclp->look_token->name,
- cclp->look_token->len);
- if (!ap[i])
- {
- cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
- free (ap);
- return NULL;
- }
- ADVANCE;
- if (KIND == CCL_TOK_COMMA)
- ADVANCE;
- }
- if (qa)
- while (*qa)
- ap[i++] = *qa++;
- ap[i] = NULL;
+ int rel;
+
if (!qual_val_type(ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER, &attset))
{
/* unordered relation */
if (KIND != CCL_TOK_EQ)
{
cclp->error_code = CCL_ERR_EQ_EXPECTED;
- free (ap);
return NULL;
}
ADVANCE;
ADVANCE;
if (!(p = find_spec (cclp, ap)))
{
- free (ap);
return NULL;
}
if (KIND != CCL_TOK_RP)
{
cclp->error_code = CCL_ERR_RP_EXPECTED;
ccl_rpn_delete (p);
- free (ap);
return NULL;
}
ADVANCE;
}
else
p = search_terms (cclp, ap);
- free (ap);
return p;
}
/* ordered relation ... */
else
{
struct ccl_rpn_node *p;
-
+
ADVANCE; /* skip relation */
if (KIND == CCL_TOK_TERM &&
- cclp->look_token->next->len == 1 &&
+ cclp->look_token->next && cclp->look_token->next->len == 1 &&
cclp->look_token->next->name[0] == '-')
{
struct ccl_rpn_node *p1;
if (!(p1 = search_term (cclp, ap)))
- {
- free (ap);
return NULL;
- }
ADVANCE; /* skip '-' */
if (KIND == CCL_TOK_TERM) /* = term - term ? */
{
if (!(p2 = search_term (cclp, ap)))
{
ccl_rpn_delete (p1);
- free (ap);
return NULL;
}
p = mk_node (CCL_RPN_AND);
add_attr (p1, attset, CCL_BIB1_REL, 4);
p->u.p[1] = p2;
add_attr (p2, attset, CCL_BIB1_REL, 2);
- free (ap);
return p;
}
else /* = term - */
{
add_attr (p1, attset, CCL_BIB1_REL, 4);
- free (ap);
return p1;
}
}
else if (cclp->look_token->len == 1 &&
- cclp->look_token->name[0] == '"') /* = - term ? */
+ cclp->look_token->name[0] == '-') /* = - term ? */
{
ADVANCE;
if (!(p = search_term (cclp, ap)))
- {
- free (ap);
return NULL;
- }
add_attr (p, attset, CCL_BIB1_REL, 2);
- free (ap);
return p;
}
else if (KIND == CCL_TOK_LP)
{
ADVANCE;
if (!(p = find_spec (cclp, ap)))
- {
- free (ap);
return NULL;
- }
if (KIND != CCL_TOK_RP)
{
cclp->error_code = CCL_ERR_RP_EXPECTED;
ccl_rpn_delete (p);
- free (ap);
return NULL;
}
ADVANCE;
- free (ap);
return p;
}
else
{
if (!(p = search_terms (cclp, ap)))
- {
- free (ap);
return NULL;
- }
add_attr (p, attset, CCL_BIB1_REL, rel);
- free (ap);
return p;
}
cclp->error_code = CCL_ERR_TERM_EXPECTED;
}
- free (ap);
return NULL;
}
/*
+ * qualifiers1: Parse CCL qualifiers and search terms.
+ * cclp:   CCL Parser
+ * la:     Token pointer to RELATION token.
+ * qa:     Qualifier attributes already applied.
+ * return: pointer to node(s); NULL on error.
+ *
+ * The qualifier names between cclp->look_token and la are looked up with
+ * ccl_qual_search. The special setting "field" selects how several
+ * qualifiers are combined:
+ *   "merge" (default): the attributes of all listed qualifiers, followed
+ *                      by the inherited ones in qa, are passed together
+ *                      to qualifiers2.
+ *   "or":              every listed qualifier is passed to qualifiers2
+ *                      on its own; qa is not consulted in this mode.
+ * In both modes the lookup is repeated for increasing sequence numbers
+ * and the resulting sub trees are joined with CCL_RPN_OR nodes.
+ *
+ * On any failure the partially built tree is released and NULL is
+ * returned, so the caller never receives a pointer to freed nodes.
+ */
+static struct ccl_rpn_node *qualifiers1 (CCL_parser cclp, struct ccl_token *la,
+                                         struct ccl_rpn_attr **qa)
+{
+    struct ccl_token *lookahead = cclp->look_token;
+    struct ccl_token *look_start = cclp->look_token;
+    struct ccl_rpn_attr **ap;
+    struct ccl_rpn_node *node = 0;
+    const char *field_str;
+    int no = 0;
+    int seq = 0;
+    int i;
+    int mode_merge = 1;
+#if 0
+    if (qa)
+    {
+        cclp->error_code = CCL_ERR_DOUBLE_QUAL;
+        return NULL;
+    }
+#endif
+    /* upper bound for the attribute list: tokens before the relation
+       plus the inherited qualifiers */
+    for (lookahead = cclp->look_token; lookahead != la;
+         lookahead=lookahead->next)
+        no++;
+    if (qa)
+        for (i=0; qa[i]; i++)
+            no++;
+    /* "or" mode always uses ap[0] and ap[1], hence at least two slots */
+    ap = (struct ccl_rpn_attr **)xmalloc ((no ? (no+1) : 2) * sizeof(*ap));
+    ccl_assert (ap);
+
+    field_str = ccl_qual_search_special(cclp->bibset, "field");
+    if (field_str)
+    {
+        if (!strcmp (field_str, "or"))
+            mode_merge = 0;
+        else if (!strcmp (field_str, "merge"))
+            mode_merge = 1;
+    }
+    if (!mode_merge)
+    {
+        /* consider each field separately and OR */
+        lookahead = look_start;
+        while (lookahead != la)
+        {
+            ap[1] = 0;
+            seq = 0;
+            while ((ap[0] = ccl_qual_search (cclp, lookahead->name,
+                                             lookahead->len, seq)) != 0)
+            {
+                struct ccl_rpn_node *node_sub;
+                /* re-parse relation and terms for every qualifier */
+                cclp->look_token = la;
+
+                node_sub = qualifiers2(cclp, ap);
+                if (!node_sub)
+                {
+                    ccl_rpn_delete (node);
+                    xfree (ap);
+                    return 0;
+                }
+                if (node)
+                {
+                    struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                    node_this->u.p[0] = node;
+                    node_this->u.p[1] = node_sub;
+                    node = node_this;
+                }
+                else
+                    node = node_sub;
+                seq++;
+            }
+            if (seq == 0)
+            {
+                /* unknown qualifier: release what earlier fields built */
+                cclp->look_token = lookahead;
+                cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
+                ccl_rpn_delete (node);
+                xfree (ap);
+                return NULL;
+            }
+            lookahead = lookahead->next;
+            if (lookahead->kind == CCL_TOK_COMMA)
+                lookahead = lookahead->next;
+        }
+    }
+    else
+    {
+        /* merge attributes from ALL fields - including inherited ones */
+        while (1)
+        {
+            struct ccl_rpn_node *node_sub;
+            int found = 0;
+            lookahead = look_start;
+            for (i = 0; lookahead != la; i++)
+            {
+                ap[i] = ccl_qual_search (cclp, lookahead->name,
+                                         lookahead->len, seq);
+                if (ap[i])
+                    found++;
+                /* no entry for this sequence number: fall back to the
+                   first one so the remaining fields still combine */
+                if (!ap[i] && seq > 0)
+                    ap[i] = ccl_qual_search (cclp, lookahead->name,
+                                             lookahead->len, 0);
+                if (!ap[i])
+                {
+                    cclp->look_token = lookahead;
+                    cclp->error_code = CCL_ERR_UNKNOWN_QUAL;
+                    ccl_rpn_delete (node);
+                    xfree (ap);
+                    return NULL;
+                }
+                lookahead = lookahead->next;
+                if (lookahead->kind == CCL_TOK_COMMA)
+                    lookahead = lookahead->next;
+            }
+            if (qa)
+            {
+                struct ccl_rpn_attr **qa0 = qa;
+
+                while (*qa0)
+                    ap[i++] = *qa0++;
+            }
+            ap[i] = NULL;
+
+            /* no field had an entry for this sequence number: done */
+            if (!found)
+                break;
+
+            cclp->look_token = lookahead;
+
+            node_sub = qualifiers2(cclp, ap);
+            if (!node_sub)
+            {
+                /* the tree is gone; make sure NULL is returned and
+                   not a pointer to the nodes just released */
+                ccl_rpn_delete (node);
+                node = 0;
+                break;
+            }
+            if (node)
+            {
+                struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+                node_this->u.p[0] = node;
+                node_this->u.p[1] = node_sub;
+                node = node_this;
+            }
+            else
+                node = node_sub;
+            seq++;
+        }
+    }
+    xfree (ap);
+    return node;
+}
+
+
+/*
* search_terms: Parse CCL search terms - including proximity.
* cclp: CCL Parser
* qa: Qualifier attributes already applied.
struct ccl_rpn_attr **qa)
{
static int list[] = {
- CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, -1};
+ CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, CCL_TOK_SET, -1};
struct ccl_rpn_node *p1, *p2, *pn;
- p1 = search_term_x (cclp, qa, list);
+ p1 = search_term_x (cclp, qa, list, 1);
if (!p1)
return NULL;
while (1)
if (KIND == CCL_TOK_PROX)
{
ADVANCE;
- p2 = search_term_x (cclp, qa, list);
+ p2 = search_term_x (cclp, qa, list, 1);
if (!p2)
{
ccl_rpn_delete (p1);
}
else if (is_term_ok(KIND, list))
{
- p2 = search_term_x (cclp, qa, list);
+ p2 = search_term_x (cclp, qa, list, 1);
if (!p2)
{
ccl_rpn_delete (p1);
{
lookahead = lookahead->next;
if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ)
- return qualifiers (cclp, lookahead, qa);
+ return qualifiers1 (cclp, lookahead, qa);
if (lookahead->kind != CCL_TOK_COMMA)
break;
lookahead = lookahead->next;
}
- return search_terms (cclp, qa);
+ if (qa)
+ return search_terms (cclp, qa);
+ else
+ {
+ struct ccl_rpn_attr *qa[2];
+ struct ccl_rpn_node *node = 0;
+ int seq;
+ lookahead = cclp->look_token;
+
+ qa[1] = 0;
+ for(seq = 0; ;seq++)
+ {
+ struct ccl_rpn_node *node_sub;
+ qa[0] = ccl_qual_search(cclp, "term", 4, seq);
+ if (!qa[0])
+ break;
+
+ cclp->look_token = lookahead;
+
+ node_sub = search_terms (cclp, qa);
+ if (!node_sub)
+ {
+ ccl_rpn_delete (node);
+ return 0;
+ }
+ if (node)
+ {
+ struct ccl_rpn_node *node_this = mk_node(CCL_RPN_OR);
+ node_this->u.p[0] = node;
+ node_this->u.p[1] = node_sub;
+ node = node_this;
+ }
+ else
+ node = node_sub;
+ }
+ if (!node)
+ node = search_terms (cclp, 0);
+ return node;
+ }
}
/*
{
struct ccl_rpn_node *p;
+
+
cclp->look_token = list;
p = find_spec (cclp, NULL);
if (p && KIND != CCL_TOK_EOL)