* Europagate, 1995
*
* $Log: cclfind.c,v $
- * Revision 1.11 1999-03-31 11:15:37 adam
+ * Revision 1.18 2000-10-17 19:50:28 adam
+ * Implemented and-list and or-list for CCL module.
+ *
+ * Revision 1.17 2000/05/01 09:36:50 adam
+ * Range operator only treated in ordered ranges so that minus (-) can be
+ * used for, say, the and-not operator.
+ *
+ * Revision 1.16 2000/03/14 09:06:11 adam
+ * Added POSIX threads support for frontend server.
+ *
+ * Revision 1.15 2000/02/24 23:49:13 adam
+ * Fixed memory allocation problem.
+ *
+ * Revision 1.14 2000/01/31 13:15:21 adam
+ * Removed uses of assert(3). Cleanup of ODR. CCL parser update so
+ * that some characters are not surrounded by spaces in resulting term.
+ * ILL-code updates.
+ *
+ * Revision 1.13 1999/12/22 13:13:32 adam
+ * Search terms may include "operators" without causing error.
+ *
+ * Revision 1.12 1999/11/30 13:47:11 adam
+ * Improved installation. Moved header files to include/yaz.
+ *
+ * Revision 1.11 1999/03/31 11:15:37 adam
* Fixed memory leaks in ccl_find_str and ccl_qual_rm.
*
* Revision 1.10 1998/02/11 11:53:33 adam
*
*/
-#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
#include <string.h>
-#include <ccl.h>
+#include <yaz/ccl.h>
/* KIND: kind of the current lookahead token, i.e. cclp->look_token->kind.
 * The macro refers to a variable named cclp, so one must be in scope
 * wherever KIND is used. */
#define KIND (cclp->look_token->kind)
static char *copy_token_name (struct ccl_token *tp)
{
char *str = (char *)malloc (tp->len + 1);
- assert (str);
+ ccl_assert (str);
memcpy (str, tp->name, tp->len);
str[tp->len] = '\0';
return str;
{
struct ccl_rpn_node *p;
p = (struct ccl_rpn_node *)malloc (sizeof(*p));
- assert (p);
+ ccl_assert (p);
p->kind = kind;
return p;
}
static struct ccl_rpn_node *find_spec (CCL_parser cclp,
struct ccl_rpn_attr **qa);
+
+/* is_term_ok: return 1 when token kind 'look' occurs in 'list'
+ * (an array of kinds ended by a negative value), 0 otherwise. */
+static int is_term_ok (int look, int *list)
+{
+ int *kind = list;
+
+ while (*kind >= 0)
+ {
+ if (*kind == look)
+ return 1;
+ kind++;
+ }
+ return 0;
+}
+
static struct ccl_rpn_node *search_terms (CCL_parser cclp,
struct ccl_rpn_attr **qa);
struct ccl_rpn_attr *n;
n = (struct ccl_rpn_attr *)malloc (sizeof(*n));
- assert (n);
+ ccl_assert (n);
n->type = type;
n->value = value;
n->next = p->u.t.attr_list;
* qa: Qualifier attributes already applied.
* return: pointer to node(s); NULL on error.
*/
-static struct ccl_rpn_node *search_term (CCL_parser cclp,
- struct ccl_rpn_attr **qa)
+/* search_term_x: parse one or more search terms into an RPN tree.
+ * cclp: CCL parser; tokens are read from cclp->look_token and skipped
+ * with ADVANCE.
+ * qa: qualifier attributes already applied; when NULL the "term"
+ * qualifier is looked up and used instead.
+ * term_list: token kinds accepted as part of a term, ended by a
+ * negative value.
+ * return: a single term node, or term nodes joined by AND/OR nodes.
+ * NULL on error, with cclp->error_code set to
+ * CCL_ERR_TERM_EXPECTED (no term found) or one of
+ * CCL_ERR_TRUNC_NOT_BOTH/RIGHT/LEFT (truncation not permitted).
+ */
+static struct ccl_rpn_node *search_term_x (CCL_parser cclp,
+ struct ccl_rpn_attr **qa,
+ int *term_list)
{
- struct ccl_rpn_node *p;
+ struct ccl_rpn_attr *qa_tmp[2];
+ struct ccl_rpn_node *p_top = 0;
struct ccl_token *lookahead = cclp->look_token;
- int len = 0;
- size_t no, i;
- int left_trunc = 0;
- int right_trunc = 0;
- int mid_trunc = 0;
- int relation_value = -1;
- int position_value = -1;
- int structure_value = -1;
- int truncation_value = -1;
- int completeness_value = -1;
-
- if (KIND != CCL_TOK_TERM)
- {
- cclp->error_code = CCL_ERR_TERM_EXPECTED;
- return NULL;
- }
- /* create the term node, but wait a moment before adding the term */
- p = mk_node (CCL_RPN_TERM);
- p->u.t.attr_list = NULL;
- p->u.t.term = NULL;
+ int and_list = 0;
+ int or_list = 0;
if (!qa)
{
- /* no qualifier(s) applied. Use 'term' if it is defined */
-
- qa = (struct ccl_rpn_attr **)malloc (2*sizeof(*qa));
- assert (qa);
+ /* no qualifier(s) applied. Use 'term' if it is defined.
+ The two-slot list lives on the stack, so nothing has to be
+ freed on any return path. */
+
+ qa = qa_tmp;
qa[0] = ccl_qual_search (cclp, "term", 4);
qa[1] = NULL;
}
-
- /* go through all attributes and add them to the attribute list */
- for (i=0; qa && qa[i]; i++)
+ if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST))
+ and_list = 1;
+ if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST))
+ or_list = 1;
+ /* each iteration builds one term node and links it into p_top */
+ while (1)
{
- struct ccl_rpn_attr *attr;
+ struct ccl_rpn_node *p;
+ size_t no, i;
+ int left_trunc = 0;
+ int right_trunc = 0;
+ int mid_trunc = 0;
+ int relation_value = -1;
+ int position_value = -1;
+ int structure_value = -1;
+ int truncation_value = -1;
+ int completeness_value = -1;
+ int len = 0;
+ /* upper bound on the number of tokens merged into one term node;
+ in and-list/or-list mode every token becomes its own node */
+ size_t max = 200;
+ if (and_list || or_list)
+ max = 1;
+
+ /* go through each TERM token. If no truncation attribute is yet
+ met, then look for left/right truncation markers (?) and
+ set left_trunc/right_trunc/mid_trunc accordingly */
+ for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++)
+ {
+ for (i = 0; i<lookahead->len; i++)
+ if (truncation_value == -1 && lookahead->name[i] == '?')
+ {
+ if (no == 0 && i == 0 && lookahead->len >= 1)
+ left_trunc = 1;
+ else if (!is_term_ok(lookahead->next->kind, term_list) &&
+ i == lookahead->len-1 && i >= 1)
+ right_trunc = 1;
+ else
+ mid_trunc = 1;
+ }
+ /* one extra byte per token: separator or final NUL */
+ len += 1+lookahead->len;
+ lookahead = lookahead->next;
+ }
+
+ if (len == 0)
+ break; /* no more terms . stop . */
+
+ if (p_top)
+ {
+ /* join with what was parsed so far: OR for or-list,
+ AND for and-list and for the default case alike */
+ p = mk_node (or_list ? CCL_RPN_OR : CCL_RPN_AND);
+ p->u.p[0] = p_top;
+ p_top = p;
+ }
+
+ /* create the term node, but wait a moment before adding the term */
+ p = mk_node (CCL_RPN_TERM);
+ p->u.t.attr_list = NULL;
+ p->u.t.term = NULL;
+
+ /* make the top node point to us.. */
+ if (p_top)
+ p_top->u.p[1] = p;
+ else
+ p_top = p;
- for (attr = qa[i]; attr; attr = attr->next)
- if (attr->value > 0)
- { /* deal only with REAL attributes (positive) */
- switch (attr->type)
+
+ /* go through all attributes and add them to the attribute list */
+ for (i=0; qa && qa[i]; i++)
+ {
+ struct ccl_rpn_attr *attr;
+
+ for (attr = qa[i]; attr; attr = attr->next)
+ if (attr->value > 0)
+ { /* deal only with REAL attributes (positive) */
+ /* only the first value seen for each of these types
+ is added; later ones are skipped by 'continue' */
+ switch (attr->type)
+ {
+ case CCL_BIB1_REL:
+ if (relation_value != -1)
+ continue;
+ relation_value = attr->value;
+ break;
+ case CCL_BIB1_POS:
+ if (position_value != -1)
+ continue;
+ position_value = attr->value;
+ break;
+ case CCL_BIB1_STR:
+ if (structure_value != -1)
+ continue;
+ structure_value = attr->value;
+ break;
+ case CCL_BIB1_TRU:
+ if (truncation_value != -1)
+ continue;
+ truncation_value = attr->value;
+ /* an explicit truncation attribute overrides any
+ '?' markers found in the tokens above */
+ left_trunc = right_trunc = mid_trunc = 0;
+ break;
+ case CCL_BIB1_COM:
+ if (completeness_value != -1)
+ continue;
+ completeness_value = attr->value;
+ break;
+ }
+ add_attr (p, attr->type, attr->value);
+ }
+ }
+ /* len now holds the number of characters in the RPN term */
+ /* no holds the number of CCL tokens (1 or more) */
+
+ if (structure_value == -1 &&
+ qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP))
+ { /* no structure attribute met. Apply either structure attribute
+ WORD or PHRASE depending on number of CCL tokens */
+ if (no == 1)
+ add_attr (p, CCL_BIB1_STR, 2);
+ else
+ add_attr (p, CCL_BIB1_STR, 1);
+ }
+
+ /* make the RPN token */
+ p->u.t.term = (char *)malloc (len);
+ ccl_assert (p->u.t.term);
+ p->u.t.term[0] = '\0';
+ for (i = 0; i<no; i++)
+ {
+ const char *src_str = cclp->look_token->name;
+ int src_len = cclp->look_token->len;
+
+ /* drop the '?' truncation marker from the stored term */
+ if (i == 0 && left_trunc)
+ {
+ src_len--;
+ src_str++;
+ }
+ else if (i == no-1 && right_trunc)
+ src_len--;
+ if (src_len)
+ {
+ /* separate tokens by a blank, except next to '-' or '+' */
+ int cur_len = strlen(p->u.t.term);
+ if (cur_len &&
+ !strchr("-+", *src_str) &&
+ !strchr("-+", p->u.t.term[cur_len-1]))
{
- case CCL_BIB1_REL:
- if (relation_value != -1)
- continue;
- relation_value = attr->value;
- break;
- case CCL_BIB1_POS:
- if (position_value != -1)
- continue;
- position_value = attr->value;
- break;
- case CCL_BIB1_STR:
- if (structure_value != -1)
- continue;
- structure_value = attr->value;
- break;
- case CCL_BIB1_TRU:
- if (truncation_value != -1)
- continue;
- truncation_value = attr->value;
- break;
- case CCL_BIB1_COM:
- if (completeness_value != -1)
- continue;
- completeness_value = attr->value;
- break;
+ strcat (p->u.t.term, " ");
}
- add_attr (p, attr->type, attr->value);
}
+ strxcat (p->u.t.term, src_str, src_len);
+ ADVANCE;
+ }
+ /* On the error paths below p is already linked into p_top, so the
+ whole tree is released, not just the current term node (assumes
+ ccl_rpn_delete also frees the child nodes of operator nodes). */
+ if (left_trunc && right_trunc)
+ {
+ if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH))
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+ ccl_rpn_delete (p_top);
+ return NULL;
+ }
+ add_attr (p, CCL_BIB1_TRU, 3);
+ }
+ else if (right_trunc)
+ {
+ if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT))
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
+ ccl_rpn_delete (p_top);
+ return NULL;
+ }
+ add_attr (p, CCL_BIB1_TRU, 1);
+ }
+ else if (left_trunc)
+ {
+ if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT))
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
+ ccl_rpn_delete (p_top);
+ return NULL;
+ }
+ add_attr (p, CCL_BIB1_TRU, 2);
+ }
+ else
+ {
+ if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE))
+ add_attr (p, CCL_BIB1_TRU, 100);
+ }
}
- /* go through each TERM token. If no truncation attribute is yet
- met, then look for left/right truncation markers (?) and
- set left_trunc/right_trunc/mid_trunc accordingly */
- for (no = 0; lookahead->kind == CCL_TOK_TERM; no++)
- {
- for (i = 0; i<lookahead->len; i++)
- if (truncation_value == -1 && lookahead->name[i] == '?')
- {
- if (no == 0 && i == 0 && lookahead->len >= 1)
- left_trunc = 1;
- else if (lookahead->next->kind != CCL_TOK_TERM &&
- i == lookahead->len-1 && i >= 1)
- right_trunc = 1;
- else
- mid_trunc = 1;
- }
- len += 1+lookahead->len;
- lookahead = lookahead->next;
- }
- /* len now holds the number of characters in the RPN term */
- /* no holds the number of CCL tokens (1 or more) */
-
- if (structure_value == -1 &&
- qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP))
- { /* no structure attribute met. Apply either structure attribute
- WORD or PHRASE depending on number of CCL tokens */
- if (no == 1)
- add_attr (p, CCL_BIB1_STR, 2);
- else
- add_attr (p, CCL_BIB1_STR, 1);
- }
+ /* the loop was left without building any node: no term was present */
+ if (!p_top)
+ cclp->error_code = CCL_ERR_TERM_EXPECTED;
+ return p_top;
+}
- /* make the RPN token */
- p->u.t.term = (char *)malloc (len);
- assert (p->u.t.term);
- p->u.t.term[0] = '\0';
- for (i = 0; i<no; i++)
- {
- const char *src_str = cclp->look_token->name;
- int src_len = cclp->look_token->len;
-
- if (i == 0 && left_trunc)
- {
- src_len--;
- src_str++;
- }
- else if (i == no-1 && right_trunc)
- src_len--;
- if (i)
- strcat (p->u.t.term, " ");
- strxcat (p->u.t.term, src_str, src_len);
- ADVANCE;
- }
- if (left_trunc && right_trunc)
- {
- if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH))
- {
- cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
- free (qa);
- ccl_rpn_delete (p);
- return NULL;
- }
- add_attr (p, CCL_BIB1_TRU, 3);
- }
- else if (right_trunc)
- {
- if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT))
- {
- cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT;
- free (qa);
- ccl_rpn_delete (p);
- return NULL;
- }
- add_attr (p, CCL_BIB1_TRU, 1);
- }
- else if (left_trunc)
- {
- if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT))
- {
- cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT;
- free (qa);
- ccl_rpn_delete (p);
- return NULL;
- }
- add_attr (p, CCL_BIB1_TRU, 2);
- }
- else
- {
- if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE))
- add_attr (p, CCL_BIB1_TRU, 100);
- }
- return p;
+/* search_term: parse term(s) made of TERM and COMMA tokens only;
+ * delegates to search_term_x with that fixed token-kind list. */
+static struct ccl_rpn_node *search_term (CCL_parser cclp,
+ struct ccl_rpn_attr **qa)
+{
+ static int plain_term_kinds[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1};
+
+ return search_term_x (cclp, qa, plain_term_kinds);
}
/*
for (i=0; qa[i]; i++)
no++;
ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap));
- assert (ap);
+ ccl_assert (ap);
for (i = 0; cclp->look_token != la; i++)
{
ap[i] = ccl_qual_search (cclp, cclp->look_token->name,
free (ap);
return p;
}
+ /* ordered relation ... */
rel = 0;
if (cclp->look_token->len == 1)
{
ADVANCE; /* skip relation */
if (KIND == CCL_TOK_TERM &&
- cclp->look_token->next->kind == CCL_TOK_MINUS)
+ cclp->look_token->next->len == 1 &&
+ cclp->look_token->next->name[0] == '-')
{
struct ccl_rpn_node *p1;
if (!(p1 = search_term (cclp, ap)))
return p1;
}
}
- else if (KIND == CCL_TOK_MINUS) /* = - term ? */
+ else if (cclp->look_token->len == 1 &&
+ cclp->look_token->name[0] == '-') /* lone '-' token: = - term ? */
{
ADVANCE;
if (!(p = search_term (cclp, ap)))
static struct ccl_rpn_node *search_terms (CCL_parser cclp,
struct ccl_rpn_attr **qa)
{
+ static int list[] = {
+ CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, -1};
struct ccl_rpn_node *p1, *p2, *pn;
- p1 = search_term (cclp, qa);
+ p1 = search_term_x (cclp, qa, list);
if (!p1)
return NULL;
while (1)
if (KIND == CCL_TOK_PROX)
{
ADVANCE;
- p2 = search_term (cclp, qa);
+ p2 = search_term_x (cclp, qa, list);
if (!p2)
{
ccl_rpn_delete (p1);
pn->u.p[1] = p2;
p1 = pn;
}
- else if (KIND == CCL_TOK_TERM)
+ else if (is_term_ok(KIND, list))
{
- p2 = search_term (cclp, qa);
+ p2 = search_term_x (cclp, qa, list);
if (!p2)
{
ccl_rpn_delete (p1);