X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=ccl%2Fcclfind.c;h=853bc70610aa185ec3df5bf86b1a049daab3115c;hp=4ac75a157052da75f4adc86733d9081deb2adb99;hb=33a6eab614ca84eef817f675843e6de820d13708;hpb=d9ee01635f03f9095a66f71b73580560d48798e8 diff --git a/ccl/cclfind.c b/ccl/cclfind.c index 4ac75a1..853bc70 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -45,7 +45,28 @@ * Europagate, 1995 * * $Log: cclfind.c,v $ - * Revision 1.12 1999-11-30 13:47:11 adam + * Revision 1.18 2000-10-17 19:50:28 adam + * Implemented and-list and or-list for CCL module. + * + * Revision 1.17 2000/05/01 09:36:50 adam + * Range operator only treated in ordered ranges so that minus (-) can be + * used for, say, the and-not operator. + * + * Revision 1.16 2000/03/14 09:06:11 adam + * Added POSIX threads support for frontend server. + * + * Revision 1.15 2000/02/24 23:49:13 adam + * Fixed memory allocation problem. + * + * Revision 1.14 2000/01/31 13:15:21 adam + * Removed uses of assert(3). Cleanup of ODR. CCL parser update so + * that some characters are not surrounded by spaces in resulting term. + * ILL-code updates. + * + * Revision 1.13 1999/12/22 13:13:32 adam + * Search terms may include "operators" without causing error. + * + * Revision 1.12 1999/11/30 13:47:11 adam * Improved installation. Moved header files to include/yaz. * * Revision 1.11 1999/03/31 11:15:37 adam @@ -122,9 +143,7 @@ * */ -#include #include -#include #include #include @@ -182,7 +201,7 @@ static void strxcat (char *n, const char *src, int len) static char *copy_token_name (struct ccl_token *tp) { char *str = (char *)malloc (tp->len + 1); - assert (str); + ccl_assert (str); memcpy (str, tp->name, tp->len); str[tp->len] = '\0'; return str; @@ -197,7 +216,7 @@ static struct ccl_rpn_node *mk_node (int kind) { struct ccl_rpn_node *p; p = (struct ccl_rpn_node *)malloc (sizeof(*p)); - assert (p); + ccl_assert (p); p->kind = kind; return p; } @@ -240,6 +259,15 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) static struct ccl_rpn_node *find_spec (CCL_parser cclp, struct ccl_rpn_attr **qa); + +static int is_term_ok (int look, int *list) +{ + for (;*list >= 0; list++) + if (look == *list) + return 1; + return 0; +} + static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa); @@ -254,7 +282,7 @@ static void add_attr (struct ccl_rpn_node *p, int type, int value) struct ccl_rpn_attr *n; n = (struct ccl_rpn_attr *)malloc (sizeof(*n)); - assert (n); + ccl_assert (n); n->type = type; n->value = value; n->next = p->u.t.attr_list; @@ -267,173 +295,221 @@ static void add_attr (struct ccl_rpn_node *p, int type, int value) * qa: Qualifier attributes already applied. * return: pointer to node(s); NULL on error. */ -static struct ccl_rpn_node *search_term (CCL_parser cclp, - struct ccl_rpn_attr **qa) +static struct ccl_rpn_node *search_term_x (CCL_parser cclp, + struct ccl_rpn_attr **qa, + int *term_list) { - struct ccl_rpn_node *p; + struct ccl_rpn_attr *qa_tmp[2]; + struct ccl_rpn_node *p_top = 0; struct ccl_token *lookahead = cclp->look_token; - int len = 0; - size_t no, i; - int left_trunc = 0; - int right_trunc = 0; - int mid_trunc = 0; - int relation_value = -1; - int position_value = -1; - int structure_value = -1; - int truncation_value = -1; - int completeness_value = -1; - - if (KIND != CCL_TOK_TERM) - { - cclp->error_code = CCL_ERR_TERM_EXPECTED; - return NULL; - } - /* create the term node, but wait a moment before adding the term */ - p = mk_node (CCL_RPN_TERM); - p->u.t.attr_list = NULL; - p->u.t.term = NULL; + int and_list = 0; + int or_list = 0; if (!qa) { - /* no qualifier(s) applied. Use 'term' if it is defined */ - - qa = (struct ccl_rpn_attr **)malloc (2*sizeof(*qa)); - assert (qa); + /* no qualifier(s) applied. Use 'term' if it is defined */ + + qa = qa_tmp; + ccl_assert (qa); qa[0] = ccl_qual_search (cclp, "term", 4); qa[1] = NULL; } - - /* go through all attributes and add them to the attribute list */ - for (i=0; qa && qa[i]; i++) + if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST)) + and_list = 1; + if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST)) + or_list = 1; + while (1) { - struct ccl_rpn_attr *attr; + struct ccl_rpn_node *p; + size_t no, i; + int left_trunc = 0; + int right_trunc = 0; + int mid_trunc = 0; + int relation_value = -1; + int position_value = -1; + int structure_value = -1; + int truncation_value = -1; + int completeness_value = -1; + int len = 0; + int max = 200; + if (and_list || or_list) + max = 1; + + /* go through each TERM token. If no truncation attribute is yet + met, then look for left/right truncation markers (?) and + set left_trunc/right_trunc/mid_trunc accordingly */ + for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++) + { + for (i = 0; ilen; i++) + if (truncation_value == -1 && lookahead->name[i] == '?') + { + if (no == 0 && i == 0 && lookahead->len >= 1) + left_trunc = 1; + else if (!is_term_ok(lookahead->next->kind, term_list) && + i == lookahead->len-1 && i >= 1) + right_trunc = 1; + else + mid_trunc = 1; + } + len += 1+lookahead->len; + lookahead = lookahead->next; + } + + if (len == 0) + break; /* no more terms . stop . */ + + if (p_top) + { + if (or_list) + p = mk_node (CCL_RPN_OR); + else if (and_list) + p = mk_node (CCL_RPN_AND); + else + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p_top; + p_top = p; + } + + /* create the term node, but wait a moment before adding the term */ + p = mk_node (CCL_RPN_TERM); + p->u.t.attr_list = NULL; + p->u.t.term = NULL; + + /* make the top node point to us.. */ + if (p_top) + p_top->u.p[1] = p; + else + p_top = p; - for (attr = qa[i]; attr; attr = attr->next) - if (attr->value > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) + + /* go through all attributes and add them to the attribute list */ + for (i=0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = qa[i]; attr; attr = attr->next) + if (attr->value > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value; + left_trunc = right_trunc = mid_trunc = 0; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value; + break; + } + add_attr (p, attr->type, attr->value); + } + } + /* len now holds the number of characters in the RPN term */ + /* no holds the number of CCL tokens (1 or more) */ + + if (structure_value == -1 && + qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) + { /* no structure attribute met. Apply either structure attribute + WORD or PHRASE depending on number of CCL tokens */ + if (no == 1) + add_attr (p, CCL_BIB1_STR, 2); + else + add_attr (p, CCL_BIB1_STR, 1); + } + + /* make the RPN token */ + p->u.t.term = (char *)malloc (len); + ccl_assert (p->u.t.term); + p->u.t.term[0] = '\0'; + for (i = 0; ilook_token->name; + int src_len = cclp->look_token->len; + + if (i == 0 && left_trunc) + { + src_len--; + src_str++; + } + else if (i == no-1 && right_trunc) + src_len--; + if (src_len) + { + int len = strlen(p->u.t.term); + if (len && + !strchr("-+", *src_str) && + !strchr("-+", p->u.t.term[len-1])) { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = attr->value; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value; - break; + strcat (p->u.t.term, " "); } - add_attr (p, attr->type, attr->value); } + strxcat (p->u.t.term, src_str, src_len); + ADVANCE; + } + if (left_trunc && right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 3); + } + else if (right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 1); + } + else if (left_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 2); + } + else + { + if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) + add_attr (p, CCL_BIB1_TRU, 100); + } } - /* go through each TERM token. If no truncation attribute is yet - met, then look for left/right truncation markers (?) and - set left_trunc/right_trunc/mid_trunc accordingly */ - for (no = 0; lookahead->kind == CCL_TOK_TERM; no++) - { - for (i = 0; ilen; i++) - if (truncation_value == -1 && lookahead->name[i] == '?') - { - if (no == 0 && i == 0 && lookahead->len >= 1) - left_trunc = 1; - else if (lookahead->next->kind != CCL_TOK_TERM && - i == lookahead->len-1 && i >= 1) - right_trunc = 1; - else - mid_trunc = 1; - } - len += 1+lookahead->len; - lookahead = lookahead->next; - } - /* len now holds the number of characters in the RPN term */ - /* no holds the number of CCL tokens (1 or more) */ - - if (structure_value == -1 && - qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) - { /* no structure attribute met. Apply either structure attribute - WORD or PHRASE depending on number of CCL tokens */ - if (no == 1) - add_attr (p, CCL_BIB1_STR, 2); - else - add_attr (p, CCL_BIB1_STR, 1); - } + if (!p_top) + cclp->error_code = CCL_ERR_TERM_EXPECTED; + return p_top; +} - /* make the RPN token */ - p->u.t.term = (char *)malloc (len); - assert (p->u.t.term); - p->u.t.term[0] = '\0'; - for (i = 0; ilook_token->name; - int src_len = cclp->look_token->len; - - if (i == 0 && left_trunc) - { - src_len--; - src_str++; - } - else if (i == no-1 && right_trunc) - src_len--; - if (i) - strcat (p->u.t.term, " "); - strxcat (p->u.t.term, src_str, src_len); - ADVANCE; - } - if (left_trunc && right_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; - free (qa); - ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 3); - } - else if (right_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; - free (qa); - ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 1); - } - else if (left_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; - free (qa); - ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 2); - } - else - { - if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) - add_attr (p, CCL_BIB1_TRU, 100); - } - return p; +static struct ccl_rpn_node *search_term (CCL_parser cclp, + struct ccl_rpn_attr **qa) +{ + static int list[] = {CCL_TOK_TERM, CCL_TOK_COMMA, -1}; + return search_term_x(cclp, qa, list); } /* @@ -464,7 +540,7 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, for (i=0; qa[i]; i++) no++; ap = (struct ccl_rpn_attr **)malloc ((no+1) * sizeof(*ap)); - assert (ap); + ccl_assert (ap); for (i = 0; cclp->look_token != la; i++) { ap[i] = ccl_qual_search (cclp, cclp->look_token->name, @@ -516,6 +592,7 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, free (ap); return p; } + /* ordered relation ... */ rel = 0; if (cclp->look_token->len == 1) { @@ -543,7 +620,8 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, ADVANCE; /* skip relation */ if (KIND == CCL_TOK_TERM && - cclp->look_token->next->kind == CCL_TOK_MINUS) + cclp->look_token->next->len == 1 && + cclp->look_token->next->name[0] == '-') { struct ccl_rpn_node *p1; if (!(p1 = search_term (cclp, ap))) @@ -577,7 +655,8 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, return p1; } } - else if (KIND == CCL_TOK_MINUS) /* = - term ? */ + else if (cclp->look_token->len == 1 && + cclp->look_token->name[0] == '"') /* = - term ? */ { ADVANCE; if (!(p = search_term (cclp, ap))) @@ -634,8 +713,10 @@ static struct ccl_rpn_node *qualifiers (CCL_parser cclp, struct ccl_token *la, static struct ccl_rpn_node *search_terms (CCL_parser cclp, struct ccl_rpn_attr **qa) { + static int list[] = { + CCL_TOK_TERM, CCL_TOK_COMMA,CCL_TOK_EQ, CCL_TOK_REL, -1}; struct ccl_rpn_node *p1, *p2, *pn; - p1 = search_term (cclp, qa); + p1 = search_term_x (cclp, qa, list); if (!p1) return NULL; while (1) @@ -643,7 +724,7 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, if (KIND == CCL_TOK_PROX) { ADVANCE; - p2 = search_term (cclp, qa); + p2 = search_term_x (cclp, qa, list); if (!p2) { ccl_rpn_delete (p1); @@ -654,9 +735,9 @@ static struct ccl_rpn_node *search_terms (CCL_parser cclp, pn->u.p[1] = p2; p1 = pn; } - else if (KIND == CCL_TOK_TERM) + else if (is_term_ok(KIND, list)) { - p2 = search_term (cclp, qa); + p2 = search_term_x (cclp, qa, list); if (!p2) { ccl_rpn_delete (p1);