From: Adam Dickmeiss Date: Tue, 17 Oct 2000 19:50:28 +0000 (+0000) Subject: Implemented and-list and or-list for CCL module. X-Git-Tag: YAZ.1.8~170 X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=commitdiff_plain;h=33a6eab614ca84eef817f675843e6de820d13708 Implemented and-list and or-list for CCL module. --- diff --git a/CHANGELOG b/CHANGELOG index 35382e3..03724e9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ Possible compatibility problems with earlier versions marked with '*'. +Added feature for CCL module. Virtual structure s=al or s=ol generates +and-list and or-list respectively instead of phrase search. + Added some OID's. * Added prefix "yaz_" for the functions log_init.., log_file.. and diff --git a/ccl/README b/ccl/README index 6b600b4..431ded0 100644 --- a/ccl/README +++ b/ccl/README @@ -1,6 +1,6 @@ CCL test module -This module is required to parse the queries given to the 'cli' +This module is required to parse the queries given to the yaz-client test program. It is *not* a part of the YAZ module, and it is distributed under a different license. @@ -11,5 +11,5 @@ of this software to organisations that are not partners in EUROPAGATE. If you need to distribute copies of YAZ to organisations which are not partners in EUROPAGATE, you should remove this module, and, if -necessary, modify yazlib/tst.c to work wihout it. The test program is -not essential to the general working of YAZ. +necessary, modify client/yaz-client.c to work without it. The test program +is not essential to the general working of YAZ. diff --git a/ccl/bib1 b/ccl/bib1 index 864931a..2a1aa28 100644 --- a/ccl/bib1 +++ b/ccl/bib1 @@ -1,4 +1,4 @@ -# $Id: bib1,v 1.2 1996-10-11 15:00:24 adam Exp $ +# $Id: bib1,v 1.3 2000-10-17 19:50:28 adam Exp $ # CCL qualifiers and their mapping to a bib-1 subset # # Each line takes the form: @@ -23,9 +23,9 @@ # r Allow right truncation if ? is at left side of term. # b Allow left&right truncation if ? is at left&right side of term.
# n Set truncation explicitly to "none" if no ? is given. -term s=pw t=l,r -au u=1 s=pw -ti u=4 s=pw +term s=pw t=l,r s=al +au u=1 s=pw t=l,r +ti u=4 s=pw t=l,r isbn u=7 issn u=8 cc u=20 diff --git a/ccl/cclfind.c b/ccl/cclfind.c index ba12079..853bc70 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -45,7 +45,10 @@ * Europagate, 1995 * * $Log: cclfind.c,v $ - * Revision 1.17 2000-05-01 09:36:50 adam + * Revision 1.18 2000-10-17 19:50:28 adam + * Implemented and-list and or-list for CCL module. + * + * Revision 1.17 2000/05/01 09:36:50 adam * Range operator only treated in ordered ranges so that minus (-) can be * used for, say, the and-not operator. * @@ -297,175 +300,209 @@ static struct ccl_rpn_node *search_term_x (CCL_parser cclp, int *term_list) { struct ccl_rpn_attr *qa_tmp[2]; - struct ccl_rpn_node *p; + struct ccl_rpn_node *p_top = 0; struct ccl_token *lookahead = cclp->look_token; - int len = 0; - size_t no, i; - int left_trunc = 0; - int right_trunc = 0; - int mid_trunc = 0; - int relation_value = -1; - int position_value = -1; - int structure_value = -1; - int truncation_value = -1; - int completeness_value = -1; - - if (!is_term_ok(KIND, term_list)) - { - cclp->error_code = CCL_ERR_TERM_EXPECTED; - return NULL; - } - /* create the term node, but wait a moment before adding the term */ - p = mk_node (CCL_RPN_TERM); - p->u.t.attr_list = NULL; - p->u.t.term = NULL; + int and_list = 0; + int or_list = 0; if (!qa) { - /* no qualifier(s) applied. Use 'term' if it is defined */ - - qa = qa_tmp; + /* no qualifier(s) applied. 
Use 'term' if it is defined */ + + qa = qa_tmp; ccl_assert (qa); qa[0] = ccl_qual_search (cclp, "term", 4); qa[1] = NULL; } - - /* go through all attributes and add them to the attribute list */ - for (i=0; qa && qa[i]; i++) + if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_AND_LIST)) + and_list = 1; + if (qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_OR_LIST)) + or_list = 1; + while (1) { - struct ccl_rpn_attr *attr; + struct ccl_rpn_node *p; + size_t no, i; + int left_trunc = 0; + int right_trunc = 0; + int mid_trunc = 0; + int relation_value = -1; + int position_value = -1; + int structure_value = -1; + int truncation_value = -1; + int completeness_value = -1; + int len = 0; + int max = 200; + if (and_list || or_list) + max = 1; + + /* go through each TERM token. If no truncation attribute is yet + met, then look for left/right truncation markers (?) and + set left_trunc/right_trunc/mid_trunc accordingly */ + for (no = 0; no < max && is_term_ok(lookahead->kind, term_list); no++) + { + for (i = 0; ilen; i++) + if (truncation_value == -1 && lookahead->name[i] == '?') + { + if (no == 0 && i == 0 && lookahead->len >= 1) + left_trunc = 1; + else if (!is_term_ok(lookahead->next->kind, term_list) && + i == lookahead->len-1 && i >= 1) + right_trunc = 1; + else + mid_trunc = 1; + } + len += 1+lookahead->len; + lookahead = lookahead->next; + } + + if (len == 0) + break; /* no more terms . stop . */ + + if (p_top) + { + if (or_list) + p = mk_node (CCL_RPN_OR); + else if (and_list) + p = mk_node (CCL_RPN_AND); + else + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p_top; + p_top = p; + } + + /* create the term node, but wait a moment before adding the term */ + p = mk_node (CCL_RPN_TERM); + p->u.t.attr_list = NULL; + p->u.t.term = NULL; + + /* make the top node point to us.. 
*/ + if (p_top) + p_top->u.p[1] = p; + else + p_top = p; - for (attr = qa[i]; attr; attr = attr->next) - if (attr->value > 0) - { /* deal only with REAL attributes (positive) */ - switch (attr->type) + + /* go through all attributes and add them to the attribute list */ + for (i=0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = qa[i]; attr; attr = attr->next) + if (attr->value > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value; + left_trunc = right_trunc = mid_trunc = 0; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value; + break; + } + add_attr (p, attr->type, attr->value); + } + } + /* len now holds the number of characters in the RPN term */ + /* no holds the number of CCL tokens (1 or more) */ + + if (structure_value == -1 && + qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) + { /* no structure attribute met. 
Apply either structure attribute + WORD or PHRASE depending on number of CCL tokens */ + if (no == 1) + add_attr (p, CCL_BIB1_STR, 2); + else + add_attr (p, CCL_BIB1_STR, 1); + } + + /* make the RPN token */ + p->u.t.term = (char *)malloc (len); + ccl_assert (p->u.t.term); + p->u.t.term[0] = '\0'; + for (i = 0; ilook_token->name; + int src_len = cclp->look_token->len; + + if (i == 0 && left_trunc) + { + src_len--; + src_str++; + } + else if (i == no-1 && right_trunc) + src_len--; + if (src_len) + { + int len = strlen(p->u.t.term); + if (len && + !strchr("-+", *src_str) && + !strchr("-+", p->u.t.term[len-1])) { - case CCL_BIB1_REL: - if (relation_value != -1) - continue; - relation_value = attr->value; - break; - case CCL_BIB1_POS: - if (position_value != -1) - continue; - position_value = attr->value; - break; - case CCL_BIB1_STR: - if (structure_value != -1) - continue; - structure_value = attr->value; - break; - case CCL_BIB1_TRU: - if (truncation_value != -1) - continue; - truncation_value = attr->value; - break; - case CCL_BIB1_COM: - if (completeness_value != -1) - continue; - completeness_value = attr->value; - break; + strcat (p->u.t.term, " "); } - add_attr (p, attr->type, attr->value); } + strxcat (p->u.t.term, src_str, src_len); + ADVANCE; + } + if (left_trunc && right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 3); + } + else if (right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 1); + } + else if (left_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) + { + cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 2); + } + else + { + if (qual_val_type (qa, CCL_BIB1_TRU, 
CCL_BIB1_TRU_CAN_NONE)) + add_attr (p, CCL_BIB1_TRU, 100); + } } - /* go through each TERM token. If no truncation attribute is yet - met, then look for left/right truncation markers (?) and - set left_trunc/right_trunc/mid_trunc accordingly */ - for (no = 0; is_term_ok(lookahead->kind, term_list); no++) - { - for (i = 0; ilen; i++) - if (truncation_value == -1 && lookahead->name[i] == '?') - { - if (no == 0 && i == 0 && lookahead->len >= 1) - left_trunc = 1; - else if (!is_term_ok(lookahead->next->kind, term_list) && - i == lookahead->len-1 && i >= 1) - right_trunc = 1; - else - mid_trunc = 1; - } - len += 1+lookahead->len; - lookahead = lookahead->next; - } - /* len now holds the number of characters in the RPN term */ - /* no holds the number of CCL tokens (1 or more) */ - - if (structure_value == -1 && - qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) - { /* no structure attribute met. Apply either structure attribute - WORD or PHRASE depending on number of CCL tokens */ - if (no == 1) - add_attr (p, CCL_BIB1_STR, 2); - else - add_attr (p, CCL_BIB1_STR, 1); - } - - /* make the RPN token */ - p->u.t.term = (char *)malloc (len); - ccl_assert (p->u.t.term); - p->u.t.term[0] = '\0'; - for (i = 0; ilook_token->name; - int src_len = cclp->look_token->len; - - if (i == 0 && left_trunc) - { - src_len--; - src_str++; - } - else if (i == no-1 && right_trunc) - src_len--; - if (src_len) - { - int len = strlen(p->u.t.term); - if (len && - !strchr("-+", *src_str) && - !strchr("-+", p->u.t.term[len-1])) - { - strcat (p->u.t.term, " "); - } - } - strxcat (p->u.t.term, src_str, src_len); - ADVANCE; - } - if (left_trunc && right_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH; - ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 3); - } - else if (right_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_RIGHT; - 
ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 1); - } - else if (left_trunc) - { - if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) - { - cclp->error_code = CCL_ERR_TRUNC_NOT_LEFT; - ccl_rpn_delete (p); - return NULL; - } - add_attr (p, CCL_BIB1_TRU, 2); - } - else - { - if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) - add_attr (p, CCL_BIB1_TRU, 100); - } - return p; + if (!p_top) + cclp->error_code = CCL_ERR_TERM_EXPECTED; + return p_top; } static struct ccl_rpn_node *search_term (CCL_parser cclp, diff --git a/ccl/cclqfile.c b/ccl/cclqfile.c index 9e0957a..f8b2438 100644 --- a/ccl/cclqfile.c +++ b/ccl/cclqfile.c @@ -45,7 +45,10 @@ * Europagate, 1995 * * $Log: cclqfile.c,v $ - * Revision 1.4 2000-01-31 13:15:21 adam + * Revision 1.5 2000-10-17 19:50:28 adam + * Implemented and-list and or-list for CCL module. + * + * Revision 1.4 2000/01/31 13:15:21 adam * Removed uses of assert(3). Cleanup of ODR. CCL parser update so * that some characters are not surrounded by spaces in resulting term. * ILL-code updates. @@ -127,6 +130,10 @@ void ccl_qual_fitem (CCL_bibset bibset, const char *cp, const char *qual_name) type = CCL_BIB1_STR; if (!ccl_stricmp (qual_value, "pw")) value = CCL_BIB1_STR_WP; + if (!ccl_stricmp (qual_value, "al")) + value = CCL_BIB1_STR_AND_LIST; + if (!ccl_stricmp (qual_value, "ol")) + value = CCL_BIB1_STR_OR_LIST; break; case 't': case 'T': @@ -143,7 +150,7 @@ void ccl_qual_fitem (CCL_bibset bibset, const char *cp, const char *qual_name) case 'c': case 'C': type = CCL_BIB1_COM; - break; + break; default: type = atoi (qual_type); } diff --git a/ccl/cclsh.c b/ccl/cclsh.c index d2aaf46..1b01bb8 100644 --- a/ccl/cclsh.c +++ b/ccl/cclsh.c @@ -45,7 +45,10 @@ * Europagate 1995 * * $Log: cclsh.c,v $ - * Revision 1.6 2000-01-31 13:15:21 adam + * Revision 1.7 2000-10-17 19:50:28 adam + * Implemented and-list and or-list for CCL module. + * + * Revision 1.6 2000/01/31 13:15:21 adam * Removed uses of assert(3). 
Cleanup of ODR. CCL parser update so * that some characters are not surrounded by spaces in resulting term. * ILL-code updates. @@ -107,6 +110,12 @@ static int debug = 0; static char *prog; +void usage(const char *prog) +{ + fprintf (stderr, "%s: [-d] [-b configfile]\n", prog); + exit (1); +} + int main (int argc, char **argv) { CCL_bibset bibset; @@ -148,9 +157,7 @@ int main (int argc, char **argv) fclose (bib_inf); break; default: - fprintf (stderr, "%s: unknown option '%s'\n", - prog, *argv); - exit (1); + usage(prog); } } else diff --git a/client/default.bib b/client/default.bib index 2316a29..3bae1ca 100644 --- a/client/default.bib +++ b/client/default.bib @@ -1,7 +1,7 @@ # Subset of bib-1 attributes map to CCL qualifiers -# $Id: default.bib,v 1.4 1996-07-26 13:37:06 quinn Exp $ +# $Id: default.bib,v 1.5 2000-10-17 19:50:28 adam Exp $ # -term t=l,r s=pw +term t=l,r s=pw s=al clean t=l au u=1 s=pw ti u=4 s=pw diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index d842be6..08d6542 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -46,7 +46,10 @@ * CCL - header file * * $Log: ccl.h,v $ - * Revision 1.5 2000-05-02 17:19:58 adam + * Revision 1.6 2000-10-17 19:50:28 adam + * Implemented and-list and or-list for CCL module. + * + * Revision 1.5 2000/05/02 17:19:58 adam * Removed MINUS token. * * Revision 1.4 2000/03/14 09:06:11 adam @@ -206,6 +209,8 @@ typedef struct ccl_qualifiers *CCL_bibset; #define CCL_BIB1_COM 6 #define CCL_BIB1_STR_WP (-1) +#define CCL_BIB1_STR_AND_LIST (-2) +#define CCL_BIB1_STR_OR_LIST (-3) #define CCL_BIB1_REL_ORDER (-1) #define CCL_BIB1_TRU_CAN_LEFT (-1)