X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=ccl%2Fcclfind.c;h=cbbe75d2badc048a87b17ef8c786979bc4f92abd;hb=177cd18ca296384f19f0f9104973446ed3e841af;hp=c10064063fe67311acc6618a7f5c1f3d986995c4;hpb=98a49f6636e01e87e867688f4fb8a3b696a847ff;p=egate.git diff --git a/ccl/cclfind.c b/ccl/cclfind.c index c100640..cbbe75d 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -2,7 +2,39 @@ * Europagate, 1995 * * $Log: cclfind.c,v $ - * Revision 1.2 1995/02/13 15:15:07 adam + * Revision 1.13 1995/04/17 09:31:42 adam + * Improved handling of qualifiers. Aliases or reserved words. + * + * Revision 1.12 1995/03/20 15:27:43 adam + * Minor changes. + * + * Revision 1.11 1995/02/23 08:31:59 adam + * Changed header. + * + * Revision 1.9 1995/02/16 13:20:06 adam + * Spell fix. + * + * Revision 1.8 1995/02/14 19:59:42 adam + * Removed a syntax error. + * + * Revision 1.7 1995/02/14 19:55:10 adam + * Header files ccl.h/cclp.h are gone! They have been merged an + * moved to ../include/ccl.h. + * Node kind(s) in ccl_rpn_node have changed names. + * + * Revision 1.6 1995/02/14 16:20:55 adam + * Qualifiers are read from a file now. + * + * Revision 1.5 1995/02/14 14:12:41 adam + * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990). + * + * Revision 1.4 1995/02/14 13:16:29 adam + * Left and/or right truncation implemented. + * + * Revision 1.3 1995/02/14 10:25:56 adam + * The constructions 'qualifier rel term ...' implemented. + * + * Revision 1.2 1995/02/13 15:15:07 adam * Added handling of qualifiers. Not finished yet. * * Revision 1.1 1995/02/13 12:35:20 adam @@ -15,16 +47,80 @@ #include #include -#include "cclp.h" +#include +/* current lookahead token */ static struct ccl_token *look_token; + +/* holds error no if error occur */ static int ccl_error; + +/* current bibset */ static CCL_bibset bibset; +/* returns type of current lookahead */ #define KIND (look_token->kind) + +/* move one token forward */ #define ADVANCE look_token = look_token->next -#define ADVX(x) x=(x)->next +/* + * qual_val_range: search for attribute of type with range + * qa: Attribute array + * type: Type of attribute to search for + * vmin: Lower bound of value to search for + * vmax: Upper bound of value to search for + * return: Return pointer to integer of attribute value found; NULL + * otherwise. + */ +static int *qual_val_range (struct ccl_rpn_attr **qa, int type, + int vmin, int vmax) +{ + int i; + struct ccl_rpn_attr *q; + + if (!qa) + return NULL; + for (i = 0; (q=qa[i]); i++) + while (q) + { + if (q->type == type && q->value >= vmin && q->value <= vmax) + return &q->value; + q = q->next; + } + return NULL; +} + +/* + * qual_val_type: test for existance of attribute type/value pair. + * qa: Attribute array + * type: Type of attribute to search for + * value: Value of attribute to seach for + * return: 1 if found; 0 otherwise. + */ +static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value) +{ + int i; + struct ccl_rpn_attr *q; + + if (!qa) + return 0; + for (i = 0; (q=qa[i]); i++) + while (q) + { + if (q->type == type && q->value == value) + return 1; + q = q->next; + } + return 0; +} + +/* + * strxcat: concatenate strings. + * n: Null-terminated Destination string + * src: Source string to be appended (not null-terminated) + * len: Length of source string. + */ static void strxcat (char *n, const char *src, int len) { while (*n) @@ -34,6 +130,11 @@ static void strxcat (char *n, const char *src, int len) *n = '\0'; } +/* + * copy_token_name: Return copy of CCL token name + * tp: Pointer to token info. + * return: malloc(3) allocated copy of token name. + */ static char *copy_token_name (struct ccl_token *tp) { char *str = malloc (tp->len + 1); @@ -43,6 +144,11 @@ static char *copy_token_name (struct ccl_token *tp) return str; } +/* + * mk_node: Create RPN node. + * kind: Type of node. + * return: pointer to allocated node. + */ static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind) { struct ccl_rpn_node *p; @@ -52,6 +158,10 @@ static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind) return p; } +/* + * ccl_rpn_delete: Delete RPN tree. + * rpn: Pointer to tree. + */ void ccl_rpn_delete (struct ccl_rpn_node *rpn) { struct ccl_rpn_attr *attr, *attr1; @@ -59,13 +169,13 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) return; switch (rpn->kind) { - case AND: - case OR: - case NOT: + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; - case TERM: + case CCL_RPN_TERM: free (rpn->u.t.term); for (attr = rpn->u.t.attr_list; attr; attr = attr1) { @@ -73,10 +183,10 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) free (attr); } break; - case SET: + case CCL_RPN_SET: free (rpn->u.setname); break; - case PROX: + case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; @@ -84,9 +194,15 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) free (rpn); } -static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa); -static struct ccl_rpn_node *search_terms (struct ccl_qualifier **qa); +static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa); +static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa); +/* + * add_attr: Add attribute (type/value) to RPN term node. + * p: RPN node of type term. + * type: Type of attribute + * value: Value of attribute + */ static void add_attr (struct ccl_rpn_node *p, int type, int value) { struct ccl_rpn_attr *n; @@ -99,81 +215,209 @@ static void add_attr (struct ccl_rpn_node *p, int type, int value) p->u.t.attr_list = n; } -static struct ccl_rpn_node *search_term (struct ccl_qualifier **qa) +/* + * search_term: Parse CCL search term. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ +static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p; struct ccl_token *lookahead = look_token; int len = 0; - int no = 0; + int no, i; + int left_trunc = 0; + int right_trunc = 0; + int mid_trunc = 0; + int relation_value = -1; + int position_value = -1; + int structure_value = -1; + int truncation_value = -1; + int completeness_value = -1; if (KIND != CCL_TOK_TERM) { ccl_error = CCL_ERR_TERM_EXPECTED; return NULL; } - while (lookahead->kind == CCL_TOK_TERM) + /* create the term node, but wait a moment before adding the term */ + p = mk_node (CCL_RPN_TERM); + p->u.t.attr_list = NULL; + p->u.t.term = NULL; + + if (!qa) { - no++; - len += 1+lookahead->len; + /* no qualifier(s) applied. Use 'term' if it is defined */ + + qa = malloc (2*sizeof(*qa)); + assert (qa); + qa[0] = ccl_qual_search (bibset, "term", 4); + qa[1] = NULL; + } + + /* go through all attributes and add them to the attribute list */ + for (i=0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = qa[i]; attr; attr = attr->next) + if (attr->value > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value; + break; + } + add_attr (p, attr->type, attr->value); + } + } + /* go through each TERM token. If no truncation attribute is yet + met, then look for left/right truncation markers (?) and + set left_trunc/right_trunc/mid_trunc accordingly */ + for (no = 0; lookahead->kind == CCL_TOK_TERM; no++) + { + for (i = 0; ilen; i++) + if (truncation_value == -1 && lookahead->name[i] == '?') + { + if (no == 0 && i == 0 && lookahead->len >= 1) + left_trunc = 1; + else if (lookahead->next->kind != CCL_TOK_TERM && + i == lookahead->len-1 && i >= 1) + right_trunc = 1; + else + mid_trunc = 1; + } + len += 1+lookahead->len; lookahead = lookahead->next; } - p = mk_node (TERM); + /* len now holds the number of characters in the RPN term */ + /* no holds the number of CCL tokens (1 or more) */ + + if (structure_value == -1 && + qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) + { /* no structure attribute met. Apply either structure attribute + WORD or PHRASE depending on number of CCL tokens */ + if (no == 1) + add_attr (p, CCL_BIB1_STR, 2); + else + add_attr (p, CCL_BIB1_STR, 1); + } + + /* make the RPN token */ p->u.t.term = malloc (len); - p->u.t.attr_list = NULL; - p->u.t.term[0] = '\0'; assert (p->u.t.term); - strxcat (p->u.t.term, look_token->name, look_token->len); - ADVANCE; - while (KIND == CCL_TOK_TERM) + p->u.t.term[0] = '\0'; + for (i = 0; iu.t.term, " "); - strxcat (p->u.t.term, look_token->name, look_token->len); - ADVANCE; + const char *src_str = look_token->name; + int src_len = look_token->len; + + if (i == 0 && left_trunc) + { + src_len--; + src_str++; + } + else if (i == no-1 && right_trunc) + src_len--; + if (i) + strcat (p->u.t.term, " "); + strxcat (p->u.t.term, src_str, src_len); + ADVANCE; } - if (qa) + if (left_trunc && right_trunc) { - int i; - /* use ... */ - for (i=0; qa[i]; i++) + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) { - int j; - for (j=0; jnuse; j++) - add_attr (p, 1, qa[i]->use[j]); + ccl_error = CCL_ERR_TRUNC_NOT_BOTH; + free (qa); + ccl_rpn_delete (p); + return NULL; } - /* structure ... */ - if (qa[0]->structure == 0) + add_attr (p, CCL_BIB1_TRU, 3); + } + else if (right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) { - if (no == 1) - add_attr (p, 4, 2); - else - add_attr (p, 4, 1); + ccl_error = CCL_ERR_TRUNC_NOT_RIGHT; + free (qa); + ccl_rpn_delete (p); + return NULL; } - else if (qa[0]->structure > 0) - add_attr (p, 4, qa[0]->structure); + add_attr (p, CCL_BIB1_TRU, 1); + } + else if (left_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) + { + ccl_error = CCL_ERR_TRUNC_NOT_LEFT; + free (qa); + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 2); + } + else + { + if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) + add_attr (p, CCL_BIB1_TRU, 100); } return p; } +/* + * qualifiers: Parse CCL qualifiers and search terms. + * la: Token pointer to RELATION token. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, - struct ccl_qualifier **qa) + struct ccl_rpn_attr **qa) { struct ccl_token *lookahead = look_token; - struct ccl_qualifier **ap; - int no = 1; - int i; - + struct ccl_rpn_attr **ap; + int no = 0; + int i, rel; +#if 0 if (qa) { - ccl_error = CCL_ERR_DOBBLE_QUAL; + ccl_error = CCL_ERR_DOUBLE_QUAL; return NULL; } +#endif for (lookahead = look_token; lookahead != la; lookahead=lookahead->next) no++; - ap = malloc (no * sizeof(*ap)); + if (qa) + for (i=0; qa[i]; i++) + no++; + ap = malloc ((no+1) * sizeof(*ap)); assert (ap); - for (i=0; look_token != la; i++) + for (i = 0; look_token != la; i++) { - ap[i] = ccl_qual_search (bibset, lookahead->name); + ap[i] = ccl_qual_search (bibset, look_token->name, look_token->len); if (!ap[i]) { ccl_error = CCL_ERR_UNKNOWN_QUAL; @@ -184,8 +428,11 @@ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, if (KIND == CCL_TOK_COMMA) ADVANCE; } + if (qa) + while (*qa) + ap[i++] = *qa++; ap[i] = NULL; - if (ap[0]->relation != 0) + if (!qual_val_type (ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER)) { /* unordered relation */ struct ccl_rpn_node *p; @@ -218,13 +465,120 @@ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, free (ap); return p; } - /* ordered relation ... */ - assert (0); + rel = 0; + if (look_token->len == 1) + { + if (look_token->name[0] == '<') + rel = 1; + else if (look_token->name[0] == '=') + rel = 3; + else if (look_token->name[0] == '>') + rel = 5; + } + else if (look_token->len == 2) + { + if (!memcmp (look_token->name, "<=", 2)) + rel = 2; + else if (!memcmp (look_token->name, ">=", 2)) + rel = 4; + else if (!memcmp (look_token->name, "<>", 2)) + rel = 6; + } + if (!rel) + ccl_error = CCL_ERR_BAD_RELATION; + else + { + struct ccl_rpn_node *p; + + ADVANCE; /* skip relation */ + if (KIND == CCL_TOK_TERM && look_token->next->kind == CCL_TOK_MINUS) + { + struct ccl_rpn_node *p1; + if (!(p1 = search_term (ap))) + { + free (ap); + return NULL; + } + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ + { + struct ccl_rpn_node *p2; + + if (!(p2 = search_term (ap))) + { + ccl_rpn_delete (p1); + free (ap); + return NULL; + } + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr (p1, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr (p2, CCL_BIB1_REL, 2); + free (ap); + return p; + } + else /* = term - */ + { + add_attr (p1, CCL_BIB1_REL, 4); + free (ap); + return p1; + } + } + else if (KIND == CCL_TOK_MINUS) /* = - term ? */ + { + ADVANCE; + if (!(p = search_term (ap))) + { + free (ap); + return NULL; + } + add_attr (p, CCL_BIB1_REL, 2); + free (ap); + return p; + } + else if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (ap))) + { + free (ap); + return NULL; + } + if (KIND != CCL_TOK_RP) + { + ccl_error = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + free (ap); + return NULL; + } + ADVANCE; + free (ap); + return p; + } + else + { + if (!(p = search_terms (ap))) + { + free (ap); + return NULL; + } + add_attr (p, CCL_BIB1_REL, rel); + free (ap); + return p; + } + ccl_error = CCL_ERR_TERM_EXPECTED; + } free (ap); return NULL; } -static struct ccl_rpn_node *search_terms (struct ccl_qualifier **qa) +/* + * search_terms: Parse CCL search terms - including proximity. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ +static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; p1 = search_term (qa); @@ -241,7 +595,7 @@ static struct ccl_rpn_node *search_terms (struct ccl_qualifier **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -254,7 +608,7 @@ static struct ccl_rpn_node *search_terms (struct ccl_qualifier **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -265,7 +619,12 @@ static struct ccl_rpn_node *search_terms (struct ccl_qualifier **qa) return p1; } -static struct ccl_rpn_node *search_elements (struct ccl_qualifier **qa) +/* + * search_elements: Parse CCL search elements + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ +static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1; struct ccl_token *lookahead; @@ -287,12 +646,14 @@ static struct ccl_rpn_node *search_elements (struct ccl_qualifier **qa) else if (KIND == CCL_TOK_SET) { ADVANCE; + if (KIND == CCL_TOK_EQ) + ADVANCE; if (KIND != CCL_TOK_TERM) { ccl_error = CCL_ERR_SETNAME_EXPECTED; return NULL; } - p1 = mk_node (SET); + p1 = mk_node (CCL_RPN_SET); p1->u.setname = copy_token_name (look_token); ADVANCE; return p1; @@ -306,7 +667,12 @@ static struct ccl_rpn_node *search_elements (struct ccl_qualifier **qa) return search_terms (qa); } -static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa) +/* + * find_spec: Parse CCL find specification + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ +static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; if (!(p1 = search_elements (qa))) @@ -323,7 +689,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (AND); + pn = mk_node (CCL_RPN_AND); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -336,7 +702,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (OR); + pn = mk_node (CCL_RPN_OR); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -349,7 +715,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (NOT); + pn = mk_node (CCL_RPN_NOT); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -360,6 +726,14 @@ static struct ccl_rpn_node *find_spec (struct ccl_qualifier **qa) return p1; } +/* + * ccl_find: Parse CCL find - token representation + * abibset: Bibset to be used for the parsing + * list: List of tokens + * error: Pointer to integer. Holds error no. on completion. + * pos: Pointer to char position. Holds approximate error position. + * return: RPN tree on successful completion; NULL otherwise. + */ struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, int *error, const char **pos) { @@ -385,79 +759,24 @@ struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, return p; } -static void pr_tree (struct ccl_rpn_node *rpn) -{ - - switch (rpn->kind) - { - case TERM: - printf ("\"%s\"", rpn->u.t.term); - if (rpn->u.t.attr_list) - { - struct ccl_rpn_attr *attr; - printf ("[ "); - for (attr = rpn->u.t.attr_list; attr; attr = attr->next) - printf ("%d=%d ", attr->type, attr->value); - printf ("] "); - } - break; - case AND: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") and ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case OR: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") or ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case NOT: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") not ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case SET: - printf ("set=%s", rpn->u.setname); - break; - case PROX: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") prox ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - default: - assert (0); - } -} - +/* + * ccl_find_str: Parse CCL find - string representation + * bibset: Bibset to be used for the parsing + * str: String to be parsed + * error: Pointer to integer. Holds error no. on completion. + * pos: Pointer to char position. Holds approximate error position. + * return: RPN tree on successful completion; NULL otherwise. + */ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, int *error, int *pos) { - struct ccl_token *list, *li; + struct ccl_token *list; struct ccl_rpn_node *rpn; const char *char_pos; list = ccl_tokenize (str); -#if 0 - for (li = list; li; li = li->next) - printf ("kind=%d, str='%.*s'\n", li->kind, li->len, li->name); -#endif rpn = ccl_find (bibset, list, error, &char_pos); - if (! *error) - { - pr_tree (rpn); - printf ("\n"); - } - else - { + if (*error) *pos = char_pos - str; - } return rpn; }