X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=ccl%2Fcclfind.c;h=cbbe75d2badc048a87b17ef8c786979bc4f92abd;hb=2114bdbe326a18f092ffaf2b90fbe8621da1b5f2;hp=76556a1d502695467bec27905e962b7c8668a72b;hpb=6b74708f98852f738fecb8a737a9640edf3d427e;p=egate.git diff --git a/ccl/cclfind.c b/ccl/cclfind.c index 76556a1..cbbe75d 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -2,7 +2,30 @@ * Europagate, 1995 * * $Log: cclfind.c,v $ - * Revision 1.5 1995/02/14 14:12:41 adam + * Revision 1.13 1995/04/17 09:31:42 adam + * Improved handling of qualifiers. Aliases or reserved words. + * + * Revision 1.12 1995/03/20 15:27:43 adam + * Minor changes. + * + * Revision 1.11 1995/02/23 08:31:59 adam + * Changed header. + * + * Revision 1.9 1995/02/16 13:20:06 adam + * Spell fix. + * + * Revision 1.8 1995/02/14 19:59:42 adam + * Removed a syntax error. + * + * Revision 1.7 1995/02/14 19:55:10 adam + * Header files ccl.h/cclp.h are gone! They have been merged an + * moved to ../include/ccl.h. + * Node kind(s) in ccl_rpn_node have changed names. + * + * Revision 1.6 1995/02/14 16:20:55 adam + * Qualifiers are read from a file now. + * + * Revision 1.5 1995/02/14 14:12:41 adam * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990). * * Revision 1.4 1995/02/14 13:16:29 adam @@ -24,27 +47,80 @@ #include #include -#include "cclp.h" +#include +/* current lookahead token */ static struct ccl_token *look_token; + +/* holds error no if error occur */ static int ccl_error; + +/* current bibset */ static CCL_bibset bibset; +/* returns type of current lookahead */ #define KIND (look_token->kind) + +/* move one token forward */ #define ADVANCE look_token = look_token->next -#define ADVX(x) x=(x)->next -static struct ccl_rpn_attr *qual_val (struct ccl_rpn_attr *list, int type) +/* + * qual_val_range: search for attribute of type with range + * qa: Attribute array + * type: Type of attribute to search for + * vmin: Lower bound of value to search for + * vmax: Upper bound of value to search for + * return: Return pointer to integer of attribute value found; NULL + * otherwise. + */ +static int *qual_val_range (struct ccl_rpn_attr **qa, int type, + int vmin, int vmax) { - while (list) - { - if (list->type == type) - return list; - list = list->next; - } + int i; + struct ccl_rpn_attr *q; + + if (!qa) + return NULL; + for (i = 0; (q=qa[i]); i++) + while (q) + { + if (q->type == type && q->value >= vmin && q->value <= vmax) + return &q->value; + q = q->next; + } return NULL; } +/* + * qual_val_type: test for existance of attribute type/value pair. + * qa: Attribute array + * type: Type of attribute to search for + * value: Value of attribute to seach for + * return: 1 if found; 0 otherwise. + */ +static int qual_val_type (struct ccl_rpn_attr **qa, int type, int value) +{ + int i; + struct ccl_rpn_attr *q; + + if (!qa) + return 0; + for (i = 0; (q=qa[i]); i++) + while (q) + { + if (q->type == type && q->value == value) + return 1; + q = q->next; + } + return 0; +} + +/* + * strxcat: concatenate strings. + * n: Null-terminated Destination string + * src: Source string to be appended (not null-terminated) + * len: Length of source string. + */ static void strxcat (char *n, const char *src, int len) { while (*n) @@ -54,6 +130,11 @@ static void strxcat (char *n, const char *src, int len) *n = '\0'; } +/* + * copy_token_name: Return copy of CCL token name + * tp: Pointer to token info. + * return: malloc(3) allocated copy of token name. + */ static char *copy_token_name (struct ccl_token *tp) { char *str = malloc (tp->len + 1); @@ -63,6 +144,11 @@ static char *copy_token_name (struct ccl_token *tp) return str; } +/* + * mk_node: Create RPN node. + * kind: Type of node. + * return: pointer to allocated node. + */ static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind) { struct ccl_rpn_node *p; @@ -72,6 +158,10 @@ static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind) return p; } +/* + * ccl_rpn_delete: Delete RPN tree. + * rpn: Pointer to tree. + */ void ccl_rpn_delete (struct ccl_rpn_node *rpn) { struct ccl_rpn_attr *attr, *attr1; @@ -79,13 +169,13 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) return; switch (rpn->kind) { - case AND: - case OR: - case NOT: + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; - case TERM: + case CCL_RPN_TERM: free (rpn->u.t.term); for (attr = rpn->u.t.attr_list; attr; attr = attr1) { @@ -93,10 +183,10 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) free (attr); } break; - case SET: + case CCL_RPN_SET: free (rpn->u.setname); break; - case PROX: + case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; @@ -107,6 +197,12 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa); static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa); +/* + * add_attr: Add attribute (type/value) to RPN term node. + * p: RPN node of type term. + * type: Type of attribute + * value: Value of attribute + */ static void add_attr (struct ccl_rpn_node *p, int type, int value) { struct ccl_rpn_attr *n; @@ -119,6 +215,11 @@ static void add_attr (struct ccl_rpn_node *p, int type, int value) p->u.t.attr_list = n; } +/* + * search_term: Parse CCL search term. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p; @@ -128,16 +229,78 @@ static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa) int left_trunc = 0; int right_trunc = 0; int mid_trunc = 0; + int relation_value = -1; + int position_value = -1; + int structure_value = -1; + int truncation_value = -1; + int completeness_value = -1; if (KIND != CCL_TOK_TERM) { ccl_error = CCL_ERR_TERM_EXPECTED; return NULL; } + /* create the term node, but wait a moment before adding the term */ + p = mk_node (CCL_RPN_TERM); + p->u.t.attr_list = NULL; + p->u.t.term = NULL; + + if (!qa) + { + /* no qualifier(s) applied. Use 'term' if it is defined */ + + qa = malloc (2*sizeof(*qa)); + assert (qa); + qa[0] = ccl_qual_search (bibset, "term", 4); + qa[1] = NULL; + } + + /* go through all attributes and add them to the attribute list */ + for (i=0; qa && qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = qa[i]; attr; attr = attr->next) + if (attr->value > 0) + { /* deal only with REAL attributes (positive) */ + switch (attr->type) + { + case CCL_BIB1_REL: + if (relation_value != -1) + continue; + relation_value = attr->value; + break; + case CCL_BIB1_POS: + if (position_value != -1) + continue; + position_value = attr->value; + break; + case CCL_BIB1_STR: + if (structure_value != -1) + continue; + structure_value = attr->value; + break; + case CCL_BIB1_TRU: + if (truncation_value != -1) + continue; + truncation_value = attr->value; + break; + case CCL_BIB1_COM: + if (completeness_value != -1) + continue; + completeness_value = attr->value; + break; + } + add_attr (p, attr->type, attr->value); + } + } + /* go through each TERM token. If no truncation attribute is yet + met, then look for left/right truncation markers (?) and + set left_trunc/right_trunc/mid_trunc accordingly */ for (no = 0; lookahead->kind == CCL_TOK_TERM; no++) { for (i = 0; ilen; i++) - if (lookahead->name[i] == '?') + if (truncation_value == -1 && lookahead->name[i] == '?') { if (no == 0 && i == 0 && lookahead->len >= 1) left_trunc = 1; @@ -150,10 +313,22 @@ static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa) len += 1+lookahead->len; lookahead = lookahead->next; } - p = mk_node (TERM); + /* len now holds the number of characters in the RPN term */ + /* no holds the number of CCL tokens (1 or more) */ + + if (structure_value == -1 && + qual_val_type (qa, CCL_BIB1_STR, CCL_BIB1_STR_WP)) + { /* no structure attribute met. Apply either structure attribute + WORD or PHRASE depending on number of CCL tokens */ + if (no == 1) + add_attr (p, CCL_BIB1_STR, 2); + else + add_attr (p, CCL_BIB1_STR, 1); + } + + /* make the RPN token */ p->u.t.term = malloc (len); assert (p->u.t.term); - p->u.t.attr_list = NULL; p->u.t.term[0] = '\0'; for (i = 0; iu.t.term, " "); strxcat (p->u.t.term, src_str, src_len); - ADVANCE; + ADVANCE; } - if (qa) + if (left_trunc && right_trunc) { - int i; - struct ccl_rpn_attr *attr; - for (i=0; qa[i]; i++) + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) { - struct ccl_rpn_attr *attr; - - for (attr = qa[i]; attr; attr = attr->next) - if (attr->value > 0) - add_attr (p, attr->type, attr->value); - } - if ((attr = qual_val (qa[0], CCL_BIB1_STR)) && - attr->value == CCL_BIB1_STR_WP) - { - if (no == 1) - add_attr (p, CCL_BIB1_STR, 2); - else - add_attr (p, CCL_BIB1_STR, 1); + ccl_error = CCL_ERR_TRUNC_NOT_BOTH; + free (qa); + ccl_rpn_delete (p); + return NULL; } - } - if (left_trunc && right_trunc) add_attr (p, CCL_BIB1_TRU, 3); + } else if (right_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) + { + ccl_error = CCL_ERR_TRUNC_NOT_RIGHT; + free (qa); + ccl_rpn_delete (p); + return NULL; + } add_attr (p, CCL_BIB1_TRU, 1); + } else if (left_trunc) + { + if (!qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) + { + ccl_error = CCL_ERR_TRUNC_NOT_LEFT; + free (qa); + ccl_rpn_delete (p); + return NULL; + } add_attr (p, CCL_BIB1_TRU, 2); + } + else + { + if (qual_val_type (qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) + add_attr (p, CCL_BIB1_TRU, 100); + } return p; } +/* + * qualifiers: Parse CCL qualifiers and search terms. + * la: Token pointer to RELATION token. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, struct ccl_rpn_attr **qa) { struct ccl_token *lookahead = look_token; struct ccl_rpn_attr **ap; - int no = 1; + int no = 0; int i, rel; - struct ccl_rpn_attr *attr; - +#if 0 if (qa) { - ccl_error = CCL_ERR_DOBBLE_QUAL; + ccl_error = CCL_ERR_DOUBLE_QUAL; return NULL; } +#endif for (lookahead = look_token; lookahead != la; lookahead=lookahead->next) no++; - ap = malloc (no * sizeof(*ap)); + if (qa) + for (i=0; qa[i]; i++) + no++; + ap = malloc ((no+1) * sizeof(*ap)); assert (ap); - for (i=0; look_token != la; i++) + for (i = 0; look_token != la; i++) { ap[i] = ccl_qual_search (bibset, look_token->name, look_token->len); if (!ap[i]) @@ -233,8 +428,11 @@ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, if (KIND == CCL_TOK_COMMA) ADVANCE; } + if (qa) + while (*qa) + ap[i++] = *qa++; ap[i] = NULL; - if (! (attr = qual_val (ap[0], CCL_BIB1_REL)) || attr->value == 3) + if (!qual_val_type (ap, CCL_BIB1_REL, CCL_BIB1_REL_ORDER)) { /* unordered relation */ struct ccl_rpn_node *p; @@ -293,36 +491,36 @@ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, struct ccl_rpn_node *p; ADVANCE; /* skip relation */ - if (KIND == CCL_TOK_TERM) + if (KIND == CCL_TOK_TERM && look_token->next->kind == CCL_TOK_MINUS) { struct ccl_rpn_node *p1; - p1 = search_term (ap); - if (KIND == CCL_TOK_MINUS) + if (!(p1 = search_term (ap))) + { + free (ap); + return NULL; + } + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ { - ADVANCE; /* skip '-' */ - if (KIND == CCL_TOK_TERM) /* = term - term ? */ - { - struct ccl_rpn_node *p2; - - p2 = search_term (ap); - p = mk_node (AND); - p->u.p[0] = p1; - add_attr (p1, CCL_BIB1_REL, 4); - p->u.p[1] = p2; - add_attr (p2, CCL_BIB1_REL, 2); - free (ap); - return p; - } - else /* = term - */ - { - add_attr (p1, CCL_BIB1_REL, 4); - free (ap); - return p1; - } + struct ccl_rpn_node *p2; + + if (!(p2 = search_term (ap))) + { + ccl_rpn_delete (p1); + free (ap); + return NULL; + } + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr (p1, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr (p2, CCL_BIB1_REL, 2); + free (ap); + return p; } - else + else /* = term - */ { - add_attr (p1, CCL_BIB1_REL, rel); + add_attr (p1, CCL_BIB1_REL, 4); free (ap); return p1; } @@ -330,17 +528,56 @@ static struct ccl_rpn_node *qualifiers (struct ccl_token *la, else if (KIND == CCL_TOK_MINUS) /* = - term ? */ { ADVANCE; - p = search_term (ap); + if (!(p = search_term (ap))) + { + free (ap); + return NULL; + } add_attr (p, CCL_BIB1_REL, 2); free (ap); return p; } + else if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (ap))) + { + free (ap); + return NULL; + } + if (KIND != CCL_TOK_RP) + { + ccl_error = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + free (ap); + return NULL; + } + ADVANCE; + free (ap); + return p; + } + else + { + if (!(p = search_terms (ap))) + { + free (ap); + return NULL; + } + add_attr (p, CCL_BIB1_REL, rel); + free (ap); + return p; + } ccl_error = CCL_ERR_TERM_EXPECTED; } free (ap); return NULL; } +/* + * search_terms: Parse CCL search terms - including proximity. + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; @@ -358,7 +595,7 @@ static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -371,7 +608,7 @@ static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -382,6 +619,11 @@ static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) return p1; } +/* + * search_elements: Parse CCL search elements + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1; @@ -411,7 +653,7 @@ static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa) ccl_error = CCL_ERR_SETNAME_EXPECTED; return NULL; } - p1 = mk_node (SET); + p1 = mk_node (CCL_RPN_SET); p1->u.setname = copy_token_name (look_token); ADVANCE; return p1; @@ -425,6 +667,11 @@ static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa) return search_terms (qa); } +/* + * find_spec: Parse CCL find specification + * qa: Qualifier attributes already applied. + * return: pointer to node(s); NULL on error. + */ static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; @@ -442,7 +689,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (AND); + pn = mk_node (CCL_RPN_AND); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -455,7 +702,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (OR); + pn = mk_node (CCL_RPN_OR); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -468,7 +715,7 @@ static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) ccl_rpn_delete (p1); return NULL; } - pn = mk_node (NOT); + pn = mk_node (CCL_RPN_NOT); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -479,6 +726,14 @@ static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) return p1; } +/* + * ccl_find: Parse CCL find - token representation + * abibset: Bibset to be used for the parsing + * list: List of tokens + * error: Pointer to integer. Holds error no. on completion. + * pos: Pointer to char position. Holds approximate error position. + * return: RPN tree on successful completion; NULL otherwise. + */ struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, int *error, const char **pos) { @@ -504,18 +759,22 @@ struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, return p; } +/* + * ccl_find_str: Parse CCL find - string representation + * bibset: Bibset to be used for the parsing + * str: String to be parsed + * error: Pointer to integer. Holds error no. on completion. + * pos: Pointer to char position. Holds approximate error position. + * return: RPN tree on successful completion; NULL otherwise. + */ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, int *error, int *pos) { - struct ccl_token *list, *li; + struct ccl_token *list; struct ccl_rpn_node *rpn; const char *char_pos; list = ccl_tokenize (str); -#if 0 - for (li = list; li; li = li->next) - printf ("kind=%d, str='%.*s'\n", li->kind, li->len, li->name); -#endif rpn = ccl_find (bibset, list, error, &char_pos); if (*error) *pos = char_pos - str;