X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=ccl%2Fcclfind.c;h=3fac8e5b03c9ea1a21e32a16dfe413e20c93b906;hb=71e058e3a28894727dbaa4ad9056199e6442d6ae;hp=b663666fc255578bbcc4d05f4fe95cfca1cb7c1b;hpb=68727e97f4ab17a7008c5a8552796ec123af031c;p=egate.git diff --git a/ccl/cclfind.c b/ccl/cclfind.c index b663666..3fac8e5 100644 --- a/ccl/cclfind.c +++ b/ccl/cclfind.c @@ -2,7 +2,39 @@ * Europagate, 1995 * * $Log: cclfind.c,v $ - * Revision 1.1 1995/02/13 12:35:20 adam + * Revision 1.12 1995/03/20 15:27:43 adam + * Minor changes. + * + * Revision 1.11 1995/02/23 08:31:59 adam + * Changed header. + * + * Revision 1.9 1995/02/16 13:20:06 adam + * Spell fix. + * + * Revision 1.8 1995/02/14 19:59:42 adam + * Removed a syntax error. + * + * Revision 1.7 1995/02/14 19:55:10 adam + * Header files ccl.h/cclp.h are gone! They have been merged an + * moved to ../include/ccl.h. + * Node kind(s) in ccl_rpn_node have changed names. + * + * Revision 1.6 1995/02/14 16:20:55 adam + * Qualifiers are read from a file now. + * + * Revision 1.5 1995/02/14 14:12:41 adam + * Ranges for ordered qualfiers implemented (e.g. pd=1980-1990). + * + * Revision 1.4 1995/02/14 13:16:29 adam + * Left and/or right truncation implemented. + * + * Revision 1.3 1995/02/14 10:25:56 adam + * The constructions 'qualifier rel term ...' implemented. + * + * Revision 1.2 1995/02/13 15:15:07 adam + * Added handling of qualifiers. Not finished yet. + * + * Revision 1.1 1995/02/13 12:35:20 adam * First version of CCL. Qualifiers aren't handled yet. * */ @@ -12,15 +44,38 @@ #include #include -#include "cclp.h" +#include static struct ccl_token *look_token; static int ccl_error; +static CCL_bibset bibset; #define KIND (look_token->kind) #define ADVANCE look_token = look_token->next #define ADVX(x) x=(x)->next +static struct ccl_rpn_attr *qual_val (struct ccl_rpn_attr *list, int type) +{ + while (list) + { + if (list->type == type) + return list; + list = list->next; + } + return NULL; +} + +static int qual_val_type (struct ccl_rpn_attr *list, int type, int value) +{ + while (list) + { + if (list->type == type && list->value == value) + return 1; + list = list->next; + } + return 0; +} + static void strxcat (char *n, const char *src, int len) { while (*n) @@ -33,6 +88,7 @@ static void strxcat (char *n, const char *src, int len) static char *copy_token_name (struct ccl_token *tp) { char *str = malloc (tp->len + 1); + assert (str); memcpy (str, tp->name, tp->len); str[tp->len] = '\0'; return str; @@ -49,24 +105,29 @@ static struct ccl_rpn_node *mk_node (enum rpn_node_kind kind) void ccl_rpn_delete (struct ccl_rpn_node *rpn) { + struct ccl_rpn_attr *attr, *attr1; if (!rpn) return; switch (rpn->kind) { - case AND: - case OR: - case NOT: + case CCL_RPN_AND: + case CCL_RPN_OR: + case CCL_RPN_NOT: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; - case TERM: + case CCL_RPN_TERM: free (rpn->u.t.term); - /* attr list */ + for (attr = rpn->u.t.attr_list; attr; attr = attr1) + { + attr1 = attr->next; + free (attr); + } break; - case SET: + case CCL_RPN_SET: free (rpn->u.setname); break; - case PROX: + case CCL_RPN_PROX: ccl_rpn_delete (rpn->u.p[0]); ccl_rpn_delete (rpn->u.p[1]); break; @@ -74,50 +135,285 @@ void ccl_rpn_delete (struct ccl_rpn_node *rpn) free (rpn); } -static struct ccl_rpn_node *find_spec (void); +static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa); +static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa); -static struct ccl_rpn_node *search_term (void) +static void add_attr (struct ccl_rpn_node *p, int type, int value) +{ + struct ccl_rpn_attr *n; + + n = malloc (sizeof(*n)); + assert (n); + n->type = type; + n->value = value; + n->next = p->u.t.attr_list; + p->u.t.attr_list = n; +} + +static struct ccl_rpn_node *search_term (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p; + struct ccl_rpn_attr *attr; struct ccl_token *lookahead = look_token; int len = 0; + int no, i; + int left_trunc = 0; + int right_trunc = 0; + int mid_trunc = 0; if (KIND != CCL_TOK_TERM) { ccl_error = CCL_ERR_TERM_EXPECTED; return NULL; } - while (lookahead->kind == CCL_TOK_TERM) + for (no = 0; lookahead->kind == CCL_TOK_TERM; no++) { - len += 1+lookahead->len; + for (i = 0; ilen; i++) + if (lookahead->name[i] == '?') + { + if (no == 0 && i == 0 && lookahead->len >= 1) + left_trunc = 1; + else if (lookahead->next->kind != CCL_TOK_TERM && + i == lookahead->len-1 && i >= 1) + right_trunc = 1; + else + mid_trunc = 1; + } + len += 1+lookahead->len; lookahead = lookahead->next; } - p = mk_node (TERM); + p = mk_node (CCL_RPN_TERM); p->u.t.term = malloc (len); + assert (p->u.t.term); p->u.t.attr_list = NULL; p->u.t.term[0] = '\0'; - assert (p->u.t.term); - strxcat (p->u.t.term, look_token->name, look_token->len); - ADVANCE; - while (KIND == CCL_TOK_TERM) + for (i = 0; iu.t.term, " "); - strxcat (p->u.t.term, look_token->name, look_token->len); - ADVANCE; + const char *src_str = look_token->name; + int src_len = look_token->len; + + if (i == 0 && left_trunc) + { + src_len--; + src_str++; + } + else if (i == no-1 && right_trunc) + src_len--; + if (i) + strcat (p->u.t.term, " "); + strxcat (p->u.t.term, src_str, src_len); + ADVANCE; + } + if (qa) + { + int i; + for (i=0; qa[i]; i++) + { + struct ccl_rpn_attr *attr; + + for (attr = qa[i]; attr; attr = attr->next) + if (attr->value > 0) + add_attr (p, attr->type, attr->value); + } + attr = qa[0]; + } + else + attr = ccl_qual_search (bibset, "term", 4); + if (attr && qual_val_type (attr, CCL_BIB1_STR, CCL_BIB1_STR_WP)) + { + if (no == 1) + add_attr (p, CCL_BIB1_STR, 2); + else + add_attr (p, CCL_BIB1_STR, 1); + } + if (left_trunc && right_trunc) + { + if (attr && !qual_val_type (attr, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_BOTH)) + { + ccl_error = CCL_ERR_TRUNC_NOT_BOTH; + if (qa) + free (qa); + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 3); + } + else if (right_trunc) + { + if (attr && !qual_val_type (attr, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_RIGHT)) + { + ccl_error = CCL_ERR_TRUNC_NOT_RIGHT; + if (qa) + free (qa); + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 1); + } + else if (left_trunc) + { + if (attr && !qual_val_type (attr, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_LEFT)) + { + ccl_error = CCL_ERR_TRUNC_NOT_LEFT; + if (qa) + free (qa); + ccl_rpn_delete (p); + return NULL; + } + add_attr (p, CCL_BIB1_TRU, 2); + } + else + { + if (attr && qual_val_type (attr, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE)) + add_attr (p, CCL_BIB1_TRU, 100); } return p; } -static struct ccl_rpn_node *qualifiers (struct ccl_token *la) +static struct ccl_rpn_node *qualifiers (struct ccl_token *la, + struct ccl_rpn_attr **qa) { - assert (0); + struct ccl_token *lookahead = look_token; + struct ccl_rpn_attr **ap; + int no = 1; + int i, rel; + struct ccl_rpn_attr *attr; + + if (qa) + { + ccl_error = CCL_ERR_DOUBLE_QUAL; + return NULL; + } + for (lookahead = look_token; lookahead != la; lookahead=lookahead->next) + no++; + ap = malloc (no * sizeof(*ap)); + assert (ap); + for (i=0; look_token != la; i++) + { + ap[i] = ccl_qual_search (bibset, look_token->name, look_token->len); + if (!ap[i]) + { + ccl_error = CCL_ERR_UNKNOWN_QUAL; + free (ap); + return NULL; + } + ADVANCE; + if (KIND == CCL_TOK_COMMA) + ADVANCE; + } + ap[i] = NULL; + if (! (attr = qual_val (ap[0], CCL_BIB1_REL)) || + attr->value != CCL_BIB1_REL_ORDER) + { + /* unordered relation */ + struct ccl_rpn_node *p; + if (KIND != CCL_TOK_EQ) + { + ccl_error = CCL_ERR_EQ_EXPECTED; + free (ap); + return NULL; + } + ADVANCE; + if (KIND == CCL_TOK_LP) + { + ADVANCE; + if (!(p = find_spec (ap))) + { + free (ap); + return NULL; + } + if (KIND != CCL_TOK_RP) + { + ccl_error = CCL_ERR_RP_EXPECTED; + ccl_rpn_delete (p); + free (ap); + return NULL; + } + ADVANCE; + } + else + p = search_terms (ap); + free (ap); + return p; + } + rel = 0; + if (look_token->len == 1) + { + if (look_token->name[0] == '<') + rel = 1; + else if (look_token->name[0] == '=') + rel = 3; + else if (look_token->name[0] == '>') + rel = 5; + } + else if (look_token->len == 2) + { + if (!memcmp (look_token->name, "<=", 2)) + rel = 2; + else if (!memcmp (look_token->name, ">=", 2)) + rel = 4; + else if (!memcmp (look_token->name, "<>", 2)) + rel = 6; + } + if (!rel) + ccl_error = CCL_ERR_BAD_RELATION; + else + { + struct ccl_rpn_node *p; + + ADVANCE; /* skip relation */ + if (KIND == CCL_TOK_TERM) + { + struct ccl_rpn_node *p1; + p1 = search_term (ap); + if (KIND == CCL_TOK_MINUS) + { + ADVANCE; /* skip '-' */ + if (KIND == CCL_TOK_TERM) /* = term - term ? */ + { + struct ccl_rpn_node *p2; + + p2 = search_term (ap); + p = mk_node (CCL_RPN_AND); + p->u.p[0] = p1; + add_attr (p1, CCL_BIB1_REL, 4); + p->u.p[1] = p2; + add_attr (p2, CCL_BIB1_REL, 2); + free (ap); + return p; + } + else /* = term - */ + { + add_attr (p1, CCL_BIB1_REL, 4); + free (ap); + return p1; + } + } + else + { + add_attr (p1, CCL_BIB1_REL, rel); + free (ap); + return p1; + } + } + else if (KIND == CCL_TOK_MINUS) /* = - term ? */ + { + ADVANCE; + p = search_term (ap); + add_attr (p, CCL_BIB1_REL, 2); + free (ap); + return p; + } + ccl_error = CCL_ERR_TERM_EXPECTED; + } + free (ap); return NULL; } -static struct ccl_rpn_node *search_terms (void) +static struct ccl_rpn_node *search_terms (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; - p1 = search_term (); + p1 = search_term (qa); if (!p1) return NULL; while (1) @@ -125,26 +421,26 @@ static struct ccl_rpn_node *search_terms (void) if (KIND == CCL_TOK_PROX) { ADVANCE; - p2 = search_term (); + p2 = search_term (qa); if (!p2) { ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; } else if (KIND == CCL_TOK_TERM) { - p2 = search_term (); + p2 = search_term (qa); if (!p2) { ccl_rpn_delete (p1); return NULL; } - pn = mk_node (PROX); + pn = mk_node (CCL_RPN_PROX); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -155,14 +451,14 @@ static struct ccl_rpn_node *search_terms (void) return p1; } -static struct ccl_rpn_node *search_elements (void) +static struct ccl_rpn_node *search_elements (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1; struct ccl_token *lookahead; if (KIND == CCL_TOK_LP) { ADVANCE; - p1 = find_spec (); + p1 = find_spec (qa); if (!p1) return NULL; if (KIND != CCL_TOK_RP) @@ -177,12 +473,14 @@ static struct ccl_rpn_node *search_elements (void) else if (KIND == CCL_TOK_SET) { ADVANCE; + if (KIND == CCL_TOK_EQ) + ADVANCE; if (KIND != CCL_TOK_TERM) { ccl_error = CCL_ERR_SETNAME_EXPECTED; return NULL; } - p1 = mk_node (SET); + p1 = mk_node (CCL_RPN_SET); p1->u.setname = copy_token_name (look_token); ADVANCE; return p1; @@ -192,14 +490,14 @@ static struct ccl_rpn_node *search_elements (void) while (lookahead->kind==CCL_TOK_TERM || lookahead->kind==CCL_TOK_COMMA) lookahead = lookahead->next; if (lookahead->kind == CCL_TOK_REL || lookahead->kind == CCL_TOK_EQ) - return qualifiers (lookahead); - return search_terms (); + return qualifiers (lookahead, qa); + return search_terms (qa); } -static struct ccl_rpn_node *find_spec (void) +static struct ccl_rpn_node *find_spec (struct ccl_rpn_attr **qa) { struct ccl_rpn_node *p1, *p2, *pn; - if (!(p1 = search_elements ())) + if (!(p1 = search_elements (qa))) return NULL; while (1) { @@ -207,39 +505,39 @@ static struct ccl_rpn_node *find_spec (void) { case CCL_TOK_AND: ADVANCE; - p2 = search_elements (); + p2 = search_elements (qa); if (!p2) { ccl_rpn_delete (p1); return NULL; } - pn = mk_node (AND); + pn = mk_node (CCL_RPN_AND); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; continue; case CCL_TOK_OR: ADVANCE; - p2 = search_elements (); + p2 = search_elements (qa); if (!p2) { ccl_rpn_delete (p1); return NULL; } - pn = mk_node (OR); + pn = mk_node (CCL_RPN_OR); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; continue; case CCL_TOK_NOT: ADVANCE; - p2 = search_elements (); + p2 = search_elements (qa); if (!p2) { ccl_rpn_delete (p1); return NULL; } - pn = mk_node (NOT); + pn = mk_node (CCL_RPN_NOT); pn->u.p[0] = p1; pn->u.p[1] = p2; p1 = pn; @@ -250,13 +548,14 @@ static struct ccl_rpn_node *find_spec (void) return p1; } -struct ccl_rpn_node *ccl_find (struct ccl_token *list, +struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, int *error, const char **pos) { struct ccl_rpn_node *p; look_token = list; - p = find_spec (); + bibset = abibset; + p = find_spec (NULL); if (p && KIND != CCL_TOK_EOL) { if (KIND == CCL_TOK_RP) @@ -274,68 +573,16 @@ struct ccl_rpn_node *ccl_find (struct ccl_token *list, return p; } -static void pr_tree (struct ccl_rpn_node *rpn) +struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str, + int *error, int *pos) { - switch (rpn->kind) - { - case TERM: - printf ("\"%s\"", rpn->u.t.term); - break; - case AND: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") and ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case OR: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") or ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case NOT: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") not ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - case SET: - printf ("set=%s", rpn->u.setname); - break; - case PROX: - printf ("("); - pr_tree (rpn->u.p[0]); - printf (") prox ("); - pr_tree (rpn->u.p[1]); - printf (")"); - break; - default: - assert (0); - } -} - -void ccl_find_str (const char *str, int *error, int *pos) -{ - struct ccl_token *list, *li; + struct ccl_token *list; struct ccl_rpn_node *rpn; const char *char_pos; list = ccl_tokenize (str); -#if 0 - for (li = list; li; li = li->next) - printf ("kind=%d, str='%.*s'\n", li->kind, li->len, li->name); -#endif - rpn = ccl_find (list, error, &char_pos); - if (! *error) - { - pr_tree (rpn); - printf ("\n"); - } - else - { + rpn = ccl_find (bibset, list, error, &char_pos); + if (*error) *pos = char_pos - str; - } + return rpn; }