X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=ccl%2Fccltoken.c;h=31a18d83e4fb3cd3fa0cc9936ffd96a85abf5239;hp=64a82793120ef972197f987403b7044985ba5448;hb=d0e56fdb958b43316f5ebffddd7f9dd8480978f8;hpb=a00dfa73d5d3796f8048f2134fec2685b62e2658 diff --git a/ccl/ccltoken.c b/ccl/ccltoken.c index 64a8279..31a18d8 100644 --- a/ccl/ccltoken.c +++ b/ccl/ccltoken.c @@ -44,16 +44,9 @@ /* CCL - lexical analysis * Europagate, 1995 * - * $Log: ccltoken.c,v $ - * Revision 1.7 1997-09-01 08:48:12 adam - * New windows NT/95 port using MSV5.0. Only a few changes made - * to avoid warnings. + * $Id: ccltoken.c,v 1.22 2003-02-14 18:49:23 adam Exp $ * - * Revision 1.6 1997/04/30 08:52:07 quinn - * Null - * - * Revision 1.5 1996/10/11 15:00:26 adam - * CCL parser from Europagate Email gateway 1.0. + * Old Europagate Log: * * Revision 1.10 1995/07/11 12:28:31 adam * New function: ccl_token_simple (split into simple tokens) and @@ -90,18 +83,11 @@ * */ -#include #include #include -#include - -#include +#include -const char *ccl_token_and = "and"; -const char *ccl_token_or = "or"; -const char *ccl_token_not = "not andnot"; -const char *ccl_token_set = "set"; -int ccl_case_sensitive = 1; +#include /* * token_cmp: Compare token with keyword(s) @@ -110,16 +96,23 @@ int ccl_case_sensitive = 1; * return: 1 if token string matches one of the keywords in list; * 0 otherwise. */ -static int token_cmp (const char *kw, struct ccl_token *token) +static int token_cmp (CCL_parser cclp, const char *kw, struct ccl_token *token) { const char *cp1 = kw; const char *cp2; + const char *aliases; + int case_sensitive = cclp->ccl_case_sensitive; + + aliases = ccl_qual_search_special(cclp->bibset, "case"); + if (aliases) + case_sensitive = atoi(aliases); if (!kw) return 0; while ((cp2 = strchr (cp1, ' '))) { if (token->len == (size_t) (cp2-cp1)) - if (ccl_case_sensitive) + { + if (case_sensitive) { if (!memcmp (cp1, token->name, token->len)) return 1; @@ -129,9 +122,10 @@ static int token_cmp (const char *kw, struct ccl_token *token) if (!ccl_memicmp (cp1, token->name, token->len)) return 1; } + } cp1 = cp2+1; } - if (ccl_case_sensitive) + if (case_sensitive) return token->len == strlen(cp1) && !memcmp (cp1, token->name, token->len); return token->len == strlen(cp1) && @@ -156,14 +150,14 @@ struct ccl_token *ccl_token_simple (const char *command) } if (!first) { - first = last = malloc (sizeof (*first)); - assert (first); + first = last = (struct ccl_token *)xmalloc (sizeof (*first)); + ccl_assert (first); last->prev = NULL; } else { - last->next = malloc (sizeof(*first)); - assert (last->next); + last->next = (struct ccl_token *)xmalloc (sizeof(*first)); + ccl_assert (last->next); last->next->prev = last; last = last->next; } @@ -199,12 +193,14 @@ struct ccl_token *ccl_token_simple (const char *command) return first; } + /* * ccl_tokenize: tokenize CCL command string. * return: CCL token list. */ -struct ccl_token *ccl_tokenize (const char *command) +struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command) { + const char *aliases; const char *cp = command; struct ccl_token *first = NULL; struct ccl_token *last = NULL; @@ -218,14 +214,14 @@ struct ccl_token *ccl_tokenize (const char *command) } if (!first) { - first = last = malloc (sizeof (*first)); - assert (first); + first = last = (struct ccl_token *)xmalloc (sizeof (*first)); + ccl_assert (first); last->prev = NULL; } else { - last->next = malloc (sizeof(*first)); - assert (last->next); + last->next = (struct ccl_token *)xmalloc (sizeof(*first)); + ccl_assert (last->next); last->next->prev = last; last = last->next; } @@ -249,7 +245,7 @@ struct ccl_token *ccl_tokenize (const char *command) case '%': case '!': last->kind = CCL_TOK_PROX; - while (*cp == '%' || *cp == '!') + while (isdigit(*cp)) { ++ last->len; cp++; @@ -269,9 +265,6 @@ struct ccl_token *ccl_tokenize (const char *command) else last->kind = CCL_TOK_REL; break; - case '-': - last->kind = CCL_TOK_MINUS; - break; case '\"': last->kind = CCL_TOK_TERM; last->name = cp; @@ -285,26 +278,56 @@ struct ccl_token *ccl_tokenize (const char *command) cp++; break; default: - while (*cp && !strchr ("(),%!><=- \t\n\r", *cp)) + if (!strchr ("(),%!><= \t\n\r", cp[-1])) { - cp++; - ++ last->len; + while (*cp && !strchr ("(),%!><= \t\n\r", *cp)) + { + cp++; + ++ last->len; + } } - if (token_cmp (ccl_token_and, last)) + last->kind = CCL_TOK_TERM; + + aliases = ccl_qual_search_special(cclp->bibset, "and"); + if (!aliases) + aliases = cclp->ccl_token_and; + if (token_cmp (cclp, aliases, last)) last->kind = CCL_TOK_AND; - else if (token_cmp (ccl_token_or, last)) + + aliases = ccl_qual_search_special(cclp->bibset, "or"); + if (!aliases) + aliases = cclp->ccl_token_or; + if (token_cmp (cclp, aliases, last)) last->kind = CCL_TOK_OR; - else if (token_cmp (ccl_token_not, last)) + + aliases = ccl_qual_search_special(cclp->bibset, "not"); + if (!aliases) + aliases = cclp->ccl_token_not; + if (token_cmp (cclp, aliases, last)) last->kind = CCL_TOK_NOT; - else if (token_cmp (ccl_token_set, last)) + + aliases = ccl_qual_search_special(cclp->bibset, "set"); + if (!aliases) + aliases = cclp->ccl_token_set; + + if (token_cmp (cclp, aliases, last)) last->kind = CCL_TOK_SET; - else - last->kind = CCL_TOK_TERM; } } return first; } +struct ccl_token *ccl_tokenize (const char *command) +{ + CCL_parser cclp = ccl_parser_create (); + struct ccl_token *list; + + list = ccl_parser_tokenize (cclp, command); + + ccl_parser_destroy (cclp); + return list; +} + /* * ccl_token_del: delete CCL tokens */ @@ -315,7 +338,89 @@ void ccl_token_del (struct ccl_token *list) while (list) { list1 = list->next; - free (list); + xfree (list); list = list1; } } + +char *ccl_strdup (const char *str) +{ + int len = strlen(str); + char *p = (char*) xmalloc (len+1); + strcpy (p, str); + return p; +} + +CCL_parser ccl_parser_create (void) +{ + CCL_parser p = (CCL_parser)xmalloc (sizeof(*p)); + if (!p) + return p; + p->look_token = NULL; + p->error_code = 0; + p->error_pos = NULL; + p->bibset = NULL; + + p->ccl_token_and = ccl_strdup("and"); + p->ccl_token_or = ccl_strdup("or"); + p->ccl_token_not = ccl_strdup("not andnot"); + p->ccl_token_set = ccl_strdup("set"); + p->ccl_case_sensitive = 1; + + return p; +} + +void ccl_parser_destroy (CCL_parser p) +{ + if (!p) + return; + xfree (p->ccl_token_and); + xfree (p->ccl_token_or); + xfree (p->ccl_token_not); + xfree (p->ccl_token_set); + xfree (p); +} + +void ccl_parser_set_op_and (CCL_parser p, const char *op) +{ + if (p && op) + { + if (p->ccl_token_and) + xfree (p->ccl_token_and); + p->ccl_token_and = ccl_strdup (op); + } +} + +void ccl_parser_set_op_or (CCL_parser p, const char *op) +{ + if (p && op) + { + if (p->ccl_token_or) + xfree (p->ccl_token_or); + p->ccl_token_or = ccl_strdup (op); + } +} +void ccl_parser_set_op_not (CCL_parser p, const char *op) +{ + if (p && op) + { + if (p->ccl_token_not) + xfree (p->ccl_token_not); + p->ccl_token_not = ccl_strdup (op); + } +} +void ccl_parser_set_op_set (CCL_parser p, const char *op) +{ + if (p && op) + { + if (p->ccl_token_set) + xfree (p->ccl_token_set); + p->ccl_token_set = ccl_strdup (op); + } +} + +void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag) +{ + if (p) + p->ccl_case_sensitive = case_sensitivity_flag; +}