src/ccltoken.c

   1 /*
   2  * Copyright (c) 1995, the EUROPAGATE consortium (see below).
   3  *
   4  * The EUROPAGATE consortium members are:
   5  *
   6  *    University College Dublin
   7  *    Danmarks Teknologiske Videnscenter
   8  *    An Chomhairle Leabharlanna
   9  *    Consejo Superior de Investigaciones Cientificas
  10  *
  11  * Permission to use, copy, modify, distribute, and sell this software and
  12  * its documentation, in whole or in part, for any purpose, is hereby granted,
  13  * provided that:
  14  *
  15  * 1. This copyright and permission notice appear in all copies of the
  16  * software and its documentation. Notices of copyright or attribution
  17  * which appear at the beginning of any file must remain unchanged.
  18  *
  19  * 2. The names of EUROPAGATE or the project partners may not be used to
  20  * endorse or promote products derived from this software without specific
  21  * prior written permission.
  22  *
  23  * 3. Users of this software (implementors and gateway operators) agree to
  24  * inform the EUROPAGATE consortium of their use of the software. This
  25  * information will be used to evaluate the EUROPAGATE project and the
  26  * software, and to plan further developments. The consortium may use
  27  * the information in later publications.
  28  *
  29  * 4. Users of this software agree to make their best efforts, when
  30  * documenting their use of the software, to acknowledge the EUROPAGATE
  31  * consortium, and the role played by the software in their work.
  32  *
  33  * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
  34  * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  35  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  36  * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
  37  * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
  38  * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
  39  * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
  40  * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
  41  * USE OR PERFORMANCE OF THIS SOFTWARE.
  42  *
  43  */
  44 /**
  45  * \file ccltoken.c
  46  * \brief Implements CCL lexical analyzer (scanner)
  47  */
  48 /* CCL - lexical analysis
  49  * Europagate, 1995
  50  *
  51  * $Id: ccltoken.c,v 1.9 2005-08-22 20:34:21 adam Exp $
  52  *
  53  * Old Europagate Log:
  54  *
  55  * Revision 1.10  1995/07/11  12:28:31  adam
  56  * New function: ccl_token_simple (split into simple tokens) and
  57  *  ccl_token_del (delete tokens).
  58  *
  59  * Revision 1.9  1995/05/16  09:39:28  adam
  60  * LICENSE.
  61  *
  62  * Revision 1.8  1995/05/11  14:03:57  adam
  63  * Changes in the reading of qualifier(s). New function: ccl_qual_fitem.
  64  * New variable ccl_case_sensitive, which controls whether reserved
  65  * words and field names are case sensitive or not.
  66  *
  67  * Revision 1.7  1995/04/19  12:11:24  adam
  68  * Minor change.
  69  *
  70  * Revision 1.6  1995/04/17  09:31:48  adam
  71  * Improved handling of qualifiers. Aliases or reserved words.
  72  *
  73  * Revision 1.5  1995/02/23  08:32:00  adam
  74  * Changed header.
  75  *
  76  * Revision 1.3  1995/02/15  17:42:16  adam
  77  * Minor changes of the api of this module. FILE* argument added
  78  * to ccl_pr_tree.
  79  *
  80  * Revision 1.2  1995/02/14  19:55:13  adam
  81  * Header files ccl.h/cclp.h are gone! They have been merged an
  82  * moved to ../include/ccl.h.
  83  * Node kind(s) in ccl_rpn_node have changed names.
  84  *
  85  * Revision 1.1  1995/02/13  12:35:21  adam
  86  * First version of CCL. Qualifiers aren't handled yet.
  87  *
  88  */
  89
  90 #include <string.h>
  91 #include <stdlib.h>
  92 #include <ctype.h>
  93
  94 #include <yaz/ccl.h>
  95
  96 /*
  97  * token_cmp: Compare token with keyword(s)
  98  * kw:     Keyword list. Each keyword is separated by space.
  99  * token:  CCL token.
 100  * return: 1 if token string matches one of the keywords in list;
 101  *         0 otherwise.
 102  */
 103 static int token_cmp (CCL_parser cclp, const char *kw, struct ccl_token *token)
 104 {
 105     const char *cp1 = kw;
 106     const char *cp2;
 107     const char *aliases;
 108     int case_sensitive = cclp->ccl_case_sensitive;
 109
 110     aliases = ccl_qual_search_special(cclp->bibset, "case");
 111     if (aliases)
 112         case_sensitive = atoi(aliases);
 113     if (!kw)
 114         return 0;
 115     while ((cp2 = strchr (cp1, ' ')))
 116     {
 117         if (token->len == (size_t) (cp2-cp1))
 118         {
 119             if (case_sensitive)
 120             {
 121                 if (!memcmp (cp1, token->name, token->len))
 122                     return 1;
 123             }
 124             else
 125             {
 126                 if (!ccl_memicmp (cp1, token->name, token->len))
 127                     return 1;
 128             }
 129         }
 130         cp1 = cp2+1;
 131     }
 132     if (case_sensitive)
 133         return token->len == strlen(cp1)
 134             && !memcmp (cp1, token->name, token->len);
 135     return token->len == strlen(cp1) &&
 136         !ccl_memicmp (cp1, token->name, token->len);
 137 }
 138
 139 /*
 140  * ccl_tokenize: tokenize CCL command string.
 141  * return: CCL token list.
 142  */
 143 struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command)
 144 {
 145     const char *aliases;
 146     const unsigned char *cp = (const unsigned char *) command;
 147     struct ccl_token *first = NULL;
 148     struct ccl_token *last = NULL;
 149
 150     while (1)
 151     {
 152         const unsigned char *cp0 = cp;
 153         while (*cp && strchr (" \t\r\n", *cp))
 154             cp++;
 155         if (!first)
 156         {
 157             first = last = (struct ccl_token *)xmalloc (sizeof (*first));
 158             ccl_assert (first);
 159             last->prev = NULL;
 160         }
 161         else
 162         {
 163             last->next = (struct ccl_token *)xmalloc (sizeof(*first));
 164             ccl_assert (last->next);
 165             last->next->prev = last;
 166             last = last->next;
 167         }
 168         last->ws_prefix_buf = (const char *) cp0;
 169         last->ws_prefix_len = cp - cp0;
 170         last->next = NULL;
 171         last->name = (const char *) cp;
 172         last->len = 1;
 173         switch (*cp++)
 174         {
 175         case '\0':
 176             last->kind = CCL_TOK_EOL;
 177             return first;
 178         case '(':
 179             last->kind = CCL_TOK_LP;
 180             break;
 181         case ')':
 182             last->kind = CCL_TOK_RP;
 183             break;
 184         case ',':
 185             last->kind = CCL_TOK_COMMA;
 186             break;
 187         case '%':
 188         case '!':
 189             last->kind = CCL_TOK_PROX;
 190             while (isdigit(*cp))
 191             {
 192                 ++ last->len;
 193                 cp++;
 194             }
 195             break;
 196         case '>':
 197         case '<':
 198         case '=':
 199             if (*cp == '=' || *cp == '<' || *cp == '>')
 200             {
 201                 cp++;
 202                 last->kind = CCL_TOK_REL;
 203                 ++ last->len;
 204             }
 205             else if (cp[-1] == '=')
 206                 last->kind = CCL_TOK_EQ;
 207             else
 208                 last->kind = CCL_TOK_REL;
 209             break;
 210         case '\"':
 211             last->kind = CCL_TOK_TERM;
 212             last->name = (const char *) cp;
 213             last->len = 0;
 214             while (*cp && *cp != '\"')
 215             {
 216                 cp++;
 217                 ++ last->len;
 218             }
 219             if (*cp == '\"')
 220                 cp++;
 221             break;
 222         default:
 223             if (!strchr ("(),%!><= \t\n\r", cp[-1]))
 224             {
 225                 while (*cp && !strchr ("(),%!><= \t\n\r", *cp))
 226                 {
 227                     cp++;
 228                     ++ last->len;
 229                 }
 230             }
 231             last->kind = CCL_TOK_TERM;
 232
 233             aliases = ccl_qual_search_special(cclp->bibset, "and");
 234             if (!aliases)
 235                 aliases = cclp->ccl_token_and;
 236             if (token_cmp (cclp, aliases, last))
 237                 last->kind = CCL_TOK_AND;
 238
 239             aliases = ccl_qual_search_special(cclp->bibset, "or");
 240             if (!aliases)
 241                 aliases = cclp->ccl_token_or;
 242             if (token_cmp (cclp, aliases, last))
 243                 last->kind = CCL_TOK_OR;
 244
 245             aliases = ccl_qual_search_special(cclp->bibset, "not");
 246             if (!aliases)
 247                 aliases = cclp->ccl_token_not;
 248             if (token_cmp (cclp, aliases, last))
 249                 last->kind = CCL_TOK_NOT;
 250
 251             aliases = ccl_qual_search_special(cclp->bibset, "set");
 252             if (!aliases)
 253                 aliases = cclp->ccl_token_set;
 254
 255             if (token_cmp (cclp, aliases, last))
 256                 last->kind = CCL_TOK_SET;
 257         }
 258     }
 259     return first;
 260 }
 261
 262 struct ccl_token *ccl_token_add (struct ccl_token *at)
 263 {
 264     struct ccl_token *n = (struct ccl_token *)xmalloc (sizeof(*n));
 265     ccl_assert(n);
 266     n->next = at->next;
 267     n->prev = at;
 268     at->next = n;
 269     if (n->next)
 270         n->next->prev = n;
 271
 272     n->kind = CCL_TOK_TERM;
 273     n->name = 0;
 274     n->len = 0;
 275     n->ws_prefix_buf = 0;
 276     n->ws_prefix_len = 0;
 277     return n;
 278 }
 279
 280 struct ccl_token *ccl_tokenize (const char *command)
 281 {
 282     CCL_parser cclp = ccl_parser_create ();
 283     struct ccl_token *list;
 284
 285     list = ccl_parser_tokenize (cclp, command);
 286
 287     ccl_parser_destroy (cclp);
 288     return list;
 289 }
 290
 291 /*
 292  * ccl_token_del: delete CCL tokens
 293  */
 294 void ccl_token_del (struct ccl_token *list)
 295 {
 296     struct ccl_token *list1;
 297
 298     while (list)
 299     {
 300         list1 = list->next;
 301         xfree (list);
 302         list = list1;
 303     }
 304 }
 305
 306 char *ccl_strdup (const char *str)
 307 {
 308     int len = strlen(str);
 309     char *p = (char*) xmalloc (len+1);
 310     strcpy (p, str);
 311     return p;
 312 }
 313
 314 CCL_parser ccl_parser_create (void)
 315 {
 316     CCL_parser p = (CCL_parser)xmalloc (sizeof(*p));
 317     if (!p)
 318         return p;
 319     p->look_token = NULL;
 320     p->error_code = 0;
 321     p->error_pos = NULL;
 322     p->bibset = NULL;
 323
 324     p->ccl_token_and = ccl_strdup("and");
 325     p->ccl_token_or = ccl_strdup("or");
 326     p->ccl_token_not = ccl_strdup("not andnot");
 327     p->ccl_token_set = ccl_strdup("set");
 328     p->ccl_case_sensitive = 1;
 329
 330     return p;
 331 }
 332
 333 void ccl_parser_destroy (CCL_parser p)
 334 {
 335     if (!p)
 336         return;
 337     xfree (p->ccl_token_and);
 338     xfree (p->ccl_token_or);
 339     xfree (p->ccl_token_not);
 340     xfree (p->ccl_token_set);
 341     xfree (p);
 342 }
 343
 344 void ccl_parser_set_op_and (CCL_parser p, const char *op)
 345 {
 346     if (p && op)
 347     {
 348         if (p->ccl_token_and)
 349             xfree (p->ccl_token_and);
 350         p->ccl_token_and = ccl_strdup (op);
 351     }
 352 }
 353
 354 void ccl_parser_set_op_or (CCL_parser p, const char *op)
 355 {
 356     if (p && op)
 357     {
 358         if (p->ccl_token_or)
 359             xfree (p->ccl_token_or);
 360         p->ccl_token_or = ccl_strdup (op);
 361     }
 362 }
 363 void ccl_parser_set_op_not (CCL_parser p, const char *op)
 364 {
 365     if (p && op)
 366     {
 367         if (p->ccl_token_not)
 368             xfree (p->ccl_token_not);
 369         p->ccl_token_not = ccl_strdup (op);
 370     }
 371 }
 372 void ccl_parser_set_op_set (CCL_parser p, const char *op)
 373 {
 374     if (p && op)
 375     {
 376         if (p->ccl_token_set)
 377             xfree (p->ccl_token_set);
 378         p->ccl_token_set = ccl_strdup (op);
 379     }
 380 }
 381
 382 void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag)
 383 {
 384     if (p)
 385         p->ccl_case_sensitive = case_sensitivity_flag;
 386 }
 387 /*
 388  * Local variables:
 389  * c-basic-offset: 4
 390  * indent-tabs-mode: nil
 391  * End:
 392  * vim: shiftwidth=4 tabstop=8 expandtab
 393  */
 394