X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Fccl.h;h=c22e45592698efa553aba5916d402205b0ffdc55;hp=6296864079837ca486df5e46f9ac2fd0bf89a940;hb=b1df5f9013d82510f6250d93623a0126ec19265f;hpb=fb6d99a0c7e07d9cc4a315c447deaf6564a85505 diff --git a/include/yaz/ccl.h b/include/yaz/ccl.h index 6296864..c22e455 100644 --- a/include/yaz/ccl.h +++ b/include/yaz/ccl.h @@ -1,45 +1,28 @@ -/* - * Copyright (c) 1995, the EUROPAGATE consortium (see below). - * - * The EUROPAGATE consortium members are: - * - * University College Dublin - * Danmarks Teknologiske Videnscenter - * An Chomhairle Leabharlanna - * Consejo Superior de Investigaciones Cientificas - * - * Permission to use, copy, modify, distribute, and sell this software and - * its documentation, in whole or in part, for any purpose, is hereby granted, - * provided that: - * - * 1. This copyright and permission notice appear in all copies of the - * software and its documentation. Notices of copyright or attribution - * which appear at the beginning of any file must remain unchanged. - * - * 2. The names of EUROPAGATE or the project partners may not be used to - * endorse or promote products derived from this software without specific - * prior written permission. +/* This file is part of the YAZ toolkit. + * Copyright (C) Index Data. + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: * - * 3. Users of this software (implementors and gateway operators) agree to - * inform the EUROPAGATE consortium of their use of the software. This - * information will be used to evaluate the EUROPAGATE project and the - * software, and to plan further developments. The consortium may use - * the information in later publications. - * - * 4. Users of this software agree to make their best efforts, when - * documenting their use of the software, to acknowledge the EUROPAGATE - * consortium, and the role played by the software in their work. - * - * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND, - * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY - * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE - * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF - * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA - * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND - * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE - * USE OR PERFORMANCE OF THIS SOFTWARE. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Index Data nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** \file ccl.h @@ -49,7 +32,6 @@ /* * CCL - header file * - * $Id: ccl.h,v 1.23 2005-06-25 15:46:01 adam Exp $ * * Old Europagate Log: * @@ -95,8 +77,9 @@ #include YAZ_BEGIN_CDECL - -/* CCL error numbers */ + +#define YAZ_781 1 + #define CCL_ERR_OK 0 #define CCL_ERR_TERM_EXPECTED 1 #define CCL_ERR_RP_EXPECTED 2 @@ -110,44 +93,50 @@ YAZ_BEGIN_CDECL #define CCL_ERR_TRUNC_NOT_LEFT 10 #define CCL_ERR_TRUNC_NOT_BOTH 11 #define CCL_ERR_TRUNC_NOT_RIGHT 12 - -/** attribute node (type, value) pair as used in RPN */ +#define CCL_ERR_TRUNC_NOT_EMBED 13 +#define CCL_ERR_TRUNC_NOT_SINGLE 14 + +/** \brief attribute node (type, value) pair as used in RPN */ struct ccl_rpn_attr { - /** next node */ + /** \brief next attribute */ struct ccl_rpn_attr *next; - /** attribute set */ + /** \brief attribute set */ char *set; - /** attribute type, Bib-1: 1=use, 2=relation, 3=position, .. */ + /** \brief attribute type, Bib-1: 1=use, 2=relation, 3=position, etc */ int type; - /** attribute value type (numeric or string) */ + /** \brief attribute value type (numeric or string) */ int kind; #define CCL_RPN_ATTR_NUMERIC 1 #define CCL_RPN_ATTR_STRING 2 union { - /** numeric attribute value */ + /** \brief numeric attribute value */ int numeric; - /** string attribute value */ + /** \brief string attribute value */ char *str; } value; }; -#define CCL_RPN_AND 1 -#define CCL_RPN_OR 2 -#define CCL_RPN_NOT 3 -#define CCL_RPN_TERM 4 -#define CCL_RPN_SET 5 -#define CCL_RPN_PROX 6 +/** \brief node type or RPN tree generated by the CCL parser */ +enum ccl_rpn_kind { + CCL_RPN_AND, + CCL_RPN_OR, + CCL_RPN_NOT, + CCL_RPN_TERM, + CCL_RPN_SET, + CCL_RPN_PROX +}; -/** RPN tree structure node */ +/** \brief RPN tree structure node */ struct ccl_rpn_node { - /** RPN tree node type */ - int kind; + /** \brief node type, one of CCL_RPN_AND, CCL_RPN_OR, etc */ + enum ccl_rpn_kind kind; union { - /** Boolean including proximity 0=left, 2=right, 3=prox parms */ + /** \brief Boolean including proximity 0=left, 1=right, 2=prox parms */ struct ccl_rpn_node *p[3]; - /** Attributes + Term */ + /** \brief Attributes + Term */ struct { char *term; + char *qual; struct ccl_rpn_attr *attr_list; } t; /** Result set */ @@ -155,224 +144,76 @@ struct ccl_rpn_node { } u; }; +/** \brief CCL bibset, AKA profile */ typedef struct ccl_qualifiers *CCL_bibset; -/* use (1) - - relation (2) - -1 none - 0 ordered - 1-6 relation (<, <=, =, >=, >, <>) - - position (3) - -1 none - 1 first in field - 2 first in sub field - 3 any position in field - structure (4) - -1 none - 0 word/phrase auto select - 1 phrase - 2 word - 3 key - 4 year - 5 date (normalized) - 6 word list - 100 date (un-normalized) - 101 name (normalized) - 102 name (un-normalized) - truncation (5) - completeness (6) -*/ - -#define CCL_BIB1_USE 1 -#define CCL_BIB1_REL 2 -#define CCL_BIB1_POS 3 -#define CCL_BIB1_STR 4 -#define CCL_BIB1_TRU 5 -#define CCL_BIB1_COM 6 - -#define CCL_BIB1_STR_WP (-1) -#define CCL_BIB1_STR_AND_LIST (-2) -#define CCL_BIB1_STR_OR_LIST (-3) -#define CCL_BIB1_REL_ORDER (-1) -#define CCL_BIB1_REL_PORDER (-2) - -#define CCL_BIB1_TRU_CAN_LEFT (-1) -#define CCL_BIB1_TRU_CAN_RIGHT (-2) -#define CCL_BIB1_TRU_CAN_BOTH (-3) -#define CCL_BIB1_TRU_CAN_NONE (-4) - -#define CCL_TOK_EOL 0 -#define CCL_TOK_TERM 1 -#define CCL_TOK_REL 2 -#define CCL_TOK_EQ 3 -#define CCL_TOK_PROX 4 -#define CCL_TOK_LP 5 -#define CCL_TOK_RP 6 -#define CCL_TOK_COMMA 7 -#define CCL_TOK_AND 8 -#define CCL_TOK_OR 9 -#define CCL_TOK_NOT 10 -#define CCL_TOK_SET 11 - -/** CCL token */ -struct ccl_token { - char kind; - size_t len; /* length of name below */ - const char *name; /* string / name of token */ - struct ccl_token *next; - struct ccl_token *prev; - const char *ws_prefix_buf; /* leading white space buf */ - size_t ws_prefix_len; /* leading white space len */ -}; - -/** CCL Qualifier */ -struct ccl_qualifier { - char *name; - int no_sub; - struct ccl_qualifier **sub; - struct ccl_rpn_attr *attr_list; - struct ccl_qualifier *next; -}; - -/** CCL parser structure */ -struct ccl_parser { - /** current lookahead token */ - struct ccl_token *look_token; - - /** holds error code if error occur */ - int error_code; - /** if error occurs, this holds position (starting from 0). */ - const char *error_pos; - - /** current bibset */ - CCL_bibset bibset; - - /** names of and operator */ - char *ccl_token_and; - /** names of or operator */ - char *ccl_token_or; - /** names of not operator */ - char *ccl_token_not; - /** names of set operator */ - char *ccl_token_set; - /** 1=CCL parser is case sensitive, 0=case insensitive */ - int ccl_case_sensitive; -}; - +/** \brief CCL parser */ typedef struct ccl_parser *CCL_parser; - -/** - * Splits CCL command string into individual tokens using - * a CCL parser. - */ -YAZ_EXPORT -struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command); - -/** - * Splits CCL command string into tokens using temporary parser. - * - * Use ccl_parser_tokenize instead and provide a parser - it is - * more flexible and efficient. - */ -YAZ_EXPORT -struct ccl_token *ccl_tokenize (const char *command); - -/** - * Deletes token list - */ -YAZ_EXPORT -void ccl_token_del (struct ccl_token *list); /** - * Add single token after a given onde. - */ -YAZ_EXPORT -struct ccl_token *ccl_token_add (struct ccl_token *at); + \brief parse CCL find string using CCL profile return RPN tree -/** - * Parses a CCL Find command in a simple C string. Returns CCL parse - * tree node describing RPN if parsing is successful. If parsing is - * unsuccesful, NULL is returned and error and pos is set accordingly. - */ + Parses a CCL Find command in a simple C string. Returns CCL parse + tree node describing RPN if parsing is successful. If parsing is + unsuccesful, NULL is returned and error and pos is set accordingly. +*/ YAZ_EXPORT -struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, - const char *str, int *error, int *pos); +struct ccl_rpn_node *ccl_find_str(CCL_bibset bibset, + const char *str, int *error, int *pos); -/** - * Parses CCL Find command from a list of CCL tokens. Otherwise similar to - * ccl_find_str. - */ -YAZ_EXPORT -struct ccl_rpn_node *ccl_find (CCL_bibset abibset, struct ccl_token *list, - int *error, const char **pos); /** - * Parses a CCL Find command from a list of CCL tokens and given a CCL - * parser. Otherwise similar to ccl_find_str. - */ -YAZ_EXPORT -struct ccl_rpn_node *ccl_parser_find (CCL_parser cclp, struct ccl_token *list); - -/** Set names for AND operator in parser */ -YAZ_EXPORT -void ccl_parser_set_op_and (CCL_parser p, const char *op); + \brief parse CCL find string with parser and return RPN tree -/** Set names for OR operator in parser */ -YAZ_EXPORT -void ccl_parser_set_op_or (CCL_parser p, const char *op); - -/** Set names for ANDNOT operator in parser */ -YAZ_EXPORT -void ccl_parser_set_op_not (CCL_parser p, const char *op); - -/** Set names for ResultSet in parser */ + Parses a CCL Find command in a simple C string. Returns CCL parse + tree node describing RPN if parsing is successful. If parsing is + unsuccesful, NULL is returned and error and pos is set accordingly. +*/ YAZ_EXPORT -void ccl_parser_set_op_set (CCL_parser p, const char *op); +struct ccl_rpn_node *ccl_parser_find_str(CCL_parser cclp, const char *str); /** Set case sensitivity for parser */ YAZ_EXPORT -void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag); +void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag); /** Return english-readable error message for CCL parser error number */ YAZ_EXPORT -const char *ccl_err_msg (int ccl_errno); +const char *ccl_err_msg(int ccl_errno); /** Delete RPN tree returned by ccl_find */ YAZ_EXPORT -void ccl_rpn_delete (struct ccl_rpn_node *rpn); +void ccl_rpn_delete(struct ccl_rpn_node *rpn); /** Dump RPN tree in readable format to fd_out */ YAZ_EXPORT -void ccl_pr_tree (struct ccl_rpn_node *rpn, FILE *fd_out); +void ccl_pr_tree(struct ccl_rpn_node *rpn, FILE *fd_out); /** Add qualifier and supply attribute pairs for it */ YAZ_EXPORT -void ccl_qual_add (CCL_bibset b, const char *name, int no, int *attr); +void ccl_qual_add(CCL_bibset b, const char *name, int no, int *attr); /** Add qualifier and supply attributes pairs+attribute set for it */ YAZ_EXPORT -void ccl_qual_add_set (CCL_bibset b, const char *name, int no, - int *type, int *value, char **svalue, char **attsets); +void ccl_qual_add_set(CCL_bibset b, const char *name, int no, + int *type, int *value, char **svalue, char **attsets); /** Add special qualifier */ YAZ_EXPORT -void ccl_qual_add_special (CCL_bibset bibset, const char *n, const char *v); +void ccl_qual_add_special(CCL_bibset bibset, const char *n, const char *cp); /** Add combo qualifier */ YAZ_EXPORT -void ccl_qual_add_combi (CCL_bibset b, const char *n, const char *names); +void ccl_qual_add_combi(CCL_bibset b, const char *n, const char **names); /** Read CCL qualifier list spec from file inf */ YAZ_EXPORT -void ccl_qual_file (CCL_bibset bibset, FILE *inf); +void ccl_qual_file(CCL_bibset bibset, FILE *inf); /** Read CCL qualifier list spec from file inf */ YAZ_EXPORT -int ccl_qual_fname (CCL_bibset bibset, const char *fname); +int ccl_qual_fname(CCL_bibset bibset, const char *fname); -/** Add CCL qualifier as buf spec (multiple lines). */ +/** Add CCL qualifier as buf spec(multiple lines). */ YAZ_EXPORT void ccl_qual_buf(CCL_bibset bibset, const char *buf); @@ -382,57 +223,153 @@ void ccl_qual_line(CCL_bibset bibset, char *line); /* Add CCL qualifier by using qual_name + value pair */ YAZ_EXPORT -void ccl_qual_fitem (CCL_bibset bibset, const char *value, - const char *qual_name); +void ccl_qual_fitem(CCL_bibset bibset, const char *value, + const char *qual_name); +YAZ_EXPORT +int ccl_qual_fitem2(CCL_bibset bibset, const char *value, + const char *qual_name, const char **addinfo); /** Make CCL qualifier set */ YAZ_EXPORT -CCL_bibset ccl_qual_mk (void); +CCL_bibset ccl_qual_mk(void); + +/** Make CCL qualifier set from an existing one (duplicate it) */ +YAZ_EXPORT +CCL_bibset ccl_qual_dup(CCL_bibset b); /** Delete CCL qualifier set */ YAZ_EXPORT -void ccl_qual_rm (CCL_bibset *b); +void ccl_qual_rm(CCL_bibset *b); /** Char-to-upper function */ -extern int (*ccl_toupper)(int c); +extern int(*ccl_toupper)(int c); /** CCL version of ccl_stricmp */ YAZ_EXPORT -int ccl_stricmp (const char *s1, const char *s2); +int ccl_stricmp(const char *s1, const char *s2); /** CCL version of ccl_memicmp */ YAZ_EXPORT -int ccl_memicmp (const char *s1, const char *s2, size_t n); - -/** Search for qualifier 'name' in set 'b'. */ -YAZ_EXPORT -struct ccl_rpn_attr *ccl_qual_search (CCL_parser cclp, const char *name, - size_t len, int seq); +int ccl_memicmp(const char *s1, const char *s2, size_t n); /** Create CCL parser */ YAZ_EXPORT -CCL_parser ccl_parser_create (void); +CCL_parser ccl_parser_create(CCL_bibset bibset); /** Destroy CCL parser */ YAZ_EXPORT -void ccl_parser_destroy (CCL_parser p); - -/** String dup utility (ala strdup) */ -YAZ_EXPORT -char *ccl_strdup (const char *str); +void ccl_parser_destroy(CCL_parser p); /** Search for special qualifier */ YAZ_EXPORT -const char *ccl_qual_search_special (CCL_bibset b, - const char *name); +const char **ccl_qual_search_special(CCL_bibset b, const char *name); /** Pretty-print CCL RPN node tree to WRBUF */ YAZ_EXPORT -void ccl_pquery (WRBUF w, struct ccl_rpn_node *p); +void ccl_pquery(WRBUF w, struct ccl_rpn_node *p); + +YAZ_EXPORT +int ccl_parser_get_error(CCL_parser cclp, int *pos); + +YAZ_EXPORT +struct ccl_rpn_node *ccl_rpn_node_create(enum ccl_rpn_kind kind); + +YAZ_EXPORT +void ccl_add_attr_numeric(struct ccl_rpn_node *p, const char *set, + int type, int value); + +YAZ_EXPORT +void ccl_add_attr_string(struct ccl_rpn_node *p, const char *set, + int type, char *value); + +YAZ_EXPORT +int ccl_search_stop(CCL_bibset bibset, const char *qname, + const char *src_str, size_t src_len); + + +/** \brief stop words handle (pimpl) */ +typedef struct ccl_stop_words *ccl_stop_words_t; + +/** \brief creates stop words handle */ +YAZ_EXPORT +ccl_stop_words_t ccl_stop_words_create(void); + +/** \brief destroys stop words handle */ +YAZ_EXPORT +void ccl_stop_words_destroy(ccl_stop_words_t csw); + +/** \brief removes stop words from RPN tree */ +YAZ_EXPORT +int ccl_stop_words_tree(ccl_stop_words_t csw, + CCL_bibset bibset, struct ccl_rpn_node **t); + +/** \brief returns information about removed "stop" words */ +YAZ_EXPORT +int ccl_stop_words_info(ccl_stop_words_t csw, int idx, + const char **qualname, const char **term); + +YAZ_EXPORT +struct ccl_rpn_attr *ccl_parser_qual_search(CCL_parser cclp, const char *name, + size_t name_len); + #ifndef ccl_assert #define ccl_assert(x) ; #endif + +/** \brief common attributes + + use (1) + + relation (2) + -1 none + 0 ordered + 1-6 relation (<, <=, =, >=, >, <>) + + position (3) + -1 none + 1 first in field + 2 first in sub field + 3 any position in field + structure (4) + -1 none + 0 word/phrase auto select + 1 phrase + 2 word + 3 key + 4 year + 5 date (normalized) + 6 word list + 100 date (un-normalized) + 101 name (normalized) + 102 name (un-normalized) + truncation (5) + completeness (6) +*/ + +#define CCL_BIB1_USE 1 +#define CCL_BIB1_REL 2 +#define CCL_BIB1_POS 3 +#define CCL_BIB1_STR 4 +#define CCL_BIB1_TRU 5 +#define CCL_BIB1_COM 6 + +#define CCL_BIB1_STR_WP (-1) +#define CCL_BIB1_STR_AND_LIST (-2) +#define CCL_BIB1_STR_OR_LIST (-3) +#define CCL_BIB1_STR_AUTO_GROUP (-4) +#define CCL_BIB1_REL_ORDER (-1) +#define CCL_BIB1_REL_PORDER (-2) +#define CCL_BIB1_REL_OMIT_EQUALS (-3) + +#define CCL_BIB1_TRU_CAN_LEFT (-1) +#define CCL_BIB1_TRU_CAN_RIGHT (-2) +#define CCL_BIB1_TRU_CAN_BOTH (-3) +#define CCL_BIB1_TRU_CAN_NONE (-4) +#define CCL_BIB1_TRU_CAN_REGEX (-5) +#define CCL_BIB1_TRU_CAN_Z3958 (-6) + + YAZ_END_CDECL #endif @@ -440,6 +377,7 @@ YAZ_END_CDECL /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab