From a479e3c7f966848a6fc71ac2c2c7f5db7068351b Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 6 Nov 2007 10:29:58 +0000 Subject: [PATCH] The zebramaps implements index_types functionality. --- include/Makefile.am | 4 +- include/index_types.h | 143 ------------------- include/zebramap.h | 10 +- index/check_res.c | 3 +- index/extract.c | 56 +++----- index/index.h | 4 +- index/zebraapi.c | 28 +--- test/api/Makefile.am | 4 +- test/api/indextypes17.xml | 18 --- test/api/t17.c | 5 +- test/api/t17.idx | 59 ++++++++ test/api/zebra17.cfg | 4 +- util/.cvsignore | 1 - util/Makefile.am | 9 +- util/index_types.c | 348 --------------------------------------------- util/tst_index_types.c | 168 ---------------------- util/zebramap.c | 78 +++++++++- 17 files changed, 178 insertions(+), 764 deletions(-) delete mode 100644 include/index_types.h delete mode 100644 test/api/indextypes17.xml create mode 100644 test/api/t17.idx delete mode 100644 util/index_types.c delete mode 100644 util/tst_index_types.c diff --git a/include/Makefile.am b/include/Makefile.am index d60ed90..0082eaa 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,7 +1,7 @@ -# $Id: Makefile.am,v 1.28 2007-10-25 09:22:36 adam Exp $ +# $Id: Makefile.am,v 1.29 2007-11-06 10:29:58 adam Exp $ noinst_HEADERS = bset.h charmap.h \ direntz.h passwddb.h dfa.h zebra_xpath.h d1_absyn.h \ rset.h dfaset.h sortidx.h zebra-lock.h attrfind.h zebramap.h \ - it_key.h su_codec.h index_types.h + it_key.h su_codec.h SUBDIRS = idzebra diff --git a/include/index_types.h b/include/index_types.h deleted file mode 100644 index b930c61..0000000 --- a/include/index_types.h +++ /dev/null @@ -1,143 +0,0 @@ -/* $Id: index_types.h,v 1.2 2007-10-25 19:25:00 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. 
- -Zebra is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -Zebra is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*/ - -/** - \file - \brief Definitions for Zebra's index types -*/ - -#ifndef ZEBRA_INDEX_TYPES_H -#define ZEBRA_INDEX_TYPES_H - -#include -#include - -YAZ_BEGIN_CDECL - -/** \brief zebra index types handle (ptr) */ -typedef struct zebra_index_types_s *zebra_index_types_t; - -/** \brief zebra index type handle (ptr) */ -typedef struct zebra_index_type_s *zebra_index_type_t; - -/** \brief creates index types handler/object from file - \param fname filename - \returns handle (NULL if unsuccessful) - - Config file format: - \verbatim - - - - - - - - - - - - \endverbatim - */ -zebra_index_types_t zebra_index_types_create(const char *fname); - -/** \brief destroys index rules object - \param types handle - */ -void zebra_index_types_destroy(zebra_index_types_t types); - - -/** \brief creates index types handler/object from xml Doc - \param doc Libxml2 document - \returns handle (NULL if unsuccessful) - - Similar to zebra_index_types_create -*/ -zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc); - - -/** \brief lookup of index type - \param types types - \param id id to search for - \returns pattern ID -*/ -const char *zebra_index_type_lookup_str(zebra_index_types_t types, - const char *id); - - -/** \brief get index type of a given ID - \param types types - \param id ID to search for - \returns index type 
handle -*/ -zebra_index_type_t zebra_index_type_get(zebra_index_types_t types, - const char *id); - -/** \brief check whether index type is of type 'index' - \param type index type - \retval 1 YES - \retval 0 NO -*/ -int zebra_index_type_is_index(zebra_index_type_t type); - -/** \brief check whether index type is of type 'sort' - \param type index type - \retval 1 YES - \retval 0 NO -*/ -int zebra_index_type_is_sort(zebra_index_type_t type); - -/** \brief check whether index type is of type 'staticrank' - \param type index type - \retval 1 YES - \retval 0 NO -*/ -int zebra_index_type_is_staticrank(zebra_index_type_t type); - - -/** \brief tokenize a term for an index type - \param type index type - \param buf term buffer (pass 0 to continue with previous buf) - \param len term length - \param result_buf resulting token buffer - \param result_len resulting token length - \retval 1 token read and result is in result_buf - \retval 0 no token read (no more tokens in buf) -*/ -int zebra_index_type_tokenize(zebra_index_type_t type, - const char *buf, size_t len, - const char **result_buf, size_t *result_len); - -YAZ_END_CDECL - -#endif -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ - diff --git a/include/zebramap.h b/include/zebramap.h index 0912f89..48ae0a7 100644 --- a/include/zebramap.h +++ b/include/zebramap.h @@ -1,4 +1,4 @@ -/* $Id: zebramap.h,v 1.26 2007-10-31 16:56:13 adam Exp $ +/* $Id: zebramap.h,v 1.27 2007-11-06 10:29:58 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -79,6 +79,9 @@ YAZ_EXPORT int zebra_maps_is_positioned(zebra_map_t zm); YAZ_EXPORT +int zebra_maps_is_icu(zebra_map_t zm); + +YAZ_EXPORT int zebra_maps_is_first_in_field(zebra_map_t zm); YAZ_EXPORT @@ -91,6 +94,11 @@ zebra_map_t zebra_map_get(zebra_maps_t zms, const char *id); YAZ_EXPORT zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id); + +int zebra_map_tokenize(zebra_map_t zm, + const char 
*buf, size_t len, + const char **result_buf, size_t *result_len); + YAZ_END_CDECL #endif diff --git a/index/check_res.c b/index/check_res.c index d079022..b508f72 100644 --- a/index/check_res.c +++ b/index/check_res.c @@ -1,4 +1,4 @@ -/* $Id: check_res.c,v 1.7 2007-10-29 09:25:40 adam Exp $ +/* $Id: check_res.c,v 1.8 2007-11-06 10:29:59 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -63,7 +63,6 @@ int zebra_check_res(Res res) res_add(v, "threads", ""); res_add(v, "trunclimit", ""); res_add(v, "truncmax", ""); - res_add(v, "indextypes", ""); res_add(v, "database", "p"); res_add(v, "explainDatabase", "p"); res_add(v, "fileVerboseLimit", "p"); diff --git a/index/extract.c b/index/extract.c index c70e6a1..6ac93de 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.267 2007-10-31 16:56:14 adam Exp $ +/* $Id: extract.c,v 1.268 2007-11-06 10:29:59 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -71,7 +71,6 @@ static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t skp); static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid); static void extract_token_add(RecWord *p); -static void extract_token_add2(RecWord *p); static void check_log_limit(ZebraHandle zh) { @@ -821,14 +820,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, stream->endf(stream, &null_offset);; extractCtrl.init = extract_init; - if (zh->reg->index_types) - { - extractCtrl.tokenAdd = extract_token_add2; - } - else - { - extractCtrl.tokenAdd = extract_token_add; - } + extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; extractCtrl.handle = zh; @@ -1700,14 +1692,14 @@ static void extract_add_complete_field(RecWord *p, zebra_map_t zm) extract_add_string(p, zm, buf, i); } -static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type, - RecWord *p) +static void extract_add_icu(RecWord *p, zebra_map_t zm) { struct it_key key; const 
char *res_buf = 0; size_t res_len = 0; - int r = zebra_index_type_tokenize(type, p->term_buf, p->term_len, - &res_buf, &res_len); + ZebraHandle zh = p->extractCtrl->handle; + int r = zebra_map_tokenize(zm, p->term_buf, p->term_len, + &res_buf, &res_len); int cat = zinfo_index_category_index; int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, p->index_type, p->index_name); if (ch < 0) @@ -1728,27 +1720,10 @@ static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type, zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key); p->seqno++; - r = zebra_index_type_tokenize(type, 0, 0, &res_buf, &res_len); + r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len); } } -static void extract_token_add2(RecWord *p) -{ - ZebraHandle zh = p->extractCtrl->handle; - zebra_index_type_t type = zebra_index_type_get(zh->reg->index_types, p->index_type); - if (type) - { - if (zebra_index_type_is_index(type)) - { - extract_token_add2_index(zh, type, p); - } - else if (zebra_index_type_is_sort(type)) - { - ; - - } - } -} /** \brief top-level indexing handler for recctrl system \param p token data to be indexed @@ -1780,13 +1755,20 @@ static void extract_token_add(RecWord *p) } if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len))) { - p->term_buf = wrbuf_buf(wrbuf); - p->term_len = wrbuf_len(wrbuf); + p->term_buf = wrbuf_buf(wrbuf); + p->term_len = wrbuf_len(wrbuf); + } + if (zebra_maps_is_icu(zm)) + { + extract_add_icu(p, zm); } - if (zebra_maps_is_complete(zm)) - extract_add_complete_field(p, zm); else - extract_add_incomplete_field(p, zm); + { + if (zebra_maps_is_complete(zm)) + extract_add_complete_field(p, zm); + else + extract_add_incomplete_field(p, zm); + } } static void extract_set_store_data_cb(struct recExtractCtrl *p, diff --git a/index/index.h b/index/index.h index 80ba909..64d2745 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.208 2007-11-05 11:20:39 adam Exp $ +/* $Id: index.h,v 1.209 2007-11-06 10:29:59 
adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -29,7 +29,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include -#include #if HAVE_SYS_TIMES_H #include #endif @@ -144,7 +143,6 @@ struct zebra_register { char *server_path_prefix; data1_handle dh; - zebra_index_types_t index_types; zebra_maps_t zebra_maps; ZebraRankClass rank_classes; RecTypes recTypes; diff --git a/index/zebraapi.c b/index/zebraapi.c index 7ebb47a..676846f 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.263 2007-11-05 11:20:39 adam Exp $ +/* $Id: zebraapi.c,v 1.264 2007-11-06 10:29:59 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -381,7 +381,6 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, data1_set_tabroot (reg->dh, reg_path); reg->recTypes = recTypes_init (zs->record_classes, reg->dh); - reg->index_types = 0; reg->zebra_maps = zebra_maps_open(res, reg_path, profilePath); if (!reg->zebra_maps) @@ -421,30 +420,6 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, record_compression = REC_COMPRESS_BZIP2; { - const char *index_types_fname = res_get(res, "indextypes"); - if (index_types_fname) - { - char tmp_full_name[1024]; - - if (!yaz_filepath_resolve(index_types_fname, - profilePath, - reg_path, - tmp_full_name)) - { - yaz_log(YLOG_WARN, "Could not find %s", index_types_fname); - ret = ZEBRA_FAIL; - } - else - { - reg->index_types = zebra_index_types_create( - tmp_full_name); - yaz_log(YLOG_LOG, "zebra_index_types_create returned %p", - reg->index_types); - } - } - - } - { const char *index_fname = res_get_def(res, "index", "default.idx"); if (index_fname && *index_fname) { @@ -604,7 +579,6 @@ static void zebra_register_close(ZebraService zs, struct zebra_register *reg) recTypes_destroy (reg->recTypes); zebra_maps_close (reg->zebra_maps); - zebra_index_types_destroy(reg->index_types); zebraRankDestroy (reg); bfs_destroy 
(reg->bfs); data1_destroy (reg->dh); diff --git a/test/api/Makefile.am b/test/api/Makefile.am index bec97ea..12bd994 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.41 2007-10-29 13:43:58 adam Exp $ +# $Id: Makefile.am,v 1.42 2007-11-06 10:29:59 adam Exp $ noinst_PROGRAMS = testclient testclient_SOURCES = testclient.c @@ -9,7 +9,7 @@ check_PROGRAMS = $(simpletests) $(safaritests) TESTS = $(check_PROGRAMS) EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebra10.cfg zebra15.cfg safari.cfg \ - t10.att t10.abs zebra17.cfg indextypes17.xml + t10.att t10.abs zebra17.cfg t17.idx noinst_LIBRARIES = libtestlib.a diff --git a/test/api/indextypes17.xml b/test/api/indextypes17.xml deleted file mode 100644 index 49b4d21..0000000 --- a/test/api/indextypes17.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - diff --git a/test/api/t17.c b/test/api/t17.c index 84b1ae3..15289fa 100644 --- a/test/api/t17.c +++ b/test/api/t17.c @@ -1,4 +1,4 @@ -/* $Id: t17.c,v 1.1 2007-10-29 13:43:58 adam Exp $ +/* $Id: t17.c,v 1.2 2007-11-06 10:29:59 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -43,8 +43,7 @@ static void tst(int argc, char **argv) YAZ_CHECK(tl_query(zh, "@attr 1=title notfound", 0)); /* we should get 3 hits. But 0 for now */ -#if 0 - +#if 1 YAZ_CHECK(tl_query(zh, "@attr 1=title title", 3)); #else YAZ_CHECK(tl_query(zh, "@attr 1=title title", 0)); diff --git a/test/api/t17.idx b/test/api/t17.idx new file mode 100644 index 0000000..0555ebd --- /dev/null +++ b/test/api/t17.idx @@ -0,0 +1,59 @@ +# Zebra indexes as referred to from the *.abs-files. 
+# $Id: t17.idx,v 1.1 2007-11-06 10:30:00 adam Exp $ +# + +# Traditional word index +# Used if completeness is 'incomplete field' (@attr 6=1) and +# structure is word/phrase/word-list/free-form-text/document-text +index w +completeness 0 +position 1 +alwaysmatches 1 +firstinfield 1 +simplechain dummy + +# Phrase index +# Used if completeness is 'complete {sub}field' (@attr 6=2, @attr 6=3) +# and structure is word/phrase/word-list/free-form-text/document-text +index p +completeness 1 +charmap string.chr + +# URX (URL) index +# Used if structure=urx (@attr 4=104) +index u +completeness 0 +charmap urx.chr + +# Numeric index +# Used if structure=numeric (@attr 4=109) +index n +completeness 0 +charmap numeric.chr + +# Null map index (no mapping at all) +# Used if structure=key (@attr 4=3) +index 0 +completeness 0 +position 1 +charmap @ + +# Year +# Used if structure=year (@attr 4=4) +index y +completeness 0 +charmap @ + +# Date +# Used if structure=date (@attr 4=5) +index d +completeness 0 +charmap @ + +# Sort register +sort s +completeness 1 +charmap string.chr + +# Staticrank (uncomment to enable) +#staticrank r diff --git a/test/api/zebra17.cfg b/test/api/zebra17.cfg index 9c2fb1e..1b4ddde 100644 --- a/test/api/zebra17.cfg +++ b/test/api/zebra17.cfg @@ -1,4 +1,4 @@ -# $Id: zebra17.cfg,v 1.1 2007-10-29 13:43:58 adam Exp $ +# $Id: zebra17.cfg,v 1.2 2007-11-06 10:30:00 adam Exp $ profilepath: ${srcdir:-.}:${srcdir:-.}/../../tab attset: bib1.att @@ -6,7 +6,7 @@ attset: explain.att recordType: grs.sgml -indextypes: indextypes17.xml +index: t17.idx isam: b diff --git a/util/.cvsignore b/util/.cvsignore index 2ae39fe..186ba78 100644 --- a/util/.cvsignore +++ b/util/.cvsignore @@ -14,4 +14,3 @@ tstflock.out tstlockscope tstpass tstres -tst_index_types diff --git a/util/Makefile.am b/util/Makefile.am index 4575ff9..0ec89c2 100644 --- a/util/Makefile.am +++ b/util/Makefile.am @@ -1,9 +1,8 @@ -## $Id: Makefile.am,v 1.36 2007-10-25 09:22:36 adam Exp $ +## $Id: Makefile.am,v 1.37
2007-11-06 10:30:00 adam Exp $ noinst_LTLIBRARIES = libidzebra-util.la -check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres \ - tst_index_types +check_PROGRAMS = tstcharmap tstflock tstlockscope tstpass tstres TESTS = $(check_PROGRAMS) @@ -18,7 +17,7 @@ LDADD = libidzebra-util.la $(YAZLALIB) libidzebra_util_la_SOURCES = version.c zint.c res.c charmap.c zebramap.c \ passwddb.c zebra-lock.c dirent.c xpath.c atoi_zn.c snippet.c flock.c \ - attrfind.c exit.c it_key.c su_codec.c index_types.c + attrfind.c exit.c it_key.c su_codec.c tstpass_SOURCES = tstpass.c @@ -30,8 +29,6 @@ tstlockscope_SOURCES = tstlockscope.c tstres_SOURCES = tstres.c -tst_index_types_SOURCES = tst_index_types.c - clean-local: -rm -rf *.LCK -rm -rf *.log diff --git a/util/index_types.c b/util/index_types.c deleted file mode 100644 index 09e81d8..0000000 --- a/util/index_types.c +++ /dev/null @@ -1,348 +0,0 @@ -/* $Id: index_types.c,v 1.3 2007-10-29 08:20:16 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - - This file is part of the Zebra server. - - Zebra is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. - - Zebra is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with Zebra; see the file LICENSE.zebra. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. 
-*/ - -/** - \file - \brief Implementation of Zebra's index types system -*/ - -#include -#include -#include -#include - -#include "index_types.h" -#if HAVE_ICU -#include -#endif -#include -#include -#include -#include - -struct zebra_index_types_s { -#if YAZ_HAVE_XML2 - zebra_index_type_t rules; - xmlDocPtr doc; -#endif -}; - -#if YAZ_HAVE_XML2 -struct zebra_index_type_s { - const xmlNode *ptr; - const char *id; - const char *locale; - const char *position; - const char *alwaysmatches; - const char *firstinfield; - int sort_flag; - int index_flag; - int staticrank_flag; - int simple_chain; -#if HAVE_ICU - struct icu_chain *chain; -#endif - zebra_index_type_t next; - WRBUF simple_buf; - size_t simple_off; -}; - -static void index_type_destroy(zebra_index_type_t t); - -zebra_index_type_t parse_index_type(const xmlNode *ptr) -{ - struct _xmlAttr *attr; - struct zebra_index_type_s *rule; - - rule = xmalloc(sizeof(*rule)); - rule->next = 0; -#if HAVE_ICU - rule->chain = 0; -#endif - rule->ptr = ptr; - rule->locale = 0; - rule->id = 0; - rule->position = 0; - rule->alwaysmatches = 0; - rule->firstinfield = 0; - rule->sort_flag = 0; - rule->index_flag = 1; - rule->staticrank_flag = 0; - rule->simple_chain = 0; - rule->simple_buf = wrbuf_alloc(); - for (attr = ptr->properties; attr; attr = attr->next) - { - if (attr->children && attr->children->type == XML_TEXT_NODE) - { - if (!strcmp((const char *) attr->name, "id")) - rule->id = (const char *) attr->children->content; - else if (!strcmp((const char *) attr->name, "locale")) - rule->locale = (const char *) attr->children->content; - else if (!strcmp((const char *) attr->name, "position")) - rule->position = (const char *) attr->children->content; - else if (!strcmp((const char *) attr->name, "alwaysmatches")) - rule->alwaysmatches = (const char *) attr->children->content; - else if (!strcmp((const char *) attr->name, "firstinfield")) - rule->firstinfield = (const char *) attr->children->content; - else if (!strcmp((const 
char *) attr->name, "index")) - { - const char *v = (const char *) attr->children->content; - if (v) - rule->index_flag = *v == '1'; - } - else if (!strcmp((const char *) attr->name, "sort")) - { - const char *v = (const char *) attr->children->content; - if (v) - rule->sort_flag = *v == '1'; - } - else if (!strcmp((const char *) attr->name, "staticrank")) - { - const char *v = (const char *) attr->children->content; - if (v) - rule->staticrank_flag = *v == '1'; - } - else - { - yaz_log(YLOG_WARN, "Unsupport attribute '%s' for indextype", - attr->name); - index_type_destroy(rule); - return 0; - } - } - } - ptr = ptr->children; - while (ptr && ptr->type != XML_ELEMENT_NODE) - ptr = ptr->next; - if (!ptr) - { - yaz_log(YLOG_WARN, "Missing rules for indexrule"); - index_type_destroy(rule); - rule = 0; - } - else if (!strcmp((const char *) ptr->name, "icu_chain")) - { -#if HAVE_ICU - UErrorCode status; - rule->chain = icu_chain_xml_config(ptr, - rule->locale, - rule->sort_flag, - &status); - if (!rule->chain) - { - index_type_destroy(rule); - rule = 0; - } -#else - yaz_log(YLOG_WARN, "ICU unsupported (must be part of YAZ)"); - xfree(rule); - rule = 0; -#endif - } - else if (!strcmp((const char *) ptr->name, "simple")) - { - rule->simple_chain = 1; - } - else - { - yaz_log(YLOG_WARN, "Unsupported mapping %s for indexrule", ptr->name); - index_type_destroy(rule); - rule = 0; - } - return rule; -} -/* YAZ_HAVE_XML2 */ -#endif - -zebra_index_types_t zebra_index_types_create(const char *fname) -{ - xmlDocPtr doc = xmlParseFile(fname); - if (!doc) - return 0; - return zebra_index_types_create_doc(doc); -} - -zebra_index_types_t zebra_index_types_create_doc(xmlDocPtr doc) -{ -#if YAZ_HAVE_XML2 - zebra_index_types_t r = xmalloc(sizeof(*r)); - zebra_index_type_t *rp = &r->rules; - const xmlNode *top = xmlDocGetRootElement(doc); - - r->doc = doc; - *rp = 0; - if (top && top->type == XML_ELEMENT_NODE - && !strcmp((const char *) top->name, "indextypes")) - { - const xmlNode *ptr = 
top->children; - for (; ptr; ptr = ptr->next) - { - if (ptr->type == XML_ELEMENT_NODE - && !strcmp((const char *) ptr->name, "indextype")) - { - *rp = parse_index_type(ptr); - if (!*rp) - { - zebra_index_types_destroy(r); - return 0; - } - rp = &(*rp)->next; - } - } - } - else - { - zebra_index_types_destroy(r); - r = 0; - } - return r; -#else - yaz_log(YLOG_WARN, "XML unsupported. Cannot read index rules"); - return 0; -/* YAZ_HAVE_XML2 */ -#endif -} - -static void index_type_destroy(zebra_index_type_t t) -{ - if (t) - { -#if HAVE_ICU - if (t->chain) - icu_chain_destroy(t->chain); -#endif - wrbuf_destroy(t->simple_buf); - xfree(t); - } -} - -void zebra_index_types_destroy(zebra_index_types_t r) -{ - if (r) - { -#if YAZ_HAVE_XML2 - zebra_index_type_t rule; - while (r->rules) - { - rule = r->rules; - r->rules = rule->next; - index_type_destroy(rule); - } - xmlFreeDoc(r->doc); - -#endif - xfree(r); - } -} - -zebra_index_type_t zebra_index_type_get(zebra_index_types_t types, - const char *id) -{ -#if YAZ_HAVE_XML2 - zebra_index_type_t rule = types->rules; - - while (rule && !yaz_match_glob(rule->id, id)) - rule = rule->next; - return rule; -#endif - return 0; -} - -const char *zebra_index_type_lookup_str(zebra_index_types_t types, - const char *id) -{ - zebra_index_type_t t = zebra_index_type_get(types, id); - if (t) - return t->id; - return 0; -} - -int zebra_index_type_is_index(zebra_index_type_t type) -{ - return type->index_flag; -} - -int zebra_index_type_is_sort(zebra_index_type_t type) -{ - return type->sort_flag; -} - -int zebra_index_type_is_staticrank(zebra_index_type_t type) -{ - return type->staticrank_flag; -} - -#define SE_CHARS ";,.()-/?<> \r\n\t" - -int tokenize_simple(zebra_index_type_t type, - const char **result_buf, size_t *result_len) -{ - char *buf = wrbuf_buf(type->simple_buf); - size_t len = wrbuf_len(type->simple_buf); - size_t i = type->simple_off; - size_t start; - - while (i < len && strchr(SE_CHARS, buf[i])) - i++; - start = i; - while (i 
< len && !strchr(SE_CHARS, buf[i])) - { - if (buf[i] > 32 && buf[i] < 127) - buf[i] = tolower(buf[i]); - i++; - } - - type->simple_off = i; - if (start != i) - { - *result_buf = buf + start; - *result_len = i - start; - return 1; - } - return 0; - } - -int zebra_index_type_tokenize(zebra_index_type_t type, - const char *buf, size_t len, - const char **result_buf, size_t *result_len) -{ - if (type->simple_chain) - { - if (buf) - { - wrbuf_rewind(type->simple_buf); - wrbuf_write(type->simple_buf, buf, len); - type->simple_off = 0; - } - return tokenize_simple(type, result_buf, result_len); - } - return 0; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ - diff --git a/util/tst_index_types.c b/util/tst_index_types.c deleted file mode 100644 index 2e0c0ac..0000000 --- a/util/tst_index_types.c +++ /dev/null @@ -1,168 +0,0 @@ -/* $Id: tst_index_types.c,v 1.3 2007-10-25 19:25:00 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. - -Zebra is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -Zebra is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*/ - -#include -#include -#include -#include - -const char *xml_str = -" " -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -; - -int compare_lookup(zebra_index_types_t r, const char *id, - const char *expected_id) -{ - const char *got_id = zebra_index_type_lookup_str(r, id); - if (!got_id && !expected_id) - return 1; /* none expected */ - - if (got_id && expected_id && !strcmp(got_id, expected_id)) - return 1; - return 0; -} - -void tst1(void) -{ -#if YAZ_HAVE_XML2 - xmlDocPtr doc = xmlParseMemory(xml_str, strlen(xml_str)); - - YAZ_CHECK(doc); - if (doc) - { - zebra_index_types_t rules = zebra_index_types_create_doc(doc); - zebra_index_type_t type; - YAZ_CHECK(rules); - - if (!rules) - return ; - - YAZ_CHECK(compare_lookup(rules, "title:s", "*:s")); - YAZ_CHECK(compare_lookup(rules, "title:sx", 0)); - YAZ_CHECK(compare_lookup(rules, "title:Sx", 0)); - YAZ_CHECK(compare_lookup(rules, "any:w", "*:w")); - YAZ_CHECK(compare_lookup(rules, "any:w:en", 0)); - YAZ_CHECK(compare_lookup(rules, "any:w:el", "*:w:el")); - - { - int i, iter = 3333; - for (i = 0; i < iter; i++) - { - compare_lookup(rules, "title:s", "*:s"); - compare_lookup(rules, "title:sx", 0); - compare_lookup(rules, "title:Sx", 0); - } - } - - type = zebra_index_type_get(rules, "any:w"); - YAZ_CHECK(type); - if (type) - { - const char *buf = " How are you?"; - size_t len = strlen(buf); - int r = 1; - - if (r) - { - const char *result_buf = 0; - size_t result_len = 0; - r = zebra_index_type_tokenize(type, buf, len, - &result_buf, &result_len); - YAZ_CHECK_EQ(r, 1); - YAZ_CHECK(result_len == 3 && - !memcmp(result_buf, "how", result_len)); - } - - if (r) - { - const char *result_buf = 0; - size_t result_len = 0; - r = zebra_index_type_tokenize(type, 0, 0, - 
&result_buf, &result_len); - YAZ_CHECK_EQ(r, 1); - YAZ_CHECK(result_len == 3 && - !memcmp(result_buf, "are", result_len)); - } - - if (r) - { - const char *result_buf = 0; - size_t result_len = 0; - r = zebra_index_type_tokenize(type, 0, 0, - &result_buf, &result_len); - YAZ_CHECK_EQ(r, 1); - YAZ_CHECK(result_len == 3 && - !memcmp(result_buf, "you", result_len)); - } - - if (r) - { - const char *result_buf = 0; - size_t result_len = 0; - r = zebra_index_type_tokenize(type, 0, 0, - &result_buf, &result_len); - YAZ_CHECK_EQ(r, 0); - } - } - zebra_index_types_destroy(rules); - } -#else - zebra_index_types_t rules = zebra_index_types_create_doc(doc); - YAZ_CHECK(!rules); -#endif -} - -int main(int argc, char **argv) -{ - YAZ_CHECK_INIT(argc, argv); - YAZ_CHECK_LOG(); - - tst1(); - - YAZ_CHECK_TERM; -} - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ - diff --git a/util/zebramap.c b/util/zebramap.c index d322645..e1cb678 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -1,4 +1,4 @@ -/* $Id: zebramap.c,v 1.64 2007-11-05 13:58:01 adam Exp $ +/* $Id: zebramap.c,v 1.65 2007-11-06 10:30:02 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -46,6 +46,7 @@ struct zebra_map { int alwaysmatches; int first_in_field; int type; + int use_chain; union { struct { int entry_size; @@ -61,6 +62,8 @@ struct zebra_map { #if HAVE_ICU struct icu_chain *icu_chain; #endif + WRBUF simple_buf; + size_t simple_off; struct zebra_map *next; }; @@ -90,6 +93,7 @@ void zebra_maps_close(zebra_maps_t zms) #if YAZ_HAVE_XML2 xmlFreeDoc(zm->doc); #endif + wrbuf_destroy(zm->simple_buf); zm = zm->next; } wrbuf_destroy(zms->wrbuf_1); @@ -105,6 +109,7 @@ zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type, zm->zebra_maps = zms; zm->id = nmem_strdup(zms->nmem, index_type); zm->maptab_name = 0; + zm->use_chain = 0; zm->locale = 0; zm->maptab = 0; zm->type = map_type; @@ -125,6 +130,7 @@ zebra_map_t 
zebra_add_map(zebra_maps_t zms, const char *index_type, #if YAZ_HAVE_XML2 zm->doc = 0; #endif + zm->simple_buf = wrbuf_alloc(); return zm; } @@ -216,9 +222,20 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, { zm->locale = nmem_strdup(zms->nmem, argv[1]); } + else if (!yaz_matchstr(argv[0], "simplechain")) + { + zm->use_chain = 1; + zm->icu_chain = 0; + } else if (!yaz_matchstr(argv[0], "icuchain")) { #if YAZ_HAVE_XML2 + if (!zm->locale) + { + yaz_log(YLOG_WARN, "%s:%d: locale required before icuchain", + fname, lineno); + return -1; + } zm->doc = xmlParseFile(argv[1]); if (!zm->doc) { @@ -240,6 +257,7 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv, yaz_log(YLOG_WARN, "%s:%d: Failed to load ICU chain %s", fname, lineno, argv[1]); } + zm->use_chain = 1; #else yaz_log(YLOG_WARN, "%s:%d: ICU support unavailable", fname, lineno); @@ -587,6 +605,64 @@ WRBUF zebra_replace(zebra_map_t zm, const char *ex_list, return zm->zebra_maps->wrbuf_1; } +#define SE_CHARS ";,.()-/?<> \r\n\t" + +static int tokenize_simple(zebra_map_t zm, + const char **result_buf, size_t *result_len) +{ + char *buf = wrbuf_buf(zm->simple_buf); + size_t len = wrbuf_len(zm->simple_buf); + size_t i = zm->simple_off; + size_t start; + + while (i < len && strchr(SE_CHARS, buf[i])) + i++; + start = i; + while (i < len && !strchr(SE_CHARS, buf[i])) + { + if (buf[i] > 32 && buf[i] < 127) + buf[i] = tolower(buf[i]); + i++; + } + + zm->simple_off = i; + if (start != i) + { + *result_buf = buf + start; + *result_len = i - start; + return 1; + } + return 0; + } + +int zebra_map_tokenize(zebra_map_t zm, + const char *buf, size_t len, + const char **result_buf, size_t *result_len) +{ + assert(zm->use_chain); + if (!zm->icu_chain) + { + if (buf) + { + wrbuf_rewind(zm->simple_buf); + wrbuf_write(zm->simple_buf, buf, len); + zm->simple_off = 0; + } + return tokenize_simple(zm, result_buf, result_len); + } + return 0; +} + +int zebra_maps_is_icu(zebra_map_t zm) +{ +#if HAVE_ICU 
+ return zm->use_chain; +#else + return 0; +#endif +} + + /* * Local variables: * c-basic-offset: 4 -- 1.7.10.4