From 7415d28c149c1bab51fe93aeaccdd14085b69bd9 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 3 Jul 2006 14:27:00 +0000 Subject: [PATCH] Build main libidzebra-2.0.la. Bug #620. --- Makefile.am | 4 +- bfile/Makefile.am | 4 +- configure.ac | 5 +- data1/Makefile.am | 4 +- dfa/Makefile.am | 4 +- dict/Makefile.am | 4 +- examples/gils/zebra.cfg | 4 +- examples/marc21/zebra.cfg | 4 +- examples/marcxml/zebra.cfg | 2 +- index/Makefile.am | 93 +- index/alvis.c | 769 ++++++++++++++++ index/inline.c | 133 +++ index/inline.h | 45 + index/marcomp.c | 413 +++++++++ index/marcomp.h | 117 +++ index/marcread.c | 899 ++++++++++++++++++ index/recctrl.c | 284 ++++++ index/recgrs.c | 1450 +++++++++++++++++++++++++++++ index/rectext.c | 265 ++++++ index/regxread.c | 2028 +++++++++++++++++++++++++++++++++++++++++ index/safari.c | 279 ++++++ index/sgmlread.c | 145 +++ index/xmlread.c | 549 +++++++++++ isamb/Makefile.am | 4 +- isamc/Makefile.am | 4 +- isams/Makefile.am | 4 +- rset/Makefile.am | 4 +- tab/usmarc.abs | 4 +- test/api/Makefile.am | 15 +- test/api/safari.cfg | 4 +- test/cddb/zebra.cfg | 4 +- test/charmap/Makefile.am | 17 +- test/charmap/zebra.cfg | 4 +- test/codec/Makefile.am | 18 +- test/config/zebra.cfg | 4 +- test/espec/Makefile.am | 17 +- test/filters/Makefile.am | 17 +- test/filters/zebra.cfg | 4 +- test/malxml/zebra.cfg | 4 +- test/marcxml/Makefile.am | 18 +- test/marcxml/zebra.cfg | 4 +- test/mbox/Makefile.am | 17 +- test/rusmarc/Makefile.am | 17 +- test/rusmarc/zebra.cfg | 4 +- test/sort/Makefile.am | 17 +- test/sort2/Makefile.am | 17 +- test/usmarc/zebra.cfg | 4 +- test/xpath/Makefile.am | 17 +- test/xslt/Makefile.am | 15 +- test/xslt/zebra.cfg | 2 +- test/xslt/zebrastaticrank.cfg | 2 +- util/Makefile.am | 4 +- 52 files changed, 7523 insertions(+), 247 deletions(-) create mode 100644 index/alvis.c create mode 100644 index/inline.c create mode 100644 index/inline.h create mode 100644 index/marcomp.c create mode 100644 index/marcomp.h create mode 100644 
index/marcread.c create mode 100644 index/recctrl.c create mode 100644 index/recgrs.c create mode 100644 index/rectext.c create mode 100644 index/regxread.c create mode 100644 index/safari.c create mode 100644 index/sgmlread.c create mode 100644 index/xmlread.c diff --git a/Makefile.am b/Makefile.am index 093ba5a..6decffa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,11 +1,11 @@ -## $Id: Makefile.am,v 1.25 2006-06-28 13:26:13 adam Exp $ +## $Id: Makefile.am,v 1.26 2006-07-03 14:27:00 adam Exp $ AUTOMAKE_OPTIONS=foreign ACLOCAL_AMFLAGS = -I m4 SUBDIRS=util bfile dfa dict isams isamb isamc rset data1 \ - recctrl tab index test examples include doc + tab index test examples include doc aclocaldir=$(datadir)/aclocal aclocal_DATA = m4/idzebra.m4 diff --git a/bfile/Makefile.am b/bfile/Makefile.am index e495164..6d1008c 100644 --- a/bfile/Makefile.am +++ b/bfile/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.8 2006-06-29 13:47:52 adam Exp $ +## $Id: Makefile.am,v 1.9 2006-07-03 14:27:01 adam Exp $ -lib_LTLIBRARIES = libidzebra-bfile.la +noinst_LTLIBRARIES = libidzebra-bfile.la check_PROGRAMS = tstbfile1 diff --git a/configure.ac b/configure.ac index 99e6581..a269026 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Zebra, Index Data ApS, 1995-2006 -dnl $Id: configure.ac,v 1.23 2006-07-03 12:23:17 adam Exp $ +dnl $Id: configure.ac,v 1.24 2006-07-03 14:27:00 adam Exp $ dnl AC_PREREQ(2.59) AC_INIT([idzebra],[2.0.0],[adam@indexdata.dk]) @@ -10,6 +10,8 @@ PACKAGE_SUFFIX="-2.0" AC_SUBST(PACKAGE_SUFFIX) ZEBRALIBS_VERSION=2:0:0 AC_SUBST(ZEBRALIBS_VERSION) +main_zebralib=index/libidzebra${PACKAGE_SUFFIX}.la +AC_SUBST(main_zebralib) dnl ------ Substitutions AC_SUBST(TCL_INCLUDE) AC_SUBST(TCL_LIB) @@ -350,7 +352,6 @@ AC_OUTPUT([ isamc/Makefile rset/Makefile data1/Makefile - recctrl/Makefile index/Makefile include/Makefile include/idzebra/Makefile tab/Makefile diff --git a/data1/Makefile.am b/data1/Makefile.am index 54e28fa..2af5577 100644 --- 
a/data1/Makefile.am +++ b/data1/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.5 2006-06-29 13:47:52 adam Exp $ +## $Id: Makefile.am,v 1.6 2006-07-03 14:27:02 adam Exp $ -lib_LTLIBRARIES=libidzebra-data1.la +noinst_LTLIBRARIES=libidzebra-data1.la libidzebra_data1_la_LDFLAGS=-version-info $(ZEBRALIBS_VERSION) libidzebra_data1_la_SOURCES = d1_handle.c d1_read.c d1_attset.c d1_tagset.c \ diff --git a/dfa/Makefile.am b/dfa/Makefile.am index e911a36..a3ceb22 100644 --- a/dfa/Makefile.am +++ b/dfa/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.9 2006-06-29 13:47:54 adam Exp $ +## $Id: Makefile.am,v 1.10 2006-07-03 14:27:03 adam Exp $ -lib_LTLIBRARIES = libidzebra-dfa.la +noinst_LTLIBRARIES = libidzebra-dfa.la noinst_PROGRAMS = agrep lexer grepper diff --git a/dict/Makefile.am b/dict/Makefile.am index 4a20766..089a5b7 100644 --- a/dict/Makefile.am +++ b/dict/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.15 2006-06-29 13:47:55 adam Exp $ +## $Id: Makefile.am,v 1.16 2006-07-03 14:27:04 adam Exp $ -lib_LTLIBRARIES = libidzebra-dict.la +noinst_LTLIBRARIES = libidzebra-dict.la noinst_PROGRAMS = dicttest dictext check_PROGRAMS = scantest diff --git a/examples/gils/zebra.cfg b/examples/gils/zebra.cfg index 64effed..7bef8ca 100644 --- a/examples/gils/zebra.cfg +++ b/examples/gils/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.12 2006-06-02 10:59:53 adam Exp $ +# $Id: zebra.cfg,v 1.13 2006-07-03 14:27:05 adam Exp $ # # Where the schema files, attribute files, etc are located. 
profilePath: .:../../tab @@ -18,6 +18,6 @@ perm.anonymous: rw #rank: zvrank recordId: (bib-1,title) -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs #shadow: shadow:100M # register: register:100M diff --git a/examples/marc21/zebra.cfg b/examples/marc21/zebra.cfg index 97aabb2..c7851bd 100644 --- a/examples/marc21/zebra.cfg +++ b/examples/marc21/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.1 2005-01-04 17:03:22 quinn Exp $ +# $Id: zebra.cfg,v 1.2 2006-07-03 14:27:06 adam Exp $ # # Where the schema files, attribute files, etc are located. profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -10,4 +10,4 @@ attset: explain.att recordType: grs.marcxml.marc21 -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/examples/marcxml/zebra.cfg b/examples/marcxml/zebra.cfg index 3aa9501..e1c36d7 100644 --- a/examples/marcxml/zebra.cfg +++ b/examples/marcxml/zebra.cfg @@ -1,5 +1,5 @@ profilePath: ./:./../../tab -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs recordtype: alvis.filter_alvis_conf.xml diff --git a/index/Makefile.am b/index/Makefile.am index b29b759..7972530 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,11 +1,75 @@ -## $Id: Makefile.am,v 1.47 2006-06-29 13:47:57 adam Exp $ +## $Id: Makefile.am,v 1.48 2006-07-03 14:27:09 adam Exp $ + +aux_libs = \ + ../rset/libidzebra-rset.la \ + ../dict/libidzebra-dict.la \ + ../isams/libidzebra-isams.la \ + ../isamc/libidzebra-isamc.la \ + ../isamb/libidzebra-isamb.la \ + ../data1/libidzebra-data1.la \ + ../bfile/libidzebra-bfile.la \ + ../dfa/libidzebra-dfa.la \ + ../util/libidzebra-util.la + +# Should match the main_zebralib in configure.ac +# Should also match the libidzebra_..._la lines..! 
+zebralib = libidzebra-2.0.la noinst_PROGRAMS = apitest kdump zebrash -lib_LTLIBRARIES = libidzebra-api.la +modlibdir=$(libdir)/$(PACKAGE)$(PACKAGE_SUFFIX)/modules + +# The shared modules +mod_grs_regx_la_SOURCES = regxread.c +mod_grs_regx_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_grs_regx_la_LADD = $(TCL_LIB) +mod_grs_regx_la_LIBADD = $(zebralib) $(mod_grs_regx_la_LADD) + +mod_grs_xml_la_SOURCES = xmlread.c +mod_grs_xml_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_grs_xml_la_LADD = $(EXPAT_LIBS) +mod_grs_xml_la_LIBADD = $(zebralib) $(mod_grs_xml_la_LADD) + +mod_grs_marc_la_SOURCES = marcread.c marcomp.c marcomp.h inline.c inline.h +mod_grs_marc_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_grs_marc_la_LADD = +mod_grs_marc_la_LIBADD = $(zebralib) $(mod_grs_marc_la_LADD) + +mod_safari_la_SOURCES = safari.c +mod_safari_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_safari_la_LADD = +mod_safari_la_LIBADD = $(zebralib) $(mod_safari_la_LADD) -libidzebra_api_la_LDFLAGS=-version-info $(ZEBRALIBS_VERSION) -libidzebra_api_la_SOURCES = \ +mod_alvis_la_SOURCES = alvis.c +mod_alvis_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_alvis_la_LADD = $(XSLT_LIBS) +mod_alvis_la_LIBADD = $(zebralib) $(mod_alvis_la_LADD) + +modlib_LTLIBRARIES = $(SHARED_MODULE_LA) +EXTRA_LTLIBRARIES = \ + mod-grs-regx.la \ + mod-grs-xml.la \ + mod-grs-marc.la \ + mod-safari.la \ + mod-alvis.la + +EXTRA_libidzebra_2_0_la_SOURCES = \ + $(mod_grs_regx_la_SOURCES) \ + $(mod_grs_xml_la_SOURCES) \ + $(mod_grs_marc_la_SOURCES) \ + $(mod_safari_la_SOURCES) \ + $(mod_alvis_la_SOURCES) + +lib_LTLIBRARIES = $(zebralib) + +libidzebra_2_0_la_DEPENDENCIES = $(STATIC_MODULE_OBJ) +libidzebra_2_0_la_LIBADD = $(STATIC_MODULE_OBJ) $(aux_libs) $(STATIC_MODULE_LADD) +libidzebra_2_0_la_LDFLAGS=-export-dynamic -version-info $(ZEBRALIBS_VERSION) +libidzebra_2_0_la_SOURCES = \ + recctrl.c \ + recgrs.c \ + sgmlread.c \ + rectext.c \ api_swig.c 
attribute.c \ compact.c \ dir.c dirs.c \ @@ -20,22 +84,8 @@ libidzebra_api_la_SOURCES = \ update_path.c update_file.c trunc.c \ zebraapi.c zinfo.c zinfo.h zserver.h zsets.c zrpn.c - bin_PROGRAMS = zebraidx zebrasrv -zebralibs = \ - libidzebra-api.la \ - ../rset/libidzebra-rset.la \ - ../recctrl/libidzebra-recctrl.la \ - ../dict/libidzebra-dict.la \ - ../isams/libidzebra-isams.la \ - ../isamc/libidzebra-isamc.la \ - ../isamb/libidzebra-isamb.la \ - ../data1/libidzebra-data1.la \ - ../bfile/libidzebra-bfile.la \ - ../dfa/libidzebra-dfa.la \ - ../util/libidzebra-util.la - zebraidx_SOURCES = main.c zebrasrv_SOURCES = zserver.c zebrash_SOURCES = zebrash.c @@ -43,11 +93,12 @@ apitest_SOURCES = apitest.c kdump_SOURCES = kdump.c AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) \ - -DDEFAULT_PROFILE_PATH=\"$(pkgdatadir)/tab\" + -DDEFAULT_PROFILE_PATH=\"$(pkgdatadir)/tab\" \ + $(TCL_INCLUDE) $(XSLT_FLAGS) $(XSLT_CFLAGS) -zebrash_LDADD= $(zebralibs) $(YAZLALIB) $(READLINE_LIBS) +LDADD = $(zebralib) $(YAZLALIB) -LDADD = $(zebralibs) $(YAZLALIB) +zebrash_LDADD= $(LDADD) $(READLINE_LIBS) install-exec-hook: cd $(DESTDIR)$(bindir); for p in $(bin_PROGRAMS); do \ diff --git a/index/alvis.c b/index/alvis.c new file mode 100644 index 0000000..9d4ff4c --- /dev/null +++ b/index/alvis.c @@ -0,0 +1,769 @@ +/* $Id: alvis.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct filter_schema { + const char *name; + const char *identifier; + const char *stylesheet; + struct filter_schema *next; + const char *default_schema; + /* char default_schema; */ + const char *include_snippet; + xsltStylesheetPtr stylesheet_xsp; +}; + +struct filter_info { + xmlDocPtr doc; + char *fname; + char *full_name; + const char *profile_path; + const char *split_level; + const char *split_path; + ODR odr; + struct filter_schema *schemas; + xmlTextReaderPtr reader; +}; + +#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1" + +#define XML_STRCMP(a,b) strcmp((char*)a, b) +#define XML_STRLEN(a) strlen((char*)a) + +static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS; + +static void set_param_xml(const char **params, const char *name, + const char *value, ODR odr) +{ + while (*params) + params++; + params[0] = name; + params[1] = value; + params[2] = 0; +} + +static void set_param_str(const char **params, const char *name, + const char *value, ODR odr) +{ + char *quoted = odr_malloc(odr, 3 + strlen(value)); + sprintf(quoted, "'%s'", value); + while (*params) + params++; + params[0] = name; + params[1] = quoted; + params[2] = 0; +} + +static void set_param_int(const char **params, const char *name, + zint value, ODR odr) +{ + char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */ + while (*params) + params++; + sprintf(quoted, "'" ZINT_FORMAT "'", value); + params[0] = name; + params[1] = quoted; + params[2] = 0; +} + +#define ENABLE_INPUT_CALLBACK 0 + +#if ENABLE_INPUT_CALLBACK +static int zebra_xmlInputMatchCallback (char const *filename) +{ + 
yaz_log(YLOG_LOG, "match %s", filename); + return 0; +} + +static void * zebra_xmlInputOpenCallback (char const *filename) +{ + return 0; +} + +static int zebra_xmlInputReadCallback (void * context, char * buffer, int len) +{ + return 0; +} + +static int zebra_xmlInputCloseCallback (void * context) +{ + return 0; +} +#endif + +static void *filter_init(Res res, RecType recType) +{ + struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo)); + tinfo->reader = 0; + tinfo->fname = 0; + tinfo->full_name = 0; + tinfo->profile_path = 0; + tinfo->split_level = 0; + tinfo->split_path = 0; + tinfo->odr = odr_createmem(ODR_ENCODE); + tinfo->doc = 0; + tinfo->schemas = 0; + +#if ENABLE_INPUT_CALLBACK + xmlRegisterDefaultInputCallbacks(); + xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback, + zebra_xmlInputOpenCallback, + zebra_xmlInputReadCallback, + zebra_xmlInputCloseCallback); +#endif + return tinfo; +} + +static int attr_content(struct _xmlAttr *attr, const char *name, + const char **dst_content) +{ + if (!XML_STRCMP(attr->name, name) && attr->children && + attr->children->type == XML_TEXT_NODE) + { + *dst_content = (const char *)(attr->children->content); + return 1; + } + return 0; +} + +static void destroy_schemas(struct filter_info *tinfo) +{ + struct filter_schema *schema = tinfo->schemas; + while (schema) + { + struct filter_schema *schema_next = schema->next; + if (schema->stylesheet_xsp) + xsltFreeStylesheet(schema->stylesheet_xsp); + xfree(schema); + schema = schema_next; + } + tinfo->schemas = 0; + xfree(tinfo->fname); + if (tinfo->doc) + xmlFreeDoc(tinfo->doc); + tinfo->doc = 0; +} + +static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname) +{ + char tmp_full_name[1024]; + xmlNodePtr ptr; + tinfo->fname = xstrdup(fname); + + if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, + NULL, tmp_full_name)) + tinfo->full_name = xstrdup(tmp_full_name); + else + tinfo->full_name = xstrdup(tinfo->fname); + + 
yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name); + + tinfo->doc = xmlParseFile(tinfo->full_name); + + if (!tinfo->doc){ + yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s", + tinfo->full_name); + + return ZEBRA_FAIL; + } + + ptr = xmlDocGetRootElement(tinfo->doc); + if (!ptr || ptr->type != XML_ELEMENT_NODE || + XML_STRCMP(ptr->name, "schemaInfo")){ + yaz_log(YLOG_WARN, + "alvis filter: config file %s :" + " expected root element ", + tinfo->full_name); + return ZEBRA_FAIL; + } + + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (!XML_STRCMP(ptr->name, "schema")) + { + char tmp_xslt_full_name[1024]; + struct _xmlAttr *attr; + struct filter_schema *schema = xmalloc(sizeof(*schema)); + schema->name = 0; + schema->identifier = 0; + schema->stylesheet = 0; + schema->default_schema = 0; + schema->next = tinfo->schemas; + schema->stylesheet_xsp = 0; + schema->include_snippet = 0; + tinfo->schemas = schema; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "identifier", &schema->identifier); + attr_content(attr, "name", &schema->name); + attr_content(attr, "stylesheet", &schema->stylesheet); + attr_content(attr, "default", &schema->default_schema); + attr_content(attr, "snippet", &schema->include_snippet); + } + /*yaz_log(YLOG_LOG, "XSLT add %s %s %s", + schema->name, schema->identifier, schema->stylesheet); */ + + /* find requested schema */ + + if (schema->stylesheet){ + yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path, + NULL, tmp_xslt_full_name); + schema->stylesheet_xsp + = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name); + if (!schema->stylesheet_xsp) + yaz_log(YLOG_WARN, + "alvis filter: could not parse xslt stylesheet %s", + tmp_xslt_full_name); + } + + + } + else if (!XML_STRCMP(ptr->name, "split")) + { + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, 
"level", &tinfo->split_level); + attr_content(attr, "path", &tinfo->split_path); + } + } + else + { + yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname); + return ZEBRA_FAIL; + } + } + return ZEBRA_OK; +} + +static struct filter_schema *lookup_schema(struct filter_info *tinfo, + const char *est) +{ + struct filter_schema *schema; + + for (schema = tinfo->schemas; schema; schema = schema->next) + { + /* find requested schema */ + if (est) + { + if (schema->identifier && !strcmp(schema->identifier, est)) + return schema; + + if (schema->name && !strcmp(schema->name, est)) + return schema; + } + /* or return default schema if defined */ + else if (schema->default_schema) + return schema; + } + + /* return first schema if no default schema defined */ + if (tinfo->schemas) + return tinfo->schemas; + + return 0; +} + +static ZEBRA_RES filter_config(void *clientData, Res res, const char *args) +{ + struct filter_info *tinfo = clientData; + if (!args || !*args){ + yaz_log(YLOG_WARN, "alvis filter: need config file"); + return ZEBRA_FAIL; + } + + if (tinfo->fname && !strcmp(args, tinfo->fname)) + return ZEBRA_OK; + + tinfo->profile_path + /* = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH); */ + = res_get(res, "profilePath"); + yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path); + + destroy_schemas(tinfo); + create_schemas(tinfo, args); + return ZEBRA_OK; +} + +static void filter_destroy(void *clientData) +{ + struct filter_info *tinfo = clientData; + destroy_schemas(tinfo); + if (tinfo->reader) + xmlFreeTextReader(tinfo->reader); + odr_destroy(tinfo->odr); + xfree(tinfo); +} + +static int ioread_ex(void *context, char *buffer, int len) +{ + struct recExtractCtrl *p = context; + return (*p->readf)(p->fh, buffer, len); +} + +static int ioclose_ex(void *context) +{ + return 0; +} + +static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl, + xmlNodePtr ptr, RecWord *recWord) +{ + for(; ptr; ptr = ptr->next) + { + 
index_cdata(tinfo, ctrl, ptr->children, recWord); + if (ptr->type != XML_TEXT_NODE) + continue; + recWord->term_buf = (const char *)ptr->content; + recWord->term_len = XML_STRLEN(ptr->content); + (*ctrl->tokenAdd)(recWord); + } +} + +static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, + xmlNodePtr ptr, RecWord *recWord) +{ + for(; ptr; ptr = ptr->next) + { + index_node(tinfo, ctrl, ptr->children, recWord); + if (ptr->type != XML_ELEMENT_NODE || !ptr->ns || + XML_STRCMP(ptr->ns->href, zebra_xslt_ns)) + continue; + if (!XML_STRCMP(ptr->name, "index")) + { + const char *name_str = 0; + const char *type_str = 0; + const char *xpath_str = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "name", &name_str); + attr_content(attr, "xpath", &xpath_str); + attr_content(attr, "type", &type_str); + } + if (name_str) + { + int prev_type = recWord->index_type; /* save default type */ + + if (type_str && *type_str) + recWord->index_type = *type_str; /* type was given */ + recWord->index_name = name_str; + index_cdata(tinfo, ctrl, ptr->children, recWord); + + recWord->index_type = prev_type; /* restore it again */ + } + } + } +} + +static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl, + xmlNodePtr ptr, RecWord *recWord) +{ + const char *type_str = "update"; + + if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns && + !XML_STRCMP(ptr->ns->href, zebra_xslt_ns) + && !XML_STRCMP(ptr->name, "record")) + { + const char *id_str = 0; + const char *rank_str = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + attr_content(attr, "type", &type_str); + attr_content(attr, "id", &id_str); + attr_content(attr, "rank", &rank_str); + } + if (id_str) + sscanf(id_str, "%255s", ctrl->match_criteria); + + if (rank_str) + ctrl->staticrank = atoi(rank_str); + + ptr = ptr->children; + } + + if (!strcmp("update", type_str)) + index_node(tinfo, ctrl, ptr, 
recWord); + else if (!strcmp("delete", type_str)) + yaz_log(YLOG_WARN, "alvis filter delete: to be implemented"); + else + yaz_log(YLOG_WARN, "alvis filter: unknown record type '%s'", + type_str); +} + +static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, + xmlDocPtr doc) +{ + RecWord recWord; + const char *params[10]; + xmlChar *buf_out; + int len_out; + + struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns); + + params[0] = 0; + set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr); + + (*p->init)(p, &recWord); + + if (schema && schema->stylesheet_xsp) + { + xmlNodePtr root_ptr; + xmlDocPtr resDoc = + xsltApplyStylesheet(schema->stylesheet_xsp, + doc, params); + if (p->flagShowRecords) + { + xmlDocDumpMemory(resDoc, &buf_out, &len_out); + fwrite(buf_out, len_out, 1, stdout); + xmlFree(buf_out); + } + root_ptr = xmlDocGetRootElement(resDoc); + if (root_ptr) + index_record(tinfo, p, root_ptr, &recWord); + else + { + yaz_log(YLOG_WARN, "No root for index XML record." 
+ " split_level=%s stylesheet=%s", + tinfo->split_level, schema->stylesheet); + } + xmlFreeDoc(resDoc); + } + xmlDocDumpMemory(doc, &buf_out, &len_out); + if (p->flagShowRecords) + fwrite(buf_out, len_out, 1, stdout); + (*p->setStoreData)(p, buf_out, len_out); + xmlFree(buf_out); + + xmlFreeDoc(doc); + return RECCTRL_EXTRACT_OK; +} + +static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) +{ + int ret; + int split_depth = 0; + if (p->first_record) + { + if (tinfo->reader) + xmlFreeTextReader(tinfo->reader); + tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex, + p /* I/O handler */, + 0 /* URL */, + 0 /* encoding */, + XML_PARSE_XINCLUDE); + } + if (!tinfo->reader) + return RECCTRL_EXTRACT_ERROR_GENERIC; + + if (tinfo->split_level) + split_depth = atoi(tinfo->split_level); + ret = xmlTextReaderRead(tinfo->reader); + while (ret == 1) { + int type = xmlTextReaderNodeType(tinfo->reader); + int depth = xmlTextReaderDepth(tinfo->reader); + if (split_depth == 0 || + (split_depth > 0 && + type == XML_READER_TYPE_ELEMENT && split_depth == depth)) + { + xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader); + xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); + xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0"); + + xmlDocSetRootElement(doc, ptr2); + + return extract_doc(tinfo, p, doc); + } + ret = xmlTextReaderRead(tinfo->reader); + } + xmlFreeTextReader(tinfo->reader); + tinfo->reader = 0; + return RECCTRL_EXTRACT_EOF; +} + +static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p) +{ + if (p->first_record) /* only one record per stream */ + { + xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */, + 0 /* URL */, + 0 /* encoding */, + XML_PARSE_XINCLUDE); + if (!doc) + { + return RECCTRL_EXTRACT_ERROR_GENERIC; + } + return extract_doc(tinfo, p, doc); + } + else + return RECCTRL_EXTRACT_EOF; +} + +static int filter_extract(void *clientData, struct recExtractCtrl *p) +{ + struct filter_info *tinfo = clientData; + + 
odr_reset(tinfo->odr); + + if (tinfo->split_level == 0 && tinfo->split_path == 0) + return extract_full(tinfo, p); + else + { + return extract_split(tinfo, p); + } +} + +static int ioread_ret(void *context, char *buffer, int len) +{ + struct recRetrieveCtrl *p = context; + return (*p->readf)(p->fh, buffer, len); +} + +static int ioclose_ret(void *context) +{ + return 0; +} + + +static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode, + int window_size) +{ + const char *xml_doc_str; + int ord = 0; + WRBUF wrbuf = wrbuf_alloc(); + zebra_snippets *res = + zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size); + zebra_snippet_word *w = zebra_snippets_list(res); + + if (text_mode) + wrbuf_printf(wrbuf, "\'"); + else + wrbuf_printf(wrbuf, "\n", zebra_xslt_ns); + for (; w; w = w->next) + { + if (ord == 0) + ord = w->ord; + else if (ord != w->ord) + + break; + if (text_mode) + wrbuf_printf(wrbuf, "%s%s%s ", + w->match ? "*" : "", + w->term, + w->match ? "*" : ""); + else + { + wrbuf_printf(wrbuf, " ", + w->ord, w->seqno, + (w->match ? 
"match='1'" : "")); + wrbuf_xmlputs(wrbuf, w->term); + wrbuf_printf(wrbuf, "\n"); + } + } + if (text_mode) + wrbuf_printf(wrbuf, "\'"); + else + wrbuf_printf(wrbuf, "\n"); + + xml_doc_str = odr_strdup(p->odr, wrbuf_buf(wrbuf)); + + zebra_snippets_destroy(res); + wrbuf_free(wrbuf, 1); + return xml_doc_str; +} + +static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) +{ + /* const char *esn = zebra_xslt_ns; */ + const char *esn = 0; + const char *params[32]; + struct filter_info *tinfo = clientData; + xmlDocPtr resDoc; + xmlDocPtr doc; + struct filter_schema *schema; + int window_size = -1; + + if (p->comp) + { + if (p->comp->which == Z_RecordComp_simple + && p->comp->u.simple->which == Z_ElementSetNames_generic) + { + esn = p->comp->u.simple->u.generic; + } + else if (p->comp->which == Z_RecordComp_complex + && p->comp->u.complex->generic->elementSpec + && p->comp->u.complex->generic->elementSpec->which == + Z_ElementSpec_elementSetName) + { + esn = p->comp->u.complex->generic->elementSpec->u.elementSetName; + } + } + schema = lookup_schema(tinfo, esn); + if (!schema) + { + p->diagnostic = + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + return 0; + } + + if (schema->include_snippet) + window_size = atoi(schema->include_snippet); + + params[0] = 0; + set_param_int(params, "id", p->localno, p->odr); + if (p->fname) + set_param_str(params, "filename", p->fname, p->odr); + if (p->staticrank >= 0) + set_param_int(params, "rank", p->staticrank, p->odr); + + if (esn) + set_param_str(params, "schema", esn, p->odr); + else + if (schema->name) + set_param_str(params, "schema", schema->name, p->odr); + else if (schema->identifier) + set_param_str(params, "schema", schema->identifier, p->odr); + else + set_param_str(params, "schema", "", p->odr); + + if (p->score >= 0) + set_param_int(params, "score", p->score, p->odr); + set_param_int(params, "size", p->recordSize, p->odr); + + if (window_size >= 0) + set_param_xml(params, "snippet", 
snippet_doc(p, 1, window_size), + p->odr); + doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */, + 0 /* URL */, + 0 /* encoding */, + XML_PARSE_XINCLUDE); + if (!doc) + { + p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + return 0; + } + + if (window_size >= 0) + { + xmlNodePtr node = xmlDocGetRootElement(doc); + const char *snippet_str = snippet_doc(p, 0, window_size); + xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str)); + xmlAddChild(node, xmlDocGetRootElement(snippet_doc)); + } + if (!schema->stylesheet_xsp) + resDoc = doc; + else + { + resDoc = xsltApplyStylesheet(schema->stylesheet_xsp, + doc, params); + xmlFreeDoc(doc); + } + if (!resDoc) + { + p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + } + else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML) + { + xmlChar *buf_out; + int len_out; + + xsltSaveResultToString(&buf_out, &len_out, resDoc, + schema->stylesheet_xsp); + + p->output_format = VAL_TEXT_XML; + p->rec_len = len_out; + p->rec_buf = odr_malloc(p->odr, p->rec_len); + memcpy(p->rec_buf, buf_out, p->rec_len); + xmlFree(buf_out); + } + else if (p->output_format == VAL_SUTRS) + { + xmlChar *buf_out; + int len_out; + + xsltSaveResultToString(&buf_out, &len_out, resDoc, + schema->stylesheet_xsp); + + p->output_format = VAL_SUTRS; + p->rec_len = len_out; + p->rec_buf = odr_malloc(p->odr, p->rec_len); + memcpy(p->rec_buf, buf_out, p->rec_len); + + xmlFree(buf_out); + } + else + { + p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP; + } + xmlFreeDoc(resDoc); + return 0; +} + +static struct recType filter_type = { + 0, + "alvis", + filter_init, + filter_config, + filter_destroy, + filter_extract, + filter_retrieve +}; + +RecType +#ifdef IDZEBRA_STATIC_ALVIS +idzebra_filter_alvis +#else +idzebra_filter +#endif + +[] = { + &filter_type, + 0, +}; +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git 
a/index/inline.c b/index/inline.c new file mode 100644 index 0000000..4068da0 --- /dev/null +++ b/index/inline.c @@ -0,0 +1,133 @@ +/* + $Id: inline.c,v 1.1 2006-07-03 14:27:09 adam Exp $ +*/ +#include +#include +#include +#include +#include "inline.h" + +static void inline_destroy_subfield_recursive(inline_subfield *p); + +inline_field *inline_mk_field(void) +{ + inline_field *p = (inline_field *) xmalloc(sizeof(*p)); + + if (p) + { + memset(p, 0, sizeof(*p)); + p->name = (char *) xmalloc(SZ_FNAME+1); + *(p->name) = '\0'; + p->ind1 = (char *) xmalloc(SZ_IND+1); + *(p->ind1) = '\0'; + p->ind2 = (char *) xmalloc(SZ_IND+1); + *(p->ind2) = '\0'; + } + return p; +} +void inline_destroy_field(inline_field *p) +{ + if (p) + { + if (p->name) xfree(p->name); + if (p->ind1) xfree(p->ind1); + if (p->ind2) xfree(p->ind2); + if (p->list) + inline_destroy_subfield_recursive(p->list); + xfree(p); + } +} +static inline_subfield *inline_mk_subfield(inline_subfield *parent) +{ + inline_subfield *p = (inline_subfield *)xmalloc(sizeof(*p)); + + if (p) + { + memset(p, 0, sizeof(*p)); + p->name = (char *) xmalloc(SZ_SFNAME+1); + *(p->name) = '\0'; + p->parent = parent; + } + return p; +} + +#if 0 +static void inline_destroy_subfield(inline_subfield *p) +{ + if (p) + { + if (p->name) xfree(p->name); + if (p->data) xfree(p->data); + if (p->parent) p->parent->next = p->next; + xfree(p); + } +} +#endif + +static void inline_destroy_subfield_recursive(inline_subfield *p) +{ + if (p) + { + inline_destroy_subfield_recursive(p->next); + if (p->name) xfree(p->name); + if (p->data) xfree(p->data); + if (p->parent) + p->parent->next = 0; + xfree(p); + } +} +int inline_parse(inline_field *pif, const char *tag, const char *s) +{ + inline_field *pf = pif; + char *p = (char *)s; + + if (!pf) + return -1; + + if (pf->name[0] == '\0') + { + if ((sscanf(p, "%3s", pf->name)) != 1) + return -2; + + p += SZ_FNAME; + + if (!memcmp(pf->name, "00", 2)) + { + pf->list = inline_mk_subfield(0); + pf->list->data 
= xstrdup(p); + } + else + { + if ((sscanf(p, "%c%c", pf->ind1, pf->ind2)) != 2) + return -3; + } + } + else + { + inline_subfield *psf = inline_mk_subfield(0); + + sscanf(tag, "%1s", psf->name); + psf->data = xstrdup(p); + + if (!pf->list) + { + pf->list = psf; + } + else + { + inline_subfield *last = pf->list; + while (last->next) + last = last->next; + last->next = psf; + } + } + return 0; +} +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/inline.h b/index/inline.h new file mode 100644 index 0000000..ab61a13 --- /dev/null +++ b/index/inline.h @@ -0,0 +1,45 @@ +/* + $Id: inline.h,v 1.1 2006-07-03 14:27:09 adam Exp $ +*/ +#ifndef INLINE_H +#define INLINE_H + +#include "marcomp.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct inline_field +{ + char *name; + char *ind1; + char *ind2; + struct inline_subfield *list; +} inline_field; +typedef struct inline_subfield +{ + char *name; + char *data; + struct inline_subfield *next; + struct inline_subfield *parent; +} inline_subfield; + +inline_field *inline_mk_field(void); +int inline_parse(inline_field *pf, const char *tag, const char *s); +void inline_destroy_field(inline_field *p); + +#ifdef __cplusplus +} +#endif + +#endif +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/marcomp.c b/index/marcomp.c new file mode 100644 index 0000000..f79a82f --- /dev/null +++ b/index/marcomp.c @@ -0,0 +1,413 @@ +/* + $Id: marcomp.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + + marcomp.c - compiler of MARC statements. 
+*/ + +#include +#include +#include +#include + +#include + +#include "marcomp.h" + +static mc_token mc_gettoken(mc_context *c); +static void mc_ungettoken(mc_context *c); +static int mc_getval(mc_context *c); +static int mc_getdata(mc_context *c, char *s, int sz); +static void mc_getinterval(mc_context *c, int *start, int *end); + +static mc_subfield *mc_mk_subfield(mc_subfield *parent); +static mc_field *mc_mk_field(void); + +static struct mc_errmsg +{ + mc_errcode code; + const char *msg; +} mc_errmsg[] = { +{EMCOK, "OK"}, +{EMCNOMEM, "NO mem"}, +{EMCF, "not complete field"}, +{EMCSF, "not complete subfield"}, +{EMCSFGROUP, "not closed GROUP"}, +{EMCSFVAR, "not closed VARIANT"}, +{EMCSFINLINE, "not closed IN-LINE"}, +{EMCEND, "not correct errno"} +}; +mc_errcode mc_errno(mc_context *c) +{ + return c->errcode; +} +const char *mc_error(mc_errcode no) +{ + if (no >= EMCOK && noerrcode = EMCOK; + p->data = s; + p->len = strlen(s); + p->crrtok = NOP; + } + + return p; +} +void mc_destroy_context(mc_context *c) +{ + if (c) xfree(c); +} +mc_token mc_gettoken(mc_context *c) +{ + if (c->offset >= c->len) + return NOP; + + switch (*(c->data+c->offset)) + { + case '{': c->crrtok = LVARIANT; break; + case '}': c->crrtok = RVARIANT; break; + case '(': c->crrtok = LGROUP; break; + case ')': c->crrtok = RGROUP; break; + case '<': c->crrtok = LINLINE; break; + case '>': c->crrtok = RINLINE; break; + case '$': c->crrtok = SUBFIELD; break; + case '[': c->crrtok = LINTERVAL; break; + case ']': c->crrtok = RINTERVAL; break; + default: + if (isspace(*(unsigned char *) (c->data+c->offset)) + || *(c->data+c->offset) == '\n') + { + c->crrtok = NOP; + } + else + { + c->crrtok = REGULAR; + c->crrval = *(c->data+c->offset); + } + } +#ifdef DEBUG + fprintf(stderr, "gettoken(): offset: %d", c->offset); + if (c->crrtok == REGULAR) + fprintf(stderr, "<%c>", c->crrval); + fprintf(stderr, "\n"); +#endif + c->offset++; + return c->crrtok; +} +void mc_ungettoken(mc_context *c) +{ + if (c->offset 
> 0) + c->offset--; +} +int mc_getval(mc_context *c) +{ + return c->crrval; +} +int mc_getdata(mc_context *c, char *s, int sz) +{ + int i; + + for (i=0; iname = (char *)xmalloc(SZ_FNAME+1); + *p->name = '\0'; + p->ind1 = (char *)xmalloc(SZ_IND+1); + *p->ind1 = '\0'; + p->ind2 = (char *)xmalloc(SZ_IND+1); + *p->ind2 = '\0'; + p->interval.start = p->interval.end = -1; + } + return p; +} +void mc_destroy_field(mc_field *p) +{ + if (!p) + return; + if (p->name) xfree(p->name); + if (p->ind1) xfree(p->ind1); + if (p->ind2) xfree(p->ind2); + if (p->list) mc_destroy_subfields_recursive(p->list); + xfree(p); +} +mc_field *mc_getfield(mc_context *c) +{ + mc_field *pf; + + pf = mc_mk_field(); + + if (!pf) + { + c->errcode = EMCNOMEM; + return 0; + } + + if (mc_getdata(c, pf->name, SZ_FNAME) == SZ_FNAME) + { + mc_token nexttok = mc_gettoken(c); + + mc_ungettoken(c); + + if (nexttok == LINTERVAL) + { + mc_getinterval(c, &pf->interval.start, &pf->interval.end); +#ifdef DEBUG + fprintf(stderr, "ineterval (%d)-(%d)\n", pf->interval.start, + pf->interval.end); +#endif + } + + if ((mc_getdata(c, pf->ind1, SZ_IND) == SZ_IND) && + (mc_getdata(c, pf->ind2, SZ_IND) == SZ_IND)) + { + pf->list = mc_getsubfields(c, 0); + } + } + else + { + c->errcode = EMCF; + mc_destroy_field(pf); + return 0; + } + + return pf; +} +mc_subfield *mc_mk_subfield(mc_subfield *parent) +{ + mc_subfield *p = (mc_subfield*)xmalloc(sizeof(*p)); + + if (p) + { + memset(p, 0, sizeof(*p)); + p->which = MC_SF; + p->name = (char *)xmalloc(SZ_SFNAME+1); + *p->name = '\0'; + p->prefix = (char *)xmalloc(SZ_PREFIX+1); + *p->prefix = '\0'; + p->suffix = (char *)xmalloc(SZ_SUFFIX+1); + *p->suffix = '\0'; + p->parent = parent; + p->interval.start = p->interval.end = -1; + } + return p; +} +void mc_destroy_subfield(mc_subfield *p) +{ + if (!p) + return; + + if (p->which == MC_SFGROUP || p->which == MC_SFVARIANT) + { + if (p->u.child) + mc_destroy_subfields_recursive(p->u.child); + } + else if (p->which == MC_SF) + { + if 
(p->u.in_line) + mc_destroy_field(p->u.in_line); + } + if (p->name) xfree(p->name); + if (p->prefix) xfree(p->prefix); + if (p->suffix) xfree(p->suffix); + if (p->parent) p->parent->next = p->next; + xfree(p); +} +void mc_destroy_subfields_recursive(mc_subfield *p) +{ + if (!p) + return; + + mc_destroy_subfields_recursive(p->next); + + if (p->which == MC_SFGROUP || p->which == MC_SFVARIANT) + { + if (p->u.child) + mc_destroy_subfields_recursive(p->u.child); + } + else if (p->which == MC_SF) + { + if (p->u.in_line) + mc_destroy_field(p->u.in_line); + } + + if (p->name) xfree(p->name); + if (p->prefix) xfree(p->prefix); + if (p->suffix) xfree(p->suffix); + if (p->parent) p->parent->next = 0; + xfree(p); +} +mc_subfield *mc_getsubfields(mc_context *c, mc_subfield *parent) +{ + mc_subfield *psf=0; + mc_token tok = mc_gettoken(c); + + if (tok == NOP) + return 0; + + if (tok == LGROUP) + { + if (!(psf = mc_mk_subfield(parent))) + { + c->errcode = EMCNOMEM; + return 0; + } + + psf->which = MC_SFGROUP; + psf->u.child = mc_getsubfields(c, psf); + + if (mc_gettoken(c) == RGROUP) + psf->next = mc_getsubfields(c, psf); + else + { + c->errcode = EMCSFGROUP; + mc_destroy_subfield(psf); + return 0; + } + } + else if (tok == LVARIANT) + { + if (!(psf = mc_mk_subfield(parent))) + { + c->errcode = EMCNOMEM; + return 0; + } + + psf->which = MC_SFVARIANT; + psf->u.child = mc_getsubfields(c, psf); + + if (mc_gettoken(c) == RVARIANT) + psf->next = mc_getsubfields(c, psf); + else + { + c->errcode = EMCSFVAR; + mc_destroy_subfield(psf); + return 0; + } + } + else if (tok == RGROUP || tok == RVARIANT || tok == RINLINE) + { + mc_ungettoken(c); + return 0; + } + else if (tok == REGULAR) + { + if (!(psf = mc_mk_subfield(parent))) + { + c->errcode = EMCNOMEM; + return 0; + } + + mc_ungettoken(c); + + if((mc_getdata(c, psf->prefix, SZ_PREFIX) == SZ_PREFIX) && + (mc_gettoken(c) == SUBFIELD) && + (mc_getdata(c, psf->name, SZ_SFNAME) == SZ_SFNAME)) + { + mc_token tok = mc_gettoken(c); + + 
mc_ungettoken(c); + + if (tok == LINTERVAL) + { + mc_getinterval(c, &psf->interval.start, &psf->interval.end); + } + else if (tok == LINLINE) + { + mc_gettoken(c); + psf->u.in_line = mc_getfield(c); + if (mc_gettoken(c) != RINLINE) + { + c->errcode = EMCSFINLINE; + mc_destroy_subfield(psf); + return 0; + } + } + + if (mc_getdata(c, psf->suffix, SZ_SUFFIX) == SZ_SUFFIX) + { + psf->which = MC_SF; + psf->next = mc_getsubfields(c, psf); + } + else + { + c->errcode = EMCSF; + mc_destroy_subfield(psf); + return 0; + } + } + } + return psf; +} +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/marcomp.h b/index/marcomp.h new file mode 100644 index 0000000..40d5f28 --- /dev/null +++ b/index/marcomp.h @@ -0,0 +1,117 @@ +/* + $Id: marcomp.h,v 1.1 2006-07-03 14:27:09 adam Exp $ +*/ +#ifndef MARCOMP_H +#define MARCOMP_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct mc_subfield +{ + char *name; + char *prefix; + char *suffix; + struct { + int start; + int end; + } interval; + int which; + union { +#define MC_SF 1 +#define MC_SFGROUP 2 +#define MC_SFVARIANT 3 + struct mc_field *in_line; + struct mc_subfield *child; + } u; + struct mc_subfield *next; + struct mc_subfield *parent; +} mc_subfield; + +#define SZ_FNAME 3 +#define SZ_IND 1 +#define SZ_SFNAME 1 +#define SZ_PREFIX 1 +#define SZ_SUFFIX 1 + +typedef struct mc_field +{ + char *name; + char *ind1; + char *ind2; + struct { + int start; + int end; + } interval; + struct mc_subfield *list; +} mc_field; + +typedef enum +{ + NOP, + REGULAR, + LVARIANT, + RVARIANT, + LGROUP, + RGROUP, + LINLINE, + RINLINE, + SUBFIELD, + LINTERVAL, + RINTERVAL, +} mc_token; + +typedef enum +{ + EMCOK = 0, /* first always, mondatory */ + EMCNOMEM, + EMCF, + EMCSF, + EMCSFGROUP, + EMCSFVAR, + EMCSFINLINE, + EMCEND /* last always, mondatory */ +} mc_errcode; + +typedef struct mc_context +{ + int offset; + + int crrval; + mc_token crrtok; 
+ + mc_errcode errcode; + + int len; + const char *data; +} mc_context; + +mc_context *mc_mk_context(const char *s); +void mc_destroy_context(mc_context *c); + +mc_field *mc_getfield(mc_context *c); +void mc_destroy_field(mc_field *p); +void mc_pr_field(mc_field *p, int offset); + +mc_subfield *mc_getsubfields(mc_context *c, mc_subfield *parent); +void mc_destroy_subfield(mc_subfield *p); +void mc_destroy_subfields_recursive(mc_subfield *p); +void mc_pr_subfields(mc_subfield *p, int offset); + +mc_errcode mc_errno(mc_context *c); +const char *mc_error(mc_errcode no); + +#ifdef __cplusplus +} +#endif + +#endif +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/marcread.c b/index/marcread.c new file mode 100644 index 0000000..e030cf9 --- /dev/null +++ b/index/marcread.c @@ -0,0 +1,899 @@ +/* $Id: marcread.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
+*/ + +#include +#include +#include + +#include +#include +#include +#include "marcomp.h" +#include "inline.h" + +#define MARC_DEBUG 0 +#define MARCOMP_DEBUG 0 + +struct marc_info { + char type[256]; +}; + +static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) +{ + struct marc_info *mi = (struct marc_info*) p->clientData; + char buf[100000]; + int entry_p; + int record_length; + int indicator_length; + int identifier_length; + int base_address; + int end_of_directory; + int length_data_entry; + int length_starting; + int length_implementation; + int read_bytes; +#if MARC_DEBUG + FILE *outf = stdout; +#endif + data1_node *res_root, *res_top; + char *absynName; + data1_marctab *marctab; + + if ((*p->readf)(p->fh, buf, 5) != 5) + return NULL; + while (*buf < '0' || *buf > '9') + { + int i; + + yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", + *buf & 0xff, *buf & 0xff); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + + if ((*p->readf)(p->fh, buf+4, 1) != 1) + return NULL; + } + record_length = atoi_n (buf, 5); + if (record_length < 25) + { + yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length); + return NULL; + } + /* read remaining part - attempt to read one byte furhter... 
*/ + read_bytes = (*p->readf)(p->fh, buf+5, record_length-4); + if (read_bytes < record_length-5) + { + yaz_log (YLOG_WARN, "Couldn't read whole MARC record"); + return NULL; + } + if (read_bytes == record_length - 4) + { + off_t cur_offset = (*p->tellf)(p->fh); + if (cur_offset <= 27) + return NULL; + if (p->endf) + (*p->endf)(p->fh, cur_offset - 1); + } + absynName = mi->type; + res_root = data1_mk_root (p->dh, p->mem, absynName); + if (!res_root) + { + yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax"); + return 0; + } + if (marc_xml) + { + data1_node *lead; + const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0}; + + res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root); + + lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top); + data1_mk_text_n(p->dh, p->mem, buf, 24, lead); + } + else + res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); + + if ((marctab = data1_absyn_getmarctab(p->dh, res_root->u.root.absyn))) + { + memcpy(marctab->leader, buf, 24); + memcpy(marctab->implementation_codes, buf+6, 4); + marctab->implementation_codes[4] = '\0'; + memcpy(marctab->user_systems, buf+17, 3); + marctab->user_systems[3] = '\0'; + } + + if (marctab && marctab->force_indicator_length >= 0) + indicator_length = marctab->force_indicator_length; + else + indicator_length = atoi_n (buf+10, 1); + if (marctab && marctab->force_identifier_length >= 0) + identifier_length = marctab->force_identifier_length; + else + identifier_length = atoi_n (buf+11, 1); + base_address = atoi_n (buf+12, 5); + + length_data_entry = atoi_n (buf+20, 1); + length_starting = atoi_n (buf+21, 1); + length_implementation = atoi_n (buf+22, 1); + + for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int l = 3 + length_data_entry + length_starting; + if (entry_p + l >= record_length) + { + yaz_log(YLOG_WARN, "MARC: Directory offset %d: end of record.", + entry_p); + return 0; + } + /* check for digits in length info */ + while (--l >= 3) + if 
(!isdigit(*(const unsigned char *) (buf + entry_p+l))) + break; + if (l >= 3) + { + /* not all digits, so stop directory scan */ + yaz_log(YLOG_LOG, "MARC: Bad directory"); + break; + } + entry_p += 3 + length_data_entry + length_starting; + } + end_of_directory = entry_p; + if (base_address != entry_p+1) + { + yaz_log(YLOG_WARN, "MARC: Base address does not follow directory"); + } + for (entry_p = 24; entry_p != end_of_directory; ) + { + int data_length; + int data_offset; + int end_offset; + int i, i0; + char tag[4]; + data1_node *res; + data1_node *parent = res_top; + + memcpy (tag, buf+entry_p, 3); + entry_p += 3; + tag[3] = '\0'; + + if (marc_xml) + res = parent; + else + res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); + +#if MARC_DEBUG + fprintf (outf, "%s ", tag); +#endif + data_length = atoi_n (buf+entry_p, length_data_entry); + entry_p += length_data_entry; + data_offset = atoi_n (buf+entry_p, length_starting); + entry_p += length_starting; + i = data_offset + base_address; + end_offset = i+data_length-1; + + if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) + { + yaz_log(YLOG_WARN, "MARC: Bad offsets in data. 
Skipping rest"); + break; + } + + if (memcmp (tag, "00", 2) && indicator_length) + { + /* generate indicator node */ + if (marc_xml) + { + const char *attr[10]; + int j; + + attr[0] = "tag"; + attr[1] = tag; + attr[2] = 0; + + res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res); + + for (j = 0; jdh, p->mem, res, attr); + } + } + else + { +#if MARC_DEBUG + int j; +#endif + res = data1_mk_tag_n (p->dh, p->mem, + buf+i, indicator_length, 0 /* attr */, res); +#if MARC_DEBUG + for (j = 0; jdh, p->mem, "controlfield", attr, res); + } + } + parent = res; + /* traverse sub fields */ + i0 = i; + while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) + { + if (memcmp (tag, "00", 2) && identifier_length) + { + data1_node *res; + if (marc_xml) + { + int j; + const char *attr[3]; + char code[10]; + + for (j = 1; jdh, p->mem, "subfield", + attr, parent); + } + else + { + res = data1_mk_tag_n (p->dh, p->mem, + buf+i+1, identifier_length-1, + 0 /* attr */, parent); + } +#if MARC_DEBUG + fprintf (outf, " $"); + for (j = 1; jdh, p->mem, buf + i0, i - i0, res); + i0 = i; + } + else + { +#if MARC_DEBUG + fprintf (outf, "%c", buf[i]); +#endif + i++; + } + } + if (i > i0) + { + data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent); + } +#if MARC_DEBUG + fprintf (outf, "\n"); + if (i < end_offset) + fprintf (outf, "-- separator but not at end of field\n"); + if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + fprintf (outf, "-- no separator at end of field\n"); +#endif + } + return res_root; +} + +/* + * Locate some data under this node. This routine should handle variants + * prettily. 
+ */ +static char *get_data(data1_node *n, int *len) +{ + char *r; + + while (n) + { + if (n->which == DATA1N_data) + { + int i; + *len = n->u.data.len; + + for (i = 0; i<*len; i++) + if (!d1_isspace(n->u.data.data[i])) + break; + while (*len && d1_isspace(n->u.data.data[*len - 1])) + (*len)--; + *len = *len - i; + if (*len > 0) + return n->u.data.data + i; + } + if (n->which == DATA1N_tag) + n = n->child; + else if (n->which == DATA1N_data) + n = n->next; + else + break; + } + r = ""; + *len = strlen(r); + return r; +} + +static data1_node *lookup_subfield(data1_node *node, const char *name) +{ + data1_node *p; + + for (p=node; p; p=p->next) + { + if (!yaz_matchstr(p->u.tag.tag, name)) + return p; + } + return 0; +} + +static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, + const char *name) +{ + inline_subfield *p; + + for (p=pisf; p; p=p->next) + { + if (!yaz_matchstr(p->name, name)) + return p; + } + return 0; +} + +static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, + inline_subfield *pisf) +{ + mc_subfield *p; + + for (p = psf; p && pisf; p = p->next) + { + if (p->which == MC_SF) + { + inline_subfield *found = lookup_inline_subfield(pisf, p->name); + + if (found) + { + if (strcmp(p->prefix, "_")) + { + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); + } + if (p->interval.start == -1) + { + wrbuf_puts(buf, found->data); + } + else + { + wrbuf_write(buf, found->data+p->interval.start, + p->interval.end-p->interval.start); + wrbuf_puts(buf, ""); + } + if (strcmp(p->suffix, "_")) + { + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); + } +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); +#endif + pisf = found->next; + } + } + else if (p->which == MC_SFVARIANT) + { + inline_subfield *next; + + do { + next = cat_inline_subfield(p->u.child, buf, pisf); + if (next == pisf) + break; + pisf = next; + } while (pisf); + } + else if (p->which == MC_SFGROUP) + { + mc_subfield *pp; + 
int found; + + for (pp = p->u.child, found = 0; pp; pp = pp->next) + { + if (!yaz_matchstr(pisf->name, p->name)) + { + found = 1; + break; + } + } + if (found) + { + wrbuf_puts(buf, " ("); + pisf = cat_inline_subfield(p->u.child, buf, pisf); + wrbuf_puts(buf, ") "); + } + } + } + return pisf; +} + +static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) +{ + if (!pf || !subfield) + return; + + for (;subfield;) + { + int len; + inline_field *pif=NULL; + data1_node *psubf; + + if (yaz_matchstr(subfield->u.tag.tag, "1")) + { + subfield = subfield->next; + continue; + } + + psubf = subfield; + pif = inline_mk_field(); + do + { + int i; + if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0) + { + yaz_log(YLOG_WARN, "inline subfield ($%s): parse error", + psubf->u.tag.tag); + inline_destroy_field(pif); + return; + } + psubf = psubf->next; + } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1")); + + subfield = psubf; + + if (pif && !yaz_matchstr(pif->name, pf->name)) + { + if (!pf->list && pif->list) + { + wrbuf_puts(buf, pif->list->data); + } + else + { + int ind1, ind2; + + /* + check indicators + */ + + ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0]; + ind2 = (pif->ind2[0] == ' ') ? 
'_':pif->ind2[0]; + + if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && + ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))) + { + cat_inline_subfield(pf->list, buf, pif->list); + + /* + add separator for inline fields + */ + if (wrbuf_len(buf)) + { + wrbuf_puts(buf, "\n"); + } + } + else + { + yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); + } + } + } + inline_destroy_field(pif); + } +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", buf); +#endif +} + +static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, + data1_node *subfield) +{ + mc_subfield *p; + + for (p = psf; p && subfield; p = p->next) + { + if (p->which == MC_SF) + { + data1_node *found = lookup_subfield(subfield, p->name); + + if (found) + { + int len; + + if (strcmp(p->prefix, "_")) + { + wrbuf_puts(buf, " "); + wrbuf_puts(buf, p->prefix); + } + + if (p->u.in_line) + { + cat_inline_field(p->u.in_line, buf, found); + } + else if (p->interval.start == -1) + { + wrbuf_puts(buf, get_data(found, &len)); + } + else + { + wrbuf_write(buf, get_data(found, &len)+p->interval.start, + p->interval.end-p->interval.start); + wrbuf_puts(buf, ""); + } + if (strcmp(p->suffix, "_")) + { + wrbuf_puts(buf, p->suffix); + wrbuf_puts(buf, " "); + } +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); +#endif + subfield = found->next; + } + } + else if (p->which == MC_SFVARIANT) + { + data1_node *next; + do { + next = cat_subfield(p->u.child, buf, subfield); + if (next == subfield) + break; + subfield = next; + } while (subfield); + } + else if (p->which == MC_SFGROUP) + { + mc_subfield *pp; + int found; + + for (pp = p->u.child, found = 0; pp; pp = pp->next) + { + if (!yaz_matchstr(subfield->u.tag.tag, pp->name)) + { + found = 1; + break; + } + } + if (found) + { + wrbuf_puts(buf, " ("); + subfield = cat_subfield(p->u.child, buf, subfield); + wrbuf_puts(buf, ") "); + } + } + } + return subfield; +} + +static 
data1_node *cat_field(struct grs_read_info *p, mc_field *pf, + WRBUF buf, data1_node *field) +{ + data1_node *subfield; + int ind1, ind2; + + if (!pf || !field) + return 0; + + + if (yaz_matchstr(field->u.tag.tag, pf->name)) + return field->next; + + subfield = field->child; + + if (!subfield) + return field->next; + + /* + check subfield without indicators + */ + + if (!pf->list && subfield->which == DATA1N_data) + { + int len; + + if (pf->interval.start == -1) + { + wrbuf_puts(buf, get_data(field, &len)); + } + else + { + wrbuf_write(buf, get_data(field, &len)+pf->interval.start, + pf->interval.end-pf->interval.start); + wrbuf_puts(buf, ""); + } +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); +#endif + return field->next; + } + + /* + check indicators + */ + + ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0]; + ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1]; + + if (!( + ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && + ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])) + )) + { +#if MARCOMP_DEBUG + yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag); +#endif + return field->next; + } + + subfield = subfield->child; + + if (!subfield) + return field->next; + + cat_subfield(pf->list, buf, subfield); + +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); +#endif + + return field->next; +} + +static int is_empty(char *s) +{ + char *p = s; + + for (p = s; *p; p++) + { + if (!isspace(*(unsigned char *)p)) + return 0; + } + return 1; +} + +static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, + data1_node *root) +{ + data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root->u.root.absyn); + data1_node *top = root->child; + data1_node *field; + mc_context *c; + mc_field *pf; + WRBUF buf; + + c = mc_mk_context(mc_stmnt+3); + + if (!c) + return; + + pf = mc_getfield(c); + + if (!pf) + { + mc_destroy_context(c); + return; + 
} + buf = wrbuf_alloc(); +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); +#endif + if (!yaz_matchstr(pf->name, "ldr")) + { + data1_node *new; +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions", + pf->interval.start, pf->interval.end); +#endif + new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); + data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start, + pf->interval.end-pf->interval.start+1, new); + } + else + { + field=top->child; + + while(field) + { + if (!yaz_matchstr(field->u.tag.tag, pf->name)) + { + data1_node *new; + char *pb; +#if MARCOMP_DEBUG + yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); +#endif + wrbuf_rewind(buf); + wrbuf_puts(buf, ""); + + field = cat_field(p, pf, buf, field); + + pb = wrbuf_buf(buf); + for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) + { + if (!is_empty(pb)) + { + new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); + data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new); + } + } + } + else + { + field = field->next; + } + } + } + mc_destroy_field(pf); + mc_destroy_context(c); + wrbuf_free(buf, 1); +} + +data1_node *grs_read_marcxml(struct grs_read_info *p) +{ + data1_node *root = grs_read_iso2709(p, 1); + data1_element *e; + + if (!root) + return 0; + + for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) + { + data1_tag *tag = e->tag; + + if (tag && tag->which == DATA1T_string && + !yaz_matchstr(tag->value.string, "mc?")) + parse_data1_tree(p, tag->value.string, root); + } + return root; +} + +data1_node *grs_read_marc(struct grs_read_info *p) +{ + data1_node *root = grs_read_iso2709(p, 0); + data1_element *e; + + if (!root) + return 0; + + for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) + { + data1_tag *tag = e->tag; + + if (tag && tag->which == DATA1T_string && + !yaz_matchstr(tag->value.string, "mc?")) + 
parse_data1_tree(p, tag->value.string, root); + } + return root; +} + +static void *init_marc(Res res, RecType rt) +{ + struct marc_info *p = xmalloc(sizeof(*p)); + strcpy(p->type, ""); + return p; +} + +static ZEBRA_RES config_marc(void *clientData, Res res, const char *args) +{ + struct marc_info *p = (struct marc_info*) clientData; + if (strlen(args) < sizeof(p->type)) + strcpy(p->type, args); + return ZEBRA_OK; +} + +static void destroy_marc(void *clientData) +{ + struct marc_info *p = (struct marc_info*) clientData; + xfree (p); +} + + +static int extract_marc(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_marc); +} + +static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_marc); +} + +static struct recType marc_type = { + 0, + "grs.marc", + init_marc, + config_marc, + destroy_marc, + extract_marc, + retrieve_marc, +}; + +static int extract_marcxml(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_marcxml); +} + +static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_marcxml); +} + +static struct recType marcxml_type = { + 0, + "grs.marcxml", + init_marc, + config_marc, + destroy_marc, + extract_marcxml, + retrieve_marcxml, +}; + +RecType +#ifdef IDZEBRA_STATIC_GRS_MARC +idzebra_filter_grs_marc +#else +idzebra_filter +#endif + +[] = { + &marc_type, + &marcxml_type, + 0, +}; + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/recctrl.c b/index/recctrl.c new file mode 100644 index 0000000..24beaae --- /dev/null +++ b/index/recctrl.c @@ -0,0 +1,284 @@ +/* $Id: recctrl.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. 
+ +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + +#include +#include +#include +#if HAVE_DLFCN_H +#include +#endif + +#include +#include +#include + +struct recTypeClass { + RecType recType; + struct recTypeClass *next; + void *module_handle; +}; + +struct recTypeInstance { + RecType recType; + struct recTypeInstance *next; + int init_flag; + void *clientData; +}; + +struct recTypes { + data1_handle dh; + struct recTypeInstance *entries; +}; + +static void recTypeClass_add (struct recTypeClass **rts, RecType *rt, + NMEM nmem, void *module_handle); + + +RecTypeClass recTypeClass_create (Res res, NMEM nmem) +{ + struct recTypeClass *rts = 0; + +#ifdef IDZEBRA_STATIC_GRS_SGML + if (1) + { + extern RecType idzebra_filter_grs_sgml[]; + recTypeClass_add (&rts, idzebra_filter_grs_sgml, nmem, 0); + } +#endif + +#ifdef IDZEBRA_STATIC_TEXT + if (1) + { + extern RecType idzebra_filter_text[]; + recTypeClass_add (&rts, idzebra_filter_text, nmem, 0); + } +#endif + +#ifdef IDZEBRA_STATIC_GRS_XML +#if HAVE_EXPAT_H + if (1) + { + extern RecType idzebra_filter_grs_xml[]; + recTypeClass_add (&rts, idzebra_filter_grs_xml, nmem, 0); + } +#endif +#endif + +#ifdef IDZEBRA_STATIC_GRS_REGX + if (1) + { + extern RecType idzebra_filter_grs_regx[]; + recTypeClass_add (&rts, idzebra_filter_grs_regx, nmem, 0); + } +#endif + +#ifdef IDZEBRA_STATIC_GRS_MARC + if (1) + { 
+ extern RecType idzebra_filter_grs_marc[]; + recTypeClass_add (&rts, idzebra_filter_grs_marc, nmem, 0); + } +#endif + +#ifdef IDZEBRA_STATIC_SAFARI + if (1) + { + extern RecType idzebra_filter_safari[]; + recTypeClass_add (&rts, idzebra_filter_safari, nmem, 0); + } +#endif + +#ifdef IDZEBRA_STATIC_ALVIS +#if HAVE_XSLT + if (1) + { + extern RecType idzebra_filter_alvis[]; + recTypeClass_add (&rts, idzebra_filter_alvis, nmem, 0); + } +#endif +#endif + + return rts; +} + +void recTypeClass_load_modules(RecTypeClass *rts, NMEM nmem, + const char *module_path) +{ +#if HAVE_DLFCN_H + if (module_path) + { + DIR *dir = opendir(module_path); + yaz_log(YLOG_LOG, "searching filters in %s", module_path); + if (dir) + { + struct dirent *de; + + while ((de = readdir(dir))) + { + size_t dlen = strlen(de->d_name); + if (dlen >= 5 && + !memcmp(de->d_name, "mod-", 4) && + !strcmp(de->d_name + dlen - 3, ".so")) + { + void *mod_p, *fl; + char fname[FILENAME_MAX*2+1]; + sprintf(fname, "%.*s/%.*s", + FILENAME_MAX, module_path, + FILENAME_MAX, de->d_name); + mod_p = dlopen(fname, RTLD_NOW|RTLD_GLOBAL); + if (mod_p && (fl = dlsym(mod_p, "idzebra_filter"))) + { + yaz_log(YLOG_LOG, "Loaded filter module %s", fname); + recTypeClass_add(rts, fl, nmem, mod_p); + } + else if (mod_p) + { + const char *err = dlerror(); + yaz_log(YLOG_WARN, "dlsym failed %s %s", + fname, err ? err : "none"); + dlclose(mod_p); + } + else + { + const char *err = dlerror(); + yaz_log(YLOG_WARN, "dlopen failed %s %s", + fname, err ? 
err : "none"); + + } + } + } + closedir(dir); + } + } +#endif +} + +static void recTypeClass_add(struct recTypeClass **rts, RecType *rt, + NMEM nmem, void *module_handle) +{ + while (*rt) + { + struct recTypeClass *r = (struct recTypeClass *) + nmem_malloc (nmem, sizeof(*r)); + + r->next = *rts; + *rts = r; + + yaz_log(YLOG_LOG, "Adding filter %s", (*rt)->name); + r->module_handle = module_handle; + module_handle = 0; /* so that we only store module_handle once */ + r->recType = *rt; + + rt++; + } +} + +void recTypeClass_info(RecTypeClass rtc, void *cd, + void (*cb)(void *cd, const char *s)) +{ + for (; rtc; rtc = rtc->next) + (*cb)(cd, rtc->recType->name); +} + +void recTypeClass_destroy(RecTypeClass rtc) +{ + for (; rtc; rtc = rtc->next) + { +#if HAVE_DLFCN_H + if (rtc->module_handle) + dlclose(rtc->module_handle); +#endif + } +} + +RecTypes recTypes_init(RecTypeClass rtc, data1_handle dh) +{ + RecTypes rts = (RecTypes) nmem_malloc(data1_nmem_get(dh), sizeof(*rts)); + + struct recTypeInstance **rti = &rts->entries; + + rts->dh = dh; + + for (; rtc; rtc = rtc->next) + { + *rti = nmem_malloc(data1_nmem_get(dh), sizeof(**rti)); + (*rti)->recType = rtc->recType; + (*rti)->init_flag = 0; + rti = &(*rti)->next; + } + *rti = 0; + return rts; +} + +void recTypes_destroy (RecTypes rts) +{ + struct recTypeInstance *rti; + + for (rti = rts->entries; rti; rti = rti->next) + { + if (rti->init_flag) + (*(rti->recType)->destroy)(rti->clientData); + } +} + +RecType recType_byName (RecTypes rts, Res res, const char *name, + void **clientDataP) +{ + struct recTypeInstance *rti; + + for (rti = rts->entries; rti; rti = rti->next) + { + size_t slen = strlen(rti->recType->name); + if (!strncmp (rti->recType->name, name, slen) + && (name[slen] == '\0' || name[slen] == '.')) + { + if (!rti->init_flag) + { + rti->init_flag = 1; + rti->clientData = + (*(rti->recType)->init)(res, rti->recType); + } + *clientDataP = rti->clientData; + if (name[slen]) + slen++; /* skip . 
*/ + + if (rti->recType->config) + { + if ((*(rti->recType)->config) + (rti->clientData, res, name+slen) != ZEBRA_OK) + return 0; + } + return rti->recType; + } + } + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/recgrs.c b/index/recgrs.c new file mode 100644 index 0000000..7adf160 --- /dev/null +++ b/index/recgrs.c @@ -0,0 +1,1450 @@ +/* $Id: recgrs.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
+*/ + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define GRS_MAX_WORD 512 + +struct source_parser { + int len; + const char *tok; + const char *src; + int lookahead; + NMEM nmem; +}; + +static int sp_lex(struct source_parser *sp) +{ + while (*sp->src == ' ') + (sp->src)++; + sp->tok = sp->src; + sp->len = 0; + while (*sp->src && !strchr("<>();,-: ", *sp->src)) + { + sp->src++; + sp->len++; + } + if (sp->len) + sp->lookahead = 't'; + else + { + sp->lookahead = *sp->src; + if (*sp->src) + sp->src++; + } + return sp->lookahead; +} + +static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd); + +static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd) +{ + int start, len; + RecWord tmp_w; + + /* ( */ + sp_lex(sp); + if (sp->lookahead != '(') + return 0; + sp_lex(sp); /* skip ( */ + + /* 1st arg: string */ + if (!sp_expr(sp, n, wrd)) + return 0; + + if (sp->lookahead != ',') + return 0; + sp_lex(sp); /* skip , */ + + /* 2nd arg: start */ + if (!sp_expr(sp, n, &tmp_w)) + return 0; + start = atoi_n(tmp_w.term_buf, tmp_w.term_len); + + if (sp->lookahead == ',') + { + sp_lex(sp); /* skip , */ + + /* 3rd arg: length */ + if (!sp_expr(sp, n, &tmp_w)) + return 0; + len = atoi_n(tmp_w.term_buf, tmp_w.term_len); + } + else + len = wrd->term_len; + + /* ) */ + if (sp->lookahead != ')') + return 0; + sp_lex(sp); + + if (wrd->term_buf && wrd->term_len) + { + wrd->term_buf += start; + wrd->term_len -= start; + if (wrd->term_len > len) + wrd->term_len = len; + } + return 1; +} + +static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd) +{ + char num_str[20]; + int min_pos = -1; + sp_lex(sp); + if (sp->lookahead != '(') + return 0; + sp_lex(sp); /* skip ( */ + if (!sp_expr(sp, n, wrd)) + return 0; + while (sp->lookahead == ',') + { + RecWord search_w; + int i; + sp_lex(sp); /* skip , */ + + if (!sp_expr(sp, n, &search_w)) + return 0; + for (i = 0; iterm_len; i++) + { + int j; + for (j = 0; 
jterm_len; j++) + if (wrd->term_buf[i+j] != search_w.term_buf[j]) + break; + if (j == search_w.term_len) /* match ? */ + { + if (min_pos == -1 || i < min_pos) + min_pos = i; + break; + } + } + } + if (sp->lookahead != ')') + return 0; + sp_lex(sp); + if (min_pos == -1) + min_pos = 0; /* the default if not found */ + sprintf(num_str, "%d", min_pos); + wrd->term_buf = nmem_strdup(sp->nmem, num_str); + wrd->term_len = strlen(wrd->term_buf); + return 1; +} + +static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd) +{ + if (sp->lookahead != 't') + return 0; + if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len)) + { + if (n->which == DATA1N_data) + { + wrd->term_buf = n->u.data.data; + wrd->term_len = n->u.data.len; + } + sp_lex(sp); + } + else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len)) + { + if (n->which == DATA1N_tag) + { + wrd->term_buf = n->u.tag.tag; + wrd->term_len = strlen(n->u.tag.tag); + } + sp_lex(sp); + } + else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len)) + { + RecWord tmp_w; + sp_lex(sp); + if (sp->lookahead != '(') + return 0; + sp_lex(sp); + + if (!sp_expr(sp, n, &tmp_w)) + return 0; + + wrd->term_buf = ""; + wrd->term_len = 0; + if (n->which == DATA1N_tag) + { + data1_xattr *p = n->u.tag.attributes; + while (p && strlen(p->name) != tmp_w.term_len && + memcmp (p->name, tmp_w.term_buf, tmp_w.term_len)) + p = p->next; + if (p) + { + wrd->term_buf = p->value; + wrd->term_len = strlen(p->value); + } + } + if (sp->lookahead != ')') + return 0; + sp_lex(sp); + } + else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len)) + { + return sp_first(sp, n, wrd); + } + else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len)) + { + return sp_range(sp, n, wrd); + } + else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok)) + { + char *b; + wrd->term_len = sp->len; + b = nmem_malloc(sp->nmem, sp->len); + memcpy(b, sp->tok, sp->len); + wrd->term_buf = b; + sp_lex(sp); + } + else if (sp->len > 2 && sp->tok[0] == '\'' && 
sp->tok[sp->len-1] == '\'') + { + char *b; + wrd->term_len = sp->len - 2; + b = nmem_malloc(sp->nmem, wrd->term_len); + memcpy(b, sp->tok+1, wrd->term_len); + wrd->term_buf = b; + sp_lex(sp); + } + else + { + wrd->term_buf = ""; + wrd->term_len = 0; + sp_lex(sp); + } + return 1; +} + +static struct source_parser *source_parser_create() +{ + struct source_parser *sp = xmalloc(sizeof(*sp)); + + sp->nmem = nmem_create(); + return sp; +} + +static void source_parser_destroy(struct source_parser *sp) +{ + if (!sp) + return; + nmem_destroy(sp->nmem); + xfree(sp); +} + +static int sp_parse(struct source_parser *sp, + data1_node *n, RecWord *wrd, const char *src) +{ + sp->len = 0; + sp->tok = 0; + sp->src = src; + sp->lookahead = 0; + nmem_reset(sp->nmem); + + sp_lex(sp); + return sp_expr(sp, n, wrd); +} + +int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p) +{ + int res = 1; + char *attname; + data1_xattr *attr; + + if (!p) { + return 1; + } else { + if (p->which == XPATH_PREDICATE_RELATION) { + if (p->u.relation.name[0]) { + if (*p->u.relation.name != '@') { + yaz_log(YLOG_WARN, + " Only attributes (@) are supported in xelm xpath predicates"); + yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name); + return 1; + } + attname = p->u.relation.name + 1; + res = 0; + /* looking for the attribute with a specified name */ + for (attr = n->u.tag.attributes; attr; attr = attr->next) { + yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name ); + + if (!strcmp(attr->name, attname)) { + if (p->u.relation.op[0]) { + if (*p->u.relation.op != '=') { + yaz_log(YLOG_WARN, + "Only '=' relation is supported (%s)",p->u.relation.op); + yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name); + res = 1; break; + } else { + yaz_log(YLOG_DEBUG," - value %s <-> %s", + p->u.relation.value, attr->value ); + if (!strcmp(attr->value, p->u.relation.value)) { + res = 1; break; + } + } + } else { + /* attribute exists, no value specified */ + res = 1; 
break; + } + } + } + yaz_log(YLOG_DEBUG, "return %d", res); + return res; + } else { + return 1; + } + } + else if (p->which == XPATH_PREDICATE_BOOLEAN) { + if (!strcmp(p->u.boolean.op,"and")) { + return d1_check_xpath_predicate(n, p->u.boolean.left) + && d1_check_xpath_predicate(n, p->u.boolean.right); + } + else if (!strcmp(p->u.boolean.op,"or")) { + return (d1_check_xpath_predicate(n, p->u.boolean.left) + || d1_check_xpath_predicate(n, p->u.boolean.right)); + } else { + yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op); + return 1; + } + } + } + return 0; +} + + +static int dfa_match_first(struct DFA_state **dfaar, const char *text) +{ + struct DFA_state *s = dfaar[0]; /* start state */ + struct DFA_tran *t; + int i; + const char *p = text; + unsigned char c; + + for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++) + { + if (c >= t->ch[0] && c <= t->ch[1]) + { + while (i >= 0) + { + /* move to next state and return if we get a match */ + s = dfaar[t->to]; + if (s->rule_no) + return 1; + /* next char */ + if (!c) + return 0; + c = *p++; + for (t = s->trans, i = s->tran_no; --i >= 0; t++) + if (c >= t->ch[0] && c <= t->ch[1]) + break; + } + } + } + return 0; +} + +/* *ostrich* + +New function, looking for xpath "element" definitions in abs, by +tagpath, using a kind of ugly regxp search.The DFA was built while +parsing abs, so here we just go trough them and try to match +against the given tagpath. The first matching entry is returned. + +pop, 2002-12-13 + +Added support for enhanced xelm. Now [] predicates are considered +as well, when selecting indexing rules... (why the hell it's called +termlist???) 
+ +pop, 2003-01-17 + +*/ + +data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) +{ + data1_absyn *abs = n->root->u.root.absyn; + data1_xpelement *xpe = abs->xp_elements; + data1_node *nn; +#ifdef ENHANCED_XELM + struct xpath_location_step *xp; +#endif + char *pexpr = xmalloc(strlen(tagpath)+5); + int ok = 0; + + sprintf (pexpr, "/%s\n", tagpath); + for (; xpe; xpe = xpe->next) + { + int i; + ok = dfa_match_first(xpe->dfa->states, pexpr); + + if (ok) { +#ifdef ENHANCED_XELM + /* we have to check the perdicates up to the root node */ + xp = xpe->xpath; + + /* find the first tag up in the node structure */ + for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent) + ; + + /* go from inside out in the node structure, while going + backwards trough xpath location steps ... */ + for (i = xpe->xpath_len - 1; i>0; i--) + { + yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s", + i, xp[i].part, nn->u.tag.tag); + + if (!d1_check_xpath_predicate(nn, xp[i].predicate)) + { + yaz_log(YLOG_DEBUG, " Predicates didn't match"); + ok = 0; + break; + } + + if (nn->which == DATA1N_tag) + nn = nn->parent; + } +#endif + if (ok) + break; + } + } + + xfree(pexpr); + + if (ok) { + yaz_log(YLOG_DEBUG, "Got it"); + return xpe->termlists; + } else { + return NULL; + } +} + +/* use + 1 start element (tag) + 2 end element + 3 start attr (and attr-exact) + 4 end attr + + 1016 cdata + 1015 attr data + + *ostrich* + + Now, if there is a matching xelm described in abs, for the + indexed element or the attribute, then the data is handled according + to those definitions... 
+ + modified by pop, 2002-12-13 +*/ + +/* add xpath index for an attribute */ +static void index_xpath_attr (char *tag_path, char *name, char *value, + char *structure, struct recExtractCtrl *p, + RecWord *wrd) +{ + wrd->index_name = ZEBRA_XPATH_ELM_BEGIN; + wrd->index_type = '0'; + wrd->term_buf = tag_path; + wrd->term_len = strlen(tag_path); + (*p->tokenAdd)(wrd); + + if (value) { + wrd->index_name = ZEBRA_XPATH_ATTR_CDATA; + wrd->index_type = 'w'; + wrd->term_buf = value; + wrd->term_len = strlen(value); + (*p->tokenAdd)(wrd); + } + wrd->index_name = ZEBRA_XPATH_ELM_END; + wrd->index_type = '0'; + wrd->term_buf = tag_path; + wrd->term_len = strlen(tag_path); + (*p->tokenAdd)(wrd); +} + + +static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n) +{ + size_t flen = 0; + data1_node *nn; + + /* we have to fetch the whole path to the data tag */ + for (nn = n; nn; nn = nn->parent) + { + if (nn->which == DATA1N_tag) + { + size_t tlen = strlen(nn->u.tag.tag); + if (tlen + flen > (max - 2)) + break; + memcpy (tag_path_full + flen, nn->u.tag.tag, tlen); + flen += tlen; + tag_path_full[flen++] = '/'; + } + else + if (nn->which == DATA1N_root) + break; + } + tag_path_full[flen] = 0; +} + + +static void index_xpath(struct source_parser *sp, data1_node *n, + struct recExtractCtrl *p, + int level, RecWord *wrd, + char *xpath_index, + int xpath_is_start + ) +{ + int i; + char tag_path_full[1024]; + int termlist_only = 1; + data1_termlist *tl; + int xpdone = 0; + if ((!n->root->u.root.absyn) || + (n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)) { + termlist_only = 0; + } + + switch (n->which) + { + case DATA1N_data: + wrd->term_buf = n->u.data.data; + wrd->term_len = n->u.data.len; + xpdone = 0; + + mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n); + + /* If we have a matching termlist... 
*/ + if (n->root->u.root.absyn && + (tl = xpath_termlist_by_tagpath(tag_path_full, n))) + { + for (; tl; tl = tl->next) + { + /* need to copy recword because it may be changed */ + RecWord wrd_tl; + wrd->index_type = *tl->structure; + memcpy (&wrd_tl, wrd, sizeof(*wrd)); + if (tl->source) + sp_parse(sp, n, &wrd_tl, tl->source); + if (!tl->index_name) + { + /* this is the ! case, so structure is for the xpath index */ + wrd_tl.index_name = xpath_index; + if (p->flagShowRecords) + { + int i; + printf("%*sXPath index", (level + 1) * 4, ""); + printf (" XData:\""); + for (i = 0; i 40) + printf (" ..."); + fputc ('\n', stdout); + } + else + (*p->tokenAdd)(&wrd_tl); + xpdone = 1; + } else { + /* this is just the old fashioned attribute based index */ + wrd_tl.index_name = tl->index_name; + if (p->flagShowRecords) + { + int i; + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tl->structure); + printf("%s %s", tl->index_name, tl->source); + printf (" XData:\""); + for (i = 0; i 40) + printf (" ..."); + fputc ('\n', stdout); + } + else + (*p->tokenAdd)(&wrd_tl); + } + } + } + /* xpath indexing is done, if there was no termlist given, + or no ! in the termlist, and default indexing is enabled... */ + if (!p->flagShowRecords && !xpdone && !termlist_only) + { + wrd->index_name = xpath_index; + wrd->index_type = 'w'; + (*p->tokenAdd)(wrd); + } + break; + case DATA1N_tag: + mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n); + + wrd->index_type = '0'; + wrd->term_buf = tag_path_full; + wrd->term_len = strlen(tag_path_full); + wrd->index_name = xpath_index; + if (p->flagShowRecords) + { + printf("%*s tag=", (level + 1) * 4, ""); + for (i = 0; iterm_len && i < 40; i++) + fputc (wrd->term_buf[i], stdout); + if (i == 40) + printf (" .."); + printf("\n"); + } + else + { + data1_xattr *xp; + data1_termlist *tl; + int do_xpindex; + + /* Add tag start/end xpath index, only when there is a ! 
in + the apropriate xelm directive, or default xpath indexing + is enabled + */ + if (!(do_xpindex = 1 - termlist_only)) + { + if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) + { + for (; tl; tl = tl->next) + { + if (!tl->index_name) + do_xpindex = 1; + } + } + } + if (do_xpindex) { + (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */ + } + + if (xpath_is_start == 1) /* only for the starting tag... */ + { +#define MAX_ATTR_COUNT 50 + data1_termlist *tll[MAX_ATTR_COUNT]; + + int i = 0; + + /* get termlists for attributes, and find out, if we have to do xpath indexing */ + for (xp = n->u.tag.attributes; xp; xp = xp->next) { + i++; + } + + i = 0; + for (xp = n->u.tag.attributes; xp; xp = xp->next) { + char comb[512]; + int do_xpindex = 1 - termlist_only; + data1_termlist *tl; + char attr_tag_path_full[1024]; + + /* this could be cached as well */ + sprintf (attr_tag_path_full, "@%s/%s", + xp->name, tag_path_full); + + tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n); + + /* if there is a ! 
in the xelm termlist, or default indexing is on, + proceed with xpath idx */ + if ((tl = tll[i])) + { + for (; tl; tl = tl->next) + { + if (!tl->index_name) + do_xpindex = 1; + } + } + + if (do_xpindex) { + + /* attribute (no value) */ + wrd->index_type = '0'; + wrd->index_name = ZEBRA_XPATH_ATTR_NAME; + wrd->term_buf = xp->name; + wrd->term_len = strlen(xp->name); + + wrd->seqno--; + (*p->tokenAdd)(wrd); + + if (xp->value && + strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) { + + /* attribute value exact */ + strcpy (comb, xp->name); + strcat (comb, "="); + strcat (comb, xp->value); + + wrd->index_name = ZEBRA_XPATH_ATTR_NAME; + wrd->index_type = '0'; + wrd->term_buf = comb; + wrd->term_len = strlen(comb); + wrd->seqno--; + + (*p->tokenAdd)(wrd); + } + } + i++; + } + + i = 0; + for (xp = n->u.tag.attributes; xp; xp = xp->next) { + data1_termlist *tl; + char attr_tag_path_full[1024]; + int xpdone = 0; + + sprintf (attr_tag_path_full, "@%s/%s", + xp->name, tag_path_full); + + if ((tl = tll[i])) + { + /* If there is a termlist given (=xelm directive) */ + for (; tl; tl = tl->next) + { + if (!tl->index_name) + { + /* add xpath index for the attribute */ + index_xpath_attr (attr_tag_path_full, xp->name, + xp->value, tl->structure, + p, wrd); + xpdone = 1; + } else { + /* index attribute value (only path/@attr) */ + if (xp->value) + { + wrd->index_name = tl->index_name; + wrd->index_type = *tl->structure; + wrd->term_buf = xp->value; + wrd->term_len = strlen(xp->value); + (*p->tokenAdd)(wrd); + } + } + } + } + /* if there was no termlist for the given path, + or the termlist didn't have a ! 
element, index + the attribute as "w" */ + if ((!xpdone) && (!termlist_only)) + { + index_xpath_attr (attr_tag_path_full, xp->name, + xp->value, "w", p, wrd); + } + i++; + } + } + } + } +} + +static void index_termlist (struct source_parser *sp, data1_node *par, + data1_node *n, + struct recExtractCtrl *p, int level, RecWord *wrd) +{ + data1_termlist *tlist = 0; + data1_datatype dtype = DATA1K_string; + + /* + * cycle up towards the root until we find a tag with an att.. + * this has the effect of indexing locally defined tags with + * the attribute of their ancestor in the record. + */ + + while (!par->u.tag.element) + if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) + break; + if (!par || !(tlist = par->u.tag.element->termlists)) + return; + if (par->u.tag.element->tag) + dtype = par->u.tag.element->tag->kind; + + for (; tlist; tlist = tlist->next) + { + /* consider source */ + wrd->term_buf = 0; + assert(tlist->source); + sp_parse(sp, n, wrd, tlist->source); + + if (wrd->term_buf && wrd->term_len) + { + if (p->flagShowRecords) + { + int i; + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tlist->structure); + printf("%s %s", tlist->index_name, tlist->source); + printf (" XData:\""); + for (i = 0; iterm_len && i < 40; i++) + fputc (wrd->term_buf[i], stdout); + fputc ('"', stdout); + if (wrd->term_len > 40) + printf (" ..."); + fputc ('\n', stdout); + } + else + { + wrd->index_type = *tlist->structure; + wrd->index_name = tlist->index_name; + (*p->tokenAdd)(wrd); + } + } + } +} + +static int dumpkeys_r(struct source_parser *sp, + data1_node *n, struct recExtractCtrl *p, int level, + RecWord *wrd) +{ + for (; n; n = n->next) + { + if (p->flagShowRecords) /* display element description to user */ + { + if (n->which == DATA1N_root) + { + printf("%*s", level * 4, ""); + printf("Record type: '%s'\n", n->u.root.type); + } + else if (n->which == DATA1N_tag) + { + data1_element *e; + + printf("%*s", level * 4, ""); + if (!(e = n->u.tag.element)) + 
printf("Local tag: '%s'\n", n->u.tag.tag); + else + { + printf("Elm: '%s' ", e->name); + if (e->tag) + { + data1_tag *t = e->tag; + + printf("TagNam: '%s' ", t->names->name); + printf("("); + if (t->tagset) + printf("%s[%d],", t->tagset->name, t->tagset->type); + else + printf("?,"); + if (t->which == DATA1T_numeric) + printf("%d)", t->value.numeric); + else + printf("'%s')", t->value.string); + } + printf("\n"); + } + } + } + + if (n->which == DATA1N_tag) + { + index_termlist(sp, n, n, p, level, wrd); + /* index start tag */ + if (n->root->u.root.absyn) + index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN, + 1 /* is start */); + } + + if (n->child) + if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0) + return -1; + + + if (n->which == DATA1N_data) + { + data1_node *par = get_parent_tag(p->dh, n); + + if (p->flagShowRecords) + { + printf("%*s", level * 4, ""); + printf("Data: "); + if (n->u.data.len > 256) + printf("'%.170s ... %.70s'\n", n->u.data.data, + n->u.data.data + n->u.data.len-70); + else if (n->u.data.len > 0) + printf("'%.*s'\n", n->u.data.len, n->u.data.data); + else + printf("NULL\n"); + } + + if (par) + index_termlist(sp, par, n, p, level, wrd); + + index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA, + 0 /* is start */); + } + + if (n->which == DATA1N_tag) + { + /* index end tag */ + index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END, + 0 /* is start */); + } + + if (p->flagShowRecords && n->which == DATA1N_root) + { + printf("%*s-------------\n\n", level * 4, ""); + } + } + return 0; +} + +static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd) +{ + struct source_parser *sp = source_parser_create(); + int r = dumpkeys_r(sp, n, p, 0, wrd); + source_parser_destroy(sp); + return r; +} + +int grs_extract_tree(struct recExtractCtrl *p, data1_node *n) +{ + oident oe; + int oidtmp[OID_SIZE]; + RecWord wrd; + + oe.proto = PROTO_Z3950; + oe.oclass = CLASS_SCHEMA; + if (n->u.root.absyn) + { + oe.value = 
n->u.root.absyn->reference; + + if ((oid_ent_to_oid (&oe, oidtmp))) + (*p->schemaAdd)(p, oidtmp); + } + (*p->init)(p, &wrd); + + return dumpkeys(n, p, &wrd); +} + +static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, + NMEM mem, + data1_node *(*grs_read)(struct grs_read_info *)) +{ + data1_node *n; + struct grs_read_info gri; + oident oe; + int oidtmp[OID_SIZE]; + RecWord wrd; + + gri.readf = p->readf; + gri.seekf = p->seekf; + gri.tellf = p->tellf; + gri.endf = p->endf; + gri.fh = p->fh; + gri.offset = p->offset; + gri.mem = mem; + gri.dh = p->dh; + gri.clientData = clientData; + + n = (*grs_read)(&gri); + if (!n) + return RECCTRL_EXTRACT_EOF; + oe.proto = PROTO_Z3950; + oe.oclass = CLASS_SCHEMA; +#if 0 + if (!n->u.root.absyn) + return RECCTRL_EXTRACT_ERROR; +#endif + if (n->u.root.absyn) + { + oe.value = n->u.root.absyn->reference; + if ((oid_ent_to_oid (&oe, oidtmp))) + (*p->schemaAdd)(p, oidtmp); + } + data1_concat_text(p->dh, mem, n); + + /* ensure our data1 tree is UTF-8 */ + data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n)); + +#if 0 + data1_pr_tree (p->dh, n, stdout); +#endif + + (*p->init)(p, &wrd); + if (dumpkeys(n, p, &wrd) < 0) + { + data1_free_tree(p->dh, n); + return RECCTRL_EXTRACT_ERROR_GENERIC; + } + data1_free_tree(p->dh, n); + return RECCTRL_EXTRACT_OK; +} + +int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, + data1_node *(*grs_read)(struct grs_read_info *)) +{ + int ret; + NMEM mem = nmem_create (); + ret = grs_extract_sub(clientData, p, mem, grs_read); + nmem_destroy(mem); + return ret; +} + +/* + * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic. + */ +static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c, + char **addinfo, ODR o) +{ + data1_esetname *eset; + Z_Espec1 *espec = 0; + Z_ElementSpec *p; + + switch (c->which) + { + case Z_RecordComp_simple: + if (c->u.simple->which != Z_ElementSetNames_generic) + return 26; /* only generic form supported. 
Fix this later */ + if (!(eset = data1_getesetbyname(dh, n->u.root.absyn, + c->u.simple->u.generic))) + { + yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic); + *addinfo = odr_strdup(o, c->u.simple->u.generic); + return 25; /* invalid esetname */ + } + yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec", + c->u.simple->u.generic); + espec = eset->spec; + break; + case Z_RecordComp_complex: + if (c->u.complex->generic) + { + /* insert check for schema */ + if ((p = c->u.complex->generic->elementSpec)) + { + switch (p->which) + { + case Z_ElementSpec_elementSetName: + if (!(eset = + data1_getesetbyname(dh, n->u.root.absyn, + p->u.elementSetName))) + { + yaz_log(YLOG_DEBUG, "Unknown esetname '%s'", + p->u.elementSetName); + *addinfo = odr_strdup(o, p->u.elementSetName); + return 25; /* invalid esetname */ + } + yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec", + p->u.elementSetName); + espec = eset->spec; + break; + case Z_ElementSpec_externalSpec: + if (p->u.externalSpec->which == Z_External_espec1) + { + yaz_log(YLOG_DEBUG, "Got Espec-1"); + espec = p->u.externalSpec-> u.espec1; + } + else + { + yaz_log(YLOG_LOG, "Unknown external espec."); + return 25; /* bad. what is proper diagnostic? */ + } + break; + } + } + } + else + return 26; /* fix */ + } + if (espec) + { + yaz_log(YLOG_DEBUG, "Element: Espec-1 match"); + return data1_doespec1(dh, n, espec); + } + else + { + yaz_log(YLOG_DEBUG, "Element: all match"); + return -1; + } +} + +/* Add Zebra info in separate namespace ... 
+ + 359 + 447 + records/genera.xml + + +*/ + +static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top, + NMEM mem) +{ + const char *idzebra_ns[3]; + const char *i2 = "\n "; + const char *i4 = "\n "; + data1_node *n; + + idzebra_ns[0] = "xmlns"; + idzebra_ns[1] = "http://www.indexdata.dk/zebra/"; + idzebra_ns[2] = 0; + + data1_mk_text (p->dh, mem, i2, top); + + n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top); + + data1_mk_text (p->dh, mem, "\n", top); + + data1_mk_text (p->dh, mem, i4, n); + + data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem); + + if (p->score != -1) + { + data1_mk_text (p->dh, mem, i4, n); + data1_mk_tag_data_int (p->dh, n, "score", p->score, mem); + } + data1_mk_text (p->dh, mem, i4, n); + data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem); + if (p->fname) + { + data1_mk_text (p->dh, mem, i4, n); + data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem); + } + data1_mk_text (p->dh, mem, i2, n); +} + +int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, + data1_node *(*grs_read)(struct grs_read_info *)) +{ + data1_node *node = 0, *onode = 0, *top; + data1_node *dnew; + data1_maptab *map; + int res, selected = 0; + NMEM mem; + struct grs_read_info gri; + const char *tagname; + + int requested_schema = VAL_NONE; + data1_marctab *marctab; + int dummy; + + mem = nmem_create(); + gri.readf = p->readf; + gri.seekf = p->seekf; + gri.tellf = p->tellf; + gri.endf = NULL; + gri.fh = p->fh; + gri.offset = 0; + gri.mem = mem; + gri.dh = p->dh; + gri.clientData = clientData; + + yaz_log(YLOG_DEBUG, "grs_retrieve"); + node = (*grs_read)(&gri); + if (!node) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + data1_concat_text(p->dh, mem, node); + +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif + top = data1_get_root_tag (p->dh, node); + + yaz_log(YLOG_DEBUG, "grs_retrieve: size"); + tagname = data1_systag_lookup(node->u.root.absyn, "size", "size"); + if (tagname && + 
(dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem))) + { + dnew->u.data.what = DATA1I_text; + dnew->u.data.data = dnew->lbuf; + sprintf(dnew->u.data.data, "%d", p->recordSize); + dnew->u.data.len = strlen(dnew->u.data.data); + } + + tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank"); + if (tagname && p->score >= 0 && + (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem))) + { + yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname); + dnew->u.data.what = DATA1I_num; + dnew->u.data.data = dnew->lbuf; + sprintf(dnew->u.data.data, "%d", p->score); + dnew->u.data.len = strlen(dnew->u.data.data); + } + + tagname = data1_systag_lookup(node->u.root.absyn, "sysno", + "localControlNumber"); + if (tagname && p->localno > 0 && + (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem))) + { + yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname); + dnew->u.data.what = DATA1I_text; + dnew->u.data.data = dnew->lbuf; + + sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno); + dnew->u.data.len = strlen(dnew->u.data.data); + } + + if (p->input_format == VAL_TEXT_XML) + zebra_xml_metadata (p, top, mem); + +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif + if (p->comp && p->comp->which == Z_RecordComp_complex && + p->comp->u.complex->generic && + p->comp->u.complex->generic->which == Z_Schema_oid && + p->comp->u.complex->generic->schema.oid) + { + oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid); + if (oe) + requested_schema = oe->value; + } + /* If schema has been specified, map if possible, then check that + * we got the right one + */ + if (requested_schema != VAL_NONE) + { + yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping"); + for (map = node->u.root.absyn->maptabs; map; map = map->next) + { + if (map->target_absyn_ref == requested_schema) + { + onode = node; + if (!(node = data1_map_record(p->dh, onode, map, mem))) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + break; + } + } + if (node->u.root.absyn && + requested_schema 
!= node->u.root.absyn->reference) + { + p->diagnostic = 238; + nmem_destroy (mem); + return 0; + } + } + /* + * Does the requested format match a known syntax-mapping? (this reflects + * the overlap of schema and formatting which is inherent in the MARC + * family) + */ + yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping"); + if (node->u.root.absyn) + for (map = node->u.root.absyn->maptabs; map; map = map->next) + { + if (map->target_absyn_ref == p->input_format) + { + onode = node; + if (!(node = data1_map_record(p->dh, onode, map, mem))) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + break; + } + } + yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier"); + if (node->u.root.absyn && + node->u.root.absyn->reference != VAL_NONE && + p->input_format == VAL_GRS1) + { + oident oe; + Odr_oid *oid; + int oidtmp[OID_SIZE]; + + oe.proto = PROTO_Z3950; + oe.oclass = CLASS_SCHEMA; + oe.value = node->u.root.absyn->reference; + + if ((oid = oid_ent_to_oid (&oe, oidtmp))) + { + char tmp[128]; + data1_handle dh = p->dh; + char *p = tmp; + int *ii; + + for (ii = oid; *ii >= 0; ii++) + { + if (p != tmp) + *(p++) = '.'; + sprintf(p, "%d", *ii); + p += strlen(p); + } + if ((dnew = data1_mk_tag_data_wd(dh, top, + "schemaIdentifier", mem))) + { + dnew->u.data.what = DATA1I_oid; + dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp); + memcpy(dnew->u.data.data, tmp, p - tmp); + dnew->u.data.len = p - tmp; + } + } + } + + yaz_log(YLOG_DEBUG, "grs_retrieve: element spec"); + if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo, + p->odr)) > 0) + { + p->diagnostic = res; + if (onode) + data1_free_tree(p->dh, onode); + data1_free_tree(p->dh, node); + nmem_destroy(mem); + return 0; + } + else if (p->comp && !res) + selected = 1; + +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif + yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping"); + switch (p->output_format = (p->input_format != VAL_NONE ? 
+ p->input_format : VAL_SUTRS)) + { + case VAL_TEXT_XML: +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif + /* default output encoding for XML is UTF-8 */ + data1_iconv (p->dh, mem, node, + p->encoding ? p->encoding : "UTF-8", + data1_get_encoding(p->dh, node)); + + if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + case VAL_GRS1: + data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + dummy = 0; + if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected, + p->odr, &dummy))) + p->diagnostic = 238; /* not available in requested syntax */ + else + p->rec_len = -1; + break; + case VAL_EXPLAIN: + /* ensure our data1 tree is UTF-8 */ + data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + + if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; + else + p->rec_len = -1; + break; + case VAL_SUMMARY: + /* ensure our data1 tree is UTF-8 */ + data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; + else + p->rec_len = -1; + break; + case VAL_SUTRS: + if (p->encoding) + data1_iconv (p->dh, mem, node, p->encoding, + data1_get_encoding(p->dh, node)); + if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + case VAL_SOIF: + if (p->encoding) + data1_iconv (p->dh, mem, node, p->encoding, + data1_get_encoding(p->dh, node)); + if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, 
p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + default: + if (!node->u.root.absyn) + { + p->diagnostic = 238; + break; + } + for (marctab = node->u.root.absyn->marc; marctab; + marctab = marctab->next) + if (marctab->reference == p->input_format) + break; + if (!marctab) + { + p->diagnostic = 238; + break; + } + if (p->encoding) + data1_iconv (p->dh, mem, node, p->encoding, + data1_get_encoding(p->dh, node)); + if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, + selected, &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + } + if (node) + data1_free_tree(p->dh, node); + if (onode) + data1_free_tree(p->dh, onode); + nmem_destroy(mem); + return 0; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/rectext.c b/index/rectext.c new file mode 100644 index 0000000..e07aa2e --- /dev/null +++ b/index/rectext.c @@ -0,0 +1,265 @@ +/* $Id: rectext.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
+*/
+
+
+#include
+#include
+#include
+
+#include
+#include
+/* State of one "text" filter instance; sep is the optional separator string
+ * stored by filter_config (0 when none is configured). */
+struct filter_info {
+    char *sep;
+};
+/* Allocate the filter state with no separator configured. */
+static void *filter_init (Res res, RecType recType)
+{
+    struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
+    tinfo->sep = 0;
+    return tinfo;
+}
+/* Replace the separator with a copy of args; an empty or NULL args clears it. */
+static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
+{
+    struct filter_info *tinfo = (struct filter_info*) clientData;
+    xfree(tinfo->sep);
+    tinfo->sep = 0;
+    if (args && *args)
+        tinfo->sep = xstrdup(args);
+    return ZEBRA_OK;
+}
+/* Free the filter state created by filter_init. */
+static void filter_destroy (void *clientData)
+{
+    struct filter_info *tinfo = clientData;
+    xfree (tinfo->sep);
+    xfree (tinfo);
+}
+/* Read buffer used by filter_extract: buf holds the bytes of the last read,
+ * max is how many of them are valid and offset is the next unread index. */
+struct buf_info {
+    struct recExtractCtrl *p;
+    char *buf;
+    int offset;
+    int max;
+};
+/* Allocate a 4096-byte read buffer; offset == max == 1 makes the first
+ * buf_getchar call perform a read. */
+static struct buf_info *buf_open (struct recExtractCtrl *p)
+{
+    struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi));
+
+    fi->p = p;
+    fi->buf = (char *) xmalloc (4096);
+    fi->offset = 1;
+    fi->max = 1;
+    return fi;
+}
+/* Store the next input byte in *dst and return 1.  Returns 0 when the input
+ * is exhausted, or when the byte equals the first character of the separator;
+ * in that case endf is called with tellf() minus the still-unread buffered
+ * bytes (only the first separator character is ever compared). */
+static int buf_getchar (struct filter_info *tinfo, struct buf_info *fi, char *dst)
+{
+    if (fi->offset >= fi->max)
+    {
+        if (fi->max <= 0)
+            return 0;
+        fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
+        fi->offset = 0;
+        if (fi->max <= 0)
+            return 0;
+    }
+    *dst = fi->buf[(fi->offset)++];
+    if (tinfo->sep && *dst == *tinfo->sep)
+    {
+        off_t off = (*fi->p->tellf)(fi->p->fh);
+        (*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset));
+        return 0;
+    }
+    return 1;
+}
+/* Free a buffer created by buf_open. */
+static void buf_close (struct buf_info *fi)
+{
+    xfree (fi->buf);
+    xfree (fi);
+}
+/* Split the input into lines (at most 511 bytes per piece) and hand every
+ * non-empty piece to p->tokenAdd; reports EOF when nothing was read. */
+static int filter_extract (void *clientData, struct recExtractCtrl *p)
+{
+    struct filter_info *tinfo = clientData;
+    char w[512];
+    RecWord recWord;
+    int r;
+    struct buf_info *fi = buf_open (p);
+    int no_read = 0;
+
+#if 0
+    yaz_log(YLOG_LOG, "filter_extract off=%ld",
+            (long) (*fi->p->tellf)(fi->p->fh));
+#endif
+    (*p->init)(p, &recWord);
+    do
+    {
+        int i = 0;
+
+        r = buf_getchar (tinfo, fi, w);
+        while (r > 0 && i < 511 && w[i] != '\n'
&& w[i] != '\r')
+        {
+            i++;
+            r = buf_getchar (tinfo, fi, w + i);
+        }
+        if (i)
+        {
+            no_read += i;   /* bytes collected before the line end */
+            recWord.term_buf = w;
+            recWord.term_len = i;
+            (*p->tokenAdd)(&recWord);
+        }
+    } while (r > 0);
+    buf_close (fi);
+    if (no_read == 0)
+        return RECCTRL_EXTRACT_EOF;
+    return RECCTRL_EXTRACT_OK;
+}
+/* Build the SUTRS form of a record: an optional header (rank, local number,
+ * file name) followed by the raw record text.  Element set "R" omits the
+ * header and "H" omits the body.  The result lives in a function-static
+ * buffer that is reused (and grown) by later calls. */
+static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
+{
+    int r, filter_ptr = 0;
+    static char *filter_buf = NULL;
+    static int filter_size = 0;
+    int make_header = 1;
+    int make_body = 1;
+    const char *elementSetName = NULL;
+    int no_lines = 0;
+
+    if (p->comp && p->comp->which == Z_RecordComp_simple &&
+        p->comp->u.simple->which == Z_ElementSetNames_generic)
+        elementSetName = p->comp->u.simple->u.generic;
+
+    if (elementSetName)
+    {
+        /* don't make header for the R(aw) element set name */
+        if (!strcmp(elementSetName, "R"))
+        {
+            make_header = 0;
+            make_body = 1;
+        }
+        /* only make header for the H(eader) element set name */
+        else if (!strcmp(elementSetName, "H"))
+        {
+            make_header = 1;
+            make_body = 0;
+        }
+    }
+    while (1)
+    {
+        if (filter_ptr + 4096 >= filter_size) /* keep room for one 4096-byte read */
+        {
+            char *nb;
+
+            filter_size = 2*filter_size + 8192;
+            nb = (char *) xmalloc (filter_size);
+            if (filter_buf)
+            {
+                memcpy (nb, filter_buf, filter_ptr);
+                xfree (filter_buf);
+            }
+            filter_buf = nb;
+        }
+        if (make_header && filter_ptr == 0) /* header goes first, written once */
+        {
+            if (p->score >= 0)
+            {
+                sprintf (filter_buf, "Rank: %d\n", p->score);
+                filter_ptr = strlen(filter_buf);
+            }
+            sprintf (filter_buf + filter_ptr, "Local Number: " ZINT_FORMAT "\n",
+                     p->localno);
+            filter_ptr = strlen(filter_buf);
+            if (p->fname)
+            {
+                sprintf (filter_buf + filter_ptr, "Filename: %s\n", p->fname);
+                filter_ptr = strlen(filter_buf);
+            }
+            strcpy(filter_buf+filter_ptr++, "\n"); /* blank line ends the header */
+        }
+        if (!make_body)
+            break;
+        r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
+        if (r <= 0)
+            break;
+        filter_ptr += r;
+    }
+    filter_buf[filter_ptr] = '\0';
+    if (elementSetName)
+    {
+        if (!strcmp (elementSetName, "B"))
+            no_lines = 4;
+        if
(!strcmp (elementSetName, "M"))
+            no_lines = 20;
+    }
+    if (no_lines)
+    {
+        char *cut = filter_buf; /* ends up just past the no_lines'th newline */
+        int i = 0;
+        /* cut becomes NULL when the record has fewer lines: keep it whole */
+        while (++i <= no_lines && (cut = strchr (cut, '\n')))
+            cut++;
+        if (cut)
+        {
+            *cut = '\0';        /* terminate exactly at the reported length */
+            filter_ptr = cut-filter_buf;
+        }
+    }
+    p->output_format = VAL_SUTRS;
+    p->rec_buf = filter_buf;
+    p->rec_len = filter_ptr;
+    return 0;
+}
+/* Record type descriptor for the "text" filter. */
+static struct recType filter_type = {
+    0,
+    "text",
+    filter_init,
+    filter_config,
+    filter_destroy,
+    filter_extract,
+    filter_retrieve
+};
+/* Exported filter table; its symbol name depends on IDZEBRA_STATIC_TEXT. */
+RecType
+#ifdef IDZEBRA_STATIC_TEXT
+idzebra_filter_text
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &filter_type,
+    0,
+};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/index/regxread.c b/index/regxread.c
new file mode 100644
index 0000000..95bf213
--- /dev/null
+++ b/index/regxread.c
@@ -0,0 +1,2028 @@
+/* $Id: regxread.c,v 1.1 2006-07-03 14:27:09 adam Exp $
+   Copyright (C) 1995-2005
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#if HAVE_TCL_H
+#include
+/* NOTE(review): cmd_tcl_data further down tests TCL_MAJOR_VERSION; confirm
+ * that MAJOR_VERSION is defined anywhere.  HAVE_TCL_OBJECTS is also defined
+ * with an empty value yet tested with "#if HAVE_TCL_OBJECTS" -- verify. */
+#if MAJOR_VERSION >= 8
+#define HAVE_TCL_OBJECTS
+#endif
+#endif
+
+#define REGX_DEBUG 0
+/* F_WIN_EOF: position value stored by f_win_advance once input is exhausted */
+#define F_WIN_EOF 2000000000
+#define F_WIN_READ 1
+/* Token kinds returned by readParseToken; also stored in lexRuleAction.which */
+#define REGX_EOF     0
+#define REGX_PATTERN 1
+#define REGX_BODY    2
+#define REGX_BEGIN   3
+#define REGX_END     4
+#define REGX_CODE    5
+#define REGX_CONTEXT 6
+#define REGX_INIT    7
+/* A code action: str is the NUL-terminated action text (see regxCodeMk) */
+struct regxCode {
+    char *str;
+#if HAVE_TCL_OBJECTS
+    Tcl_Obj *tcl_obj;
+#endif
+};
+/* One action in a rule's action list; which is a REGX_* value and selects
+ * the union member (REGX_END actions use neither). */
+struct lexRuleAction {
+    int which;
+    union {
+        struct {
+            struct DFA *dfa;    /* REGX_PATTERN */
+            int body;
+        } pattern;
+        struct regxCode *code;  /* REGX_CODE */
+    } u;
+    struct lexRuleAction *next;
+};
+/* Rule number together with the rule's action list */
+struct lexRuleInfo {
+    int no;
+    struct lexRuleAction *actionList;
+};
+/* Linked-list node holding one rule of a context */
+struct lexRule {
+    struct lexRuleInfo info;
+    struct lexRule *next;
+};
+/* A named context: its DFA, its rules and its begin/end/init action lists;
+ * fastRule is indexed by rule number and filled in by readFileSpec. */
+struct lexContext {
+    char *name;
+    struct DFA *dfa;
+    struct lexRule *rules;
+    struct lexRuleInfo **fastRule;
+    int ruleNo;
+    int initFlag;
+
+    struct lexRuleAction *beginActionList;
+    struct lexRuleAction *endActionList;
+    struct lexRuleAction *initActionList;
+    struct lexContext *next;
+};
+/* Growable scratch buffer; max is the allocated size of buf */
+struct lexConcatBuf {
+    int max;
+    char *buf;
+};
+/* A loaded filter specification plus the state used while reading a record */
+struct lexSpec {
+    char *name;
+    struct lexContext *context;
+
+    struct lexContext **context_stack;
+    int context_stack_size;
+    int context_stack_top;
+
+    int lineNo;
+    NMEM m;
+    data1_handle dh;
+#if HAVE_TCL_H
+    Tcl_Interp *tcl_interp;
+#endif
+    void *f_win_fh;
+    void (*f_win_ef)(void *, off_t);
+
+    int f_win_start;      /* first byte of buffer is this file offset */
+    int f_win_end;        /* last byte of buffer is this offset - 1 */
+    int f_win_size;       /* size of buffer */
+    char *f_win_buf;      /* buffer itself */
+    int (*f_win_rf)(void *, char *, size_t);
+    off_t (*f_win_sf)(void *, off_t);
+
+    struct lexConcatBuf *concatBuf;
+    int maxLevel;
+    data1_node **d1_stack;
+    int d1_level;
+    int stop_flag;
+
+    int *arg_start;
+    int *arg_end;
+    int arg_no;
+    int ptr;
+};
+/* A spec together with a 256-byte type string */
+struct lexSpecs {
+    struct lexSpec *spec;
+    char type[256];
+};
+/* Return a pointer to the input bytes [start_pos, end_pos) inside the read
+ * window, seeking and/or reading as needed.  *size receives the number of
+ * bytes actually available, never more than end_pos - start_pos. */
+static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
+                        int *size)
+{
+    int i, r, off = start_pos - spec->f_win_start;
+
+    /* case 1: the whole range is already in the window */
+    if (off >= 0 && end_pos <= spec->f_win_end)
+    {
+        *size = end_pos - start_pos;
+        return spec->f_win_buf + off;
+    }
+    /* case 2: the range starts outside the window: seek and refill */
+    if (off < 0 || start_pos >= spec->f_win_end)
+    {
+        (*spec->f_win_sf)(spec->f_win_fh, start_pos);
+        spec->f_win_start = start_pos;
+
+        if (!spec->f_win_buf)
+            spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
+        *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
+                                  spec->f_win_size);
+        spec->f_win_end = spec->f_win_start + *size;
+
+        if (*size > end_pos - start_pos)
+            *size = end_pos - start_pos;
+        return spec->f_win_buf;
+    }
+    /* case 3: the range starts inside the window but runs past its end:
+     * move the bytes from start_pos onwards to the front, then read more
+     * input behind them */
+    for (i = 0; i < spec->f_win_end - start_pos; i++)
+        spec->f_win_buf[i] = spec->f_win_buf[i + off];
+    r = (*spec->f_win_rf)(spec->f_win_fh,
+                          spec->f_win_buf + i,
+                          spec->f_win_size - i);
+    spec->f_win_start = start_pos;
+    spec->f_win_end += r;
+    *size = i + r;
+    if (*size > end_pos - start_pos)
+        *size = end_pos - start_pos;
+    return spec->f_win_buf;
+}
+/* Return the byte at *pos and advance *pos.  When no byte is available,
+ * *pos is set to F_WIN_EOF and 0 is returned. */
+static int f_win_advance (struct lexSpec *spec, int *pos)
+{
+    int size;
+    char *buf;
+
+    if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
+        return spec->f_win_buf[(*pos)++ - spec->f_win_start];
+    if (*pos == F_WIN_EOF)
+        return 0;
+    buf = f_win_get (spec, *pos, *pos+1, &size);
+    if (size == 1)
+    {
+        (*pos)++;
+        return *buf;
+    }
+    *pos = F_WIN_EOF;
+    return 0;
+}
+/* Free the code object *pp (if any) and reset *pp to NULL. */
+static void regxCodeDel (struct regxCode **pp)
+{
+    struct regxCode *p = *pp;
+    if (p)
+    {
+#if HAVE_TCL_OBJECTS
+        if (p->tcl_obj)
+            Tcl_DecrRefCount (p->tcl_obj);
+#endif
+        xfree (p->str);
+        xfree (p);
+        *pp = NULL;
+    }
+}
+/* Create a code object holding a NUL-terminated copy of buf[0..len-1]. */
+static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
+{
+    struct regxCode *p;
+
+    p = (struct regxCode *) xmalloc (sizeof(*p));
+    p->str = (char *) xmalloc (len+1);
+    memcpy (p->str, buf, len);
+    p->str[len] = '\0';
+#if HAVE_TCL_OBJECTS
+
p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
+    if (p->tcl_obj)
+        Tcl_IncrRefCount (p->tcl_obj);
+#endif
+    *pp = p;
+}
+/* Create a DFA for spec patterns.  NOTE(review): the cmap calls presumably
+ * make blank and tab ordinary pattern characters and '/' a terminator --
+ * confirm against the dfa module. */
+static struct DFA *lexSpecDFA (void)
+{
+    struct DFA *dfa;
+
+    dfa = dfa_init ();
+    dfa_parse_cmap_del (dfa, ' ');
+    dfa_parse_cmap_del (dfa, '\t');
+    dfa_parse_cmap_add (dfa, '/', 0);
+    return dfa;
+}
+/* Free every action in the list *rap (with its DFA or code object) and set
+ * *rap to NULL. */
+static void actionListDel (struct lexRuleAction **rap)
+{
+    struct lexRuleAction *ra1, *ra;
+
+    for (ra = *rap; ra; ra = ra1)
+    {
+        ra1 = ra->next;
+        switch (ra->which)
+        {
+        case REGX_PATTERN:
+            dfa_delete (&ra->u.pattern.dfa);
+            break;
+        case REGX_CODE:
+            regxCodeDel (&ra->u.code);
+            break;
+        }
+        xfree (ra);
+    }
+    *rap = NULL;
+}
+/* Allocate an empty context called name; rule numbering starts at 1. */
+static struct lexContext *lexContextCreate (const char *name)
+{
+    struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
+
+    p->name = xstrdup (name);
+    p->ruleNo = 1;
+    p->initFlag = 0;
+    p->dfa = lexSpecDFA ();
+    p->rules = NULL;
+    p->fastRule = NULL;
+    p->beginActionList = NULL;
+    p->endActionList = NULL;
+    p->initActionList = NULL;
+    p->next = NULL;
+    return p;
+}
+/* Free a context together with its DFA, rules and action lists. */
+static void lexContextDestroy (struct lexContext *p)
+{
+    struct lexRule *rp, *rp1;
+
+    dfa_delete (&p->dfa);
+    xfree (p->fastRule);
+    for (rp = p->rules; rp; rp = rp1)
+    {
+        rp1 = rp->next;
+        actionListDel (&rp->info.actionList);
+        xfree (rp);
+    }
+    actionListDel (&p->beginActionList);
+    actionListDel (&p->endActionList);
+    actionListDel (&p->initActionList);
+    xfree (p->name);
+    xfree (p);
+}
+/* Allocate a spec for the filter called name.  Only the fields assigned in
+ * this function are initialized.  NOTE(review): the remaining lexSpec fields
+ * are presumably set by the caller before use -- confirm. */
+static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
+{
+    struct lexSpec *p;
+    int i;
+
+    p = (struct lexSpec *) xmalloc (sizeof(*p));
+    p->name = (char *) xmalloc (strlen(name)+1);
+    strcpy (p->name, name);
+
+#if HAVE_TCL_H
+    p->tcl_interp = 0;
+#endif
+    p->dh = dh;
+    p->context = NULL;
+    p->context_stack_size = 100;
+    p->context_stack = (struct lexContext **)
+        xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
+    p->f_win_buf = NULL;
+
+    p->maxLevel = 128;
+    p->concatBuf = (struct lexConcatBuf *)
+
xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
+    for (i = 0; i < p->maxLevel; i++)
+    {
+        p->concatBuf[i].max = 0;
+        p->concatBuf[i].buf = 0;
+    }
+    p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
+    p->d1_level = 0;
+    return p;
+}
+/* Free *pp together with everything it owns and set *pp to NULL; a NULL *pp
+ * is a no-op. */
+static void lexSpecDestroy (struct lexSpec **pp)
+{
+    struct lexSpec *p;
+    struct lexContext *lt;
+    int i;
+
+    assert (pp);
+    p = *pp;
+    if (!p)
+        return ;
+
+    for (i = 0; i < p->maxLevel; i++)
+        xfree (p->concatBuf[i].buf);
+    xfree (p->concatBuf);
+
+    lt = p->context;
+    while (lt)
+    {
+        struct lexContext *lt_next = lt->next;
+        lexContextDestroy (lt);
+        lt = lt_next;
+    }
+#if HAVE_TCL_OBJECTS /* NOTE(review): tcl_interp itself is declared under HAVE_TCL_H -- confirm this guard */
+    if (p->tcl_interp)
+        Tcl_DeleteInterp (p->tcl_interp);
+#endif
+    xfree (p->name);
+    xfree (p->f_win_buf);
+    xfree (p->context_stack);
+    xfree (p->d1_stack);
+    xfree (p);
+    *pp = NULL;
+}
+/* Read the next token of a spec line starting at *cpp.  Returns 0 at end of
+ * input or for a bad token, otherwise a REGX_* code.  For REGX_CODE, *cpp is
+ * left just past the '{' and *len is the length of the text up to the
+ * matching '}' (or to the end of the string when there is none); for the
+ * other tokens *cpp is advanced past the token. */
+static int readParseToken (const char **cpp, int *len)
+{
+    const char *cp = *cpp;
+    char cmd[32];
+    int i, level;
+
+    while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
+        cp++;
+    switch (*cp)
+    {
+    case '\0':
+        return 0;
+    case '/':
+        *cpp = cp+1;
+        return REGX_PATTERN;
+    case '{':
+        *cpp = cp+1;
+        level = 1;
+        while (*++cp)   /* find the matching '}', honouring nesting */
+        {
+            if (*cp == '{')
+                level++;
+            else if (*cp == '}')
+            {
+                level--;
+                if (level == 0)
+                    break;
+            }
+        }
+        *len = cp - *cpp;
+        return REGX_CODE;
+    default:
+        i = 0;
+        while (1)       /* collect a lower-cased keyword */
+        {
+            if (*cp >= 'a' && *cp <= 'z')
+                cmd[i] = *cp;
+            else if (*cp >= 'A' && *cp <= 'Z')
+                cmd[i] = *cp + 'a' - 'A';
+            else
+                break;
+            if (i < (int) sizeof(cmd)-2) /* stop advancing once cmd is nearly full */
+                i++;
+            cp++;
+        }
+        cmd[i] = '\0';
+        if (i == 0)
+        {
+            yaz_log (YLOG_WARN, "bad character %d %c", *cp, *cp);
+            cp++;
+            while (*cp && *cp != ' ' && *cp != '\t' &&
+                   *cp != '\n' && *cp != '\r')
+                cp++;
+            *cpp = cp;
+            return 0;
+        }
+        *cpp = cp;
+        if (!strcmp (cmd, "begin"))
+            return REGX_BEGIN;
+        else if (!strcmp (cmd, "end"))
+            return REGX_END;
+        else if (!strcmp (cmd, "body"))
+            return REGX_BODY;
+        else if (!strcmp (cmd, "context"))
+            return REGX_CONTEXT;
+        else
if (!strcmp (cmd, "init"))
+            return REGX_INIT;
+        else
+        {
+            yaz_log (YLOG_WARN, "bad command %s", cmd);
+            return 0;
+        }
+    }
+}
+
+/* Parse the action part of a rule in s and build a linked action list at
+ * *ap.  The list is always NULL-terminated on return.  Returns 0 on success
+ * and -1 when a pattern fails to parse (actions parsed before the error stay
+ * in the list). */
+static int actionListMk (struct lexSpec *spec, const char *s,
+                         struct lexRuleAction **ap)
+{
+    int r, tok, len;
+    int bodyMark = 0;
+    const char *s0;
+
+    while ((tok = readParseToken (&s, &len)))
+    {
+        switch (tok)
+        {
+        case REGX_BODY:
+            bodyMark = 1;
+            continue;
+        case REGX_CODE:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            regxCodeMk (&(*ap)->u.code, s, len);
+            /* skip the code text and its closing '}'; an unterminated
+             * block ends at the NUL, which must not be stepped over */
+            s += len;
+            if (*s)
+                s++;
+            break;
+        case REGX_PATTERN:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            (*ap)->u.pattern.body = bodyMark;
+            bodyMark = 0;
+            (*ap)->u.pattern.dfa = lexSpecDFA ();
+            s0 = s;
+            r = dfa_parse ((*ap)->u.pattern.dfa, &s);
+            if (r || *s != '/')
+            {
+                int pos = s - s0;
+                /* release the half-built action, including its DFA */
+                dfa_delete (&(*ap)->u.pattern.dfa);
+                xfree (*ap);
+                *ap = NULL;
+                yaz_log(YLOG_WARN, "regular expression error '%.*s'", pos, s0);
+                return -1;
+            }
+            else
+            {
+                int pos = s - s0;
+                if (debug_dfa_tran)
+                    printf("pattern: %.*s\n", pos, s0);
+                dfa_mkstate((*ap)->u.pattern.dfa);
+                s++;
+            }
+            break;
+        case REGX_BEGIN:
+            yaz_log (YLOG_WARN, "cannot use BEGIN here");
+            continue;
+        case REGX_INIT:
+            yaz_log (YLOG_WARN, "cannot use INIT here");
+            continue;
+        case REGX_END:
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
+            (*ap)->which = tok;
+            break;
+        default:
+            /* e.g. REGX_CONTEXT: no action was created, so *ap must not
+             * be followed below */
+            yaz_log (YLOG_WARN, "cannot use CONTEXT here");
+            continue;
+        }
+        ap = &(*ap)->next;
+    }
+    *ap = NULL;
+    return 0;
+}
+
+/* Parse one logical spec line: a CONTEXT declaration, a BEGIN/END/INIT
+ * action list, or a /pattern/ rule for the current context. */
+int readOneSpec (struct lexSpec *spec, const char *s)
+{
+    int len, r, tok;
+    struct lexRule *rp;
+    struct lexContext *lc;
+
+    tok = readParseToken (&s, &len);
+    if (tok == REGX_CONTEXT)
+    {
+        char context_name[32];
+        tok = readParseToken (&s, &len);
+        if (tok != REGX_CODE)
+        {
+            yaz_log (YLOG_WARN, "missing name after CONTEXT keyword");
+            return 0;
+        }
+        if (len > 31)
+            len = 31;
+        memcpy (context_name, s, len);
+        context_name[len] = '\0';
+        lc = lexContextCreate (context_name);
+        lc->next = spec->context;
+        spec->context = lc;
+        return 0;
+    }
+    if
(!spec->context)
+        spec->context = lexContextCreate ("main");
+
+    switch (tok)
+    {
+    case REGX_BEGIN:
+        actionListDel (&spec->context->beginActionList);
+        actionListMk (spec, s, &spec->context->beginActionList);
+        break;
+    case REGX_END:
+        actionListDel (&spec->context->endActionList);
+        actionListMk (spec, s, &spec->context->endActionList);
+        break;
+    case REGX_INIT:
+        actionListDel (&spec->context->initActionList);
+        actionListMk (spec, s, &spec->context->initActionList);
+        break;
+    case REGX_PATTERN:
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s);
+#endif
+        r = dfa_parse (spec->context->dfa, &s);
+        if (r)
+        {
+            yaz_log (YLOG_WARN, "regular expression error. r=%d", r);
+            return -1;
+        }
+        if (*s != '/')
+        {
+            yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s);
+            return -1;
+        }
+        s++;
+        /* new rules are pushed on the front of the context's rule list */
+        rp = (struct lexRule *) xmalloc (sizeof(*rp));
+        rp->info.no = spec->context->ruleNo++;
+        rp->next = spec->context->rules;
+        spec->context->rules = rp;
+        actionListMk (spec, s, &rp->info.actionList);
+    }
+    return 0;
+}
+
+/* Load the filter specification for spec->name: "<name>.tflt" is tried
+ * first when a Tcl interpreter is attached, then "<name>.flt".  Returns -1
+ * when no spec file can be opened. */
+int readFileSpec (struct lexSpec *spec)
+{
+    struct lexContext *lc;
+    int c, i, errors = 0;
+    FILE *spec_inf = 0;
+    WRBUF lineBuf;
+    char fname[256];
+
+    /* the %.240s precision keeps name plus suffix inside fname[256] */
+#if HAVE_TCL_H
+    if (spec->tcl_interp)
+    {
+        sprintf (fname, "%.240s.tflt", spec->name);
+        spec_inf = data1_path_fopen (spec->dh, fname, "r");
+    }
+#endif
+    if (!spec_inf)
+    {
+        sprintf (fname, "%.240s.flt", spec->name);
+        spec_inf = data1_path_fopen (spec->dh, fname, "r");
+    }
+    if (!spec_inf)
+    {
+        yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name);
+        return -1;
+    }
+    yaz_log (YLOG_LOG, "reading regx filter %s", fname);
+#if HAVE_TCL_H
+    if (spec->tcl_interp)
+        yaz_log (YLOG_LOG, "Tcl enabled");
+#endif
+
+#if 0
+    debug_dfa_trav = 0;
+    debug_dfa_tran = 1;
+    debug_dfa_followpos = 0;
+    dfa_verbose = 1;
+#endif
+
+    lineBuf = wrbuf_alloc();
+    spec->lineNo = 0;
+    c = getc (spec_inf);
+    while (c != EOF)
+    {
+        wrbuf_rewind (lineBuf);
+        if (c == '#' || c == '\n' || c
== ' ' || c == '\t' || c == '\r')
+        {
+            while (c != '\n' && c != EOF)   /* skip comment or blank line */
+                c = getc (spec_inf);
+            spec->lineNo++;
+            if (c == '\n')
+                c = getc (spec_inf);
+        }
+        else
+        {
+            int addLine = 0;
+
+            while (1)   /* gather one rule; lines starting with blank/tab continue it */
+            {
+                int c1 = c;
+                wrbuf_putc(lineBuf, c);
+                c = getc (spec_inf);
+                while (c == '\r')
+                    c = getc (spec_inf);
+                if (c == EOF)
+                    break;
+                if (c1 == '\n')
+                {
+                    if (c != ' ' && c != '\t')
+                        break;
+                    addLine++;
+                }
+            }
+            wrbuf_putc(lineBuf, '\0');
+            readOneSpec (spec, wrbuf_buf(lineBuf)); /* NOTE(review): result ignored; 'errors' is never incremented */
+            spec->lineNo += addLine;
+        }
+    }
+    fclose (spec_inf);
+    wrbuf_free(lineBuf, 1);
+
+    for (lc = spec->context; lc; lc = lc->next)
+    {
+        struct lexRule *rp;
+        lc->fastRule = (struct lexRuleInfo **)
+            xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
+        for (i = 0; i < lc->ruleNo; i++)
+            lc->fastRule[i] = NULL;
+        for (rp = lc->rules; rp; rp = rp->next)
+            lc->fastRule[rp->info.no] = &rp->info;  /* rule number -> rule info */
+        dfa_mkstate (lc->dfa);
+    }
+    if (errors)
+        return -1;
+
+    return 0;
+}
+
+#if 0
+static struct lexSpec *curLexSpec = NULL;
+#endif
+/* Add elen bytes of data at the current level.  With attribute_str set the
+ * bytes become (or are appended to) that attribute of the enclosing tag;
+ * otherwise they are accumulated as text in the level's concat buffer. */
+static void execData (struct lexSpec *spec,
+                      const char *ebuf, int elen, int formatted_text,
+                      const char *attribute_str, int attribute_len)
+{
+    struct data1_node *res, *parent;
+    int org_len;
+
+    if (elen == 0) /* shouldn't happen, but it does! */
+        return ;
+#if REGX_DEBUG
+    if (elen > 80)
+        yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
+                 ebuf, 40, ebuf + elen-40);
+    else if (elen == 1 && ebuf[0] == '\n')
+    {
+        yaz_log (YLOG_LOG, "data(new line)");
+    }
+    else if (elen > 0)
+        yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
+    else
+        yaz_log (YLOG_LOG, "data(%d bytes)", elen);
+#endif
+
+    if (spec->d1_level <= 1)
+        return;
+
+    parent = spec->d1_stack[spec->d1_level -1];
+    assert (parent);
+
+    if (attribute_str)
+    {
+        data1_xattr **ap;
+        res = parent;
+        if (res->which != DATA1N_tag)
+            return;
+        /* sweep through existing attributes..
*/
+        for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next)
+            if (strlen((*ap)->name) == (size_t) attribute_len &&
+                !memcmp((*ap)->name, attribute_str, attribute_len))
+                break;
+        if (!*ap)
+        {
+            /* new attribute. Create it with name + value */
+            *ap = nmem_malloc(spec->m, sizeof(**ap));
+
+            (*ap)->name = nmem_malloc(spec->m, attribute_len+1);
+            memcpy((*ap)->name, attribute_str, attribute_len);
+            (*ap)->name[attribute_len] = '\0';
+
+            (*ap)->value = nmem_malloc(spec->m, elen+1);
+            memcpy((*ap)->value, ebuf, elen);
+            (*ap)->value[elen] = '\0';
+            (*ap)->next = 0;
+        }
+        else
+        {
+            /* append to value if attribute already exists.  The old length
+             * is measured once: after the second memcpy the new buffer has
+             * no terminator yet, so strlen must not be used on it. */
+            size_t old_len = strlen((*ap)->value);
+            char *nv = nmem_malloc(spec->m, old_len + elen + 1);
+            memcpy (nv, (*ap)->value, old_len);
+            memcpy (nv + old_len, ebuf, elen);
+            nv[old_len + elen] = '\0';
+            (*ap)->value = nv;
+        }
+    }
+    else
+    {
+        /* text data: continue the data node at this level or start one */
+        if ((res = spec->d1_stack[spec->d1_level]) &&
+            res->which == DATA1N_data)
+            org_len = res->u.data.len;
+        else
+        {
+            org_len = 0;
+
+            res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
+            res->u.data.what = DATA1I_text;
+            res->u.data.len = 0;
+            res->u.data.formatted_text = formatted_text;
+            res->u.data.data = 0;
+
+            if (spec->d1_stack[spec->d1_level])
+                spec->d1_stack[spec->d1_level]->next = res;
+            spec->d1_stack[spec->d1_level] = res;
+        }
+        /* grow the level's concat buffer when the new bytes do not fit */
+        if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
+        {
+            char *old_buf, *new_buf;
+
+            spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
+            new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
+            if ((old_buf = spec->concatBuf[spec->d1_level].buf))
+            {
+                memcpy (new_buf, old_buf, org_len);
+                xfree (old_buf);
+            }
+            spec->concatBuf[spec->d1_level].buf = new_buf;
+        }
+        memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
+        res->u.data.len += elen;
+    }
+}
+
+/* execData without an attribute: plain text data. */
+static void execDataP (struct lexSpec *spec,
+                       const char *ebuf, int elen, int formatted_text)
+{
+    execData (spec, ebuf, elen, formatted_text, 0, 0);
+}
+
+static void tagDataRelease (struct lexSpec
*spec) +{ + data1_node *res; + + if ((res = spec->d1_stack[spec->d1_level]) && + res->which == DATA1N_data && + res->u.data.what == DATA1I_text) + { + assert (!res->u.data.data); + assert (res->u.data.len > 0); + if (res->u.data.len > DATA1_LOCALDATA) + res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len); + else + res->u.data.data = res->lbuf; + memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf, + res->u.data.len); + } +} + +static void variantBegin (struct lexSpec *spec, + const char *class_str, int class_len, + const char *type_str, int type_len, + const char *value_str, int value_len) +{ + struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL]; + data1_vartype *tp; + int i; + data1_node *res; + + if (spec->d1_level == 0) + { + yaz_log (YLOG_WARN, "in variant begin. No record type defined"); + return ; + } + if (class_len >= DATA1_MAX_SYMBOL) + class_len = DATA1_MAX_SYMBOL-1; + memcpy (tclass, class_str, class_len); + tclass[class_len] = '\0'; + + if (type_len >= DATA1_MAX_SYMBOL) + type_len = DATA1_MAX_SYMBOL-1; + memcpy (ttype, type_str, type_len); + ttype[type_len] = '\0'; + +#if REGX_DEBUG + yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype, + spec->d1_level); +#endif + + if (!(tp = + data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn, + tclass, ttype))) + return; + + if (parent->which != DATA1N_variant) + { + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); + if (spec->d1_stack[spec->d1_level]) + tagDataRelease (spec); + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; + } + for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--) + if (spec->d1_stack[i]->u.variant.type == tp) + { + spec->d1_level = i; + break; + } + +#if REGX_DEBUG + yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level); +#endif + parent = spec->d1_stack[spec->d1_level-1]; + res = data1_mk_node2 (spec->dh, 
spec->m, DATA1N_variant, parent);
+    res->u.variant.type = tp;
+
+    if (value_len >= DATA1_LOCALDATA)
+        value_len =DATA1_LOCALDATA-1;
+    memcpy (res->lbuf, value_str, value_len);
+    res->lbuf[value_len] = '\0';
+
+    res->u.variant.value = res->lbuf;
+
+    if (spec->d1_stack[spec->d1_level])
+        tagDataRelease (spec);
+    spec->d1_stack[spec->d1_level] = res;
+    spec->d1_stack[++(spec->d1_level)] = NULL;
+}
+
+/* Trim leading and trailing white space from the tag name (*tag, *len) by
+ * adjusting the pointer and the length in place. */
+static void tagStrip (const char **tag, int *len)
+{
+    int i;
+
+    /* isspace() needs an unsigned char value: a plain char may be negative
+     * for bytes >= 0x80 */
+    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
+        ;
+    *len = i;
+    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
+        ;
+    *tag += i;
+    *len -= i;
+}
+
+/* Open a new element called tag below the current node and make it the
+ * current level.  Warns and returns when no record has been started. */
+static void tagBegin (struct lexSpec *spec,
+                      const char *tag, int len)
+{
+    if (spec->d1_level == 0)
+    {
+        yaz_log (YLOG_WARN, "in element begin. No record type defined");
+        return ;
+    }
+    tagStrip (&tag, &len);
+    if (spec->d1_stack[spec->d1_level])
+        tagDataRelease (spec);
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
+#endif
+
+    spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
+        spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
+    spec->d1_stack[++(spec->d1_level)] = NULL;
+}
+
+/* Close elements down to, but not below, min_level.  With a tag name the
+ * unwinding stops at the first tag node with that name; with tag == NULL it
+ * stops at the first tag node reached. */
+static void tagEnd (struct lexSpec *spec, int min_level,
+                    const char *tag, int len)
+{
+    tagStrip (&tag, &len);
+    while (spec->d1_level > min_level)
+    {
+        tagDataRelease (spec);
+        (spec->d1_level)--;
+        if (spec->d1_level == 0)
+            break;
+        if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
+            (!tag ||
+             (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
+              (size_t) len &&
+              !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
+            break;
+    }
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level);
+#endif
+}
+
+
+static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
+                     struct DFA *dfa, int greedy)
+{
+    struct DFA_state *state = dfa->states[0];
+    struct DFA_tran *t;
+    unsigned char c = 0;
+    unsigned char c_prev = 0;
+    int ptr = *pptr;          /* current pointer */
+    int start_ptr =
*pptr;    /* first char of match */
+    int last_ptr = 0;         /* last char of match */
+    int last_rule = 0;        /* rule number of current match */
+    int restore_ptr = 0;      /* position to resume from after a failed attempt */
+    int i;
+
+    if (ptr)
+    {
+        --ptr;                /* re-read the byte before the start position into c */
+        c = f_win_advance (spec, &ptr);
+    }
+    while (1)
+    {
+        if (dfa->states[0] == state)
+        {
+            c_prev = c;
+            restore_ptr = ptr;
+        }
+        c = f_win_advance (spec, &ptr);
+
+        if (ptr == F_WIN_EOF) /* input exhausted: report the pending match, if any */
+        {
+            if (last_rule)
+            {
+                *mptr = start_ptr;
+                *pptr = last_ptr;
+                return 1;
+            }
+            break;
+        }
+
+        t = state->trans;
+        i = state->tran_no;
+        while (1)
+            if (--i < 0)    /* no transition for character c */
+            {
+                if (last_rule)
+                {
+                    *mptr = start_ptr;     /* match starts here */
+                    *pptr = last_ptr;      /* match end here (+1) */
+                    return 1;
+                }
+                state = dfa->states[0];    /* restart one byte further on */
+
+                ptr = restore_ptr;
+                c = f_win_advance (spec, &ptr);
+
+                start_ptr = ptr;
+
+                break;
+            }
+            else if (c >= t->ch[0] && c <= t->ch[1])
+            {
+                state = dfa->states[t->to];
+                if (state->rule_no && c_prev == '\n') /* rule_no only counts right after a newline */
+                {
+                    last_rule = state->rule_no;
+                    last_ptr = ptr;
+                }
+                else if (state->rule_nno)
+                {
+                    last_rule = state->rule_nno;
+                    last_ptr = ptr;
+                }
+                break;
+            }
+            else
+                t++;
+    }
+    return 0;
+}
+/* Read the next token of an action string at *src.  Returns 0 at the end of
+ * the string, 1 for a statement separator (newline or ';'), 3 for a word
+ * starting with '-', and 2 for any other token ($N argument, quoted string
+ * or plain word).  For 2 and 3 the token is returned in *tokBuf/*tokLen. */
+static int execTok (struct lexSpec *spec, const char **src,
+                    const char **tokBuf, int *tokLen)
+{
+    const char *s = *src;
+
+    while (*s == ' ' || *s == '\t')
+        s++;
+    if (!*s)
+        return 0;
+    if (*s == '$' && s[1] >= '0' && s[1] <= '9')
+    {
+        int n = 0;
+        s++;
+        while (*s >= '0' && *s <= '9')
+            n = n*10 + (*s++ -'0');
+        if (spec->arg_no == 0)
+        {
+            *tokBuf = "";
+            *tokLen = 0;
+        }
+        else
+        {
+            if (n >= spec->arg_no)  /* clamp to the last available argument */
+                n = spec->arg_no-1;
+            *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
+                                 tokLen);
+        }
+    }
+    else if (*s == '\"')
+    {
+        *tokBuf = ++s;
+        while (*s && *s != '\"')
+            s++;
+        *tokLen = s - *tokBuf;
+        if (*s)
+            s++;
+        *src = s;
+    }
+    else if (*s == '\n' || *s == ';')
+    {
+        *src = s+1;
+        return 1;
+    }
+    else if (*s == '-')
+    {
+        *tokBuf = s++;
+        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+               *s != ';')
+            s++;
+        *tokLen = s - *tokBuf;
+        *src = s;
+        return 3;
+    }
+    else
+    {
+        *tokBuf = s++;
+        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+               *s != ';')
+            s++;
+        *tokLen = s - *tokBuf;
+    }
+    *src = s;
+    return 2;
+}
+
+/* Copy at most 63 bytes of src[0..len-1] into str, NUL-terminate it and
+ * return str; str must have room for 64 bytes. */
+static char *regxStrz (const char *src, int len, char *str)
+{
+    if (len > 63)
+        len = 63;
+    memcpy (str, src, len);
+    str[len] = '\0';
+    return str;
+}
+
+#if HAVE_TCL_H
+/* Tcl command "begin": handles "record <absyn>", "element <tag>",
+ * "variant <class> <type> <value>" and "context <name>".  Any other form
+ * yields TCL_ERROR. */
+static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
+                          int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    if (argc < 2)
+        return TCL_ERROR;
+    if (!strcmp(argv[1], "record") && argc == 3)
+    {
+        const char *absynName = argv[2];
+        data1_node *res;
+
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "begin record %s", absynName);
+#endif
+        res = data1_mk_root (spec->dh, spec->m, absynName);
+
+        spec->d1_level = 0;
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        spec->d1_stack[spec->d1_level] = NULL;
+    }
+    else if (!strcmp(argv[1], "element") && argc == 3)
+    {
+        tagBegin (spec, argv[2], strlen(argv[2]));
+    }
+    else if (!strcmp (argv[1], "variant") && argc == 5)
+    {
+        variantBegin (spec, argv[2], strlen(argv[2]),
+                      argv[3], strlen(argv[3]),
+                      argv[4], strlen(argv[4]));
+    }
+    else if (!strcmp (argv[1], "context") && argc == 3)
+    {
+        struct lexContext *lc = spec->context;
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "begin context %s",argv[2]);
+#endif
+        while (lc && strcmp (argv[2], lc->name))
+            lc = lc->next;
+        if (!lc)
+            yaz_log (YLOG_WARN, "unknown context %s", argv[2]);
+        else if (spec->context_stack_top + 1 >= spec->context_stack_size)
+        {
+            /* the stack holds context_stack_size entries; refuse to
+             * write past its end */
+            yaz_log (YLOG_WARN, "context stack overflow at %s", argv[2]);
+        }
+        else
+        {
+            spec->context_stack[++(spec->context_stack_top)] = lc;
+        }
+    }
+    else
+        return TCL_ERROR;
+    return TCL_OK;
+}
+
+static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
+                        int argc, const char **argv)
+{
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+    if (argc < 2)
+        return TCL_ERROR;
+
+    if (!strcmp (argv[1], "record"))
+    {
+
while (spec->d1_level)  /* flush pending data at every open level */
+        {
+            tagDataRelease (spec);
+            (spec->d1_level)--;
+        }
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "end record");
+#endif
+        spec->stop_flag = 1;
+    }
+    else if (!strcmp (argv[1], "element"))
+    {
+        int min_level = 2;      /* default: never unwind below level 2 */
+        const char *element = 0;
+        if (argc >= 3 && !strcmp(argv[2], "-record"))
+        {
+            min_level = 0;      /* -record allows unwinding all the way */
+            if (argc == 4)
+                element = argv[3];
+        }
+        else
+            if (argc == 3)
+                element = argv[2];
+        tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
+        if (spec->d1_level <= 1)
+        {
+#if REGX_DEBUG
+            yaz_log (YLOG_LOG, "end element end records");
+#endif
+            spec->stop_flag = 1;
+        }
+    }
+    else if (!strcmp (argv[1], "context"))
+    {
+#if REGX_DEBUG
+        yaz_log (YLOG_LOG, "end context");
+#endif
+        if (spec->context_stack_top)    /* entry 0 is never popped */
+            (spec->context_stack_top)--;
+    }
+    else
+        return TCL_ERROR;
+    return TCL_OK;
+}
+/* Tcl command "data ?-text? ?-element e? ?-attribute a? string...": adds each
+ * remaining argument as data via execData, optionally wrapped in element e
+ * and/or stored as attribute a. */
+static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
+                         int argc, const char **argv)
+{
+    int argi = 1;
+    int textFlag = 0;
+    const char *element = 0;
+    const char *attribute = 0;
+    struct lexSpec *spec = (struct lexSpec *) clientData;
+
+    while (argi < argc)
+    {
+        if (!strcmp("-text", argv[argi]))
+        {
+            textFlag = 1;
+            argi++;
+        }
+        else if (!strcmp("-element", argv[argi]))
+        {
+            argi++;
+            if (argi < argc)
+                element = argv[argi++];
+        }
+        else if (!strcmp("-attribute", argv[argi]))
+        {
+            argi++;
+            if (argi < argc)
+                attribute = argv[argi++];
+        }
+        else
+            break;
+    }
+    if (element)
+        tagBegin (spec, element, strlen(element));
+
+    while (argi < argc)
+    {
+#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
+        Tcl_DString ds;
+        char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
+        execData (spec, native, strlen(native), textFlag, attribute,
+                  attribute ? strlen(attribute) : 0);
+        Tcl_DStringFree (&ds);
+#else
+        execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute,
+                  attribute ?
strlen(attribute) : 0); +#endif + argi++; + } + if (element) + tagEnd (spec, 2, NULL, 0); + return TCL_OK; +} + +static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, + int argc, const char **argv) +{ + struct lexSpec *spec = (struct lexSpec *) clientData; + int argi = 1; + int offset = 0; + int no; + + while (argi < argc) + { + if (!strcmp("-offset", argv[argi])) + { + argi++; + if (argi < argc) + { + offset = atoi(argv[argi]); + argi++; + } + } + else + break; + } + if (argi != argc-1) + return TCL_ERROR; + no = atoi(argv[argi]); + if (no >= spec->arg_no) + no = spec->arg_no - 1; + spec->ptr = spec->arg_start[no] + offset; + return TCL_OK; +} + +static void execTcl (struct lexSpec *spec, struct regxCode *code) +{ + int i; + int ret; + for (i = 0; i < spec->arg_no; i++) + { + char var_name[10], *var_buf; + int var_len, ch; + + sprintf (var_name, "%d", i); + var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i], + &var_len); + if (var_buf) + { + ch = var_buf[var_len]; + var_buf[var_len] = '\0'; + Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0); + var_buf[var_len] = ch; + } + } +#if HAVE_TCL_OBJECTS + ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); +#else + ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#endif + if (ret != TCL_OK) + { + const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); + yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", + spec->tcl_interp->errorLine, + spec->tcl_interp->result, + err ? 
err : "[NO ERRORINFO]"); + } +} +/* HAVE_TCL_H */ +#endif + +static void execCode (struct lexSpec *spec, struct regxCode *code) +{ + const char *s = code->str; + int cmd_len, r; + const char *cmd_str; + + r = execTok (spec, &s, &cmd_str, &cmd_len); + while (r) + { + char *p, ptmp[64]; + + if (r == 1) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + continue; + } + p = regxStrz (cmd_str, cmd_len, ptmp); + if (!strcmp (p, "begin")) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + { + yaz_log (YLOG_WARN, "missing keyword after 'begin'"); + continue; + } + p = regxStrz (cmd_str, cmd_len, ptmp); + if (!strcmp (p, "record")) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + if (spec->d1_level <= 1) + { + static char absynName[64]; + data1_node *res; + + if (cmd_len > 63) + cmd_len = 63; + memcpy (absynName, cmd_str, cmd_len); + absynName[cmd_len] = '\0'; +#if REGX_DEBUG + yaz_log (YLOG_LOG, "begin record %s", absynName); +#endif + res = data1_mk_root (spec->dh, spec->m, absynName); + + spec->d1_level = 0; + + spec->d1_stack[spec->d1_level++] = res; + + res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); + + spec->d1_stack[spec->d1_level++] = res; + + spec->d1_stack[spec->d1_level] = NULL; + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "element")) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + tagBegin (spec, cmd_str, cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "variant")) + { + int class_len; + const char *class_str = NULL; + int type_len; + const char *type_str = NULL; + int value_len; + const char *value_str = NULL; + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + class_str = cmd_str; + class_len = cmd_len; + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + type_str = cmd_str; + type_len = cmd_len; + + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + 
value_str = cmd_str; + value_len = cmd_len; + + variantBegin (spec, class_str, class_len, + type_str, type_len, value_str, value_len); + + + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "context")) + { + if (r > 1) + { + struct lexContext *lc = spec->context; + r = execTok (spec, &s, &cmd_str, &cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); +#if REGX_DEBUG + yaz_log (YLOG_LOG, "begin context %s", p); +#endif + while (lc && strcmp (p, lc->name)) + lc = lc->next; + if (lc) + spec->context_stack[++(spec->context_stack_top)] = lc; + else + yaz_log (YLOG_WARN, "unknown context %s", p); + + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + { + yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p); + } + } + else if (!strcmp (p, "end")) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + { + yaz_log (YLOG_WARN, "missing keyword after 'end'"); + continue; + } + p = regxStrz (cmd_str, cmd_len, ptmp); + if (!strcmp (p, "record")) + { + while (spec->d1_level) + { + tagDataRelease (spec); + (spec->d1_level)--; + } + r = execTok (spec, &s, &cmd_str, &cmd_len); +#if REGX_DEBUG + yaz_log (YLOG_LOG, "end record"); +#endif + spec->stop_flag = 1; + } + else if (!strcmp (p, "element")) + { + int min_level = 2; + while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) + { + if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) + min_level = 0; + } + if (r > 2) + { + tagEnd (spec, min_level, cmd_str, cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + tagEnd (spec, min_level, NULL, 0); + if (spec->d1_level <= 1) + { +#if REGX_DEBUG + yaz_log (YLOG_LOG, "end element end records"); +#endif + spec->stop_flag = 1; + } + + } + else if (!strcmp (p, "context")) + { +#if REGX_DEBUG + yaz_log (YLOG_LOG, "end context"); +#endif + if (spec->context_stack_top) + (spec->context_stack_top)--; + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + yaz_log (YLOG_WARN, "bad keyword '%s' after end", p); + } + else if 
(!strcmp (p, "data")) + { + int textFlag = 0; + int element_len; + const char *element_str = NULL; + int attribute_len; + const char *attribute_str = NULL; + + while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) + { + if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len)) + textFlag = 1; + else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len)) + { + r = execTok (spec, &s, &element_str, &element_len); + if (r < 2) + break; + } + else if (cmd_len==10 && !memcmp ("-attribute", cmd_str, + cmd_len)) + { + r = execTok (spec, &s, &attribute_str, &attribute_len); + if (r < 2) + break; + } + else + yaz_log (YLOG_WARN, "bad data option: %.*s", + cmd_len, cmd_str); + } + if (r != 2) + { + yaz_log (YLOG_WARN, "missing data item after data"); + continue; + } + if (element_str) + tagBegin (spec, element_str, element_len); + do + { + execData (spec, cmd_str, cmd_len, textFlag, + attribute_str, attribute_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } while (r > 1); + if (element_str) + tagEnd (spec, 2, NULL, 0); + } + else if (!strcmp (p, "unread")) + { + int no, offset; + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len)) + { + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + { + yaz_log (YLOG_WARN, "missing number after -offset"); + continue; + } + p = regxStrz (cmd_str, cmd_len, ptmp); + offset = atoi (p); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + offset = 0; + if (r < 2) + { + yaz_log (YLOG_WARN, "missing index after unread command"); + continue; + } + if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9') + { + yaz_log (YLOG_WARN, "bad index after unread command"); + continue; + } + else + { + no = *cmd_str - '0'; + if (no >= spec->arg_no) + no = spec->arg_no - 1; + spec->ptr = spec->arg_start[no] + offset; + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "context")) + { + if (r > 1) + { + struct lexContext *lc = spec->context; + r = 
execTok (spec, &s, &cmd_str, &cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); + + while (lc && strcmp (p, lc->name)) + lc = lc->next; + if (lc) + spec->context_stack[spec->context_stack_top] = lc; + else + yaz_log (YLOG_WARN, "unknown context %s", p); + + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + { + yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str); + r = execTok (spec, &s, &cmd_str, &cmd_len); + continue; + } + if (r > 1) + { + yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str); + do { + r = execTok (spec, &s, &cmd_str, &cmd_len); + } while (r > 1); + } + } +} + + +static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, + int start_ptr, int *pptr) +{ + int sptr; + int arg_start[20]; + int arg_end[20]; + int arg_no = 1; + + if (!ap) + return 1; + arg_start[0] = start_ptr; + arg_end[0] = *pptr; + spec->arg_start = arg_start; + spec->arg_end = arg_end; + + while (ap) + { + switch (ap->which) + { + case REGX_PATTERN: + if (ap->u.pattern.body) + { + arg_start[arg_no] = *pptr; + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0)) + { + arg_end[arg_no] = F_WIN_EOF; + arg_no++; + arg_start[arg_no] = F_WIN_EOF; + arg_end[arg_no] = F_WIN_EOF; + yaz_log(YLOG_DEBUG, "Pattern match rest of record"); + *pptr = F_WIN_EOF; + } + else + { + arg_end[arg_no] = sptr; + arg_no++; + arg_start[arg_no] = sptr; + arg_end[arg_no] = *pptr; + } + } + else + { + arg_start[arg_no] = *pptr; + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1)) + return 1; + if (sptr != arg_start[arg_no]) + return 1; + arg_end[arg_no] = *pptr; + } + arg_no++; + break; + case REGX_CODE: + spec->arg_no = arg_no; + spec->ptr = *pptr; +#if HAVE_TCL_H + if (spec->tcl_interp) + execTcl(spec, ap->u.code); + else + execCode (spec, ap->u.code); +#else + execCode (spec, ap->u.code); +#endif + *pptr = spec->ptr; + if (spec->stop_flag) + return 0; + break; + case REGX_END: + arg_start[arg_no] = *pptr; + arg_end[arg_no] = F_WIN_EOF; + arg_no++; + 
*pptr = F_WIN_EOF; + } + ap = ap->next; + } + return 1; +} + +static int execRule (struct lexSpec *spec, struct lexContext *context, + int ruleNo, int start_ptr, int *pptr) +{ +#if REGX_DEBUG + yaz_log (YLOG_LOG, "exec rule %d", ruleNo); +#endif + return execAction (spec, context->fastRule[ruleNo]->actionList, + start_ptr, pptr); +} + +data1_node *lexNode (struct lexSpec *spec, int *ptr) +{ + struct lexContext *context = spec->context_stack[spec->context_stack_top]; + struct DFA_state *state = context->dfa->states[0]; + struct DFA_tran *t; + unsigned char c; + unsigned char c_prev = '\n'; + int i; + int last_rule = 0; /* rule number of current match */ + int last_ptr = *ptr; /* last char of match */ + int start_ptr = *ptr; /* first char of match */ + int skip_ptr = *ptr; /* first char of run */ + + while (1) + { + c = f_win_advance (spec, ptr); + if (*ptr == F_WIN_EOF) + { + /* end of file met */ + if (last_rule) + { + /* there was a match */ + if (skip_ptr < start_ptr) + { + /* deal with chars that didn't match */ + int size; + char *buf; + buf = f_win_get (spec, skip_ptr, start_ptr, &size); + execDataP (spec, buf, size, 0); + } + /* restore pointer */ + *ptr = last_ptr; + /* execute rule */ + if (!execRule (spec, context, last_rule, start_ptr, ptr)) + break; + /* restore skip pointer */ + skip_ptr = *ptr; + last_rule = 0; + } + else if (skip_ptr < *ptr) + { + /* deal with chars that didn't match */ + int size; + char *buf; + buf = f_win_get (spec, skip_ptr, *ptr, &size); + execDataP (spec, buf, size, 0); + } + if (*ptr == F_WIN_EOF) + break; + } + t = state->trans; + i = state->tran_no; + while (1) + if (--i < 0) + { /* no transition for character c ... 
*/ + if (last_rule) + { + if (skip_ptr < start_ptr) + { + /* deal with chars that didn't match */ + int size; + char *buf; + buf = f_win_get (spec, skip_ptr, start_ptr, &size); + execDataP (spec, buf, size, 0); + } + /* restore pointer */ + *ptr = last_ptr; + if (!execRule (spec, context, last_rule, start_ptr, ptr)) + { + if (spec->f_win_ef && *ptr != F_WIN_EOF) + { +#if REGX_DEBUG + yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr); +#endif + (*spec->f_win_ef)(spec->f_win_fh, *ptr); + } + return NULL; + } + context = spec->context_stack[spec->context_stack_top]; + skip_ptr = *ptr; + last_rule = 0; + last_ptr = start_ptr = *ptr; + if (start_ptr > 0) + { + --start_ptr; + c_prev = f_win_advance (spec, &start_ptr); + } + } + else + { + c_prev = f_win_advance (spec, &start_ptr); + *ptr = start_ptr; + } + state = context->dfa->states[0]; + break; + } + else if (c >= t->ch[0] && c <= t->ch[1]) + { /* transition ... */ + state = context->dfa->states[t->to]; + if (state->rule_no) + { + if (c_prev == '\n') + { + last_rule = state->rule_no; + last_ptr = *ptr; + } + else if (state->rule_nno) + { + last_rule = state->rule_nno; + last_ptr = *ptr; + } + } + break; + } + else + t++; + } + return NULL; +} + +static data1_node *lexRoot (struct lexSpec *spec, off_t offset, + const char *context_name) +{ + struct lexContext *lt = spec->context; + int ptr = offset; + + spec->stop_flag = 0; + spec->d1_level = 0; + spec->context_stack_top = 0; + while (lt) + { + if (!strcmp (lt->name, context_name)) + break; + lt = lt->next; + } + if (!lt) + { + yaz_log (YLOG_WARN, "cannot find context %s", context_name); + return NULL; + } + spec->context_stack[spec->context_stack_top] = lt; + spec->d1_stack[spec->d1_level] = NULL; +#if 1 + if (!lt->initFlag) + { + lt->initFlag = 1; + execAction (spec, lt->initActionList, ptr, &ptr); + } +#endif + execAction (spec, lt->beginActionList, ptr, &ptr); + lexNode (spec, &ptr); + while (spec->d1_level) + { + tagDataRelease (spec); + (spec->d1_level)--; + } + 
execAction (spec, lt->endActionList, ptr, &ptr); + return spec->d1_stack[0]; +} + +void grs_destroy(void *clientData) +{ + struct lexSpecs *specs = (struct lexSpecs *) clientData; + if (specs->spec) + { + lexSpecDestroy(&specs->spec); + } + xfree (specs); +} + +void *grs_init(Res res, RecType recType) +{ + struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs)); + specs->spec = 0; + strcpy(specs->type, ""); + return specs; +} + + +ZEBRA_RES grs_config(void *clientData, Res res, const char *args) +{ + struct lexSpecs *specs = (struct lexSpecs *) clientData; + if (strlen(args) < sizeof(specs->type)) + strcpy(specs->type, args); + return ZEBRA_OK; +} + +data1_node *grs_read_regx (struct grs_read_info *p) +{ + int res; + struct lexSpecs *specs = (struct lexSpecs *) p->clientData; + struct lexSpec **curLexSpec = &specs->spec; + +#if REGX_DEBUG + yaz_log (YLOG_LOG, "grs_read_regx"); +#endif + if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type)) + { + if (*curLexSpec) + lexSpecDestroy (curLexSpec); + *curLexSpec = lexSpecCreate (specs->type, p->dh); + res = readFileSpec (*curLexSpec); + if (res) + { + lexSpecDestroy (curLexSpec); + return NULL; + } + } + (*curLexSpec)->dh = p->dh; + if (!p->offset) + { + (*curLexSpec)->f_win_start = 0; + (*curLexSpec)->f_win_end = 0; + (*curLexSpec)->f_win_rf = p->readf; + (*curLexSpec)->f_win_sf = p->seekf; + (*curLexSpec)->f_win_fh = p->fh; + (*curLexSpec)->f_win_ef = p->endf; + (*curLexSpec)->f_win_size = 500000; + } + (*curLexSpec)->m = p->mem; + return lexRoot (*curLexSpec, p->offset, "main"); +} + +static int extract_regx(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_regx); +} + +static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_regx); +} + +static struct recType regx_type = { + 0, + "grs.regx", + grs_init, + grs_config, + grs_destroy, + extract_regx, + retrieve_regx, +}; 
+
+
+#if HAVE_TCL_H
+/*
+ * Read one record with a Tcl-scripted lexical specification.
+ *
+ * p->clientData is the struct lexSpecs filled in by grs_init/grs_config.
+ * The cached lexSpec is (re)built when there is none yet or when its name
+ * differs from specs->type: a Tcl interpreter is created, the commands
+ * "begin", "end", "data" and "unread" are registered with the spec as
+ * their client data, and the specification is loaded with readFileSpec().
+ *
+ * Returns NULL when readFileSpec() reports failure; otherwise returns the
+ * result of lexRoot() run from p->offset in the "main" context.
+ */
+data1_node *grs_read_tcl (struct grs_read_info *p)
+{
+    int res;
+    struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
+    struct lexSpec **curLexSpec = &specs->spec;
+
+#if REGX_DEBUG
+    yaz_log (YLOG_LOG, "grs_read_tcl");
+#endif
+    if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
+    {
+        Tcl_Interp *tcl_interp;
+        if (*curLexSpec)
+            lexSpecDestroy (curLexSpec);
+        *curLexSpec = lexSpecCreate (specs->type, p->dh);
+        Tcl_FindExecutable("");
+        tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
+        Tcl_Init(tcl_interp);
+        Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
+        Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
+        Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
+        Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
+                           *curLexSpec, 0);
+        res = readFileSpec (*curLexSpec);
+        if (res)
+        {
+            lexSpecDestroy (curLexSpec);
+            return NULL;
+        }
+    }
+    (*curLexSpec)->dh = p->dh;
+    if (!p->offset)
+    {
+        /* offset 0: reset the read window and bind the stream callbacks */
+        (*curLexSpec)->f_win_start = 0;
+        (*curLexSpec)->f_win_end = 0;
+        (*curLexSpec)->f_win_rf = p->readf;
+        (*curLexSpec)->f_win_sf = p->seekf;
+        (*curLexSpec)->f_win_fh = p->fh;
+        (*curLexSpec)->f_win_ef = p->endf;
+        /* NOTE(review): presumably the window size in bytes - confirm
+           against f_win_get / f_win_advance */
+        (*curLexSpec)->f_win_size = 500000;
+    }
+    (*curLexSpec)->m = p->mem;
+    return lexRoot (*curLexSpec, p->offset, "main");
+}
+
+/* Extract handler for "grs.tcl": zebra_grs_extract() with grs_read_tcl. */
+static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
+{
+    return zebra_grs_extract(clientData, ctrl, grs_read_tcl);
+}
+
+/* Retrieve handler for "grs.tcl": zebra_grs_retrieve() with grs_read_tcl. */
+static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl)
+{
+    return zebra_grs_retrieve(clientData, ctrl, grs_read_tcl);
+}
+
+/* Record type "grs.tcl"; uses the same init/config/destroy handlers as
+   regx_type. */
+static struct recType tcl_type = {
+    0,
+    "grs.tcl",
+    grs_init,
+    grs_config,
+    grs_destroy,
+    extract_tcl,
+    retrieve_tcl,
+};
+
+#endif
+
+/* Null-terminated table of the record types defined in this file.  It is
+   named idzebra_filter_grs_regx when IDZEBRA_STATIC_GRS_REGX is defined
+   and idzebra_filter otherwise. */
+RecType
+#ifdef IDZEBRA_STATIC_GRS_REGX
+idzebra_filter_grs_regx
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &regx_type,
+#if HAVE_TCL_H
+    &tcl_type,
+#endif
+    0,
+};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ *
indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/index/safari.c b/index/safari.c new file mode 100644 index 0000000..f7f2595 --- /dev/null +++ b/index/safari.c @@ -0,0 +1,279 @@ +/* $Id: safari.c,v 1.1 2006-07-03 14:27:09 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
+*/
+
+
+/* NOTE(review): the header names were missing from this copy of the patch;
+   the five below are reconstructed - confirm against upstream
+   index/safari.c. */
+#include <stdio.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include <idzebra/util.h>
+#include <idzebra/recctrl.h>
+
+/* Per-filter state handed to every callback as clientData. */
+struct filter_info {
+    char *sep;   /* never assigned a non-null value in this file */
+};
+
+/* Allocate the filter state with sep cleared. */
+static void *filter_init (Res res, RecType recType)
+{
+    struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
+    tinfo->sep = 0;
+    return tinfo;
+}
+
+/* The safari filter takes no configuration; args is ignored. */
+static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
+{
+    return ZEBRA_OK;
+}
+
+/* Release the state allocated by filter_init. */
+static void filter_destroy(void *clientData)
+{
+    struct filter_info *tinfo = clientData;
+    xfree (tinfo->sep);
+    xfree (tinfo);
+}
+
+/* Small buffered reader on top of the extract control's read callback. */
+struct fi_info {
+    struct recExtractCtrl *p;   /* supplies readf and fh */
+    char *buf;                  /* 4096-byte read buffer */
+    int offset;                 /* index of the next unread byte in buf */
+    int max;                    /* bytes valid in buf; <= 0 once a read failed */
+};
+
+/* Create a reader.  offset and max both start at 1 so that the first
+   fi_getchar() finds the buffer exhausted and performs a read. */
+static struct fi_info *fi_open(struct recExtractCtrl *p)
+{
+    struct fi_info *fi = (struct fi_info *) xmalloc (sizeof(*fi));
+
+    fi->p = p;
+    fi->buf = (char *) xmalloc (4096);
+    fi->offset = 1;
+    fi->max = 1;
+    return fi;
+}
+
+/* Store the next input byte in *dst and return 1, or return 0 when the
+   read callback has returned <= 0.  After that every call returns 0
+   without reading again. */
+static int fi_getchar(struct fi_info *fi, char *dst)
+{
+    if (fi->offset >= fi->max)
+    {
+        if (fi->max <= 0)
+            return 0;
+        fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096);
+        fi->offset = 0;
+        if (fi->max <= 0)
+            return 0;
+    }
+    *dst = fi->buf[(fi->offset)++];
+    return 1;
+}
+
+/* Read one line, without its '\n', into dst.  At most max characters are
+   stored and the rest of an over-long line is dropped; dst is always
+   NUL-terminated, so it must hold max+1 bytes.  Returns 1 when a newline
+   was seen and 0 when input ended first (dst is then not terminated). */
+static int fi_gets(struct fi_info *fi, char *dst, int max)
+{
+    int l = 0;
+    while(1)
+    {
+        char dstbyte;
+        if (!fi_getchar(fi, &dstbyte))
+            return 0;
+        if (dstbyte == '\n')
+            break;
+        if (l < max)
+            dst[l++] = dstbyte;
+    }
+    dst[l] = '\0';
+    return 1;
+}
+
+/* Free a reader created by fi_open. */
+static void fi_close (struct fi_info *fi)
+{
+    xfree (fi->buf);
+    xfree (fi);
+}
+
+/*
+ * Extract index terms from a safari record.
+ *
+ * The first line supplies p->match_criteria.  Every following line is
+ * "<record_id> <section_id> <seqno> <index-name> <term...>"; each one is
+ * passed to p->tokenAdd with index_type '0'.
+ *
+ * Returns RECCTRL_EXTRACT_OK, or RECCTRL_EXTRACT_ERROR_GENERIC when the
+ * first line cannot be read or a later line has fewer than four fields.
+ * The reader is released on every path.
+ */
+static int filter_extract(void *clientData, struct recExtractCtrl *p)
+{
+    struct filter_info *tinfo = clientData;
+    char line[512];
+    RecWord recWord;
+    struct fi_info *fi = fi_open(p);
+
+#if 0
+    yaz_log(YLOG_LOG, "filter_extract off=%ld",
+            (long) (*fi->p->tellf)(fi->p->fh));
+#endif
+    xfree(tinfo->sep);
+    tinfo->sep = 0;
+    (*p->init)(p, &recWord);
+
+    if (!fi_gets(fi, line, sizeof(line)-1))
+    {
+        fi_close(fi);   /* do not leak the reader on the error path */
+        return RECCTRL_EXTRACT_ERROR_GENERIC;
+    }
+    /* NOTE(review): assumes match_criteria has room for 256 bytes -
+       confirm against struct recExtractCtrl */
+    sscanf(line, "%255s", p->match_criteria);
+
+    recWord.index_type = '0';
+    while (fi_gets(fi, line, sizeof(line)-1))
+    {
+        int nor = 0;
+        char field[40];
+        char *cp;
+#if 0
+        yaz_log(YLOG_LOG, "safari line: %s", line);
+#endif
+        if (sscanf(line, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
+                   &recWord.record_id, &recWord.section_id, &recWord.seqno,
+                   field, &nor) < 4)
+        {
+            yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
+            fi_close(fi);   /* do not leak the reader on the error path */
+            return RECCTRL_EXTRACT_ERROR_GENERIC;
+        }
+        /* the term is whatever follows the index name, leading blanks
+           skipped */
+        for (cp = line + nor; *cp == ' '; cp++)
+            ;
+        recWord.index_name = field;
+        recWord.term_buf = cp;
+        recWord.term_len = strlen(cp);
+        (*p->tokenAdd)(&recWord);
+    }
+    fi_close(fi);
+    return RECCTRL_EXTRACT_OK;
+}
+
+/*
+ * Return the record as SUTRS text.
+ *
+ * Element set names: "R" gives the body without the header, "H" the
+ * header only, "B" cuts the result after 4 lines and "M" after 20.  The
+ * header is "Rank:" (only when p->score >= 0), "Local Number:",
+ * "Filename:" (only when p->fname is set) and an empty line.
+ *
+ * p->rec_buf is set to a function-static buffer that is reused by the
+ * next call and is not freed here.  Always returns 0.
+ */
+static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
+{
+    int r, filter_ptr = 0;
+    static char *filter_buf = NULL;
+    static int filter_size = 0;
+    int make_header = 1;
+    int make_body = 1;
+    const char *elementSetName = NULL;
+    int no_lines = 0;
+
+    if (p->comp && p->comp->which == Z_RecordComp_simple &&
+        p->comp->u.simple->which == Z_ElementSetNames_generic)
+        elementSetName = p->comp->u.simple->u.generic;
+
+    if (elementSetName)
+    {
+        /* don't make header for the R(aw) element set name */
+        if (!strcmp(elementSetName, "R"))
+        {
+            make_header = 0;
+            make_body = 1;
+        }
+        /* only make header for the H(eader) element set name */
+        else if (!strcmp(elementSetName, "H"))
+        {
+            make_header = 1;
+            make_body = 0;
+        }
+    }
+    while (1)
+    {
+        /* keep more than 4096 bytes free beyond filter_ptr before
+           writing */
+        if (filter_ptr + 4096 >= filter_size)
+        {
+            char *nb;
+
+            filter_size = 2*filter_size + 8192;
+            nb = (char *) xmalloc (filter_size);
+            if (filter_buf)
+            {
+                memcpy (nb, filter_buf, filter_ptr);
+                xfree (filter_buf);
+            }
+            filter_buf = nb;
+        }
+        if (make_header && filter_ptr == 0)
+        {
+            if (p->score >= 0)
+            {
+                sprintf (filter_buf, "Rank: %d\n", p->score);
+                filter_ptr = strlen(filter_buf);
+            }
+            sprintf (filter_buf + filter_ptr, "Local Number: " ZINT_FORMAT "\n",
+                     p->localno);
+            filter_ptr = strlen(filter_buf);
+            if (p->fname)
+            {
+                sprintf (filter_buf + filter_ptr, "Filename: %s\n", p->fname);
+                filter_ptr = strlen(filter_buf);
+            }
+            strcpy(filter_buf+filter_ptr++, "\n");
+        }
+        if (!make_body)
+            break;
+        r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
+        if (r <= 0)
+            break;
+        filter_ptr += r;
+    }
+    filter_buf[filter_ptr] = '\0';
+    if (elementSetName)
+    {
+        if (!strcmp (elementSetName, "B"))
+            no_lines = 4;
+        if (!strcmp (elementSetName, "M"))
+            no_lines = 20;
+    }
+    if (no_lines)
+    {
+        /* named cp so that it no longer shadows the parameter p */
+        char *cp = filter_buf;
+        int i = 0;
+
+        /* step past up to no_lines newlines; cp becomes NULL when the
+           text has fewer lines, in which case nothing is cut */
+        while (++i <= no_lines && (cp = strchr (cp, '\n')))
+            cp++;
+        if (cp)
+        {
+            cp[1] = '\0';
+            filter_ptr = cp-filter_buf;
+        }
+    }
+    p->output_format = VAL_SUTRS;
+    p->rec_buf = filter_buf;
+    p->rec_len = filter_ptr;
+    return 0;
+}
+
+/* Record type "safari". */
+static struct recType filter_type = {
+    0,
+    "safari",
+    filter_init,
+    filter_config,
+    filter_destroy,
+    filter_extract,
+    filter_retrieve
+};
+
+/* Null-terminated table of the record types defined in this file.  It is
+   named idzebra_filter_safari when IDZEBRA_STATIC_SAFARI is defined and
+   idzebra_filter otherwise. */
+RecType
+#ifdef IDZEBRA_STATIC_SAFARI
+idzebra_filter_safari
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &filter_type,
+    0,
+};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/index/sgmlread.c b/index/sgmlread.c
new file mode 100644
index 0000000..6ab7be3
--- /dev/null
+++ b/index/sgmlread.c
@@ -0,0 +1,145 @@
+/* $Id: sgmlread.c,v 1.1 2006-07-03 14:27:09 adam Exp $
+   Copyright (C) 1995-2005
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.
If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
+/* NOTE(review): the header names were missing from this copy of the patch;
+   the three below are reconstructed - confirm against upstream
+   index/sgmlread.c. */
+#include <assert.h>
+#include <yaz/log.h>
+
+#include <idzebra/recgrs.h>
+
+/* Buffered byte source given to data1_read_nodex() through sgml_getc(). */
+struct sgml_getc_info {
+    char *buf;       /* read buffer of buf_size bytes */
+    int buf_size;
+    int size;        /* bytes currently valid in buf */
+    int off;         /* index of the next unread byte in buf */
+    off_t moffset;   /* stream offset corresponding to buf[0] */
+    void *fh;        /* handle passed to readf */
+    int (*readf)(void *, char *, size_t);
+    WRBUF wrbuf;     /* scratch buffer passed on to data1_read_nodex() */
+};
+
+/*
+ * Return the next byte of the record, or 0 when no more input is
+ * available.  A previous read that did not fill the whole buffer is
+ * taken to mean end of input, so no further read is attempted.  The byte
+ * is returned through a plain char, so its sign follows the platform's
+ * char type.
+ */
+int sgml_getc (void *clientData)
+{
+    struct sgml_getc_info *p = (struct sgml_getc_info *) clientData;
+    int res;
+
+    if (p->off < p->size)
+        return p->buf[(p->off)++];
+    if (p->size < p->buf_size)
+        return 0;
+    /* buffer fully consumed: account for it and refill */
+    p->moffset += p->off;
+    p->off = 0;
+    p->size = 0;
+    res = (*p->readf)(p->fh, p->buf, p->buf_size);
+    if (res > 0)
+    {
+        p->size += res;
+        return p->buf[(p->off)++];
+    }
+    return 0;
+}
+
+/*
+ * Read one SGML record into a data1 tree.
+ *
+ * Binds the stream in p to the per-filter state, primes the buffer with
+ * one read and parses with data1_read_nodex().  Returns 0 when the first
+ * read yields nothing.  When a node was built and p->endf is set, endf is
+ * called with the stream offset just past the last byte consumed.
+ */
+static data1_node *grs_read_sgml (struct grs_read_info *p)
+{
+    struct sgml_getc_info *sgi = (struct sgml_getc_info *) p->clientData;
+    data1_node *node;
+    int res;
+
+    sgi->moffset = p->offset;
+    sgi->fh = p->fh;
+    sgi->readf = p->readf;
+    sgi->off = 0;
+    sgi->size = 0;
+    res = (*sgi->readf)(sgi->fh, sgi->buf, sgi->buf_size);
+    if (res > 0)
+        sgi->size += res;
+    else
+        return 0;
+    node = data1_read_nodex (p->dh, p->mem, sgml_getc, sgi, sgi->wrbuf);
+    if (node && p->endf)
+        (*p->endf)(sgi->fh, sgi->moffset + sgi->off);
+    return node;
+}
+
+/* Allocate the per-filter state: a 512-byte read buffer and a WRBUF.  The
+   cursor and stream fields are zeroed here so that they are never
+   indeterminate; grs_read_sgml() sets them for each record. */
+static void *grs_init_sgml(Res res, RecType recType)
+{
+    struct sgml_getc_info *p = (struct sgml_getc_info *) xmalloc (sizeof(*p));
+    p->buf_size = 512;
+    p->buf = xmalloc (p->buf_size);
+    p->size = 0;
+    p->off = 0;
+    p->moffset = 0;
+    p->fh = 0;
+    p->readf = 0;
+    p->wrbuf = wrbuf_alloc();
+    return p;
+}
+
+/* The SGML filter takes no configuration; args is ignored. */
+static ZEBRA_RES grs_config_sgml(void *clientData, Res res, const char *args)
+{
+    return ZEBRA_OK;
+}
+
+/* Release everything allocated by grs_init_sgml. */
+static void grs_destroy_sgml(void *clientData)
+{
+    struct sgml_getc_info *p = (struct sgml_getc_info *) clientData;
+
+    wrbuf_free(p->wrbuf, 1);
+    xfree (p->buf);
+    xfree (p);
+}
+
+/* Extract handler for "grs.sgml": zebra_grs_extract() with grs_read_sgml. */
+static int grs_extract_sgml(void *clientData, struct recExtractCtrl *ctrl)
+{
+    return zebra_grs_extract(clientData, ctrl, grs_read_sgml);
+}
+
+/* Retrieve handler for "grs.sgml": zebra_grs_retrieve() with
+   grs_read_sgml. */
+static int grs_retrieve_sgml(void *clientData, struct recRetrieveCtrl *ctrl)
+{
+    return zebra_grs_retrieve(clientData, ctrl, grs_read_sgml);
+}
+
+/* Record type "grs.sgml". */
+static struct recType grs_type_sgml =
+{
+    0,
+    "grs.sgml",
+    grs_init_sgml,
+    grs_config_sgml,
+    grs_destroy_sgml,
+    grs_extract_sgml,
+    grs_retrieve_sgml
+};
+
+/* Null-terminated table of the record types defined in this file.  It is
+   named idzebra_filter_grs_sgml when IDZEBRA_STATIC_GRS_SGML is defined
+   and idzebra_filter otherwise. */
+RecType
+#ifdef IDZEBRA_STATIC_GRS_SGML
+idzebra_filter_grs_sgml
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &grs_type_sgml,
+    0,
+};
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/index/xmlread.c b/index/xmlread.c
new file mode 100644
index 0000000..a75ef3c
--- /dev/null
+++ b/index/xmlread.c
@@ -0,0 +1,549 @@
+/* $Id: xmlread.c,v 1.1 2006-07-03 14:27:09 adam Exp $
+   Copyright (C) 1995-2005
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/ + +#if HAVE_EXPAT_H + +#include +#include +#include +#if HAVE_ICONV_H +#include +#include +#endif + +#include + +#include + +#include +#include + +#include + +#define XML_CHUNK 1024 + +struct user_info { + data1_node *d1_stack[256]; + int level; + data1_handle dh; + NMEM nmem; + int loglevel; +}; + +static void cb_start (void *user, const char *el, const char **attr) +{ + struct user_info *ui = (struct user_info*) user; + if (ui->level == 1) + data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el); + ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr, + ui->d1_stack[ui->level-1]); + ui->level++; + yaz_log (ui->loglevel, "cb_start %s", el); +} + +static void cb_end (void *user, const char *el) +{ + struct user_info *ui = (struct user_info*) user; + + ui->level--; + yaz_log (ui->loglevel, "cb_end %s", el); +} + +static void cb_chardata (void *user, const char *s, int len) +{ + struct user_info *ui = (struct user_info*) user; +#if 0 + yaz_log (ui->loglevel, "cb_chardata %.*s", len, s); +#endif + ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len, + ui->d1_stack[ui->level -1]); +} + +static void cb_decl (void *user, const char *version, const char *encoding, + int standalone) +{ + struct user_info *ui = (struct user_info*) user; + const char *attr_list[7]; + + attr_list[0] = "version"; + attr_list[1] = version; + + attr_list[2] = "encoding"; + attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */ + + attr_list[4] = "standalone"; + attr_list[5] = standalone ? "yes" : "no"; + + attr_list[6] = 0; + + data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list, + ui->d1_stack[ui->level-1]); +#if 0 + yaz_log (YLOG_LOG, "decl version=%s encoding=%s", + version ? version : "null", + encoding ? 
encoding : "null"); +#endif +} + +static void cb_processing (void *user, const char *target, + const char *data) +{ + struct user_info *ui = (struct user_info*) user; + data1_node *res = + data1_mk_preprocess (ui->dh, ui->nmem, target, 0, + ui->d1_stack[ui->level-1]); + data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res); + + yaz_log (ui->loglevel, "decl processing target=%s data=%s", + target ? target : "null", + data ? data : "null"); +} + +static void cb_comment (void *user, const char *data) +{ + struct user_info *ui = (struct user_info*) user; + yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null"); + data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]); +} + +static void cb_doctype_start (void *userData, const char *doctypeName, + const char *sysid, const char *pubid, + int has_internal_subset) +{ + struct user_info *ui = (struct user_info*) userData; + yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s", + doctypeName, sysid, pubid); +} + +static void cb_doctype_end (void *userData) +{ + struct user_info *ui = (struct user_info*) userData; + yaz_log (ui->loglevel, "doctype end"); +} + + +static void cb_entity_decl (void *userData, const char *entityName, + int is_parameter_entity, + const char *value, int value_length, + const char *base, const char *systemId, + const char *publicId, const char *notationName) +{ + struct user_info *ui = (struct user_info*) userData; + yaz_log (ui->loglevel, + "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s" + " publicId=%s notationName=%s", + entityName, is_parameter_entity, value_length, value, + base, systemId, publicId, notationName); + +} + +static int cb_external_entity (XML_Parser pparser, + const char *context, + const char *base, + const char *systemId, + const char *publicId) +{ + struct user_info *ui = (struct user_info*) XML_GetUserData(pparser); + FILE *inf; + int done = 0; + XML_Parser parser; + + yaz_log (ui->loglevel, + "external entity 
context=%s base=%s systemid=%s publicid=%s", + context, base, systemId, publicId); + if (!systemId) + return 1; + + if (!(inf = fopen (systemId, "rb"))) + { + yaz_log (YLOG_WARN|YLOG_ERRNO, "fopen %s", systemId); + return 0; + } + + parser = XML_ExternalEntityParserCreate (pparser, "", 0); + while (!done) + { + int r; + void *buf = XML_GetBuffer (parser, XML_CHUNK); + if (!buf) + { + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); + break; + } + r = fread (buf, 1, XML_CHUNK, inf); + if (r == 0) + { + if (ferror(inf)) + { + yaz_log (YLOG_WARN|YLOG_ERRNO, "fread %s", systemId); + break; + } + done = 1; + } + if (!XML_ParseBuffer (parser, r, done)) + { + done = 1; + yaz_log (YLOG_WARN, "%s:%d:%d:XML error: %s", + systemId, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); + } + } + fclose (inf); + XML_ParserFree (parser); + return done; +} + + +#if HAVE_ICONV_H +static int cb_encoding_convert (void *data, const char *s) +{ + iconv_t t = (iconv_t) data; + size_t ret; + size_t outleft = 2; + char outbuf_[2], *outbuf = outbuf_; + size_t inleft = 4; + char *inbuf = (char *) s; + unsigned short code; + +#if 1 + yaz_log(YLOG_LOG, "------------------------- cb_encoding_convert --- "); +#endif + ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft); + if (ret == (size_t) (-1) && errno != E2BIG) + { + iconv (t, 0, 0, 0, 0); + return -1; + } + if (outleft != 0) + return -1; + memcpy (&code, outbuf_, sizeof(short)); + return code; +} + +static void cb_encoding_release (void *data) +{ + iconv_t t = (iconv_t) data; + iconv_close (t); +} + +static int cb_encoding_handler (void *userData, const char *name, + XML_Encoding *info) +{ + int i = 0; + int no_ok = 0; + struct user_info *ui = (struct user_info*) userData; + + iconv_t t = iconv_open ("UNICODE", name); + if (t == (iconv_t) (-1)) + return 0; + + info->data = 0; /* signal that multibyte is not in use */ + yaz_log (ui->loglevel, "Encoding handler of %s", name); + for 
(i = 0; i<256; i++) + { + size_t ret; + char outbuf_[5]; + char inbuf_[5]; + char *inbuf = inbuf_; + char *outbuf = outbuf_; + size_t inleft = 1; + size_t outleft = 2; + inbuf_[0] = i; + + iconv (t, 0, 0, 0, 0); /* reset iconv */ + + ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft); + if (ret == (size_t) (-1)) + { + if (errno == EILSEQ) + { + yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i); + info->map[i] = -1; /* invalid sequence */ + } + if (errno == EINVAL) + { /* multi byte input */ + int len = 2; + int j = 0; + info->map[i] = -1; + + while (len <= 4) + { + char sbuf[80]; + int k; + inbuf = inbuf_; + inleft = len; + outbuf = outbuf_; + outleft = 2; + + inbuf_[len-1] = j; + iconv (t, 0,0,0,0); + + assert (i >= 0 && i<255); + + *sbuf = 0; + for (k = 0; k 255) + break; + } + else if (errno == EINVAL) + { + len++; + j = 7; + } + } + else if (outleft == 0) + { + info->map[i] = -len; + info->data = t; /* signal that multibyte is in use */ + break; + } + else + { + break; + } + } + if (info->map[i] < -1) + yaz_log (ui->loglevel, "Encoding %d: multibyte input %d", + i, -info->map[i]); + else + yaz_log (ui->loglevel, "Encoding %d: multibyte input failed", + i); + } + if (errno == E2BIG) + { + info->map[i] = -1; /* no room for output */ + if (i != 0) + yaz_log (YLOG_WARN, "Encoding %d: no room for output", + i); + } + } + else if (outleft == 0) + { + unsigned short code; + memcpy (&code, outbuf_, sizeof(short)); + info->map[i] = code; + no_ok++; + } + else + { /* should never happen */ + info->map[i] = -1; + yaz_log (YLOG_DEBUG, "Encoding %d: bad state", i); + } + } + if (info->data) + { /* at least one multi byte */ + info->convert = cb_encoding_convert; + info->release = cb_encoding_release; + } + else + { + /* no multi byte - we no longer need iconv handler */ + iconv_close(t); + info->convert = 0; + info->release = 0; + } + if (!no_ok) + return 0; + return 1; +} +/* HAVE_ICONV_H */ +#endif + +static void cb_ns_start(void *userData, const char *prefix, 
const char *uri) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix && uri) + yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri); +} + +static void cb_ns_end(void *userData, const char *prefix) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix) + yaz_log(ui->loglevel, "cb_ns_end %s", prefix); +} +data1_node *zebra_read_xml (data1_handle dh, + int (*rf)(void *, char *, size_t), void *fh, + NMEM m) +{ + XML_Parser parser; + struct user_info uinfo; + int done = 0; + data1_node *first_node; + int no_read = 0; + + uinfo.loglevel = YLOG_DEBUG; + uinfo.level = 1; + uinfo.dh = dh; + uinfo.nmem = m; + uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0); + uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */ + + parser = XML_ParserCreate (0 /* encoding */); + + XML_SetElementHandler (parser, cb_start, cb_end); + XML_SetCharacterDataHandler (parser, cb_chardata); + XML_SetXmlDeclHandler (parser, cb_decl); + XML_SetProcessingInstructionHandler (parser, cb_processing); + XML_SetUserData (parser, &uinfo); + XML_SetCommentHandler (parser, cb_comment); + XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end); + XML_SetEntityDeclHandler (parser, cb_entity_decl); + XML_SetExternalEntityRefHandler (parser, cb_external_entity); + XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end); +#if HAVE_ICONV_H + XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo); +#endif + while (!done) + { + int r; + void *buf = XML_GetBuffer (parser, XML_CHUNK); + if (!buf) + { + /* error */ + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); + break; + } + r = (*rf)(fh, buf, XML_CHUNK); + if (r < 0) + { + /* error */ + yaz_log (YLOG_WARN, "XML read fail"); + break; + } + else if (r == 0) + done = 1; + else + no_read += r; + if (no_read && !XML_ParseBuffer (parser, r, done)) + { + done = 1; + yaz_log (YLOG_WARN, "%d:%d:XML error: %s", + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + 
XML_ErrorString(XML_GetErrorCode(parser))); + } + } + XML_ParserFree (parser); + if (no_read == 0) + return 0; + if (!uinfo.d1_stack[1] || !done) + return 0; + /* insert XML header if not present .. */ + first_node = uinfo.d1_stack[0]->child; + if (first_node->which != DATA1N_preprocess || + strcmp(first_node->u.preprocess.target, "xml")) + { + const char *attr_list[5]; + + attr_list[0] = "version"; + attr_list[1] = "1.0"; + + attr_list[2] = "encoding"; + attr_list[3] = "UTF-8"; /* encoding */ + + attr_list[4] = 0; + + data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list, + uinfo.d1_stack[0]); + } + return uinfo.d1_stack[0]; +} + +struct xml_info { + XML_Expat_Version expat_version; +}; + +static data1_node *grs_read_xml (struct grs_read_info *p) +{ + return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); +} + +static void *filter_init(Res res, RecType recType) +{ + struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p)); + + p->expat_version = XML_ExpatVersionInfo(); + + return p; +} + +static void filter_destroy(void *clientData) +{ + struct xml_info *p = (struct xml_info *) clientData; + + xfree (p); +} + +static int filter_extract(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_xml); +} + +static int filter_retrieve(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_xml); +} + +static struct recType filter_type = { + 0, + "grs.xml", + filter_init, + 0, + filter_destroy, + filter_extract, + filter_retrieve, +}; + +RecType +#ifdef IDZEBRA_STATIC_GRS_XML +idzebra_filter_grs_xml +#else +idzebra_filter +#endif + +[] = { + &filter_type, + 0, +}; + +#endif + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/isamb/Makefile.am b/isamb/Makefile.am index f09dce2..5bb7ad9 100644 --- a/isamb/Makefile.am +++ b/isamb/Makefile.am @@ -1,6 +1,6 @@ -## $Id: 
Makefile.am,v 1.11 2006-06-29 13:47:58 adam Exp $ +## $Id: Makefile.am,v 1.12 2006-07-03 14:27:10 adam Exp $ -lib_LTLIBRARIES = libidzebra-isamb.la +noinst_LTLIBRARIES = libidzebra-isamb.la check_PROGRAMS = tstisamb diff --git a/isamc/Makefile.am b/isamc/Makefile.am index 64d7fb0..e0ce2d9 100644 --- a/isamc/Makefile.am +++ b/isamc/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.10 2006-06-29 13:48:00 adam Exp $ +## $Id: Makefile.am,v 1.11 2006-07-03 14:27:11 adam Exp $ -lib_LTLIBRARIES = libidzebra-isamc.la +noinst_LTLIBRARIES = libidzebra-isamc.la libidzebra_isamc_la_LDFLAGS=-version-info $(ZEBRALIBS_VERSION) libidzebra_isamc_la_SOURCES = isamc.c merge.c isamc-p.h diff --git a/isams/Makefile.am b/isams/Makefile.am index a12f8b2..73c4bd8 100644 --- a/isams/Makefile.am +++ b/isams/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.6 2006-06-29 13:48:02 adam Exp $ +## $Id: Makefile.am,v 1.7 2006-07-03 14:27:12 adam Exp $ -lib_LTLIBRARIES = libidzebra-isams.la +noinst_LTLIBRARIES = libidzebra-isams.la AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) diff --git a/rset/Makefile.am b/rset/Makefile.am index 740835d..0c8a466 100644 --- a/rset/Makefile.am +++ b/rset/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.16 2006-06-29 13:48:05 adam Exp $ +## $Id: Makefile.am,v 1.17 2006-07-03 14:27:13 adam Exp $ -lib_LTLIBRARIES = libidzebra-rset.la +noinst_LTLIBRARIES = libidzebra-rset.la libidzebra_rset_la_LDFLAGS=-version-info $(ZEBRALIBS_VERSION) libidzebra_rset_la_SOURCES = rset.c rstemp.c rsnull.c rsbool.c rsbetween.c \ diff --git a/tab/usmarc.abs b/tab/usmarc.abs index 5908468..2d58035 100644 --- a/tab/usmarc.abs +++ b/tab/usmarc.abs @@ -1,4 +1,4 @@ -# $Id: usmarc.abs,v 1.1 2002-10-22 12:51:09 adam Exp $ +# $Id: usmarc.abs,v 1.2 2006-07-03 14:27:15 adam Exp $ name usmarc reference USmarc attset bib1.att @@ -9,6 +9,8 @@ marc usmarc.mar esetname B usmarc-b.est esetname F @ +all any + # All 245 subfields mapped to title (word) and # 245 subfield a mapped to tile 
(phrase). elm 245 title - diff --git a/test/api/Makefile.am b/test/api/Makefile.am index 52a7d31..d4e242a 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.36 2006-05-19 13:49:35 adam Exp $ +# $Id: Makefile.am,v 1.37 2006-07-03 14:27:15 adam Exp $ noinst_PROGRAMS = testclient testclient_SOURCES = testclient.c @@ -34,18 +34,7 @@ safari1_SOURCES = safari1.c testlib.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la +zebralibs = ../../$(main_zebralib) LDADD = libtestlib.a $(zebralibs) $(YAZLALIB) diff --git a/test/api/safari.cfg b/test/api/safari.cfg index 3e93eee..9c571f2 100644 --- a/test/api/safari.cfg +++ b/test/api/safari.cfg @@ -1,4 +1,4 @@ -# $Id: safari.cfg,v 1.2 2005-03-31 12:20:06 adam Exp $ +# $Id: safari.cfg,v 1.3 2006-07-03 14:27:15 adam Exp $ profilepath: ${srcdir:-.}/../../tab attset: bib1.att @@ -8,5 +8,5 @@ recordType: safari isam: b # Set up modulePath -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/cddb/zebra.cfg b/test/cddb/zebra.cfg index 521c14e..6eafbf9 100644 --- a/test/cddb/zebra.cfg +++ b/test/cddb/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.3 2004-09-27 10:44:50 adam Exp $ +# $Id: zebra.cfg,v 1.4 2006-07-03 14:27:16 adam Exp $ # # Where the schema files, attribute files, etc are located. 
profilePath: .:../../tab:../../../yaz/tab @@ -15,4 +15,4 @@ storekeys: 1 storedata: 1 #recordId: (bib1,identifier-standard) -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/charmap/Makefile.am b/test/charmap/Makefile.am index 8bc3790..6d685a7 100644 --- a/test/charmap/Makefile.am +++ b/test/charmap/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.4 2006-04-27 20:40:42 marc Exp $ +# $Id: Makefile.am,v 1.5 2006-07-03 14:27:18 adam Exp $ check_PROGRAMS = charmap1 @@ -10,20 +10,7 @@ charmap1_SOURCES = charmap1.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/charmap/zebra.cfg b/test/charmap/zebra.cfg index 7800c3e..1de389e 100644 --- a/test/charmap/zebra.cfg +++ b/test/charmap/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.3 2004-09-27 10:44:50 adam Exp $ +# $Id: zebra.cfg,v 1.4 2006-07-03 14:27:18 adam Exp $ # # Where the schema files, attribute files, etc are located. 
profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -14,4 +14,4 @@ recordtype: grs.xml isam: b -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/codec/Makefile.am b/test/codec/Makefile.am index f7ab8e5..696dd4a 100644 --- a/test/codec/Makefile.am +++ b/test/codec/Makefile.am @@ -1,25 +1,11 @@ -# $Id: Makefile.am,v 1.5 2005-03-30 09:25:25 adam Exp $ +# $Id: Makefile.am,v 1.6 2006-07-03 14:27:18 adam Exp $ check_PROGRAMS = tstcodec TESTS = $(check_PROGRAMS) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - tstcodec_SOURCES = tstcodec.c - AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -LDADD = $(zebralibs) $(YAZLALIB) +LDADD = ../../$(main_zebralib) $(YAZLALIB) diff --git a/test/config/zebra.cfg b/test/config/zebra.cfg index 3554db2..a02fc20 100644 --- a/test/config/zebra.cfg +++ b/test/config/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.3 2004-09-27 10:44:50 adam Exp $ +# $Id: zebra.cfg,v 1.4 2006-07-03 14:27:19 adam Exp $ # # Where the schema files, attribute files, etc are located. 
profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -16,4 +16,4 @@ recordtype: grs.sgml #recordId: (bib1,identifier-standard) isam: b -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/espec/Makefile.am b/test/espec/Makefile.am index 48a4365..754ab9c 100644 --- a/test/espec/Makefile.am +++ b/test/espec/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.2 2006-04-27 20:40:42 marc Exp $ +# $Id: Makefile.am,v 1.3 2006-07-03 14:27:20 adam Exp $ check_PROGRAMS = t1 @@ -10,20 +10,7 @@ t1_SOURCES = t1.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/filters/Makefile.am b/test/filters/Makefile.am index 133e399..8dd2236 100644 --- a/test/filters/Makefile.am +++ b/test/filters/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.2 2006-04-27 20:40:42 marc Exp $ +# $Id: Makefile.am,v 1.3 2006-07-03 14:27:21 adam Exp $ check_PROGRAMS = text grs.xml grs.marc @@ -12,20 +12,7 @@ grs_marc_SOURCES = grs.marc.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) 
$(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/filters/zebra.cfg b/test/filters/zebra.cfg index e2b4f2a..e14f362 100644 --- a/test/filters/zebra.cfg +++ b/test/filters/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.1 2005-06-14 20:03:02 adam Exp $ +# $Id: zebra.cfg,v 1.2 2006-07-03 14:27:21 adam Exp $ # # Where the schema files, attribute files, etc are located. profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -14,4 +14,4 @@ recordtype: grs.xml isam: b -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/malxml/zebra.cfg b/test/malxml/zebra.cfg index 8e66781..13bbe96 100644 --- a/test/malxml/zebra.cfg +++ b/test/malxml/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.3 2004-09-27 10:44:51 adam Exp $ +# $Id: zebra.cfg,v 1.4 2006-07-03 14:27:22 adam Exp $ # # Where the schema files, attribute files, etc are located. profilePath: ${srcdir:-.}/../../tab @@ -16,4 +16,4 @@ recordtype: grs.xml #recordId: (bib1,identifier-standard) isam: b -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/marcxml/Makefile.am b/test/marcxml/Makefile.am index 7474718..f1b72cd 100644 --- a/test/marcxml/Makefile.am +++ b/test/marcxml/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.5 2006-04-27 20:40:42 marc Exp $ +# $Id: Makefile.am,v 1.6 2006-07-03 14:27:23 adam Exp $ check_PROGRAMS = t1 t2 @@ -11,21 +11,7 @@ t2_SOURCES = t2.c AM_CPPFLAGS = -I$(top_srcdir)/include -I$(srcdir)/../api $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la 
- -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) - +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/marcxml/zebra.cfg b/test/marcxml/zebra.cfg index ed6fd8d..06d6de1 100644 --- a/test/marcxml/zebra.cfg +++ b/test/marcxml/zebra.cfg @@ -1,5 +1,5 @@ # Simple Zebra configuration file -# $Id: zebra.cfg,v 1.4 2006-03-23 09:13:47 adam Exp $ +# $Id: zebra.cfg,v 1.5 2006-07-03 14:27:23 adam Exp $ # # Where the schema files, attribute files, etc are located. profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -17,7 +17,7 @@ recordId: (bib1,identifier-standard) isam: b perm.anonymous: rw -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs #shadow: shadow:1G #register: register:1G diff --git a/test/mbox/Makefile.am b/test/mbox/Makefile.am index bd9c778..af3892c 100644 --- a/test/mbox/Makefile.am +++ b/test/mbox/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.5 2006-04-27 20:40:43 marc Exp $ +# $Id: Makefile.am,v 1.6 2006-07-03 14:27:25 adam Exp $ check_PROGRAMS = mbox1 TESTS = $(check_PROGRAMS) @@ -10,20 +10,7 @@ mbox1_SOURCES = mbox1.c AM_CPPFLAGS = -I$(srcdir)/../api -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/rusmarc/Makefile.am b/test/rusmarc/Makefile.am index ecfc71f..5b2120e 100644 --- a/test/rusmarc/Makefile.am +++ b/test/rusmarc/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.6 2006-04-27 20:40:43 marc Exp $ +# $Id: Makefile.am,v 1.7 
2006-07-03 14:27:27 adam Exp $ check_PROGRAMS = t1 @@ -16,20 +16,7 @@ t1_SOURCES = t1.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/rusmarc/zebra.cfg b/test/rusmarc/zebra.cfg index 4114c8f..76bf150 100644 --- a/test/rusmarc/zebra.cfg +++ b/test/rusmarc/zebra.cfg @@ -1,6 +1,6 @@ # Simple Zebra configuration file that defines # Database with RUSMARC records. -# $Id: zebra.cfg,v 1.5 2004-12-04 00:51:39 adam Exp $ +# $Id: zebra.cfg,v 1.6 2006-07-03 14:27:27 adam Exp $ # # Where are the config files located? 
profilePath: ${srcdir:-.}/tab:${srcdir:-.}/../../tab @@ -16,4 +16,4 @@ recordType: grs.marc.rusmarc encoding: koi8-r # Set up modulePath -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/sort/Makefile.am b/test/sort/Makefile.am index c323ecc..48db434 100644 --- a/test/sort/Makefile.am +++ b/test/sort/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.5 2006-04-27 20:40:43 marc Exp $ +# $Id: Makefile.am,v 1.6 2006-07-03 14:27:28 adam Exp $ check_PROGRAMS = sort1 @@ -11,20 +11,7 @@ sort1_SOURCES = sort1.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/sort2/Makefile.am b/test/sort2/Makefile.am index 5eb748a..c993176 100644 --- a/test/sort2/Makefile.am +++ b/test/sort2/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.4 2006-04-27 20:40:43 marc Exp $ +# $Id: Makefile.am,v 1.5 2006-07-03 14:27:29 adam Exp $ check_PROGRAMS = t1 @@ -11,20 +11,7 @@ t1_SOURCES = t1.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = 
../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/usmarc/zebra.cfg b/test/usmarc/zebra.cfg index 12268e7..3341ad7 100644 --- a/test/usmarc/zebra.cfg +++ b/test/usmarc/zebra.cfg @@ -1,6 +1,6 @@ # Simple Zebra configuration file that defines # a database with USMARC records. -# $Id: zebra.cfg,v 1.7 2004-12-21 17:18:29 adam Exp $ +# $Id: zebra.cfg,v 1.8 2006-07-03 14:27:30 adam Exp $ # # Where are the config files located? profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab @@ -24,4 +24,4 @@ keyTmpDir: tmp # Approx. Memory usage during indexing memMax: 4 -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/xpath/Makefile.am b/test/xpath/Makefile.am index 5437cc3..738384a 100644 --- a/test/xpath/Makefile.am +++ b/test/xpath/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.12 2006-04-27 20:40:43 marc Exp $ +# $Id: Makefile.am,v 1.13 2006-07-03 14:27:34 adam Exp $ check_PROGRAMS = xpath1 xpath2 xpath3 xpath4 xpath5 xpath6 @@ -15,20 +15,7 @@ xpath6_SOURCES = xpath6.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la - -LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) +LDADD = ../api/libtestlib.a ../../$(main_zebralib) $(YAZLALIB) clean-local: -rm -rf *.LCK diff --git a/test/xslt/Makefile.am b/test/xslt/Makefile.am index 7127599..7cd4bb2 100644 --- a/test/xslt/Makefile.am +++ b/test/xslt/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.8 2006-04-27 20:40:44 marc Exp $ +# $Id: Makefile.am,v 1.9 2006-07-03 14:27:35 adam Exp $ check_PROGRAMS = xslt1 xslt2 xslt3 xslt4 TESTS = $(check_PROGRAMS) @@ 
-14,18 +14,7 @@ xslt4_SOURCES = xslt4.c AM_CPPFLAGS = -I$(srcdir)/../api -I$(top_srcdir)/include $(YAZINC) -zebralibs = \ - ../../index/libidzebra-api.la \ - ../../rset/libidzebra-rset.la \ - ../../recctrl/libidzebra-recctrl.la \ - ../../dict/libidzebra-dict.la \ - ../../isams/libidzebra-isams.la \ - ../../isamc/libidzebra-isamc.la \ - ../../isamb/libidzebra-isamb.la \ - ../../data1/libidzebra-data1.la \ - ../../bfile/libidzebra-bfile.la \ - ../../dfa/libidzebra-dfa.la \ - ../../util/libidzebra-util.la +zebralibs = ../../index/libidzebra-2.0.la LDADD = ../api/libtestlib.a $(zebralibs) $(YAZLALIB) diff --git a/test/xslt/zebra.cfg b/test/xslt/zebra.cfg index c890599..c5f114b 100644 --- a/test/xslt/zebra.cfg +++ b/test/xslt/zebra.cfg @@ -1,5 +1,5 @@ profilePath: ${srcdir:-.}/../../tab -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs diff --git a/test/xslt/zebrastaticrank.cfg b/test/xslt/zebrastaticrank.cfg index a214144..fa7df01 100644 --- a/test/xslt/zebrastaticrank.cfg +++ b/test/xslt/zebrastaticrank.cfg @@ -1,6 +1,6 @@ profilePath: ${srcdir:-.}/../../tab -modulePath: ../../recctrl/.libs +modulePath: ../../index/.libs staticrank: 1 diff --git a/util/Makefile.am b/util/Makefile.am index bb4442b..ce96fd0 100644 --- a/util/Makefile.am +++ b/util/Makefile.am @@ -1,6 +1,6 @@ -## $Id: Makefile.am,v 1.22 2006-07-02 21:22:17 adam Exp $ +## $Id: Makefile.am,v 1.23 2006-07-03 14:27:35 adam Exp $ -lib_LTLIBRARIES = libidzebra-util.la +noinst_LTLIBRARIES = libidzebra-util.la noinst_PROGRAMS = passtest -- 1.7.10.4