From aa9c7bcd6f5ec5a2523c40f929ebdc9d72c1b48a Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Sun, 1 May 2005 07:17:46 +0000 Subject: [PATCH] Make two xslt filters, "xslt" which does not split (reads whole XML doc) and does not depend on xmlreader. "xslt1" which splits on top-level and depends on xmlreader. xmlreader is unavailable in Debian woody (stable). --- recctrl/xslt.c | 154 +++++++++++++++++++++++++++++++++++------------- test/api/testlib.c | 6 +- test/xslt/Makefile.am | 7 ++- test/xslt/marc-one.xml | 32 ++++++++++ test/xslt/xslt1.c | 7 ++- test/xslt/xslt2.c | 10 +++- test/xslt/xslt3.c | 74 +++++++++++++++++++++++ test/xslt/zebra.cfg | 1 - 8 files changed, 241 insertions(+), 50 deletions(-) create mode 100644 test/xslt/marc-one.xml create mode 100644 test/xslt/xslt3.c diff --git a/recctrl/xslt.c b/recctrl/xslt.c index 370a169..4aeb455 100644 --- a/recctrl/xslt.c +++ b/recctrl/xslt.c @@ -1,4 +1,4 @@ -/* $Id: xslt.c,v 1.3 2005-04-28 13:33:20 adam Exp $ +/* $Id: xslt.c,v 1.4 2005-05-01 07:17:46 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -25,7 +25,11 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include +#include +#include +#ifdef LIBXML_READER_ENABLED #include +#endif #include #include @@ -33,7 +37,9 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA struct filter_info { xsltStylesheetPtr stylesheet_xsp; +#ifdef LIBXML_READER_ENABLED xmlTextReaderPtr reader; +#endif char *fname; int split_depth; ODR odr; @@ -68,17 +74,27 @@ static void set_param_int(const char **params, const char *name, } -static void *filter_init (Res res, RecType recType) +static void *filter_init_xslt(Res res, RecType recType) { struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo)); tinfo->stylesheet_xsp = 0; +#ifdef LIBXML_READER_ENABLED tinfo->reader = 0; +#endif tinfo->fname = 0; - tinfo->split_depth = 1; + tinfo->split_depth = 0; tinfo->odr = odr_createmem(ODR_ENCODE); return tinfo; } +static void 
*filter_init_xslt1(Res res, RecType recType) +{ + struct filter_info *tinfo = (struct filter_info *) + filter_init_xslt(res, recType); + tinfo->split_depth = 1; + return tinfo; +} + static void filter_config(void *clientData, Res res, const char *args) { struct filter_info *tinfo = clientData; @@ -101,6 +117,10 @@ static void filter_destroy(void *clientData) struct filter_info *tinfo = clientData; if (tinfo->stylesheet_xsp) xsltFreeStylesheet(tinfo->stylesheet_xsp); +#ifdef LIBXML_READER_ENABLED + if (tinfo->reader) + xmlFreeTextReader(tinfo->reader); +#endif xfree(tinfo->fname); odr_destroy(tinfo->odr); xfree(tinfo); @@ -163,18 +183,48 @@ static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl, } } -static int filter_extract(void *clientData, struct recExtractCtrl *p) +static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p, + xmlDocPtr doc) { - const char *params[10]; - struct filter_info *tinfo = clientData; RecWord recWord; - int ret; - + const char *params[10]; params[0] = 0; + xmlChar *buf_out; + int len_out; - odr_reset(tinfo->odr); set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr); + (*p->init)(p, &recWord); + recWord.reg_type = 'w'; + + if (tinfo->stylesheet_xsp) + { + xmlDocPtr resDoc = + xsltApplyStylesheet(tinfo->stylesheet_xsp, + doc, params); + if (p->flagShowRecords) + { + xmlDocDumpMemory(resDoc, &buf_out, &len_out); + fwrite(buf_out, len_out, 1, stdout); + xmlFree(buf_out); + } + index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord); + xmlFreeDoc(resDoc); + } + xmlDocDumpMemory(doc, &buf_out, &len_out); + if (p->flagShowRecords) + fwrite(buf_out, len_out, 1, stdout); + (*p->setStoreData)(p, buf_out, len_out); + xmlFree(buf_out); + + xmlFreeDoc(doc); + return RECCTRL_EXTRACT_OK; +} + +#ifdef LIBXML_READER_ENABLED +static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p) +{ + int ret; if (p->first_record) { if (tinfo->reader) @@ -191,9 +241,6 @@ static int 
filter_extract(void *clientData, struct recExtractCtrl *p) if (!tinfo->stylesheet_xsp) return RECCTRL_EXTRACT_ERROR_GENERIC; - (*p->init)(p, &recWord); - recWord.reg_type = 'w'; - ret = xmlTextReaderRead(tinfo->reader); while (ret == 1) { int type = xmlTextReaderNodeType(tinfo->reader); @@ -201,37 +248,13 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) if (tinfo->split_depth == 0 || (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth)) { - xmlChar *buf_out; - int len_out; - xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader); xmlNodePtr ptr2 = xmlCopyNode(ptr, 1); xmlDocPtr doc = xmlNewDoc("1.0"); xmlDocSetRootElement(doc, ptr2); - - if (tinfo->stylesheet_xsp) - { - xmlDocPtr resDoc = - xsltApplyStylesheet(tinfo->stylesheet_xsp, - doc, params); - if (p->flagShowRecords) - { - xmlDocDumpMemory(resDoc, &buf_out, &len_out); - fwrite(buf_out, len_out, 1, stdout); - xmlFree(buf_out); - } - index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord); - xmlFreeDoc(resDoc); - } - xmlDocDumpMemory(doc, &buf_out, &len_out); - if (p->flagShowRecords) - fwrite(buf_out, len_out, 1, stdout); - (*p->setStoreData)(p, buf_out, len_out); - xmlFree(buf_out); - xmlFreeDoc(doc); - return RECCTRL_EXTRACT_OK; + return extract_doc(tinfo, p, doc); } ret = xmlTextReaderRead(tinfo->reader); } @@ -239,6 +262,44 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p) tinfo->reader = 0; return RECCTRL_EXTRACT_EOF; } +#endif + +static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p) +{ + if (p->first_record) /* only one record per stream */ + { + xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */, + 0 /* URL */, + 0 /* encoding */, + XML_PARSE_XINCLUDE); + if (!doc) + { + return RECCTRL_EXTRACT_ERROR_GENERIC; + } + return extract_doc(tinfo, p, doc); + } + else + return RECCTRL_EXTRACT_EOF; +} + +static int filter_extract(void *clientData, struct recExtractCtrl *p) +{ + struct filter_info *tinfo = 
clientData; + + odr_reset(tinfo->odr); + + if (tinfo->split_depth == 0) + return extract_full(tinfo, p); + else + { +#ifdef LIBXML_READER_ENABLED + return extract_split(tinfo, p); +#else + /* no xmlreader so we can't split it */ + return RECCTRL_EXTRACT_ERROR_GENERIC; +#endif + } +} static int ioread_ret(void *context, char *buffer, int len) { @@ -339,10 +400,20 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p) return 0; } -static struct recType filter_type = { +static struct recType filter_type_xslt = { 0, "xslt", - filter_init, + filter_init_xslt, + filter_config, + filter_destroy, + filter_extract, + filter_retrieve +}; + +static struct recType filter_type_xslt1 = { + 0, + "xslt1", + filter_init_xslt1, filter_config, filter_destroy, filter_extract, @@ -357,6 +428,9 @@ idzebra_filter #endif [] = { - &filter_type, + &filter_type_xslt, +#ifdef LIBXML_READER_ENABLED + &filter_type_xslt1, +#endif 0, }; diff --git a/test/api/testlib.c b/test/api/testlib.c index ac8f9cb..a0b13d6 100644 --- a/test/api/testlib.c +++ b/test/api/testlib.c @@ -1,4 +1,4 @@ -/* $Id: testlib.c,v 1.14 2005-04-20 10:18:19 adam Exp $ +/* $Id: testlib.c,v 1.15 2005-05-01 07:17:47 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -181,7 +181,9 @@ int do_query_x(int lineno, ZebraHandle zh, char *query, int exphits, else { if (rc == ZEBRA_FAIL) { - printf("Error: search returned %d\n%s\n", rc, query); + int code = zebra_errCode(zh); + printf("Error: search returned %d. 
Code %d\n%s\n", rc, + code, query); exit (1); } if (exphits != -1 && hits != exphits) { diff --git a/test/xslt/Makefile.am b/test/xslt/Makefile.am index 9427f73..69ea9cb 100644 --- a/test/xslt/Makefile.am +++ b/test/xslt/Makefile.am @@ -1,12 +1,13 @@ -# $Id: Makefile.am,v 1.2 2005-04-28 12:34:45 adam Exp $ +# $Id: Makefile.am,v 1.3 2005-05-01 07:17:47 adam Exp $ -check_PROGRAMS = xslt1 xslt2 +check_PROGRAMS = xslt1 xslt2 xslt3 TESTS = $(check_PROGRAMS) -EXTRA_DIST=zebra.cfg marc-col.xml marc1.xsl +EXTRA_DIST=zebra.cfg marc-col.xml marc-one.xml marc1.xsl xslt1_SOURCES = xslt1.c xslt2_SOURCES = xslt2.c +xslt3_SOURCES = xslt3.c AM_CPPFLAGS = -I$(srcdir)/../api -I$(top_srcdir)/include $(YAZINC) diff --git a/test/xslt/marc-one.xml b/test/xslt/marc-one.xml new file mode 100644 index 0000000..0a61202 --- /dev/null +++ b/test/xslt/marc-one.xml @@ -0,0 +1,32 @@ + + 00366nam 22001698a 4500 + 11224466 + DLC + 00000000000000.0 + 910710c19910701nju 00010 eng + + 11224466 + + + DLC + DLC + + + 123-xyz + + + Jack Collins + + + How to program a computer + + + Penguin + + + 8710 + + + p. cm. 
+ + diff --git a/test/xslt/xslt1.c b/test/xslt/xslt1.c index eddc5d7..df980ef 100644 --- a/test/xslt/xslt1.c +++ b/test/xslt/xslt1.c @@ -1,4 +1,4 @@ -/* $Id: xslt1.c,v 1.1 2005-04-28 14:58:24 adam Exp $ +/* $Id: xslt1.c,v 1.2 2005-05-01 07:17:47 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -29,11 +29,14 @@ int main(int argc, char **argv) ZebraService zs = start_up(0, argc, argv); ZebraHandle zh = zebra_open(zs); - check_filter(zs, "xslt"); + check_filter(zs, "xslt1"); + zebra_select_database(zh, "Default"); zebra_init(zh); + zebra_set_resource(zh, "recordType", "xslt1.marc1.xsl"); + zebra_begin_trans(zh, 1); sprintf(path, "%.200s/marc-col.xml", get_srcdir()); zebra_repository_update(zh, path); diff --git a/test/xslt/xslt2.c b/test/xslt/xslt2.c index 1d69c8a..8ab9450 100644 --- a/test/xslt/xslt2.c +++ b/test/xslt/xslt2.c @@ -1,4 +1,4 @@ -/* $Id: xslt2.c,v 1.1 2005-04-28 12:34:45 adam Exp $ +/* $Id: xslt2.c,v 1.2 2005-05-01 07:17:47 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -36,7 +36,13 @@ int main(int argc, char **argv) ZebraService zs = start_up(0, argc, argv); ZebraHandle zh = zebra_open(zs); - check_filter(zs, "xslt"); + check_filter(zs, "xslt1"); + + zebra_select_database(zh, "Default"); + + zebra_init(zh); + + zebra_set_resource(zh, "recordType", "xslt1.marc1.xsl"); sprintf(path, "%.200s/marc-col.xml", get_srcdir()); f = fopen(path, "rb"); diff --git a/test/xslt/xslt3.c b/test/xslt/xslt3.c new file mode 100644 index 0000000..0fb67c8 --- /dev/null +++ b/test/xslt/xslt3.c @@ -0,0 +1,74 @@ +/* $Id: xslt3.c,v 1.1 2005-05-01 07:17:47 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. 
+ +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#include +#include "testlib.h" + +int main(int argc, char **argv) +{ + char path[256]; + char record_buf[20000]; + const char *records_array[] = { + record_buf, 0 + }; + FILE *f; + size_t r; + + ZebraService zs = start_up(0, argc, argv); + ZebraHandle zh = zebra_open(zs); + + check_filter(zs, "xslt"); + + zebra_select_database(zh, "Default"); + + zebra_init(zh); + + zebra_set_resource(zh, "recordType", "xslt.marc1.xsl"); + + sprintf(path, "%.200s/marc-one.xml", get_srcdir()); + f = fopen(path, "rb"); + if (!f) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "Cannot open %s", path); + exit(1); + } + r = fread(record_buf, 1, sizeof(record_buf)-1, f); + if (r < 2 || r == sizeof(record_buf)-1) + { + yaz_log(YLOG_FATAL, "Bad size of %s", path); + exit(1); + } + fclose(f); + + record_buf[r] = '\0'; + + /* for now only the first of the records in the collection is + indexed. That can be seen as a bug */ + init_data(zh, records_array); + + /* only get hits from first record .. */ + do_query(__LINE__, zh, "@attr 1=title computer", 1); + do_query(__LINE__, zh, "@attr 1=control 11224466", 1); + do_query_x(__LINE__, zh, "@attr 1=titl computer", 0, 121); + + return close_down(zh, zs, 0); +} diff --git a/test/xslt/zebra.cfg b/test/xslt/zebra.cfg index 743434f..50b32c7 100644 --- a/test/xslt/zebra.cfg +++ b/test/xslt/zebra.cfg @@ -2,4 +2,3 @@ profilePath: ${srcdir:-.}/../../tab modulePath: ../../recctrl/.libs -recordType: xslt.marc1.xsl -- 1.7.10.4