-/* $Id: mod_dom.c,v 1.36 2007-04-16 21:54:37 adam Exp $
- Copyright (C) 1995-2007
- Index Data ApS
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
- This file is part of the Zebra server.
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
- Zebra is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2, or (at your option) any later
- version.
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
- Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define ZEBRA_PI_NAME "zebra-2.0"
static const char *zebra_pi_name = ZEBRA_PI_NAME;
+/* Tag telling which member of the union inside struct convert_s is in use. */
+enum convert_type {
+ convert_xslt_type, /* step created for an <xslt> config element */
+ convert_meta_type /* step created for a <process-meta> config element */
+};
-
-struct convert_s {
+/* Data for one XSLT conversion step: the value of the @stylesheet attribute
+   and the stylesheet compiled from it by xsltParseStylesheetFile(). */
+struct convert_xslt {
const char *stylesheet;
xsltStylesheetPtr stylesheet_xsp;
+};
+
+/* Data for one <process-meta> step. The element takes no attributes, so the
+   only member is a placeholder. */
+struct convert_meta {
+ int dummy;
+};
+
+/* One step in a singly linked chain of conversions. 'which' selects the
+   valid member of 'u'; 'next' is the following step, or 0 at the end. */
+struct convert_s {
+ enum convert_type which;
+ union {
+ struct convert_xslt xslt;
+ struct convert_meta meta;
+ } u;
struct convert_s *next;
};
char *fname;
char *full_name;
const char *profile_path;
- ODR odr_record;
- ODR odr_config;
+ NMEM nmem_record;
+ NMEM nmem_config;
xmlDocPtr doc_config;
struct filter_extract *extract;
struct filter_retrieve *retrieve_list;
static void set_param_str(const char **params, const char *name,
- const char *value, ODR odr)
+ const char *value, NMEM nmem)
{
- char *quoted = odr_malloc(odr, 3 + strlen(value));
+ char *quoted = nmem_malloc(nmem, 3 + strlen(value));
sprintf(quoted, "'%s'", value);
while (*params)
params++;
}
static void set_param_int(const char **params, const char *name,
- zint value, ODR odr)
+ zint value, NMEM nmem)
{
- char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
+ char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
while (*params)
params++;
sprintf(quoted, "'" ZINT_FORMAT "'", value);
tinfo->fname = 0;
tinfo->full_name = 0;
tinfo->profile_path = 0;
- tinfo->odr_record = odr_createmem(ODR_ENCODE);
- tinfo->odr_config = odr_createmem(ODR_ENCODE);
+ tinfo->nmem_record = nmem_create();
+ tinfo->nmem_config = nmem_create();
tinfo->extract = 0;
tinfo->retrieve_list = 0;
tinfo->input_list = 0;
+/* Walk the conversion chain starting at c and release the compiled
+   stylesheet of every XSLT step with xsltFreeStylesheet(). The convert_s
+   nodes themselves are not freed here, and the stylesheet pointers are
+   left unchanged. */
static void destroy_xsp(struct convert_s *c)
{
- while(c)
+ while (c)
{
- if (c->stylesheet_xsp)
- xsltFreeStylesheet(c->stylesheet_xsp);
+ /* only XSLT steps carry a stylesheet; other step types are skipped */
+ if (c->which == convert_xslt_type)
+ {
+ if (c->u.xslt.stylesheet_xsp)
+ xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
+ }
c = c->next;
}
}
xmlFreeDoc(tinfo->doc_config);
tinfo->doc_config = 0;
}
- odr_reset(tinfo->odr_config);
+ nmem_reset(tinfo->nmem_config);
}
static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
if (!XML_STRCMP(ptr->name, "xslt"))
{
struct _xmlAttr *attr;
- struct convert_s *p
- = odr_malloc(tinfo->odr_config, sizeof(*p));
+ struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
p->next = 0;
- p->stylesheet = 0;
- p->stylesheet_xsp = 0;
+ p->which = convert_xslt_type;
+ p->u.xslt.stylesheet = 0;
+ p->u.xslt.stylesheet_xsp = 0;
for (attr = ptr->properties; attr; attr = attr->next)
- if (attr_content(attr, "stylesheet", &p->stylesheet))
+ if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
;
else
{
dom_log(YLOG_WARN, tinfo, ptr,
"bad attribute @%s", attr->name);
}
- if (p->stylesheet)
+ if (p->u.xslt.stylesheet)
{
char tmp_xslt_full_name[1024];
- if (!yaz_filepath_resolve(p->stylesheet,
+ if (!yaz_filepath_resolve(p->u.xslt.stylesheet,
tinfo->profile_path,
NULL,
tmp_xslt_full_name))
dom_log(YLOG_WARN, tinfo, 0,
"stylesheet %s not found in "
"path %s",
- p->stylesheet,
+ p->u.xslt.stylesheet,
tinfo->profile_path);
return ZEBRA_FAIL;
}
- p->stylesheet_xsp
+ p->u.xslt.stylesheet_xsp
= xsltParseStylesheetFile((const xmlChar*)
tmp_xslt_full_name);
- if (!p->stylesheet_xsp)
+ if (!p->u.xslt.stylesheet_xsp)
{
dom_log(YLOG_WARN, tinfo, 0,
"could not parse xslt stylesheet %s",
tmp_xslt_full_name);
return ZEBRA_FAIL;
}
- }
- else
- {
- dom_log(YLOG_WARN, tinfo, ptr,
- "missing attribute 'stylesheet' ");
- return ZEBRA_FAIL;
- }
- *l = p;
- l = &p->next;
+ }
+ else
+ {
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "missing attribute 'stylesheet'");
+ return ZEBRA_FAIL;
+ }
+ *l = p;
+ l = &p->next;
+ }
+ else if (!XML_STRCMP(ptr->name, "process-meta"))
+ {
+ struct _xmlAttr *attr;
+ struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
+
+ p->next = 0;
+ p->which = convert_meta_type;
+
+ for (attr = ptr->properties; attr; attr = attr->next)
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s", attr->name);
+ *l = p;
+ l = &p->next;
}
else
{
return ZEBRA_OK;
}
+/*
+ * Expand <meta name="..."/> elements of the Zebra DOM namespace.
+ *
+ * Scans the subtree rooted at node. For every element named "meta" whose
+ * namespace href equals zebra_dom_ns and which carries a @name attribute,
+ * retctr->special_fetch is called with that name as element set name and
+ * XML as input syntax. When the fetch returns 0 and its result parses as
+ * XML with a root element, the meta element is replaced in the tree by a
+ * deep copy of that root element and the old meta element is freed.
+ * If anything in that chain fails the meta element is left untouched and
+ * its children are scanned like those of any other node.
+ *
+ * tinfo  - filter state; only used for logging
+ * doc    - document owning node; only passed on to recursive calls
+ * node   - root of the subtree to scan; it is freed when it is itself a
+ *          meta element that gets replaced, so callers must not use it
+ *          after this call
+ * retctr - retrieval control providing special_fetch and its handle
+ *
+ * Always returns 0.
+ */
+static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node,
+                        struct recRetrieveCtrl *retctr)
+{
+    xmlNodePtr child;
+    xmlNodePtr next;
+
+    if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
+        0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
+    {
+        if (0 == XML_STRCMP(node->name, "meta"))
+        {
+            const char *element_set_name = 0;
+            int replaced = 0;
+            struct _xmlAttr *attr;
+
+            for (attr = node->properties; attr; attr = attr->next)
+            {
+                if (attr_content(attr, "name", &element_set_name))
+                    ;
+                else
+                {
+                    dom_log(YLOG_WARN, tinfo, node,
+                            "bad attribute @%s, expected @name", attr->name);
+                }
+            }
+            if (element_set_name)
+            {
+                WRBUF result = wrbuf_alloc();
+                WRBUF addinfo = wrbuf_alloc();
+                const Odr_oid *input_format = yaz_oid_recsyn_xml;
+                const Odr_oid *output_format = 0;
+                int ret;
+
+                ret = retctr->special_fetch(retctr->handle,
+                                            element_set_name,
+                                            input_format, &output_format,
+                                            result, addinfo);
+                if (ret == 0)
+                {
+                    xmlDocPtr sub_doc =
+                        xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
+                    if (sub_doc)
+                    {
+                        xmlNodePtr t = xmlDocGetRootElement(sub_doc);
+                        xmlNodePtr copy = t ? xmlCopyNode(t, 1) : 0;
+
+                        /* only swap when there really is a replacement;
+                           otherwise keep the meta element in place */
+                        if (copy)
+                        {
+                            xmlReplaceNode(node, copy);
+                            replaced = 1;
+                        }
+                        xmlFreeDoc(sub_doc);
+                    }
+                }
+                wrbuf_destroy(result);
+                wrbuf_destroy(addinfo);
+            }
+            if (replaced)
+            {
+                /* xmlReplaceNode() only unlinks the old element; free it
+                   here (element_set_name pointed into it and is dead now)
+                   and do not descend into the detached subtree */
+                xmlFreeNode(node);
+                return 0;
+            }
+        }
+    }
+    /* remember the sibling before recursing: a replaced child is unlinked
+       and freed, so its next pointer cannot be read afterwards */
+    for (child = node->children; child; child = next)
+    {
+        next = child->next;
+        process_meta(tinfo, doc, child, retctr);
+    }
+    return 0;
+}
+
static ZEBRA_RES perform_convert(struct filter_info *tinfo,
struct recExtractCtrl *extctr,
+ struct recRetrieveCtrl *retctr,
struct convert_s *convert,
const char **params,
xmlDocPtr *doc,
{
for (; convert; convert = convert->next)
{
- xmlChar *buf_out = 0;
- int len_out = 0;
- xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
- *doc, params);
- if (last_xsp)
- *last_xsp = convert->stylesheet_xsp;
-
- if (!res_doc)
- break;
-
- /* now saving into buffer and re-reading into DOM to avoid annoing
- XSLT problem with thrown-out indentation text nodes */
- xsltSaveResultToString(&buf_out, &len_out, res_doc,
- convert->stylesheet_xsp);
- xmlFreeDoc(res_doc);
-
- xmlFreeDoc(*doc);
-
- *doc = xmlParseMemory((const char *) buf_out, len_out);
+ if (convert->which == convert_xslt_type)
+ {
+ xmlChar *buf_out = 0;
+ int len_out = 0;
+ xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
+ *doc, params);
+ if (last_xsp)
+ *last_xsp = convert->u.xslt.stylesheet_xsp;
+
+ if (!res_doc)
+ break;
+
+ /* now saving into buffer and re-reading into DOM to avoid annoing
+ XSLT problem with thrown-out indentation text nodes */
+ xsltSaveResultToString(&buf_out, &len_out, res_doc,
+ convert->u.xslt.stylesheet_xsp);
+ xmlFreeDoc(res_doc);
+
+ xmlFreeDoc(*doc);
+
+ *doc = xmlParseMemory((const char *) buf_out, len_out);
+
+ /* writing debug info out */
+ if (extctr && extctr->flagShowRecords)
+ yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ convert->u.xslt.stylesheet,
+ len_out, buf_out);
+
+ xmlFree(buf_out);
+ }
+ else if (convert->which == convert_meta_type)
+ {
+ if (retctr) /* only execute meta on retrieval */
+ {
+ process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
- /* writing debug info out */
- if (extctr && extctr->flagShowRecords)
- yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s",
- tinfo->fname ? tinfo->fname : "(none)",
- convert->stylesheet,
- len_out, buf_out);
-
- xmlFree(buf_out);
+ /* last stylesheet absent */
+ if (last_xsp)
+ *last_xsp = 0;
+ }
+ }
}
return ZEBRA_OK;
}
struct filter_input **np = &tinfo->input_list;
for (;*np; np = &(*np)->next)
;
- p = *np = odr_malloc(tinfo->odr_config, sizeof(*p));
+ p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
p->next = 0;
p->syntax = 0;
p->name = 0;
xmlNodePtr ptr;
xmlDocPtr doc;
- tinfo->fname = odr_strdup(tinfo->odr_config, fname);
+ tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
NULL, tmp_full_name))
- tinfo->full_name = odr_strdup(tinfo->odr_config, tmp_full_name);
+ tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
else
- tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname);
+ tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
yaz_log(YLOG_LOG, "%s dom filter: "
"loading config file %s", tinfo->fname, tinfo->full_name);
*/
struct _xmlAttr *attr;
struct filter_extract *f =
- odr_malloc(tinfo->odr_config, sizeof(*f));
+ nmem_malloc(tinfo->nmem_config, sizeof(*f));
tinfo->extract = f;
f->name = 0;
struct _xmlAttr *attr;
struct filter_retrieve **fp = &tinfo->retrieve_list;
struct filter_retrieve *f =
- odr_malloc(tinfo->odr_config, sizeof(*f));
+ nmem_malloc(tinfo->nmem_config, sizeof(*f));
while (*fp)
fp = &(*fp)->next;
</retrieve>
*/
struct filter_store *f =
- odr_malloc(tinfo->odr_config, sizeof(*f));
+ nmem_malloc(tinfo->nmem_config, sizeof(*f));
tinfo->store = f;
f->convert = 0;
{
struct filter_info *tinfo = clientData;
destroy_dom(tinfo);
- odr_destroy(tinfo->odr_config);
- odr_destroy(tinfo->odr_record);
+ nmem_destroy(tinfo->nmem_config);
+ nmem_destroy(tinfo->nmem_record);
xfree(tinfo);
}
}
-/* DOM filter style indexing */
-static int attr_content_xml(struct _xmlAttr *attr, const char *name,
- const char **dst_content)
-{
- if (0 == XML_STRCMP(attr->name, name) && attr->children
- && attr->children->type == XML_TEXT_NODE)
- {
- *dst_content = (const char *) (attr->children->content);
- return 1;
- }
- return 0;
-}
-
/* DOM filter style indexing */
static void index_value_of(struct filter_info *tinfo,
/* if there is no text, we do not need to proceed */
if (text_len)
{
+ /* keep seqno base so that all text will have
+ identical seqno's for multiple fields , e.g
+ <z:index name="title:w any:w title:p">.. */
+
+ zint seqno_base = recword->seqno;
+ zint seqno_max = recword->seqno;
+
+
const char *look = index_p;
const char *bval;
const char *eval;
}
/* actually indexing the text given */
- dom_log(YLOG_DEBUG, tinfo, 0,
- "INDEX '%s:%s' '%s'",
- index ? (const char *) index : "null",
- type ? (const char *) type : "null",
- text ? (const char *) text : "null");
+ recword->seqno = seqno_base;
recword->index_name = (const char *)index;
- if (type && *type)
- recword->index_type = *type;
+ if (*type)
+ recword->index_type = (const char *) type;
/* writing debug out */
if (extctr->flagShowRecords)
dom_log(YLOG_LOG, tinfo, 0,
"INDEX '%s:%s' '%s'",
- index ? (const char *) index : "null",
- type ? (const char *) type : "null",
- text ? (const char *) text : "null");
+ (const char *) index,
+ (const char *) type,
+ (const char *) text);
- /* actually indexing the text given */
- recword->index_name = (const char *)index;
- if (type && *type)
- recword->index_type = *type;
(extctr->tokenAdd)(recword);
+ if (seqno_max < recword->seqno)
+ seqno_max = recword->seqno;
+
/* eat whitespaces */
if (*look && ' ' == *look)
{
look++;
}
}
+ recword->seqno = seqno_max;
}
xmlFree(text);
}
else
dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
extctr->action = action;
- yaz_log(YLOG_LOG, "In mod_dom.c: setting action to %d", action);
}
if (tinfo->record_info_invoked == 1)
if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
&& 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
{
- if (0 == XML_STRCMP(node->name, "index"))
- {
+ if (0 == XML_STRCMP(node->name, "index"))
+ {
const char *index_p = 0;
struct _xmlAttr *attr;
for (attr = node->properties; attr; attr = attr->next)
{
- if (attr_content_xml(attr, "name", &index_p))
+ if (attr_content(attr, "name", &index_p))
{
index_value_of(tinfo, extctr, recword, node, index_p);
}
struct _xmlAttr *attr;
for (attr = node->properties; attr; attr = attr->next)
{
- if (attr_content_xml(attr, "id", &id_p))
+ if (attr_content(attr, "id", &id_p))
;
- else if (attr_content_xml(attr, "rank", &rank_p))
+ else if (attr_content(attr, "rank", &rank_p))
;
- else if (attr_content_xml(attr, "type", &type_p))
+ else if (attr_content(attr, "type", &type_p))
;
else
{
}
-
-
static int convert_extract_doc(struct filter_info *tinfo,
struct filter_input *input,
struct recExtractCtrl *p,
xmlDocPtr doc)
-
{
xmlChar *buf_out;
int len_out;
const char *params[10];
xsltStylesheetPtr last_xsp = 0;
- xmlDocPtr store_doc = 0;
/* per default do not ingest record */
tinfo->record_info_invoked = 0;
/* we actuallu have a document which needs to be processed further */
params[0] = 0;
- set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
-
- /* input conversion */
- perform_convert(tinfo, p, input->convert, params, &doc, 0);
+ set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
- if (tinfo->store)
+ if (p && p->flagShowRecords)
{
- /* store conversion */
- store_doc = xmlCopyDoc(doc, 1);
- perform_convert(tinfo, p, tinfo->store->convert,
- params, &store_doc, &last_xsp);
+ xmlChar *buf_out;
+ int len_out;
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+#if 0
+ FILE *outf = fopen("extract.xml", "w");
+ fwrite(buf_out, 1, len_out, outf);
+ fclose(outf);
+#endif
+ yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
}
-
- /* saving either store doc or original doc in case no store doc exists */
- if (last_xsp)
- xsltSaveResultToString(&buf_out, &len_out,
- store_doc ? store_doc : doc, last_xsp);
- else
- xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
- (*p->setStoreData)(p, buf_out, len_out);
- xmlFree(buf_out);
+ if (p->setStoreData)
+ {
+ xmlDocPtr store_doc = 0;
+
+ /* input conversion */
+ perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
+
+ if (tinfo->store)
+ {
+ /* store conversion */
+ store_doc = xmlCopyDoc(doc, 1);
+ perform_convert(tinfo, p, 0, tinfo->store->convert,
+ params, &store_doc, &last_xsp);
+ }
+
+ /* saving either store doc or original doc in case no store doc exists */
+ if (last_xsp)
+ xsltSaveResultToString(&buf_out, &len_out,
+ store_doc ? store_doc : doc, last_xsp);
+ else
+ xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
+
+ if (p->setStoreData)
+ (*p->setStoreData)(p, buf_out, len_out);
+ xmlFree(buf_out);
+ if (store_doc)
+ xmlFreeDoc(store_doc);
+ }
- if (store_doc)
- xmlFreeDoc(store_doc);
/* extract conversion */
- perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
+ perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
/* finally, do the indexing */
ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
if (ptr)
- {
+ {
/* we have a new document */
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
{
xmlDocPtr rdoc;
xmlNode *root_ptr;
- yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
+ yaz_marc_write_xml(input->u.marc.handle, &root_ptr,
+ "http://www.loc.gov/MARC21/slim", 0, 0);
rdoc = xmlNewDoc((const xmlChar*) "1.0");
xmlDocSetRootElement(rdoc, root_ptr);
return convert_extract_doc(tinfo, input, p, rdoc);
if (!input)
return RECCTRL_EXTRACT_ERROR_GENERIC;
- odr_reset(tinfo->odr_record);
+ nmem_reset(tinfo->nmem_record);
+
+ if (p->setStoreData == 0)
+ return extract_xml_full(tinfo, input, p);
switch(input->type)
{
case DOM_INPUT_XMLREADER:
+/* Read callback handed to xmlReadIO(). context is the recRetrieveCtrl given
+   as the I/O handler; the request is forwarded to its stream's readf and
+   that function's return value is passed back unchanged. */
static int ioread_ret(void *context, char *buffer, int len)
{
struct recRetrieveCtrl *p = context;
- return p->stream->readf(p->stream, buffer, len);
+ int r = p->stream->readf(p->stream, buffer, len);
+ return r;
}
static int ioclose_ret(void *context)
return 0;
}
-static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
+static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
{
/* const char *esn = zebra_dom_ns; */
const char *esn = 0;
{
p->diagnostic =
YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+ p->addinfo = odr_strdup(p->odr, esn);
return 0;
}
params[0] = 0;
- set_param_int(params, "id", p->localno, p->odr);
+ set_param_int(params, "id", p->localno, p->odr->mem);
if (p->fname)
- set_param_str(params, "filename", p->fname, p->odr);
+ set_param_str(params, "filename", p->fname, p->odr->mem);
if (p->staticrank >= 0)
- set_param_int(params, "rank", p->staticrank, p->odr);
+ set_param_int(params, "rank", p->staticrank, p->odr->mem);
if (esn)
- set_param_str(params, "schema", esn, p->odr);
+ set_param_str(params, "schema", esn, p->odr->mem);
else
if (retrieve->name)
- set_param_str(params, "schema", retrieve->name, p->odr);
+ set_param_str(params, "schema", retrieve->name, p->odr->mem);
else if (retrieve->identifier)
- set_param_str(params, "schema", retrieve->identifier, p->odr);
+ set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
else
- set_param_str(params, "schema", "", p->odr);
+ set_param_str(params, "schema", "", p->odr->mem);
if (p->score >= 0)
- set_param_int(params, "score", p->score, p->odr);
- set_param_int(params, "size", p->recordSize, p->odr);
+ set_param_int(params, "score", p->score, p->odr->mem);
+ set_param_int(params, "size", p->recordSize, p->odr->mem);
doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
0 /* URL */,
}
/* retrieve conversion */
- perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp);
+ perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab