-/* $Id: mod_dom.c,v 1.16 2007-02-18 21:53:22 adam Exp $
+/* $Id: mod_dom.c,v 1.24 2007-02-28 16:46:19 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
+#include <stdarg.h>
#include <yaz/diagbib1.h>
#include <yaz/tpath.h>
+#include <yaz/snprintf.h>
#include <libxml/xmlversion.h>
#include <libxml/parser.h>
struct filter_store *store;
};
+
+/* Small helper macros used while walking parsed XML.
+ *
+ * XML_STRCMP(a,b) / XML_STRLEN(a): cast the first argument to char * and
+ * forward to strcmp() / strlen().
+ *
+ * FOR_EACH_ELEMENT(ptr): loop header that advances ptr along ->next and
+ * executes the statement that follows only for nodes whose type is
+ * XML_ELEMENT_NODE. The variable passed in is itself advanced, and it is
+ * NULL once the loop has run to completion.
+ *
+ * NOTE(review): the expansion ends in an unbraced `if`, so an `else`
+ * written directly after the loop body would attach to that hidden `if`.
+ * NOTE(review): the macro arguments are not parenthesized in any of the
+ * three expansions, so only simple expressions are safe as arguments.
+ */
#define XML_STRCMP(a,b) strcmp((char*)a, b)
#define XML_STRLEN(a) strlen((char*)a)
+#define FOR_EACH_ELEMENT(ptr) for (; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE)
+/* dom_log: printf-style logging helper for this filter.
+ *
+ * level  log level, handed unchanged to yaz_log()
+ * tinfo  filter instance; tinfo->fname is used as the message prefix,
+ *        replaced by the literal "none" when it is NULL
+ * ptr    optional XML node the message is about; when non-NULL the value
+ *        of xmlGetLineNo(ptr) is inserted between the prefix and the text
+ * fmt    printf-style format string, followed by its arguments
+ *
+ * The separate forward declaration exists to carry the GCC `format`
+ * attribute (format string is parameter 4, variadic arguments start at
+ * parameter 5) so that GCC can check call sites against the format.
+ *
+ * NOTE(review): tinfo itself is dereferenced unconditionally, so callers
+ * must pass a non-NULL tinfo.
+ */
+static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
+ const char *fmt, ...)
+#ifdef __GNUC__
+ __attribute__ ((format (printf, 4, 5)))
+#endif
+ ;
+
+static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
+ const char *fmt, ...)
+{
+ va_list ap;
+ char buf[4096];
+/* The caller's format is expanded into buf first, with sizeof(buf)-1
+   passed as the size limit, and buf is then given to yaz_log() as a
+   plain %s argument, so the caller's format string is never used as
+   yaz_log's own format.
+   NOTE(review): presumably longer messages are cut off at the limit;
+   confirm against the yaz_vsnprintf implementation. */
+ va_start(ap, fmt);
+ yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
+ if (ptr)
+ {
+ yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none",
+ xmlGetLineNo(ptr), buf);
+ }
+ else
+ {
+ yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
+ }
+ va_end(ap);
+}
static void set_param_str(const char **params, const char *name,
struct convert_s **l)
{
*l = 0;
- for(; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "xslt"))
{
struct _xmlAttr *attr;
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s, "
- "expected @stylesheet",
- tinfo->fname,
- node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s", attr->name);
}
if (p->stylesheet)
{
NULL,
tmp_xslt_full_name))
{
- yaz_log(YLOG_WARN, "%s: dom filter: "
+ dom_log(YLOG_WARN, tinfo, 0,
"stylesheet %s not found in "
"path %s",
- tinfo->fname,
p->stylesheet,
tinfo->profile_path);
return ZEBRA_FAIL;
tmp_xslt_full_name);
if (!p->stylesheet_xsp)
{
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "could not parse xslt "
- "stylesheet %s",
- tinfo->fname, tmp_xslt_full_name);
+ dom_log(YLOG_WARN, tinfo, 0,
+ "could not parse xslt stylesheet %s",
+ tmp_xslt_full_name);
return ZEBRA_FAIL;
}
- }
- else
- {
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s missing attribute 'stylesheet' ",
- tinfo->fname, node_path);
- xmlFree(node_path);
- return ZEBRA_FAIL;
- }
- *l = p;
- l = &p->next;
+ }
+ else
+ {
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "missing attribute 'stylesheet' ");
+ return ZEBRA_FAIL;
+ }
+ *l = p;
+ l = &p->next;
}
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_LOG,
- "%s: dom filter: "
- "%s bad node '%s'",
- tinfo->fname, node_path, ptr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad element '%s', expected <xslt>", ptr->name);
return ZEBRA_FAIL;
}
}
}
static ZEBRA_RES perform_convert(struct filter_info *tinfo,
+ struct recExtractCtrl *extctr,
struct convert_s *convert,
const char **params,
xmlDocPtr *doc,
{
for (; convert; convert = convert->next)
{
+ xmlChar *buf_out = 0;
+ int len_out = 0;
xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
*doc, params);
if (last_xsp)
*last_xsp = convert->stylesheet_xsp;
+
xmlFreeDoc(*doc);
- *doc = res_doc;
+
+ /* now saving into buffer and re-reading into DOM to avoid annoying
+ XSLT problem with thrown-out indentation text nodes */
+ if (res_doc){
+ xsltSaveResultToString(&buf_out, &len_out, res_doc,
+ convert->stylesheet_xsp);
+ xmlFreeDoc(res_doc);
+ }
+
+
+ *doc = xmlParseDoc(buf_out);
+
+ /* writing debug info out */
+ if (extctr->flagShowRecords)
+ yaz_log(YLOG_LOG, "%s: XSLT %s \n %s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ convert->stylesheet,
+ buf_out);
+
+ xmlFree(buf_out);
}
return ZEBRA_OK;
}
}
static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
- const char *syntax,
- const char *name)
+ const char *syntax, const char *name)
{
- for (; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "marc"))
{
yaz_iconv_t iconv = 0;
for (attr = ptr->properties; attr; attr = attr->next)
{
- if (attr_content(attr, "charset", &input_charset))
+ if (attr_content(attr, "inputcharset", &input_charset))
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s,"
- " expected @charset",
- tinfo->fname,
- node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s, expected @inputcharset",
+ attr->name);
}
}
iconv = yaz_iconv_open("utf-8", input_charset);
if (!iconv)
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s unsupported @charset '%s'",
- tinfo->fname, node_path,
- input_charset);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "unsupported @charset '%s'", input_charset);
return ZEBRA_FAIL;
}
else
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s,"
- " expected @level",
- tinfo->fname, node_path,
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s, expected @level",
attr->name);
- xmlFree(node_path);
}
}
if (level_str)
}
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad element <%s>,"
- " expected <marc>|<xmlreader>",
- tinfo->fname, node_path, ptr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad element <%s>, expected <marc>|<xmlreader>",
+ ptr->name);
return ZEBRA_FAIL;
}
}
yaz_log(YLOG_LOG, "%s dom filter: "
"loading config file %s", tinfo->fname, tinfo->full_name);
-
+
doc = xmlParseFile(tinfo->full_name);
if (!doc)
{
if (!ptr || ptr->type != XML_ELEMENT_NODE
|| XML_STRCMP(ptr->name, "dom"))
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad root element <%s>,"
- " expected root element <dom>",
- tinfo->fname, node_path, ptr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad root element <%s>, expected root element <dom>",
+ ptr->name);
return ZEBRA_FAIL;
}
- for (ptr = ptr->children; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ ptr = ptr->children;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "extract"))
{
/*
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s"
- " expected @name",
- tinfo->fname,
- node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s, expected @name",
+ attr->name);
}
}
parse_convert(tinfo, ptr->children, &f->convert);
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s"
- " expected @identifier|@name",
- tinfo->fname,
- node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s, expected @identifier|@name",
+ attr->name);
}
}
parse_convert(tinfo, ptr->children, &f->convert);
;
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad attribute @%s"
- " expected @syntax|@name",
- tinfo->fname,
- node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad attribute @%s, expected @syntax|@name",
+ attr->name);
}
}
parse_input(tinfo, ptr->children, syntax, name);
}
else
{
- xmlChar *node_path = xmlGetNodePath(ptr);
- yaz_log(YLOG_WARN, "%s: dom filter: "
- "%s bad element <%s>,"
- " expected <extract>|<input>|<retrieve>|<store>",
- tinfo->fname, node_path, ptr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "bad element <%s>, "
+ "expected <extract>|<input>|<retrieve>|<store>",
+ ptr->name);
return ZEBRA_FAIL;
}
}
xmlChar * index_p)
{
xmlChar *text = xmlNodeGetContent(node);
- size_t text_len = strlen((const char *)text);
-
+ size_t text_len = strlen((const char *)text);
/* if there is no text, we do not need to proceed */
if (text_len)
type[eval - bval] = '\0';
}
- /* actually indexing the text given */
- yaz_log(YLOG_DEBUG, "%s dom filter: "
- "INDEX '%s:%s' '%s'",
- tinfo->fname, index, type, text);
+ /* writing debug out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "INDEX '%s:%s' '%s'",
+ index ? (const char *) index : "null",
+ type ? (const char *) type : "null",
+ text ? (const char *) text : "null");
+ /* actually indexing the text given */
recword->index_name = (const char *)index;
if (type && *type)
recword->index_type = *type;
xmlChar * rank_p,
xmlChar * type_p)
{
- yaz_log(YLOG_DEBUG, "%s dom filter: "
- "RECORD id=%s rank=%s type=%s",
- tinfo->fname, id_p, rank_p, type_p);
+
+ /* writing debug info out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "RECORD id=%s rank=%s type=%s",
+ id_p ? (const char *) id_p : "(null)",
+ rank_p ? (const char *) rank_p : "(null)",
+ type_p ? (const char *) type_p : "(null)");
+
if (id_p)
sscanf((const char *)id_p, "%255s", extctr->match_criteria);
/* if (!strcmp("update", type_str)) */
/* index_node(tinfo, ctrl, ptr, recword); */
/* else if (!strcmp("delete", type_str)) */
- /* yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */
+ /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter delete: to be implemented"); */
/* else */
- /* yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'", */
+ /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'", */
/* type_str); */
}
}
else
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s bad attribute @%s, expected @name",
- tinfo->fname, node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, node,
+ "bad attribute @%s, expected @name",
+ attr->name);
}
}
}
;
else
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s bad attribute @%s,"
- " expected @id|@rank|@type",
- tinfo->fname, node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, node,
+ "bad attribute @%s, expected @id|@rank|@type",
+ attr->name);
}
if (type_p && 0 != strcmp("update", (const char *)type_p))
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s attribute @%s,"
- " only implemented '@type='update'",
- tinfo->fname, node_path, attr->name);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, node,
+ "attribute @%s, only implemented '@type='update'",
+ attr->name);
}
-
-
}
set_record_info(tinfo, extctr, id_p, rank_p, type_p);
}
else
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s bad element <%s>,"
+ dom_log(YLOG_WARN, tinfo, node,
+ "bad element <%s>,"
" expected <record>|<index> in namespace '%s'",
- tinfo->fname, node_path,
node->name, zebra_dom_ns);
- xmlFree(node_path);
}
}
}
if (look && '\0' != *look)
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s content '%s', can not parse '%s'",
- tinfo->fname, node_path, pi_p, look);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, node,
+ "content '%s', can not parse '%s'",
+ pi_p, look);
}
else
set_record_info(tinfo, extctr, id, rank, 0);
}
else
{
- xmlChar *node_path = xmlGetNodePath(node);
- yaz_log(YLOG_WARN,"%s dom filter: "
- "%s content '%s', can not parse '%s'",
- tinfo->fname, node_path, pi_p, look);
- xmlFree(node_path);
+ dom_log(YLOG_WARN, tinfo, node,
+ "content '%s', can not parse '%s'",
+ pi_p, look);
}
}
}
RecWord recword;
(*extctr->init)(extctr, &recword);
+ /*
if (extctr->flagShowRecords)
{
xmlDocDumpMemory(doc, &buf_out, &len_out);
fwrite(buf_out, len_out, 1, stdout);
xmlFree(buf_out);
}
+ */
process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
}
set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
/* input conversion */
- perform_convert(tinfo, input->convert, params, &doc, 0);
+ perform_convert(tinfo, p, input->convert, params, &doc, 0);
if (tinfo->store)
{
/* store conversion */
store_doc = xmlCopyDoc(doc, 1);
- perform_convert(tinfo, tinfo->store->convert,
+ perform_convert(tinfo, p, tinfo->store->convert,
params, &store_doc, &last_xsp);
}
store_doc ? store_doc : doc, last_xsp);
else
xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
+
+ /* if (p->flagShowRecords)
+ fwrite(buf_out, len_out, 1, stdout); */
+
(*p->setStoreData)(p, buf_out, len_out);
xmlFree(buf_out);
xmlFreeDoc(store_doc);
/* extract conversion */
- perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
+ perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
/* finally, do the indexing */
if (doc)
- {
extract_dom_doc_node(tinfo, p, doc);
- /* extract_doc_alvis(tinfo, p, doc); */
+
+ if (doc)
xmlFreeDoc(doc);
- }
return RECCTRL_EXTRACT_OK;
}
xmlDocSetRootElement(doc, ptr2);
+ /* writing debug info out */
+ if (p->flagShowRecords){
+ xmlChar *buf_out = 0;
+ int len_out = 0;
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+ yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ depth,
+ buf_out);
+ xmlFree(buf_out);
+ }
+
return convert_extract_doc(tinfo, input, p, doc);
}
else
{
int i;
- yaz_log(YLOG_WARN, "%s dom filter: "
+ dom_log(YLOG_WARN, tinfo, 0,
"MARC: Skipping bad byte %d (0x%02X)",
- tinfo->fname, *buf & 0xff, *buf & 0xff);
+ *buf & 0xff, *buf & 0xff);
for (i = 0; i<4; i++)
buf[i] = buf[i+1];
record_length = atoi_n (buf, 5);
if (record_length < 25)
{
- yaz_log (YLOG_WARN, "%s dom filter: "
- "MARC record length < 25, is %d",
- tinfo->fname, record_length);
+ dom_log(YLOG_WARN, tinfo, 0,
+ "MARC record length < 25, is %d", record_length);
return RECCTRL_EXTRACT_ERROR_GENERIC;
}
read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
if (read_bytes < record_length-5)
{
- yaz_log (YLOG_WARN, "%s dom filter: "
- "Couldn't read whole MARC record",
- tinfo->fname);
+ dom_log(YLOG_WARN, tinfo, 0,
+ "couldn't read whole MARC record");
return RECCTRL_EXTRACT_ERROR_GENERIC;
}
r = yaz_marc_read_iso2709(input->u.marc.handle, buf, record_length);
if (r < record_length)
{
- yaz_log (YLOG_WARN, "%s dom filter: "
- "Parsing of MARC record failed r=%d length=%d",
- tinfo->fname, r, record_length);
+ dom_log (YLOG_WARN, tinfo, 0,
+ "parsing of MARC record failed r=%d length=%d",
+ r, record_length);
return RECCTRL_EXTRACT_ERROR_GENERIC;
}
else
}
/* retrieve conversion */
- perform_convert(tinfo, retrieve->convert, params, &doc, &last_xsp);
+ perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;