-/* $Id: mod_dom.c,v 1.17 2007-02-23 11:10:37 adam Exp $
+/* $Id: mod_dom.c,v 1.24 2007-02-28 16:46:19 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
#define XML_STRCMP(a,b) strcmp((char*)a, b)
#define XML_STRLEN(a) strlen((char*)a)
+
+/* Iterate over a sibling list, running the statement that follows only for
+ * nodes whose type is XML_ELEMENT_NODE.  `ptr` itself is advanced via
+ * ->next, so it is NULL once the loop has run to completion.
+ * The filter is spelled "if (not element) {} else" so that the macro
+ * already owns its `else`: an `else` written by the caller after an
+ * unbraced use can no longer attach to the macro's hidden `if`. */
+#define FOR_EACH_ELEMENT(ptr) \
+    for (; (ptr); (ptr) = (ptr)->next) \
+        if ((ptr)->type != XML_ELEMENT_NODE) {} else
+
static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
const char *fmt, ...)
#ifdef __GNUC__
{
va_list ap;
char buf[4096];
- xmlChar *node_path = 0;
-
- if (ptr)
- node_path = xmlGetNodePath(ptr);
va_start(ap, fmt);
yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
- yaz_log(level, "%s: dom filter %s%s: %s",
- tinfo->fname ? tinfo->fname : "none",
- node_path ? "in " : "",
- node_path ? (const char *) node_path : "", buf);
-
- if (node_path)
- xmlFree(node_path);
+ if (ptr)
+ {
+ yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none",
+ xmlGetLineNo(ptr), buf);
+ }
+ else
+ {
+ yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
+ }
va_end(ap);
}
struct convert_s **l)
{
*l = 0;
- for(; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "xslt"))
{
struct _xmlAttr *attr;
tmp_xslt_full_name);
return ZEBRA_FAIL;
}
- }
- else
- {
- dom_log(YLOG_WARN, tinfo, ptr,
- "missing attribute 'stylesheet' ");
- return ZEBRA_FAIL;
- }
- *l = p;
- l = &p->next;
+ }
+ else
+ {
+ dom_log(YLOG_WARN, tinfo, ptr,
+ "missing attribute 'stylesheet' ");
+ return ZEBRA_FAIL;
+ }
+ *l = p;
+ l = &p->next;
}
else
{
dom_log(YLOG_WARN, tinfo, ptr,
- "bad node '%s'", ptr->name);
+ "bad element '%s', expected <xslt>", ptr->name);
return ZEBRA_FAIL;
}
}
}
static ZEBRA_RES perform_convert(struct filter_info *tinfo,
+ struct recExtractCtrl *extctr,
struct convert_s *convert,
const char **params,
xmlDocPtr *doc,
{
for (; convert; convert = convert->next)
{
+ xmlChar *buf_out = 0;
+ int len_out = 0;
xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
*doc, params);
if (last_xsp)
*last_xsp = convert->stylesheet_xsp;
+
xmlFreeDoc(*doc);
- *doc = res_doc;
+
+ /* now saving into buffer and re-reading into DOM to avoid annoying
+ XSLT problem with thrown-out indentation text nodes */
+ if (res_doc){
+ xsltSaveResultToString(&buf_out, &len_out, res_doc,
+ convert->stylesheet_xsp);
+ xmlFreeDoc(res_doc);
+ }
+
+
+ *doc = xmlParseDoc(buf_out);
+
+        /* writing debug info out.  extctr is NULL when this is called for
+           the retrieve conversion, and buf_out stays NULL when
+           xsltApplyStylesheet returned no document, so guard both
+           before using them. */
+        if (extctr && extctr->flagShowRecords)
+            yaz_log(YLOG_LOG, "%s: XSLT %s \n %s",
+                    tinfo->fname ? tinfo->fname : "(none)",
+                    convert->stylesheet,
+                    buf_out ? (const char *) buf_out : "(null)");
+
+ xmlFree(buf_out);
}
return ZEBRA_OK;
}
static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
const char *syntax, const char *name)
{
- for (; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "marc"))
{
yaz_iconv_t iconv = 0;
for (attr = ptr->properties; attr; attr = attr->next)
{
- if (attr_content(attr, "charset", &input_charset))
+ if (attr_content(attr, "inputcharset", &input_charset))
;
else
{
dom_log(YLOG_WARN, tinfo, ptr,
- "bad attribute @%s, expected @charset",
+ "bad attribute @%s, expected @inputcharset",
attr->name);
}
}
yaz_log(YLOG_LOG, "%s dom filter: "
"loading config file %s", tinfo->fname, tinfo->full_name);
-
+
doc = xmlParseFile(tinfo->full_name);
if (!doc)
{
return ZEBRA_FAIL;
}
- for (ptr = ptr->children; ptr; ptr = ptr->next)
- {
- if (ptr->type != XML_ELEMENT_NODE)
- continue;
+ ptr = ptr->children;
+ FOR_EACH_ELEMENT(ptr) {
if (!XML_STRCMP(ptr->name, "extract"))
{
/*
xmlChar * index_p)
{
xmlChar *text = xmlNodeGetContent(node);
- size_t text_len = strlen((const char *)text);
-
+    /* xmlNodeGetContent can return NULL; treat that as empty text rather
+       than passing NULL to strlen */
+    size_t text_len = text ? strlen((const char *)text) : 0;
/* if there is no text, we do not need to proceed */
if (text_len)
type[eval - bval] = '\0';
}
- /* actually indexing the text given */
- dom_log(YLOG_DEBUG, tinfo, 0,
- "INDEX '%s:%s' '%s'",
- index, type, text);
+ /* writing debug out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "INDEX '%s:%s' '%s'",
+ index ? (const char *) index : "null",
+ type ? (const char *) type : "null",
+ text ? (const char *) text : "null");
+ /* actually indexing the text given */
recword->index_name = (const char *)index;
if (type && *type)
recword->index_type = *type;
xmlChar * rank_p,
xmlChar * type_p)
{
- dom_log(YLOG_DEBUG, tinfo, 0,
- "RECORD id=%s rank=%s type=%s",
- id_p, rank_p, type_p);
+
+ /* writing debug info out */
+ if (extctr->flagShowRecords)
+ dom_log(YLOG_LOG, tinfo, 0,
+ "RECORD id=%s rank=%s type=%s",
+ id_p ? (const char *) id_p : "(null)",
+ rank_p ? (const char *) rank_p : "(null)",
+ type_p ? (const char *) type_p : "(null)");
+
if (id_p)
sscanf((const char *)id_p, "%255s", extctr->match_criteria);
RecWord recword;
(*extctr->init)(extctr, &recword);
+ /*
if (extctr->flagShowRecords)
{
xmlDocDumpMemory(doc, &buf_out, &len_out);
fwrite(buf_out, len_out, 1, stdout);
xmlFree(buf_out);
}
+ */
process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
}
set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
/* input conversion */
- perform_convert(tinfo, input->convert, params, &doc, 0);
+ perform_convert(tinfo, p, input->convert, params, &doc, 0);
if (tinfo->store)
{
/* store conversion */
store_doc = xmlCopyDoc(doc, 1);
- perform_convert(tinfo, tinfo->store->convert,
+ perform_convert(tinfo, p, tinfo->store->convert,
params, &store_doc, &last_xsp);
}
store_doc ? store_doc : doc, last_xsp);
else
xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
- if (p->flagShowRecords)
- fwrite(buf_out, len_out, 1, stdout);
+
+ /* if (p->flagShowRecords)
+ fwrite(buf_out, len_out, 1, stdout); */
+
(*p->setStoreData)(p, buf_out, len_out);
xmlFree(buf_out);
xmlFreeDoc(store_doc);
/* extract conversion */
- perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
+ perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
/* finally, do the indexing */
if (doc)
- {
extract_dom_doc_node(tinfo, p, doc);
- /* extract_doc_alvis(tinfo, p, doc); */
+
+ if (doc)
xmlFreeDoc(doc);
- }
return RECCTRL_EXTRACT_OK;
}
xmlDocSetRootElement(doc, ptr2);
+ /* writing debug info out */
+ if (p->flagShowRecords){
+ xmlChar *buf_out = 0;
+ int len_out = 0;
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+ yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%s",
+ tinfo->fname ? tinfo->fname : "(none)",
+ depth,
+ buf_out);
+ xmlFree(buf_out);
+ }
+
return convert_extract_doc(tinfo, input, p, doc);
}
else
}
/* retrieve conversion */
- perform_convert(tinfo, retrieve->convert, params, &doc, &last_xsp);
+ perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;