-/* $Id: mod_dom.c,v 1.1 2007-02-07 12:08:54 adam Exp $
+/* $Id: mod_dom.c,v 1.8 2007-02-14 16:16:15 marc Exp $
Copyright (C) 1995-2007
Index Data ApS
#define XML_STRCMP(a,b) strcmp((char*)a, b)
#define XML_STRLEN(a) strlen((char*)a)
+
+
+
static void set_param_str(const char **params, const char *name,
const char *value, ODR odr)
{
return 0;
}
+
+/* Alvis style indexing */
+/* Namespace URI of the Zebra XSLT indexing schema.  Element namespace
+   hrefs are compared against zebra_xslt_ns to recognise indexing
+   elements. */
+#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
+static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
+
+/* Alvis style indexing */
static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
xmlNodePtr ptr, RecWord *recWord)
{
}
}
-#define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
-
-
-static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
-
+/* Alvis style indexing */
static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
xmlNodePtr ptr, RecWord *recWord)
{
}
}
+/* Alvis style indexing */
static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
xmlNodePtr ptr, RecWord *recWord)
{
yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'",
type_str);
}
+
+
+/*
+ * Alvis style indexing: index one parsed XML document.
+ *
+ * Does nothing when doc is NULL.  Otherwise a RecWord is initialised
+ * through recctr->init, the serialised document is written to stdout when
+ * recctr->flagShowRecords is set, and the document's root element is
+ * handed to index_record().  A document without a root element is only
+ * logged as a warning.  The document itself is not freed here.
+ */
+static void extract_doc_alvis(struct filter_info *tinfo,
+                              struct recExtractCtrl *recctr,
+                              xmlDocPtr doc)
+{
+    RecWord word;
+    xmlNodePtr root;
+
+    if (!doc)
+        return;
+
+    (*recctr->init)(recctr, &word);
+
+    if (recctr->flagShowRecords){
+        xmlChar *dump;
+        int dump_len;
+
+        xmlDocDumpMemory(doc, &dump, &dump_len);
+        fwrite(dump, dump_len, 1, stdout);
+        xmlFree(dump);
+    }
+
+    root = xmlDocGetRootElement(doc);
+    if (!root){
+        yaz_log(YLOG_WARN, "No root for index XML record");
+        return;
+    }
+    index_record(tinfo, recctr, root, &word);
+}
+
+
+/*
+ * DOM filter style indexing: fetch the text value of a named attribute.
+ *
+ * Returns 1 and stores a pointer to the attribute's text in *dst_content
+ * when attr is called `name` and its first child is a text node.  Returns
+ * 0 and leaves *dst_content untouched otherwise.  The stored pointer
+ * refers to the content of the attribute's own child node; nothing is
+ * copied.
+ */
+static int attr_content_xml(struct _xmlAttr *attr, const char *name,
+                            xmlChar **dst_content)
+{
+    xmlNodePtr first;
+
+    if (0 != XML_STRCMP(attr->name, name))
+        return 0;
+
+    first = attr->children;
+    if (!first || first->type != XML_TEXT_NODE)
+        return 0;
+
+    *dst_content = first->content;
+    return 1;
+}
+
+/* DOM filter style indexing */
+/* #define ZEBRA_XSLT_NS "http://indexdata.com/zebra-2.0" */
+/* static const char *zebra_xslt_ns = ZEBRA_XSLT_NS; */
+
+/* DOM filter style indexing */
+/* Target name a processing instruction must carry before its content is
+   parsed as a Zebra record/index instruction. */
+#define ZEBRA_PI_NAME "zebra-2.0"
+static const char *zebra_pi_name = ZEBRA_PI_NAME;
+
+
+/* Copy the bytes in [begin, end) into dst (capacity dst_size, must be > 0).
+   Over-long input is truncated; dst is always NUL-terminated. */
+static void index_spec_copy(xmlChar *dst, size_t dst_size,
+                            const xmlChar *begin, const xmlChar *end)
+{
+    size_t len = (size_t)(end - begin);
+
+    if (len >= dst_size)
+        len = dst_size - 1;
+    memcpy(dst, begin, len);
+    dst[len] = '\0';
+}
+
+/*
+ * DOM filter style indexing: report the text content of node for every
+ * index named in index_p.
+ *
+ * index_p is a list of "name" or "name:type" items separated by a single
+ * space.  Parsing stops at the end of the string or at an item that would
+ * start with ' ' or ':'.  For now every item is only printed to stdout
+ * together with the node content; tinfo and recctr are not used yet.
+ *
+ * Names and types longer than 255 bytes are truncated instead of
+ * overflowing the local buffers.  A NULL index_p is ignored, and missing
+ * node content is printed as an empty string.
+ */
+void index_value_of(struct filter_info *tinfo,
+                    struct recExtractCtrl *recctr,
+                    xmlNodePtr node,
+                    xmlChar * index_p)
+{
+    xmlChar *text;
+    xmlChar *look = index_p;
+
+    xmlChar index[256];
+    xmlChar type[256];
+
+    if (!index_p)
+        return;
+
+    text = xmlNodeGetContent(node);
+
+    /* parsing all index name/type pairs - may not start with ' ' or ':' */
+    while (*look && ' ' != *look && ':' != *look){
-static int extract_doc(struct filter_info *tinfo, struct filter_input *input,
- struct recExtractCtrl *p, xmlDocPtr doc)
+        const xmlChar *bval;
+
+        /* parsing one index name */
+        bval = look;
+        while (*look && ':' != *look && ' ' != *look)
+            look++;
+        index_spec_copy(index, sizeof(index), bval, look);
+
+        /* parsing one index type, if existing */
+        *type = '\0';
+        if (':' == *look){
+            look++;
+
+            bval = look;
+            while (*look && ' ' != *look)
+                look++;
+            index_spec_copy(type, sizeof(type), bval, look);
+        }
+
+        printf("INDEX '%s:%s' '%s'\n", index, type,
+               text ? (const char *)text : "");
+
+        /* step over one separating space, unless it is trailing */
+        if (' ' == *look && *(look+1))
+            look++;
+    }
+
+    if (text)
+        xmlFree(text);
+
+    /* TODO: feed the value into a RecWord instead of printing it: set
+       term_buf/term_len to the content, index_name to the parsed name,
+       and index_type to the first character of the parsed type when one
+       was given (restoring the previous type afterwards). */
+}
+
+
+/*
+ * DOM filter style indexing: report record-level metadata.
+ *
+ * For now this only prints the record id, rank and action to stdout;
+ * tinfo and recctr are not used yet.  Any of id_p, rank_p and action_p
+ * may be NULL (callers pass NULL for values that were not given).  A NULL
+ * value is printed as "(null)" rather than being handed to printf's %s,
+ * which would be undefined behaviour.
+ */
+void set_record_info(struct filter_info *tinfo,
+                     struct recExtractCtrl *recctr,
+                     xmlChar * id_p,
+                     xmlChar * rank_p,
+                     xmlChar * action_p)
{
- RecWord recWord;
- const char *params[10];
+    const char *id = id_p ? (const char *)id_p : "(null)";
+    const char *rank = rank_p ? (const char *)rank_p : "(null)";
+    const char *action = action_p ? (const char *)action_p : "(null)";
+
+    printf("RECORD id=%s rank=%s action=%s\n", id, rank, action);
+}
+
+
+/*
+ * DOM filter style indexing: handle an element in the Zebra namespace.
+ *
+ * Nodes that are not elements, or whose namespace href differs from
+ * zebra_xslt_ns, are ignored.  Otherwise:
+ *   <index>   every @name attribute is passed to index_value_of() together
+ *             with the node; any other attribute is reported on stdout.
+ *   <record>  @id, @rank and @action are collected and passed to
+ *             set_record_info(); any other attribute is reported, as is
+ *             an @action value other than "update".
+ *   others    reported as a bad element.
+ *
+ * Strings returned by xmlGetNodePath() are owned by the caller, so each
+ * one is released after it has been printed.
+ */
+void process_xml_element_zebra_node(struct filter_info *tinfo,
+                                    struct recExtractCtrl *recctr,
+                                    xmlNodePtr node)
+{
+    struct _xmlAttr *attr;
+    xmlChar *path;
+
+    if (node->type != XML_ELEMENT_NODE
+        || !node->ns || 0 != XML_STRCMP(node->ns->href, zebra_xslt_ns))
+        return;
+
+    if (0 == XML_STRCMP(node->name, "index")){
+        for (attr = node->properties; attr; attr = attr->next){
+            xmlChar *index_p = 0;
+
+            if (attr_content_xml(attr, "name", &index_p)){
+                index_value_of(tinfo, recctr, node, index_p);
+            } else {
+                path = xmlGetNodePath(node);
+                printf("dom filter: %s bad attribute @%s, expected @name\n",
+                       path ? (const char *)path : "(null)", attr->name);
+                if (path)
+                    xmlFree(path);
+            }
+        }
+    }
+    else if (0 == XML_STRCMP(node->name, "record")){
+        xmlChar *id_p = 0;
+        xmlChar *rank_p = 0;
+        xmlChar *action_p = 0;
+
+        for (attr = node->properties; attr; attr = attr->next){
+            if (attr_content_xml(attr, "id", &id_p))
+                continue;
+            if (attr_content_xml(attr, "rank", &rank_p))
+                continue;
+            /* the attribute is spelled "action", matching the diagnostics */
+            if (attr_content_xml(attr, "action", &action_p)){
+                /* checked here, once, so attr is the @action attribute */
+                if (action_p
+                    && 0 != strcmp("update", (const char *)action_p)){
+                    path = xmlGetNodePath(node);
+                    printf("dom filter: %s attribute @%s,"
+                           " only implemented '@action=\"update\"\n",
+                           path ? (const char *)path : "(null)", attr->name);
+                    if (path)
+                        xmlFree(path);
+                }
+                continue;
+            }
+
+            path = xmlGetNodePath(node);
+            printf("dom filter: %s bad attribute @%s,"
+                   " expected @id|@rank|@action\n",
+                   path ? (const char *)path : "(null)", attr->name);
+            if (path)
+                xmlFree(path);
+        }
+        set_record_info(tinfo, recctr, id_p, rank_p, action_p);
+    } else {
+        path = xmlGetNodePath(node);
+        printf("dom filter: %s bad element <%s>,"
+               " expected <record>|<index> in namespace '%s'\n",
+               path ? (const char *)path : "(null)", node->name,
+               zebra_xslt_ns);
+        if (path)
+            xmlFree(path);
+    }
+}
+
+
+/* Copy the bytes in [begin, end) into dst (capacity dst_size, must be > 0).
+   Over-long input is truncated; dst is always NUL-terminated. */
+static void pi_value_copy(xmlChar *dst, size_t dst_size,
+                          const xmlChar *begin, const xmlChar *end)
+{
+    size_t len = (size_t)(end - begin);
+
+    if (len >= dst_size)
+        len = dst_size - 1;
+    memcpy(dst, begin, len);
+    dst[len] = '\0';
+}
+
+/*
+ * DOM filter style indexing: parse a Zebra processing instruction.
+ *
+ * Processing instructions whose target is not zebra_pi_name are ignored.
+ * Two instruction forms are understood:
+ *   record [id=VALUE] [rank=VALUE]
+ *       VALUE runs up to the next space.  The values are passed to
+ *       set_record_info() with a NULL action.  Anything left over after
+ *       the optional values is reported as a parse error on stdout.
+ *   index SPEC
+ *       *index_pp is set to point at SPEC inside node->content, so it is
+ *       only valid while node is alive.  Nothing is copied.
+ * Any other content is reported as a parse error.  *index_pp is written
+ * only for the index form.
+ *
+ * id and rank values longer than 255 bytes are truncated.  A processing
+ * instruction without content is treated as having empty content.
+ */
+void process_xml_pi_node(struct filter_info *tinfo,
+                         struct recExtractCtrl *recctr,
+                         xmlNodePtr node,
+                         xmlChar **index_pp)
+{
+    xmlChar *pi_p;
+    xmlChar *look;
+    xmlChar *path;
+
+    /* only continue parsing for the right PI name */
+    if (0 != strcmp(zebra_pi_name, (const char *)node->name))
+        return;
+
+    /* never written through, so an empty literal can stand in for
+       missing content */
+    pi_p = node->content ? node->content : (xmlChar *)"";
+    look = pi_p;
+
+    /* parsing PI record instructions */
+    if (0 == strncmp((const char *)look, "record", 6)){
+        xmlChar id[256];
+        xmlChar rank[256];
+        xmlChar *bval;
+
+        *id = '\0';
+        *rank = '\0';
+
+        look += 6;
+
+        /* eat whitespace (a single trailing space is left in place) */
+        while (' ' == *look && *(look+1))
+            look++;
+
+        /* parse possible id */
+        if (0 == strncmp((const char *)look, "id=", 3)){
+            look += 3;
+            bval = look;
+            while (*look && ' ' != *look)
+                look++;
+            pi_value_copy(id, sizeof(id), bval, look);
+        }
+
+        /* eat whitespace */
+        while (' ' == *look && *(look+1))
+            look++;
+
+        /* parse possible rank */
+        if (0 == strncmp((const char *)look, "rank=", 5)){
+            /* skip exactly the 5 bytes of "rank="; skipping more would
+               drop the first value byte or step past the terminator */
+            look += 5;
+            bval = look;
+            while (*look && ' ' != *look)
+                look++;
+            pi_value_copy(rank, sizeof(rank), bval, look);
+        }
+
+        /* eat whitespace */
+        while (' ' == *look && *(look+1))
+            look++;
+
+        if ('\0' != *look){
+            path = xmlGetNodePath(node);
+            printf ("ERROR %s: content '%s'; can not parse '%s'\n",
+                    path ? (const char *)path : "(null)", pi_p, look);
+            if (path)
+                xmlFree(path);
+        } else {
+            /* no action can be given in a PI yet */
+            set_record_info(tinfo, recctr, id, rank, 0);
+        }
+    }
+
+    /* parsing index instruction */
+    else if (0 == strncmp((const char *)look, "index", 5)){
+        look += 5;
+
+        /* eat whitespace */
+        while (' ' == *look && *(look+1))
+            look++;
+
+        /* export index instructions to outside */
+        *index_pp = look;
+
+    /* neither record nor index */
+    } else {
+        path = xmlGetNodePath(node);
+        printf ("ERROR %s: content '%s'; can not parse '%s'\n",
+                path ? (const char *)path : "(null)", pi_p, look);
+        if (path)
+            xmlFree(path);
+    }
+}
+
+/*
+ * DOM filter style indexing: walk a node and its descendants.
+ *
+ * The node itself is first offered to process_xml_element_zebra_node().
+ * Its children are then visited in document order:
+ *   - processing instructions go to process_xml_pi_node(), which may
+ *     leave behind a pending index instruction;
+ *   - element children first consume a pending index instruction, if any,
+ *     through index_value_of(), and are then processed recursively;
+ *   - all other node types are skipped and leave a pending instruction
+ *     untouched.
+ */
+void process_xml_element_node(struct filter_info *tinfo,
+                              struct recExtractCtrl *recctr,
+                              xmlNodePtr node)
+{
+    /* index instruction from a PI, kept until the next element child */
+    xmlChar *pending_index = 0;
+    xmlNodePtr child;
+
+    /* set record data or index the content if this is a zebra element */
+    process_xml_element_zebra_node(tinfo, recctr, node);
+
+    for (child = node->children; child; child = child->next){
+        switch (child->type){
+        case XML_PI_NODE:
+            /* check and set PI record and index instructions */
+            process_xml_pi_node(tinfo, recctr, child, &pending_index);
+            break;
+        case XML_ELEMENT_NODE:
+            if (pending_index){
+                index_value_of(tinfo, recctr, child, pending_index);
+                pending_index = 0;
+            }
+            process_xml_element_node(tinfo, recctr, child);
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+
+
+
+
+/*
+ * DOM filter style indexing: entry point for one parsed document.
+ *
+ * Prints the document's node path to stdout and then walks the tree with
+ * process_xml_element_node(), passing the document cast to a node
+ * pointer.  The path string returned by xmlGetNodePath() is owned by the
+ * caller and is released here.
+ */
+void extract_dom_doc_node(struct filter_info *tinfo,
+                          struct recExtractCtrl *recctr,
+                          xmlDocPtr doc)
+{
+    xmlChar *path = xmlGetNodePath((xmlNodePtr)doc);
+
+    printf("DOC %s\n", path ? (const char *)path : "(null)");
+    if (path)
+        xmlFree(path);
+
+    process_xml_element_node(tinfo, recctr, (xmlNodePtr)doc);
+}
+
+
+
+
+static int convert_extract_doc(struct filter_info *tinfo,
+ struct filter_input *input,
+ struct recExtractCtrl *p,
+ xmlDocPtr doc)
+
+{
+ /* RecWord recWord; */
xmlChar *buf_out;
int len_out;
+ const char *params[10];
xsltStylesheetPtr last_xsp = 0;
xmlDocPtr store_doc = 0;
/* input conversion */
perform_convert(tinfo, input->convert, params, &doc, 0);
- (*p->init)(p, &recWord);
-
if (tinfo->store)
{
/* store conversion */
/* extract conversion */
perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
- if (doc)
- {
- xmlNodePtr root_ptr;
- if (p->flagShowRecords)
- {
- xmlDocDumpMemory(doc, &buf_out, &len_out);
- fwrite(buf_out, len_out, 1, stdout);
- xmlFree(buf_out);
- }
- root_ptr = xmlDocGetRootElement(doc);
- if (root_ptr)
- index_record(tinfo, p, root_ptr, &recWord);
- else
- {
- yaz_log(YLOG_WARN, "No root for index XML record");
- }
+
+ /* finally, do the indexing */
+ if (doc){
+ extract_dom_doc_node(tinfo, p, doc);
+ extract_doc_alvis(tinfo, p, doc);
xmlFreeDoc(doc);
- }
+ }
+
return RECCTRL_EXTRACT_OK;
}
p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE);
+ XML_PARSE_XINCLUDE|
+ XML_PARSE_NOENT);
}
if (!input->u.xmlreader.reader)
return RECCTRL_EXTRACT_ERROR_GENERIC;
xmlDocSetRootElement(doc, ptr2);
- return extract_doc(tinfo, input, p, doc);
+ return convert_extract_doc(tinfo, input, p, doc);
}
else
{
xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE);
+ XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
if (!doc)
{
return RECCTRL_EXTRACT_ERROR_GENERIC;
}
- return extract_doc(tinfo, input, p, doc);
+ return convert_extract_doc(tinfo, input, p, doc);
}
else
return RECCTRL_EXTRACT_EOF;
yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
rdoc = xmlNewDoc((const xmlChar*) "1.0");
xmlDocSetRootElement(rdoc, root_ptr);
- return extract_doc(tinfo, input, p, rdoc);
+ return convert_extract_doc(tinfo, input, p, rdoc);
}
return RECCTRL_EXTRACT_OK;
}
doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
0 /* URL */,
0 /* encoding */,
- XML_PARSE_XINCLUDE);
+ XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
if (!doc)
{
p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;