Changed record update API. It is now handled by function
[idzebra-moved-to-github.git] / index / mod_dom.c
index fca04eb..66f11aa 100644 (file)
@@ -1,4 +1,5 @@
-/* $Id: mod_dom.c,v 1.26 2007-03-03 21:39:10 adam Exp $
+
+/* $Id: mod_dom.c,v 1.31 2007-03-08 17:19:12 marc Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -76,6 +77,7 @@ struct filter_retrieve {
     struct filter_retrieve *next;
 };
 
+#define DOM_INPUT_DOM 0
 #define DOM_INPUT_XMLREADER 1
 #define DOM_INPUT_MARC 2
 struct filter_input {
@@ -85,14 +87,17 @@ struct filter_input {
     int type;
     union {
         struct {
-            const char *input_charset;
-            yaz_marc_t handle;
-            yaz_iconv_t iconv;
-        } marc;
+            int dummy;
+        } dom;
         struct {
             xmlTextReaderPtr reader;
             int split_level;
         } xmlreader;
+        struct {
+            const char *input_charset;
+            yaz_marc_t handle;
+            yaz_iconv_t iconv;
+        } marc;
     } u;
     struct filter_input *next;
 };
@@ -234,6 +239,8 @@ static void destroy_dom(struct filter_info *tinfo)
         {
             switch(i_ptr->type)
             {
+            case DOM_INPUT_DOM:
+                break;
             case DOM_INPUT_XMLREADER:
                 if (i_ptr->u.xmlreader.reader)
                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
@@ -362,7 +369,7 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo,
         *doc = xmlParseMemory((const char *) buf_out, len_out);
 
         /* writing debug info out */
-        if (extctr->flagShowRecords)
+        if (extctr && extctr->flagShowRecords)
             yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
                     tinfo->fname ? tinfo->fname : "(none)", 
                     convert->stylesheet,
@@ -461,10 +468,16 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
             parse_convert(tinfo, ptr, &p->convert);
             break;
         }
+        else if (!XML_STRCMP(ptr->name, "xslt")){
+            struct filter_input *p 
+                = new_input(tinfo, DOM_INPUT_DOM);
+            parse_convert(tinfo, ptr, &p->convert);
+            break;
+        }
         else
         {
             dom_log(YLOG_WARN, tinfo, ptr,
-                    "bad element <%s>, expected <marc>|<xmlreader>",
+                    "bad element <%s>, expected <marc>|<xmlreader>|<xslt>",
                     ptr->name);
             return ZEBRA_FAIL;
         }
@@ -630,6 +643,13 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
             return ZEBRA_FAIL;
         }
     }
+
+    /* adding an empty DOM dummy type if no <input> list has been defined */
+    if (! tinfo->input_list){
+          struct filter_input *p 
+                = new_input(tinfo, DOM_INPUT_DOM);
+    }
+    
     return ZEBRA_OK;
 }
 
@@ -1057,7 +1077,6 @@ static void extract_dom_doc_node(struct filter_info *tinfo,
     RecWord recword;
     (*extctr->init)(extctr, &recword);
 
-    tinfo->record_info_invoked = 0;
     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
 }
 
@@ -1076,6 +1095,14 @@ static int convert_extract_doc(struct filter_info *tinfo,
     xsltStylesheetPtr last_xsp = 0;
     xmlDocPtr store_doc = 0;
 
+    /* by default, do not ingest the record */
+    tinfo->record_info_invoked = 0;
+
+    /* exit if empty document given */
+    if (!doc)
+        return RECCTRL_EXTRACT_SKIP;
+
+    /* we actually have a document which needs to be processed further */
     params[0] = 0;
     set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
 
@@ -1090,14 +1117,12 @@ static int convert_extract_doc(struct filter_info *tinfo,
                         params, &store_doc, &last_xsp);
     }
     
+    /* saving either store doc or original doc in case no store doc exists */
     if (last_xsp)
         xsltSaveResultToString(&buf_out, &len_out, 
                                store_doc ? store_doc : doc, last_xsp);
     else
         xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
-  
-    /* if (p->flagShowRecords)
-       fwrite(buf_out, len_out, 1, stdout); */
 
     (*p->setStoreData)(p, buf_out, len_out);
     xmlFree(buf_out);
@@ -1108,15 +1133,17 @@ static int convert_extract_doc(struct filter_info *tinfo,
     /* extract conversion */
     perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
 
+
     /* finally, do the indexing */
-    if (doc)
+    if (doc){
         extract_dom_doc_node(tinfo, p, doc);
-
-    if (doc)
        xmlFreeDoc(doc);
-
+    }
+    
+    /* there was nothing to index, so there is no inserted/updated record */
     if (tinfo->record_info_invoked == 0)
         return RECCTRL_EXTRACT_SKIP;
+
     return RECCTRL_EXTRACT_OK;
 }
 
@@ -1134,8 +1161,9 @@ static int extract_xml_split(struct filter_info *tinfo,
                                                    p /* I/O handler */,
                                                    0 /* URL */, 
                                                    0 /* encoding */,
-                                                   XML_PARSE_XINCLUDE|
-                                                   XML_PARSE_NOENT);
+                                                   XML_PARSE_XINCLUDE
+                                                   | XML_PARSE_NOENT
+                                                   | XML_PARSE_NONET);
     }
     if (!input->u.xmlreader.reader)
        return RECCTRL_EXTRACT_ERROR_GENERIC;
@@ -1145,13 +1173,20 @@ static int extract_xml_split(struct filter_info *tinfo,
     {
         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
+
         if (type == XML_READER_TYPE_ELEMENT && 
             input->u.xmlreader.split_level == depth)
         {
-            xmlNodePtr ptr
-                = xmlTextReaderExpand(input->u.xmlreader.reader);
+            xmlNodePtr ptr;
+
+            /* by default, do not ingest the record */
+            tinfo->record_info_invoked = 0;
+            
+            ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
             if (ptr)
-            {
+                {
+                /* we have a new document */
+
                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
                 
@@ -1163,7 +1198,7 @@ static int extract_xml_split(struct filter_info *tinfo,
                     xmlChar *buf_out = 0;
                     int len_out = 0;
                     xmlDocDumpMemory(doc, &buf_out, &len_out);
-                    yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%.*s", 
+                    yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
                             tinfo->fname ? tinfo->fname : "(none)",
                             depth, len_out, buf_out); 
                     xmlFree(buf_out);
@@ -1195,7 +1230,9 @@ static int extract_xml_full(struct filter_info *tinfo,
                                   p /* I/O handler */,
                                   0 /* URL */,
                                   0 /* encoding */,
-                                  XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+                                  XML_PARSE_XINCLUDE
+                                  | XML_PARSE_NOENT
+                                  | XML_PARSE_NONET);
         if (!doc)
         {
             return RECCTRL_EXTRACT_ERROR_GENERIC;
@@ -1268,12 +1305,16 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
     struct filter_info *tinfo = clientData;
     struct filter_input *input = tinfo->input_list;
 
-    if (!input)
-        return RECCTRL_EXTRACT_ERROR_GENERIC;
 
+    if (!input)
+        return RECCTRL_EXTRACT_ERROR_GENERIC; 
+    
     odr_reset(tinfo->odr_record);
     switch(input->type)
     {
+    case DOM_INPUT_DOM:
+        return extract_xml_full(tinfo, input, p);
+        break;
     case DOM_INPUT_XMLREADER:
         if (input->u.xmlreader.split_level == 0)
             return extract_xml_full(tinfo, input, p);
@@ -1354,7 +1395,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                    0 /* URL */,
                    0 /* encoding */,
-                   XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+                   XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
     if (!doc)
     {
         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;