X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fmod_dom.c;h=b1555d7add25712f74d73edfc626957f16e4faba;hb=cf66499bac7c49c5bdd363a2c927295fa92f547a;hp=e65a3bff764f5ec718c08b5a2079efe2f5409c61;hpb=7a23ff31063e70f55eb387477130a358f0992988;p=idzebra-moved-to-github.git

diff --git a/index/mod_dom.c b/index/mod_dom.c
index e65a3bf..b1555d7 100644
--- a/index/mod_dom.c
+++ b/index/mod_dom.c
@@ -1,4 +1,5 @@
-/* $Id: mod_dom.c,v 1.25 2007-03-01 10:35:46 adam Exp $
+
+/* $Id: mod_dom.c,v 1.30 2007-03-07 14:18:35 marc Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -348,25 +349,25 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo,
         if (last_xsp)
             *last_xsp = convert->stylesheet_xsp;
         
-        xmlFreeDoc(*doc);
+        if (!res_doc)
+            break;
 
         /* now saving into buffer and re-reading into DOM to avoid annoing
            XSLT problem with thrown-out indentation text nodes */
-        if (res_doc){
-            xsltSaveResultToString(&buf_out, &len_out, res_doc,
-                                   convert->stylesheet_xsp); 
-            xmlFreeDoc(res_doc);
-        }
+        xsltSaveResultToString(&buf_out, &len_out, res_doc,
+                               convert->stylesheet_xsp); 
+        xmlFreeDoc(res_doc);
 
+        xmlFreeDoc(*doc);
 
-        *doc =  xmlParseDoc(buf_out);
+        *doc = xmlParseMemory((const char *) buf_out, len_out);
 
         /* writing debug info out */
-        if (extctr->flagShowRecords)
-            yaz_log(YLOG_LOG, "%s: XSLT %s \n %s", 
+        if (extctr && extctr->flagShowRecords)
+            yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
                     tinfo->fname ? tinfo->fname : "(none)", 
                     convert->stylesheet,
-                    buf_out);
+                    len_out, buf_out);
         
         xmlFree(buf_out);
     }
@@ -720,8 +721,6 @@ static void index_value_of(struct filter_info *tinfo,
         xmlChar *text = xmlNodeGetContent(node);
         size_t text_len = strlen((const char *)text);
         
-        yaz_log(YLOG_LOG, "Indexing :%.*s:", text_len, text);
-        
         /* if there is no text, we do not need to proceed */
         if (text_len)
         {            
@@ -1059,15 +1058,6 @@ static void extract_dom_doc_node(struct filter_info *tinfo,
     RecWord recword;
     (*extctr->init)(extctr, &recword);
 
-    /*
-    if (extctr->flagShowRecords)
-    {
-        xmlDocDumpMemory(doc, &buf_out, &len_out);
-        fwrite(buf_out, len_out, 1, stdout);
-        xmlFree(buf_out);
-    }
-    */
-    tinfo->record_info_invoked = 0;
     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
 }
 
@@ -1086,6 +1076,14 @@ static int convert_extract_doc(struct filter_info *tinfo,
     xsltStylesheetPtr last_xsp = 0;
     xmlDocPtr store_doc = 0;
 
+    /* by default, do not ingest record */
+    tinfo->record_info_invoked = 0;
+
+    /* exit if empty document given */
+    if (!doc)
+        return RECCTRL_EXTRACT_SKIP;
+
+    /* we actually have a document which needs to be processed further */
     params[0] = 0;
     set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
 
@@ -1100,14 +1098,12 @@ static int convert_extract_doc(struct filter_info *tinfo,
                         params, &store_doc, &last_xsp);
     }
     
+    /* saving either store doc or original doc in case no store doc exists */
     if (last_xsp)
         xsltSaveResultToString(&buf_out, &len_out, 
                                store_doc ? store_doc : doc, last_xsp);
     else
         xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
-  
-    /* if (p->flagShowRecords)
-       fwrite(buf_out, len_out, 1, stdout); */
 
     (*p->setStoreData)(p, buf_out, len_out);
     xmlFree(buf_out);
@@ -1118,15 +1114,17 @@ static int convert_extract_doc(struct filter_info *tinfo,
     /* extract conversion */
     perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
 
+
     /* finally, do the indexing */
-    if (doc)
+    if (doc){
         extract_dom_doc_node(tinfo, p, doc);
-
-    if (doc)
 	xmlFreeDoc(doc);
-
+    }
+    
+    /* there was nothing to index, so there is no inserted/updated record */
     if (tinfo->record_info_invoked == 0)
         return RECCTRL_EXTRACT_SKIP;
+
     return RECCTRL_EXTRACT_OK;
 }
 
@@ -1144,8 +1142,9 @@ static int extract_xml_split(struct filter_info *tinfo,
                                                    p /* I/O handler */,
                                                    0 /* URL */, 
                                                    0 /* encoding */,
-                                                   XML_PARSE_XINCLUDE|
-                                                   XML_PARSE_NOENT);
+                                                   XML_PARSE_XINCLUDE
+                                                   | XML_PARSE_NOENT
+                                                   | XML_PARSE_NONET);
     }
     if (!input->u.xmlreader.reader)
 	return RECCTRL_EXTRACT_ERROR_GENERIC;
@@ -1155,27 +1154,34 @@ static int extract_xml_split(struct filter_info *tinfo,
     {
         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
+
         if (type == XML_READER_TYPE_ELEMENT && 
             input->u.xmlreader.split_level == depth)
         {
-            xmlNodePtr ptr
-                = xmlTextReaderExpand(input->u.xmlreader.reader);
+            xmlNodePtr ptr;
+
+            /* by default, do not ingest record */
+            tinfo->record_info_invoked = 0;
+            
+            ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
             if (ptr)
-            {
+                {
+                /* we have a new document */
+
                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
                 
                 xmlDocSetRootElement(doc, ptr2);
                 
                 /* writing debug info out */
-                if (p->flagShowRecords){
+                if (p->flagShowRecords)
+                {
                     xmlChar *buf_out = 0;
                     int len_out = 0;
                     xmlDocDumpMemory(doc, &buf_out, &len_out);
-                    yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%s", 
+                    yaz_log(YLOG_LOG, "%s: XMLREADER depth: %i\n%.*s", 
                             tinfo->fname ? tinfo->fname : "(none)",
-                            depth,
-                            buf_out); 
+                            depth, len_out, buf_out); 
                     xmlFree(buf_out);
                 }
                 
@@ -1205,7 +1211,9 @@ static int extract_xml_full(struct filter_info *tinfo,
                                   p /* I/O handler */,
                                   0 /* URL */,
                                   0 /* encoding */,
-                                  XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+                                  XML_PARSE_XINCLUDE
+                                  | XML_PARSE_NOENT
+                                  | XML_PARSE_NONET);
         if (!doc)
         {
             return RECCTRL_EXTRACT_ERROR_GENERIC;
@@ -1364,7 +1372,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
 		    0 /* URL */,
 		    0 /* encoding */,
-		    XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
+		    XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
     if (!doc)
     {
         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;