Bump copyright year
[idzebra-moved-to-github.git] / index / mod_dom.c
index 1832b05..2e65b89 100644 (file)
@@ -1,25 +1,25 @@
-/* $Id: mod_dom.c,v 1.34 2007-04-07 22:18:46 adam Exp $
-   Copyright (C) 1995-2007
-   Index Data ApS
+/* This file is part of the Zebra server.
+   Copyright (C) 2004-2013 Index Data
 
-   This file is part of the Zebra server.
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
 
-   Zebra is free software; you can redistribute it and/or modify it under
-   the terms of the GNU General Public License as published by the Free
-   Software Foundation; either version 2, or (at your option) any later
-   version.
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
 
-   Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or
-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <stdio.h>
 #include <assert.h>
 #include <ctype.h>
@@ -43,6 +43,7 @@
 
 #include <idzebra/util.h>
 #include <idzebra/recctrl.h>
+#include <yaz/oid_db.h>
 
 /* DOM filter style indexing */
 #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
@@ -52,11 +53,26 @@ static const char *zebra_dom_ns = ZEBRA_DOM_NS;
 #define ZEBRA_PI_NAME "zebra-2.0"
 static const char *zebra_pi_name = ZEBRA_PI_NAME;
 
+enum convert_type { /* tag stored in convert_s.which */
+    convert_xslt_type, /* step parsed from an <xslt> element */
+    convert_meta_type  /* step parsed from a <process-meta> element */
+};
 
-
-struct convert_s {
+struct convert_xslt { /* data of one <xslt stylesheet="..."/> conversion step */
     const char *stylesheet;
     xsltStylesheetPtr stylesheet_xsp;
+};
+
+struct convert_meta { /* data of one <process-meta/> conversion step */
+    int dummy; /* never read or written in the code shown; presumably a placeholder */
+};
+
+struct convert_s { /* one node of a singly linked list of conversion steps */
+    enum convert_type which; /* tells which member of u this step uses */
+    union {
+        struct convert_xslt xslt; /* used when which == convert_xslt_type */
+        struct convert_meta meta; /* used when which == convert_meta_type */
+    } u;
     struct convert_s *next;
 };
 
@@ -96,13 +112,13 @@ struct filter_input {
     } u;
     struct filter_input *next;
 };
-  
+
 struct filter_info {
     char *fname;
     char *full_name;
     const char *profile_path;
-    ODR odr_record;
-    ODR odr_config;
+    NMEM nmem_record;
+    NMEM nmem_config;
     xmlDocPtr doc_config;
     struct filter_extract *extract;
     struct filter_retrieve *retrieve_list;
@@ -136,7 +152,7 @@ static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
     if (ptr)
     {
-        yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
+        yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none",
                 xmlGetLineNo(ptr), buf);
     }
     else
@@ -148,9 +164,9 @@ static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
 
 
 static void set_param_str(const char **params, const char *name,
-                         const char *value, ODR odr)
+                         const char *value, NMEM nmem)
 {
-    char *quoted = odr_malloc(odr, 3 + strlen(value));
+    char *quoted = nmem_malloc(nmem, 3 + strlen(value));
     sprintf(quoted, "'%s'", value);
     while (*params)
        params++;
@@ -160,9 +176,9 @@ static void set_param_str(const char **params, const char *name,
 }
 
 static void set_param_int(const char **params, const char *name,
-                         zint value, ODR odr)
+                         zint value, NMEM nmem)
 {
-    char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
+    char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
     while (*params)
        params++;
     sprintf(quoted, "'" ZINT_FORMAT "'", value);
@@ -177,8 +193,8 @@ static void *filter_init(Res res, RecType recType)
     tinfo->fname = 0;
     tinfo->full_name = 0;
     tinfo->profile_path = 0;
-    tinfo->odr_record = odr_createmem(ODR_ENCODE);
-    tinfo->odr_config = odr_createmem(ODR_ENCODE);
+    tinfo->nmem_record = nmem_create();
+    tinfo->nmem_config = nmem_create();
     tinfo->extract = 0;
     tinfo->retrieve_list = 0;
     tinfo->input_list = 0;
@@ -187,7 +203,7 @@ static void *filter_init(Res res, RecType recType)
     tinfo->record_info_invoked = 0;
 
 #if YAZ_HAVE_EXSLT
-    exsltRegisterAll(); 
+    exsltRegisterAll();
 #endif
 
     return tinfo;
@@ -196,7 +212,7 @@ static void *filter_init(Res res, RecType recType)
 static int attr_content(struct _xmlAttr *attr, const char *name,
                        const char **dst_content)
 {
-    if (!XML_STRCMP(attr->name, name) && attr->children 
+    if (!XML_STRCMP(attr->name, name) && attr->children
         && attr->children->type == XML_TEXT_NODE)
     {
         *dst_content = (const char *)(attr->children->content);
@@ -207,10 +223,13 @@ static int attr_content(struct _xmlAttr *attr, const char *name,
 
 static void destroy_xsp(struct convert_s *c)
 {
-    while(c)
+    while (c) /* visit every step in the list */
     {
-        if (c->stylesheet_xsp)
-            xsltFreeStylesheet(c->stylesheet_xsp);
+        if (c->which == convert_xslt_type) /* u.xslt is only set up for XSLT steps */
+        {
+            if (c->u.xslt.stylesheet_xsp) /* stays 0 when no stylesheet was parsed */
+                xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
+        }
         c = c->next;
     }
 }
@@ -260,7 +279,7 @@ static void destroy_dom(struct filter_info *tinfo)
         xmlFreeDoc(tinfo->doc_config);
         tinfo->doc_config = 0;
     }
-    odr_reset(tinfo->odr_config);
+    nmem_reset(tinfo->nmem_config);
 }
 
 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
@@ -271,56 +290,70 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
         if (!XML_STRCMP(ptr->name, "xslt"))
         {
             struct _xmlAttr *attr;
-            struct convert_s *p 
-                = odr_malloc(tinfo->odr_config, sizeof(*p));
-            
+            struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
+
             p->next = 0;
-            p->stylesheet = 0;
-            p->stylesheet_xsp = 0;
-            
+            p->which = convert_xslt_type;
+            p->u.xslt.stylesheet = 0;
+            p->u.xslt.stylesheet_xsp = 0;
+
             for (attr = ptr->properties; attr; attr = attr->next)
-                if (attr_content(attr, "stylesheet", &p->stylesheet))
+                if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
                     ;
                 else
                 {
                     dom_log(YLOG_WARN, tinfo, ptr,
                             "bad attribute @%s", attr->name);
                 }
-            if (p->stylesheet)
+            if (p->u.xslt.stylesheet)
             {
                 char tmp_xslt_full_name[1024];
-                if (!yaz_filepath_resolve(p->stylesheet, 
+                if (!yaz_filepath_resolve(p->u.xslt.stylesheet,
                                           tinfo->profile_path,
-                                          NULL, 
+                                          NULL,
                                           tmp_xslt_full_name))
                 {
                     dom_log(YLOG_WARN, tinfo, 0,
                             "stylesheet %s not found in "
                             "path %s",
-                            p->stylesheet, 
+                            p->u.xslt.stylesheet,
                             tinfo->profile_path);
                     return ZEBRA_FAIL;
                 }
-                
-                p->stylesheet_xsp
-                    = xsltParseStylesheetFile((const xmlChar*) 
+
+                p->u.xslt.stylesheet_xsp
+                    = xsltParseStylesheetFile((const xmlChar*)
                                               tmp_xslt_full_name);
-                if (!p->stylesheet_xsp)
+                if (!p->u.xslt.stylesheet_xsp)
                 {
                     dom_log(YLOG_WARN, tinfo, 0,
                             "could not parse xslt stylesheet %s",
                             tmp_xslt_full_name);
                     return ZEBRA_FAIL;
                 }
-                }
-                else
-                {
-                    dom_log(YLOG_WARN, tinfo, ptr,
-                            "missing attribute 'stylesheet' ");
-                    return ZEBRA_FAIL;
-                }
-                *l = p;
-                l = &p->next;
+            }
+            else
+            {
+                dom_log(YLOG_WARN, tinfo, ptr,
+                        "missing attribute 'stylesheet'");
+                return ZEBRA_FAIL;
+            }
+            *l = p;
+            l = &p->next;
+        }
+        else if (!XML_STRCMP(ptr->name, "process-meta"))
+        {
+            struct _xmlAttr *attr;
+            struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
+
+            p->next = 0;
+            p->which = convert_meta_type;
+
+            for (attr = ptr->properties; attr; attr = attr->next)
+                dom_log(YLOG_WARN, tinfo, ptr,
+                        "bad attribute @%s", attr->name);
+            *l = p;
+            l = &p->next;
         }
         else
         {
@@ -332,8 +365,64 @@ static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
     return ZEBRA_OK;
 }
 
-static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
+/* Replace every zebra-namespace <meta name="X"/> element at or below node by
+   a copy of the root element of the XML that retctr->special_fetch returns
+   for X; a meta that cannot be fetched or parsed is kept. Always returns 0. */
+static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node,
+                        struct recRetrieveCtrl *retctr)
+{
+    xmlNodePtr child, next, copy = 0;
+
+    if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
+        0 == XML_STRCMP(node->ns->href, zebra_dom_ns) &&
+        0 == XML_STRCMP(node->name, "meta"))
+    {
+        const char *element_set_name = 0;
+        struct _xmlAttr *attr;
+
+        for (attr = node->properties; attr; attr = attr->next)
+            if (!attr_content(attr, "name", &element_set_name))
+                dom_log(YLOG_WARN, tinfo, node,
+                        "bad attribute @%s, expected @name", attr->name);
+        if (element_set_name)
+        {
+            WRBUF result = wrbuf_alloc();
+            WRBUF addinfo = wrbuf_alloc();
+            const Odr_oid *output_format = 0;
+
+            if (retctr->special_fetch(retctr->handle, element_set_name,
+                                      yaz_oid_recsyn_xml, &output_format,
+                                      result, addinfo) == 0)
+            {
+                xmlDocPtr sub_doc =
+                    xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
+                xmlNodePtr t = sub_doc ? xmlDocGetRootElement(sub_doc) : 0;
+                if (t) /* copy into doc so it survives freeing sub_doc */
+                    copy = xmlDocCopyNode(t, doc, 1);
+                if (sub_doc)
+                    xmlFreeDoc(sub_doc);
+            }
+            wrbuf_destroy(result);
+            wrbuf_destroy(addinfo);
+        }
+        if (copy)
+        {
+            /* xmlReplaceNode only unlinks node: free it, do not descend */
+            xmlFreeNode(xmlReplaceNode(node, copy));
+            return 0;
+        }
+    }
+    for (child = node->children; child; child = next)
+    {
+        next = child->next; /* child may be replaced and freed */
+        process_meta(tinfo, doc, child, retctr);
+    }
+    return 0;
+}
+
+static ZEBRA_RES perform_convert(struct filter_info *tinfo,
                                  struct recExtractCtrl *extctr,
+                                 struct recRetrieveCtrl *retctr,
                                  struct convert_s *convert,
                                  const char **params,
                                  xmlDocPtr *doc,
@@ -341,34 +430,48 @@ static ZEBRA_RES perform_convert(struct filter_info *tinfo,
 {
     for (; convert; convert = convert->next)
     {
-        xmlChar *buf_out = 0;
-        int len_out = 0;
-        xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
-                                                *doc, params);
-        if (last_xsp)
-            *last_xsp = convert->stylesheet_xsp;
-        
-        if (!res_doc)
-            break;
+        if (convert->which == convert_xslt_type)
+        {
+            xmlChar *buf_out = 0;
+            int len_out = 0;
+            xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
+                                                    *doc, params);
+            if (last_xsp)
+                *last_xsp = convert->u.xslt.stylesheet_xsp;
+
+            if (!res_doc)
+                break;
 
-        /* now saving into buffer and re-reading into DOM to avoid annoing
-           XSLT problem with thrown-out indentation text nodes */
-        xsltSaveResultToString(&buf_out, &len_out, res_doc,
-                               convert->stylesheet_xsp); 
-        xmlFreeDoc(res_doc);
+            /* save the result to a buffer and re-parse it into a DOM to avoid an
+               annoying XSLT problem with thrown-out indentation text nodes */
+            xsltSaveResultToString(&buf_out, &len_out, res_doc,
+                                   convert->u.xslt.stylesheet_xsp);
+            xmlFreeDoc(res_doc);
 
-        xmlFreeDoc(*doc);
+            xmlFreeDoc(*doc);
 
-        *doc = xmlParseMemory((const char *) buf_out, len_out);
+            *doc = xmlParseMemory((const char *) buf_out, len_out);
 
-        /* writing debug info out */
-        if (extctr && extctr->flagShowRecords)
-            yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
-                    tinfo->fname ? tinfo->fname : "(none)", 
-                    convert->stylesheet,
-                    len_out, buf_out);
-        
-        xmlFree(buf_out);
+            /* writing debug info out */
+            if (extctr && extctr->flagShowRecords)
+                yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s",
+                        tinfo->fname ? tinfo->fname : "(none)",
+                        convert->u.xslt.stylesheet,
+                        len_out, buf_out);
+
+            xmlFree(buf_out);
+        }
+        else if (convert->which == convert_meta_type)
+        {
+            if (retctr) /* only execute meta on retrieval */
+            {
+                process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
+
+                /* last stylesheet absent */
+                if (last_xsp)
+                    *last_xsp = 0;
+            }
+        }
     }
     return ZEBRA_OK;
 }
@@ -379,7 +482,7 @@ static struct filter_input *new_input(struct filter_info *tinfo, int type)
     struct filter_input **np = &tinfo->input_list;
     for (;*np; np = &(*np)->next)
         ;
-    p = *np = odr_malloc(tinfo->odr_config, sizeof(*p));
+    p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
     p->next = 0;
     p->syntax = 0;
     p->name = 0;
@@ -397,7 +500,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
             yaz_iconv_t iconv = 0;
             const char *input_charset = "marc-8";
             struct _xmlAttr *attr;
-            
+
             for (attr = ptr->properties; attr; attr = attr->next)
             {
                 if (attr_content(attr, "inputcharset", &input_charset))
@@ -412,21 +515,21 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
             iconv = yaz_iconv_open("utf-8", input_charset);
             if (!iconv)
             {
-                dom_log(YLOG_WARN, tinfo, ptr, 
+                dom_log(YLOG_WARN, tinfo, ptr,
                         "unsupported @charset '%s'", input_charset);
                 return ZEBRA_FAIL;
             }
             else
             {
-                struct filter_input *p 
+                struct filter_input *p
                     = new_input(tinfo, DOM_INPUT_MARC);
                 p->u.marc.handle = yaz_marc_create();
                 p->u.marc.iconv = iconv;
-                
+
                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
-                
+
                 ptr = ptr->next;
-                
+
                 parse_convert(tinfo, ptr, &p->convert);
             }
             break;
@@ -434,7 +537,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
         }
         else if (!XML_STRCMP(ptr->name, "xmlreader"))
         {
-            struct filter_input *p 
+            struct filter_input *p
                 = new_input(tinfo, DOM_INPUT_XMLREADER);
             struct _xmlAttr *attr;
             const char *level_str = 0;
@@ -455,7 +558,7 @@ static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
             }
             if (level_str)
                 p->u.xmlreader.split_level = atoi(level_str);
-                
+
             ptr = ptr->next;
 
             parse_convert(tinfo, ptr, &p->convert);
@@ -478,14 +581,14 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
     xmlNodePtr ptr;
     xmlDocPtr doc;
 
-    tinfo->fname = odr_strdup(tinfo->odr_config, fname);
-    
-    if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
+    tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
+
+    if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
                              NULL, tmp_full_name))
-        tinfo->full_name = odr_strdup(tinfo->odr_config, tmp_full_name);
+        tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
     else
-        tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname);
-    
+        tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
+
     yaz_log(YLOG_LOG, "%s dom filter: "
             "loading config file %s", tinfo->fname, tinfo->full_name);
 
@@ -497,16 +600,16 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
                 tinfo->fname, tinfo->full_name);
         return ZEBRA_FAIL;
     }
-    /* save because we store ptrs to the content */ 
+    /* save because we store ptrs to the content */
     tinfo->doc_config = doc;
-    
+
     ptr = xmlDocGetRootElement(doc);
-    if (!ptr || ptr->type != XML_ELEMENT_NODE 
+    if (!ptr || ptr->type != XML_ELEMENT_NODE
         || XML_STRCMP(ptr->name, "dom"))
     {
         dom_log(YLOG_WARN, tinfo, ptr,
-                "bad root element <%s>, expected root element <dom>", 
-                ptr->name);  
+                "bad root element <%s>, expected root element <dom>",
+                ptr->name);
         return ZEBRA_FAIL;
     }
 
@@ -522,8 +625,8 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
             */
             struct _xmlAttr *attr;
             struct filter_extract *f =
-                odr_malloc(tinfo->odr_config, sizeof(*f));
-            
+                nmem_malloc(tinfo->nmem_config, sizeof(*f));
+
             tinfo->extract = f;
             f->name = 0;
             f->convert = 0;
@@ -541,8 +644,8 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
             parse_convert(tinfo, ptr->children, &f->convert);
         }
         else if (!XML_STRCMP(ptr->name, "retrieve"))
-        {  
-            /* 
+        {
+            /*
                <retrieve name="F">
                <xslt stylesheet="some.xsl"/>
                <xslt stylesheet="some.xsl"/>
@@ -551,8 +654,8 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
             struct _xmlAttr *attr;
             struct filter_retrieve **fp = &tinfo->retrieve_list;
             struct filter_retrieve *f =
-                odr_malloc(tinfo->odr_config, sizeof(*f));
-            
+                nmem_malloc(tinfo->nmem_config, sizeof(*f));
+
             while (*fp)
                 fp = &(*fp)->next;
 
@@ -564,7 +667,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
 
             for (attr = ptr->properties; attr; attr = attr->next)
             {
-                if (attr_content(attr, "identifier", 
+                if (attr_content(attr, "identifier",
                                  &f->identifier))
                     ;
                 else if (attr_content(attr, "name", &f->name))
@@ -587,8 +690,8 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
               </retrieve>
             */
             struct filter_store *f =
-                odr_malloc(tinfo->odr_config, sizeof(*f));
-            
+                nmem_malloc(tinfo->nmem_config, sizeof(*f));
+
             tinfo->store = f;
             f->convert = 0;
             parse_convert(tinfo, ptr->children, &f->convert);
@@ -632,7 +735,7 @@ static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
     }
     if (!tinfo->input_list)
     {
-        struct filter_input *p 
+        struct filter_input *p
             = new_input(tinfo, DOM_INPUT_XMLREADER);
         p->u.xmlreader.split_level = 0;
         p->u.xmlreader.reader = 0;
@@ -649,15 +752,15 @@ static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
     if (!est)
         return f;
     for (; f; f = f->next)
-    { 
+    {
         /* find requested schema */
-        if (est) 
-        {    
+        if (est)
+        {
             if (f->identifier && !strcmp(f->identifier, est))
                 return f;
             if (f->name && !strcmp(f->name, est))
                 return f;
-        } 
+        }
     }
     return 0;
 }
@@ -673,7 +776,7 @@ static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
 
     if (tinfo->fname && !strcmp(args, tinfo->fname))
        return ZEBRA_OK;
-    
+
     tinfo->profile_path = res_get(res, "profilePath");
 
     destroy_dom(tinfo);
@@ -684,8 +787,8 @@ static void filter_destroy(void *clientData)
 {
     struct filter_info *tinfo = clientData;
     destroy_dom(tinfo);
-    odr_destroy(tinfo->odr_config);
-    odr_destroy(tinfo->odr_record);
+    nmem_destroy(tinfo->nmem_config);
+    nmem_destroy(tinfo->nmem_record);
     xfree(tinfo);
 }
 
@@ -701,35 +804,30 @@ static int ioclose_ex(void *context)
 }
 
 
-/* DOM filter style indexing */
-static int attr_content_xml(struct _xmlAttr *attr, const char *name,
-                            const char **dst_content)
-{
-    if (0 == XML_STRCMP(attr->name, name) && attr->children 
-        && attr->children->type == XML_TEXT_NODE)
-    {
-        *dst_content = (const char *) (attr->children->content);
-        return 1;
-    }
-    return 0;
-}
-
 
 /* DOM filter style indexing */
-static void index_value_of(struct filter_info *tinfo, 
+static void index_value_of(struct filter_info *tinfo,
                            struct recExtractCtrl *extctr,
-                           RecWord* recword, 
-                           xmlNodePtr node, 
+                           RecWord* recword,
+                           xmlNodePtr node,
                            const char *index_p)
 {
     if (tinfo->record_info_invoked == 1)
     {
         xmlChar *text = xmlNodeGetContent(node);
         size_t text_len = strlen((const char *)text);
-        
+
         /* if there is no text, we do not need to proceed */
         if (text_len)
-        {            
+        {
+            /* keep the seqno base so that the text gets identical
+               seqnos in each of multiple fields, e.g.
+               <z:index name="title:w any:w title:p">.. */
+
+            zint seqno_base = recword->seqno;
+            zint seqno_max = recword->seqno;
+
+
             const char *look = index_p;
             const char *bval;
             const char *eval;
@@ -748,7 +846,7 @@ static void index_value_of(struct filter_info *tinfo,
                 /* setting name and type to zero */
                 *index = '\0';
                 *type = '\0';
-    
+
                 /* parsing one index name */
                 bval = look;
                 while (*look && ':' != *look && ' ' != *look)
@@ -758,13 +856,13 @@ static void index_value_of(struct filter_info *tinfo,
                 eval = look;
                 strncpy((char *)index, (const char *)bval, eval - bval);
                 index[eval - bval] = '\0';
-    
-    
+
+
                 /* parsing one index type, if existing */
                 if (':' == *look)
                 {
                     look++;
-      
+
                     bval = look;
                     while (*look && ' ' != *look)
                     {
@@ -776,61 +874,63 @@ static void index_value_of(struct filter_info *tinfo,
                 }
 
                 /* actually indexing the text given */
-                dom_log(YLOG_DEBUG, tinfo, 0, 
-                        "INDEX '%s:%s' '%s'", 
-                        index ? (const char *) index : "null",
-                        type ? (const char *) type : "null", 
-                        text ? (const char *) text : "null");
 
+                recword->seqno = seqno_base;
                 recword->index_name = (const char *)index;
-                if (type && *type)
-                    recword->index_type = *type;
+                if (*type)
+                    recword->index_type = (const char *) type;
 
                 /* writing debug out */
                 if (extctr->flagShowRecords)
-                    dom_log(YLOG_LOG, tinfo, 0, 
-                            "INDEX '%s:%s' '%s'", 
-                            index ? (const char *) index : "null",
-                            type ? (const char *) type : "null", 
-                            text ? (const char *) text : "null");
-                
-                /* actually indexing the text given */
-                recword->index_name = (const char *)index;
-                if (type && *type)
-                    recword->index_type = *type;
+                    dom_log(YLOG_LOG, tinfo, 0,
+                            "INDEX '%s:%s' '%s'",
+                            (const char *) index,
+                            (const char *) type,
+                            (const char *) text);
+
                 (extctr->tokenAdd)(recword);
 
+                if (seqno_max < recword->seqno)
+                    seqno_max = recword->seqno;
+
                 /* eat whitespaces */
                 if (*look && ' ' == *look)
                 {
                     look++;
-                } 
+                }
             }
+            recword->seqno = seqno_max;
         }
-        xmlFree(text); 
+        xmlFree(text);
     }
 }
 
 
 /* DOM filter style indexing */
-static void set_record_info(struct filter_info *tinfo, 
-                            struct recExtractCtrl *extctr, 
-                            xmlNodePtr node, 
-                            const char * id_p, 
-                            const char * rank_p, 
+static void set_record_info(struct filter_info *tinfo,
+                            struct recExtractCtrl *extctr,
+                            xmlNodePtr node,
+                            const char * id_p,
+                            const char * rank_p,
                             const char * type_p)
 {
     /* writing debug info out */
     if (extctr && extctr->flagShowRecords)
         dom_log(YLOG_LOG, tinfo, node,
-                "RECORD id=%s rank=%s type=%s", 
+                "RECORD id=%s rank=%s type=%s",
                 id_p ? (const char *) id_p : "(null)",
                 rank_p ? (const char *) rank_p : "(null)",
                 type_p ? (const char *) type_p : "(null)");
-    
+
 
     if (id_p && *id_p)
-        sscanf((const char *)id_p, "%255s", extctr->match_criteria);
+    {
+        size_t l = strlen(id_p);
+        if (l >= sizeof(extctr->match_criteria))
+            l = sizeof(extctr->match_criteria)-1;
+        memcpy(extctr->match_criteria, id_p, l);
+        extctr->match_criteria[l] = '\0';
+    }
 
     if (rank_p && *rank_p)
         extctr->staticrank = atozint((const char *)rank_p);
@@ -849,7 +949,6 @@ static void set_record_info(struct filter_info *tinfo,
         else
             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
         extctr->action = action;
-        yaz_log(YLOG_LOG, "In mod_dom.c: setting action to %d", action);
     }
 
     if (tinfo->record_info_invoked == 1)
@@ -863,25 +962,25 @@ static void set_record_info(struct filter_info *tinfo,
 
 
 /* DOM filter style indexing */
-static void process_xml_element_zebra_node(struct filter_info *tinfo, 
-                                           struct recExtractCtrl *extctr, 
-                                           RecWord* recword, 
+static void process_xml_element_zebra_node(struct filter_info *tinfo,
+                                           struct recExtractCtrl *extctr,
+                                           RecWord* recword,
                                            xmlNodePtr node)
 {
     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
     {
-         if (0 == XML_STRCMP(node->name, "index"))
-         {
+        if (0 == XML_STRCMP(node->name, "index"))
+        {
             const char *index_p = 0;
 
-            struct _xmlAttr *attr;      
+            struct _xmlAttr *attr;
             for (attr = node->properties; attr; attr = attr->next)
             {
-                if (attr_content_xml(attr, "name", &index_p))
+                if (attr_content(attr, "name", &index_p))
                 {
                     index_value_of(tinfo, extctr, recword, node, index_p);
-                }  
+                }
                 else
                 {
                     dom_log(YLOG_WARN, tinfo, node,
@@ -899,11 +998,11 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
             struct _xmlAttr *attr;
             for (attr = node->properties; attr; attr = attr->next)
             {
-                if (attr_content_xml(attr, "id", &id_p))
+                if (attr_content(attr, "id", &id_p))
                     ;
-                else if (attr_content_xml(attr, "rank", &rank_p))
+                else if (attr_content(attr, "rank", &rank_p))
                     ;
-                else if (attr_content_xml(attr, "type", &type_p))
+                else if (attr_content(attr, "type", &type_p))
                     ;
                 else
                 {
@@ -913,7 +1012,7 @@ static void process_xml_element_zebra_node(struct filter_info *tinfo,
                 }
             }
             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
-        } 
+        }
         else
         {
             dom_log(YLOG_WARN, tinfo, node,
@@ -931,9 +1030,6 @@ static int attr_content_pi(const char **c_ptr, const char *name,
     const char *look = *c_ptr;
     int ret = 0;
 
-    *value = '\0';
-    while (*look && ' ' == *look)
-        look++;
     if (strlen(look) > name_len)
     {
         if (look[name_len] == '=' && !memcmp(look, name, name_len))
@@ -950,15 +1046,13 @@ static int attr_content_pi(const char **c_ptr, const char *name,
             ret = 1;
         }
     }
-    while (*look && ' ' == *look)
-        look++;
     *c_ptr = look;
     return ret;
 }
 
 /* DOM filter style indexing */
-static void process_xml_pi_node(struct filter_info *tinfo, 
-                                struct recExtractCtrl *extctr, 
+static void process_xml_pi_node(struct filter_info *tinfo,
+                                struct recExtractCtrl *extctr,
                                 xmlNodePtr node,
                                 const char **index_pp)
 {
@@ -967,45 +1061,54 @@ static void process_xml_pi_node(struct filter_info *tinfo,
     {
         xmlChar *pi_p =  node->content;
         const char *look = (const char *) node->content;
-    
+
         /* parsing PI record instructions */
         if (0 == strncmp((const char *)look, "record", 6))
         {
             char id[256];
             char rank[256];
             char type[256];
-            
+
             *id = '\0';
             *rank = '\0';
             *type = '\0';
             look += 6;
-            while (*look)
+            for (;;)
+            {
+                /* eat whitespace */
+                while (' ' == *look)
+                    look++;
+                if (*look == '\0')
+                    break;
                 if (attr_content_pi(&look, "id", id, sizeof(id)))
                     ;
                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
                     ;
                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
+                    ;
+                else
                 {
                     dom_log(YLOG_WARN, tinfo, node,
                             "content '%s', can not parse '%s'",
                             pi_p, look);
                     break;
                 }
+            }
             set_record_info(tinfo, extctr, node, id, rank, type);
-        } 
+        }
         /* parsing index instruction */
         else if (0 == strncmp((const char *)look, "index", 5))
         {
             look += 5;
-      
+
             /* eat whitespace */
             while (*look && ' ' == *look)
                 look++;
 
             /* export index instructions to outside */
             *index_pp = look;
-        } 
-        else 
+        }
+        else
         {
             dom_log(YLOG_WARN, tinfo, node,
                     "content '%s', can not parse '%s'",
@@ -1015,18 +1118,18 @@ static void process_xml_pi_node(struct filter_info *tinfo,
 }
 
 /* DOM filter style indexing */
-static void process_xml_element_node(struct filter_info *tinfo, 
-                                     struct recExtractCtrl *extctr, 
-                                     RecWord* recword, 
+static void process_xml_element_node(struct filter_info *tinfo,
+                                     struct recExtractCtrl *extctr,
+                                     RecWord* recword,
                                      xmlNodePtr node)
 {
     /* remember indexing instruction from PI to next element node */
     const char *index_p = 0;
 
-    /* check if we are an element node in the special zebra namespace 
+    /* check if we are an element node in the special zebra namespace
        and either set record data or index value-of node content*/
     process_xml_element_zebra_node(tinfo, extctr, recword, node);
-  
+
     /* loop through kid nodes */
     for (node = node->children; node; node = node->next)
     {
@@ -1052,8 +1155,8 @@ static void process_xml_element_node(struct filter_info *tinfo,
 
 
 /* DOM filter style indexing */
-static void extract_dom_doc_node(struct filter_info *tinfo, 
-                                 struct recExtractCtrl *extctr, 
+static void extract_dom_doc_node(struct filter_info *tinfo,
+                                 struct recExtractCtrl *extctr,
                                  xmlDocPtr doc)
 {
     /* only need to do the initialization once, reuse recword for all terms */
@@ -1064,19 +1167,15 @@ static void extract_dom_doc_node(struct filter_info *tinfo,
 }
 
 
-
-
-static int convert_extract_doc(struct filter_info *tinfo, 
+static int convert_extract_doc(struct filter_info *tinfo,
                                struct filter_input *input,
-                               struct recExtractCtrl *p, 
+                               struct recExtractCtrl *p,
                                xmlDocPtr doc)
-
 {
     xmlChar *buf_out;
     int len_out;
     const char *params[10];
     xsltStylesheetPtr last_xsp = 0;
-    xmlDocPtr store_doc = 0;
 
     /* per default do not ingest record */
     tinfo->record_info_invoked = 0;
@@ -1087,34 +1186,53 @@ static int convert_extract_doc(struct filter_info *tinfo,
 
     /* we actuallu have a document which needs to be processed further */
     params[0] = 0;
-    set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
-
-    /* input conversion */
-    perform_convert(tinfo, p, input->convert, params, &doc, 0);
+    set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
 
-    if (tinfo->store)
+    if (p && p->flagShowRecords)
     {
-        /* store conversion */
-        store_doc = xmlCopyDoc(doc, 1);
-        perform_convert(tinfo, p, tinfo->store->convert,
-                        params, &store_doc, &last_xsp);
+        xmlChar *buf_out;
+        int len_out;
+        xmlDocDumpMemory(doc, &buf_out, &len_out);
+#if 0
+        FILE *outf = fopen("extract.xml", "w");
+        fwrite(buf_out, 1, len_out, outf);
+        fclose(outf);
+#endif
+        yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
     }
-    
-    /* saving either store doc or original doc in case no store doc exists */
-    if (last_xsp)
-        xsltSaveResultToString(&buf_out, &len_out, 
-                               store_doc ? store_doc : doc, last_xsp);
-    else
-        xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
 
-    (*p->setStoreData)(p, buf_out, len_out);
-    xmlFree(buf_out);
+    if (p->setStoreData)
+    {
+        xmlDocPtr store_doc = 0;
+
+        /* input conversion */
+        perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
+
+        if (tinfo->store)
+        {
+            /* store conversion */
+            store_doc = xmlCopyDoc(doc, 1);
+            perform_convert(tinfo, p, 0, tinfo->store->convert,
+                            params, &store_doc, &last_xsp);
+        }
+
+        /* saving either store doc or original doc in case no store doc exists */
+        if (last_xsp)
+            xsltSaveResultToString(&buf_out, &len_out,
+                                   store_doc ? store_doc : doc, last_xsp);
+        else
+            xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
+
+        if (p->setStoreData)
+            (*p->setStoreData)(p, buf_out, len_out);
+        xmlFree(buf_out);
+        if (store_doc)
+            xmlFreeDoc(store_doc);
+    }
 
-    if (store_doc)
-        xmlFreeDoc(store_doc);
 
     /* extract conversion */
-    perform_convert(tinfo, p, tinfo->extract->convert, params, &doc, 0);
+    perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
 
 
     /* finally, do the indexing */
@@ -1122,7 +1240,7 @@ static int convert_extract_doc(struct filter_info *tinfo,
         extract_dom_doc_node(tinfo, p, doc);
        xmlFreeDoc(doc);
     }
-    
+
     /* there was nothing to index, so there is no inserted/updated record */
     if (tinfo->record_info_invoked == 0)
         return RECCTRL_EXTRACT_SKIP;
@@ -1142,7 +1260,7 @@ static int extract_xml_split(struct filter_info *tinfo,
             xmlFreeTextReader(input->u.xmlreader.reader);
         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
                                                    p /* I/O handler */,
-                                                   0 /* URL */, 
+                                                   0 /* URL */,
                                                    0 /* encoding */,
                                                    XML_PARSE_XINCLUDE
                                                    | XML_PARSE_NOENT
@@ -1157,36 +1275,36 @@ static int extract_xml_split(struct filter_info *tinfo,
         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
 
-        if (type == XML_READER_TYPE_ELEMENT && 
+        if (type == XML_READER_TYPE_ELEMENT &&
             input->u.xmlreader.split_level == depth)
         {
             xmlNodePtr ptr;
 
             /* per default do not ingest record */
             tinfo->record_info_invoked = 0;
-            
+
             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
             if (ptr)
-                {
+            {
                 /* we have a new document */
 
                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
-                
+
                 xmlDocSetRootElement(doc, ptr2);
-                
+
                 /* writing debug info out */
                 if (p->flagShowRecords)
                 {
                     xmlChar *buf_out = 0;
                     int len_out = 0;
                     xmlDocDumpMemory(doc, &buf_out, &len_out);
-                    yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
+                    yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s",
                             tinfo->fname ? tinfo->fname : "(none)",
-                            depth, len_out, buf_out); 
+                            depth, len_out, buf_out);
                     xmlFree(buf_out);
                 }
-                
+
                 return convert_extract_doc(tinfo, input, p, doc);
             }
             else
@@ -1203,13 +1321,13 @@ static int extract_xml_split(struct filter_info *tinfo,
     return RECCTRL_EXTRACT_EOF;
 }
 
-static int extract_xml_full(struct filter_info *tinfo, 
+static int extract_xml_full(struct filter_info *tinfo,
                             struct filter_input *input,
                             struct recExtractCtrl *p)
 {
     if (p->first_record) /* only one record per stream */
     {
-        xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
+        xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex,
                                   p /* I/O handler */,
                                   0 /* URL */,
                                   0 /* encoding */,
@@ -1275,10 +1393,11 @@ static int extract_iso2709(struct filter_info *tinfo,
     {
         xmlDocPtr rdoc;
         xmlNode *root_ptr;
-        yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
+        yaz_marc_write_xml(input->u.marc.handle, &root_ptr,
+                           "http://www.loc.gov/MARC21/slim", 0, 0);
         rdoc = xmlNewDoc((const xmlChar*) "1.0");
         xmlDocSetRootElement(rdoc, root_ptr);
-        return convert_extract_doc(tinfo, input, p, rdoc);        
+        return convert_extract_doc(tinfo, input, p, rdoc);
     }
     return RECCTRL_EXTRACT_OK;
 }
@@ -1290,8 +1409,11 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
 
     if (!input)
         return RECCTRL_EXTRACT_ERROR_GENERIC;
-    
-    odr_reset(tinfo->odr_record);
+
+    nmem_reset(tinfo->nmem_record);
+
+    if (p->setStoreData == 0)
+        return extract_xml_full(tinfo, input, p);
     switch(input->type)
     {
     case DOM_INPUT_XMLREADER:
@@ -1309,7 +1431,8 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
 static int ioread_ret(void *context, char *buffer, int len)
 {
     struct recRetrieveCtrl *p = context;
-    return p->stream->readf(p->stream, buffer, len);
+    int r = p->stream->readf(p->stream, buffer, len);
+    return r;
 }
 
 static int ioclose_ret(void *context)
@@ -1317,7 +1440,7 @@ static int ioclose_ret(void *context)
     return 0;
 }
 
-static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
+static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
 {
     /* const char *esn = zebra_dom_ns; */
     const char *esn = 0;
@@ -1334,7 +1457,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
         {
             esn = p->comp->u.simple->u.generic;
         }
-        else if (p->comp->which == Z_RecordComp_complex 
+        else if (p->comp->which == Z_RecordComp_complex
                  && p->comp->u.complex->generic->elementSpec
                  && p->comp->u.complex->generic->elementSpec->which ==
                  Z_ElementSpec_elementSetName)
@@ -1347,29 +1470,30 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     {
         p->diagnostic =
             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
+        p->addinfo = odr_strdup_null(p->odr, esn);
         return 0;
     }
 
     params[0] = 0;
-    set_param_int(params, "id", p->localno, p->odr);
+    set_param_int(params, "id", p->localno, p->odr->mem);
     if (p->fname)
-       set_param_str(params, "filename", p->fname, p->odr);
+       set_param_str(params, "filename", p->fname, p->odr->mem);
     if (p->staticrank >= 0)
-       set_param_int(params, "rank", p->staticrank, p->odr);
+       set_param_int(params, "rank", p->staticrank, p->odr->mem);
 
     if (esn)
-        set_param_str(params, "schema", esn, p->odr);
+        set_param_str(params, "schema", esn, p->odr->mem);
     else
         if (retrieve->name)
-            set_param_str(params, "schema", retrieve->name, p->odr);
+            set_param_str(params, "schema", retrieve->name, p->odr->mem);
         else if (retrieve->identifier)
-            set_param_str(params, "schema", retrieve->identifier, p->odr);
+            set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
         else
-            set_param_str(params, "schema", "", p->odr);
+            set_param_str(params, "schema", "", p->odr->mem);
 
     if (p->score >= 0)
-       set_param_int(params, "score", p->score, p->odr);
-    set_param_int(params, "size", p->recordSize, p->odr);
+       set_param_int(params, "score", p->score, p->odr->mem);
+    set_param_int(params, "size", p->recordSize, p->odr->mem);
 
     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                    0 /* URL */,
@@ -1382,12 +1506,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
     }
 
     /* retrieve conversion */
-    perform_convert(tinfo, 0, retrieve->convert, params, &doc, &last_xsp);
+    perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
     if (!doc)
     {
         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
     }
-    else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
+    else if (!p->input_format
+             || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
     {
         xmlChar *buf_out;
         int len_out;
@@ -1395,15 +1520,15 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
         if (last_xsp)
             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
         else
-            xmlDocDumpMemory(doc, &buf_out, &len_out);            
+            xmlDocDumpMemory(doc, &buf_out, &len_out);
 
-        p->output_format = VAL_TEXT_XML;
+        p->output_format = yaz_oid_recsyn_xml;
         p->rec_len = len_out;
         p->rec_buf = odr_malloc(p->odr, p->rec_len);
         memcpy(p->rec_buf, buf_out, p->rec_len);
         xmlFree(buf_out);
     }
-    else if (p->output_format == VAL_SUTRS)
+    else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
     {
         xmlChar *buf_out;
         int len_out;
@@ -1411,13 +1536,13 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
         if (last_xsp)
             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
         else
-            xmlDocDumpMemory(doc, &buf_out, &len_out);            
-        
-        p->output_format = VAL_SUTRS;
+            xmlDocDumpMemory(doc, &buf_out, &len_out);
+
+        p->output_format = yaz_oid_recsyn_sutrs;
         p->rec_len = len_out;
         p->rec_buf = odr_malloc(p->odr, p->rec_len);
         memcpy(p->rec_buf, buf_out, p->rec_len);
-       
+
         xmlFree(buf_out);
     }
     else
@@ -1439,7 +1564,7 @@ static struct recType filter_type = {
 };
 
 RecType
-#ifdef IDZEBRA_STATIC_DOM
+#if IDZEBRA_STATIC_DOM
 idzebra_filter_dom
 #else
 idzebra_filter
@@ -1452,6 +1577,7 @@ idzebra_filter
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab