removed a few debug stmts
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2009 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24
25 #include <yaz/diagbib1.h>
26 #include <yaz/tpath.h>
27 #include <yaz/snprintf.h>
28
29 #include <libxml/xmlversion.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/xmlreader.h>
34 #include <libxslt/transform.h>
35 #include <libxslt/xsltutils.h>
36
37 #if YAZ_HAVE_EXSLT
38 #include <libexslt/exslt.h>
39 #endif
40
41 #include <idzebra/util.h>
42 #include <idzebra/recctrl.h>
43 #include <yaz/oid_db.h>
44
/* XML namespace URI that marks Zebra DOM filter elements; the element
   handlers in this file (meta, index, record) compare a node's namespace
   href against it */
#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
static const char *zebra_dom_ns = ZEBRA_DOM_NS;

/* name for Zebra processing instructions
   NOTE(review): presumably the PI target; its use is not visible in this
   part of the file */
#define ZEBRA_PI_NAME "zebra-2.0"
static const char *zebra_pi_name = ZEBRA_PI_NAME;
52
/* kind of a single step in a conversion chain (see struct convert_s) */
enum convert_type {
    convert_xslt_type = 0, /* step applies an XSLT stylesheet */
    convert_meta_type = 1  /* step handles Zebra <meta> elements */
};
57
58 struct convert_xslt {
59     const char *stylesheet;
60     xsltStylesheetPtr stylesheet_xsp;
61 };
62
/* data of a process-meta conversion step; the step takes no settings,
   so there is only a placeholder member */
struct convert_meta {
    int dummy; /* unused */
};
66
67 struct convert_s {
68     enum convert_type which;
69     union {
70         struct convert_xslt xslt;
71         struct convert_meta meta;
72     } u;
73     struct convert_s *next;
74 };
75
/* configuration of the <extract> element */
struct filter_extract {
    /* value of @name, or 0 if the attribute is absent */
    const char *name;
    /* conversion chain built from the element's children */
    struct convert_s *convert;
};
80
/* configuration of the <store> element */
struct filter_store {
    /* conversion chain built from the element's children */
    struct convert_s *convert;
};
84
/* configuration of one <retrieve> element; all of them are kept in a
   singly linked list in configuration order */
struct filter_retrieve {
    /* value of @name, or 0 */
    const char *name;
    /* value of @identifier, or 0 */
    const char *identifier;
    /* conversion chain built from the element's children */
    struct convert_s *convert;
    /* following <retrieve> entry, or 0 */
    struct filter_retrieve *next;
};
91
92 #define DOM_INPUT_XMLREADER 1
93 #define DOM_INPUT_MARC 2
94 struct filter_input {
95     const char *syntax;
96     const char *name;
97     struct convert_s *convert;
98     int type;
99     union {
100         struct {
101             xmlTextReaderPtr reader;
102             int split_level;
103         } xmlreader;
104         struct {
105             const char *input_charset;
106             yaz_marc_t handle;
107             yaz_iconv_t iconv;
108         } marc;
109     } u;
110     struct filter_input *next;
111 };
112   
113 struct filter_info {
114     char *fname;
115     char *full_name;
116     const char *profile_path;
117     NMEM nmem_record;
118     NMEM nmem_config;
119     xmlDocPtr doc_config;
120     struct filter_extract *extract;
121     struct filter_retrieve *retrieve_list;
122     struct filter_input *input_list;
123     struct filter_store *store;
124     int record_info_invoked;
125 };
126
127
128
/* strcmp()/strlen() for libxml2 strings (xmlChar is unsigned char).
   Arguments are parenthesized so that expressions can be passed. */
#define XML_STRCMP(a,b)   strcmp((const char *)(a), (b))
#define XML_STRLEN(a) strlen((const char *)(a))


/* Loop header: runs the statement that follows for every element node,
   starting at ptr and following ->next; ptr itself is advanced.
   The "if (...) {} else" form keeps an `else` written after the loop
   body from binding to the macro's internal test. */
#define FOR_EACH_ELEMENT(ptr) \
    for (; (ptr); (ptr) = (ptr)->next) \
        if ((ptr)->type != XML_ELEMENT_NODE) {} else
134
135 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
136                     const char *fmt, ...)
137 #ifdef __GNUC__
138     __attribute__ ((format (printf, 4, 5)))
139 #endif
140     ;
141
142 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
143                     const char *fmt, ...)
144 {
145     va_list ap;
146     char buf[4096];
147
148     va_start(ap, fmt);
149     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
150     if (ptr)
151     {
152         yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
153                 xmlGetLineNo(ptr), buf);
154     }
155     else
156     {
157         yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
158     }
159     va_end(ap);
160 }
161
162
163 static void set_param_str(const char **params, const char *name,
164                           const char *value, NMEM nmem)
165 {
166     char *quoted = nmem_malloc(nmem, 3 + strlen(value));
167     sprintf(quoted, "'%s'", value);
168     while (*params)
169         params++;
170     params[0] = name;
171     params[1] = quoted;
172     params[2] = 0;
173 }
174
175 static void set_param_int(const char **params, const char *name,
176                           zint value, NMEM nmem)
177 {
178     char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
179     while (*params)
180         params++;
181     sprintf(quoted, "'" ZINT_FORMAT "'", value);
182     params[0] = name;
183     params[1] = quoted;
184     params[2] = 0;
185 }
186
187 static void *filter_init(Res res, RecType recType)
188 {
189     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
190     tinfo->fname = 0;
191     tinfo->full_name = 0;
192     tinfo->profile_path = 0;
193     tinfo->nmem_record = nmem_create();
194     tinfo->nmem_config = nmem_create();
195     tinfo->extract = 0;
196     tinfo->retrieve_list = 0;
197     tinfo->input_list = 0;
198     tinfo->store = 0;
199     tinfo->doc_config = 0;
200     tinfo->record_info_invoked = 0;
201
202 #if YAZ_HAVE_EXSLT
203     exsltRegisterAll(); 
204 #endif
205
206     return tinfo;
207 }
208
209 static int attr_content(struct _xmlAttr *attr, const char *name,
210                         const char **dst_content)
211 {
212     if (!XML_STRCMP(attr->name, name) && attr->children 
213         && attr->children->type == XML_TEXT_NODE)
214     {
215         *dst_content = (const char *)(attr->children->content);
216         return 1;
217     }
218     return 0;
219 }
220
221 static void destroy_xsp(struct convert_s *c)
222 {
223     while (c)
224     {
225         if (c->which == convert_xslt_type)
226         {
227             if (c->u.xslt.stylesheet_xsp)
228                 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
229         }
230         c = c->next;
231     }
232 }
233
234 static void destroy_dom(struct filter_info *tinfo)
235 {
236     if (tinfo->extract)
237     {
238         destroy_xsp(tinfo->extract->convert);
239         tinfo->extract = 0;
240     }
241     if (tinfo->store)
242     {
243         destroy_xsp(tinfo->store->convert);
244         tinfo->store = 0;
245     }
246     if (tinfo->input_list)
247     {
248         struct filter_input *i_ptr;
249         for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
250         {
251             switch(i_ptr->type)
252             {
253             case DOM_INPUT_XMLREADER:
254                 if (i_ptr->u.xmlreader.reader)
255                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
256                 break;
257             case DOM_INPUT_MARC:
258                 yaz_iconv_close(i_ptr->u.marc.iconv);
259                 yaz_marc_destroy(i_ptr->u.marc.handle);
260                 break;
261             }
262             destroy_xsp(i_ptr->convert);
263         }
264         tinfo->input_list = 0;
265     }
266     if (tinfo->retrieve_list)
267     {
268         struct filter_retrieve *r_ptr;
269         for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
270             destroy_xsp(r_ptr->convert);
271         tinfo->retrieve_list = 0;
272     }
273
274     if (tinfo->doc_config)
275     {
276         xmlFreeDoc(tinfo->doc_config);
277         tinfo->doc_config = 0;
278     }
279     nmem_reset(tinfo->nmem_config);
280 }
281
282 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
283                                struct convert_s **l)
284 {
285     *l = 0;
286     FOR_EACH_ELEMENT(ptr) {
287         if (!XML_STRCMP(ptr->name, "xslt"))
288         {
289             struct _xmlAttr *attr;
290             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
291             
292             p->next = 0;
293             p->which = convert_xslt_type;
294             p->u.xslt.stylesheet = 0;
295             p->u.xslt.stylesheet_xsp = 0;
296             
297             for (attr = ptr->properties; attr; attr = attr->next)
298                 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
299                     ;
300                 else
301                 {
302                     dom_log(YLOG_WARN, tinfo, ptr,
303                             "bad attribute @%s", attr->name);
304                 }
305             if (p->u.xslt.stylesheet)
306             {
307                 char tmp_xslt_full_name[1024];
308                 if (!yaz_filepath_resolve(p->u.xslt.stylesheet, 
309                                           tinfo->profile_path,
310                                           NULL, 
311                                           tmp_xslt_full_name))
312                 {
313                     dom_log(YLOG_WARN, tinfo, 0,
314                             "stylesheet %s not found in "
315                             "path %s",
316                             p->u.xslt.stylesheet, 
317                             tinfo->profile_path);
318                     return ZEBRA_FAIL;
319                 }
320                 
321                 p->u.xslt.stylesheet_xsp
322                     = xsltParseStylesheetFile((const xmlChar*) 
323                                               tmp_xslt_full_name);
324                 if (!p->u.xslt.stylesheet_xsp)
325                 {
326                     dom_log(YLOG_WARN, tinfo, 0,
327                             "could not parse xslt stylesheet %s",
328                             tmp_xslt_full_name);
329                     return ZEBRA_FAIL;
330                 }
331             }
332             else
333             {
334                 dom_log(YLOG_WARN, tinfo, ptr,
335                         "missing attribute 'stylesheet'");
336                 return ZEBRA_FAIL;
337             }
338             *l = p;
339             l = &p->next;
340         }
341         else if (!XML_STRCMP(ptr->name, "process-meta"))
342         {
343             struct _xmlAttr *attr;
344             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
345             
346             p->next = 0;
347             p->which = convert_meta_type;
348             
349             for (attr = ptr->properties; attr; attr = attr->next)
350                 dom_log(YLOG_WARN, tinfo, ptr,
351                         "bad attribute @%s", attr->name);
352             *l = p;
353             l = &p->next;
354         }
355         else
356         {
357             dom_log(YLOG_WARN, tinfo, ptr,
358                     "bad element '%s', expected <xslt>", ptr->name);
359             return ZEBRA_FAIL;
360         }
361     }
362     return ZEBRA_OK;
363 }
364
365 static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, 
366                         struct recRetrieveCtrl *retctr)
367 {
368
369     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
370         0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
371     {
372         if (0 == XML_STRCMP(node->name, "meta"))
373         {
374             const char *element_set_name = 0;
375             
376             struct _xmlAttr *attr;      
377             for (attr = node->properties; attr; attr = attr->next)
378             {
379                 if (attr_content(attr, "name", &element_set_name))
380                     ;
381                 else
382                 {
383                     dom_log(YLOG_WARN, tinfo, node,
384                             "bad attribute @%s, expected @name", attr->name);
385                 }
386             }
387             if (element_set_name)
388             {
389                 WRBUF result = wrbuf_alloc();
390                 WRBUF addinfo = wrbuf_alloc();
391                 const Odr_oid *input_format = yaz_oid_recsyn_xml;
392                 const Odr_oid *output_format = 0;
393                 int ret;
394                 
395                 ret = retctr->special_fetch(retctr->handle,
396                                             element_set_name,
397                                             input_format, &output_format,
398                                             result, addinfo);
399                 if (ret == 0)
400                 {
401                     xmlDocPtr sub_doc = 
402                         xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
403                     if (sub_doc)
404                     {
405                         xmlNodePtr t = xmlDocGetRootElement(sub_doc);
406                         xmlReplaceNode(node, xmlCopyNode(t, 1));
407                         xmlFreeDoc(sub_doc);
408                     }
409                 }
410                 wrbuf_destroy(result);
411                 wrbuf_destroy(addinfo);
412             }
413         }
414     }
415     for (node = node->children; node; node = node->next)
416         process_meta(tinfo, doc, node, retctr);
417     return 0;
418 }
419
420 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
421                                  struct recExtractCtrl *extctr,
422                                  struct recRetrieveCtrl *retctr,
423                                  struct convert_s *convert,
424                                  const char **params,
425                                  xmlDocPtr *doc,
426                                  xsltStylesheetPtr *last_xsp)
427 {
428     for (; convert; convert = convert->next)
429     {
430         if (convert->which == convert_xslt_type)
431         {
432             xmlChar *buf_out = 0;
433             int len_out = 0;
434             xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
435                                                     *doc, params);
436             if (last_xsp)
437                 *last_xsp = convert->u.xslt.stylesheet_xsp;
438             
439             if (!res_doc)
440                 break;
441             
442             /* now saving into buffer and re-reading into DOM to avoid annoing
443                XSLT problem with thrown-out indentation text nodes */
444             xsltSaveResultToString(&buf_out, &len_out, res_doc,
445                                    convert->u.xslt.stylesheet_xsp); 
446             xmlFreeDoc(res_doc);
447             
448             xmlFreeDoc(*doc);
449             
450             *doc = xmlParseMemory((const char *) buf_out, len_out);
451             
452             /* writing debug info out */
453             if (extctr && extctr->flagShowRecords)
454                 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
455                         tinfo->fname ? tinfo->fname : "(none)", 
456                         convert->u.xslt.stylesheet,
457                         len_out, buf_out);
458             
459             xmlFree(buf_out);
460         }
461         else if (convert->which == convert_meta_type)
462         {
463             if (retctr) /* only execute meta on retrieval */
464             {
465                 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
466
467                 /* last stylesheet absent */
468                 if (last_xsp)
469                     *last_xsp = 0;
470             }
471         }
472     }
473     return ZEBRA_OK;
474 }
475
476 static struct filter_input *new_input(struct filter_info *tinfo, int type)
477 {
478     struct filter_input *p;
479     struct filter_input **np = &tinfo->input_list;
480     for (;*np; np = &(*np)->next)
481         ;
482     p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
483     p->next = 0;
484     p->syntax = 0;
485     p->name = 0;
486     p->convert = 0;
487     p->type = type;
488     return p;
489 }
490
491 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
492                              const char *syntax, const char *name)
493 {
494     FOR_EACH_ELEMENT(ptr) {
495         if (!XML_STRCMP(ptr->name, "marc"))
496         {
497             yaz_iconv_t iconv = 0;
498             const char *input_charset = "marc-8";
499             struct _xmlAttr *attr;
500             
501             for (attr = ptr->properties; attr; attr = attr->next)
502             {
503                 if (attr_content(attr, "inputcharset", &input_charset))
504                     ;
505                 else
506                 {
507                     dom_log(YLOG_WARN, tinfo, ptr,
508                             "bad attribute @%s, expected @inputcharset",
509                             attr->name);
510                 }
511             }
512             iconv = yaz_iconv_open("utf-8", input_charset);
513             if (!iconv)
514             {
515                 dom_log(YLOG_WARN, tinfo, ptr, 
516                         "unsupported @charset '%s'", input_charset);
517                 return ZEBRA_FAIL;
518             }
519             else
520             {
521                 struct filter_input *p 
522                     = new_input(tinfo, DOM_INPUT_MARC);
523                 p->u.marc.handle = yaz_marc_create();
524                 p->u.marc.iconv = iconv;
525                 
526                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
527                 
528                 ptr = ptr->next;
529                 
530                 parse_convert(tinfo, ptr, &p->convert);
531             }
532             break;
533
534         }
535         else if (!XML_STRCMP(ptr->name, "xmlreader"))
536         {
537             struct filter_input *p 
538                 = new_input(tinfo, DOM_INPUT_XMLREADER);
539             struct _xmlAttr *attr;
540             const char *level_str = 0;
541
542             p->u.xmlreader.split_level = 0;
543             p->u.xmlreader.reader = 0;
544
545             for (attr = ptr->properties; attr; attr = attr->next)
546             {
547                 if (attr_content(attr, "level", &level_str))
548                     ;
549                 else
550                 {
551                     dom_log(YLOG_WARN, tinfo, ptr,
552                             "bad attribute @%s, expected @level",
553                             attr->name);
554                 }
555             }
556             if (level_str)
557                 p->u.xmlreader.split_level = atoi(level_str);
558                 
559             ptr = ptr->next;
560
561             parse_convert(tinfo, ptr, &p->convert);
562             break;
563         }
564         else
565         {
566             dom_log(YLOG_WARN, tinfo, ptr,
567                     "bad element <%s>, expected <marc>|<xmlreader>",
568                     ptr->name);
569             return ZEBRA_FAIL;
570         }
571     }
572     return ZEBRA_OK;
573 }
574
575 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
576 {
577     char tmp_full_name[1024];
578     xmlNodePtr ptr;
579     xmlDocPtr doc;
580
581     tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
582     
583     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
584                              NULL, tmp_full_name))
585         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
586     else
587         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
588     
589     yaz_log(YLOG_LOG, "%s dom filter: "
590             "loading config file %s", tinfo->fname, tinfo->full_name);
591
592     doc = xmlParseFile(tinfo->full_name);
593     if (!doc)
594     {
595         yaz_log(YLOG_WARN, "%s: dom filter: "
596                 "failed to parse config file %s",
597                 tinfo->fname, tinfo->full_name);
598         return ZEBRA_FAIL;
599     }
600     /* save because we store ptrs to the content */ 
601     tinfo->doc_config = doc;
602     
603     ptr = xmlDocGetRootElement(doc);
604     if (!ptr || ptr->type != XML_ELEMENT_NODE 
605         || XML_STRCMP(ptr->name, "dom"))
606     {
607         dom_log(YLOG_WARN, tinfo, ptr,
608                 "bad root element <%s>, expected root element <dom>", 
609                 ptr->name);  
610         return ZEBRA_FAIL;
611     }
612
613     ptr = ptr->children;
614     FOR_EACH_ELEMENT(ptr) {
615         if (!XML_STRCMP(ptr->name, "extract"))
616         {
617             /*
618               <extract name="index">
619               <xslt stylesheet="first.xsl"/>
620               <xslt stylesheet="second.xsl"/>
621               </extract>
622             */
623             struct _xmlAttr *attr;
624             struct filter_extract *f =
625                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
626             
627             tinfo->extract = f;
628             f->name = 0;
629             f->convert = 0;
630             for (attr = ptr->properties; attr; attr = attr->next)
631             {
632                 if (attr_content(attr, "name", &f->name))
633                     ;
634                 else
635                 {
636                     dom_log(YLOG_WARN, tinfo, ptr,
637                             "bad attribute @%s, expected @name",
638                             attr->name);
639                 }
640             }
641             parse_convert(tinfo, ptr->children, &f->convert);
642         }
643         else if (!XML_STRCMP(ptr->name, "retrieve"))
644         {  
645             /* 
646                <retrieve name="F">
647                <xslt stylesheet="some.xsl"/>
648                <xslt stylesheet="some.xsl"/>
649                </retrieve>
650             */
651             struct _xmlAttr *attr;
652             struct filter_retrieve **fp = &tinfo->retrieve_list;
653             struct filter_retrieve *f =
654                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
655             
656             while (*fp)
657                 fp = &(*fp)->next;
658
659             *fp = f;
660             f->name = 0;
661             f->identifier = 0;
662             f->convert = 0;
663             f->next = 0;
664
665             for (attr = ptr->properties; attr; attr = attr->next)
666             {
667                 if (attr_content(attr, "identifier", 
668                                  &f->identifier))
669                     ;
670                 else if (attr_content(attr, "name", &f->name))
671                     ;
672                 else
673                 {
674                     dom_log(YLOG_WARN, tinfo, ptr,
675                             "bad attribute @%s,  expected @identifier|@name",
676                             attr->name);
677                 }
678             }
679             parse_convert(tinfo, ptr->children, &f->convert);
680         }
681         else if (!XML_STRCMP(ptr->name, "store"))
682         {
683             /*
684               <store name="F">
685               <xslt stylesheet="some.xsl"/>
686               <xslt stylesheet="some.xsl"/>
687               </retrieve>
688             */
689             struct filter_store *f =
690                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
691             
692             tinfo->store = f;
693             f->convert = 0;
694             parse_convert(tinfo, ptr->children, &f->convert);
695         }
696         else if (!XML_STRCMP(ptr->name, "input"))
697         {
698             /*
699               <input syntax="xml">
700               <xmlreader level="1"/>
701               </input>
702               <input syntax="usmarc">
703               <marc inputcharset="marc-8"/>
704               </input>
705             */
706             struct _xmlAttr *attr;
707             const char  *syntax = 0;
708             const char *name = 0;
709             for (attr = ptr->properties; attr; attr = attr->next)
710             {
711                 if (attr_content(attr, "syntax", &syntax))
712                     ;
713                 else if (attr_content(attr, "name", &name))
714                     ;
715                 else
716                 {
717                     dom_log(YLOG_WARN, tinfo, ptr,
718                             "bad attribute @%s,  expected @syntax|@name",
719                             attr->name);
720                 }
721             }
722             parse_input(tinfo, ptr->children, syntax, name);
723         }
724         else
725         {
726             dom_log(YLOG_WARN, tinfo, ptr,
727                     "bad element <%s>, "
728                     "expected <extract>|<input>|<retrieve>|<store>",
729                     ptr->name);
730             return ZEBRA_FAIL;
731         }
732     }
733     if (!tinfo->input_list)
734     {
735         struct filter_input *p 
736             = new_input(tinfo, DOM_INPUT_XMLREADER);
737         p->u.xmlreader.split_level = 0;
738         p->u.xmlreader.reader = 0;
739     }
740     return ZEBRA_OK;
741 }
742
743 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
744                                                const char *est)
745 {
746     struct filter_retrieve *f = tinfo->retrieve_list;
747
748     /* return first schema if no est is provided */
749     if (!est)
750         return f;
751     for (; f; f = f->next)
752     { 
753         /* find requested schema */
754         if (est) 
755         {    
756             if (f->identifier && !strcmp(f->identifier, est))
757                 return f;
758             if (f->name && !strcmp(f->name, est))
759                 return f;
760         } 
761     }
762     return 0;
763 }
764
765 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
766 {
767     struct filter_info *tinfo = clientData;
768     if (!args || !*args)
769     {
770         yaz_log(YLOG_WARN, "dom filter: need config file");
771         return ZEBRA_FAIL;
772     }
773
774     if (tinfo->fname && !strcmp(args, tinfo->fname))
775         return ZEBRA_OK;
776     
777     tinfo->profile_path = res_get(res, "profilePath");
778
779     destroy_dom(tinfo);
780     return parse_dom(tinfo, args);
781 }
782
783 static void filter_destroy(void *clientData)
784 {
785     struct filter_info *tinfo = clientData;
786     destroy_dom(tinfo);
787     nmem_destroy(tinfo->nmem_config);
788     nmem_destroy(tinfo->nmem_record);
789     xfree(tinfo);
790 }
791
792 static int ioread_ex(void *context, char *buffer, int len)
793 {
794     struct recExtractCtrl *p = context;
795     return p->stream->readf(p->stream, buffer, len);
796 }
797
/* Close callback matching ioread_ex: does nothing and reports
   success (0). */
static int ioclose_ex(void *context)
{
    (void) context; /* nothing to release */
    return 0;
}
802
803
804
805 /* DOM filter style indexing */
806 static void index_value_of(struct filter_info *tinfo, 
807                            struct recExtractCtrl *extctr,
808                            RecWord* recword, 
809                            xmlNodePtr node, 
810                            const char *index_p)
811 {
812     if (tinfo->record_info_invoked == 1)
813     {
814         xmlChar *text = xmlNodeGetContent(node);
815         size_t text_len = strlen((const char *)text);
816         
817         /* if there is no text, we do not need to proceed */
818         if (text_len)
819         {            
820             /* keep seqno base so that all text will have
821                identical seqno's for multiple fields , e.g
822                <z:index name="title:w any:w title:p">.. */
823             
824             zint seqno_base = recword->seqno;
825             zint seqno_max = recword->seqno;
826        
827
828             const char *look = index_p;
829             const char *bval;
830             const char *eval;
831
832             xmlChar index[256];
833             xmlChar type[256];
834
835             /* assingning text to be indexed */
836             recword->term_buf = (const char *)text;
837             recword->term_len = text_len;
838
839             /* parsing all index name/type pairs */
840             /* may not start with ' ' or ':' */
841             while (*look && ' ' != *look && ':' != *look)
842             {
843                 /* setting name and type to zero */
844                 *index = '\0';
845                 *type = '\0';
846     
847                 /* parsing one index name */
848                 bval = look;
849                 while (*look && ':' != *look && ' ' != *look)
850                 {
851                     look++;
852                 }
853                 eval = look;
854                 strncpy((char *)index, (const char *)bval, eval - bval);
855                 index[eval - bval] = '\0';
856     
857     
858                 /* parsing one index type, if existing */
859                 if (':' == *look)
860                 {
861                     look++;
862       
863                     bval = look;
864                     while (*look && ' ' != *look)
865                     {
866                         look++;
867                     }
868                     eval = look;
869                     strncpy((char *)type, (const char *)bval, eval - bval);
870                     type[eval - bval] = '\0';
871                 }
872
873                 /* actually indexing the text given */
874
875                 recword->seqno = seqno_base;
876                 recword->index_name = (const char *)index;
877                 if (*type)
878                     recword->index_type = (const char *) type;
879
880                 /* writing debug out */
881                 if (extctr->flagShowRecords)
882                     dom_log(YLOG_LOG, tinfo, 0, 
883                             "INDEX '%s:%s' '%s'", 
884                             (const char *) index,
885                             (const char *) type, 
886                             (const char *) text);
887                 
888                 (extctr->tokenAdd)(recword);
889
890                 if (seqno_max < recword->seqno)
891                     seqno_max = recword->seqno;
892
893                 /* eat whitespaces */
894                 if (*look && ' ' == *look)
895                 {
896                     look++;
897                 } 
898             }
899             recword->seqno = seqno_max;
900         }
901         xmlFree(text); 
902     }
903 }
904
905
906 /* DOM filter style indexing */
907 static void set_record_info(struct filter_info *tinfo, 
908                             struct recExtractCtrl *extctr, 
909                             xmlNodePtr node, 
910                             const char * id_p, 
911                             const char * rank_p, 
912                             const char * type_p)
913 {
914     /* writing debug info out */
915     if (extctr && extctr->flagShowRecords)
916         dom_log(YLOG_LOG, tinfo, node,
917                 "RECORD id=%s rank=%s type=%s", 
918                 id_p ? (const char *) id_p : "(null)",
919                 rank_p ? (const char *) rank_p : "(null)",
920                 type_p ? (const char *) type_p : "(null)");
921     
922
923     if (id_p && *id_p)
924         sscanf((const char *)id_p, "%255s", extctr->match_criteria);
925
926     if (rank_p && *rank_p)
927         extctr->staticrank = atozint((const char *)rank_p);
928
929     if (type_p && *type_p)
930     {
931         enum zebra_recctrl_action_t action = action_update;
932         if (!strcmp(type_p, "insert"))
933             action = action_insert;
934         else if (!strcmp(type_p, "delete"))
935             action = action_delete;
936         else if (!strcmp(type_p, "replace"))
937             action = action_replace;
938         else if (!strcmp(type_p, "update"))
939             action = action_update;
940         else
941             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
942         extctr->action = action;
943     }
944
945     if (tinfo->record_info_invoked == 1)
946     {
947         /* warn about multiple only once */
948         dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
949     }
950     tinfo->record_info_invoked++;
951
952 }
953
954
955 /* DOM filter style indexing */
956 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
957                                            struct recExtractCtrl *extctr, 
958                                            RecWord* recword, 
959                                            xmlNodePtr node)
960 {
961     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
962         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
963     {
964         if (0 == XML_STRCMP(node->name, "index"))
965         {
966             const char *index_p = 0;
967
968             struct _xmlAttr *attr;      
969             for (attr = node->properties; attr; attr = attr->next)
970             {
971                 if (attr_content(attr, "name", &index_p))
972                 {
973                     index_value_of(tinfo, extctr, recword, node, index_p);
974                 }  
975                 else
976                 {
977                     dom_log(YLOG_WARN, tinfo, node,
978                             "bad attribute @%s, expected @name",
979                             attr->name);
980                 }
981             }
982         }
983         else if (0 == XML_STRCMP(node->name, "record"))
984         {
985             const char *id_p = 0;
986             const char *rank_p = 0;
987             const char *type_p = 0;
988
989             struct _xmlAttr *attr;
990             for (attr = node->properties; attr; attr = attr->next)
991             {
992                 if (attr_content(attr, "id", &id_p))
993                     ;
994                 else if (attr_content(attr, "rank", &rank_p))
995                     ;
996                 else if (attr_content(attr, "type", &type_p))
997                     ;
998                 else
999                 {
1000                     dom_log(YLOG_WARN, tinfo, node,
1001                             "bad attribute @%s, expected @id|@rank|@type",
1002                             attr->name);
1003                 }
1004             }
1005             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
1006         } 
1007         else
1008         {
1009             dom_log(YLOG_WARN, tinfo, node,
1010                     "bad element <%s>,"
1011                     " expected <record>|<index> in namespace '%s'",
1012                     node->name, zebra_dom_ns);
1013         }
1014     }
1015 }
1016
/*
 * Try to read one "name=value" token from a processing-instruction
 * attribute string.
 *
 *   c_ptr      in/out cursor; leading blanks are skipped, and on return
 *              it points past the consumed token (if any) and past the
 *              blanks that follow it
 *   name       attribute name expected at the cursor
 *   value      receives the value text, NUL-terminated and truncated to
 *              fit value_max bytes
 *   value_max  size of the value buffer in bytes
 *
 * Returns 1 when the token at the cursor is "name=...", otherwise 0.
 *
 * On a miss the value buffer is left untouched. Callers probe several
 * names in turn against the same cursor, so clearing the buffer on a
 * miss would wipe a value stored by an earlier successful probe.
 */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    const size_t name_len = strlen(name);
    const char *look = *c_ptr;
    int ret = 0;

    while (' ' == *look)
        look++;

    /* strncmp stops at the terminating NUL, so look[name_len] is only
       read when at least name_len characters precede it */
    if (0 == strncmp(look, name, name_len) && '=' == look[name_len])
    {
        size_t i = 0;

        look += name_len + 1;
        while (*look && ' ' != *look)
        {
            /* i + 1 < value_max cannot underflow when value_max is 0 */
            if (i + 1 < value_max)
                value[i++] = *look;
            look++;
        }
        if (value_max > 0)
            value[i] = '\0';
        ret = 1;
    }

    while (' ' == *look)
        look++;
    *c_ptr = look;
    return ret;
}
1048
1049 /* DOM filter style indexing */
1050 static void process_xml_pi_node(struct filter_info *tinfo, 
1051                                 struct recExtractCtrl *extctr, 
1052                                 xmlNodePtr node,
1053                                 const char **index_pp)
1054 {
1055     /* if right PI name, continue parsing PI */
1056     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1057     {
1058         xmlChar *pi_p =  node->content;
1059         const char *look = (const char *) node->content;
1060     
1061         /* parsing PI record instructions */
1062         if (0 == strncmp((const char *)look, "record", 6))
1063         {
1064             char id[256];
1065             char rank[256];
1066             char type[256];
1067             
1068             *id = '\0';
1069             *rank = '\0';
1070             *type = '\0';
1071             look += 6;
1072             while (*look)
1073                 if (attr_content_pi(&look, "id", id, sizeof(id)))
1074                     ;
1075                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1076                     ;
1077                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1078                 {
1079                     dom_log(YLOG_WARN, tinfo, node,
1080                             "content '%s', can not parse '%s'",
1081                             pi_p, look);
1082                     break;
1083                 }
1084             set_record_info(tinfo, extctr, node, id, rank, type);
1085         } 
1086         /* parsing index instruction */
1087         else if (0 == strncmp((const char *)look, "index", 5))
1088         {
1089             look += 5;
1090       
1091             /* eat whitespace */
1092             while (*look && ' ' == *look)
1093                 look++;
1094
1095             /* export index instructions to outside */
1096             *index_pp = look;
1097         } 
1098         else 
1099         {
1100             dom_log(YLOG_WARN, tinfo, node,
1101                     "content '%s', can not parse '%s'",
1102                     pi_p, look);
1103         }
1104     }
1105 }
1106
1107 /* DOM filter style indexing */
1108 static void process_xml_element_node(struct filter_info *tinfo, 
1109                                      struct recExtractCtrl *extctr, 
1110                                      RecWord* recword, 
1111                                      xmlNodePtr node)
1112 {
1113     /* remember indexing instruction from PI to next element node */
1114     const char *index_p = 0;
1115
1116     /* check if we are an element node in the special zebra namespace 
1117        and either set record data or index value-of node content*/
1118     process_xml_element_zebra_node(tinfo, extctr, recword, node);
1119   
1120     /* loop through kid nodes */
1121     for (node = node->children; node; node = node->next)
1122     {
1123         /* check and set PI record and index index instructions */
1124         if (node->type == XML_PI_NODE)
1125         {
1126             process_xml_pi_node(tinfo, extctr, node, &index_p);
1127         }
1128         else if (node->type == XML_ELEMENT_NODE)
1129         {
1130             /* if there was a PI index instruction before this element */
1131             if (index_p)
1132             {
1133                 index_value_of(tinfo, extctr, recword, node, index_p);
1134                 index_p = 0;
1135             }
1136             process_xml_element_node(tinfo, extctr, recword,node);
1137         }
1138         else
1139             continue;
1140     }
1141 }
1142
1143
1144 /* DOM filter style indexing */
1145 static void extract_dom_doc_node(struct filter_info *tinfo, 
1146                                  struct recExtractCtrl *extctr, 
1147                                  xmlDocPtr doc)
1148 {
1149     /* only need to do the initialization once, reuse recword for all terms */
1150     RecWord recword;
1151     (*extctr->init)(extctr, &recword);
1152
1153     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1154 }
1155
1156
1157 static int convert_extract_doc(struct filter_info *tinfo, 
1158                                struct filter_input *input,
1159                                struct recExtractCtrl *p, 
1160                                xmlDocPtr doc)
1161 {
1162     xmlChar *buf_out;
1163     int len_out;
1164     const char *params[10];
1165     xsltStylesheetPtr last_xsp = 0;
1166
1167     /* per default do not ingest record */
1168     tinfo->record_info_invoked = 0;
1169
1170     /* exit if empty document given */
1171     if (!doc)
1172         return RECCTRL_EXTRACT_SKIP;
1173
1174     /* we actuallu have a document which needs to be processed further */
1175     params[0] = 0;
1176     set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1177
1178     if (p && p->flagShowRecords)
1179     {
1180         xmlChar *buf_out;
1181         int len_out;
1182         xmlDocDumpMemory(doc, &buf_out, &len_out);
1183 #if 0 
1184         FILE *outf = fopen("extract.xml", "w");
1185         fwrite(buf_out, 1, len_out, outf);
1186         fclose(outf);
1187 #endif
1188         yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1189     }
1190
1191     if (p->setStoreData)
1192     {
1193         xmlDocPtr store_doc = 0;
1194
1195         /* input conversion */
1196         perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1197         
1198         if (tinfo->store)
1199         {
1200             /* store conversion */
1201             store_doc = xmlCopyDoc(doc, 1);
1202             perform_convert(tinfo, p, 0, tinfo->store->convert,
1203                             params, &store_doc, &last_xsp);
1204         }
1205         
1206         /* saving either store doc or original doc in case no store doc exists */
1207         if (last_xsp)
1208             xsltSaveResultToString(&buf_out, &len_out, 
1209                                    store_doc ? store_doc : doc, last_xsp);
1210         else
1211             xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1212         
1213         if (p->setStoreData)
1214             (*p->setStoreData)(p, buf_out, len_out);
1215         xmlFree(buf_out);
1216         if (store_doc)
1217             xmlFreeDoc(store_doc);
1218     }
1219
1220
1221     /* extract conversion */
1222     perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1223
1224
1225     /* finally, do the indexing */
1226     if (doc){
1227         extract_dom_doc_node(tinfo, p, doc);
1228         xmlFreeDoc(doc);
1229     }
1230     
1231     /* there was nothing to index, so there is no inserted/updated record */
1232     if (tinfo->record_info_invoked == 0)
1233         return RECCTRL_EXTRACT_SKIP;
1234
1235     return RECCTRL_EXTRACT_OK;
1236 }
1237
1238 static int extract_xml_split(struct filter_info *tinfo,
1239                              struct filter_input *input,
1240                              struct recExtractCtrl *p)
1241 {
1242     int ret;
1243
1244     if (p->first_record)
1245     {
1246         if (input->u.xmlreader.reader)
1247             xmlFreeTextReader(input->u.xmlreader.reader);
1248         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1249                                                    p /* I/O handler */,
1250                                                    0 /* URL */, 
1251                                                    0 /* encoding */,
1252                                                    XML_PARSE_XINCLUDE
1253                                                    | XML_PARSE_NOENT
1254                                                    | XML_PARSE_NONET);
1255     }
1256     if (!input->u.xmlreader.reader)
1257         return RECCTRL_EXTRACT_ERROR_GENERIC;
1258
1259     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1260     while (ret == 1)
1261     {
1262         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1263         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1264
1265         if (type == XML_READER_TYPE_ELEMENT && 
1266             input->u.xmlreader.split_level == depth)
1267         {
1268             xmlNodePtr ptr;
1269
1270             /* per default do not ingest record */
1271             tinfo->record_info_invoked = 0;
1272             
1273             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1274             if (ptr)
1275             {
1276                 /* we have a new document */
1277
1278                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1279                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1280                 
1281                 xmlDocSetRootElement(doc, ptr2);
1282                 
1283                 /* writing debug info out */
1284                 if (p->flagShowRecords)
1285                 {
1286                     xmlChar *buf_out = 0;
1287                     int len_out = 0;
1288                     xmlDocDumpMemory(doc, &buf_out, &len_out);
1289                     yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
1290                             tinfo->fname ? tinfo->fname : "(none)",
1291                             depth, len_out, buf_out); 
1292                     xmlFree(buf_out);
1293                 }
1294                 
1295                 return convert_extract_doc(tinfo, input, p, doc);
1296             }
1297             else
1298             {
1299                 xmlFreeTextReader(input->u.xmlreader.reader);
1300                 input->u.xmlreader.reader = 0;
1301                 return RECCTRL_EXTRACT_ERROR_GENERIC;
1302             }
1303         }
1304         ret = xmlTextReaderRead(input->u.xmlreader.reader);
1305     }
1306     xmlFreeTextReader(input->u.xmlreader.reader);
1307     input->u.xmlreader.reader = 0;
1308     return RECCTRL_EXTRACT_EOF;
1309 }
1310
1311 static int extract_xml_full(struct filter_info *tinfo, 
1312                             struct filter_input *input,
1313                             struct recExtractCtrl *p)
1314 {
1315     if (p->first_record) /* only one record per stream */
1316     {
1317         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1318                                   p /* I/O handler */,
1319                                   0 /* URL */,
1320                                   0 /* encoding */,
1321                                   XML_PARSE_XINCLUDE
1322                                   | XML_PARSE_NOENT
1323                                   | XML_PARSE_NONET);
1324         if (!doc)
1325         {
1326             return RECCTRL_EXTRACT_ERROR_GENERIC;
1327         }
1328         return convert_extract_doc(tinfo, input, p, doc);
1329     }
1330     else
1331         return RECCTRL_EXTRACT_EOF;
1332 }
1333
1334 static int extract_iso2709(struct filter_info *tinfo,
1335                            struct filter_input *input,
1336                            struct recExtractCtrl *p)
1337 {
1338     char buf[100000];
1339     int record_length;
1340     int read_bytes, r;
1341
1342     if (p->stream->readf(p->stream, buf, 5) != 5)
1343         return RECCTRL_EXTRACT_EOF;
1344     while (*buf < '0' || *buf > '9')
1345     {
1346         int i;
1347
1348         dom_log(YLOG_WARN, tinfo, 0,
1349                 "MARC: Skipping bad byte %d (0x%02X)",
1350                 *buf & 0xff, *buf & 0xff);
1351         for (i = 0; i<4; i++)
1352             buf[i] = buf[i+1];
1353
1354         if (p->stream->readf(p->stream, buf+4, 1) != 1)
1355             return RECCTRL_EXTRACT_EOF;
1356     }
1357     record_length = atoi_n (buf, 5);
1358     if (record_length < 25)
1359     {
1360         dom_log(YLOG_WARN, tinfo, 0,
1361                 "MARC record length < 25, is %d",  record_length);
1362         return RECCTRL_EXTRACT_ERROR_GENERIC;
1363     }
1364     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1365     if (read_bytes < record_length-5)
1366     {
1367         dom_log(YLOG_WARN, tinfo, 0,
1368                 "couldn't read whole MARC record");
1369         return RECCTRL_EXTRACT_ERROR_GENERIC;
1370     }
1371     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1372     if (r < record_length)
1373     {
1374         dom_log (YLOG_WARN, tinfo, 0,
1375                  "parsing of MARC record failed r=%d length=%d",
1376                  r, record_length);
1377         return RECCTRL_EXTRACT_ERROR_GENERIC;
1378     }
1379     else
1380     {
1381         xmlDocPtr rdoc;
1382         xmlNode *root_ptr;
1383         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 
1384                            "http://www.loc.gov/MARC21/slim", 0, 0);
1385         rdoc = xmlNewDoc((const xmlChar*) "1.0");
1386         xmlDocSetRootElement(rdoc, root_ptr);
1387         return convert_extract_doc(tinfo, input, p, rdoc);        
1388     }
1389     return RECCTRL_EXTRACT_OK;
1390 }
1391
1392 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1393 {
1394     struct filter_info *tinfo = clientData;
1395     struct filter_input *input = tinfo->input_list;
1396
1397     if (!input)
1398         return RECCTRL_EXTRACT_ERROR_GENERIC;
1399     
1400     nmem_reset(tinfo->nmem_record);
1401
1402     if (p->setStoreData == 0)
1403         return extract_xml_full(tinfo, input, p);
1404     switch(input->type)
1405     {
1406     case DOM_INPUT_XMLREADER:
1407         if (input->u.xmlreader.split_level == 0)
1408             return extract_xml_full(tinfo, input, p);
1409         else
1410             return extract_xml_split(tinfo, input, p);
1411         break;
1412     case DOM_INPUT_MARC:
1413         return extract_iso2709(tinfo, input, p);
1414     }
1415     return RECCTRL_EXTRACT_ERROR_GENERIC;
1416 }
1417
1418 static int ioread_ret(void *context, char *buffer, int len)
1419 {
1420     struct recRetrieveCtrl *p = context;
1421     int r = p->stream->readf(p->stream, buffer, len);
1422     return r;
1423 }
1424
/*
 * libxml2 close callback for retrieval: does nothing with the context
 * and always reports success.
 */
static int ioclose_ret(void *context)
{
    (void) context;
    return 0;
}
1429
1430 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1431 {
1432     /* const char *esn = zebra_dom_ns; */
1433     const char *esn = 0;
1434     const char *params[32];
1435     struct filter_info *tinfo = clientData;
1436     xmlDocPtr doc;
1437     struct filter_retrieve *retrieve;
1438     xsltStylesheetPtr last_xsp = 0;
1439
1440     if (p->comp)
1441     {
1442         if (p->comp->which == Z_RecordComp_simple
1443             && p->comp->u.simple->which == Z_ElementSetNames_generic)
1444         {
1445             esn = p->comp->u.simple->u.generic;
1446         }
1447         else if (p->comp->which == Z_RecordComp_complex 
1448                  && p->comp->u.complex->generic->elementSpec
1449                  && p->comp->u.complex->generic->elementSpec->which ==
1450                  Z_ElementSpec_elementSetName)
1451         {
1452             esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1453         }
1454     }
1455     retrieve = lookup_retrieve(tinfo, esn);
1456     if (!retrieve)
1457     {
1458         p->diagnostic =
1459             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1460         p->addinfo = odr_strdup(p->odr, esn);
1461         return 0;
1462     }
1463
1464     params[0] = 0;
1465     set_param_int(params, "id", p->localno, p->odr->mem);
1466     if (p->fname)
1467         set_param_str(params, "filename", p->fname, p->odr->mem);
1468     if (p->staticrank >= 0)
1469         set_param_int(params, "rank", p->staticrank, p->odr->mem);
1470
1471     if (esn)
1472         set_param_str(params, "schema", esn, p->odr->mem);
1473     else
1474         if (retrieve->name)
1475             set_param_str(params, "schema", retrieve->name, p->odr->mem);
1476         else if (retrieve->identifier)
1477             set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1478         else
1479             set_param_str(params, "schema", "", p->odr->mem);
1480
1481     if (p->score >= 0)
1482         set_param_int(params, "score", p->score, p->odr->mem);
1483     set_param_int(params, "size", p->recordSize, p->odr->mem);
1484
1485     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1486                     0 /* URL */,
1487                     0 /* encoding */,
1488                     XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1489     if (!doc)
1490     {
1491         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1492         return 0;
1493     }
1494
1495     /* retrieve conversion */
1496     perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1497     if (!doc)
1498     {
1499         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1500     }
1501     else if (!p->input_format
1502              || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1503     {
1504         xmlChar *buf_out;
1505         int len_out;
1506
1507         if (last_xsp)
1508             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1509         else
1510             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1511
1512         p->output_format = yaz_oid_recsyn_xml;
1513         p->rec_len = len_out;
1514         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1515         memcpy(p->rec_buf, buf_out, p->rec_len);
1516         xmlFree(buf_out);
1517     }
1518     else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1519     {
1520         xmlChar *buf_out;
1521         int len_out;
1522
1523         if (last_xsp)
1524             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1525         else
1526             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1527         
1528         p->output_format = yaz_oid_recsyn_sutrs;
1529         p->rec_len = len_out;
1530         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1531         memcpy(p->rec_buf, buf_out, p->rec_len);
1532         
1533         xmlFree(buf_out);
1534     }
1535     else
1536     {
1537         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1538     }
1539     xmlFreeDoc(doc);
1540     return 0;
1541 }
1542
/*
 * Record type descriptor of the DOM filter: carries the name "dom" and
 * the five entry points of this file.
 */
static struct recType filter_type = {
    0,                /* NOTE(review): presumably a version field - confirm in recctrl.h */
    "dom",            /* name of this filter */
    filter_init,
    filter_config,
    filter_destroy,
    filter_extract,
    filter_retrieve
};
1552
/*
 * Zero-terminated table of the record types provided by this file.
 * The symbol is named idzebra_filter_dom when IDZEBRA_STATIC_DOM is
 * defined and idzebra_filter otherwise.
 * NOTE(review): presumably the static name avoids clashes between
 * filters linked into one binary - confirm against the build setup.
 */
RecType
#ifdef IDZEBRA_STATIC_DOM
idzebra_filter_dom
#else
idzebra_filter
#endif

[] = {
    &filter_type,
    0,
};
1564 /*
1565  * Local variables:
1566  * c-basic-offset: 4
1567  * c-file-style: "Stroustrup"
1568  * indent-tabs-mode: nil
1569  * End:
1570  * vim: shiftwidth=4 tabstop=8 expandtab
1571  */
1572