Added facility to embed metadata for the DOM filter.
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24
25 #include <yaz/diagbib1.h>
26 #include <yaz/tpath.h>
27 #include <yaz/snprintf.h>
28
29 #include <libxml/xmlversion.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/xmlreader.h>
34 #include <libxslt/transform.h>
35 #include <libxslt/xsltutils.h>
36
37 #if YAZ_HAVE_EXSLT
38 #include <libexslt/exslt.h>
39 #endif
40
41 #include <idzebra/util.h>
42 #include <idzebra/recctrl.h>
43 #include <yaz/oid_db.h>
44
/* XML namespace URI that identifies Zebra DOM filter elements
   (compared against node->ns->href when looking for <meta>, <index>
   and <record> elements). */
#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
static const char *zebra_dom_ns = ZEBRA_DOM_NS;

/* Name reserved for Zebra processing instructions.
   NOTE(review): presumably the PI target; its use is not visible in
   this part of the file. */
#define ZEBRA_PI_NAME "zebra-2.0"
static const char *zebra_pi_name = ZEBRA_PI_NAME;
52
53 enum convert_type {
54     convert_xslt_type,
55     convert_meta_type
56 };
57
58 struct convert_xslt {
59     const char *stylesheet;
60     xsltStylesheetPtr stylesheet_xsp;
61 };
62
63 struct convert_meta {
64     int dummy;
65 };
66
67 struct convert_s {
68     enum convert_type which;
69     union {
70         struct convert_xslt xslt;
71         struct convert_meta meta;
72     } u;
73     struct convert_s *next;
74 };
75
/* Settings read from the <extract> element of the filter config. */
struct filter_extract {
    const char *name;                  /* @name, 0 when absent */
    struct convert_s *convert;         /* conversion chain of the element */
};

/* Settings read from the <store> element of the filter config. */
struct filter_store {
    struct convert_s *convert;         /* conversion chain of the element */
};

/* Settings read from one <retrieve> element; all of them are kept in a
   list in configuration order. */
struct filter_retrieve {
    const char *name;                  /* @name, 0 when absent */
    const char *identifier;            /* @identifier, 0 when absent */
    struct convert_s *convert;         /* conversion chain of the element */
    struct filter_retrieve *next;      /* next <retrieve>, 0 at the end */
};
91
92 #define DOM_INPUT_XMLREADER 1
93 #define DOM_INPUT_MARC 2
94 struct filter_input {
95     const char *syntax;
96     const char *name;
97     struct convert_s *convert;
98     int type;
99     union {
100         struct {
101             xmlTextReaderPtr reader;
102             int split_level;
103         } xmlreader;
104         struct {
105             const char *input_charset;
106             yaz_marc_t handle;
107             yaz_iconv_t iconv;
108         } marc;
109     } u;
110     struct filter_input *next;
111 };
112   
113 struct filter_info {
114     char *fname;
115     char *full_name;
116     const char *profile_path;
117     NMEM nmem_record;
118     NMEM nmem_config;
119     xmlDocPtr doc_config;
120     struct filter_extract *extract;
121     struct filter_retrieve *retrieve_list;
122     struct filter_input *input_list;
123     struct filter_store *store;
124     int record_info_invoked;
125 };
126
127
128
/* strcmp() between an xmlChar string and a plain C string. */
#define XML_STRCMP(a,b)   strcmp((const char *)(a), (b))
/* strlen() of an xmlChar string. */
#define XML_STRLEN(a) strlen((const char *)(a))

/* Loop header: walks ptr along its ->next chain (ptr itself is advanced)
   and executes the statement that follows only for element nodes.
   The expansion ends in an unbraced `if', so always follow the macro
   with a braced block. */
#define FOR_EACH_ELEMENT(ptr) for (; (ptr); (ptr) = (ptr)->next) if ((ptr)->type == XML_ELEMENT_NODE)
134
135 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
136                     const char *fmt, ...)
137 #ifdef __GNUC__
138     __attribute__ ((format (printf, 4, 5)))
139 #endif
140     ;
141
142 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
143                     const char *fmt, ...)
144 {
145     va_list ap;
146     char buf[4096];
147
148     va_start(ap, fmt);
149     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
150     if (ptr)
151     {
152         yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
153                 xmlGetLineNo(ptr), buf);
154     }
155     else
156     {
157         yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
158     }
159     va_end(ap);
160 }
161
162
163 static void set_param_str(const char **params, const char *name,
164                           const char *value, NMEM nmem)
165 {
166     char *quoted = nmem_malloc(nmem, 3 + strlen(value));
167     sprintf(quoted, "'%s'", value);
168     while (*params)
169         params++;
170     params[0] = name;
171     params[1] = quoted;
172     params[2] = 0;
173 }
174
175 static void set_param_int(const char **params, const char *name,
176                           zint value, NMEM nmem)
177 {
178     char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
179     while (*params)
180         params++;
181     sprintf(quoted, "'" ZINT_FORMAT "'", value);
182     params[0] = name;
183     params[1] = quoted;
184     params[2] = 0;
185 }
186
187 static void *filter_init(Res res, RecType recType)
188 {
189     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
190     tinfo->fname = 0;
191     tinfo->full_name = 0;
192     tinfo->profile_path = 0;
193     tinfo->nmem_record = nmem_create();
194     tinfo->nmem_config = nmem_create();
195     tinfo->extract = 0;
196     tinfo->retrieve_list = 0;
197     tinfo->input_list = 0;
198     tinfo->store = 0;
199     tinfo->doc_config = 0;
200     tinfo->record_info_invoked = 0;
201
202 #if YAZ_HAVE_EXSLT
203     exsltRegisterAll(); 
204 #endif
205
206     return tinfo;
207 }
208
209 static int attr_content(struct _xmlAttr *attr, const char *name,
210                         const char **dst_content)
211 {
212     if (!XML_STRCMP(attr->name, name) && attr->children 
213         && attr->children->type == XML_TEXT_NODE)
214     {
215         *dst_content = (const char *)(attr->children->content);
216         return 1;
217     }
218     return 0;
219 }
220
/* Same contract as attr_content(): on a match of the attribute name
   with a text-node value, store the text pointer in *dst_content and
   return 1; otherwise return 0. */
static int attr_content_xml(struct _xmlAttr *attr, const char *name,
                            const char **dst_content)
{
    /* the checks are exactly those of attr_content() */
    return attr_content(attr, name, dst_content);
}
232
233 static void destroy_xsp(struct convert_s *c)
234 {
235     while (c)
236     {
237         if (c->which == convert_xslt_type)
238         {
239             if (c->u.xslt.stylesheet_xsp)
240                 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
241         }
242         c = c->next;
243     }
244 }
245
246 static void destroy_dom(struct filter_info *tinfo)
247 {
248     if (tinfo->extract)
249     {
250         destroy_xsp(tinfo->extract->convert);
251         tinfo->extract = 0;
252     }
253     if (tinfo->store)
254     {
255         destroy_xsp(tinfo->store->convert);
256         tinfo->store = 0;
257     }
258     if (tinfo->input_list)
259     {
260         struct filter_input *i_ptr;
261         for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
262         {
263             switch(i_ptr->type)
264             {
265             case DOM_INPUT_XMLREADER:
266                 if (i_ptr->u.xmlreader.reader)
267                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
268                 break;
269             case DOM_INPUT_MARC:
270                 yaz_iconv_close(i_ptr->u.marc.iconv);
271                 yaz_marc_destroy(i_ptr->u.marc.handle);
272                 break;
273             }
274             destroy_xsp(i_ptr->convert);
275         }
276         tinfo->input_list = 0;
277     }
278     if (tinfo->retrieve_list)
279     {
280         struct filter_retrieve *r_ptr;
281         for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
282             destroy_xsp(r_ptr->convert);
283         tinfo->retrieve_list = 0;
284     }
285
286     if (tinfo->doc_config)
287     {
288         xmlFreeDoc(tinfo->doc_config);
289         tinfo->doc_config = 0;
290     }
291     nmem_reset(tinfo->nmem_config);
292 }
293
294 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
295                                struct convert_s **l)
296 {
297     *l = 0;
298     FOR_EACH_ELEMENT(ptr) {
299         if (!XML_STRCMP(ptr->name, "xslt"))
300         {
301             struct _xmlAttr *attr;
302             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
303             
304             p->next = 0;
305             p->which = convert_xslt_type;
306             p->u.xslt.stylesheet = 0;
307             p->u.xslt.stylesheet_xsp = 0;
308             
309             for (attr = ptr->properties; attr; attr = attr->next)
310                 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
311                     ;
312                 else
313                 {
314                     dom_log(YLOG_WARN, tinfo, ptr,
315                             "bad attribute @%s", attr->name);
316                 }
317             if (p->u.xslt.stylesheet)
318             {
319                 char tmp_xslt_full_name[1024];
320                 if (!yaz_filepath_resolve(p->u.xslt.stylesheet, 
321                                           tinfo->profile_path,
322                                           NULL, 
323                                           tmp_xslt_full_name))
324                 {
325                     dom_log(YLOG_WARN, tinfo, 0,
326                             "stylesheet %s not found in "
327                             "path %s",
328                             p->u.xslt.stylesheet, 
329                             tinfo->profile_path);
330                     return ZEBRA_FAIL;
331                 }
332                 
333                 p->u.xslt.stylesheet_xsp
334                     = xsltParseStylesheetFile((const xmlChar*) 
335                                               tmp_xslt_full_name);
336                 if (!p->u.xslt.stylesheet_xsp)
337                 {
338                     dom_log(YLOG_WARN, tinfo, 0,
339                             "could not parse xslt stylesheet %s",
340                             tmp_xslt_full_name);
341                     return ZEBRA_FAIL;
342                 }
343             }
344             else
345             {
346                 dom_log(YLOG_WARN, tinfo, ptr,
347                         "missing attribute 'stylesheet'");
348                 return ZEBRA_FAIL;
349             }
350             *l = p;
351             l = &p->next;
352         }
353         else if (!XML_STRCMP(ptr->name, "meta"))
354         {
355             struct _xmlAttr *attr;
356             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
357             
358             p->next = 0;
359             p->which = convert_meta_type;
360             
361             for (attr = ptr->properties; attr; attr = attr->next)
362                 dom_log(YLOG_WARN, tinfo, ptr,
363                         "bad attribute @%s", attr->name);
364             *l = p;
365             l = &p->next;
366         }
367         else
368         {
369             dom_log(YLOG_WARN, tinfo, ptr,
370                     "bad element '%s', expected <xslt>", ptr->name);
371             return ZEBRA_FAIL;
372         }
373     }
374     return ZEBRA_OK;
375 }
376
377 static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, 
378                         struct recRetrieveCtrl *retctr)
379 {
380
381     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
382         0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
383     {
384         if (0 == XML_STRCMP(node->name, "meta"))
385         {
386             const char *element_set_name = 0;
387             
388             struct _xmlAttr *attr;      
389             for (attr = node->properties; attr; attr = attr->next)
390             {
391                 if (attr_content_xml(attr, "element_set_name", &element_set_name))
392                     ;
393                 else
394                 {
395                     dom_log(YLOG_WARN, tinfo, node,
396                             "bad attribute @%s, expected @element_set_name",
397                             attr->name);
398                 }
399             }
400             if (element_set_name)
401             {
402                 WRBUF result = wrbuf_alloc();
403                 WRBUF addinfo = wrbuf_alloc();
404                 const Odr_oid *input_format = yaz_oid_recsyn_xml;
405                 const Odr_oid *output_format = 0;
406                 int ret;
407                 
408                 ret = retctr->special_fetch(retctr->handle,
409                                             element_set_name,
410                                             input_format, &output_format,
411                                             result, addinfo);
412                 if (ret == 0)
413                 {
414                     xmlDocPtr sub_doc = 
415                         xmlParseMemory(    wrbuf_buf(result), wrbuf_len(result));
416                     if (sub_doc)
417                     {
418                         xmlNodePtr t = xmlDocGetRootElement(sub_doc);
419                         xmlAddChild(node, xmlCopyNode(t, 1));
420                         xmlFreeDoc(sub_doc);
421                     }
422                 }
423                 wrbuf_destroy(result);
424                 wrbuf_destroy(addinfo);
425             }
426         }
427     }
428     for (node = node->children; node; node = node->next)
429         process_meta(tinfo, doc, node, retctr);
430     return 0;
431 }
432
433 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
434                                  struct recExtractCtrl *extctr,
435                                  struct recRetrieveCtrl *retctr,
436                                  struct convert_s *convert,
437                                  const char **params,
438                                  xmlDocPtr *doc,
439                                  xsltStylesheetPtr *last_xsp)
440 {
441     for (; convert; convert = convert->next)
442     {
443         if (convert->which == convert_xslt_type)
444         {
445             xmlChar *buf_out = 0;
446             int len_out = 0;
447             xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
448                                                     *doc, params);
449             if (last_xsp)
450                 *last_xsp = convert->u.xslt.stylesheet_xsp;
451             
452             if (!res_doc)
453                 break;
454             
455             /* now saving into buffer and re-reading into DOM to avoid annoing
456                XSLT problem with thrown-out indentation text nodes */
457             xsltSaveResultToString(&buf_out, &len_out, res_doc,
458                                    convert->u.xslt.stylesheet_xsp); 
459             xmlFreeDoc(res_doc);
460             
461             xmlFreeDoc(*doc);
462             
463             *doc = xmlParseMemory((const char *) buf_out, len_out);
464             
465             /* writing debug info out */
466             if (extctr && extctr->flagShowRecords)
467                 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
468                         tinfo->fname ? tinfo->fname : "(none)", 
469                         convert->u.xslt.stylesheet,
470                         len_out, buf_out);
471             
472             xmlFree(buf_out);
473         }
474         else if (convert->which == convert_meta_type)
475         {
476             if (retctr) /* only execute meta on retrieval */
477             {
478                 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
479
480                 /* last stylesheet absent */
481                 if (last_xsp)
482                     *last_xsp = 0;
483             }
484         }
485     }
486     return ZEBRA_OK;
487 }
488
489 static struct filter_input *new_input(struct filter_info *tinfo, int type)
490 {
491     struct filter_input *p;
492     struct filter_input **np = &tinfo->input_list;
493     for (;*np; np = &(*np)->next)
494         ;
495     p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
496     p->next = 0;
497     p->syntax = 0;
498     p->name = 0;
499     p->convert = 0;
500     p->type = type;
501     return p;
502 }
503
504 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
505                              const char *syntax, const char *name)
506 {
507     FOR_EACH_ELEMENT(ptr) {
508         if (!XML_STRCMP(ptr->name, "marc"))
509         {
510             yaz_iconv_t iconv = 0;
511             const char *input_charset = "marc-8";
512             struct _xmlAttr *attr;
513             
514             for (attr = ptr->properties; attr; attr = attr->next)
515             {
516                 if (attr_content(attr, "inputcharset", &input_charset))
517                     ;
518                 else
519                 {
520                     dom_log(YLOG_WARN, tinfo, ptr,
521                             "bad attribute @%s, expected @inputcharset",
522                             attr->name);
523                 }
524             }
525             iconv = yaz_iconv_open("utf-8", input_charset);
526             if (!iconv)
527             {
528                 dom_log(YLOG_WARN, tinfo, ptr, 
529                         "unsupported @charset '%s'", input_charset);
530                 return ZEBRA_FAIL;
531             }
532             else
533             {
534                 struct filter_input *p 
535                     = new_input(tinfo, DOM_INPUT_MARC);
536                 p->u.marc.handle = yaz_marc_create();
537                 p->u.marc.iconv = iconv;
538                 
539                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
540                 
541                 ptr = ptr->next;
542                 
543                 parse_convert(tinfo, ptr, &p->convert);
544             }
545             break;
546
547         }
548         else if (!XML_STRCMP(ptr->name, "xmlreader"))
549         {
550             struct filter_input *p 
551                 = new_input(tinfo, DOM_INPUT_XMLREADER);
552             struct _xmlAttr *attr;
553             const char *level_str = 0;
554
555             p->u.xmlreader.split_level = 0;
556             p->u.xmlreader.reader = 0;
557
558             for (attr = ptr->properties; attr; attr = attr->next)
559             {
560                 if (attr_content(attr, "level", &level_str))
561                     ;
562                 else
563                 {
564                     dom_log(YLOG_WARN, tinfo, ptr,
565                             "bad attribute @%s, expected @level",
566                             attr->name);
567                 }
568             }
569             if (level_str)
570                 p->u.xmlreader.split_level = atoi(level_str);
571                 
572             ptr = ptr->next;
573
574             parse_convert(tinfo, ptr, &p->convert);
575             break;
576         }
577         else
578         {
579             dom_log(YLOG_WARN, tinfo, ptr,
580                     "bad element <%s>, expected <marc>|<xmlreader>",
581                     ptr->name);
582             return ZEBRA_FAIL;
583         }
584     }
585     return ZEBRA_OK;
586 }
587
588 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
589 {
590     char tmp_full_name[1024];
591     xmlNodePtr ptr;
592     xmlDocPtr doc;
593
594     tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
595     
596     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
597                              NULL, tmp_full_name))
598         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
599     else
600         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
601     
602     yaz_log(YLOG_LOG, "%s dom filter: "
603             "loading config file %s", tinfo->fname, tinfo->full_name);
604
605     doc = xmlParseFile(tinfo->full_name);
606     if (!doc)
607     {
608         yaz_log(YLOG_WARN, "%s: dom filter: "
609                 "failed to parse config file %s",
610                 tinfo->fname, tinfo->full_name);
611         return ZEBRA_FAIL;
612     }
613     /* save because we store ptrs to the content */ 
614     tinfo->doc_config = doc;
615     
616     ptr = xmlDocGetRootElement(doc);
617     if (!ptr || ptr->type != XML_ELEMENT_NODE 
618         || XML_STRCMP(ptr->name, "dom"))
619     {
620         dom_log(YLOG_WARN, tinfo, ptr,
621                 "bad root element <%s>, expected root element <dom>", 
622                 ptr->name);  
623         return ZEBRA_FAIL;
624     }
625
626     ptr = ptr->children;
627     FOR_EACH_ELEMENT(ptr) {
628         if (!XML_STRCMP(ptr->name, "extract"))
629         {
630             /*
631               <extract name="index">
632               <xslt stylesheet="first.xsl"/>
633               <xslt stylesheet="second.xsl"/>
634               </extract>
635             */
636             struct _xmlAttr *attr;
637             struct filter_extract *f =
638                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
639             
640             tinfo->extract = f;
641             f->name = 0;
642             f->convert = 0;
643             for (attr = ptr->properties; attr; attr = attr->next)
644             {
645                 if (attr_content(attr, "name", &f->name))
646                     ;
647                 else
648                 {
649                     dom_log(YLOG_WARN, tinfo, ptr,
650                             "bad attribute @%s, expected @name",
651                             attr->name);
652                 }
653             }
654             parse_convert(tinfo, ptr->children, &f->convert);
655         }
656         else if (!XML_STRCMP(ptr->name, "retrieve"))
657         {  
658             /* 
659                <retrieve name="F">
660                <xslt stylesheet="some.xsl"/>
661                <xslt stylesheet="some.xsl"/>
662                </retrieve>
663             */
664             struct _xmlAttr *attr;
665             struct filter_retrieve **fp = &tinfo->retrieve_list;
666             struct filter_retrieve *f =
667                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
668             
669             while (*fp)
670                 fp = &(*fp)->next;
671
672             *fp = f;
673             f->name = 0;
674             f->identifier = 0;
675             f->convert = 0;
676             f->next = 0;
677
678             for (attr = ptr->properties; attr; attr = attr->next)
679             {
680                 if (attr_content(attr, "identifier", 
681                                  &f->identifier))
682                     ;
683                 else if (attr_content(attr, "name", &f->name))
684                     ;
685                 else
686                 {
687                     dom_log(YLOG_WARN, tinfo, ptr,
688                             "bad attribute @%s,  expected @identifier|@name",
689                             attr->name);
690                 }
691             }
692             parse_convert(tinfo, ptr->children, &f->convert);
693         }
694         else if (!XML_STRCMP(ptr->name, "store"))
695         {
696             /*
697               <store name="F">
698               <xslt stylesheet="some.xsl"/>
699               <xslt stylesheet="some.xsl"/>
700               </retrieve>
701             */
702             struct filter_store *f =
703                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
704             
705             tinfo->store = f;
706             f->convert = 0;
707             parse_convert(tinfo, ptr->children, &f->convert);
708         }
709         else if (!XML_STRCMP(ptr->name, "input"))
710         {
711             /*
712               <input syntax="xml">
713               <xmlreader level="1"/>
714               </input>
715               <input syntax="usmarc">
716               <marc inputcharset="marc-8"/>
717               </input>
718             */
719             struct _xmlAttr *attr;
720             const char  *syntax = 0;
721             const char *name = 0;
722             for (attr = ptr->properties; attr; attr = attr->next)
723             {
724                 if (attr_content(attr, "syntax", &syntax))
725                     ;
726                 else if (attr_content(attr, "name", &name))
727                     ;
728                 else
729                 {
730                     dom_log(YLOG_WARN, tinfo, ptr,
731                             "bad attribute @%s,  expected @syntax|@name",
732                             attr->name);
733                 }
734             }
735             parse_input(tinfo, ptr->children, syntax, name);
736         }
737         else
738         {
739             dom_log(YLOG_WARN, tinfo, ptr,
740                     "bad element <%s>, "
741                     "expected <extract>|<input>|<retrieve>|<store>",
742                     ptr->name);
743             return ZEBRA_FAIL;
744         }
745     }
746     if (!tinfo->input_list)
747     {
748         struct filter_input *p 
749             = new_input(tinfo, DOM_INPUT_XMLREADER);
750         p->u.xmlreader.split_level = 0;
751         p->u.xmlreader.reader = 0;
752     }
753     return ZEBRA_OK;
754 }
755
756 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
757                                                const char *est)
758 {
759     struct filter_retrieve *f = tinfo->retrieve_list;
760
761     /* return first schema if no est is provided */
762     if (!est)
763         return f;
764     for (; f; f = f->next)
765     { 
766         /* find requested schema */
767         if (est) 
768         {    
769             if (f->identifier && !strcmp(f->identifier, est))
770                 return f;
771             if (f->name && !strcmp(f->name, est))
772                 return f;
773         } 
774     }
775     return 0;
776 }
777
778 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
779 {
780     struct filter_info *tinfo = clientData;
781     if (!args || !*args)
782     {
783         yaz_log(YLOG_WARN, "dom filter: need config file");
784         return ZEBRA_FAIL;
785     }
786
787     if (tinfo->fname && !strcmp(args, tinfo->fname))
788         return ZEBRA_OK;
789     
790     tinfo->profile_path = res_get(res, "profilePath");
791
792     destroy_dom(tinfo);
793     return parse_dom(tinfo, args);
794 }
795
796 static void filter_destroy(void *clientData)
797 {
798     struct filter_info *tinfo = clientData;
799     destroy_dom(tinfo);
800     nmem_destroy(tinfo->nmem_config);
801     nmem_destroy(tinfo->nmem_record);
802     xfree(tinfo);
803 }
804
805 static int ioread_ex(void *context, char *buffer, int len)
806 {
807     struct recExtractCtrl *p = context;
808     return p->stream->readf(p->stream, buffer, len);
809 }
810
/* Close callback paired with ioread_ex(): does nothing and always
   returns 0. */
static int ioclose_ex(void *context)
{
    (void) context;   /* intentionally unused */
    return 0;
}
815
816
817
818 /* DOM filter style indexing */
819 static void index_value_of(struct filter_info *tinfo, 
820                            struct recExtractCtrl *extctr,
821                            RecWord* recword, 
822                            xmlNodePtr node, 
823                            const char *index_p)
824 {
825     if (tinfo->record_info_invoked == 1)
826     {
827         xmlChar *text = xmlNodeGetContent(node);
828         size_t text_len = strlen((const char *)text);
829        
830         /* if there is no text, we do not need to proceed */
831         if (text_len)
832         {            
833             const char *look = index_p;
834             const char *bval;
835             const char *eval;
836
837             xmlChar index[256];
838             xmlChar type[256];
839
840             /* assingning text to be indexed */
841             recword->term_buf = (const char *)text;
842             recword->term_len = text_len;
843
844             /* parsing all index name/type pairs */
845             /* may not start with ' ' or ':' */
846             while (*look && ' ' != *look && ':' != *look)
847             {
848                 /* setting name and type to zero */
849                 *index = '\0';
850                 *type = '\0';
851     
852                 /* parsing one index name */
853                 bval = look;
854                 while (*look && ':' != *look && ' ' != *look)
855                 {
856                     look++;
857                 }
858                 eval = look;
859                 strncpy((char *)index, (const char *)bval, eval - bval);
860                 index[eval - bval] = '\0';
861     
862     
863                 /* parsing one index type, if existing */
864                 if (':' == *look)
865                 {
866                     look++;
867       
868                     bval = look;
869                     while (*look && ' ' != *look)
870                     {
871                         look++;
872                     }
873                     eval = look;
874                     strncpy((char *)type, (const char *)bval, eval - bval);
875                     type[eval - bval] = '\0';
876                 }
877
878                 /* actually indexing the text given */
879
880                 recword->index_name = (const char *)index;
881                 if (*type)
882                     recword->index_type = (const char *) type;
883
884                 /* writing debug out */
885                 if (extctr->flagShowRecords)
886                     dom_log(YLOG_LOG, tinfo, 0, 
887                             "INDEX '%s:%s' '%s'", 
888                             (const char *) index,
889                             (const char *) type, 
890                             (const char *) text);
891                 
892                 (extctr->tokenAdd)(recword);
893
894                 /* eat whitespaces */
895                 if (*look && ' ' == *look)
896                 {
897                     look++;
898                 } 
899             }
900         }
901         xmlFree(text); 
902     }
903 }
904
905
906 /* DOM filter style indexing */
907 static void set_record_info(struct filter_info *tinfo, 
908                             struct recExtractCtrl *extctr, 
909                             xmlNodePtr node, 
910                             const char * id_p, 
911                             const char * rank_p, 
912                             const char * type_p)
913 {
914     /* writing debug info out */
915     if (extctr && extctr->flagShowRecords)
916         dom_log(YLOG_LOG, tinfo, node,
917                 "RECORD id=%s rank=%s type=%s", 
918                 id_p ? (const char *) id_p : "(null)",
919                 rank_p ? (const char *) rank_p : "(null)",
920                 type_p ? (const char *) type_p : "(null)");
921     
922
923     if (id_p && *id_p)
924         sscanf((const char *)id_p, "%255s", extctr->match_criteria);
925
926     if (rank_p && *rank_p)
927         extctr->staticrank = atozint((const char *)rank_p);
928
929     if (type_p && *type_p)
930     {
931         enum zebra_recctrl_action_t action = action_update;
932         if (!strcmp(type_p, "insert"))
933             action = action_insert;
934         else if (!strcmp(type_p, "delete"))
935             action = action_delete;
936         else if (!strcmp(type_p, "replace"))
937             action = action_replace;
938         else if (!strcmp(type_p, "update"))
939             action = action_update;
940         else
941             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
942         extctr->action = action;
943     }
944
945     if (tinfo->record_info_invoked == 1)
946     {
947         /* warn about multiple only once */
948         dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
949     }
950     tinfo->record_info_invoked++;
951
952 }
953
954
955 /* DOM filter style indexing */
956 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
957                                            struct recExtractCtrl *extctr, 
958                                            RecWord* recword, 
959                                            xmlNodePtr node)
960 {
961     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
962         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
963     {
964         if (0 == XML_STRCMP(node->name, "index"))
965         {
966             const char *index_p = 0;
967
968             struct _xmlAttr *attr;      
969             for (attr = node->properties; attr; attr = attr->next)
970             {
971                 if (attr_content_xml(attr, "name", &index_p))
972                 {
973                     index_value_of(tinfo, extctr, recword, node, index_p);
974                 }  
975                 else
976                 {
977                     dom_log(YLOG_WARN, tinfo, node,
978                             "bad attribute @%s, expected @name",
979                             attr->name);
980                 }
981             }
982         }
983         else if (0 == XML_STRCMP(node->name, "record"))
984         {
985             const char *id_p = 0;
986             const char *rank_p = 0;
987             const char *type_p = 0;
988
989             struct _xmlAttr *attr;
990             for (attr = node->properties; attr; attr = attr->next)
991             {
992                 if (attr_content_xml(attr, "id", &id_p))
993                     ;
994                 else if (attr_content_xml(attr, "rank", &rank_p))
995                     ;
996                 else if (attr_content_xml(attr, "type", &type_p))
997                     ;
998                 else
999                 {
1000                     dom_log(YLOG_WARN, tinfo, node,
1001                             "bad attribute @%s, expected @id|@rank|@type",
1002                             attr->name);
1003                 }
1004             }
1005             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
1006         } 
1007         else
1008         {
1009             dom_log(YLOG_WARN, tinfo, node,
1010                     "bad element <%s>,"
1011                     " expected <record>|<index> in namespace '%s'",
1012                     node->name, zebra_dom_ns);
1013         }
1014     }
1015 }
1016
/*
 * Try to read one "name=value" pair from the processing-instruction text
 * at *c_ptr.
 *
 * Leading blanks are skipped.  If the text then starts with `name`
 * immediately followed by '=', the characters up to the next blank (or
 * end of string) are copied into `value`, silently truncated to
 * value_max-1 characters, and 1 is returned.  Otherwise `value` is left
 * empty and 0 is returned.
 *
 * In both cases *c_ptr is advanced past whatever was consumed and past
 * any blanks that follow; on a mismatch only the blanks are consumed.
 */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    const size_t key_len = strlen(name);
    const char *cur = *c_ptr;
    int matched = 0;

    value[0] = '\0';

    for (; *cur == ' '; cur++)
        ;

    /* strncmp stops at the terminator, so a short input simply mismatches */
    if (strncmp(cur, name, key_len) == 0 && cur[key_len] == '=')
    {
        size_t used = 0;

        for (cur += key_len + 1; *cur != '\0' && *cur != ' '; cur++)
        {
            if (used < value_max - 1)
                value[used++] = *cur;
        }
        value[used] = '\0';
        matched = 1;
    }

    for (; *cur == ' '; cur++)
        ;

    *c_ptr = cur;
    return matched;
}
1048
1049 /* DOM filter style indexing */
1050 static void process_xml_pi_node(struct filter_info *tinfo, 
1051                                 struct recExtractCtrl *extctr, 
1052                                 xmlNodePtr node,
1053                                 const char **index_pp)
1054 {
1055     /* if right PI name, continue parsing PI */
1056     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1057     {
1058         xmlChar *pi_p =  node->content;
1059         const char *look = (const char *) node->content;
1060     
1061         /* parsing PI record instructions */
1062         if (0 == strncmp((const char *)look, "record", 6))
1063         {
1064             char id[256];
1065             char rank[256];
1066             char type[256];
1067             
1068             *id = '\0';
1069             *rank = '\0';
1070             *type = '\0';
1071             look += 6;
1072             while (*look)
1073                 if (attr_content_pi(&look, "id", id, sizeof(id)))
1074                     ;
1075                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1076                     ;
1077                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1078                 {
1079                     dom_log(YLOG_WARN, tinfo, node,
1080                             "content '%s', can not parse '%s'",
1081                             pi_p, look);
1082                     break;
1083                 }
1084             set_record_info(tinfo, extctr, node, id, rank, type);
1085         } 
1086         /* parsing index instruction */
1087         else if (0 == strncmp((const char *)look, "index", 5))
1088         {
1089             look += 5;
1090       
1091             /* eat whitespace */
1092             while (*look && ' ' == *look)
1093                 look++;
1094
1095             /* export index instructions to outside */
1096             *index_pp = look;
1097         } 
1098         else 
1099         {
1100             dom_log(YLOG_WARN, tinfo, node,
1101                     "content '%s', can not parse '%s'",
1102                     pi_p, look);
1103         }
1104     }
1105 }
1106
1107 /* DOM filter style indexing */
1108 static void process_xml_element_node(struct filter_info *tinfo, 
1109                                      struct recExtractCtrl *extctr, 
1110                                      RecWord* recword, 
1111                                      xmlNodePtr node)
1112 {
1113     /* remember indexing instruction from PI to next element node */
1114     const char *index_p = 0;
1115
1116     /* check if we are an element node in the special zebra namespace 
1117        and either set record data or index value-of node content*/
1118     process_xml_element_zebra_node(tinfo, extctr, recword, node);
1119   
1120     /* loop through kid nodes */
1121     for (node = node->children; node; node = node->next)
1122     {
1123         /* check and set PI record and index index instructions */
1124         if (node->type == XML_PI_NODE)
1125         {
1126             process_xml_pi_node(tinfo, extctr, node, &index_p);
1127         }
1128         else if (node->type == XML_ELEMENT_NODE)
1129         {
1130             /* if there was a PI index instruction before this element */
1131             if (index_p)
1132             {
1133                 index_value_of(tinfo, extctr, recword, node, index_p);
1134                 index_p = 0;
1135             }
1136             process_xml_element_node(tinfo, extctr, recword,node);
1137         }
1138         else
1139             continue;
1140     }
1141 }
1142
1143
1144 /* DOM filter style indexing */
1145 static void extract_dom_doc_node(struct filter_info *tinfo, 
1146                                  struct recExtractCtrl *extctr, 
1147                                  xmlDocPtr doc)
1148 {
1149     /* only need to do the initialization once, reuse recword for all terms */
1150     RecWord recword;
1151     (*extctr->init)(extctr, &recword);
1152
1153     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1154 }
1155
1156
1157 static int convert_extract_doc(struct filter_info *tinfo, 
1158                                struct filter_input *input,
1159                                struct recExtractCtrl *p, 
1160                                xmlDocPtr doc)
1161 {
1162     xmlChar *buf_out;
1163     int len_out;
1164     const char *params[10];
1165     xsltStylesheetPtr last_xsp = 0;
1166     xmlDocPtr store_doc = 0;
1167
1168     /* per default do not ingest record */
1169     tinfo->record_info_invoked = 0;
1170
1171     /* exit if empty document given */
1172     if (!doc)
1173         return RECCTRL_EXTRACT_SKIP;
1174
1175     /* we actuallu have a document which needs to be processed further */
1176     params[0] = 0;
1177     set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1178
1179     if (p && p->flagShowRecords)
1180     {
1181         xmlChar *buf_out;
1182         int len_out;
1183 #if 0 
1184         FILE *outf = fopen("extract.xml", "w");
1185         xmlDocDumpMemory(doc, &buf_out, &len_out);
1186         fwrite(buf_out, 1, len_out, outf);
1187 #endif
1188         yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1189 #if 0
1190         fclose(outf);
1191 #endif
1192     }
1193
1194     /* input conversion */
1195     perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1196
1197     if (tinfo->store)
1198     {
1199         /* store conversion */
1200         store_doc = xmlCopyDoc(doc, 1);
1201         perform_convert(tinfo, p, 0, tinfo->store->convert,
1202                         params, &store_doc, &last_xsp);
1203     }
1204     
1205     /* saving either store doc or original doc in case no store doc exists */
1206     if (last_xsp)
1207         xsltSaveResultToString(&buf_out, &len_out, 
1208                                store_doc ? store_doc : doc, last_xsp);
1209     else
1210         xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1211
1212     if (p->setStoreData)
1213         (*p->setStoreData)(p, buf_out, len_out);
1214     xmlFree(buf_out);
1215
1216     if (store_doc)
1217         xmlFreeDoc(store_doc);
1218
1219     /* extract conversion */
1220     perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1221
1222
1223     /* finally, do the indexing */
1224     if (doc){
1225         extract_dom_doc_node(tinfo, p, doc);
1226         xmlFreeDoc(doc);
1227     }
1228     
1229     /* there was nothing to index, so there is no inserted/updated record */
1230     if (tinfo->record_info_invoked == 0)
1231         return RECCTRL_EXTRACT_SKIP;
1232
1233     return RECCTRL_EXTRACT_OK;
1234 }
1235
1236 static int extract_xml_split(struct filter_info *tinfo,
1237                              struct filter_input *input,
1238                              struct recExtractCtrl *p)
1239 {
1240     int ret;
1241
1242     if (p->first_record)
1243     {
1244         if (input->u.xmlreader.reader)
1245             xmlFreeTextReader(input->u.xmlreader.reader);
1246         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1247                                                    p /* I/O handler */,
1248                                                    0 /* URL */, 
1249                                                    0 /* encoding */,
1250                                                    XML_PARSE_XINCLUDE
1251                                                    | XML_PARSE_NOENT
1252                                                    | XML_PARSE_NONET);
1253     }
1254     if (!input->u.xmlreader.reader)
1255         return RECCTRL_EXTRACT_ERROR_GENERIC;
1256
1257     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1258     while (ret == 1)
1259     {
1260         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1261         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1262
1263         if (type == XML_READER_TYPE_ELEMENT && 
1264             input->u.xmlreader.split_level == depth)
1265         {
1266             xmlNodePtr ptr;
1267
1268             /* per default do not ingest record */
1269             tinfo->record_info_invoked = 0;
1270             
1271             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1272             if (ptr)
1273                 {
1274                 /* we have a new document */
1275
1276                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1277                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1278                 
1279                 xmlDocSetRootElement(doc, ptr2);
1280                 
1281                 /* writing debug info out */
1282                 if (p->flagShowRecords)
1283                 {
1284                     xmlChar *buf_out = 0;
1285                     int len_out = 0;
1286                     xmlDocDumpMemory(doc, &buf_out, &len_out);
1287                     yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
1288                             tinfo->fname ? tinfo->fname : "(none)",
1289                             depth, len_out, buf_out); 
1290                     xmlFree(buf_out);
1291                 }
1292                 
1293                 return convert_extract_doc(tinfo, input, p, doc);
1294             }
1295             else
1296             {
1297                 xmlFreeTextReader(input->u.xmlreader.reader);
1298                 input->u.xmlreader.reader = 0;
1299                 return RECCTRL_EXTRACT_ERROR_GENERIC;
1300             }
1301         }
1302         ret = xmlTextReaderRead(input->u.xmlreader.reader);
1303     }
1304     xmlFreeTextReader(input->u.xmlreader.reader);
1305     input->u.xmlreader.reader = 0;
1306     return RECCTRL_EXTRACT_EOF;
1307 }
1308
1309 static int extract_xml_full(struct filter_info *tinfo, 
1310                             struct filter_input *input,
1311                             struct recExtractCtrl *p)
1312 {
1313     if (p->first_record) /* only one record per stream */
1314     {
1315         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1316                                   p /* I/O handler */,
1317                                   0 /* URL */,
1318                                   0 /* encoding */,
1319                                   XML_PARSE_XINCLUDE
1320                                   | XML_PARSE_NOENT
1321                                   | XML_PARSE_NONET);
1322         if (!doc)
1323         {
1324             return RECCTRL_EXTRACT_ERROR_GENERIC;
1325         }
1326         return convert_extract_doc(tinfo, input, p, doc);
1327     }
1328     else
1329         return RECCTRL_EXTRACT_EOF;
1330 }
1331
1332 static int extract_iso2709(struct filter_info *tinfo,
1333                            struct filter_input *input,
1334                            struct recExtractCtrl *p)
1335 {
1336     char buf[100000];
1337     int record_length;
1338     int read_bytes, r;
1339
1340     if (p->stream->readf(p->stream, buf, 5) != 5)
1341         return RECCTRL_EXTRACT_EOF;
1342     while (*buf < '0' || *buf > '9')
1343     {
1344         int i;
1345
1346         dom_log(YLOG_WARN, tinfo, 0,
1347                 "MARC: Skipping bad byte %d (0x%02X)",
1348                 *buf & 0xff, *buf & 0xff);
1349         for (i = 0; i<4; i++)
1350             buf[i] = buf[i+1];
1351
1352         if (p->stream->readf(p->stream, buf+4, 1) != 1)
1353             return RECCTRL_EXTRACT_EOF;
1354     }
1355     record_length = atoi_n (buf, 5);
1356     if (record_length < 25)
1357     {
1358         dom_log(YLOG_WARN, tinfo, 0,
1359                 "MARC record length < 25, is %d",  record_length);
1360         return RECCTRL_EXTRACT_ERROR_GENERIC;
1361     }
1362     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1363     if (read_bytes < record_length-5)
1364     {
1365         dom_log(YLOG_WARN, tinfo, 0,
1366                 "couldn't read whole MARC record");
1367         return RECCTRL_EXTRACT_ERROR_GENERIC;
1368     }
1369     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1370     if (r < record_length)
1371     {
1372         dom_log (YLOG_WARN, tinfo, 0,
1373                  "parsing of MARC record failed r=%d length=%d",
1374                  r, record_length);
1375         return RECCTRL_EXTRACT_ERROR_GENERIC;
1376     }
1377     else
1378     {
1379         xmlDocPtr rdoc;
1380         xmlNode *root_ptr;
1381         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 
1382                            "http://www.loc.gov/MARC21/slim", 0, 0);
1383         rdoc = xmlNewDoc((const xmlChar*) "1.0");
1384         xmlDocSetRootElement(rdoc, root_ptr);
1385         return convert_extract_doc(tinfo, input, p, rdoc);        
1386     }
1387     return RECCTRL_EXTRACT_OK;
1388 }
1389
1390 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1391 {
1392     struct filter_info *tinfo = clientData;
1393     struct filter_input *input = tinfo->input_list;
1394
1395     if (!input)
1396         return RECCTRL_EXTRACT_ERROR_GENERIC;
1397     
1398     nmem_reset(tinfo->nmem_record);
1399
1400     if (p->setStoreData == 0)
1401         return extract_xml_full(tinfo, input, p);
1402     switch(input->type)
1403     {
1404     case DOM_INPUT_XMLREADER:
1405         if (input->u.xmlreader.split_level == 0)
1406             return extract_xml_full(tinfo, input, p);
1407         else
1408             return extract_xml_split(tinfo, input, p);
1409         break;
1410     case DOM_INPUT_MARC:
1411         return extract_iso2709(tinfo, input, p);
1412     }
1413     return RECCTRL_EXTRACT_ERROR_GENERIC;
1414 }
1415
1416 static int ioread_ret(void *context, char *buffer, int len)
1417 {
1418     struct recRetrieveCtrl *p = context;
1419     return p->stream->readf(p->stream, buffer, len);
1420 }
1421
/*
 * libxml2 close callback used during retrieval.  Nothing is closed here;
 * the function always reports success (0).
 */
static int ioclose_ret(void *context)
{
    (void) context;     /* intentionally unused */
    return 0;
}
1426
1427 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1428 {
1429     /* const char *esn = zebra_dom_ns; */
1430     const char *esn = 0;
1431     const char *params[32];
1432     struct filter_info *tinfo = clientData;
1433     xmlDocPtr doc;
1434     struct filter_retrieve *retrieve;
1435     xsltStylesheetPtr last_xsp = 0;
1436
1437     if (p->comp)
1438     {
1439         if (p->comp->which == Z_RecordComp_simple
1440             && p->comp->u.simple->which == Z_ElementSetNames_generic)
1441         {
1442             esn = p->comp->u.simple->u.generic;
1443         }
1444         else if (p->comp->which == Z_RecordComp_complex 
1445                  && p->comp->u.complex->generic->elementSpec
1446                  && p->comp->u.complex->generic->elementSpec->which ==
1447                  Z_ElementSpec_elementSetName)
1448         {
1449             esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1450         }
1451     }
1452     retrieve = lookup_retrieve(tinfo, esn);
1453     if (!retrieve)
1454     {
1455         p->diagnostic =
1456             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1457         p->addinfo = odr_strdup(p->odr, esn);
1458         return 0;
1459     }
1460
1461     params[0] = 0;
1462     set_param_int(params, "id", p->localno, p->odr->mem);
1463     if (p->fname)
1464         set_param_str(params, "filename", p->fname, p->odr->mem);
1465     if (p->staticrank >= 0)
1466         set_param_int(params, "rank", p->staticrank, p->odr->mem);
1467
1468     if (esn)
1469         set_param_str(params, "schema", esn, p->odr->mem);
1470     else
1471         if (retrieve->name)
1472             set_param_str(params, "schema", retrieve->name, p->odr->mem);
1473         else if (retrieve->identifier)
1474             set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1475         else
1476             set_param_str(params, "schema", "", p->odr->mem);
1477
1478     if (p->score >= 0)
1479         set_param_int(params, "score", p->score, p->odr->mem);
1480     set_param_int(params, "size", p->recordSize, p->odr->mem);
1481
1482     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1483                     0 /* URL */,
1484                     0 /* encoding */,
1485                     XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1486     if (!doc)
1487     {
1488         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1489         return 0;
1490     }
1491
1492     /* retrieve conversion */
1493     perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1494     if (!doc)
1495     {
1496         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1497     }
1498     else if (!p->input_format
1499              || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1500     {
1501         xmlChar *buf_out;
1502         int len_out;
1503
1504         if (last_xsp)
1505             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1506         else
1507             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1508
1509         p->output_format = yaz_oid_recsyn_xml;
1510         p->rec_len = len_out;
1511         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1512         memcpy(p->rec_buf, buf_out, p->rec_len);
1513         xmlFree(buf_out);
1514     }
1515     else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1516     {
1517         xmlChar *buf_out;
1518         int len_out;
1519
1520         if (last_xsp)
1521             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1522         else
1523             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1524         
1525         p->output_format = yaz_oid_recsyn_sutrs;
1526         p->rec_len = len_out;
1527         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1528         memcpy(p->rec_buf, buf_out, p->rec_len);
1529         
1530         xmlFree(buf_out);
1531     }
1532     else
1533     {
1534         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1535     }
1536     xmlFreeDoc(doc);
1537     return 0;
1538 }
1539
/*
 * Descriptor of this record filter: its name ("dom") followed by the
 * handlers defined in this file.  The meaning of each position is given
 * by struct recType, declared outside this file.
 */
static struct recType filter_type = {
    0,                  /* NOTE(review): first recType field - confirm meaning */
    "dom",              /* filter name */
    filter_init,
    filter_config,
    filter_destroy,
    filter_extract,
    filter_retrieve
};
1549
/*
 * Exported, 0-terminated table of the filters provided by this file.
 * The symbol is named idzebra_filter_dom when IDZEBRA_STATIC_DOM is
 * defined and idzebra_filter otherwise (presumably static linking versus
 * a loadable module - confirm against the build setup).
 */
RecType
#ifdef IDZEBRA_STATIC_DOM
idzebra_filter_dom
#else
idzebra_filter
#endif

[] = {
    &filter_type,
    0,
};
1561 /*
1562  * Local variables:
1563  * c-basic-offset: 4
1564  * indent-tabs-mode: nil
1565  * End:
1566  * vim: shiftwidth=4 tabstop=8 expandtab
1567  */
1568