Fixed snippets creation for DOM and usage of input xsl.
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24
25 #include <yaz/diagbib1.h>
26 #include <yaz/tpath.h>
27 #include <yaz/snprintf.h>
28
29 #include <libxml/xmlversion.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/xmlreader.h>
34 #include <libxslt/transform.h>
35 #include <libxslt/xsltutils.h>
36
37 #if YAZ_HAVE_EXSLT
38 #include <libexslt/exslt.h>
39 #endif
40
41 #include <idzebra/util.h>
42 #include <idzebra/recctrl.h>
43 #include <yaz/oid_db.h>
44
45 /* XML namespace URI; elements are treated as DOM filter instructions
   (<meta>, <index>, <record>) only when their namespace href equals this */
46 #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
47 static const char *zebra_dom_ns = ZEBRA_DOM_NS;
48
49 /* presumably the processing-instruction target recognised by the DOM
   filter; it is not referenced in the part of the file reviewed here */
50 #define ZEBRA_PI_NAME "zebra-2.0"
51 static const char *zebra_pi_name = ZEBRA_PI_NAME;
52
/* Discriminator for the union in struct convert_s: a step created from an
   <xslt> element or from a <meta> element of the configuration. */
53 enum convert_type {
54     convert_xslt_type,
55     convert_meta_type
56 };
57
/* One XSLT conversion step.
   stylesheet     - value of the @stylesheet attribute (points into the
                    parsed configuration document)
   stylesheet_xsp - compiled stylesheet from xsltParseStylesheetFile,
                    released by destroy_xsp */
58 struct convert_xslt {
59     const char *stylesheet;
60     xsltStylesheetPtr stylesheet_xsp;
61 };
62
/* A <meta> conversion step carries no data of its own; the single member
   is only a placeholder. */
63 struct convert_meta {
64     int dummy;
65 };
66
/* Node of a singly linked chain of conversion steps, applied in list order
   by perform_convert.  'which' selects the active member of 'u'. */
67 struct convert_s {
68     enum convert_type which;
69     union {
70         struct convert_xslt xslt;
71         struct convert_meta meta;
72     } u;
73     struct convert_s *next;
74 };
75
/* Settings parsed from the <extract> element: its @name attribute and the
   chain of conversion steps listed inside it. */
76 struct filter_extract {
77     const char *name;
78     struct convert_s *convert;
79 };
80
/* Settings parsed from the <store> element: the chain of conversion steps
   listed inside it. */
81 struct filter_store {
82     struct convert_s *convert;
83 };
84
/* One <retrieve> element of the configuration, kept in a linked list in
   document order.  lookup_retrieve matches a requested element set name
   against either 'identifier' or 'name' (from the attributes of the same
   names). */
85 struct filter_retrieve {
86     const char *name;
87     const char *identifier;
88     struct convert_s *convert;
89     struct filter_retrieve *next;
90 };
91
/* Values for filter_input.type; they select the active member of
   filter_input.u. */
92 #define DOM_INPUT_XMLREADER 1
93 #define DOM_INPUT_MARC 2
/* One input description, kept in a linked list (see new_input).
   syntax, name - initialised to 0 by new_input
   convert      - conversion steps that follow the <marc>/<xmlreader> element
   u.xmlreader  - reader handle and the @level split level
   u.marc       - MARC handle and the iconv handle converting to utf-8;
                  input_charset is not assigned in the reviewed code */
94 struct filter_input {
95     const char *syntax;
96     const char *name;
97     struct convert_s *convert;
98     int type;
99     union {
100         struct {
101             xmlTextReaderPtr reader;
102             int split_level;
103         } xmlreader;
104         struct {
105             const char *input_charset;
106             yaz_marc_t handle;
107             yaz_iconv_t iconv;
108         } marc;
109     } u;
110     struct filter_input *next;
111 };
112   
/* Per-filter state created by filter_init.
   fname, full_name    - configuration file name as given and as resolved;
                         both are allocated from nmem_config by parse_dom
   profile_path        - value of the "profilePath" resource
   nmem_record         - memory pool; presumably for per-record data
                         (not used in the reviewed part of the file)
   nmem_config         - memory pool for everything built from the
                         configuration; reset by destroy_dom
   doc_config          - parsed configuration document, kept because the
                         structures below point into its content
   extract, retrieve_list, input_list, store
                       - parsed <extract>/<retrieve>/<input>/<store> parts
   record_info_invoked - counter incremented by set_record_info */
113 struct filter_info {
114     char *fname;
115     char *full_name;
116     const char *profile_path;
117     NMEM nmem_record;
118     NMEM nmem_config;
119     xmlDocPtr doc_config;
120     struct filter_extract *extract;
121     struct filter_retrieve *retrieve_list;
122     struct filter_input *input_list;
123     struct filter_store *store;
124     int record_info_invoked;
125 };
126
127
128
/* strcmp/strlen wrappers that cast the first argument (typically an
   xmlChar *) to char *.  The arguments are not parenthesised in the
   expansion, so pass simple expressions only. */
129 #define XML_STRCMP(a,b)   strcmp((char*)a, b)
130 #define XML_STRLEN(a) strlen((char*)a)
131
132
/* Loop header that walks the sibling list starting at ptr and runs the
   following statement only for element nodes.  It advances ptr itself, so
   the caller's variable is modified, and it expands to "for (...) if (...)",
   so an 'else' placed directly after the body would bind to the hidden if. */
133 #define FOR_EACH_ELEMENT(ptr) for (; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE)
134
135 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
136                     const char *fmt, ...)
137 #ifdef __GNUC__
138     __attribute__ ((format (printf, 4, 5)))
139 #endif
140     ;
141
142 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
143                     const char *fmt, ...)
144 {
145     va_list ap;
146     char buf[4096];
147
148     va_start(ap, fmt);
149     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
150     if (ptr)
151     {
152         yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
153                 xmlGetLineNo(ptr), buf);
154     }
155     else
156     {
157         yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
158     }
159     va_end(ap);
160 }
161
162
163 static void set_param_str(const char **params, const char *name,
164                           const char *value, NMEM nmem)
165 {
166     char *quoted = nmem_malloc(nmem, 3 + strlen(value));
167     sprintf(quoted, "'%s'", value);
168     while (*params)
169         params++;
170     params[0] = name;
171     params[1] = quoted;
172     params[2] = 0;
173 }
174
175 static void set_param_int(const char **params, const char *name,
176                           zint value, NMEM nmem)
177 {
178     char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
179     while (*params)
180         params++;
181     sprintf(quoted, "'" ZINT_FORMAT "'", value);
182     params[0] = name;
183     params[1] = quoted;
184     params[2] = 0;
185 }
186
187 static void *filter_init(Res res, RecType recType)
188 {
189     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
190     tinfo->fname = 0;
191     tinfo->full_name = 0;
192     tinfo->profile_path = 0;
193     tinfo->nmem_record = nmem_create();
194     tinfo->nmem_config = nmem_create();
195     tinfo->extract = 0;
196     tinfo->retrieve_list = 0;
197     tinfo->input_list = 0;
198     tinfo->store = 0;
199     tinfo->doc_config = 0;
200     tinfo->record_info_invoked = 0;
201
202 #if YAZ_HAVE_EXSLT
203     exsltRegisterAll(); 
204 #endif
205
206     return tinfo;
207 }
208
209 static int attr_content(struct _xmlAttr *attr, const char *name,
210                         const char **dst_content)
211 {
212     if (!XML_STRCMP(attr->name, name) && attr->children 
213         && attr->children->type == XML_TEXT_NODE)
214     {
215         *dst_content = (const char *)(attr->children->content);
216         return 1;
217     }
218     return 0;
219 }
220
221 static void destroy_xsp(struct convert_s *c)
222 {
223     while (c)
224     {
225         if (c->which == convert_xslt_type)
226         {
227             if (c->u.xslt.stylesheet_xsp)
228                 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
229         }
230         c = c->next;
231     }
232 }
233
234 static void destroy_dom(struct filter_info *tinfo)
235 {
236     if (tinfo->extract)
237     {
238         destroy_xsp(tinfo->extract->convert);
239         tinfo->extract = 0;
240     }
241     if (tinfo->store)
242     {
243         destroy_xsp(tinfo->store->convert);
244         tinfo->store = 0;
245     }
246     if (tinfo->input_list)
247     {
248         struct filter_input *i_ptr;
249         for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
250         {
251             switch(i_ptr->type)
252             {
253             case DOM_INPUT_XMLREADER:
254                 if (i_ptr->u.xmlreader.reader)
255                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
256                 break;
257             case DOM_INPUT_MARC:
258                 yaz_iconv_close(i_ptr->u.marc.iconv);
259                 yaz_marc_destroy(i_ptr->u.marc.handle);
260                 break;
261             }
262             destroy_xsp(i_ptr->convert);
263         }
264         tinfo->input_list = 0;
265     }
266     if (tinfo->retrieve_list)
267     {
268         struct filter_retrieve *r_ptr;
269         for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
270             destroy_xsp(r_ptr->convert);
271         tinfo->retrieve_list = 0;
272     }
273
274     if (tinfo->doc_config)
275     {
276         xmlFreeDoc(tinfo->doc_config);
277         tinfo->doc_config = 0;
278     }
279     nmem_reset(tinfo->nmem_config);
280 }
281
282 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
283                                struct convert_s **l)
284 {
285     *l = 0;
286     FOR_EACH_ELEMENT(ptr) {
287         if (!XML_STRCMP(ptr->name, "xslt"))
288         {
289             struct _xmlAttr *attr;
290             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
291             
292             p->next = 0;
293             p->which = convert_xslt_type;
294             p->u.xslt.stylesheet = 0;
295             p->u.xslt.stylesheet_xsp = 0;
296             
297             for (attr = ptr->properties; attr; attr = attr->next)
298                 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
299                     ;
300                 else
301                 {
302                     dom_log(YLOG_WARN, tinfo, ptr,
303                             "bad attribute @%s", attr->name);
304                 }
305             if (p->u.xslt.stylesheet)
306             {
307                 char tmp_xslt_full_name[1024];
308                 if (!yaz_filepath_resolve(p->u.xslt.stylesheet, 
309                                           tinfo->profile_path,
310                                           NULL, 
311                                           tmp_xslt_full_name))
312                 {
313                     dom_log(YLOG_WARN, tinfo, 0,
314                             "stylesheet %s not found in "
315                             "path %s",
316                             p->u.xslt.stylesheet, 
317                             tinfo->profile_path);
318                     return ZEBRA_FAIL;
319                 }
320                 
321                 p->u.xslt.stylesheet_xsp
322                     = xsltParseStylesheetFile((const xmlChar*) 
323                                               tmp_xslt_full_name);
324                 if (!p->u.xslt.stylesheet_xsp)
325                 {
326                     dom_log(YLOG_WARN, tinfo, 0,
327                             "could not parse xslt stylesheet %s",
328                             tmp_xslt_full_name);
329                     return ZEBRA_FAIL;
330                 }
331             }
332             else
333             {
334                 dom_log(YLOG_WARN, tinfo, ptr,
335                         "missing attribute 'stylesheet'");
336                 return ZEBRA_FAIL;
337             }
338             *l = p;
339             l = &p->next;
340         }
341         else if (!XML_STRCMP(ptr->name, "meta"))
342         {
343             struct _xmlAttr *attr;
344             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
345             
346             p->next = 0;
347             p->which = convert_meta_type;
348             
349             for (attr = ptr->properties; attr; attr = attr->next)
350                 dom_log(YLOG_WARN, tinfo, ptr,
351                         "bad attribute @%s", attr->name);
352             *l = p;
353             l = &p->next;
354         }
355         else
356         {
357             dom_log(YLOG_WARN, tinfo, ptr,
358                     "bad element '%s', expected <xslt>", ptr->name);
359             return ZEBRA_FAIL;
360         }
361     }
362     return ZEBRA_OK;
363 }
364
365 static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, 
366                         struct recRetrieveCtrl *retctr)
367 {
368
369     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
370         0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
371     {
372         if (0 == XML_STRCMP(node->name, "meta"))
373         {
374             const char *element_set_name = 0;
375             
376             struct _xmlAttr *attr;      
377             for (attr = node->properties; attr; attr = attr->next)
378             {
379                 if (attr_content(attr, "element_set_name", &element_set_name))
380                     ;
381                 else
382                 {
383                     dom_log(YLOG_WARN, tinfo, node,
384                             "bad attribute @%s, expected @element_set_name",
385                             attr->name);
386                 }
387             }
388             if (element_set_name)
389             {
390                 WRBUF result = wrbuf_alloc();
391                 WRBUF addinfo = wrbuf_alloc();
392                 const Odr_oid *input_format = yaz_oid_recsyn_xml;
393                 const Odr_oid *output_format = 0;
394                 int ret;
395                 
396                 ret = retctr->special_fetch(retctr->handle,
397                                             element_set_name,
398                                             input_format, &output_format,
399                                             result, addinfo);
400                 if (ret == 0)
401                 {
402                     xmlDocPtr sub_doc = 
403                         xmlParseMemory(    wrbuf_buf(result), wrbuf_len(result));
404                     if (sub_doc)
405                     {
406                         xmlNodePtr t = xmlDocGetRootElement(sub_doc);
407                         xmlAddChild(node, xmlCopyNode(t, 1));
408                         xmlFreeDoc(sub_doc);
409                     }
410                 }
411                 wrbuf_destroy(result);
412                 wrbuf_destroy(addinfo);
413             }
414         }
415     }
416     for (node = node->children; node; node = node->next)
417         process_meta(tinfo, doc, node, retctr);
418     return 0;
419 }
420
421 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
422                                  struct recExtractCtrl *extctr,
423                                  struct recRetrieveCtrl *retctr,
424                                  struct convert_s *convert,
425                                  const char **params,
426                                  xmlDocPtr *doc,
427                                  xsltStylesheetPtr *last_xsp)
428 {
429     for (; convert; convert = convert->next)
430     {
431         if (convert->which == convert_xslt_type)
432         {
433             xmlChar *buf_out = 0;
434             int len_out = 0;
435             xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
436                                                     *doc, params);
437             if (last_xsp)
438                 *last_xsp = convert->u.xslt.stylesheet_xsp;
439             
440             if (!res_doc)
441                 break;
442             
443             /* now saving into buffer and re-reading into DOM to avoid annoing
444                XSLT problem with thrown-out indentation text nodes */
445             xsltSaveResultToString(&buf_out, &len_out, res_doc,
446                                    convert->u.xslt.stylesheet_xsp); 
447             xmlFreeDoc(res_doc);
448             
449             xmlFreeDoc(*doc);
450             
451             *doc = xmlParseMemory((const char *) buf_out, len_out);
452             
453             /* writing debug info out */
454             if (extctr && extctr->flagShowRecords)
455                 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
456                         tinfo->fname ? tinfo->fname : "(none)", 
457                         convert->u.xslt.stylesheet,
458                         len_out, buf_out);
459             
460             xmlFree(buf_out);
461         }
462         else if (convert->which == convert_meta_type)
463         {
464             if (retctr) /* only execute meta on retrieval */
465             {
466                 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
467
468                 /* last stylesheet absent */
469                 if (last_xsp)
470                     *last_xsp = 0;
471             }
472         }
473     }
474     return ZEBRA_OK;
475 }
476
477 static struct filter_input *new_input(struct filter_info *tinfo, int type)
478 {
479     struct filter_input *p;
480     struct filter_input **np = &tinfo->input_list;
481     for (;*np; np = &(*np)->next)
482         ;
483     p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
484     p->next = 0;
485     p->syntax = 0;
486     p->name = 0;
487     p->convert = 0;
488     p->type = type;
489     return p;
490 }
491
492 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
493                              const char *syntax, const char *name)
494 {
495     FOR_EACH_ELEMENT(ptr) {
496         if (!XML_STRCMP(ptr->name, "marc"))
497         {
498             yaz_iconv_t iconv = 0;
499             const char *input_charset = "marc-8";
500             struct _xmlAttr *attr;
501             
502             for (attr = ptr->properties; attr; attr = attr->next)
503             {
504                 if (attr_content(attr, "inputcharset", &input_charset))
505                     ;
506                 else
507                 {
508                     dom_log(YLOG_WARN, tinfo, ptr,
509                             "bad attribute @%s, expected @inputcharset",
510                             attr->name);
511                 }
512             }
513             iconv = yaz_iconv_open("utf-8", input_charset);
514             if (!iconv)
515             {
516                 dom_log(YLOG_WARN, tinfo, ptr, 
517                         "unsupported @charset '%s'", input_charset);
518                 return ZEBRA_FAIL;
519             }
520             else
521             {
522                 struct filter_input *p 
523                     = new_input(tinfo, DOM_INPUT_MARC);
524                 p->u.marc.handle = yaz_marc_create();
525                 p->u.marc.iconv = iconv;
526                 
527                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
528                 
529                 ptr = ptr->next;
530                 
531                 parse_convert(tinfo, ptr, &p->convert);
532             }
533             break;
534
535         }
536         else if (!XML_STRCMP(ptr->name, "xmlreader"))
537         {
538             struct filter_input *p 
539                 = new_input(tinfo, DOM_INPUT_XMLREADER);
540             struct _xmlAttr *attr;
541             const char *level_str = 0;
542
543             p->u.xmlreader.split_level = 0;
544             p->u.xmlreader.reader = 0;
545
546             for (attr = ptr->properties; attr; attr = attr->next)
547             {
548                 if (attr_content(attr, "level", &level_str))
549                     ;
550                 else
551                 {
552                     dom_log(YLOG_WARN, tinfo, ptr,
553                             "bad attribute @%s, expected @level",
554                             attr->name);
555                 }
556             }
557             if (level_str)
558                 p->u.xmlreader.split_level = atoi(level_str);
559                 
560             ptr = ptr->next;
561
562             parse_convert(tinfo, ptr, &p->convert);
563             break;
564         }
565         else
566         {
567             dom_log(YLOG_WARN, tinfo, ptr,
568                     "bad element <%s>, expected <marc>|<xmlreader>",
569                     ptr->name);
570             return ZEBRA_FAIL;
571         }
572     }
573     return ZEBRA_OK;
574 }
575
576 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
577 {
578     char tmp_full_name[1024];
579     xmlNodePtr ptr;
580     xmlDocPtr doc;
581
582     tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
583     
584     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
585                              NULL, tmp_full_name))
586         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
587     else
588         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
589     
590     yaz_log(YLOG_LOG, "%s dom filter: "
591             "loading config file %s", tinfo->fname, tinfo->full_name);
592
593     doc = xmlParseFile(tinfo->full_name);
594     if (!doc)
595     {
596         yaz_log(YLOG_WARN, "%s: dom filter: "
597                 "failed to parse config file %s",
598                 tinfo->fname, tinfo->full_name);
599         return ZEBRA_FAIL;
600     }
601     /* save because we store ptrs to the content */ 
602     tinfo->doc_config = doc;
603     
604     ptr = xmlDocGetRootElement(doc);
605     if (!ptr || ptr->type != XML_ELEMENT_NODE 
606         || XML_STRCMP(ptr->name, "dom"))
607     {
608         dom_log(YLOG_WARN, tinfo, ptr,
609                 "bad root element <%s>, expected root element <dom>", 
610                 ptr->name);  
611         return ZEBRA_FAIL;
612     }
613
614     ptr = ptr->children;
615     FOR_EACH_ELEMENT(ptr) {
616         if (!XML_STRCMP(ptr->name, "extract"))
617         {
618             /*
619               <extract name="index">
620               <xslt stylesheet="first.xsl"/>
621               <xslt stylesheet="second.xsl"/>
622               </extract>
623             */
624             struct _xmlAttr *attr;
625             struct filter_extract *f =
626                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
627             
628             tinfo->extract = f;
629             f->name = 0;
630             f->convert = 0;
631             for (attr = ptr->properties; attr; attr = attr->next)
632             {
633                 if (attr_content(attr, "name", &f->name))
634                     ;
635                 else
636                 {
637                     dom_log(YLOG_WARN, tinfo, ptr,
638                             "bad attribute @%s, expected @name",
639                             attr->name);
640                 }
641             }
642             parse_convert(tinfo, ptr->children, &f->convert);
643         }
644         else if (!XML_STRCMP(ptr->name, "retrieve"))
645         {  
646             /* 
647                <retrieve name="F">
648                <xslt stylesheet="some.xsl"/>
649                <xslt stylesheet="some.xsl"/>
650                </retrieve>
651             */
652             struct _xmlAttr *attr;
653             struct filter_retrieve **fp = &tinfo->retrieve_list;
654             struct filter_retrieve *f =
655                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
656             
657             while (*fp)
658                 fp = &(*fp)->next;
659
660             *fp = f;
661             f->name = 0;
662             f->identifier = 0;
663             f->convert = 0;
664             f->next = 0;
665
666             for (attr = ptr->properties; attr; attr = attr->next)
667             {
668                 if (attr_content(attr, "identifier", 
669                                  &f->identifier))
670                     ;
671                 else if (attr_content(attr, "name", &f->name))
672                     ;
673                 else
674                 {
675                     dom_log(YLOG_WARN, tinfo, ptr,
676                             "bad attribute @%s,  expected @identifier|@name",
677                             attr->name);
678                 }
679             }
680             parse_convert(tinfo, ptr->children, &f->convert);
681         }
682         else if (!XML_STRCMP(ptr->name, "store"))
683         {
684             /*
685               <store name="F">
686               <xslt stylesheet="some.xsl"/>
687               <xslt stylesheet="some.xsl"/>
688               </retrieve>
689             */
690             struct filter_store *f =
691                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
692             
693             tinfo->store = f;
694             f->convert = 0;
695             parse_convert(tinfo, ptr->children, &f->convert);
696         }
697         else if (!XML_STRCMP(ptr->name, "input"))
698         {
699             /*
700               <input syntax="xml">
701               <xmlreader level="1"/>
702               </input>
703               <input syntax="usmarc">
704               <marc inputcharset="marc-8"/>
705               </input>
706             */
707             struct _xmlAttr *attr;
708             const char  *syntax = 0;
709             const char *name = 0;
710             for (attr = ptr->properties; attr; attr = attr->next)
711             {
712                 if (attr_content(attr, "syntax", &syntax))
713                     ;
714                 else if (attr_content(attr, "name", &name))
715                     ;
716                 else
717                 {
718                     dom_log(YLOG_WARN, tinfo, ptr,
719                             "bad attribute @%s,  expected @syntax|@name",
720                             attr->name);
721                 }
722             }
723             parse_input(tinfo, ptr->children, syntax, name);
724         }
725         else
726         {
727             dom_log(YLOG_WARN, tinfo, ptr,
728                     "bad element <%s>, "
729                     "expected <extract>|<input>|<retrieve>|<store>",
730                     ptr->name);
731             return ZEBRA_FAIL;
732         }
733     }
734     if (!tinfo->input_list)
735     {
736         struct filter_input *p 
737             = new_input(tinfo, DOM_INPUT_XMLREADER);
738         p->u.xmlreader.split_level = 0;
739         p->u.xmlreader.reader = 0;
740     }
741     return ZEBRA_OK;
742 }
743
744 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
745                                                const char *est)
746 {
747     struct filter_retrieve *f = tinfo->retrieve_list;
748
749     /* return first schema if no est is provided */
750     if (!est)
751         return f;
752     for (; f; f = f->next)
753     { 
754         /* find requested schema */
755         if (est) 
756         {    
757             if (f->identifier && !strcmp(f->identifier, est))
758                 return f;
759             if (f->name && !strcmp(f->name, est))
760                 return f;
761         } 
762     }
763     return 0;
764 }
765
766 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
767 {
768     struct filter_info *tinfo = clientData;
769     if (!args || !*args)
770     {
771         yaz_log(YLOG_WARN, "dom filter: need config file");
772         return ZEBRA_FAIL;
773     }
774
775     if (tinfo->fname && !strcmp(args, tinfo->fname))
776         return ZEBRA_OK;
777     
778     tinfo->profile_path = res_get(res, "profilePath");
779
780     destroy_dom(tinfo);
781     return parse_dom(tinfo, args);
782 }
783
784 static void filter_destroy(void *clientData)
785 {
786     struct filter_info *tinfo = clientData;
787     destroy_dom(tinfo);
788     nmem_destroy(tinfo->nmem_config);
789     nmem_destroy(tinfo->nmem_record);
790     xfree(tinfo);
791 }
792
793 static int ioread_ex(void *context, char *buffer, int len)
794 {
795     struct recExtractCtrl *p = context;
796     return p->stream->readf(p->stream, buffer, len);
797 }
798
/*
 * Close callback paired with ioread_ex.  Nothing is closed here; the
 * function only reports success.
 */
static int ioclose_ex(void *context)
{
    (void) context;   /* intentionally unused */
    return 0;
}
803
804
805
806 /* DOM filter style indexing */
807 static void index_value_of(struct filter_info *tinfo, 
808                            struct recExtractCtrl *extctr,
809                            RecWord* recword, 
810                            xmlNodePtr node, 
811                            const char *index_p)
812 {
813     if (tinfo->record_info_invoked == 1)
814     {
815         xmlChar *text = xmlNodeGetContent(node);
816         size_t text_len = strlen((const char *)text);
817        
818         /* if there is no text, we do not need to proceed */
819         if (text_len)
820         {            
821             const char *look = index_p;
822             const char *bval;
823             const char *eval;
824
825             xmlChar index[256];
826             xmlChar type[256];
827
828             /* assingning text to be indexed */
829             recword->term_buf = (const char *)text;
830             recword->term_len = text_len;
831
832             /* parsing all index name/type pairs */
833             /* may not start with ' ' or ':' */
834             while (*look && ' ' != *look && ':' != *look)
835             {
836                 /* setting name and type to zero */
837                 *index = '\0';
838                 *type = '\0';
839     
840                 /* parsing one index name */
841                 bval = look;
842                 while (*look && ':' != *look && ' ' != *look)
843                 {
844                     look++;
845                 }
846                 eval = look;
847                 strncpy((char *)index, (const char *)bval, eval - bval);
848                 index[eval - bval] = '\0';
849     
850     
851                 /* parsing one index type, if existing */
852                 if (':' == *look)
853                 {
854                     look++;
855       
856                     bval = look;
857                     while (*look && ' ' != *look)
858                     {
859                         look++;
860                     }
861                     eval = look;
862                     strncpy((char *)type, (const char *)bval, eval - bval);
863                     type[eval - bval] = '\0';
864                 }
865
866                 /* actually indexing the text given */
867
868                 recword->index_name = (const char *)index;
869                 if (*type)
870                     recword->index_type = (const char *) type;
871
872                 /* writing debug out */
873                 if (extctr->flagShowRecords)
874                     dom_log(YLOG_LOG, tinfo, 0, 
875                             "INDEX '%s:%s' '%s'", 
876                             (const char *) index,
877                             (const char *) type, 
878                             (const char *) text);
879                 
880                 (extctr->tokenAdd)(recword);
881
882                 /* eat whitespaces */
883                 if (*look && ' ' == *look)
884                 {
885                     look++;
886                 } 
887             }
888         }
889         xmlFree(text); 
890     }
891 }
892
893
894 /* DOM filter style indexing */
895 static void set_record_info(struct filter_info *tinfo, 
896                             struct recExtractCtrl *extctr, 
897                             xmlNodePtr node, 
898                             const char * id_p, 
899                             const char * rank_p, 
900                             const char * type_p)
901 {
902     /* writing debug info out */
903     if (extctr && extctr->flagShowRecords)
904         dom_log(YLOG_LOG, tinfo, node,
905                 "RECORD id=%s rank=%s type=%s", 
906                 id_p ? (const char *) id_p : "(null)",
907                 rank_p ? (const char *) rank_p : "(null)",
908                 type_p ? (const char *) type_p : "(null)");
909     
910
911     if (id_p && *id_p)
912         sscanf((const char *)id_p, "%255s", extctr->match_criteria);
913
914     if (rank_p && *rank_p)
915         extctr->staticrank = atozint((const char *)rank_p);
916
917     if (type_p && *type_p)
918     {
919         enum zebra_recctrl_action_t action = action_update;
920         if (!strcmp(type_p, "insert"))
921             action = action_insert;
922         else if (!strcmp(type_p, "delete"))
923             action = action_delete;
924         else if (!strcmp(type_p, "replace"))
925             action = action_replace;
926         else if (!strcmp(type_p, "update"))
927             action = action_update;
928         else
929             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
930         extctr->action = action;
931     }
932
933     if (tinfo->record_info_invoked == 1)
934     {
935         /* warn about multiple only once */
936         dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
937     }
938     tinfo->record_info_invoked++;
939
940 }
941
942
943 /* DOM filter style indexing */
944 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
945                                            struct recExtractCtrl *extctr, 
946                                            RecWord* recword, 
947                                            xmlNodePtr node)
948 {
949     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
950         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
951     {
952         if (0 == XML_STRCMP(node->name, "index"))
953         {
954             const char *index_p = 0;
955
956             struct _xmlAttr *attr;      
957             for (attr = node->properties; attr; attr = attr->next)
958             {
959                 if (attr_content(attr, "name", &index_p))
960                 {
961                     index_value_of(tinfo, extctr, recword, node, index_p);
962                 }  
963                 else
964                 {
965                     dom_log(YLOG_WARN, tinfo, node,
966                             "bad attribute @%s, expected @name",
967                             attr->name);
968                 }
969             }
970         }
971         else if (0 == XML_STRCMP(node->name, "record"))
972         {
973             const char *id_p = 0;
974             const char *rank_p = 0;
975             const char *type_p = 0;
976
977             struct _xmlAttr *attr;
978             for (attr = node->properties; attr; attr = attr->next)
979             {
980                 if (attr_content(attr, "id", &id_p))
981                     ;
982                 else if (attr_content(attr, "rank", &rank_p))
983                     ;
984                 else if (attr_content(attr, "type", &type_p))
985                     ;
986                 else
987                 {
988                     dom_log(YLOG_WARN, tinfo, node,
989                             "bad attribute @%s, expected @id|@rank|@type",
990                             attr->name);
991                 }
992             }
993             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
994         } 
995         else
996         {
997             dom_log(YLOG_WARN, tinfo, node,
998                     "bad element <%s>,"
999                     " expected <record>|<index> in namespace '%s'",
1000                     node->name, zebra_dom_ns);
1001         }
1002     }
1003 }
1004
/*
 * Try to consume one "name=value" token from a processing-instruction
 * string.
 *
 * c_ptr      in/out cursor into the string being parsed
 * name       attribute name to look for
 * value      buffer receiving the value on a match
 * value_max  size of value in bytes, including the terminator
 *
 * Spaces at *c_ptr are skipped first.  If the text then starts with
 * name immediately followed by '=', the characters up to the next space
 * or the end of the string are copied to value (silently truncated to
 * value_max-1 characters, always NUL-terminated) and 1 is returned.
 * Otherwise 0 is returned and value is left untouched, so a value
 * stored by an earlier successful call is not wiped when the same name
 * is probed again on a later token.  With value_max == 0 nothing is
 * ever written to value.
 *
 * In both cases *c_ptr ends up past the leading spaces, past the token
 * if one was consumed, and past any spaces following it.
 */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    const size_t name_len = strlen(name);
    const char *look = *c_ptr;
    int ret = 0;

    while (' ' == *look)
        look++;

    /* strncmp stops at the terminator of look, so look[name_len] is
       only read when the name_len characters before it exist */
    if (0 == strncmp(look, name, name_len) && '=' == look[name_len])
    {
        size_t i = 0;

        look += name_len + 1;
        while (*look && ' ' != *look)
        {
            /* i + 1 < value_max keeps one byte for the terminator and,
               unlike value_max - 1, cannot wrap when value_max is 0 */
            if (i + 1 < value_max)
                value[i++] = *look;
            look++;
        }
        if (value_max > 0)
            value[i] = '\0';
        ret = 1;
    }

    while (' ' == *look)
        look++;
    *c_ptr = look;
    return ret;
}
1036
1037 /* DOM filter style indexing */
1038 static void process_xml_pi_node(struct filter_info *tinfo, 
1039                                 struct recExtractCtrl *extctr, 
1040                                 xmlNodePtr node,
1041                                 const char **index_pp)
1042 {
1043     /* if right PI name, continue parsing PI */
1044     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1045     {
1046         xmlChar *pi_p =  node->content;
1047         const char *look = (const char *) node->content;
1048     
1049         /* parsing PI record instructions */
1050         if (0 == strncmp((const char *)look, "record", 6))
1051         {
1052             char id[256];
1053             char rank[256];
1054             char type[256];
1055             
1056             *id = '\0';
1057             *rank = '\0';
1058             *type = '\0';
1059             look += 6;
1060             while (*look)
1061                 if (attr_content_pi(&look, "id", id, sizeof(id)))
1062                     ;
1063                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1064                     ;
1065                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1066                 {
1067                     dom_log(YLOG_WARN, tinfo, node,
1068                             "content '%s', can not parse '%s'",
1069                             pi_p, look);
1070                     break;
1071                 }
1072             set_record_info(tinfo, extctr, node, id, rank, type);
1073         } 
1074         /* parsing index instruction */
1075         else if (0 == strncmp((const char *)look, "index", 5))
1076         {
1077             look += 5;
1078       
1079             /* eat whitespace */
1080             while (*look && ' ' == *look)
1081                 look++;
1082
1083             /* export index instructions to outside */
1084             *index_pp = look;
1085         } 
1086         else 
1087         {
1088             dom_log(YLOG_WARN, tinfo, node,
1089                     "content '%s', can not parse '%s'",
1090                     pi_p, look);
1091         }
1092     }
1093 }
1094
1095 /* DOM filter style indexing */
1096 static void process_xml_element_node(struct filter_info *tinfo, 
1097                                      struct recExtractCtrl *extctr, 
1098                                      RecWord* recword, 
1099                                      xmlNodePtr node)
1100 {
1101     /* remember indexing instruction from PI to next element node */
1102     const char *index_p = 0;
1103
1104     /* check if we are an element node in the special zebra namespace 
1105        and either set record data or index value-of node content*/
1106     process_xml_element_zebra_node(tinfo, extctr, recword, node);
1107   
1108     /* loop through kid nodes */
1109     for (node = node->children; node; node = node->next)
1110     {
1111         /* check and set PI record and index index instructions */
1112         if (node->type == XML_PI_NODE)
1113         {
1114             process_xml_pi_node(tinfo, extctr, node, &index_p);
1115         }
1116         else if (node->type == XML_ELEMENT_NODE)
1117         {
1118             /* if there was a PI index instruction before this element */
1119             if (index_p)
1120             {
1121                 index_value_of(tinfo, extctr, recword, node, index_p);
1122                 index_p = 0;
1123             }
1124             process_xml_element_node(tinfo, extctr, recword,node);
1125         }
1126         else
1127             continue;
1128     }
1129 }
1130
1131
1132 /* DOM filter style indexing */
1133 static void extract_dom_doc_node(struct filter_info *tinfo, 
1134                                  struct recExtractCtrl *extctr, 
1135                                  xmlDocPtr doc)
1136 {
1137     /* only need to do the initialization once, reuse recword for all terms */
1138     RecWord recword;
1139     (*extctr->init)(extctr, &recword);
1140
1141     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1142 }
1143
1144
1145 static int convert_extract_doc(struct filter_info *tinfo, 
1146                                struct filter_input *input,
1147                                struct recExtractCtrl *p, 
1148                                xmlDocPtr doc)
1149 {
1150     xmlChar *buf_out;
1151     int len_out;
1152     const char *params[10];
1153     xsltStylesheetPtr last_xsp = 0;
1154
1155     /* per default do not ingest record */
1156     tinfo->record_info_invoked = 0;
1157
1158     /* exit if empty document given */
1159     if (!doc)
1160         return RECCTRL_EXTRACT_SKIP;
1161
1162     /* we actuallu have a document which needs to be processed further */
1163     params[0] = 0;
1164     set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1165
1166     if (p && p->flagShowRecords)
1167     {
1168         xmlChar *buf_out;
1169         int len_out;
1170 #if 0 
1171         FILE *outf = fopen("extract.xml", "w");
1172         xmlDocDumpMemory(doc, &buf_out, &len_out);
1173         fwrite(buf_out, 1, len_out, outf);
1174 #endif
1175         yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1176 #if 0
1177         fclose(outf);
1178 #endif
1179     }
1180
1181     if (p->setStoreData)
1182     {
1183         xmlDocPtr store_doc = 0;
1184
1185         /* input conversion */
1186         perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1187         
1188         if (tinfo->store)
1189         {
1190             /* store conversion */
1191             store_doc = xmlCopyDoc(doc, 1);
1192             perform_convert(tinfo, p, 0, tinfo->store->convert,
1193                             params, &store_doc, &last_xsp);
1194         }
1195         
1196         /* saving either store doc or original doc in case no store doc exists */
1197         if (last_xsp)
1198             xsltSaveResultToString(&buf_out, &len_out, 
1199                                    store_doc ? store_doc : doc, last_xsp);
1200         else
1201             xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1202         
1203         if (p->setStoreData)
1204             (*p->setStoreData)(p, buf_out, len_out);
1205         xmlFree(buf_out);
1206         if (store_doc)
1207             xmlFreeDoc(store_doc);
1208     }
1209
1210
1211     /* extract conversion */
1212     perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1213
1214
1215     /* finally, do the indexing */
1216     if (doc){
1217         extract_dom_doc_node(tinfo, p, doc);
1218         xmlFreeDoc(doc);
1219     }
1220     
1221     /* there was nothing to index, so there is no inserted/updated record */
1222     if (tinfo->record_info_invoked == 0)
1223         return RECCTRL_EXTRACT_SKIP;
1224
1225     return RECCTRL_EXTRACT_OK;
1226 }
1227
1228 static int extract_xml_split(struct filter_info *tinfo,
1229                              struct filter_input *input,
1230                              struct recExtractCtrl *p)
1231 {
1232     int ret;
1233
1234     if (p->first_record)
1235     {
1236         if (input->u.xmlreader.reader)
1237             xmlFreeTextReader(input->u.xmlreader.reader);
1238         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1239                                                    p /* I/O handler */,
1240                                                    0 /* URL */, 
1241                                                    0 /* encoding */,
1242                                                    XML_PARSE_XINCLUDE
1243                                                    | XML_PARSE_NOENT
1244                                                    | XML_PARSE_NONET);
1245     }
1246     if (!input->u.xmlreader.reader)
1247         return RECCTRL_EXTRACT_ERROR_GENERIC;
1248
1249     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1250     while (ret == 1)
1251     {
1252         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1253         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1254
1255         if (type == XML_READER_TYPE_ELEMENT && 
1256             input->u.xmlreader.split_level == depth)
1257         {
1258             xmlNodePtr ptr;
1259
1260             /* per default do not ingest record */
1261             tinfo->record_info_invoked = 0;
1262             
1263             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1264             if (ptr)
1265             {
1266                 /* we have a new document */
1267
1268                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1269                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1270                 
1271                 xmlDocSetRootElement(doc, ptr2);
1272                 
1273                 /* writing debug info out */
1274                 if (p->flagShowRecords)
1275                 {
1276                     xmlChar *buf_out = 0;
1277                     int len_out = 0;
1278                     xmlDocDumpMemory(doc, &buf_out, &len_out);
1279                     yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
1280                             tinfo->fname ? tinfo->fname : "(none)",
1281                             depth, len_out, buf_out); 
1282                     xmlFree(buf_out);
1283                 }
1284                 
1285                 return convert_extract_doc(tinfo, input, p, doc);
1286             }
1287             else
1288             {
1289                 xmlFreeTextReader(input->u.xmlreader.reader);
1290                 input->u.xmlreader.reader = 0;
1291                 return RECCTRL_EXTRACT_ERROR_GENERIC;
1292             }
1293         }
1294         ret = xmlTextReaderRead(input->u.xmlreader.reader);
1295     }
1296     xmlFreeTextReader(input->u.xmlreader.reader);
1297     input->u.xmlreader.reader = 0;
1298     return RECCTRL_EXTRACT_EOF;
1299 }
1300
1301 static int extract_xml_full(struct filter_info *tinfo, 
1302                             struct filter_input *input,
1303                             struct recExtractCtrl *p)
1304 {
1305     if (p->first_record) /* only one record per stream */
1306     {
1307         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1308                                   p /* I/O handler */,
1309                                   0 /* URL */,
1310                                   0 /* encoding */,
1311                                   XML_PARSE_XINCLUDE
1312                                   | XML_PARSE_NOENT
1313                                   | XML_PARSE_NONET);
1314         if (!doc)
1315         {
1316             return RECCTRL_EXTRACT_ERROR_GENERIC;
1317         }
1318         return convert_extract_doc(tinfo, input, p, doc);
1319     }
1320     else
1321         return RECCTRL_EXTRACT_EOF;
1322 }
1323
1324 static int extract_iso2709(struct filter_info *tinfo,
1325                            struct filter_input *input,
1326                            struct recExtractCtrl *p)
1327 {
1328     char buf[100000];
1329     int record_length;
1330     int read_bytes, r;
1331
1332     if (p->stream->readf(p->stream, buf, 5) != 5)
1333         return RECCTRL_EXTRACT_EOF;
1334     while (*buf < '0' || *buf > '9')
1335     {
1336         int i;
1337
1338         dom_log(YLOG_WARN, tinfo, 0,
1339                 "MARC: Skipping bad byte %d (0x%02X)",
1340                 *buf & 0xff, *buf & 0xff);
1341         for (i = 0; i<4; i++)
1342             buf[i] = buf[i+1];
1343
1344         if (p->stream->readf(p->stream, buf+4, 1) != 1)
1345             return RECCTRL_EXTRACT_EOF;
1346     }
1347     record_length = atoi_n (buf, 5);
1348     if (record_length < 25)
1349     {
1350         dom_log(YLOG_WARN, tinfo, 0,
1351                 "MARC record length < 25, is %d",  record_length);
1352         return RECCTRL_EXTRACT_ERROR_GENERIC;
1353     }
1354     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1355     if (read_bytes < record_length-5)
1356     {
1357         dom_log(YLOG_WARN, tinfo, 0,
1358                 "couldn't read whole MARC record");
1359         return RECCTRL_EXTRACT_ERROR_GENERIC;
1360     }
1361     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1362     if (r < record_length)
1363     {
1364         dom_log (YLOG_WARN, tinfo, 0,
1365                  "parsing of MARC record failed r=%d length=%d",
1366                  r, record_length);
1367         return RECCTRL_EXTRACT_ERROR_GENERIC;
1368     }
1369     else
1370     {
1371         xmlDocPtr rdoc;
1372         xmlNode *root_ptr;
1373         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 
1374                            "http://www.loc.gov/MARC21/slim", 0, 0);
1375         rdoc = xmlNewDoc((const xmlChar*) "1.0");
1376         xmlDocSetRootElement(rdoc, root_ptr);
1377         return convert_extract_doc(tinfo, input, p, rdoc);        
1378     }
1379     return RECCTRL_EXTRACT_OK;
1380 }
1381
1382 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1383 {
1384     struct filter_info *tinfo = clientData;
1385     struct filter_input *input = tinfo->input_list;
1386
1387     if (!input)
1388         return RECCTRL_EXTRACT_ERROR_GENERIC;
1389     
1390     nmem_reset(tinfo->nmem_record);
1391
1392     if (p->setStoreData == 0)
1393         return extract_xml_full(tinfo, input, p);
1394     switch(input->type)
1395     {
1396     case DOM_INPUT_XMLREADER:
1397         if (input->u.xmlreader.split_level == 0)
1398             return extract_xml_full(tinfo, input, p);
1399         else
1400             return extract_xml_split(tinfo, input, p);
1401         break;
1402     case DOM_INPUT_MARC:
1403         return extract_iso2709(tinfo, input, p);
1404     }
1405     return RECCTRL_EXTRACT_ERROR_GENERIC;
1406 }
1407
1408 static int ioread_ret(void *context, char *buffer, int len)
1409 {
1410     struct recRetrieveCtrl *p = context;
1411     return p->stream->readf(p->stream, buffer, len);
1412 }
1413
/*
 * Close callback handed to xmlReadIO() during retrieval.  It does
 * nothing with the context and always reports success.
 */
static int ioclose_ret(void *context)
{
    (void) context;
    return 0;
}
1418
1419 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1420 {
1421     /* const char *esn = zebra_dom_ns; */
1422     const char *esn = 0;
1423     const char *params[32];
1424     struct filter_info *tinfo = clientData;
1425     xmlDocPtr doc;
1426     struct filter_retrieve *retrieve;
1427     xsltStylesheetPtr last_xsp = 0;
1428
1429     if (p->comp)
1430     {
1431         if (p->comp->which == Z_RecordComp_simple
1432             && p->comp->u.simple->which == Z_ElementSetNames_generic)
1433         {
1434             esn = p->comp->u.simple->u.generic;
1435         }
1436         else if (p->comp->which == Z_RecordComp_complex 
1437                  && p->comp->u.complex->generic->elementSpec
1438                  && p->comp->u.complex->generic->elementSpec->which ==
1439                  Z_ElementSpec_elementSetName)
1440         {
1441             esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1442         }
1443     }
1444     retrieve = lookup_retrieve(tinfo, esn);
1445     if (!retrieve)
1446     {
1447         p->diagnostic =
1448             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1449         p->addinfo = odr_strdup(p->odr, esn);
1450         return 0;
1451     }
1452
1453     params[0] = 0;
1454     set_param_int(params, "id", p->localno, p->odr->mem);
1455     if (p->fname)
1456         set_param_str(params, "filename", p->fname, p->odr->mem);
1457     if (p->staticrank >= 0)
1458         set_param_int(params, "rank", p->staticrank, p->odr->mem);
1459
1460     if (esn)
1461         set_param_str(params, "schema", esn, p->odr->mem);
1462     else
1463         if (retrieve->name)
1464             set_param_str(params, "schema", retrieve->name, p->odr->mem);
1465         else if (retrieve->identifier)
1466             set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1467         else
1468             set_param_str(params, "schema", "", p->odr->mem);
1469
1470     if (p->score >= 0)
1471         set_param_int(params, "score", p->score, p->odr->mem);
1472     set_param_int(params, "size", p->recordSize, p->odr->mem);
1473
1474     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1475                     0 /* URL */,
1476                     0 /* encoding */,
1477                     XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1478     if (!doc)
1479     {
1480         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1481         return 0;
1482     }
1483
1484     /* retrieve conversion */
1485     perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1486     if (!doc)
1487     {
1488         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1489     }
1490     else if (!p->input_format
1491              || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1492     {
1493         xmlChar *buf_out;
1494         int len_out;
1495
1496         if (last_xsp)
1497             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1498         else
1499             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1500
1501         p->output_format = yaz_oid_recsyn_xml;
1502         p->rec_len = len_out;
1503         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1504         memcpy(p->rec_buf, buf_out, p->rec_len);
1505         xmlFree(buf_out);
1506     }
1507     else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1508     {
1509         xmlChar *buf_out;
1510         int len_out;
1511
1512         if (last_xsp)
1513             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1514         else
1515             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1516         
1517         p->output_format = yaz_oid_recsyn_sutrs;
1518         p->rec_len = len_out;
1519         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1520         memcpy(p->rec_buf, buf_out, p->rec_len);
1521         
1522         xmlFree(buf_out);
1523     }
1524     else
1525     {
1526         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1527     }
1528     xmlFreeDoc(doc);
1529     return 0;
1530 }
1531
1532 static struct recType filter_type = {
1533     0,
1534     "dom",
1535     filter_init,
1536     filter_config,
1537     filter_destroy,
1538     filter_extract,
1539     filter_retrieve
1540 };
1541
1542 RecType
1543 #ifdef IDZEBRA_STATIC_DOM
1544 idzebra_filter_dom
1545 #else
1546 idzebra_filter
1547 #endif
1548
1549 [] = {
1550     &filter_type,
1551     0,
1552 };
1553 /*
1554  * Local variables:
1555  * c-basic-offset: 4
1556  * indent-tabs-mode: nil
1557  * End:
1558  * vim: shiftwidth=4 tabstop=8 expandtab
1559  */
1560