Update m4
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdarg.h>
27
28 #include <yaz/diagbib1.h>
29 #include <yaz/tpath.h>
30 #include <yaz/snprintf.h>
31
32 #include <libxml/xmlversion.h>
33 #include <libxml/parser.h>
34 #include <libxml/tree.h>
35 #include <libxml/xmlIO.h>
36 #include <libxml/xmlreader.h>
37 #include <libxslt/transform.h>
38 #include <libxslt/xsltutils.h>
39
40 #if YAZ_HAVE_EXSLT
41 #include <libexslt/exslt.h>
42 #endif
43
44 #include <idzebra/util.h>
45 #include <idzebra/recctrl.h>
46 #include <yaz/oid_db.h>
47
/* XML namespace URI of Zebra DOM filter elements; element nodes are
   recognised by comparing node->ns->href against this string */
#define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
static const char *zebra_dom_ns = ZEBRA_DOM_NS;

/* Name used for Zebra processing instructions
   NOTE(review): presumably the PI target; its use is not visible in this
   part of the file - confirm */
#define ZEBRA_PI_NAME "zebra-2.0"
static const char *zebra_pi_name = ZEBRA_PI_NAME;
55
/* Kind of step in a conversion chain (see struct convert_s) */
enum convert_type {
    convert_xslt_type,   /* <xslt>: apply an XSLT stylesheet */
    convert_meta_type    /* <process-meta>: expand zebra <meta> elements */
};

/* Data for an XSLT conversion step */
struct convert_xslt {
    const char *stylesheet;            /* value of the @stylesheet attribute */
    xsltStylesheetPtr stylesheet_xsp;  /* parsed stylesheet, freed by destroy_xsp */
};

/* Data for a process-meta conversion step (carries no configuration) */
struct convert_meta {
    int dummy;
};

/* One step of a conversion chain; steps are applied in list order */
struct convert_s {
    enum convert_type which;   /* selects the active member of u */
    union {
        struct convert_xslt xslt;
        struct convert_meta meta;
    } u;
    struct convert_s *next;    /* next step or 0 */
};

/* Configuration of the <extract> element */
struct filter_extract {
    const char *name;            /* value of @name, or 0 */
    struct convert_s *convert;   /* conversion chain of the element */
};

/* Configuration of the <store> element */
struct filter_store {
    struct convert_s *convert;   /* conversion chain of the element */
};

/* Configuration of one <retrieve> element; kept in a singly linked list */
struct filter_retrieve {
    const char *name;            /* value of @name, or 0 */
    const char *identifier;      /* value of @identifier, or 0 */
    struct convert_s *convert;   /* conversion chain of the element */
    struct filter_retrieve *next;
};

/* Values for filter_input.type */
#define DOM_INPUT_XMLREADER 1
#define DOM_INPUT_MARC 2
/* Configuration of one <input> element; kept in a singly linked list */
struct filter_input {
    const char *syntax;          /* set to 0 by new_input */
    const char *name;            /* set to 0 by new_input */
    struct convert_s *convert;   /* conversion chain following the reader */
    int type;                    /* DOM_INPUT_XMLREADER or DOM_INPUT_MARC */
    union {
        struct {
            xmlTextReaderPtr reader;  /* freed with xmlFreeTextReader */
            int split_level;          /* from @level, 0 when absent */
        } xmlreader;
        struct {
            const char *input_charset;
            yaz_marc_t handle;        /* destroyed with yaz_marc_destroy */
            yaz_iconv_t iconv;        /* closed with yaz_iconv_close */
        } marc;
    } u;
    struct filter_input *next;
};
  
/* Per-filter state: the loaded configuration plus bookkeeping */
struct filter_info {
    char *fname;                 /* config file name as passed to the filter */
    char *full_name;             /* config file name resolved via profile_path */
    const char *profile_path;    /* value of the "profilePath" resource */
    NMEM nmem_record;            /* NOTE(review): presumably per-record memory - confirm */
    NMEM nmem_config;            /* memory for everything parsed from the config */
    xmlDocPtr doc_config;        /* parsed config; kept because pointers into it are stored */
    struct filter_extract *extract;
    struct filter_retrieve *retrieve_list;
    struct filter_input *input_list;
    struct filter_store *store;
    int record_info_invoked;     /* incremented by each set_record_info call */
};
129
130
131
/* Compare an xmlChar string with a plain C string (strcmp semantics) */
#define XML_STRCMP(a,b)   strcmp((const char *)(a), (b))
/* Length in bytes of an xmlChar string */
#define XML_STRLEN(a) strlen((const char *)(a))

/* Iterate ptr over itself and its following siblings, executing the
   statement that follows only for element nodes. ptr is advanced in place
   and is 0 when the loop runs to completion. The expansion ends in "else"
   so that an "else" written after the loop body cannot bind to the
   macro's internal "if". */
#define FOR_EACH_ELEMENT(ptr) for (; (ptr); (ptr) = (ptr)->next) if ((ptr)->type != XML_ELEMENT_NODE) {} else
137
138 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
139                     const char *fmt, ...)
140 #ifdef __GNUC__
141     __attribute__ ((format (printf, 4, 5)))
142 #endif
143     ;
144
145 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
146                     const char *fmt, ...)
147 {
148     va_list ap;
149     char buf[4096];
150
151     va_start(ap, fmt);
152     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
153     if (ptr)
154     {
155         yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none", 
156                 xmlGetLineNo(ptr), buf);
157     }
158     else
159     {
160         yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
161     }
162     va_end(ap);
163 }
164
165
166 static void set_param_str(const char **params, const char *name,
167                           const char *value, NMEM nmem)
168 {
169     char *quoted = nmem_malloc(nmem, 3 + strlen(value));
170     sprintf(quoted, "'%s'", value);
171     while (*params)
172         params++;
173     params[0] = name;
174     params[1] = quoted;
175     params[2] = 0;
176 }
177
178 static void set_param_int(const char **params, const char *name,
179                           zint value, NMEM nmem)
180 {
181     char *quoted = nmem_malloc(nmem, 30); /* 25 digits enough for 2^64 */
182     while (*params)
183         params++;
184     sprintf(quoted, "'" ZINT_FORMAT "'", value);
185     params[0] = name;
186     params[1] = quoted;
187     params[2] = 0;
188 }
189
190 static void *filter_init(Res res, RecType recType)
191 {
192     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
193     tinfo->fname = 0;
194     tinfo->full_name = 0;
195     tinfo->profile_path = 0;
196     tinfo->nmem_record = nmem_create();
197     tinfo->nmem_config = nmem_create();
198     tinfo->extract = 0;
199     tinfo->retrieve_list = 0;
200     tinfo->input_list = 0;
201     tinfo->store = 0;
202     tinfo->doc_config = 0;
203     tinfo->record_info_invoked = 0;
204
205 #if YAZ_HAVE_EXSLT
206     exsltRegisterAll(); 
207 #endif
208
209     return tinfo;
210 }
211
212 static int attr_content(struct _xmlAttr *attr, const char *name,
213                         const char **dst_content)
214 {
215     if (!XML_STRCMP(attr->name, name) && attr->children 
216         && attr->children->type == XML_TEXT_NODE)
217     {
218         *dst_content = (const char *)(attr->children->content);
219         return 1;
220     }
221     return 0;
222 }
223
224 static void destroy_xsp(struct convert_s *c)
225 {
226     while (c)
227     {
228         if (c->which == convert_xslt_type)
229         {
230             if (c->u.xslt.stylesheet_xsp)
231                 xsltFreeStylesheet(c->u.xslt.stylesheet_xsp);
232         }
233         c = c->next;
234     }
235 }
236
237 static void destroy_dom(struct filter_info *tinfo)
238 {
239     if (tinfo->extract)
240     {
241         destroy_xsp(tinfo->extract->convert);
242         tinfo->extract = 0;
243     }
244     if (tinfo->store)
245     {
246         destroy_xsp(tinfo->store->convert);
247         tinfo->store = 0;
248     }
249     if (tinfo->input_list)
250     {
251         struct filter_input *i_ptr;
252         for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
253         {
254             switch(i_ptr->type)
255             {
256             case DOM_INPUT_XMLREADER:
257                 if (i_ptr->u.xmlreader.reader)
258                     xmlFreeTextReader(i_ptr->u.xmlreader.reader);
259                 break;
260             case DOM_INPUT_MARC:
261                 yaz_iconv_close(i_ptr->u.marc.iconv);
262                 yaz_marc_destroy(i_ptr->u.marc.handle);
263                 break;
264             }
265             destroy_xsp(i_ptr->convert);
266         }
267         tinfo->input_list = 0;
268     }
269     if (tinfo->retrieve_list)
270     {
271         struct filter_retrieve *r_ptr;
272         for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
273             destroy_xsp(r_ptr->convert);
274         tinfo->retrieve_list = 0;
275     }
276
277     if (tinfo->doc_config)
278     {
279         xmlFreeDoc(tinfo->doc_config);
280         tinfo->doc_config = 0;
281     }
282     nmem_reset(tinfo->nmem_config);
283 }
284
285 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
286                                struct convert_s **l)
287 {
288     *l = 0;
289     FOR_EACH_ELEMENT(ptr) {
290         if (!XML_STRCMP(ptr->name, "xslt"))
291         {
292             struct _xmlAttr *attr;
293             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
294             
295             p->next = 0;
296             p->which = convert_xslt_type;
297             p->u.xslt.stylesheet = 0;
298             p->u.xslt.stylesheet_xsp = 0;
299             
300             for (attr = ptr->properties; attr; attr = attr->next)
301                 if (attr_content(attr, "stylesheet", &p->u.xslt.stylesheet))
302                     ;
303                 else
304                 {
305                     dom_log(YLOG_WARN, tinfo, ptr,
306                             "bad attribute @%s", attr->name);
307                 }
308             if (p->u.xslt.stylesheet)
309             {
310                 char tmp_xslt_full_name[1024];
311                 if (!yaz_filepath_resolve(p->u.xslt.stylesheet, 
312                                           tinfo->profile_path,
313                                           NULL, 
314                                           tmp_xslt_full_name))
315                 {
316                     dom_log(YLOG_WARN, tinfo, 0,
317                             "stylesheet %s not found in "
318                             "path %s",
319                             p->u.xslt.stylesheet, 
320                             tinfo->profile_path);
321                     return ZEBRA_FAIL;
322                 }
323                 
324                 p->u.xslt.stylesheet_xsp
325                     = xsltParseStylesheetFile((const xmlChar*) 
326                                               tmp_xslt_full_name);
327                 if (!p->u.xslt.stylesheet_xsp)
328                 {
329                     dom_log(YLOG_WARN, tinfo, 0,
330                             "could not parse xslt stylesheet %s",
331                             tmp_xslt_full_name);
332                     return ZEBRA_FAIL;
333                 }
334             }
335             else
336             {
337                 dom_log(YLOG_WARN, tinfo, ptr,
338                         "missing attribute 'stylesheet'");
339                 return ZEBRA_FAIL;
340             }
341             *l = p;
342             l = &p->next;
343         }
344         else if (!XML_STRCMP(ptr->name, "process-meta"))
345         {
346             struct _xmlAttr *attr;
347             struct convert_s *p = nmem_malloc(tinfo->nmem_config, sizeof(*p));
348             
349             p->next = 0;
350             p->which = convert_meta_type;
351             
352             for (attr = ptr->properties; attr; attr = attr->next)
353                 dom_log(YLOG_WARN, tinfo, ptr,
354                         "bad attribute @%s", attr->name);
355             *l = p;
356             l = &p->next;
357         }
358         else
359         {
360             dom_log(YLOG_WARN, tinfo, ptr,
361                     "bad element '%s', expected <xslt>", ptr->name);
362             return ZEBRA_FAIL;
363         }
364     }
365     return ZEBRA_OK;
366 }
367
368 static int process_meta(struct filter_info *tinfo, xmlDocPtr doc, xmlNodePtr node, 
369                         struct recRetrieveCtrl *retctr)
370 {
371
372     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href &&
373         0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
374     {
375         if (0 == XML_STRCMP(node->name, "meta"))
376         {
377             const char *element_set_name = 0;
378             
379             struct _xmlAttr *attr;      
380             for (attr = node->properties; attr; attr = attr->next)
381             {
382                 if (attr_content(attr, "name", &element_set_name))
383                     ;
384                 else
385                 {
386                     dom_log(YLOG_WARN, tinfo, node,
387                             "bad attribute @%s, expected @name", attr->name);
388                 }
389             }
390             if (element_set_name)
391             {
392                 WRBUF result = wrbuf_alloc();
393                 WRBUF addinfo = wrbuf_alloc();
394                 const Odr_oid *input_format = yaz_oid_recsyn_xml;
395                 const Odr_oid *output_format = 0;
396                 int ret;
397                 
398                 ret = retctr->special_fetch(retctr->handle,
399                                             element_set_name,
400                                             input_format, &output_format,
401                                             result, addinfo);
402                 if (ret == 0)
403                 {
404                     xmlDocPtr sub_doc = 
405                         xmlParseMemory(wrbuf_buf(result), wrbuf_len(result));
406                     if (sub_doc)
407                     {
408                         xmlNodePtr t = xmlDocGetRootElement(sub_doc);
409                         xmlReplaceNode(node, xmlCopyNode(t, 1));
410                         xmlFreeDoc(sub_doc);
411                     }
412                 }
413                 wrbuf_destroy(result);
414                 wrbuf_destroy(addinfo);
415             }
416         }
417     }
418     for (node = node->children; node; node = node->next)
419         process_meta(tinfo, doc, node, retctr);
420     return 0;
421 }
422
423 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
424                                  struct recExtractCtrl *extctr,
425                                  struct recRetrieveCtrl *retctr,
426                                  struct convert_s *convert,
427                                  const char **params,
428                                  xmlDocPtr *doc,
429                                  xsltStylesheetPtr *last_xsp)
430 {
431     for (; convert; convert = convert->next)
432     {
433         if (convert->which == convert_xslt_type)
434         {
435             xmlChar *buf_out = 0;
436             int len_out = 0;
437             xmlDocPtr res_doc = xsltApplyStylesheet(convert->u.xslt.stylesheet_xsp,
438                                                     *doc, params);
439             if (last_xsp)
440                 *last_xsp = convert->u.xslt.stylesheet_xsp;
441             
442             if (!res_doc)
443                 break;
444             
445             /* now saving into buffer and re-reading into DOM to avoid annoing
446                XSLT problem with thrown-out indentation text nodes */
447             xsltSaveResultToString(&buf_out, &len_out, res_doc,
448                                    convert->u.xslt.stylesheet_xsp); 
449             xmlFreeDoc(res_doc);
450             
451             xmlFreeDoc(*doc);
452             
453             *doc = xmlParseMemory((const char *) buf_out, len_out);
454             
455             /* writing debug info out */
456             if (extctr && extctr->flagShowRecords)
457                 yaz_log(YLOG_LOG, "%s: XSLT %s\n %.*s", 
458                         tinfo->fname ? tinfo->fname : "(none)", 
459                         convert->u.xslt.stylesheet,
460                         len_out, buf_out);
461             
462             xmlFree(buf_out);
463         }
464         else if (convert->which == convert_meta_type)
465         {
466             if (retctr) /* only execute meta on retrieval */
467             {
468                 process_meta(tinfo, *doc, xmlDocGetRootElement(*doc), retctr);
469
470                 /* last stylesheet absent */
471                 if (last_xsp)
472                     *last_xsp = 0;
473             }
474         }
475     }
476     return ZEBRA_OK;
477 }
478
479 static struct filter_input *new_input(struct filter_info *tinfo, int type)
480 {
481     struct filter_input *p;
482     struct filter_input **np = &tinfo->input_list;
483     for (;*np; np = &(*np)->next)
484         ;
485     p = *np = nmem_malloc(tinfo->nmem_config, sizeof(*p));
486     p->next = 0;
487     p->syntax = 0;
488     p->name = 0;
489     p->convert = 0;
490     p->type = type;
491     return p;
492 }
493
494 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
495                              const char *syntax, const char *name)
496 {
497     FOR_EACH_ELEMENT(ptr) {
498         if (!XML_STRCMP(ptr->name, "marc"))
499         {
500             yaz_iconv_t iconv = 0;
501             const char *input_charset = "marc-8";
502             struct _xmlAttr *attr;
503             
504             for (attr = ptr->properties; attr; attr = attr->next)
505             {
506                 if (attr_content(attr, "inputcharset", &input_charset))
507                     ;
508                 else
509                 {
510                     dom_log(YLOG_WARN, tinfo, ptr,
511                             "bad attribute @%s, expected @inputcharset",
512                             attr->name);
513                 }
514             }
515             iconv = yaz_iconv_open("utf-8", input_charset);
516             if (!iconv)
517             {
518                 dom_log(YLOG_WARN, tinfo, ptr, 
519                         "unsupported @charset '%s'", input_charset);
520                 return ZEBRA_FAIL;
521             }
522             else
523             {
524                 struct filter_input *p 
525                     = new_input(tinfo, DOM_INPUT_MARC);
526                 p->u.marc.handle = yaz_marc_create();
527                 p->u.marc.iconv = iconv;
528                 
529                 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
530                 
531                 ptr = ptr->next;
532                 
533                 parse_convert(tinfo, ptr, &p->convert);
534             }
535             break;
536
537         }
538         else if (!XML_STRCMP(ptr->name, "xmlreader"))
539         {
540             struct filter_input *p 
541                 = new_input(tinfo, DOM_INPUT_XMLREADER);
542             struct _xmlAttr *attr;
543             const char *level_str = 0;
544
545             p->u.xmlreader.split_level = 0;
546             p->u.xmlreader.reader = 0;
547
548             for (attr = ptr->properties; attr; attr = attr->next)
549             {
550                 if (attr_content(attr, "level", &level_str))
551                     ;
552                 else
553                 {
554                     dom_log(YLOG_WARN, tinfo, ptr,
555                             "bad attribute @%s, expected @level",
556                             attr->name);
557                 }
558             }
559             if (level_str)
560                 p->u.xmlreader.split_level = atoi(level_str);
561                 
562             ptr = ptr->next;
563
564             parse_convert(tinfo, ptr, &p->convert);
565             break;
566         }
567         else
568         {
569             dom_log(YLOG_WARN, tinfo, ptr,
570                     "bad element <%s>, expected <marc>|<xmlreader>",
571                     ptr->name);
572             return ZEBRA_FAIL;
573         }
574     }
575     return ZEBRA_OK;
576 }
577
578 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
579 {
580     char tmp_full_name[1024];
581     xmlNodePtr ptr;
582     xmlDocPtr doc;
583
584     tinfo->fname = nmem_strdup(tinfo->nmem_config, fname);
585     
586     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
587                              NULL, tmp_full_name))
588         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tmp_full_name);
589     else
590         tinfo->full_name = nmem_strdup(tinfo->nmem_config, tinfo->fname);
591     
592     yaz_log(YLOG_LOG, "%s dom filter: "
593             "loading config file %s", tinfo->fname, tinfo->full_name);
594
595     doc = xmlParseFile(tinfo->full_name);
596     if (!doc)
597     {
598         yaz_log(YLOG_WARN, "%s: dom filter: "
599                 "failed to parse config file %s",
600                 tinfo->fname, tinfo->full_name);
601         return ZEBRA_FAIL;
602     }
603     /* save because we store ptrs to the content */ 
604     tinfo->doc_config = doc;
605     
606     ptr = xmlDocGetRootElement(doc);
607     if (!ptr || ptr->type != XML_ELEMENT_NODE 
608         || XML_STRCMP(ptr->name, "dom"))
609     {
610         dom_log(YLOG_WARN, tinfo, ptr,
611                 "bad root element <%s>, expected root element <dom>", 
612                 ptr->name);  
613         return ZEBRA_FAIL;
614     }
615
616     ptr = ptr->children;
617     FOR_EACH_ELEMENT(ptr) {
618         if (!XML_STRCMP(ptr->name, "extract"))
619         {
620             /*
621               <extract name="index">
622               <xslt stylesheet="first.xsl"/>
623               <xslt stylesheet="second.xsl"/>
624               </extract>
625             */
626             struct _xmlAttr *attr;
627             struct filter_extract *f =
628                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
629             
630             tinfo->extract = f;
631             f->name = 0;
632             f->convert = 0;
633             for (attr = ptr->properties; attr; attr = attr->next)
634             {
635                 if (attr_content(attr, "name", &f->name))
636                     ;
637                 else
638                 {
639                     dom_log(YLOG_WARN, tinfo, ptr,
640                             "bad attribute @%s, expected @name",
641                             attr->name);
642                 }
643             }
644             parse_convert(tinfo, ptr->children, &f->convert);
645         }
646         else if (!XML_STRCMP(ptr->name, "retrieve"))
647         {  
648             /* 
649                <retrieve name="F">
650                <xslt stylesheet="some.xsl"/>
651                <xslt stylesheet="some.xsl"/>
652                </retrieve>
653             */
654             struct _xmlAttr *attr;
655             struct filter_retrieve **fp = &tinfo->retrieve_list;
656             struct filter_retrieve *f =
657                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
658             
659             while (*fp)
660                 fp = &(*fp)->next;
661
662             *fp = f;
663             f->name = 0;
664             f->identifier = 0;
665             f->convert = 0;
666             f->next = 0;
667
668             for (attr = ptr->properties; attr; attr = attr->next)
669             {
670                 if (attr_content(attr, "identifier", 
671                                  &f->identifier))
672                     ;
673                 else if (attr_content(attr, "name", &f->name))
674                     ;
675                 else
676                 {
677                     dom_log(YLOG_WARN, tinfo, ptr,
678                             "bad attribute @%s,  expected @identifier|@name",
679                             attr->name);
680                 }
681             }
682             parse_convert(tinfo, ptr->children, &f->convert);
683         }
684         else if (!XML_STRCMP(ptr->name, "store"))
685         {
686             /*
687               <store name="F">
688               <xslt stylesheet="some.xsl"/>
689               <xslt stylesheet="some.xsl"/>
690               </retrieve>
691             */
692             struct filter_store *f =
693                 nmem_malloc(tinfo->nmem_config, sizeof(*f));
694             
695             tinfo->store = f;
696             f->convert = 0;
697             parse_convert(tinfo, ptr->children, &f->convert);
698         }
699         else if (!XML_STRCMP(ptr->name, "input"))
700         {
701             /*
702               <input syntax="xml">
703               <xmlreader level="1"/>
704               </input>
705               <input syntax="usmarc">
706               <marc inputcharset="marc-8"/>
707               </input>
708             */
709             struct _xmlAttr *attr;
710             const char  *syntax = 0;
711             const char *name = 0;
712             for (attr = ptr->properties; attr; attr = attr->next)
713             {
714                 if (attr_content(attr, "syntax", &syntax))
715                     ;
716                 else if (attr_content(attr, "name", &name))
717                     ;
718                 else
719                 {
720                     dom_log(YLOG_WARN, tinfo, ptr,
721                             "bad attribute @%s,  expected @syntax|@name",
722                             attr->name);
723                 }
724             }
725             parse_input(tinfo, ptr->children, syntax, name);
726         }
727         else
728         {
729             dom_log(YLOG_WARN, tinfo, ptr,
730                     "bad element <%s>, "
731                     "expected <extract>|<input>|<retrieve>|<store>",
732                     ptr->name);
733             return ZEBRA_FAIL;
734         }
735     }
736     if (!tinfo->input_list)
737     {
738         struct filter_input *p 
739             = new_input(tinfo, DOM_INPUT_XMLREADER);
740         p->u.xmlreader.split_level = 0;
741         p->u.xmlreader.reader = 0;
742     }
743     return ZEBRA_OK;
744 }
745
746 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
747                                                const char *est)
748 {
749     struct filter_retrieve *f = tinfo->retrieve_list;
750
751     /* return first schema if no est is provided */
752     if (!est)
753         return f;
754     for (; f; f = f->next)
755     { 
756         /* find requested schema */
757         if (est) 
758         {    
759             if (f->identifier && !strcmp(f->identifier, est))
760                 return f;
761             if (f->name && !strcmp(f->name, est))
762                 return f;
763         } 
764     }
765     return 0;
766 }
767
768 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
769 {
770     struct filter_info *tinfo = clientData;
771     if (!args || !*args)
772     {
773         yaz_log(YLOG_WARN, "dom filter: need config file");
774         return ZEBRA_FAIL;
775     }
776
777     if (tinfo->fname && !strcmp(args, tinfo->fname))
778         return ZEBRA_OK;
779     
780     tinfo->profile_path = res_get(res, "profilePath");
781
782     destroy_dom(tinfo);
783     return parse_dom(tinfo, args);
784 }
785
786 static void filter_destroy(void *clientData)
787 {
788     struct filter_info *tinfo = clientData;
789     destroy_dom(tinfo);
790     nmem_destroy(tinfo->nmem_config);
791     nmem_destroy(tinfo->nmem_record);
792     xfree(tinfo);
793 }
794
795 static int ioread_ex(void *context, char *buffer, int len)
796 {
797     struct recExtractCtrl *p = context;
798     return p->stream->readf(p->stream, buffer, len);
799 }
800
/* Close callback: does nothing and always returns 0. */
static int ioclose_ex(void *context)
{
    (void) context;
    return 0;
}
805
806
807
808 /* DOM filter style indexing */
809 static void index_value_of(struct filter_info *tinfo, 
810                            struct recExtractCtrl *extctr,
811                            RecWord* recword, 
812                            xmlNodePtr node, 
813                            const char *index_p)
814 {
815     if (tinfo->record_info_invoked == 1)
816     {
817         xmlChar *text = xmlNodeGetContent(node);
818         size_t text_len = strlen((const char *)text);
819         
820         /* if there is no text, we do not need to proceed */
821         if (text_len)
822         {            
823             /* keep seqno base so that all text will have
824                identical seqno's for multiple fields , e.g
825                <z:index name="title:w any:w title:p">.. */
826             
827             zint seqno_base = recword->seqno;
828             zint seqno_max = recword->seqno;
829        
830
831             const char *look = index_p;
832             const char *bval;
833             const char *eval;
834
835             xmlChar index[256];
836             xmlChar type[256];
837
838             /* assingning text to be indexed */
839             recword->term_buf = (const char *)text;
840             recword->term_len = text_len;
841
842             /* parsing all index name/type pairs */
843             /* may not start with ' ' or ':' */
844             while (*look && ' ' != *look && ':' != *look)
845             {
846                 /* setting name and type to zero */
847                 *index = '\0';
848                 *type = '\0';
849     
850                 /* parsing one index name */
851                 bval = look;
852                 while (*look && ':' != *look && ' ' != *look)
853                 {
854                     look++;
855                 }
856                 eval = look;
857                 strncpy((char *)index, (const char *)bval, eval - bval);
858                 index[eval - bval] = '\0';
859     
860     
861                 /* parsing one index type, if existing */
862                 if (':' == *look)
863                 {
864                     look++;
865       
866                     bval = look;
867                     while (*look && ' ' != *look)
868                     {
869                         look++;
870                     }
871                     eval = look;
872                     strncpy((char *)type, (const char *)bval, eval - bval);
873                     type[eval - bval] = '\0';
874                 }
875
876                 /* actually indexing the text given */
877
878                 recword->seqno = seqno_base;
879                 recword->index_name = (const char *)index;
880                 if (*type)
881                     recword->index_type = (const char *) type;
882
883                 /* writing debug out */
884                 if (extctr->flagShowRecords)
885                     dom_log(YLOG_LOG, tinfo, 0, 
886                             "INDEX '%s:%s' '%s'", 
887                             (const char *) index,
888                             (const char *) type, 
889                             (const char *) text);
890                 
891                 (extctr->tokenAdd)(recword);
892
893                 if (seqno_max < recword->seqno)
894                     seqno_max = recword->seqno;
895
896                 /* eat whitespaces */
897                 if (*look && ' ' == *look)
898                 {
899                     look++;
900                 } 
901             }
902             recword->seqno = seqno_max;
903         }
904         xmlFree(text); 
905     }
906 }
907
908
909 /* DOM filter style indexing */
910 static void set_record_info(struct filter_info *tinfo, 
911                             struct recExtractCtrl *extctr, 
912                             xmlNodePtr node, 
913                             const char * id_p, 
914                             const char * rank_p, 
915                             const char * type_p)
916 {
917     /* writing debug info out */
918     if (extctr && extctr->flagShowRecords)
919         dom_log(YLOG_LOG, tinfo, node,
920                 "RECORD id=%s rank=%s type=%s", 
921                 id_p ? (const char *) id_p : "(null)",
922                 rank_p ? (const char *) rank_p : "(null)",
923                 type_p ? (const char *) type_p : "(null)");
924     
925
926     if (id_p && *id_p)
927     {
928         size_t l = strlen(id_p);
929         if (l >= sizeof(extctr->match_criteria))
930             l = sizeof(extctr->match_criteria)-1;
931         memcpy(extctr->match_criteria, id_p, l);
932         extctr->match_criteria[l] = '\0';
933     }
934
935     if (rank_p && *rank_p)
936         extctr->staticrank = atozint((const char *)rank_p);
937
938     if (type_p && *type_p)
939     {
940         enum zebra_recctrl_action_t action = action_update;
941         if (!strcmp(type_p, "insert"))
942             action = action_insert;
943         else if (!strcmp(type_p, "delete"))
944             action = action_delete;
945         else if (!strcmp(type_p, "replace"))
946             action = action_replace;
947         else if (!strcmp(type_p, "update"))
948             action = action_update;
949         else
950             dom_log(YLOG_WARN, tinfo, node, "bad @type value: %s", type_p);
951         extctr->action = action;
952     }
953
954     if (tinfo->record_info_invoked == 1)
955     {
956         /* warn about multiple only once */
957         dom_log(YLOG_WARN, tinfo, node, "multiple record elements");
958     }
959     tinfo->record_info_invoked++;
960
961 }
962
963
964 /* DOM filter style indexing */
965 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
966                                            struct recExtractCtrl *extctr, 
967                                            RecWord* recword, 
968                                            xmlNodePtr node)
969 {
970     if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
971         && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
972     {
973         if (0 == XML_STRCMP(node->name, "index"))
974         {
975             const char *index_p = 0;
976
977             struct _xmlAttr *attr;      
978             for (attr = node->properties; attr; attr = attr->next)
979             {
980                 if (attr_content(attr, "name", &index_p))
981                 {
982                     index_value_of(tinfo, extctr, recword, node, index_p);
983                 }  
984                 else
985                 {
986                     dom_log(YLOG_WARN, tinfo, node,
987                             "bad attribute @%s, expected @name",
988                             attr->name);
989                 }
990             }
991         }
992         else if (0 == XML_STRCMP(node->name, "record"))
993         {
994             const char *id_p = 0;
995             const char *rank_p = 0;
996             const char *type_p = 0;
997
998             struct _xmlAttr *attr;
999             for (attr = node->properties; attr; attr = attr->next)
1000             {
1001                 if (attr_content(attr, "id", &id_p))
1002                     ;
1003                 else if (attr_content(attr, "rank", &rank_p))
1004                     ;
1005                 else if (attr_content(attr, "type", &type_p))
1006                     ;
1007                 else
1008                 {
1009                     dom_log(YLOG_WARN, tinfo, node,
1010                             "bad attribute @%s, expected @id|@rank|@type",
1011                             attr->name);
1012                 }
1013             }
1014             set_record_info(tinfo, extctr, node, id_p, rank_p, type_p);
1015         } 
1016         else
1017         {
1018             dom_log(YLOG_WARN, tinfo, node,
1019                     "bad element <%s>,"
1020                     " expected <record>|<index> in namespace '%s'",
1021                     node->name, zebra_dom_ns);
1022         }
1023     }
1024 }
1025
/* Parses one "name=value" pair at the current position of a processing
 * instruction string.
 *
 *   c_ptr      in/out cursor. On a match it is advanced to the character
 *              that ended the value (a space or the terminating NUL);
 *              otherwise it is left unchanged.
 *   name       attribute name that must be followed directly by '='.
 *   value      receives the value, truncated to fit and NUL terminated.
 *   value_max  capacity of value in bytes. With a capacity of 0 nothing
 *              is written at all (the value is still skipped).
 *
 * The value extends up to the next space or the end of the string.
 * Returns 1 if the pair was recognised, 0 otherwise.
 */
static int attr_content_pi(const char **c_ptr, const char *name,
                           char *value, size_t value_max)
{
    const size_t name_len = strlen(name);
    const char *look = *c_ptr;
    size_t used = 0;

    /* strncmp stops at a NUL in look, so look[name_len] is only read
       when look really holds at least name_len characters */
    if (strncmp(look, name, name_len) != 0 || look[name_len] != '=')
        return 0;

    look += name_len + 1;
    for (; *look && ' ' != *look; look++)
    {
        /* "used + 1 < value_max" rather than "used < value_max - 1":
           the latter wraps around for value_max == 0 */
        if (used + 1 < value_max)
            value[used++] = *look;
    }
    if (value_max > 0)
        value[used] = '\0';

    *c_ptr = look;
    return 1;
}
1052
1053 /* DOM filter style indexing */
1054 static void process_xml_pi_node(struct filter_info *tinfo, 
1055                                 struct recExtractCtrl *extctr, 
1056                                 xmlNodePtr node,
1057                                 const char **index_pp)
1058 {
1059     /* if right PI name, continue parsing PI */
1060     if (0 == strcmp(zebra_pi_name, (const char *)node->name))
1061     {
1062         xmlChar *pi_p =  node->content;
1063         const char *look = (const char *) node->content;
1064     
1065         /* parsing PI record instructions */
1066         if (0 == strncmp((const char *)look, "record", 6))
1067         {
1068             char id[256];
1069             char rank[256];
1070             char type[256];
1071             
1072             *id = '\0';
1073             *rank = '\0';
1074             *type = '\0';
1075             look += 6;
1076             for (;;)
1077             {
1078                 /* eat whitespace */
1079                 while (' ' == *look)
1080                     look++;
1081                 if (*look == '\0')
1082                     break;
1083                 if (attr_content_pi(&look, "id", id, sizeof(id)))
1084                     ;
1085                 else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
1086                     ;
1087                 else if (attr_content_pi(&look, "type", type, sizeof(type)))
1088                     ;
1089                 else
1090                 {
1091                     dom_log(YLOG_WARN, tinfo, node,
1092                             "content '%s', can not parse '%s'",
1093                             pi_p, look);
1094                     break;
1095                 }
1096             }
1097             set_record_info(tinfo, extctr, node, id, rank, type);
1098         } 
1099         /* parsing index instruction */
1100         else if (0 == strncmp((const char *)look, "index", 5))
1101         {
1102             look += 5;
1103       
1104             /* eat whitespace */
1105             while (*look && ' ' == *look)
1106                 look++;
1107
1108             /* export index instructions to outside */
1109             *index_pp = look;
1110         } 
1111         else 
1112         {
1113             dom_log(YLOG_WARN, tinfo, node,
1114                     "content '%s', can not parse '%s'",
1115                     pi_p, look);
1116         }
1117     }
1118 }
1119
1120 /* DOM filter style indexing */
1121 static void process_xml_element_node(struct filter_info *tinfo, 
1122                                      struct recExtractCtrl *extctr, 
1123                                      RecWord* recword, 
1124                                      xmlNodePtr node)
1125 {
1126     /* remember indexing instruction from PI to next element node */
1127     const char *index_p = 0;
1128
1129     /* check if we are an element node in the special zebra namespace 
1130        and either set record data or index value-of node content*/
1131     process_xml_element_zebra_node(tinfo, extctr, recword, node);
1132   
1133     /* loop through kid nodes */
1134     for (node = node->children; node; node = node->next)
1135     {
1136         /* check and set PI record and index index instructions */
1137         if (node->type == XML_PI_NODE)
1138         {
1139             process_xml_pi_node(tinfo, extctr, node, &index_p);
1140         }
1141         else if (node->type == XML_ELEMENT_NODE)
1142         {
1143             /* if there was a PI index instruction before this element */
1144             if (index_p)
1145             {
1146                 index_value_of(tinfo, extctr, recword, node, index_p);
1147                 index_p = 0;
1148             }
1149             process_xml_element_node(tinfo, extctr, recword,node);
1150         }
1151         else
1152             continue;
1153     }
1154 }
1155
1156
1157 /* DOM filter style indexing */
1158 static void extract_dom_doc_node(struct filter_info *tinfo, 
1159                                  struct recExtractCtrl *extctr, 
1160                                  xmlDocPtr doc)
1161 {
1162     /* only need to do the initialization once, reuse recword for all terms */
1163     RecWord recword;
1164     (*extctr->init)(extctr, &recword);
1165
1166     process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
1167 }
1168
1169
1170 static int convert_extract_doc(struct filter_info *tinfo, 
1171                                struct filter_input *input,
1172                                struct recExtractCtrl *p, 
1173                                xmlDocPtr doc)
1174 {
1175     xmlChar *buf_out;
1176     int len_out;
1177     const char *params[10];
1178     xsltStylesheetPtr last_xsp = 0;
1179
1180     /* per default do not ingest record */
1181     tinfo->record_info_invoked = 0;
1182
1183     /* exit if empty document given */
1184     if (!doc)
1185         return RECCTRL_EXTRACT_SKIP;
1186
1187     /* we actuallu have a document which needs to be processed further */
1188     params[0] = 0;
1189     set_param_str(params, "schema", zebra_dom_ns, tinfo->nmem_record);
1190
1191     if (p && p->flagShowRecords)
1192     {
1193         xmlChar *buf_out;
1194         int len_out;
1195         xmlDocDumpMemory(doc, &buf_out, &len_out);
1196 #if 0 
1197         FILE *outf = fopen("extract.xml", "w");
1198         fwrite(buf_out, 1, len_out, outf);
1199         fclose(outf);
1200 #endif
1201         yaz_log(YLOG_LOG, "Extract Doc: %.*s", len_out, buf_out);
1202     }
1203
1204     if (p->setStoreData)
1205     {
1206         xmlDocPtr store_doc = 0;
1207
1208         /* input conversion */
1209         perform_convert(tinfo, p, 0, input->convert, params, &doc, 0);
1210         
1211         if (tinfo->store)
1212         {
1213             /* store conversion */
1214             store_doc = xmlCopyDoc(doc, 1);
1215             perform_convert(tinfo, p, 0, tinfo->store->convert,
1216                             params, &store_doc, &last_xsp);
1217         }
1218         
1219         /* saving either store doc or original doc in case no store doc exists */
1220         if (last_xsp)
1221             xsltSaveResultToString(&buf_out, &len_out, 
1222                                    store_doc ? store_doc : doc, last_xsp);
1223         else
1224             xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1225         
1226         if (p->setStoreData)
1227             (*p->setStoreData)(p, buf_out, len_out);
1228         xmlFree(buf_out);
1229         if (store_doc)
1230             xmlFreeDoc(store_doc);
1231     }
1232
1233
1234     /* extract conversion */
1235     perform_convert(tinfo, p, 0, tinfo->extract->convert, params, &doc, 0);
1236
1237
1238     /* finally, do the indexing */
1239     if (doc){
1240         extract_dom_doc_node(tinfo, p, doc);
1241         xmlFreeDoc(doc);
1242     }
1243     
1244     /* there was nothing to index, so there is no inserted/updated record */
1245     if (tinfo->record_info_invoked == 0)
1246         return RECCTRL_EXTRACT_SKIP;
1247
1248     return RECCTRL_EXTRACT_OK;
1249 }
1250
1251 static int extract_xml_split(struct filter_info *tinfo,
1252                              struct filter_input *input,
1253                              struct recExtractCtrl *p)
1254 {
1255     int ret;
1256
1257     if (p->first_record)
1258     {
1259         if (input->u.xmlreader.reader)
1260             xmlFreeTextReader(input->u.xmlreader.reader);
1261         input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1262                                                    p /* I/O handler */,
1263                                                    0 /* URL */, 
1264                                                    0 /* encoding */,
1265                                                    XML_PARSE_XINCLUDE
1266                                                    | XML_PARSE_NOENT
1267                                                    | XML_PARSE_NONET);
1268     }
1269     if (!input->u.xmlreader.reader)
1270         return RECCTRL_EXTRACT_ERROR_GENERIC;
1271
1272     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1273     while (ret == 1)
1274     {
1275         int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1276         int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1277
1278         if (type == XML_READER_TYPE_ELEMENT && 
1279             input->u.xmlreader.split_level == depth)
1280         {
1281             xmlNodePtr ptr;
1282
1283             /* per default do not ingest record */
1284             tinfo->record_info_invoked = 0;
1285             
1286             ptr = xmlTextReaderExpand(input->u.xmlreader.reader);
1287             if (ptr)
1288             {
1289                 /* we have a new document */
1290
1291                 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1292                 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1293                 
1294                 xmlDocSetRootElement(doc, ptr2);
1295                 
1296                 /* writing debug info out */
1297                 if (p->flagShowRecords)
1298                 {
1299                     xmlChar *buf_out = 0;
1300                     int len_out = 0;
1301                     xmlDocDumpMemory(doc, &buf_out, &len_out);
1302                     yaz_log(YLOG_LOG, "%s: XMLREADER level: %i\n%.*s", 
1303                             tinfo->fname ? tinfo->fname : "(none)",
1304                             depth, len_out, buf_out); 
1305                     xmlFree(buf_out);
1306                 }
1307                 
1308                 return convert_extract_doc(tinfo, input, p, doc);
1309             }
1310             else
1311             {
1312                 xmlFreeTextReader(input->u.xmlreader.reader);
1313                 input->u.xmlreader.reader = 0;
1314                 return RECCTRL_EXTRACT_ERROR_GENERIC;
1315             }
1316         }
1317         ret = xmlTextReaderRead(input->u.xmlreader.reader);
1318     }
1319     xmlFreeTextReader(input->u.xmlreader.reader);
1320     input->u.xmlreader.reader = 0;
1321     return RECCTRL_EXTRACT_EOF;
1322 }
1323
1324 static int extract_xml_full(struct filter_info *tinfo, 
1325                             struct filter_input *input,
1326                             struct recExtractCtrl *p)
1327 {
1328     if (p->first_record) /* only one record per stream */
1329     {
1330         xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1331                                   p /* I/O handler */,
1332                                   0 /* URL */,
1333                                   0 /* encoding */,
1334                                   XML_PARSE_XINCLUDE
1335                                   | XML_PARSE_NOENT
1336                                   | XML_PARSE_NONET);
1337         if (!doc)
1338         {
1339             return RECCTRL_EXTRACT_ERROR_GENERIC;
1340         }
1341         return convert_extract_doc(tinfo, input, p, doc);
1342     }
1343     else
1344         return RECCTRL_EXTRACT_EOF;
1345 }
1346
1347 static int extract_iso2709(struct filter_info *tinfo,
1348                            struct filter_input *input,
1349                            struct recExtractCtrl *p)
1350 {
1351     char buf[100000];
1352     int record_length;
1353     int read_bytes, r;
1354
1355     if (p->stream->readf(p->stream, buf, 5) != 5)
1356         return RECCTRL_EXTRACT_EOF;
1357     while (*buf < '0' || *buf > '9')
1358     {
1359         int i;
1360
1361         dom_log(YLOG_WARN, tinfo, 0,
1362                 "MARC: Skipping bad byte %d (0x%02X)",
1363                 *buf & 0xff, *buf & 0xff);
1364         for (i = 0; i<4; i++)
1365             buf[i] = buf[i+1];
1366
1367         if (p->stream->readf(p->stream, buf+4, 1) != 1)
1368             return RECCTRL_EXTRACT_EOF;
1369     }
1370     record_length = atoi_n (buf, 5);
1371     if (record_length < 25)
1372     {
1373         dom_log(YLOG_WARN, tinfo, 0,
1374                 "MARC record length < 25, is %d",  record_length);
1375         return RECCTRL_EXTRACT_ERROR_GENERIC;
1376     }
1377     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1378     if (read_bytes < record_length-5)
1379     {
1380         dom_log(YLOG_WARN, tinfo, 0,
1381                 "couldn't read whole MARC record");
1382         return RECCTRL_EXTRACT_ERROR_GENERIC;
1383     }
1384     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1385     if (r < record_length)
1386     {
1387         dom_log (YLOG_WARN, tinfo, 0,
1388                  "parsing of MARC record failed r=%d length=%d",
1389                  r, record_length);
1390         return RECCTRL_EXTRACT_ERROR_GENERIC;
1391     }
1392     else
1393     {
1394         xmlDocPtr rdoc;
1395         xmlNode *root_ptr;
1396         yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 
1397                            "http://www.loc.gov/MARC21/slim", 0, 0);
1398         rdoc = xmlNewDoc((const xmlChar*) "1.0");
1399         xmlDocSetRootElement(rdoc, root_ptr);
1400         return convert_extract_doc(tinfo, input, p, rdoc);        
1401     }
1402     return RECCTRL_EXTRACT_OK;
1403 }
1404
1405 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1406 {
1407     struct filter_info *tinfo = clientData;
1408     struct filter_input *input = tinfo->input_list;
1409
1410     if (!input)
1411         return RECCTRL_EXTRACT_ERROR_GENERIC;
1412     
1413     nmem_reset(tinfo->nmem_record);
1414
1415     if (p->setStoreData == 0)
1416         return extract_xml_full(tinfo, input, p);
1417     switch(input->type)
1418     {
1419     case DOM_INPUT_XMLREADER:
1420         if (input->u.xmlreader.split_level == 0)
1421             return extract_xml_full(tinfo, input, p);
1422         else
1423             return extract_xml_split(tinfo, input, p);
1424         break;
1425     case DOM_INPUT_MARC:
1426         return extract_iso2709(tinfo, input, p);
1427     }
1428     return RECCTRL_EXTRACT_ERROR_GENERIC;
1429 }
1430
1431 static int ioread_ret(void *context, char *buffer, int len)
1432 {
1433     struct recRetrieveCtrl *p = context;
1434     int r = p->stream->readf(p->stream, buffer, len);
1435     return r;
1436 }
1437
/* libxml2 close callback used while retrieving: does nothing with the
 * context and always reports success (0). */
static int ioclose_ret(void *context)
{
    (void) context;
    return 0;
}
1442
1443 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
1444 {
1445     /* const char *esn = zebra_dom_ns; */
1446     const char *esn = 0;
1447     const char *params[32];
1448     struct filter_info *tinfo = clientData;
1449     xmlDocPtr doc;
1450     struct filter_retrieve *retrieve;
1451     xsltStylesheetPtr last_xsp = 0;
1452
1453     if (p->comp)
1454     {
1455         if (p->comp->which == Z_RecordComp_simple
1456             && p->comp->u.simple->which == Z_ElementSetNames_generic)
1457         {
1458             esn = p->comp->u.simple->u.generic;
1459         }
1460         else if (p->comp->which == Z_RecordComp_complex 
1461                  && p->comp->u.complex->generic->elementSpec
1462                  && p->comp->u.complex->generic->elementSpec->which ==
1463                  Z_ElementSpec_elementSetName)
1464         {
1465             esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1466         }
1467     }
1468     retrieve = lookup_retrieve(tinfo, esn);
1469     if (!retrieve)
1470     {
1471         p->diagnostic =
1472             YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1473         p->addinfo = odr_strdup_null(p->odr, esn);
1474         return 0;
1475     }
1476
1477     params[0] = 0;
1478     set_param_int(params, "id", p->localno, p->odr->mem);
1479     if (p->fname)
1480         set_param_str(params, "filename", p->fname, p->odr->mem);
1481     if (p->staticrank >= 0)
1482         set_param_int(params, "rank", p->staticrank, p->odr->mem);
1483
1484     if (esn)
1485         set_param_str(params, "schema", esn, p->odr->mem);
1486     else
1487         if (retrieve->name)
1488             set_param_str(params, "schema", retrieve->name, p->odr->mem);
1489         else if (retrieve->identifier)
1490             set_param_str(params, "schema", retrieve->identifier, p->odr->mem);
1491         else
1492             set_param_str(params, "schema", "", p->odr->mem);
1493
1494     if (p->score >= 0)
1495         set_param_int(params, "score", p->score, p->odr->mem);
1496     set_param_int(params, "size", p->recordSize, p->odr->mem);
1497
1498     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1499                     0 /* URL */,
1500                     0 /* encoding */,
1501                     XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
1502     if (!doc)
1503     {
1504         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1505         return 0;
1506     }
1507
1508     /* retrieve conversion */
1509     perform_convert(tinfo, 0, p, retrieve->convert, params, &doc, &last_xsp);
1510     if (!doc)
1511     {
1512         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1513     }
1514     else if (!p->input_format
1515              || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1516     {
1517         xmlChar *buf_out;
1518         int len_out;
1519
1520         if (last_xsp)
1521             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1522         else
1523             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1524
1525         p->output_format = yaz_oid_recsyn_xml;
1526         p->rec_len = len_out;
1527         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1528         memcpy(p->rec_buf, buf_out, p->rec_len);
1529         xmlFree(buf_out);
1530     }
1531     else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
1532     {
1533         xmlChar *buf_out;
1534         int len_out;
1535
1536         if (last_xsp)
1537             xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1538         else
1539             xmlDocDumpMemory(doc, &buf_out, &len_out);            
1540         
1541         p->output_format = yaz_oid_recsyn_sutrs;
1542         p->rec_len = len_out;
1543         p->rec_buf = odr_malloc(p->odr, p->rec_len);
1544         memcpy(p->rec_buf, buf_out, p->rec_len);
1545         
1546         xmlFree(buf_out);
1547     }
1548     else
1549     {
1550         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1551     }
1552     xmlFreeDoc(doc);
1553     return 0;
1554 }
1555
/* Descriptor of the "dom" record filter: its name and the handlers
 * defined in this file. The initializer is positional.
 * NOTE(review): the slots presumably are version, name, init, config,
 * destroy, extract and retrieve - confirm against struct recType in
 * idzebra/recctrl.h. */
static struct recType filter_type = {
    0,
    "dom",
    filter_init,
    filter_config,
    filter_destroy,
    filter_extract,
    filter_retrieve
};
1565
1566 RecType
1567 #if IDZEBRA_STATIC_DOM
1568 idzebra_filter_dom
1569 #else
1570 idzebra_filter
1571 #endif
1572
1573 [] = {
1574     &filter_type,
1575     0,
1576 };
1577 /*
1578  * Local variables:
1579  * c-basic-offset: 4
1580  * c-file-style: "Stroustrup"
1581  * indent-tabs-mode: nil
1582  * End:
1583  * vim: shiftwidth=4 tabstop=8 expandtab
1584  */
1585