pretty formatting warning messages, always giving the file name and
[idzebra-moved-to-github.git] / index / mod_dom.c
1 /* $Id: mod_dom.c,v 1.13 2007-02-15 14:33:41 marc Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5    This file is part of the Zebra server.
6
7    Zebra is free software; you can redistribute it and/or modify it under
8    the terms of the GNU General Public License as published by the Free
9    Software Foundation; either version 2, or (at your option) any later
10    version.
11
12    Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or
14    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15    for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>

#include <yaz/diagbib1.h>
#include <yaz/tpath.h>

#include <libxml/xmlversion.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlIO.h>
#include <libxml/xmlreader.h>
#include <libxslt/transform.h>
#include <libxslt/xsltutils.h>

#if YAZ_HAVE_EXSLT
#include <libexslt/exslt.h>
#endif

#include <idzebra/util.h>
#include <idzebra/recctrl.h>
44
45
46
47 /* Alvis style indexing */
48 #define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
49 static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
50
51 /* DOM filter style indexing */
52 #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
53 static const char *zebra_dom_ns = ZEBRA_DOM_NS;
54
55 /* DOM filter style indexing */
56 #define ZEBRA_PI_NAME "zebra-2.0"
57 static const char *zebra_pi_name = ZEBRA_PI_NAME;
58
59
60
61 struct convert_s {
62     const char *stylesheet;
63     xsltStylesheetPtr stylesheet_xsp;
64     struct convert_s *next;
65 };
66
67 struct filter_extract {
68     const char *name;
69     struct convert_s *convert;
70 };
71
72 struct filter_store {
73     struct convert_s *convert;
74 };
75
76 struct filter_retrieve {
77     const char *name;
78     const char *identifier;
79     struct convert_s *convert;
80     struct filter_retrieve *next;
81 };
82
83 #define DOM_INPUT_XMLREADER 1
84 #define DOM_INPUT_MARC 2
85 struct filter_input {
86     const char *syntax;
87     const char *name;
88     struct convert_s *convert;
89     int type;
90     union {
91         struct {
92             const char *input_charset;
93             yaz_marc_t handle;
94             yaz_iconv_t iconv;
95         } marc;
96         struct {
97             xmlTextReaderPtr reader;
98             int split_level;
99         } xmlreader;
100     } u;
101     struct filter_input *next;
102 };
103   
104 struct filter_info {
105     char *fname;
106     char *full_name;
107     const char *profile_path;
108     ODR odr_record;
109     ODR odr_config;
110     xmlDocPtr doc_config;
111     struct filter_extract *extract;
112     struct filter_retrieve *retrieve_list;
113     struct filter_input *input_list;
114     struct filter_store *store;
115 };
116
117 #define XML_STRCMP(a,b)   strcmp((char*)a, b)
118 #define XML_STRLEN(a) strlen((char*)a)
119
120
121
122
123 static void set_param_str(const char **params, const char *name,
124                           const char *value, ODR odr)
125 {
126     char *quoted = odr_malloc(odr, 3 + strlen(value));
127     sprintf(quoted, "'%s'", value);
128     while (*params)
129         params++;
130     params[0] = name;
131     params[1] = quoted;
132     params[2] = 0;
133 }
134
135 static void set_param_int(const char **params, const char *name,
136                           zint value, ODR odr)
137 {
138     char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
139     while (*params)
140         params++;
141     sprintf(quoted, "'" ZINT_FORMAT "'", value);
142     params[0] = name;
143     params[1] = quoted;
144     params[2] = 0;
145 }
146
147 static void *filter_init(Res res, RecType recType)
148 {
149     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
150     tinfo->fname = 0;
151     tinfo->full_name = 0;
152     tinfo->profile_path = 0;
153     tinfo->odr_record = odr_createmem(ODR_ENCODE);
154     tinfo->odr_config = odr_createmem(ODR_ENCODE);
155     tinfo->extract = 0;
156     tinfo->retrieve_list = 0;
157     tinfo->input_list = 0;
158     tinfo->store = 0;
159     tinfo->doc_config = 0;
160
161 #if YAZ_HAVE_EXSLT
162     exsltRegisterAll(); 
163 #endif
164
165     return tinfo;
166 }
167
168 static int attr_content(struct _xmlAttr *attr, const char *name,
169                         const char **dst_content)
170 {
171     if (!XML_STRCMP(attr->name, name) && attr->children 
172         && attr->children->type == XML_TEXT_NODE)
173         {
174             *dst_content = (const char *)(attr->children->content);
175             return 1;
176         }
177     return 0;
178 }
179
180 static void destroy_xsp(struct convert_s *c)
181 {
182     while(c)
183         {
184             if (c->stylesheet_xsp)
185                 xsltFreeStylesheet(c->stylesheet_xsp);
186             c = c->next;
187         }
188 }
189
190 static void destroy_dom(struct filter_info *tinfo)
191 {
192     if (tinfo->extract)
193         {
194             destroy_xsp(tinfo->extract->convert);
195             tinfo->extract = 0;
196         }
197     if (tinfo->store)
198         {
199             destroy_xsp(tinfo->store->convert);
200             tinfo->store = 0;
201         }
202     if (tinfo->input_list)
203         {
204             struct filter_input *i_ptr;
205             for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
206                 {
207                     switch(i_ptr->type)
208                         {
209                         case DOM_INPUT_XMLREADER:
210                             if (i_ptr->u.xmlreader.reader)
211                                 xmlFreeTextReader(i_ptr->u.xmlreader.reader);
212                             break;
213                         case DOM_INPUT_MARC:
214                             yaz_iconv_close(i_ptr->u.marc.iconv);
215                             yaz_marc_destroy(i_ptr->u.marc.handle);
216                             break;
217                         }
218                     destroy_xsp(i_ptr->convert);
219                 }
220             tinfo->input_list = 0;
221         }
222     if (tinfo->retrieve_list)
223         {
224             struct filter_retrieve *r_ptr;
225             for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
226                 destroy_xsp(r_ptr->convert);
227             tinfo->retrieve_list = 0;
228         }
229
230     if (tinfo->doc_config)
231         {
232             xmlFreeDoc(tinfo->doc_config);
233             tinfo->doc_config = 0;
234         }
235     odr_reset(tinfo->odr_config);
236 }
237
238 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
239                                struct convert_s **l)
240 {
241     *l = 0;
242     for(; ptr; ptr = ptr->next)
243         {
244             if (ptr->type != XML_ELEMENT_NODE)
245                 continue;
246             if (!XML_STRCMP(ptr->name, "xslt"))
247                 {
248                     struct _xmlAttr *attr;
249                     struct convert_s *p 
250                         = odr_malloc(tinfo->odr_config, sizeof(*p));
251
252                     p->next = 0;
253                     p->stylesheet = 0;
254                     p->stylesheet_xsp = 0;
255
256                     for (attr = ptr->properties; attr; attr = attr->next)
257                         if (attr_content(attr, "stylesheet", &p->stylesheet))
258                             ;
259                         else
260                             yaz_log(YLOG_WARN, "%s: dom filter: "
261                                     "%s bad attribute @%s, "
262                                     "expected @stylesheet",
263                                     tinfo->fname, 
264                                     xmlGetNodePath(ptr), attr->name);
265                     if (p->stylesheet)
266                         {
267                             char tmp_xslt_full_name[1024];
268                             if (!yaz_filepath_resolve(p->stylesheet, 
269                                                       tinfo->profile_path,
270                                                       NULL, 
271                                                       tmp_xslt_full_name))
272                                 {
273                                     yaz_log(YLOG_WARN, "%s: dom filter: "
274                                             "stylesheet %s not found in "
275                                             "path %s",
276                                             tinfo->fname,
277                                             p->stylesheet, 
278                                             tinfo->profile_path);
279                                     return ZEBRA_FAIL;
280                                 }
281                 
282                             p->stylesheet_xsp
283                                 = xsltParseStylesheetFile((const xmlChar*) 
284                                                           tmp_xslt_full_name);
285                             if (!p->stylesheet_xsp)
286                                 {
287                                     yaz_log(YLOG_WARN, "%s: dom filter: "
288                                             "could not parse xslt "
289                                             "stylesheet %s",
290                                             tinfo->fname, tmp_xslt_full_name);
291                                     return ZEBRA_FAIL;
292                                 }
293                         }
294                     else
295                         {
296                             yaz_log(YLOG_WARN, "%s: dom filter: "
297                                     "%s missing attribute 'stylesheet' ", 
298                                     tinfo->fname, xmlGetNodePath(ptr));
299                             return ZEBRA_FAIL;
300                         }
301                     *l = p;
302                     l = &p->next;
303                 }
304             else
305                 {
306                     yaz_log(YLOG_LOG, 
307                             "%s: dom filter: "
308                             "%s bad node '%s'",
309                             tinfo->fname, xmlGetNodePath(ptr), ptr->name);
310                     return ZEBRA_FAIL;
311                 }
312         
313         }
314     return ZEBRA_OK;
315 }
316
317 static ZEBRA_RES perform_convert(struct filter_info *tinfo, 
318                                  struct convert_s *convert,
319                                  const char **params,
320                                  xmlDocPtr *doc,
321                                  xsltStylesheetPtr *last_xsp)
322 {
323     for (; convert; convert = convert->next)
324         {
325             xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
326                                                     *doc, params);
327             if (last_xsp)
328                 *last_xsp = convert->stylesheet_xsp;
329             xmlFreeDoc(*doc);
330             *doc = res_doc;
331         }
332     return ZEBRA_OK;
333 }
334
335 static struct filter_input *new_input(struct filter_info *tinfo, int type)
336 {
337     struct filter_input *p;
338     struct filter_input **np = &tinfo->input_list;
339     for (;*np; np = &(*np)->next)
340         ;
341     p = *np = odr_malloc(tinfo->odr_config, sizeof(*p));
342     p->next = 0;
343     p->syntax = 0;
344     p->name = 0;
345     p->convert = 0;
346     p->type = type;
347     return p;
348 }
349
350 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
351                              const char *syntax,
352                              const char *name)
353 {
354     for (; ptr; ptr = ptr->next)
355         {
356             if (ptr->type != XML_ELEMENT_NODE)
357                 continue;
358             if (!XML_STRCMP(ptr->name, "marc"))
359                 {
360                     yaz_iconv_t iconv = 0;
361                     const char *input_charset = "marc-8";
362                     struct _xmlAttr *attr;
363             
364                     for (attr = ptr->properties; attr; attr = attr->next)
365                         {
366                             if (attr_content(attr, "charset", &input_charset))
367                                 ;
368                             else
369                                 yaz_log(YLOG_WARN, "%s: dom filter: "
370                                         "%s bad attribute @%s,"
371                                         " expected @charset",
372                                         tinfo->fname, 
373                                         xmlGetNodePath(ptr), attr->name);
374                         }
375                     iconv = yaz_iconv_open("utf-8", input_charset);
376                     if (!iconv)
377                         {
378                             yaz_log(YLOG_WARN, "%s: dom filter: "
379                                     "%s unsupported @charset '%s'", 
380                                     tinfo->fname, xmlGetNodePath(ptr),
381                                     input_charset);
382                             return ZEBRA_FAIL;
383                         }
384                     else
385                         {
386                             struct filter_input *p 
387                                 = new_input(tinfo, DOM_INPUT_MARC);
388                             p->u.marc.handle = yaz_marc_create();
389                             p->u.marc.iconv = iconv;
390                 
391                             yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
392                 
393                             ptr = ptr->next;
394                 
395                             parse_convert(tinfo, ptr, &p->convert);
396                         }
397                     break;
398
399                 }
400             else if (!XML_STRCMP(ptr->name, "xmlreader"))
401                 {
402                     struct filter_input *p 
403                         = new_input(tinfo, DOM_INPUT_XMLREADER);
404                     struct _xmlAttr *attr;
405                     const char *level_str = 0;
406
407                     p->u.xmlreader.split_level = 0;
408                     p->u.xmlreader.reader = 0;
409
410                     for (attr = ptr->properties; attr; attr = attr->next)
411                         {
412                             if (attr_content(attr, "level", &level_str))
413                                 ;
414                             else
415                                 yaz_log(YLOG_WARN, "%s: dom filter: "
416                                         "%s bad attribute @%s,"
417                                         " expected @level",
418                                         tinfo->fname, xmlGetNodePath(ptr),
419                                         attr->name);
420                         }
421                     if (level_str)
422                         p->u.xmlreader.split_level = atoi(level_str);
423                 
424                     ptr = ptr->next;
425
426                     parse_convert(tinfo, ptr, &p->convert);
427                     break;
428                 }
429             else
430                 {
431                     yaz_log(YLOG_WARN, "%s: dom filter: "
432                             "%s bad element <%s>,"
433                             " expected <marc>|<xmlreader>",
434                             tinfo->fname, xmlGetNodePath(ptr), ptr->name);
435                     return ZEBRA_FAIL;
436                 }
437         }
438     return ZEBRA_OK;
439 }
440
441 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
442 {
443     char tmp_full_name[1024];
444     xmlNodePtr ptr;
445     xmlDocPtr doc;
446
447     tinfo->fname = odr_strdup(tinfo->odr_config, fname);
448     
449     if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path, 
450                              NULL, tmp_full_name))
451         tinfo->full_name = odr_strdup(tinfo->odr_config, tmp_full_name);
452     else
453         tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname);
454     
455     yaz_log(YLOG_LOG, "%s dom filter: "
456             "loading config file %s", tinfo->fname, tinfo->full_name);
457     
458     doc = xmlParseFile(tinfo->full_name);
459     if (!doc)
460         {
461             yaz_log(YLOG_WARN, "%s: dom filter: "
462                     "failed to parse config file %s",
463                     tinfo->fname, tinfo->full_name);
464             return ZEBRA_FAIL;
465         }
466     /* save because we store ptrs to the content */ 
467     tinfo->doc_config = doc;
468     
469     ptr = xmlDocGetRootElement(doc);
470     if (!ptr || ptr->type != XML_ELEMENT_NODE 
471         || XML_STRCMP(ptr->name, "dom"))
472         {
473             yaz_log(YLOG_WARN, "%s: dom filter: "
474                     "%s bad root element <%s>,"
475                     " expected root element <dom>", 
476                     tinfo->fname, xmlGetNodePath(ptr), ptr->name);  
477             return ZEBRA_FAIL;
478         }
479
480     for (ptr = ptr->children; ptr; ptr = ptr->next)
481         {
482             if (ptr->type != XML_ELEMENT_NODE)
483                 continue;
484             if (!XML_STRCMP(ptr->name, "extract"))
485                 {
486                     /*
487                       <extract name="index">
488                       <xslt stylesheet="first.xsl"/>
489                       <xslt stylesheet="second.xsl"/>
490                       </extract>
491                     */
492                     struct _xmlAttr *attr;
493                     struct filter_extract *f =
494                         odr_malloc(tinfo->odr_config, sizeof(*f));
495             
496                     tinfo->extract = f;
497                     f->name = 0;
498                     f->convert = 0;
499                     for (attr = ptr->properties; attr; attr = attr->next)
500                         {
501                             if (attr_content(attr, "name", &f->name))
502                                 ;
503                             else
504                                 yaz_log(YLOG_WARN, "%s: dom filter: "
505                                         "%s bad attribute @%s"
506                                         " expected @name",
507                                         tinfo->fname, 
508                                         xmlGetNodePath(ptr),attr->name);
509
510                         }
511                     parse_convert(tinfo, ptr->children, &f->convert);
512                 }
513             else if (!XML_STRCMP(ptr->name, "retrieve"))
514                 {  
515                     /* 
516                        <retrieve name="F">
517                        <xslt stylesheet="some.xsl"/>
518                        <xslt stylesheet="some.xsl"/>
519                        </retrieve>
520                     */
521                     struct _xmlAttr *attr;
522                     struct filter_retrieve **fp = &tinfo->retrieve_list;
523                     struct filter_retrieve *f =
524                         odr_malloc(tinfo->odr_config, sizeof(*f));
525             
526                     while (*fp)
527                         fp = &(*fp)->next;
528
529                     *fp = f;
530                     f->name = 0;
531                     f->identifier = 0;
532                     f->convert = 0;
533                     f->next = 0;
534
535                     for (attr = ptr->properties; attr; attr = attr->next)
536                         {
537                             if (attr_content(attr, "identifier", 
538                                              &f->identifier))
539                                 ;
540                             else if (attr_content(attr, "name", &f->name))
541                                 ;
542                             else
543                                 yaz_log(YLOG_WARN, "%s: dom filter: "
544                                         "%s bad attribute @%s"
545                                         " expected @identifier|@name",
546                                         tinfo->fname, 
547                                         xmlGetNodePath(ptr),attr->name);
548                         }
549                     parse_convert(tinfo, ptr->children, &f->convert);
550                 }
551             else if (!XML_STRCMP(ptr->name, "store"))
552                 {
553                     /*
554                       <store name="F">
555                       <xslt stylesheet="some.xsl"/>
556                       <xslt stylesheet="some.xsl"/>
557                       </retrieve>
558                     */
559                     struct filter_store *f =
560                         odr_malloc(tinfo->odr_config, sizeof(*f));
561             
562                     tinfo->store = f;
563                     f->convert = 0;
564                     parse_convert(tinfo, ptr->children, &f->convert);
565                 }
566             else if (!XML_STRCMP(ptr->name, "input"))
567                 {
568                     /*
569                       <input syntax="xml">
570                       <xmlreader level="1"/>
571                       </input>
572                       <input syntax="usmarc">
573                       <marc inputcharset="marc-8"/>
574                       </input>
575                     */
576                     struct _xmlAttr *attr;
577                     const char  *syntax = 0;
578                     const char *name = 0;
579                     for (attr = ptr->properties; attr; attr = attr->next)
580                         {
581                             if (attr_content(attr, "syntax", &syntax))
582                                 ;
583                             else if (attr_content(attr, "name", &name))
584                                 ;
585                             else
586                                 yaz_log(YLOG_WARN, "%s: dom filter: "
587                                         "%s bad attribute @%s"
588                                         " expected @syntax|@name",
589                                         tinfo->fname, 
590                                         xmlGetNodePath(ptr),attr->name);
591                         }
592                     parse_input(tinfo, ptr->children, syntax, name);
593                 }
594             else
595                 {
596                     yaz_log(YLOG_WARN, "%s: dom filter: "
597                             "%s bad element <%s>,"
598                             " expected <extract>|<input>|<retrieve>|<store>",
599                             tinfo->fname, xmlGetNodePath(ptr), ptr->name);
600                     return ZEBRA_FAIL;
601                 }
602         }
603     return ZEBRA_OK;
604 }
605
606 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
607                                                const char *est)
608 {
609     struct filter_retrieve *f = tinfo->retrieve_list;
610
611     /* return first schema if no est is provided */
612     if (!est)
613         return f;
614     for (; f; f = f->next)
615         { 
616             /* find requested schema */
617             if (est) 
618                 {    
619                     if (f->identifier && !strcmp(f->identifier, est))
620                         return f;
621                     if (f->name && !strcmp(f->name, est))
622                         return f;
623                 } 
624         }
625     return 0;
626 }
627
628 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
629 {
630     struct filter_info *tinfo = clientData;
631     if (!args || !*args)
632         {
633             yaz_log(YLOG_WARN, "dom filter: need config file");
634             return ZEBRA_FAIL;
635         }
636
637     if (tinfo->fname && !strcmp(args, tinfo->fname))
638         return ZEBRA_OK;
639     
640     tinfo->profile_path = res_get(res, "profilePath");
641
642     destroy_dom(tinfo);
643     return parse_dom(tinfo, args);
644 }
645
646 static void filter_destroy(void *clientData)
647 {
648     struct filter_info *tinfo = clientData;
649     destroy_dom(tinfo);
650     odr_destroy(tinfo->odr_config);
651     odr_destroy(tinfo->odr_record);
652     xfree(tinfo);
653 }
654
655 static int ioread_ex(void *context, char *buffer, int len)
656 {
657     struct recExtractCtrl *p = context;
658     return p->stream->readf(p->stream, buffer, len);
659 }
660
661 static int ioclose_ex(void *context)
662 {
663     return 0;
664 }
665
666
667
668 /* Alvis style indexing */
669 static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
670                         xmlNodePtr ptr, RecWord *recWord)
671 {
672     for(; ptr; ptr = ptr->next)
673         {
674             index_cdata(tinfo, ctrl, ptr->children, recWord);
675             if (ptr->type != XML_TEXT_NODE)
676                 continue;
677             recWord->term_buf = (const char *)ptr->content;
678             recWord->term_len = XML_STRLEN(ptr->content);
679             (*ctrl->tokenAdd)(recWord);
680         }
681 }
682
683 /* Alvis style indexing */
684 static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
685                        xmlNodePtr ptr, RecWord *recWord)
686 {
687     for(; ptr; ptr = ptr->next)
688         {
689             index_node(tinfo, ctrl, ptr->children, recWord);
690             if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
691                 XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
692                 continue;
693             if (!XML_STRCMP(ptr->name, "index"))
694                 {
695                     const char *name_str = 0;
696                     const char *type_str = 0;
697                     const char *xpath_str = 0;
698                     struct _xmlAttr *attr;
699                     for (attr = ptr->properties; attr; attr = attr->next)
700                         {
701                             if (attr_content(attr, "name", &name_str))
702                                 ;
703                             else if (attr_content(attr, "xpath", &xpath_str))
704                                 ;
705                             else if (attr_content(attr, "type", &type_str))
706                                 ;
707                             else
708                                 yaz_log(YLOG_WARN, "%s: dom filter: "
709                                         "bad attribute %s for <index>",
710                                         tinfo->fname, attr->name);
711                         }
712                     if (name_str)
713                         {
714                             /* save default type */
715                             int prev_type = recWord->index_type; 
716
717                             /* type was given */
718                             if (type_str && *type_str)
719                                 recWord->index_type = *type_str; 
720
721                             recWord->index_name = name_str;
722                             index_cdata(tinfo, ctrl, ptr->children, recWord);
723
724                             /* restore it again */
725                             recWord->index_type = prev_type;     
726                         }
727                 }
728         }
729 }
730
731 /* Alvis style indexing */
732 static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
733                          xmlNodePtr ptr, RecWord *recWord)
734 {
735     const char *type_str = "update";
736
737     if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
738         !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
739         && !XML_STRCMP(ptr->name, "record"))
740         {
741             const char *id_str = 0;
742             const char *rank_str = 0;
743             struct _xmlAttr *attr;
744             for (attr = ptr->properties; attr; attr = attr->next)
745                 {
746                     if (attr_content(attr, "type", &type_str))
747                         ;
748                     else if (attr_content(attr, "id", &id_str))
749                         ;
750                     else if (attr_content(attr, "rank", &rank_str))
751                         ;
752                     else
753                         yaz_log(YLOG_WARN, "%s: dom filter: "
754                                 "bad attribute %s for <record>",
755                                 tinfo->fname, attr->name);
756                 }
757             if (id_str)
758                 sscanf(id_str, "%255s", ctrl->match_criteria);
759
760             if (rank_str)
761                 ctrl->staticrank = atozint(rank_str);
762             ptr = ptr->children;
763         }
764
765     if (!strcmp("update", type_str))
766         index_node(tinfo, ctrl, ptr, recWord);
767     else if (!strcmp("delete", type_str))
768         yaz_log(YLOG_WARN, "%s dom filter: "
769                 "delete: to be implemented");
770     else
771         yaz_log(YLOG_WARN, "dom filter: "
772                 "unknown record type '%s'", 
773                 type_str);
774 }
775
776
777 /* Alvis style indexing */
778 static void extract_doc_alvis(struct filter_info *tinfo, 
779                               struct recExtractCtrl *extctr, 
780                               xmlDocPtr doc)
781 {
782     if (doc){
783         RecWord recWord;
784         xmlChar *buf_out;
785         int len_out;
786         xmlNodePtr root_ptr;
787
788         (*extctr->init)(extctr, &recWord);
789         
790         if (extctr->flagShowRecords){
791             xmlDocDumpMemory(doc, &buf_out, &len_out);
792             fwrite(buf_out, len_out, 1, stdout);
793             xmlFree(buf_out);
794         }
795         root_ptr = xmlDocGetRootElement(doc);
796         if (root_ptr)
797             index_record(tinfo, extctr, root_ptr, &recWord);
798         else
799             yaz_log(YLOG_WARN, "%s dom filter: "
800                     "No root for index XML record");
801     }
802 }
803
804
805 /* DOM filter style indexing */
806 static int attr_content_xml(struct _xmlAttr *attr, const char *name,
807                             xmlChar **dst_content)
808 {
809     if (0 == XML_STRCMP(attr->name, name) && attr->children 
810         && attr->children->type == XML_TEXT_NODE)
811         {
812             *dst_content = (attr->children->content);
813             return 1;
814         }
815     return 0;
816 }
817
818
819 /* DOM filter style indexing */
820 static void index_value_of(struct filter_info *tinfo, 
821                            struct recExtractCtrl *extctr, 
822                            xmlNodePtr node, 
823                            xmlChar * index_p)
824 {
825     xmlChar *text = xmlNodeGetContent(node);
826     size_t text_len = strlen((const char *)text);
827
828
829     /* if there is no text, we do not need to proceed */
830     if (text_len)
831         {            
832             xmlChar *look = index_p;
833             xmlChar *bval;
834             xmlChar *eval;
835
836             xmlChar index[256];
837             xmlChar type[256];
838
839             /* assingning text to be indexed */
840             RecWord recWord;
841             (*extctr->init)(extctr, &recWord);
842             recWord.term_buf = (const char *)text;
843             recWord.term_len = text_len;
844
845             /* parsing all index name/type pairs */
846             /* may not start with ' ' or ':' */
847             while (*look && ' ' != *look && ':' != *look){
848     
849                 /* setting name and type to zero */
850                 *index = '\0';
851                 *type = '\0';
852     
853                 /* parsing one index name */
854                 bval = look;
855                 while (*look && ':' != *look && ' ' != *look){
856                     look++;
857                 }
858                 eval = look;
859                 strncpy((char *)index, (const char *)bval, eval - bval);
860                 index[eval - bval] = '\0';
861     
862     
863                 /* parsing one index type, if existing */
864                 if (':' == *look){
865                     look++;
866       
867                     bval = look;
868                     while (*look && ' ' != *look){
869                         look++;
870                     }
871                     eval = look;
872                     strncpy((char *)type, (const char *)bval, eval - bval);
873                     type[eval - bval] = '\0';
874                 }
875
876                 /* actually indexing the text given */
877                 yaz_log(YLOG_DEBUG, "%s dom filter: "
878                         "INDEX  '%s:%s' '%s'", 
879                         tinfo->fname, index, type, text);
880
881                 recWord.index_name = (const char *)index;
882                 if (type && *type)
883                     recWord.index_type = *type;
884                 (extctr->tokenAdd)(&recWord);
885
886                 /* eat whitespaces */
887                 if (*look && ' ' == *look && *(look+1)){
888                     look++;
889                 } 
890             }
891         }
892     
893     xmlFree(text); 
894 }
895
896
897 /* DOM filter style indexing */
898 static void set_record_info(struct filter_info *tinfo, 
899                             struct recExtractCtrl *extctr, 
900                             xmlChar * id_p, 
901                             xmlChar * rank_p, 
902                             xmlChar * type_p)
903 {
904     yaz_log(YLOG_DEBUG, "%s dom filter: "
905             "RECORD id=%s rank=%s type=%s", 
906             tinfo->fname,  id_p, rank_p, type_p);
907     
908     if (id_p)
909         sscanf((const char *)id_p, "%255s", extctr->match_criteria);
910
911     if (rank_p)
912         extctr->staticrank = atozint((const char *)rank_p);
913
914     /*     if (!strcmp("update", type_str)) */
915     /*         index_node(tinfo, ctrl, ptr, recWord); */
916     /*     else if (!strcmp("delete", type_str)) */
917     /*         yaz_log(YLOG_WARN, "dom filter delete: to be implemented"); */
918     /*     else */
919     /*         yaz_log(YLOG_WARN, "dom filter: unknown record type '%s'",  */
920     /*                 type_str); */
921
922 }
923
924
925 /* DOM filter style indexing */
926 static void process_xml_element_zebra_node(struct filter_info *tinfo, 
927                                            struct recExtractCtrl *extctr, 
928                                            xmlNodePtr node)
929 {
930     if (node->type == XML_ELEMENT_NODE 
931         && node->ns && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns)){
932     
933         if (0 == XML_STRCMP(node->name, "index")){
934             xmlChar *index_p = 0;
935
936             struct _xmlAttr *attr;      
937             for (attr = node->properties; attr; attr = attr->next){
938                 if (attr_content_xml(attr, "name", &index_p)){
939                     index_value_of(tinfo, extctr, node, index_p);        
940                 }  
941                 else
942                     yaz_log(YLOG_WARN,"%s dom filter: "
943                             "%s bad attribute @%s, expected @name",
944                             tinfo->fname, xmlGetNodePath(node), attr->name);
945             }
946         }
947         else if (0 == XML_STRCMP(node->name, "record")){
948             xmlChar *id_p = 0;
949             xmlChar *rank_p = 0;
950             xmlChar *type_p = 0;
951
952             struct _xmlAttr *attr;
953             for (attr = node->properties; attr; attr = attr->next){
954                 if (attr_content_xml(attr, "id", &id_p))
955                     ;
956                 else if (attr_content_xml(attr, "rank", &rank_p))
957                     ;
958                 else if (attr_content_xml(attr, "type", &type_p))
959                    ;
960                 else
961                     yaz_log(YLOG_WARN,"%s dom filter: "
962                             "%s bad attribute @%s,"
963                            " expected @id|@rank|@type",
964                            tinfo->fname, xmlGetNodePath(node), attr->name);
965
966                 if (type_p && 0 != strcmp("update", (const char *)type_p))
967                     yaz_log(YLOG_WARN,"%s dom filter: "
968                             "%s attribute @%s,"
969                             " only implemented '@type='update'",
970                             tinfo->fname, xmlGetNodePath(node), attr->name);
971           
972
973             }
974             set_record_info(tinfo, extctr, id_p, rank_p, type_p);
975         } else {
976             yaz_log(YLOG_WARN,"%s dom filter: "
977                     "%s bad element <%s>,"
978                     " expected <record>|<index> in namespace '%s'",
979                     tinfo->fname, xmlGetNodePath(node), 
980                     node->name, zebra_dom_ns);
981       
982         }
983     }
984 }
985
986
987 /* DOM filter style indexing */
988 static void process_xml_pi_node(struct filter_info *tinfo, 
989                                 struct recExtractCtrl *extctr, 
990                                 xmlNodePtr node,
991                                 xmlChar **index_pp)
992 {
993
994     /* yaz_log(YLOG_DEBUG,"PI     %s\n", xmlGetNodePath(node)); */
995
996     /* if right PI name, continue parsing PI */
997     if (0 == strcmp(zebra_pi_name, (const char *)node->name)){
998         xmlChar *pi_p =  node->content;
999         xmlChar *look = pi_p;
1000     
1001         xmlChar *bval;
1002         xmlChar *eval;
1003
1004         /* parsing PI record instructions */
1005         if (0 == strncmp((const char *)look, "record", 6)){
1006             xmlChar id[256];
1007             xmlChar rank[256];
1008             xmlChar type[256];
1009
1010             *id = '\0';
1011             *rank = '\0';
1012             *type = '\0';
1013       
1014             look += 6;
1015       
1016             /* eat whitespace */
1017             while (*look && ' ' == *look && *(look+1))
1018                 look++;
1019
1020             /* parse possible id */
1021             if (*look && 0 == strncmp((const char *)look, "id=", 3)){
1022                 look += 3;
1023                 bval = look;
1024                 while (*look && ' ' != *look)
1025                     look++;
1026                 eval = look;
1027                 strncpy((char *)id, (const char *)bval, eval - bval);
1028                 id[eval - bval] = '\0';
1029             }
1030       
1031             /* eat whitespace */
1032             while (*look && ' ' == *look && *(look+1))
1033                 look++;
1034       
1035             /* parse possible rank */
1036             if (*look && 0 == strncmp((const char *)look, "rank=", 5)){
1037                 look += 6;
1038                 bval = look;
1039                 while (*look && ' ' != *look)
1040                     look++;
1041                 eval = look;
1042                 strncpy((char *)rank, (const char *)bval, eval - bval);
1043                 rank[eval - bval] = '\0';
1044             }
1045
1046             /* eat whitespace */
1047             while (*look && ' ' == *look && *(look+1))
1048                 look++;
1049
1050             if (look && '\0' != *look)
1051                 yaz_log(YLOG_WARN,"%s dom filter: "
1052                         "%s content '%s', can not parse '%s'",
1053                         tinfo->fname, xmlGetNodePath(node), pi_p, look);
1054             else 
1055                 set_record_info(tinfo, extctr, id, rank, 0);
1056
1057         } 
1058    
1059         /* parsing index instruction */
1060         else   if (0 == strncmp((const char *)look, "index", 5)){
1061             look += 5;
1062       
1063             /* eat whitespace */
1064             while (*look && ' ' == *look && *(look+1))
1065                 look++;
1066
1067             /* export index instructions to outside */
1068             *index_pp = look;
1069         } 
1070         else 
1071             yaz_log(YLOG_WARN,"%s dom filter: "
1072                     "%s content '%s', can not parse '%s'",
1073                     tinfo->fname, xmlGetNodePath(node), pi_p, look);
1074     }
1075 }
1076
1077 /* DOM filter style indexing */
1078 static void process_xml_element_node(struct filter_info *tinfo, 
1079                                      struct recExtractCtrl *extctr, 
1080                                      xmlNodePtr node)
1081 {
1082     /* remember indexing instruction from PI to next element node */
1083     xmlChar *index_p = 0;
1084
1085     /* yaz_log(YLOG_DEBUG,"ELEM   %s\n", xmlGetNodePath(node)); */
1086
1087     /* check if we are an element node in the special zebra namespace 
1088        and either set record data or index value-of node content*/
1089     process_xml_element_zebra_node(tinfo, extctr, node);
1090   
1091     /* loop through kid nodes */
1092     for (node = node->children; node; node = node->next)
1093         {
1094             /* check and set PI record and index index instructions */
1095             if (node->type == XML_PI_NODE){
1096                 process_xml_pi_node(tinfo, extctr, node, &index_p);
1097             }
1098             else if (node->type == XML_ELEMENT_NODE){
1099                 /* if there was a PI index instruction before this element */
1100                 if (index_p){
1101                     index_value_of(tinfo, extctr, node, index_p);            
1102                     index_p = 0;
1103                 }
1104                 process_xml_element_node(tinfo, extctr, node);
1105             }
1106             else
1107                 continue;
1108         }
1109 }
1110
1111
1112 /* DOM filter style indexing */
1113 static void extract_dom_doc_node(struct filter_info *tinfo, 
1114                                  struct recExtractCtrl *extctr, 
1115                                  xmlDocPtr doc)
1116 {
1117     /* yaz_log(YLOG_DEBUG,"DOC    %s\n", xmlGetNodePath((xmlNodePtr)doc)); */
1118
1119     xmlChar *buf_out;
1120     int len_out;
1121     if (extctr->flagShowRecords){
1122         xmlDocDumpMemory(doc, &buf_out, &len_out);
1123         fwrite(buf_out, len_out, 1, stdout);
1124         xmlFree(buf_out);
1125     }
1126
1127     process_xml_element_node(tinfo, extctr, (xmlNodePtr)doc);
1128 }
1129
1130
1131
1132
1133 static int convert_extract_doc(struct filter_info *tinfo, 
1134                                struct filter_input *input,
1135                                struct recExtractCtrl *p, 
1136                                xmlDocPtr doc)
1137
1138 {
1139     xmlChar *buf_out;
1140     int len_out;
1141     const char *params[10];
1142     xsltStylesheetPtr last_xsp = 0;
1143     xmlDocPtr store_doc = 0;
1144
1145     params[0] = 0;
1146     set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
1147
1148     /* input conversion */
1149     perform_convert(tinfo, input->convert, params, &doc, 0);
1150
1151     if (tinfo->store)
1152         {
1153             /* store conversion */
1154             store_doc = xmlCopyDoc(doc, 1);
1155             perform_convert(tinfo, tinfo->store->convert,
1156                             params, &store_doc, &last_xsp);
1157         }
1158     
1159     if (last_xsp)
1160         xsltSaveResultToString(&buf_out, &len_out, 
1161                                store_doc ? store_doc : doc, last_xsp);
1162     else
1163         xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1164     if (p->flagShowRecords)
1165         fwrite(buf_out, len_out, 1, stdout);
1166     (*p->setStoreData)(p, buf_out, len_out);
1167     xmlFree(buf_out);
1168
1169     if (store_doc)
1170         xmlFreeDoc(store_doc);
1171
1172     /* extract conversion */
1173     perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
1174
1175     /* finally, do the indexing */
1176     if (doc){
1177         extract_dom_doc_node(tinfo, p, doc);
1178         /* extract_doc_alvis(tinfo, p, doc); */
1179         xmlFreeDoc(doc);
1180     }
1181
1182     return RECCTRL_EXTRACT_OK;
1183 }
1184
1185 static int extract_xml_split(struct filter_info *tinfo,
1186                              struct filter_input *input,
1187                              struct recExtractCtrl *p)
1188 {
1189     int ret;
1190
1191     if (p->first_record)
1192         {
1193             if (input->u.xmlreader.reader)
1194                 xmlFreeTextReader(input->u.xmlreader.reader);
1195             input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1196                                                        p /* I/O handler */,
1197                                                        0 /* URL */, 
1198                                                        0 /* encoding */,
1199                                                        XML_PARSE_XINCLUDE|
1200                                                        XML_PARSE_NOENT);
1201         }
1202     if (!input->u.xmlreader.reader)
1203         return RECCTRL_EXTRACT_ERROR_GENERIC;
1204
1205     ret = xmlTextReaderRead(input->u.xmlreader.reader);
1206     while (ret == 1)
1207         {
1208             int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1209             int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
1210             if (type == XML_READER_TYPE_ELEMENT && 
1211                 input->u.xmlreader.split_level == depth)
1212                 {
1213                     xmlNodePtr ptr
1214                         = xmlTextReaderExpand(input->u.xmlreader.reader);
1215                     if (ptr)
1216                         {
1217                             xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1218                             xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1219                 
1220                             xmlDocSetRootElement(doc, ptr2);
1221                 
1222                             return convert_extract_doc(tinfo, input, p, doc);
1223                         }
1224                     else
1225                         {
1226                             xmlFreeTextReader(input->u.xmlreader.reader);
1227                             input->u.xmlreader.reader = 0;
1228                             return RECCTRL_EXTRACT_ERROR_GENERIC;
1229                         }
1230                 }
1231             ret = xmlTextReaderRead(input->u.xmlreader.reader);
1232         }
1233     xmlFreeTextReader(input->u.xmlreader.reader);
1234     input->u.xmlreader.reader = 0;
1235     return RECCTRL_EXTRACT_EOF;
1236 }
1237
1238 static int extract_xml_full(struct filter_info *tinfo, 
1239                             struct filter_input *input,
1240                             struct recExtractCtrl *p)
1241 {
1242     if (p->first_record) /* only one record per stream */
1243         {
1244             xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, 
1245                                       p /* I/O handler */,
1246                                       0 /* URL */,
1247                                       0 /* encoding */,
1248                                       XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
1249             if (!doc)
1250                 {
1251                     return RECCTRL_EXTRACT_ERROR_GENERIC;
1252                 }
1253             return convert_extract_doc(tinfo, input, p, doc);
1254         }
1255     else
1256         return RECCTRL_EXTRACT_EOF;
1257 }
1258
1259 static int extract_iso2709(struct filter_info *tinfo,
1260                            struct filter_input *input,
1261                            struct recExtractCtrl *p)
1262 {
1263     char buf[100000];
1264     int record_length;
1265     int read_bytes, r;
1266
1267     if (p->stream->readf(p->stream, buf, 5) != 5)
1268         return RECCTRL_EXTRACT_EOF;
1269     while (*buf < '0' || *buf > '9')
1270         {
1271             int i;
1272
1273             yaz_log(YLOG_WARN, "%s dom filter: "
1274                     "MARC: Skipping bad byte %d (0x%02X)",
1275                     tinfo->fname, *buf & 0xff, *buf & 0xff);
1276             for (i = 0; i<4; i++)
1277                 buf[i] = buf[i+1];
1278
1279             if (p->stream->readf(p->stream, buf+4, 1) != 1)
1280                 return RECCTRL_EXTRACT_EOF;
1281         }
1282     record_length = atoi_n (buf, 5);
1283     if (record_length < 25)
1284         {
1285             yaz_log (YLOG_WARN, "%s dom filter: "
1286                      "MARC record length < 25, is %d", 
1287                      tinfo->fname, record_length);
1288             return RECCTRL_EXTRACT_ERROR_GENERIC;
1289         }
1290     read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1291     if (read_bytes < record_length-5)
1292         {
1293             yaz_log (YLOG_WARN, "%s dom filter: "
1294                      "Couldn't read whole MARC record",
1295                      tinfo->fname);
1296             return RECCTRL_EXTRACT_ERROR_GENERIC;
1297         }
1298     r = yaz_marc_read_iso2709(input->u.marc.handle,  buf, record_length);
1299     if (r < record_length)
1300         {
1301             yaz_log (YLOG_WARN, "%s dom filter: "
1302                      "Parsing of MARC record failed r=%d length=%d",
1303                      tinfo->fname, r, record_length);
1304             return RECCTRL_EXTRACT_ERROR_GENERIC;
1305         }
1306     else
1307         {
1308             xmlDocPtr rdoc;
1309             xmlNode *root_ptr;
1310             yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
1311             rdoc = xmlNewDoc((const xmlChar*) "1.0");
1312             xmlDocSetRootElement(rdoc, root_ptr);
1313             return convert_extract_doc(tinfo, input, p, rdoc);        
1314         }
1315     return RECCTRL_EXTRACT_OK;
1316 }
1317
1318 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1319 {
1320     struct filter_info *tinfo = clientData;
1321     struct filter_input *input = tinfo->input_list;
1322
1323     if (!input)
1324         return RECCTRL_EXTRACT_ERROR_GENERIC;
1325
1326     odr_reset(tinfo->odr_record);
1327     switch(input->type)
1328         {
1329         case DOM_INPUT_XMLREADER:
1330             if (input->u.xmlreader.split_level == 0)
1331                 return extract_xml_full(tinfo, input, p);
1332             else
1333                 return extract_xml_split(tinfo, input, p);
1334             break;
1335         case DOM_INPUT_MARC:
1336             return extract_iso2709(tinfo, input, p);
1337         }
1338     return RECCTRL_EXTRACT_ERROR_GENERIC;
1339 }
1340
1341 static int ioread_ret(void *context, char *buffer, int len)
1342 {
1343     struct recRetrieveCtrl *p = context;
1344     return p->stream->readf(p->stream, buffer, len);
1345 }
1346
/* libxml2 input close callback for retrieval: does nothing and always
   returns 0. */
static int ioclose_ret(void *context)
{
    (void) context;
    return 0;
}
1351
1352 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
1353 {
1354     /* const char *esn = zebra_dom_ns; */
1355     const char *esn = 0;
1356     const char *params[32];
1357     struct filter_info *tinfo = clientData;
1358     xmlDocPtr doc;
1359     struct filter_retrieve *retrieve;
1360     xsltStylesheetPtr last_xsp = 0;
1361
1362     if (p->comp)
1363         {
1364             if (p->comp->which == Z_RecordComp_simple
1365                 && p->comp->u.simple->which == Z_ElementSetNames_generic)
1366                 {
1367                     esn = p->comp->u.simple->u.generic;
1368                 }
1369             else if (p->comp->which == Z_RecordComp_complex 
1370                      && p->comp->u.complex->generic->elementSpec
1371                      && p->comp->u.complex->generic->elementSpec->which ==
1372                      Z_ElementSpec_elementSetName)
1373                 {
1374                     esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1375                 }
1376         }
1377     retrieve = lookup_retrieve(tinfo, esn);
1378     if (!retrieve)
1379         {
1380             p->diagnostic =
1381                 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1382             return 0;
1383         }
1384
1385     params[0] = 0;
1386     set_param_int(params, "id", p->localno, p->odr);
1387     if (p->fname)
1388         set_param_str(params, "filename", p->fname, p->odr);
1389     if (p->staticrank >= 0)
1390         set_param_int(params, "rank", p->staticrank, p->odr);
1391
1392     if (esn)
1393         set_param_str(params, "schema", esn, p->odr);
1394     else
1395         if (retrieve->name)
1396             set_param_str(params, "schema", retrieve->name, p->odr);
1397         else if (retrieve->identifier)
1398             set_param_str(params, "schema", retrieve->identifier, p->odr);
1399         else
1400             set_param_str(params, "schema", "", p->odr);
1401
1402     if (p->score >= 0)
1403         set_param_int(params, "score", p->score, p->odr);
1404     set_param_int(params, "size", p->recordSize, p->odr);
1405
1406     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1407                     0 /* URL */,
1408                     0 /* encoding */,
1409                     XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
1410     if (!doc)
1411         {
1412             p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1413             return 0;
1414         }
1415
1416     /* retrieve conversion */
1417     perform_convert(tinfo, retrieve->convert, params, &doc, &last_xsp);
1418     if (!doc)
1419         {
1420             p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1421         }
1422     else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
1423         {
1424             xmlChar *buf_out;
1425             int len_out;
1426
1427             if (last_xsp)
1428                 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1429             else
1430                 xmlDocDumpMemory(doc, &buf_out, &len_out);            
1431
1432             p->output_format = VAL_TEXT_XML;
1433             p->rec_len = len_out;
1434             p->rec_buf = odr_malloc(p->odr, p->rec_len);
1435             memcpy(p->rec_buf, buf_out, p->rec_len);
1436             xmlFree(buf_out);
1437         }
1438     else if (p->output_format == VAL_SUTRS)
1439         {
1440             xmlChar *buf_out;
1441             int len_out;
1442
1443             if (last_xsp)
1444                 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1445             else
1446                 xmlDocDumpMemory(doc, &buf_out, &len_out);            
1447         
1448             p->output_format = VAL_SUTRS;
1449             p->rec_len = len_out;
1450             p->rec_buf = odr_malloc(p->odr, p->rec_len);
1451             memcpy(p->rec_buf, buf_out, p->rec_len);
1452         
1453             xmlFree(buf_out);
1454         }
1455     else
1456         {
1457             p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1458         }
1459     xmlFreeDoc(doc);
1460     return 0;
1461 }
1462
1463 static struct recType filter_type = {
1464     0,
1465     "dom",
1466     filter_init,
1467     filter_config,
1468     filter_destroy,
1469     filter_extract,
1470     filter_retrieve
1471 };
1472
1473 RecType
1474 #ifdef IDZEBRA_STATIC_DOM
1475 idzebra_filter_dom
1476 #else
1477 idzebra_filter
1478 #endif
1479
1480 [] = {
1481     &filter_type,
1482     0,
1483 };
1484 /*
1485  * Local variables:
1486  * c-basic-offset: 4
1487  * indent-tabs-mode: nil
1488  * End:
1489  * vim: shiftwidth=4 tabstop=8 expandtab
1490  */
1491