62c8dd04793e920a3e4dae1d0e20dea219fb8ded
[idzebra-moved-to-github.git] / recctrl / xslt.c
1 /* $Id: xslt.c,v 1.1 2005-04-28 08:20:40 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>

#include <yaz/diagbib1.h>
#include <libxml/xmlreader.h>
#include <libxslt/transform.h>

#include <idzebra/util.h>
#include <idzebra/recctrl.h>
33
34 struct filter_info {
35     xsltStylesheetPtr stylesheet_xsp;
36     xmlTextReaderPtr reader;
37     char *fname;
38     int split_depth;
39 };
40
/* Namespace URI an element must carry to be considered by index_node. */
static const char *zebra_index_ns =
    "http://indexdata.dk/zebra/indexing/1";
42
43 static void *filter_init (Res res, RecType recType)
44 {
45     struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
46     tinfo->stylesheet_xsp = 0;
47     tinfo->reader = 0;
48     tinfo->fname = 0;
49     tinfo->split_depth = 1;
50     return tinfo;
51 }
52
53 static void filter_config(void *clientData, Res res, const char *args)
54 {
55     struct filter_info *tinfo = clientData;
56     if (!args || !*args)
57         args = "default.xsl";
58     if (!tinfo->fname || strcmp(args, tinfo->fname))
59     {
60         /* different filename so must reread stylesheet */
61         xfree(tinfo->fname);
62         tinfo->fname = xstrdup(args);
63         if (tinfo->stylesheet_xsp)
64             xsltFreeStylesheet(tinfo->stylesheet_xsp);
65         tinfo->stylesheet_xsp =
66             xsltParseStylesheetFile((const xmlChar*) tinfo->fname);
67     }
68 }
69
70 static void filter_destroy(void *clientData)
71 {
72     struct filter_info *tinfo = clientData;
73     if (tinfo->stylesheet_xsp)
74         xsltFreeStylesheet(tinfo->stylesheet_xsp);
75     xfree(tinfo->fname);
76     xfree(tinfo);
77 }
78
79 static int ioread_ex(void *context, char *buffer, int len)
80 {
81     struct recExtractCtrl *p = context;
82     return (*p->readf)(p->fh, buffer, len);
83 }
84
/* Close callback for the extraction reader: does nothing and reports 0. */
static int ioclose_ex(void *context)
{
    (void) context;   /* the context is not used */
    return 0;
}
89
90 static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
91                         xmlNodePtr ptr, RecWord *recWord)
92 {
93     for(; ptr; ptr = ptr->next)
94     {
95         index_field(tinfo, ctrl, ptr->children, recWord);
96         if (ptr->type != XML_TEXT_NODE)
97             continue;
98         recWord->term_buf = ptr->content;
99         recWord->term_len = strlen(ptr->content);
100         (*ctrl->tokenAdd)(recWord);
101     }
102 }
103
104 static void index_node(struct filter_info *tinfo,  struct recExtractCtrl *ctrl,
105                        xmlNodePtr ptr, RecWord *recWord)
106 {
107     for(; ptr; ptr = ptr->next)
108     {
109         index_node(tinfo, ctrl, ptr->children, recWord);
110         if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
111             strcmp(ptr->ns->href, zebra_index_ns))
112             continue;
113         if (!strcmp(ptr->name, "index"))
114         {
115             char *field_str = 0;
116             const char *xpath_str = 0;
117             struct _xmlAttr *attr;
118             for (attr = ptr->properties; attr; attr = attr->next)
119             {
120                 if (!strcmp(attr->name, "field") 
121                     && attr->children && attr->children->type == XML_TEXT_NODE)
122                     field_str = attr->children->content;
123                 if (!strcmp(attr->name, "xpath") 
124                     && attr->children && attr->children->type == XML_TEXT_NODE)
125                     xpath_str = attr->children->content;
126             }
127             if (field_str)
128             {
129                 recWord->attrStr = field_str;
130                 index_field(tinfo, ctrl, ptr->children, recWord);
131             }
132         }
133     }
134 }
135
136 static int filter_extract(void *clientData, struct recExtractCtrl *p)
137 {
138     static const char *params[] = {
139         "schema", "'http://indexdata.dk/zebra/indexing/1'",
140         0
141     };
142     struct filter_info *tinfo = clientData;
143     RecWord recWord;
144     int ret;
145
146     if (p->first_record)
147     {
148         if (tinfo->reader)
149             xmlFreeTextReader(tinfo->reader);
150         tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
151                                        p /* I/O handler */,
152                                        0 /* URL */, 
153                                        0 /* encoding */,
154                                        XML_PARSE_XINCLUDE);
155     }
156     if (!tinfo->reader)
157         return RECCTRL_EXTRACT_ERROR_GENERIC;
158
159     if (!tinfo->stylesheet_xsp)
160         return RECCTRL_EXTRACT_ERROR_GENERIC;
161
162     (*p->init)(p, &recWord);
163     recWord.reg_type = 'w';
164
165     ret = xmlTextReaderRead(tinfo->reader);
166     while (ret == 1) {
167         int type = xmlTextReaderNodeType(tinfo->reader);
168         int depth = xmlTextReaderDepth(tinfo->reader);
169         if (tinfo->split_depth == 0 ||
170             (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
171         {
172             xmlChar *buf_out;
173             int len_out;
174
175             xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
176             xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
177             xmlDocPtr doc = xmlNewDoc("1.0");
178
179             xmlDocSetRootElement(doc, ptr2);
180             
181             if (tinfo->stylesheet_xsp)
182             {
183                 xmlDocPtr resDoc = 
184                     xsltApplyStylesheet(tinfo->stylesheet_xsp,
185                                         doc, params);
186                 if (p->flagShowRecords)
187                 {
188                     xmlDocDumpMemory(resDoc, &buf_out, &len_out);
189                     fwrite(buf_out, len_out, 1, stdout);
190                     xmlFree(buf_out);
191                 }
192                 index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
193                 xmlFreeDoc(resDoc);
194             }
195             xmlDocDumpMemory(doc, &buf_out, &len_out);
196             if (p->flagShowRecords)
197                 fwrite(buf_out, len_out, 1, stdout);
198             (*p->setStoreData)(p, buf_out, len_out);
199             xmlFree(buf_out);
200
201             xmlFreeDoc(doc);
202             return RECCTRL_EXTRACT_OK;
203         }
204         ret = xmlTextReaderRead(tinfo->reader);
205     }
206     xmlFreeTextReader(tinfo->reader);
207     tinfo->reader = 0;
208     return RECCTRL_EXTRACT_EOF;
209 }
210
211 static int ioread_ret(void *context, char *buffer, int len)
212 {
213     struct recRetrieveCtrl *p = context;
214     return (*p->readf)(p->fh, buffer, len);
215 }
216
/* Close callback for retrieval input: does nothing and reports 0. */
static int ioclose_ret(void *context)
{
    (void) context;   /* the context is not used */
    return 0;
}
221
222 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
223 {
224     static const char *params[] = {
225         "schema", "'F'",
226         0
227     };
228     struct filter_info *tinfo = clientData;
229     xmlDocPtr resDoc;
230     xmlDocPtr doc;
231
232     if (p->comp)
233     {
234         const char *esn;
235         char *esn_quoted;
236         if (p->comp->which != Z_RecordComp_simple
237             || p->comp->u.simple->which != Z_ElementSetNames_generic)
238         {
239             p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
240             return 0;
241         }
242         esn = p->comp->u.simple->u.generic;
243         esn_quoted = odr_malloc(p->odr, 3 + strlen(esn));
244         sprintf(esn_quoted, "'%s'", esn);
245         params[1] = esn_quoted;
246     }
247     if (!tinfo->stylesheet_xsp)
248     {
249         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
250         return 0;
251     }
252     doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
253                     0 /* URL */,
254                     0 /* encoding */,
255                     XML_PARSE_XINCLUDE);
256     if (!doc)
257     {
258         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
259         return 0;
260     }
261     resDoc = xsltApplyStylesheet(tinfo->stylesheet_xsp,
262                                  doc, params);
263     if (!resDoc)
264     {
265         p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
266     }
267     else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
268     {
269         xmlChar *buf_out;
270         int len_out;
271         xmlDocDumpMemory(resDoc, &buf_out, &len_out);
272
273         p->output_format = VAL_TEXT_XML;
274         p->rec_len = len_out;
275         p->rec_buf = odr_malloc(p->odr, p->rec_len);
276         memcpy(p->rec_buf, buf_out, p->rec_len);
277         
278         xmlFree(buf_out);
279     }
280     else if (p->output_format == VAL_SUTRS)
281     {
282         xmlChar *buf_out;
283         int len_out;
284         xmlDocDumpMemory(resDoc, &buf_out, &len_out);
285
286         p->output_format = VAL_SUTRS;
287         p->rec_len = len_out;
288         p->rec_buf = odr_malloc(p->odr, p->rec_len);
289         memcpy(p->rec_buf, buf_out, p->rec_len);
290         
291         xmlFree(buf_out);
292     }
293     else
294     {
295         p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
296     }
297     xmlFreeDoc(resDoc);
298     xmlFreeDoc(doc);
299     return 0;
300 }
301
302 static struct recType filter_type = {
303     0,
304     "xslt",
305     filter_init,
306     filter_config,
307     filter_destroy,
308     filter_extract,
309     filter_retrieve
310 };
311
312 RecType
313 #ifdef IDZEBRA_STATIC_XSLT
314 idzebra_filter_xslt
315 #else
316 idzebra_filter
317 #endif
318
319 [] = {
320     &filter_type,
321     0,
322 };