Merge branch 'master' into sru_2_0
[yaz-moved-to-github.git] / src / record_render.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_render.c
7  * \brief Render Z39.50 records (NamePlusRecord)
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16
17 #include <yaz/marcdisp.h>
18 #include <yaz/record_render.h>
19 #include <yaz/yaz-iconv.h>
20 #include <yaz/proto.h>
21 #include <yaz/oid_db.h>
22 #include <yaz/nmem_xml.h>
23 #include <yaz/base64.h>
24
25 #if YAZ_HAVE_XML2
26 #include <libxml/xpath.h>
27 #include <libxml/xpathInternals.h>
28 #endif
29
30 static yaz_iconv_t iconv_create_charset(const char *record_charset,
31                                         yaz_iconv_t *cd2)
32 {
33     char charset_buf[40];
34     yaz_iconv_t cd = 0;
35     char *from_set1 = 0;
36     char *from_set2 = 0;
37     char *to_set = "utf-8";
38     if (record_charset && *record_charset)
39     {
40         char *cp = charset_buf;
41
42         strncpy(charset_buf, record_charset, sizeof(charset_buf)-1);
43         charset_buf[sizeof(charset_buf)-1] = '\0';
44
45         from_set1 = cp;
46         while (*cp && *cp != ',' && *cp != '/')
47             cp++;
48         if (*cp == '/')
49         {
50             *cp++ = '\0'; /* terminate from_set1 */
51             from_set2 = cp;
52             while (*cp && *cp != ',')
53                 cp++;
54         }
55         if (*cp == ',')
56         {
57             *cp++ = '\0';  /* terminate from_set1 or from_set2 */
58             to_set = cp;
59             while (*cp)
60                 cp++;
61         }
62     }
63
64     if (from_set1)
65         cd = yaz_iconv_open(to_set, from_set1);
66     if (cd2)
67     {
68         if (from_set2)
69             *cd2 = yaz_iconv_open(to_set, from_set2);
70         else
71             *cd2 = 0;
72     }
73     return cd;
74 }
75
76 static const char *return_marc_record(WRBUF wrbuf,
77                                       int marc_type,
78                                       int *len,
79                                       const char *buf, int sz,
80                                       const char *record_charset)
81 {
82     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
83     yaz_marc_t mt = yaz_marc_create();
84     const char *ret_string = 0;
85
86     if (cd)
87         yaz_marc_iconv(mt, cd);
88     yaz_marc_xml(mt, marc_type);
89     if (yaz_marc_decode_wrbuf(mt, buf, sz, wrbuf) > 0)
90     {
91         *len = wrbuf_len(wrbuf);
92         ret_string = wrbuf_cstr(wrbuf);
93     }
94     yaz_marc_destroy(mt);
95     if (cd)
96         yaz_iconv_close(cd);
97     return ret_string;
98 }
99
100 static const char *return_opac_record(WRBUF wrbuf,
101                                       int marc_type,
102                                       int *len,
103                                       Z_OPACRecord *opac_rec,
104                                       const char *record_charset)
105 {
106     yaz_iconv_t cd2;
107     yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
108     yaz_marc_t mt = yaz_marc_create();
109
110     if (cd)
111         yaz_marc_iconv(mt, cd);
112     yaz_marc_xml(mt, marc_type);
113
114     if (cd2)
115         yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
116     else
117         yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
118
119     yaz_marc_destroy(mt);
120
121     if (cd)
122         yaz_iconv_close(cd);
123     if (cd2)
124         yaz_iconv_close(cd2);
125     *len = wrbuf_len(wrbuf);
126     return wrbuf_cstr(wrbuf);
127 }
128
129 static const char *return_string_record(WRBUF wrbuf,
130                                         int *len,
131                                         const char *buf, int sz,
132                                         const char *record_charset)
133 {
134     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
135
136     if (cd)
137     {
138         wrbuf_iconv_write(wrbuf, cd, buf, sz);
139         wrbuf_iconv_reset(wrbuf, cd);
140
141         buf = wrbuf_cstr(wrbuf);
142         sz = wrbuf_len(wrbuf);
143         yaz_iconv_close(cd);
144     }
145     *len = sz;
146     return buf;
147 }
148
149 static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
150                                        Z_NamePlusRecord *npr,
151                                        int marctype, const char *charset)
152 {
153     Z_External *r = (Z_External *) npr->u.databaseRecord;
154     const Odr_oid *oid = r->direct_reference;
155
156     wrbuf_rewind(wrbuf);
157     /* render bibliographic record .. */
158     if (r->which == Z_External_OPAC)
159     {
160         return return_opac_record(wrbuf, marctype, len,
161                                   r->u.opac, charset);
162     }
163     if (r->which == Z_External_sutrs)
164         return return_string_record(wrbuf, len,
165                                     (char*) r->u.sutrs->buf,
166                                     r->u.sutrs->len,
167                                     charset);
168     else if (r->which == Z_External_octet)
169     {
170         if (oid_oidcmp(oid, yaz_oid_recsyn_xml)
171             && oid_oidcmp(oid, yaz_oid_recsyn_application_xml)
172             && oid_oidcmp(oid, yaz_oid_recsyn_html))
173         {
174             const char *ret_buf = return_marc_record(
175                 wrbuf, marctype, len,
176                 (const char *) r->u.octet_aligned->buf,
177                 r->u.octet_aligned->len,
178                 charset);
179             if (ret_buf)
180                 return ret_buf;
181             /* not ISO2709. Return fail unless raw (ISO2709) is wanted */
182             if (yaz_oid_is_iso2709(oid) && marctype != YAZ_MARC_ISO2709)
183                 return 0;
184         }
185         return return_string_record(wrbuf, len,
186                                     (const char *) r->u.octet_aligned->buf,
187                                     r->u.octet_aligned->len,
188                                     charset);
189     }
190     else if (r->which == Z_External_grs1)
191     {
192         yaz_display_grs1(wrbuf, r->u.grs1, 0);
193         return return_string_record(wrbuf, len,
194                                     wrbuf_buf(wrbuf),
195                                     wrbuf_len(wrbuf),
196                                     charset);
197     }
198     return 0;
199 }
200
201 static const char *get_record_format(WRBUF wrbuf, int *len,
202                                      Z_NamePlusRecord *npr,
203                                      int marctype, const char *charset,
204                                      const char *format)
205 {
206     const char *res = return_record_wrbuf(wrbuf, len, npr, marctype, charset);
207 #if YAZ_HAVE_XML2
208     if (*format == '1')
209     {
210         /* try to XML format res */
211         xmlDocPtr doc;
212         xmlKeepBlanksDefault(0); /* get get xmlDocFormatMemory to work! */
213         doc = xmlParseMemory(res, *len);
214         if (doc)
215         {
216             xmlChar *xml_mem;
217             int xml_size;
218             xmlDocDumpFormatMemory(doc, &xml_mem, &xml_size, 1);
219             wrbuf_rewind(wrbuf);
220             wrbuf_write(wrbuf, (const char *) xml_mem, xml_size);
221             xmlFree(xml_mem);
222             xmlFreeDoc(doc);
223             res = wrbuf_cstr(wrbuf);
224             *len = wrbuf_len(wrbuf);
225         }
226     }
227 #endif
228     return res;
229 }
230
231 #if YAZ_HAVE_XML2
232 static int replace_node(NMEM nmem, xmlNode *ptr,
233                         const char *type_spec, char *record_buf)
234 {
235     int ret = -1;
236     const char *res;
237     int len;
238     int m_len;
239     WRBUF wrbuf = wrbuf_alloc();
240     ODR odr = odr_createmem(ODR_ENCODE);
241     Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
242     npr->which = Z_NamePlusRecord_databaseRecord;
243
244     if (atoi_n_check(record_buf, 5, &m_len))
245         npr->u.databaseRecord =
246             z_ext_record_usmarc(odr, record_buf, strlen(record_buf));
247     else
248         npr->u.databaseRecord =
249             z_ext_record_xml(odr, record_buf, strlen(record_buf));
250     res = yaz_record_render(npr, 0, wrbuf, type_spec, &len);
251     if (res)
252     {
253         xmlDoc *doc = xmlParseMemory(res, strlen(res));
254         if (doc)
255         {
256             xmlNode *nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1);
257             xmlReplaceNode(ptr, nptr);
258             xmlFreeDoc(doc);
259         }
260         else
261         {
262             xmlNode *nptr = xmlNewText(BAD_CAST res);
263             xmlReplaceNode(ptr, nptr);
264         }
265         ret = 0;
266     }
267     wrbuf_destroy(wrbuf);
268     odr_destroy(odr);
269     return ret;
270 }
271 #endif
272
273 static const char *base64_render(NMEM nmem, WRBUF wrbuf,
274                                  const char *buf, int *len,
275                                  const char *expr, const char *type_spec)
276 {
277 #if YAZ_HAVE_XML2
278     xmlDocPtr doc = xmlParseMemory(buf, *len);
279     if (doc)
280     {
281         xmlChar *buf_out;
282         int len_out;
283         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
284         if (xpathCtx)
285         {
286             xmlXPathObjectPtr xpathObj =
287                 xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
288             if (xpathObj)
289             {
290                 xmlNodeSetPtr nodes = xpathObj->nodesetval;
291                 if (nodes)
292                 {
293                     int i;
294                     for (i = 0; i < nodes->nodeNr; i++)
295                     {
296                         xmlNode *ptr = nodes->nodeTab[i];
297                         if (ptr->type == XML_TEXT_NODE)
298                         {
299                             const char *input =
300                                 nmem_text_node_cdata(ptr, nmem);
301                             char *output = nmem_malloc(
302                                 nmem, strlen(input) + 1);
303                             if (yaz_base64decode(input, output) == 0)
304                             {
305                                 if (!replace_node(nmem, ptr, type_spec, output))
306                                 {
307                                     /* replacement OK */
308                                     xmlFreeNode(ptr);
309                                     /* unset below to avoid a bad reference in
310                                        xmlXPathFreeObject below */
311                                     nodes->nodeTab[i] = 0;
312                                 }
313                             }
314                         }
315                     }
316                 }
317                 xmlXPathFreeObject(xpathObj);
318             }
319             xmlXPathFreeContext(xpathCtx);
320         }
321         xmlDocDumpMemory(doc, &buf_out, &len_out);
322         if (buf_out)
323         {
324             wrbuf_rewind(wrbuf);
325             wrbuf_write(wrbuf, (const char *) buf_out, len_out);
326             buf = wrbuf_cstr(wrbuf);
327             *len = len_out;
328         }
329         xmlFreeDoc(doc);
330         xmlFree(buf_out);
331     }
332 #endif
333     return buf;
334 }
335
336 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
337                               WRBUF wrbuf,
338                               const char *type_spec, int *len)
339 {
340     const char *ret = 0;
341     NMEM nmem = 0;
342     char *base64_xpath = 0;
343     size_t i;
344     char type[40];
345     char charset[40];
346     char format[3];
347     const char *cp = type_spec;
348     int len0;
349
350     if (!len)
351         len = &len0;
352
353     for (i = 0; cp[i] && cp[i] != ';' && cp[i] != ' ' && i < sizeof(type)-1;
354          i++)
355         type[i] = cp[i];
356     type[i] = '\0';
357     charset[0] = '\0';
358     format[0] = '\0';
359     while (1)
360     {
361         while (cp[i] == ' ')
362             i++;
363         if (cp[i] != ';')
364             break;
365         i++;
366         while (cp[i] == ' ')
367             i++;
368         if (!strncmp(cp + i, "charset=", 8))
369         {
370             size_t j = 0;
371             i = i + 8; /* skip charset= */
372             while (cp[i] == ' ')
373                 i++;
374             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
375             {
376                 if (j < sizeof(charset)-1)
377                     charset[j++] = cp[i];
378             }
379             charset[j] = '\0';
380         }
381         else if (!strncmp(cp + i, "format=", 7))
382         {
383             size_t j = 0;
384             i = i + 7;
385             while (cp[i] == ' ')
386                 i++;
387             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
388             {
389                 if (j < sizeof(format)-1)
390                     format[j++] = cp[i];
391             }
392             format[j] = '\0';
393         }
394         else if (!strncmp(cp + i, "base64=", 7))
395         {
396             size_t i0;
397             i = i + 7;
398             while (cp[i] == ' ')
399                 i++;
400             i0 = i;
401             while (cp[i] && cp[i] != ';')
402                 i++;
403
404             nmem = nmem_create();
405             base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
406         }
407     }
408     if (!strcmp(type, "database"))
409     {
410         *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
411         ret = npr->databaseName;
412     }
413     else if (!strcmp(type, "schema"))
414     {
415         *len = schema ? strlen(schema) : 0;
416         ret = schema;
417     }
418     else if (!strcmp(type, "syntax"))
419     {
420         const char *desc = 0;
421         if (npr->which == Z_NamePlusRecord_databaseRecord)
422         {
423             Z_External *r = (Z_External *) npr->u.databaseRecord;
424             desc = yaz_oid_to_string(yaz_oid_std(), r->direct_reference, 0);
425         }
426         if (!desc)
427             desc = "none";
428         *len = strlen(desc);
429         ret = desc;
430     }
431     if (npr->which != Z_NamePlusRecord_databaseRecord)
432         ;
433     else if (!strcmp(type, "render"))
434     {
435         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
436     }
437     else if (!strcmp(type, "xml"))
438     {
439         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
440                                 format);
441     }
442     else if (!strcmp(type, "txml"))
443     {
444         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
445                                 format);
446     }
447     else if (!strcmp(type, "raw"))
448     {
449         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
450                                 format);
451     }
452     else if (!strcmp(type, "ext"))
453     {
454         *len = -1;
455         ret = (const char *) npr->u.databaseRecord;
456     }
457     else if (!strcmp(type, "opac"))
458     {
459         if (npr->u.databaseRecord->which == Z_External_OPAC)
460             ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
461                                     format);
462     }
463
464     if (base64_xpath && *len != -1)
465     {
466         char *type_spec = nmem_malloc(nmem,
467                                       strlen(type) + strlen(charset) + 11);
468         strcpy(type_spec, type);
469         if (*charset)
470         {
471             strcat(type_spec, "; charset=");
472             strcat(type_spec, charset);
473         }
474         ret = base64_render(nmem, wrbuf, ret, len, base64_xpath, type_spec);
475     }
476     nmem_destroy(nmem);
477     return ret;
478 }
479
480 /*
481  * Local variables:
482  * c-basic-offset: 4
483  * c-file-style: "Stroustrup"
484  * indent-tabs-mode: nil
485  * End:
486  * vim: shiftwidth=4 tabstop=8 expandtab
487  */
488