Version 5.10.0
[yaz-moved-to-github.git] / src / record_render.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_render.c
7  * \brief Render Z39.50 records (NamePlusRecord)
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16
17 #include <yaz/marcdisp.h>
18 #include <yaz/record_render.h>
19 #include <yaz/yaz-iconv.h>
20 #include <yaz/proto.h>
21 #include <yaz/oid_db.h>
22 #include <yaz/nmem_xml.h>
23 #include <yaz/base64.h>
24
25 #if YAZ_HAVE_XML2
26 #include <libxml/xpath.h>
27 #include <libxml/xpathInternals.h>
28 #endif
29
30 static yaz_iconv_t iconv_create_charset(const char *record_charset,
31                                         yaz_iconv_t *cd2,
32                                         const char *marc_buf,
33                                         int sz)
34 {
35     char charset_buf[40];
36     yaz_iconv_t cd = 0;
37     char *from_set1 = 0;
38     char *from_set2 = 0;
39     char *to_set = "utf-8";
40     if (record_charset && *record_charset)
41     {
42         char *cp = charset_buf;
43
44         strncpy(charset_buf, record_charset, sizeof(charset_buf)-1);
45         charset_buf[sizeof(charset_buf)-1] = '\0';
46
47         from_set1 = cp;
48         while (*cp && *cp != ',' && *cp != '/')
49             cp++;
50         if (*cp == '/')
51         {
52             *cp++ = '\0'; /* terminate from_set1 */
53             from_set2 = cp;
54             while (*cp && *cp != ',')
55                 cp++;
56         }
57         if (*cp == ',')
58         {
59             *cp++ = '\0';  /* terminate from_set1 or from_set2 */
60             to_set = cp;
61             while (*cp)
62                 cp++;
63         }
64     }
65
66     if (from_set1)
67     {
68         if (yaz_marc_check_marc21_coding(from_set1, marc_buf, sz))
69             from_set1 = "utf-8";
70         cd = yaz_iconv_open(to_set, from_set1);
71     }
72     if (cd2)
73     {
74         if (from_set2)
75             *cd2 = yaz_iconv_open(to_set, from_set2);
76         else
77             *cd2 = 0;
78     }
79     return cd;
80 }
81
82 static const char *return_marc_record(WRBUF wrbuf,
83                                       int marc_type,
84                                       int *len,
85                                       const char *buf, int sz,
86                                       const char *record_charset)
87 {
88     yaz_iconv_t cd = iconv_create_charset(record_charset, 0, buf, sz);
89     yaz_marc_t mt = yaz_marc_create();
90     const char *ret_string = 0;
91
92     if (cd)
93         yaz_marc_iconv(mt, cd);
94     yaz_marc_xml(mt, marc_type);
95     if (yaz_marc_decode_wrbuf(mt, buf, sz, wrbuf) > 0)
96     {
97         *len = wrbuf_len(wrbuf);
98         ret_string = wrbuf_cstr(wrbuf);
99     }
100     yaz_marc_destroy(mt);
101     if (cd)
102         yaz_iconv_close(cd);
103     return ret_string;
104 }
105
106 static const char *return_opac_record(WRBUF wrbuf,
107                                       int marc_type,
108                                       int *len,
109                                       Z_OPACRecord *opac_rec,
110                                       const char *record_charset)
111 {
112     yaz_iconv_t cd, cd2;
113     const char *marc_buf = 0;
114     int marc_sz = 0;
115     yaz_marc_t mt = yaz_marc_create();
116
117     if (opac_rec->bibliographicRecord)
118     {
119         Z_External *ext = opac_rec->bibliographicRecord;
120         if (ext->which == Z_External_octet)
121         {
122             marc_buf = (const char *) ext->u.octet_aligned->buf;
123             marc_sz = ext->u.octet_aligned->len;
124         }
125     }
126     cd = iconv_create_charset(record_charset, &cd2, marc_buf, marc_sz);
127
128     if (cd)
129         yaz_marc_iconv(mt, cd);
130     yaz_marc_xml(mt, marc_type);
131
132     if (cd2)
133         yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
134     else
135         yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
136
137     yaz_marc_destroy(mt);
138
139     if (cd)
140         yaz_iconv_close(cd);
141     if (cd2)
142         yaz_iconv_close(cd2);
143     *len = wrbuf_len(wrbuf);
144     return wrbuf_cstr(wrbuf);
145 }
146
147 static const char *return_string_record(WRBUF wrbuf,
148                                         int *len,
149                                         const char *buf, int sz,
150                                         const char *record_charset)
151 {
152     yaz_iconv_t cd = iconv_create_charset(record_charset, 0, 0, 0);
153
154     if (cd)
155     {
156         wrbuf_iconv_write(wrbuf, cd, buf, sz);
157         wrbuf_iconv_reset(wrbuf, cd);
158
159         buf = wrbuf_cstr(wrbuf);
160         sz = wrbuf_len(wrbuf);
161         yaz_iconv_close(cd);
162     }
163     *len = sz;
164     return buf;
165 }
166
167 static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
168                                        Z_NamePlusRecord *npr,
169                                        int marctype, const char *charset)
170 {
171     Z_External *r = (Z_External *) npr->u.databaseRecord;
172     const Odr_oid *oid = r->direct_reference;
173
174     wrbuf_rewind(wrbuf);
175     /* render bibliographic record .. */
176     if (r->which == Z_External_OPAC)
177     {
178         return return_opac_record(wrbuf, marctype, len,
179                                   r->u.opac, charset);
180     }
181     if (r->which == Z_External_sutrs)
182         return return_string_record(wrbuf, len,
183                                     (char*) r->u.sutrs->buf,
184                                     r->u.sutrs->len,
185                                     charset);
186     else if (r->which == Z_External_octet)
187     {
188         if (oid_oidcmp(oid, yaz_oid_recsyn_xml)
189             && oid_oidcmp(oid, yaz_oid_recsyn_application_xml)
190             && oid_oidcmp(oid, yaz_oid_recsyn_mab)
191             && oid_oidcmp(oid, yaz_oid_recsyn_html))
192         {
193             const char *ret_buf = return_marc_record(
194                 wrbuf, marctype, len,
195                 (const char *) r->u.octet_aligned->buf,
196                 r->u.octet_aligned->len,
197                 charset);
198             if (ret_buf)
199                 return ret_buf;
200             /* not ISO2709. Return fail unless raw (ISO2709) is wanted */
201             if (yaz_oid_is_iso2709(oid) && marctype != YAZ_MARC_ISO2709)
202                 return 0;
203         }
204         return return_string_record(wrbuf, len,
205                                     (const char *) r->u.octet_aligned->buf,
206                                     r->u.octet_aligned->len,
207                                     charset);
208     }
209     else if (r->which == Z_External_grs1)
210     {
211         yaz_display_grs1(wrbuf, r->u.grs1, 0);
212         return return_string_record(wrbuf, len,
213                                     wrbuf_buf(wrbuf),
214                                     wrbuf_len(wrbuf),
215                                     charset);
216     }
217     return 0;
218 }
219
220 static const char *get_record_format(WRBUF wrbuf, int *len,
221                                      Z_NamePlusRecord *npr,
222                                      int marctype, const char *charset,
223                                      const char *format)
224 {
225     const char *res = return_record_wrbuf(wrbuf, len, npr, marctype, charset);
226 #if YAZ_HAVE_XML2
227     if (*format == '1')
228     {
229         /* try to XML format res */
230         xmlDocPtr doc;
231         xmlKeepBlanksDefault(0); /* get get xmlDocFormatMemory to work! */
232         doc = xmlParseMemory(res, *len);
233         if (doc)
234         {
235             xmlChar *xml_mem;
236             int xml_size;
237             xmlDocDumpFormatMemory(doc, &xml_mem, &xml_size, 1);
238             wrbuf_rewind(wrbuf);
239             wrbuf_write(wrbuf, (const char *) xml_mem, xml_size);
240             xmlFree(xml_mem);
241             xmlFreeDoc(doc);
242             res = wrbuf_cstr(wrbuf);
243             *len = wrbuf_len(wrbuf);
244         }
245     }
246 #endif
247     return res;
248 }
249
#if YAZ_HAVE_XML2
/**
 * \brief Replace an XML node with the rendering of an embedded record.
 *
 * record_buf is wrapped as a USMARC external when atoi_n_check accepts its
 * first five characters, otherwise as an XML external. It is then rendered
 * with yaz_record_render using type_spec. If the rendering parses as XML,
 * a copy of its root element replaces ptr; otherwise a text node holding
 * the rendering replaces ptr.
 *
 * \param nmem not used by this function
 * \param ptr node to be replaced (unlinked but not freed here)
 * \param type_spec type specification handed to yaz_record_render
 * \param record_buf NUL-terminated record data
 * \return 0 when ptr was replaced, -1 when the record could not be rendered
 */
static int replace_node(NMEM nmem, xmlNode *ptr,
                        const char *type_spec, char *record_buf)
{
    int status = -1;
    int rendered_len;
    int marc_len;
    const char *rendered;
    WRBUF out = wrbuf_alloc();
    ODR encode = odr_createmem(ODR_ENCODE);
    Z_NamePlusRecord *rec = odr_malloc(encode, sizeof(*rec));

    rec->which = Z_NamePlusRecord_databaseRecord;
    rec->u.databaseRecord = atoi_n_check(record_buf, 5, &marc_len)
        ? z_ext_record_usmarc(encode, record_buf, strlen(record_buf))
        : z_ext_record_xml(encode, record_buf, strlen(record_buf));

    rendered = yaz_record_render(rec, 0, out, type_spec, &rendered_len);
    if (rendered)
    {
        xmlNode *replacement;
        xmlDoc *parsed = xmlParseMemory(rendered, strlen(rendered));

        if (!parsed)
        {
            /* not XML: insert the rendering as plain text */
            replacement = xmlNewText(BAD_CAST rendered);
            xmlReplaceNode(ptr, replacement);
        }
        else
        {
            replacement = xmlCopyNode(xmlDocGetRootElement(parsed), 1);
            xmlReplaceNode(ptr, replacement);
            xmlFreeDoc(parsed);
        }
        status = 0;
    }

    wrbuf_destroy(out);
    odr_destroy(encode);
    return status;
}
#endif
291
292 static const char *base64_render(NMEM nmem, WRBUF wrbuf,
293                                  const char *buf, int *len,
294                                  const char *expr, const char *type_spec)
295 {
296 #if YAZ_HAVE_XML2
297     xmlDocPtr doc = xmlParseMemory(buf, *len);
298     if (doc)
299     {
300         xmlChar *buf_out;
301         int len_out;
302         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
303         if (xpathCtx)
304         {
305             xmlXPathObjectPtr xpathObj =
306                 xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
307             if (xpathObj)
308             {
309                 xmlNodeSetPtr nodes = xpathObj->nodesetval;
310                 if (nodes)
311                 {
312                     int i;
313                     for (i = 0; i < nodes->nodeNr; i++)
314                     {
315                         xmlNode *ptr = nodes->nodeTab[i];
316                         if (ptr->type == XML_TEXT_NODE)
317                         {
318                             const char *input =
319                                 nmem_text_node_cdata(ptr, nmem);
320                             char *output = nmem_malloc(
321                                 nmem, strlen(input) + 1);
322                             if (yaz_base64decode(input, output) == 0)
323                             {
324                                 if (!replace_node(nmem, ptr, type_spec, output))
325                                 {
326                                     /* replacement OK */
327                                     xmlFreeNode(ptr);
328                                     /* unset below to avoid a bad reference in
329                                        xmlXPathFreeObject below */
330                                     nodes->nodeTab[i] = 0;
331                                 }
332                             }
333                         }
334                     }
335                 }
336                 xmlXPathFreeObject(xpathObj);
337             }
338             xmlXPathFreeContext(xpathCtx);
339         }
340         xmlDocDumpMemory(doc, &buf_out, &len_out);
341         if (buf_out)
342         {
343             wrbuf_rewind(wrbuf);
344             wrbuf_write(wrbuf, (const char *) buf_out, len_out);
345             buf = wrbuf_cstr(wrbuf);
346             *len = len_out;
347         }
348         xmlFreeDoc(doc);
349         xmlFree(buf_out);
350     }
351 #endif
352     return buf;
353 }
354
355 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
356                               WRBUF wrbuf,
357                               const char *type_spec, int *len)
358 {
359     const char *ret = 0;
360     NMEM nmem = 0;
361     char *base64_xpath = 0;
362     size_t i;
363     char type[40];
364     char charset[40];
365     char format[3];
366     const char *cp = type_spec;
367     int len0;
368
369     if (!len)
370         len = &len0;
371
372     for (i = 0; cp[i] && cp[i] != ';' && cp[i] != ' ' && i < sizeof(type)-1;
373          i++)
374         type[i] = cp[i];
375     type[i] = '\0';
376     charset[0] = '\0';
377     format[0] = '\0';
378     while (1)
379     {
380         while (cp[i] == ' ')
381             i++;
382         if (cp[i] != ';')
383             break;
384         i++;
385         while (cp[i] == ' ')
386             i++;
387         if (!strncmp(cp + i, "charset=", 8))
388         {
389             size_t j = 0;
390             i = i + 8; /* skip charset= */
391             while (cp[i] == ' ')
392                 i++;
393             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
394             {
395                 if (j < sizeof(charset)-1)
396                     charset[j++] = cp[i];
397             }
398             charset[j] = '\0';
399         }
400         else if (!strncmp(cp + i, "format=", 7))
401         {
402             size_t j = 0;
403             i = i + 7;
404             while (cp[i] == ' ')
405                 i++;
406             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
407             {
408                 if (j < sizeof(format)-1)
409                     format[j++] = cp[i];
410             }
411             format[j] = '\0';
412         }
413         else if (!strncmp(cp + i, "base64=", 7))
414         {
415             size_t i0;
416             i = i + 7;
417             while (cp[i] == ' ')
418                 i++;
419             i0 = i;
420             while (cp[i] && cp[i] != ';')
421                 i++;
422
423             nmem = nmem_create();
424             base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
425         }
426     }
427     if (!strcmp(type, "database"))
428     {
429         *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
430         ret = npr->databaseName;
431     }
432     else if (!strcmp(type, "schema"))
433     {
434         *len = schema ? strlen(schema) : 0;
435         ret = schema;
436     }
437     else if (!strcmp(type, "syntax"))
438     {
439         const char *desc = 0;
440         if (npr->which == Z_NamePlusRecord_databaseRecord)
441         {
442             Z_External *r = (Z_External *) npr->u.databaseRecord;
443             desc = yaz_oid_to_string(yaz_oid_std(), r->direct_reference, 0);
444         }
445         if (!desc)
446             desc = "none";
447         *len = strlen(desc);
448         ret = desc;
449     }
450     if (npr->which != Z_NamePlusRecord_databaseRecord)
451         ;
452     else if (!strcmp(type, "render"))
453     {
454         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
455     }
456     else if (!strcmp(type, "xml"))
457     {
458         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
459                                 format);
460     }
461     else if (!strcmp(type, "txml"))
462     {
463         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
464                                 format);
465     }
466     else if (!strcmp(type, "json"))
467     {
468         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_JSON, charset,
469                                 format);
470     }
471     else if (!strcmp(type, "raw"))
472     {
473         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
474                                 format);
475     }
476     else if (!strcmp(type, "ext"))
477     {
478         *len = -1;
479         ret = (const char *) npr->u.databaseRecord;
480     }
481     else if (!strcmp(type, "opac"))
482     {
483         if (npr->u.databaseRecord->which == Z_External_OPAC)
484             ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
485                                     format);
486     }
487
488     if (base64_xpath && *len != -1)
489     {
490         char *type_spec = nmem_malloc(nmem,
491                                       strlen(type) + strlen(charset) + 11);
492         strcpy(type_spec, type);
493         if (*charset)
494         {
495             strcat(type_spec, "; charset=");
496             strcat(type_spec, charset);
497         }
498         ret = base64_render(nmem, wrbuf, ret, len, base64_xpath, type_spec);
499     }
500     nmem_destroy(nmem);
501     return ret;
502 }
503
504 /*
505  * Local variables:
506  * c-basic-offset: 4
507  * c-file-style: "Stroustrup"
508  * indent-tabs-mode: nil
509  * End:
510  * vim: shiftwidth=4 tabstop=8 expandtab
511  */
512