record_render: base64 decoding of embedded records
[yaz-moved-to-github.git] / src / record_render.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_render.c
7  * \brief Render Z39.50 records (NamePlusRecord)
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16
17 #include <yaz/marcdisp.h>
18 #include <yaz/record_render.h>
19 #include <yaz/yaz-iconv.h>
20 #include <yaz/proto.h>
21 #include <yaz/oid_db.h>
22 #include <yaz/nmem_xml.h>
23 #include <yaz/base64.h>
24
25 #include <libxml/xpath.h>
26 #include <libxml/xpathInternals.h>
27
28 static yaz_iconv_t iconv_create_charset(const char *record_charset,
29                                         yaz_iconv_t *cd2)
30 {
31     char charset_buf[40];
32     yaz_iconv_t cd = 0;
33     char *from_set1 = 0;
34     char *from_set2 = 0;
35     char *to_set = 0;
36     if (record_charset && *record_charset)
37     {
38         char *cp = charset_buf;
39         
40         strncpy(charset_buf, record_charset, sizeof(charset_buf)-1);
41         charset_buf[sizeof(charset_buf)-1] = '\0';
42         
43         from_set1 = cp;
44         while (*cp && *cp != ',' && *cp != '/')
45             cp++;
46         if (*cp == '/')
47         {
48             *cp++ = '\0'; /* terminate from_set1 */
49             from_set2 = cp;
50             while (*cp && *cp != ',')
51                 cp++;
52         }
53         if (*cp == ',')
54         {
55             *cp++ = '\0';  /* terminate from_set1 or from_set2 */
56             to_set = cp;
57             while (*cp)
58                 cp++;
59         }
60     }
61     
62     if (from_set1)
63         cd = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set1);
64     if (cd2)
65     {
66         if (from_set2)
67             *cd2 = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set2);
68         else
69             *cd2 = 0;
70     }
71     return cd;
72 }
73
74 static const char *return_marc_record(WRBUF wrbuf,
75                                       int marc_type,
76                                       int *len,
77                                       const char *buf, int sz,
78                                       const char *record_charset)
79 {
80     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
81     yaz_marc_t mt = yaz_marc_create();
82     const char *ret_string = 0;
83
84     if (cd)
85         yaz_marc_iconv(mt, cd);
86     yaz_marc_xml(mt, marc_type);
87     if (yaz_marc_decode_wrbuf(mt, buf, sz, wrbuf) > 0)
88     {
89         if (len)
90             *len = wrbuf_len(wrbuf);
91         ret_string = wrbuf_cstr(wrbuf);
92     }
93     yaz_marc_destroy(mt);
94     if (cd)
95         yaz_iconv_close(cd);
96     return ret_string;
97 }
98
99 static const char *return_opac_record(WRBUF wrbuf,
100                                       int marc_type,
101                                       int *len,
102                                       Z_OPACRecord *opac_rec,
103                                       const char *record_charset)
104 {
105     yaz_iconv_t cd2;
106     yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
107     yaz_marc_t mt = yaz_marc_create();
108
109     if (cd)
110         yaz_marc_iconv(mt, cd);
111     yaz_marc_xml(mt, marc_type);
112
113     if (cd2)
114         yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
115     else
116         yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
117         
118     yaz_marc_destroy(mt);
119
120     if (cd)
121         yaz_iconv_close(cd);
122     if (cd2)
123         yaz_iconv_close(cd2);
124     if (len)
125         *len = wrbuf_len(wrbuf);
126     return wrbuf_cstr(wrbuf);
127 }
128
129 static const char *return_string_record(WRBUF wrbuf,
130                                         int *len,
131                                         const char *buf, int sz,
132                                         const char *record_charset)
133 {
134     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
135
136     if (cd)
137     {
138         wrbuf_iconv_write(wrbuf, cd, buf, sz);
139         wrbuf_iconv_reset(wrbuf, cd);
140
141         buf = wrbuf_cstr(wrbuf);
142         sz = wrbuf_len(wrbuf);
143         yaz_iconv_close(cd);
144     }
145     if (len)
146         *len = sz;
147     return buf;
148 }
149
150 static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
151                                        Z_NamePlusRecord *npr,
152                                        int marctype, const char *charset)
153 {
154     Z_External *r = (Z_External *) npr->u.databaseRecord;
155     const Odr_oid *oid = r->direct_reference;
156
157     wrbuf_rewind(wrbuf);
158     /* render bibliographic record .. */
159     if (r->which == Z_External_OPAC)
160     {
161         return return_opac_record(wrbuf, marctype, len,
162                                   r->u.opac, charset);
163     }
164     if (r->which == Z_External_sutrs)
165         return return_string_record(wrbuf, len,
166                                     (char*) r->u.sutrs->buf,
167                                     r->u.sutrs->len,
168                                     charset);
169     else if (r->which == Z_External_octet)
170     {
171         if (yaz_oid_is_iso2709(oid))
172         {
173             const char *ret_buf = return_marc_record(
174                 wrbuf, marctype, len,
175                 (const char *) r->u.octet_aligned->buf,
176                 r->u.octet_aligned->len,
177                 charset);
178             if (ret_buf)
179                 return ret_buf;
180             /* bad ISO2709. Return fail unless raw (ISO2709) is wanted */
181             if (marctype != YAZ_MARC_ISO2709)
182                 return 0;
183         }
184         return return_string_record(wrbuf, len,
185                                     (const char *) r->u.octet_aligned->buf,
186                                     r->u.octet_aligned->len,
187                                     charset);
188     }
189     else if (r->which == Z_External_grs1)
190     {
191         yaz_display_grs1(wrbuf, r->u.grs1, 0);
192         return return_string_record(wrbuf, len,
193                                     wrbuf_buf(wrbuf),
194                                     wrbuf_len(wrbuf),
195                                     charset);
196     }
197     return 0;
198 }
199     
200 static const char *get_record_format(WRBUF wrbuf, int *len,
201                                      Z_NamePlusRecord *npr,
202                                      int marctype, const char *charset,
203                                      const char *format)
204 {
205     const char *res = return_record_wrbuf(wrbuf, len, npr, marctype, charset);
206 #if YAZ_HAVE_XML2
207     if (*format == '1' && len)
208     {
209         /* try to XML format res */
210         xmlDocPtr doc;
211         xmlKeepBlanksDefault(0); /* get get xmlDocFormatMemory to work! */
212         doc = xmlParseMemory(res, *len);
213         if (doc)
214         {
215             xmlChar *xml_mem;
216             int xml_size;
217             xmlDocDumpFormatMemory(doc, &xml_mem, &xml_size, 1);
218             wrbuf_rewind(wrbuf);
219             wrbuf_write(wrbuf, (const char *) xml_mem, xml_size);
220             xmlFree(xml_mem);
221             xmlFreeDoc(doc);
222             res = wrbuf_cstr(wrbuf);
223             *len = wrbuf_len(wrbuf);
224         } 
225     }
226 #endif
227     return res;
228 }
229
230 static int replace_node(NMEM nmem, xmlNode *ptr,
231                         const char *type_spec, char *record_buf)
232 {
233     int ret = -1;
234     const char *res;
235     int len;
236     int m_len;
237     WRBUF wrbuf = wrbuf_alloc();
238     ODR odr = odr_createmem(ODR_ENCODE);
239     Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
240     npr->which = Z_NamePlusRecord_databaseRecord;
241
242     if (atoi_n_check(record_buf, 5, &m_len))
243         npr->u.databaseRecord =
244             z_ext_record_usmarc(odr, record_buf, strlen(record_buf));
245     else
246         npr->u.databaseRecord =
247             z_ext_record_xml(odr, record_buf, strlen(record_buf));
248     res = yaz_record_render(npr, 0, wrbuf, type_spec, &len);
249     if (res)
250     {
251         xmlDoc *doc = xmlParseMemory(res, strlen(res));
252         xmlNode *nptr;
253         if (doc)
254         {
255             nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1);
256             xmlReplaceNode(ptr, nptr);
257             xmlFreeDoc(doc);
258         }
259         else
260         {
261             nptr = xmlNewText(BAD_CAST res);
262             xmlReplaceNode(ptr, nptr);
263         }
264         ret = 0;
265     }
266     wrbuf_destroy(wrbuf);
267     odr_destroy(odr);
268     return ret;
269 }
270
271 static const char *base64_render(NMEM nmem, WRBUF wrbuf,
272                                  const char *buf, int *len,
273                                  const char *expr, const char *type_spec)
274 {
275     xmlDocPtr doc = xmlParseMemory(buf, *len);
276     if (doc)
277     {
278         xmlChar *buf_out;
279         int len_out;
280         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
281         if (xpathCtx)
282         {
283             xmlXPathObjectPtr xpathObj =
284                 xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
285             if (xpathObj)
286             {
287                 xmlNodeSetPtr nodes = xpathObj->nodesetval;
288                 if (nodes)
289                 {
290                     int i;
291                     for (i = 0; i < nodes->nodeNr; i++)
292                     {
293                         xmlNode *ptr = nodes->nodeTab[i];
294                         if (ptr->type == XML_TEXT_NODE)
295                         {
296                             const char *input =
297                                 nmem_text_node_cdata(ptr, nmem);
298                             char *output = nmem_malloc(
299                                 nmem, strlen(input) + 1);
300                             if (yaz_base64decode(input, output) == 0)
301                             {
302                                 if (!replace_node(nmem, ptr, type_spec, output))
303                                 {
304                                     /* replacement OK */
305                                     xmlFreeNode(ptr);
306                                     /* unset below to avoid a bad reference in
307                                        xmlXPathFreeObject below */
308                                     nodes->nodeTab[i] = 0;
309                                 }
310                             }
311                         }
312                     }
313                 }
314                 xmlXPathFreeObject(xpathObj);
315             }
316             xmlXPathFreeContext(xpathCtx);
317         }
318         xmlDocDumpMemory(doc, &buf_out, &len_out);
319         if (buf_out)
320         {
321             wrbuf_rewind(wrbuf);
322             wrbuf_write(wrbuf, (const char *) buf_out, len_out);
323             buf = wrbuf_cstr(wrbuf);
324             *len = len_out;
325         }
326         xmlFreeDoc(doc);
327         xmlFree(buf_out);
328     }
329     return buf;
330 }
331
332 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
333                               WRBUF wrbuf,
334                               const char *type_spec, int *len)
335 {
336     const char *ret = 0;
337     NMEM nmem = 0;
338     char *base64_xpath = 0;
339     char *base64_type_spec = 0;
340     size_t i;
341     char type[40];
342     char charset[40];
343     char format[3];
344     const char *cp = type_spec;
345
346     for (i = 0; cp[i] && cp[i] != ';' && cp[i] != ' ' && i < sizeof(type)-1;
347          i++)
348         type[i] = cp[i];
349     type[i] = '\0';
350     charset[0] = '\0';
351     format[0] = '\0';
352     while (1)
353     {
354         while (cp[i] == ' ')
355             i++;
356         if (cp[i] != ';')
357             break;
358         i++;
359         while (cp[i] == ' ')
360             i++;
361         if (!strncmp(cp + i, "charset=", 8))
362         {
363             size_t j = 0;
364             i = i + 8; /* skip charset= */
365             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
366             {
367                 if (j < sizeof(charset)-1)
368                     charset[j++] = cp[i];
369             }
370             charset[j] = '\0';
371         }
372         else if (!strncmp(cp + i, "format=", 7))
373         {
374             size_t j = 0; 
375             i = i + 7;
376             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
377             {
378                 if (j < sizeof(format)-1)
379                     format[j++] = cp[i];
380             }
381             format[j] = '\0';
382         } 
383         else if (!strncmp(cp + i, "base64", 6))
384         {
385             i = i + 6;
386
387             while (cp[i] == ' ')
388                 i++;
389             if (cp[i] == '(')
390             {
391                 size_t i0;
392                 nmem = nmem_create();
393                 i++;
394                 while (cp[i] == ' ')
395                     i++;
396                 i0 = i;
397                 while (cp[i] != ',' && cp[i])
398                     i++;
399                 base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
400                 if (cp[i])
401                     i++;
402                 while (cp[i] == ' ')
403                     i++;
404                 i0 = i;
405                 while (cp[i] != ')' && cp[i])
406                     i++;
407                 base64_type_spec = nmem_strdupn(nmem, cp + i0, i - i0);
408                 if (cp[i])
409                     i++;
410             }
411         } 
412     }
413     if (!strcmp(type, "database"))
414     {
415         if (len)
416             *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
417         ret = npr->databaseName;
418     }
419     else if (!strcmp(type, "schema"))
420     {
421         if (len)
422             *len = schema ? strlen(schema) : 0;
423         ret = schema;
424     }
425     else if (!strcmp(type, "syntax"))
426     {
427         const char *desc = 0;   
428         if (npr->which == Z_NamePlusRecord_databaseRecord)
429         {
430             Z_External *r = (Z_External *) npr->u.databaseRecord;
431             desc = yaz_oid_to_string(yaz_oid_std(), r->direct_reference, 0);
432         }
433         if (!desc)
434             desc = "none";
435         if (len)
436             *len = strlen(desc);
437         ret = desc;
438     }
439     if (npr->which != Z_NamePlusRecord_databaseRecord)
440         ;
441     else if (!strcmp(type, "render"))
442     {
443         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
444     }
445     else if (!strcmp(type, "xml"))
446     {
447         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
448                                 format);
449     }
450     else if (!strcmp(type, "txml"))
451     {
452         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
453                                 format);
454     }
455     else if (!strcmp(type, "raw"))
456     {
457         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
458                                 format);
459     }
460     else if (!strcmp(type, "ext"))
461     {
462         if (len) *len = -1;
463         ret = (const char *) npr->u.databaseRecord;
464     }
465     else if (!strcmp(type, "opac"))
466     {
467         if (npr->u.databaseRecord->which == Z_External_OPAC)
468             ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
469                                     format);
470     }
471
472     if (base64_xpath)
473         ret = base64_render(nmem, wrbuf,
474                             ret, len, base64_xpath, base64_type_spec);
475     nmem_destroy(nmem);
476     return ret;
477 }
478
479 /*
480  * Local variables:
481  * c-basic-offset: 4
482  * c-file-style: "Stroustrup"
483  * indent-tabs-mode: nil
484  * End:
485  * vim: shiftwidth=4 tabstop=8 expandtab
486  */
487