ba5311e81bb71a8fa72622de97a034f768a61621
[yaz-moved-to-github.git] / src / record_render.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file record_render.c
7  * \brief Render Z39.50 records (NamePlusRecord)
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16
17 #include <yaz/marcdisp.h>
18 #include <yaz/record_render.h>
19 #include <yaz/yaz-iconv.h>
20 #include <yaz/proto.h>
21 #include <yaz/oid_db.h>
22 #include <yaz/nmem_xml.h>
23 #include <yaz/base64.h>
24
25 #if YAZ_HAVE_XML2
26 #include <libxml/xpath.h>
27 #include <libxml/xpathInternals.h>
28 #endif
29
30 static yaz_iconv_t iconv_create_charset(const char *record_charset,
31                                         yaz_iconv_t *cd2)
32 {
33     char charset_buf[40];
34     yaz_iconv_t cd = 0;
35     char *from_set1 = 0;
36     char *from_set2 = 0;
37     char *to_set = 0;
38     if (record_charset && *record_charset)
39     {
40         char *cp = charset_buf;
41         
42         strncpy(charset_buf, record_charset, sizeof(charset_buf)-1);
43         charset_buf[sizeof(charset_buf)-1] = '\0';
44         
45         from_set1 = cp;
46         while (*cp && *cp != ',' && *cp != '/')
47             cp++;
48         if (*cp == '/')
49         {
50             *cp++ = '\0'; /* terminate from_set1 */
51             from_set2 = cp;
52             while (*cp && *cp != ',')
53                 cp++;
54         }
55         if (*cp == ',')
56         {
57             *cp++ = '\0';  /* terminate from_set1 or from_set2 */
58             to_set = cp;
59             while (*cp)
60                 cp++;
61         }
62     }
63     
64     if (from_set1)
65         cd = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set1);
66     if (cd2)
67     {
68         if (from_set2)
69             *cd2 = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set2);
70         else
71             *cd2 = 0;
72     }
73     return cd;
74 }
75
76 static const char *return_marc_record(WRBUF wrbuf,
77                                       int marc_type,
78                                       int *len,
79                                       const char *buf, int sz,
80                                       const char *record_charset)
81 {
82     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
83     yaz_marc_t mt = yaz_marc_create();
84     const char *ret_string = 0;
85
86     if (cd)
87         yaz_marc_iconv(mt, cd);
88     yaz_marc_xml(mt, marc_type);
89     if (yaz_marc_decode_wrbuf(mt, buf, sz, wrbuf) > 0)
90     {
91         if (len)
92             *len = wrbuf_len(wrbuf);
93         ret_string = wrbuf_cstr(wrbuf);
94     }
95     yaz_marc_destroy(mt);
96     if (cd)
97         yaz_iconv_close(cd);
98     return ret_string;
99 }
100
101 static const char *return_opac_record(WRBUF wrbuf,
102                                       int marc_type,
103                                       int *len,
104                                       Z_OPACRecord *opac_rec,
105                                       const char *record_charset)
106 {
107     yaz_iconv_t cd2;
108     yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
109     yaz_marc_t mt = yaz_marc_create();
110
111     if (cd)
112         yaz_marc_iconv(mt, cd);
113     yaz_marc_xml(mt, marc_type);
114
115     if (cd2)
116         yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
117     else
118         yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
119         
120     yaz_marc_destroy(mt);
121
122     if (cd)
123         yaz_iconv_close(cd);
124     if (cd2)
125         yaz_iconv_close(cd2);
126     if (len)
127         *len = wrbuf_len(wrbuf);
128     return wrbuf_cstr(wrbuf);
129 }
130
131 static const char *return_string_record(WRBUF wrbuf,
132                                         int *len,
133                                         const char *buf, int sz,
134                                         const char *record_charset)
135 {
136     yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
137
138     if (cd)
139     {
140         wrbuf_iconv_write(wrbuf, cd, buf, sz);
141         wrbuf_iconv_reset(wrbuf, cd);
142
143         buf = wrbuf_cstr(wrbuf);
144         sz = wrbuf_len(wrbuf);
145         yaz_iconv_close(cd);
146     }
147     if (len)
148         *len = sz;
149     return buf;
150 }
151
152 static const char *return_record_wrbuf(WRBUF wrbuf, int *len,
153                                        Z_NamePlusRecord *npr,
154                                        int marctype, const char *charset)
155 {
156     Z_External *r = (Z_External *) npr->u.databaseRecord;
157     const Odr_oid *oid = r->direct_reference;
158
159     wrbuf_rewind(wrbuf);
160     /* render bibliographic record .. */
161     if (r->which == Z_External_OPAC)
162     {
163         return return_opac_record(wrbuf, marctype, len,
164                                   r->u.opac, charset);
165     }
166     if (r->which == Z_External_sutrs)
167         return return_string_record(wrbuf, len,
168                                     (char*) r->u.sutrs->buf,
169                                     r->u.sutrs->len,
170                                     charset);
171     else if (r->which == Z_External_octet)
172     {
173         if (yaz_oid_is_iso2709(oid))
174         {
175             const char *ret_buf = return_marc_record(
176                 wrbuf, marctype, len,
177                 (const char *) r->u.octet_aligned->buf,
178                 r->u.octet_aligned->len,
179                 charset);
180             if (ret_buf)
181                 return ret_buf;
182             /* bad ISO2709. Return fail unless raw (ISO2709) is wanted */
183             if (marctype != YAZ_MARC_ISO2709)
184                 return 0;
185         }
186         return return_string_record(wrbuf, len,
187                                     (const char *) r->u.octet_aligned->buf,
188                                     r->u.octet_aligned->len,
189                                     charset);
190     }
191     else if (r->which == Z_External_grs1)
192     {
193         yaz_display_grs1(wrbuf, r->u.grs1, 0);
194         return return_string_record(wrbuf, len,
195                                     wrbuf_buf(wrbuf),
196                                     wrbuf_len(wrbuf),
197                                     charset);
198     }
199     return 0;
200 }
201     
202 static const char *get_record_format(WRBUF wrbuf, int *len,
203                                      Z_NamePlusRecord *npr,
204                                      int marctype, const char *charset,
205                                      const char *format)
206 {
207     const char *res = return_record_wrbuf(wrbuf, len, npr, marctype, charset);
208 #if YAZ_HAVE_XML2
209     if (*format == '1' && len)
210     {
211         /* try to XML format res */
212         xmlDocPtr doc;
213         xmlKeepBlanksDefault(0); /* get get xmlDocFormatMemory to work! */
214         doc = xmlParseMemory(res, *len);
215         if (doc)
216         {
217             xmlChar *xml_mem;
218             int xml_size;
219             xmlDocDumpFormatMemory(doc, &xml_mem, &xml_size, 1);
220             wrbuf_rewind(wrbuf);
221             wrbuf_write(wrbuf, (const char *) xml_mem, xml_size);
222             xmlFree(xml_mem);
223             xmlFreeDoc(doc);
224             res = wrbuf_cstr(wrbuf);
225             *len = wrbuf_len(wrbuf);
226         } 
227     }
228 #endif
229     return res;
230 }
231
232 #if YAZ_HAVE_XML2
233 static int replace_node(NMEM nmem, xmlNode *ptr,
234                         const char *type_spec, char *record_buf)
235 {
236     int ret = -1;
237     const char *res;
238     int len;
239     int m_len;
240     WRBUF wrbuf = wrbuf_alloc();
241     ODR odr = odr_createmem(ODR_ENCODE);
242     Z_NamePlusRecord *npr = odr_malloc(odr, sizeof(*npr));
243     npr->which = Z_NamePlusRecord_databaseRecord;
244
245     if (atoi_n_check(record_buf, 5, &m_len))
246         npr->u.databaseRecord =
247             z_ext_record_usmarc(odr, record_buf, strlen(record_buf));
248     else
249         npr->u.databaseRecord =
250             z_ext_record_xml(odr, record_buf, strlen(record_buf));
251     res = yaz_record_render(npr, 0, wrbuf, type_spec, &len);
252     if (res)
253     {
254         xmlDoc *doc = xmlParseMemory(res, strlen(res));
255         xmlNode *nptr;
256         if (doc)
257         {
258             nptr = xmlCopyNode(xmlDocGetRootElement(doc), 1);
259             xmlReplaceNode(ptr, nptr);
260             xmlFreeDoc(doc);
261         }
262         else
263         {
264             nptr = xmlNewText(BAD_CAST res);
265             xmlReplaceNode(ptr, nptr);
266         }
267         ret = 0;
268     }
269     wrbuf_destroy(wrbuf);
270     odr_destroy(odr);
271     return ret;
272 }
273 #endif
274
275 static const char *base64_render(NMEM nmem, WRBUF wrbuf,
276                                  const char *buf, int *len,
277                                  const char *expr, const char *type_spec)
278 {
279 #if YAZ_HAVE_XML2
280     xmlDocPtr doc = xmlParseMemory(buf, *len);
281     if (doc)
282     {
283         xmlChar *buf_out;
284         int len_out;
285         xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
286         if (xpathCtx)
287         {
288             xmlXPathObjectPtr xpathObj =
289                 xmlXPathEvalExpression((const xmlChar *) expr, xpathCtx);
290             if (xpathObj)
291             {
292                 xmlNodeSetPtr nodes = xpathObj->nodesetval;
293                 if (nodes)
294                 {
295                     int i;
296                     for (i = 0; i < nodes->nodeNr; i++)
297                     {
298                         xmlNode *ptr = nodes->nodeTab[i];
299                         if (ptr->type == XML_TEXT_NODE)
300                         {
301                             const char *input =
302                                 nmem_text_node_cdata(ptr, nmem);
303                             char *output = nmem_malloc(
304                                 nmem, strlen(input) + 1);
305                             if (yaz_base64decode(input, output) == 0)
306                             {
307                                 if (!replace_node(nmem, ptr, type_spec, output))
308                                 {
309                                     /* replacement OK */
310                                     xmlFreeNode(ptr);
311                                     /* unset below to avoid a bad reference in
312                                        xmlXPathFreeObject below */
313                                     nodes->nodeTab[i] = 0;
314                                 }
315                             }
316                         }
317                     }
318                 }
319                 xmlXPathFreeObject(xpathObj);
320             }
321             xmlXPathFreeContext(xpathCtx);
322         }
323         xmlDocDumpMemory(doc, &buf_out, &len_out);
324         if (buf_out)
325         {
326             wrbuf_rewind(wrbuf);
327             wrbuf_write(wrbuf, (const char *) buf_out, len_out);
328             buf = wrbuf_cstr(wrbuf);
329             *len = len_out;
330         }
331         xmlFreeDoc(doc);
332         xmlFree(buf_out);
333     }
334 #endif
335     return buf;
336 }
337
338 const char *yaz_record_render(Z_NamePlusRecord *npr, const char *schema,
339                               WRBUF wrbuf,
340                               const char *type_spec, int *len)
341 {
342     const char *ret = 0;
343     NMEM nmem = 0;
344     char *base64_xpath = 0;
345     char *base64_type_spec = 0;
346     size_t i;
347     char type[40];
348     char charset[40];
349     char format[3];
350     const char *cp = type_spec;
351
352     for (i = 0; cp[i] && cp[i] != ';' && cp[i] != ' ' && i < sizeof(type)-1;
353          i++)
354         type[i] = cp[i];
355     type[i] = '\0';
356     charset[0] = '\0';
357     format[0] = '\0';
358     while (1)
359     {
360         while (cp[i] == ' ')
361             i++;
362         if (cp[i] != ';')
363             break;
364         i++;
365         while (cp[i] == ' ')
366             i++;
367         if (!strncmp(cp + i, "charset=", 8))
368         {
369             size_t j = 0;
370             i = i + 8; /* skip charset= */
371             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
372             {
373                 if (j < sizeof(charset)-1)
374                     charset[j++] = cp[i];
375             }
376             charset[j] = '\0';
377         }
378         else if (!strncmp(cp + i, "format=", 7))
379         {
380             size_t j = 0; 
381             i = i + 7;
382             for (j = 0; cp[i] && cp[i] != ';' && cp[i] != ' '; i++)
383             {
384                 if (j < sizeof(format)-1)
385                     format[j++] = cp[i];
386             }
387             format[j] = '\0';
388         } 
389         else if (!strncmp(cp + i, "base64", 6))
390         {
391             i = i + 6;
392
393             while (cp[i] == ' ')
394                 i++;
395             if (cp[i] == '(')
396             {
397                 size_t i0;
398                 nmem = nmem_create();
399                 i++;
400                 while (cp[i] == ' ')
401                     i++;
402                 i0 = i;
403                 while (cp[i] != ',' && cp[i])
404                     i++;
405                 base64_xpath = nmem_strdupn(nmem, cp + i0, i - i0);
406                 if (cp[i])
407                     i++;
408                 while (cp[i] == ' ')
409                     i++;
410                 i0 = i;
411                 while (cp[i] != ')' && cp[i])
412                     i++;
413                 base64_type_spec = nmem_strdupn(nmem, cp + i0, i - i0);
414                 if (cp[i])
415                     i++;
416             }
417         } 
418     }
419     if (!strcmp(type, "database"))
420     {
421         if (len)
422             *len = (npr->databaseName ? strlen(npr->databaseName) : 0);
423         ret = npr->databaseName;
424     }
425     else if (!strcmp(type, "schema"))
426     {
427         if (len)
428             *len = schema ? strlen(schema) : 0;
429         ret = schema;
430     }
431     else if (!strcmp(type, "syntax"))
432     {
433         const char *desc = 0;   
434         if (npr->which == Z_NamePlusRecord_databaseRecord)
435         {
436             Z_External *r = (Z_External *) npr->u.databaseRecord;
437             desc = yaz_oid_to_string(yaz_oid_std(), r->direct_reference, 0);
438         }
439         if (!desc)
440             desc = "none";
441         if (len)
442             *len = strlen(desc);
443         ret = desc;
444     }
445     if (npr->which != Z_NamePlusRecord_databaseRecord)
446         ;
447     else if (!strcmp(type, "render"))
448     {
449         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_LINE, charset, format);
450     }
451     else if (!strcmp(type, "xml"))
452     {
453         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
454                                 format);
455     }
456     else if (!strcmp(type, "txml"))
457     {
458         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_TURBOMARC, charset,
459                                 format);
460     }
461     else if (!strcmp(type, "raw"))
462     {
463         ret = get_record_format(wrbuf, len, npr, YAZ_MARC_ISO2709, charset,
464                                 format);
465     }
466     else if (!strcmp(type, "ext"))
467     {
468         if (len) *len = -1;
469         ret = (const char *) npr->u.databaseRecord;
470     }
471     else if (!strcmp(type, "opac"))
472     {
473         if (npr->u.databaseRecord->which == Z_External_OPAC)
474             ret = get_record_format(wrbuf, len, npr, YAZ_MARC_MARCXML, charset,
475                                     format);
476     }
477
478     if (base64_xpath)
479         ret = base64_render(nmem, wrbuf,
480                             ret, len, base64_xpath, base64_type_spec);
481     nmem_destroy(nmem);
482     return ret;
483 }
484
485 /*
486  * Local variables:
487  * c-basic-offset: 4
488  * c-file-style: "Stroustrup"
489  * indent-tabs-mode: nil
490  * End:
491  * vim: shiftwidth=4 tabstop=8 expandtab
492  */
493