Add unit test for ICU rule join
[yaz-moved-to-github.git] / test / test_record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 #if HAVE_CONFIG_H
6 #include <config.h>
7 #endif
8
#include <yaz/record_conv.h>
#include <yaz/test.h>
#include <yaz/wrbuf.h>
#include <yaz/log.h>
#include <stdlib.h>
#include <string.h>
14
15 #if YAZ_HAVE_XML2
16
17 #include <libxml/parser.h>
18 #include <libxml/tree.h>
19
20 yaz_record_conv_t conv_configure(const char *xmlstring, WRBUF w)
21 {
22     xmlDocPtr doc = xmlParseMemory(xmlstring, strlen(xmlstring));
23     if (!doc)
24     {
25         wrbuf_printf(w, "xmlParseMemory");
26         return 0;
27     }
28     else
29     {
30         xmlNodePtr ptr = xmlDocGetRootElement(doc);
31         yaz_record_conv_t p = yaz_record_conv_create();
32
33         if (p)
34         {
35             const char *srcdir = getenv("srcdir");
36             if (srcdir)
37                 yaz_record_conv_set_path(p, srcdir);
38         }
39         if (!ptr)
40         {
41             wrbuf_printf(w, "xmlDocGetRootElement");
42             yaz_record_conv_destroy(p);
43             p = 0;
44         }
45         else if (!p)
46         {
47             wrbuf_printf(w, "yaz_record_conv_create");
48         }
49         else
50         {
51
52
53             int r = yaz_record_conv_configure(p, ptr);
54
55             if (r)
56             {
57                 wrbuf_puts(w, yaz_record_conv_get_error(p));
58                 yaz_record_conv_destroy(p);
59                 p = 0;
60             }
61         }
62         xmlFreeDoc(doc);
63         return p;
64     }
65 }
66
67 int conv_configure_test(const char *xmlstring, const char *expect_error,
68                         yaz_record_conv_t *pt)
69 {
70     WRBUF w = wrbuf_alloc();
71     int ret;
72
73     yaz_record_conv_t p = conv_configure(xmlstring, w);
74
75     if (!p)
76     {
77         if (expect_error && !strcmp(wrbuf_cstr(w), expect_error))
78             ret = 1;
79         else
80         {
81             ret = 0;
82             printf("%s\n", wrbuf_cstr(w));
83         }
84     }
85     else
86     {
87         if (expect_error)
88             ret = 0;
89         else
90             ret = 1;
91     }
92
93     if (pt)
94         *pt = p;
95     else
96         if (p)
97             yaz_record_conv_destroy(p);
98
99     wrbuf_destroy(w);
100     return ret;
101 }
102
/*
 * Configuration checks: feeds valid and invalid <backend> descriptions to
 * conv_configure_test and verifies the exact diagnostic for each bad one.
 * No converter is kept (last argument is always 0).
 */
static void tst_configure(void)
{
    /* text that is not well-formed XML is rejected by the parser */
    YAZ_CHECK(conv_configure_test("<bad", "xmlParseMemory", 0));

    /* an element other than <marc> / <xslt> inside <backend> */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
              "<bad/></backend>",
              "Element <backend>: expected <marc> or "
              "<xslt> element, got <bad>", 0));

#if YAZ_HAVE_XSLT
    /* <marc> without the inputformat attribute */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
              "<xslt stylesheet=\"test_record_conv.xsl\"/>"
              "<marc"
              " inputcharset=\"marc-8\""
              " outputcharset=\"marc-8\""
              "/>"
              "</backend>",
              "Element <marc>: attribute 'inputformat' "
              "required", 0));

    /* <xslt> without the stylesheet attribute */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
              "<xslt/>"
              "</backend>",
              "Element <xslt>: attribute 'stylesheet' "
              "expected", 0));

    /* complete <marc> followed by <xslt>: must be accepted */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
              "<marc"
              " inputcharset=\"utf-8\""
              " outputcharset=\"marc-8\""
              " inputformat=\"xml\""
              " outputformat=\"marc\""
              "/>"
              "<xslt stylesheet=\"test_record_conv.xsl\"/>"
              "</backend>",
              0, 0));
#else
    /* built without XSLT: any <xslt> element must be refused */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
              "<xslt stylesheet=\"test_record_conv.xsl\"/>"
              "</backend>",
              "xslt unsupported."
              " YAZ compiled without XSLT support", 0));
#endif
}
149
150 static int conv_convert_test(yaz_record_conv_t p,
151                              const char *input_record,
152                              const char *output_expect_record)
153 {
154     int ret = 0;
155     if (!p)
156     {
157         YAZ_CHECK(ret);
158     }
159     else
160     {
161         WRBUF output_record = wrbuf_alloc();
162         int r = yaz_record_conv_record(p, input_record, strlen(input_record),
163                                        output_record);
164         if (r)
165         {
166             if (output_expect_record)
167             {
168                 printf("yaz_record_conv error=%s\n",
169                        yaz_record_conv_get_error(p));
170                 ret = 0;
171             }
172             else
173                 ret = 1;
174         }
175         else
176         {
177             if (!output_expect_record)
178             {
179                 ret = 0;
180             }
181             else if (strcmp(output_expect_record, wrbuf_cstr(output_record)))
182             {
183                 ret = 0;
184                 printf("got-output_record len=%ld: %s\n",
185                        (long) wrbuf_len(output_record),
186                        wrbuf_cstr(output_record));
187                 printf("output_expect_record len=%ld %s\n",
188                        (long) strlen(output_expect_record),
189                        output_expect_record);
190             }
191             else
192             {
193                 ret = 1;
194             }
195         }
196         wrbuf_destroy(output_record);
197     }
198     return ret;
199 }
200
201 static int conv_convert_test_iter(yaz_record_conv_t p,
202                                   const char *input_record,
203                                   const char *output_expect_record,
204                                   int num_iter)
205 {
206     int i;
207     int ret;
208     for (i = 0; i < num_iter; i++)
209     {
210         ret = conv_convert_test(p, input_record, output_expect_record);
211         if (!ret)
212             break;
213     }
214     return ret;
215 }
216
217 static void tst_convert1(void)
218 {
219     yaz_record_conv_t p = 0;
220     const char *marcxml_rec =
221         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
222         "  <leader>00080nam a22000498a 4500</leader>\n"
223         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
224         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
225         "    <subfield code=\"a\">   11224466 </subfield>\n"
226         "  </datafield>\n"
227         "</record>\n";
228     const char *tmarcxml_rec =
229         "<r xmlns=\"http://www.indexdata.com/MARC21/turboxml\">\n"
230         "  <l>00080nam a22000498a 4500</l>\n"
231         "  <c001>   11224466 </c001>\n"
232         "  <d010 i1=\" \" i2=\" \">\n"
233         "    <sa>   11224466 </sa>\n"
234         "  </d010>\n"
235         "</r>\n";
236     const char *iso2709_rec =
237         "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
238         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
239         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
240         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
241         "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
242
243     YAZ_CHECK(conv_configure_test("<backend>"
244                                   "<marc"
245                                   " inputcharset=\"utf-8\""
246                                   " outputcharset=\"marc-8\""
247                                   " inputformat=\"xml\""
248                                   " outputformat=\"marc\""
249                                   "/>"
250                                   "</backend>",
251                                   0, &p));
252     YAZ_CHECK(conv_convert_test(p, marcxml_rec, iso2709_rec));
253     YAZ_CHECK(conv_convert_test(p, tmarcxml_rec, iso2709_rec));
254     yaz_record_conv_destroy(p);
255
256     YAZ_CHECK(conv_configure_test("<backend>"
257                                   "<marc"
258                                   " outputcharset=\"utf-8\""
259                                   " inputcharset=\"marc-8\""
260                                   " outputformat=\"marcxml\""
261                                   " inputformat=\"marc\""
262                                   "/>"
263                                   "</backend>",
264                                   0, &p));
265     YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
266     yaz_record_conv_destroy(p);
267
268
269     YAZ_CHECK(conv_configure_test("<backend>"
270                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
271                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
272                                   "<marc"
273                                   " inputcharset=\"utf-8\""
274                                   " outputcharset=\"marc-8\""
275                                   " inputformat=\"xml\""
276                                   " outputformat=\"marc\""
277                                   "/>"
278                                   "<marc"
279                                   " outputcharset=\"utf-8\""
280                                   " inputcharset=\"marc-8\""
281                                   " outputformat=\"marcxml\""
282                                   " inputformat=\"marc\""
283                                   "/>"
284                                   "</backend>",
285                                   0, &p));
286     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
287     yaz_record_conv_destroy(p);
288
289
290     YAZ_CHECK(conv_configure_test("<backend>"
291                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
292                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
293                                   "<marc"
294                                   " outputcharset=\"marc-8\""
295                                   " inputformat=\"xml\""
296                                   " outputformat=\"marc\""
297                                   "/>"
298                                   "<marc"
299                                   " inputcharset=\"marc-8\""
300                                   " outputformat=\"marcxml\""
301                                   " inputformat=\"marc\""
302                                   "/>"
303                                   "</backend>",
304                                   0, &p));
305     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
306     yaz_record_conv_destroy(p);
307 }
308
309 static void tst_convert2(void)
310 {
311     yaz_record_conv_t p = 0;
312     const char *marcxml_rec =
313         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
314         "  <leader>00080nam a22000498a 4500</leader>\n"
315         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
316         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
317         "    <subfield code=\"a\">k&#xf8;benhavn</subfield>\n"
318         "  </datafield>\n"
319         "</record>\n";
320     const char *iso2709_rec =
321         "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
322         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
323         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
324         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
325         "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
326
327     YAZ_CHECK(conv_configure_test("<backend>"
328                                   "<marc"
329                                   " inputcharset=\"utf-8\""
330                                   " outputcharset=\"marc-8\""
331                                   " inputformat=\"xml\""
332                                   " outputformat=\"marc\""
333                                   "/>"
334                                   "</backend>",
335                                   0, &p));
336     YAZ_CHECK(conv_convert_test_iter(p, marcxml_rec, iso2709_rec, 100));
337     yaz_record_conv_destroy(p);
338 }
339
340 #endif
341
342 int main(int argc, char **argv)
343 {
344     YAZ_CHECK_INIT(argc, argv);
345     yaz_log_xml_errors(0, 0 /* disable log */);
346 #if YAZ_HAVE_XML2
347     tst_configure();
348 #endif
349 #if  YAZ_HAVE_XSLT
350     tst_convert1();
351     tst_convert2();
352 #endif
353     YAZ_CHECK_TERM;
354 }
355
356 /*
357  * Local variables:
358  * c-basic-offset: 4
359  * c-file-style: "Stroustrup"
360  * indent-tabs-mode: nil
361  * End:
362  * vim: shiftwidth=4 tabstop=8 expandtab
363  */
364