3cb5bfd6ae41d1c69231c2af3bc35c742d6b522a
[yaz-moved-to-github.git] / test / test_record_conv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 #if HAVE_CONFIG_H
6 #include <config.h>
7 #endif
8
#include <stdlib.h>
#include <string.h>

#include <yaz/record_conv.h>
#include <yaz/test.h>
#include <yaz/wrbuf.h>
#include <yaz/log.h>
#include <yaz/proto.h>
#include <yaz/prt-ext.h>
#include <yaz/oid_db.h>
17 #if YAZ_HAVE_XML2
18
19 #include <libxml/parser.h>
20 #include <libxml/tree.h>
21
22 #if YAZ_HAVE_XSLT
23 #include <libxslt/xslt.h>
24 #endif
25
26 yaz_record_conv_t conv_configure(const char *xmlstring, WRBUF w)
27 {
28     xmlDocPtr doc = xmlParseMemory(xmlstring, strlen(xmlstring));
29     if (!doc)
30     {
31         wrbuf_printf(w, "xmlParseMemory");
32         return 0;
33     }
34     else
35     {
36         xmlNodePtr ptr = xmlDocGetRootElement(doc);
37         yaz_record_conv_t p = yaz_record_conv_create();
38
39         if (p)
40         {
41             const char *srcdir = getenv("srcdir");
42             if (srcdir)
43                 yaz_record_conv_set_path(p, srcdir);
44         }
45         if (!ptr)
46         {
47             wrbuf_printf(w, "xmlDocGetRootElement");
48             yaz_record_conv_destroy(p);
49             p = 0;
50         }
51         else if (!p)
52         {
53             wrbuf_printf(w, "yaz_record_conv_create");
54         }
55         else
56         {
57
58
59             int r = yaz_record_conv_configure(p, ptr);
60
61             if (r)
62             {
63                 wrbuf_puts(w, yaz_record_conv_get_error(p));
64                 yaz_record_conv_destroy(p);
65                 p = 0;
66             }
67         }
68         xmlFreeDoc(doc);
69         return p;
70     }
71 }
72
73 int conv_configure_test(const char *xmlstring, const char *expect_error,
74                         yaz_record_conv_t *pt)
75 {
76     WRBUF w = wrbuf_alloc();
77     int ret;
78
79     yaz_record_conv_t p = conv_configure(xmlstring, w);
80
81     if (!p)
82     {
83         if (expect_error && !strcmp(wrbuf_cstr(w), expect_error))
84             ret = 1;
85         else
86         {
87             ret = 0;
88             printf("%s\n", wrbuf_cstr(w));
89         }
90     }
91     else
92     {
93         if (expect_error)
94             ret = 0;
95         else
96             ret = 1;
97     }
98
99     if (pt)
100         *pt = p;
101     else
102         if (p)
103             yaz_record_conv_destroy(p);
104
105     wrbuf_destroy(w);
106     return ret;
107 }
108
/*
 * Checks how converter configuration reacts to a series of XML snippets:
 * text that is not well-formed, a <backend> with an unknown child and,
 * depending on whether YAZ was built with XSLT, <marc>/<xslt> steps that
 * are incomplete, complete, or unsupported. No handle is requested back
 * from conv_configure_test().
 */
static void tst_configure(void)
{
    /* <backend> whose child is neither <marc> nor <xslt> */
    const char *unknown_child_cfg =
        "<backend syntax='usmarc' name='F'>"
        "<bad/></backend>";
    const char *unknown_child_err =
        "Element <backend>: expected <marc> or "
        "<xslt> element, got <bad>";
#if YAZ_HAVE_XSLT
    /* <marc> step that gives character sets but no input format */
    const char *marc_no_format_cfg =
        "<backend syntax='usmarc' name='F'>"
        "<xslt stylesheet=\"test_record_conv.xsl\"/>"
        "<marc"
        " inputcharset=\"marc-8\""
        " outputcharset=\"marc-8\""
        "/>"
        "</backend>";
    const char *marc_no_format_err =
        "Element <marc>: attribute 'inputformat' "
        "required";
    /* <xslt> step without a stylesheet attribute */
    const char *xslt_no_sheet_cfg =
        "<backend syntax='usmarc' name='F'>"
        "<xslt/>"
        "</backend>";
    const char *xslt_no_sheet_err =
        "Element <xslt>: attribute 'stylesheet' "
        "expected";
    /* fully specified <marc> step followed by an <xslt> step */
    const char *complete_cfg =
        "<backend syntax='usmarc' name='F'>"
        "<marc"
        " inputcharset=\"utf-8\""
        " outputcharset=\"marc-8\""
        " inputformat=\"xml\""
        " outputformat=\"marc\""
        "/>"
        "<xslt stylesheet=\"test_record_conv.xsl\"/>"
        "</backend>";
#else
    /* any <xslt> step must be refused in a build without XSLT */
    const char *xslt_cfg =
        "<backend syntax='usmarc' name='F'>"
        "<xslt stylesheet=\"test_record_conv.xsl\"/>"
        "</backend>";
    const char *xslt_unsupported_err =
        "xslt unsupported."
        " YAZ compiled without XSLT support";
#endif

    /* text that is not well-formed XML */
    YAZ_CHECK(conv_configure_test("<bad", "xmlParseMemory", 0));

    YAZ_CHECK(conv_configure_test(unknown_child_cfg, unknown_child_err, 0));

#if YAZ_HAVE_XSLT
    YAZ_CHECK(conv_configure_test(marc_no_format_cfg, marc_no_format_err, 0));
    YAZ_CHECK(conv_configure_test(xslt_no_sheet_cfg, xslt_no_sheet_err, 0));
    YAZ_CHECK(conv_configure_test(complete_cfg, 0, 0));
#else
    YAZ_CHECK(conv_configure_test(xslt_cfg, xslt_unsupported_err, 0));
#endif
}
155
156 static int conv_convert_test(yaz_record_conv_t p,
157                              const char *input_record,
158                              const char *output_expect_record)
159 {
160     int ret = 0;
161     if (!p)
162     {
163         YAZ_CHECK(ret);
164     }
165     else
166     {
167         WRBUF output_record = wrbuf_alloc();
168         int r = yaz_record_conv_record(p, input_record, strlen(input_record),
169                                        output_record);
170         if (r)
171         {
172             if (output_expect_record)
173             {
174                 printf("yaz_record_conv error=%s\n",
175                        yaz_record_conv_get_error(p));
176                 ret = 0;
177             }
178             else
179                 ret = 1;
180         }
181         else
182         {
183             if (!output_expect_record)
184             {
185                 ret = 0;
186             }
187             else if (strcmp(output_expect_record, wrbuf_cstr(output_record)))
188             {
189                 ret = 0;
190                 printf("got-output_record len=%ld: %s\n",
191                        (long) wrbuf_len(output_record),
192                        wrbuf_cstr(output_record));
193                 printf("output_expect_record len=%ld %s\n",
194                        (long) strlen(output_expect_record),
195                        output_expect_record);
196             }
197             else
198             {
199                 ret = 1;
200             }
201         }
202         wrbuf_destroy(output_record);
203     }
204     return ret;
205 }
206
207 static int conv_convert_test_iter(yaz_record_conv_t p,
208                                   const char *input_record,
209                                   const char *output_expect_record,
210                                   int num_iter)
211 {
212     int i;
213     int ret;
214     for (i = 0; i < num_iter; i++)
215     {
216         ret = conv_convert_test(p, input_record, output_expect_record);
217         if (!ret)
218             break;
219     }
220     return ret;
221 }
222
223 static void tst_convert1(void)
224 {
225     yaz_record_conv_t p = 0;
226     const char *marcxml_rec =
227         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
228         "  <leader>00080nam a22000498a 4500</leader>\n"
229         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
230         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
231         "    <subfield code=\"a\">   11224466 </subfield>\n"
232         "  </datafield>\n"
233         "</record>\n";
234     const char *tmarcxml_rec =
235         "<r xmlns=\"http://www.indexdata.com/MARC21/turboxml\">\n"
236         "  <l>00080nam a22000498a 4500</l>\n"
237         "  <c001>   11224466 </c001>\n"
238         "  <d010 i1=\" \" i2=\" \">\n"
239         "    <sa>   11224466 </sa>\n"
240         "  </d010>\n"
241         "</r>\n";
242     const char *iso2709_rec =
243         "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
244         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
245         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
246         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
247         "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
248
249     YAZ_CHECK(conv_configure_test("<backend>"
250                                   "<marc"
251                                   " inputcharset=\"utf-8\""
252                                   " outputcharset=\"marc-8\""
253                                   " inputformat=\"xml\""
254                                   " outputformat=\"marc\""
255                                   "/>"
256                                   "</backend>",
257                                   0, &p));
258     YAZ_CHECK(conv_convert_test(p, marcxml_rec, iso2709_rec));
259     YAZ_CHECK(conv_convert_test(p, tmarcxml_rec, iso2709_rec));
260     yaz_record_conv_destroy(p);
261
262     YAZ_CHECK(conv_configure_test("<backend>"
263                                   "<marc"
264                                   " outputcharset=\"utf-8\""
265                                   " inputcharset=\"marc-8\""
266                                   " outputformat=\"marcxml\""
267                                   " inputformat=\"marc\""
268                                   "/>"
269                                   "</backend>",
270                                   0, &p));
271     YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
272     yaz_record_conv_destroy(p);
273
274
275     YAZ_CHECK(conv_configure_test("<backend>"
276                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
277                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
278                                   "<marc"
279                                   " inputcharset=\"utf-8\""
280                                   " outputcharset=\"marc-8\""
281                                   " inputformat=\"xml\""
282                                   " outputformat=\"marc\""
283                                   "/>"
284                                   "<marc"
285                                   " outputcharset=\"utf-8\""
286                                   " inputcharset=\"marc-8\""
287                                   " outputformat=\"marcxml\""
288                                   " inputformat=\"marc\""
289                                   "/>"
290                                   "</backend>",
291                                   0, &p));
292     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
293     yaz_record_conv_destroy(p);
294
295
296     YAZ_CHECK(conv_configure_test("<backend>"
297                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
298                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
299                                   "<marc"
300                                   " outputcharset=\"marc-8\""
301                                   " inputformat=\"xml\""
302                                   " outputformat=\"marc\""
303                                   "/>"
304                                   "<marc"
305                                   " inputcharset=\"marc-8\""
306                                   " outputformat=\"marcxml\""
307                                   " inputformat=\"marc\""
308                                   "/>"
309                                   "</backend>",
310                                   0, &p));
311     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
312     yaz_record_conv_destroy(p);
313 }
314
315 static void tst_convert2(void)
316 {
317     yaz_record_conv_t p = 0;
318     const char *marcxml_rec =
319         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
320         "  <leader>00080nam a22000498a 4500</leader>\n"
321         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
322         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
323         "    <subfield code=\"a\">k&#xf8;benhavn</subfield>\n"
324         "  </datafield>\n"
325         "</record>\n";
326     const char *iso2709_rec =
327         "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
328         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
329         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
330         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
331         "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
332
333     YAZ_CHECK(conv_configure_test("<backend>"
334                                   "<marc"
335                                   " inputcharset=\"utf-8\""
336                                   " outputcharset=\"marc-8\""
337                                   " inputformat=\"xml\""
338                                   " outputformat=\"marc\""
339                                   "/>"
340                                   "</backend>",
341                                   0, &p));
342     YAZ_CHECK(conv_convert_test_iter(p, marcxml_rec, iso2709_rec, 100));
343     yaz_record_conv_destroy(p);
344 }
345
346 static void tst_convert3(void)
347 {
348     NMEM nmem = nmem_create();
349     int ret;
350     yaz_record_conv_t p = 0;
351
352     const char *iso2709_rec =
353         "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
354         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
355         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
356         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
357         "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
358
359     const char *opacxml_rec =
360         "<opacRecord>\n"
361         "  <bibliographicRecord>\n"
362         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
363         "  <leader>00077nam a22000498a 4500</leader>\n"
364         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
365         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
366         "    <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */
367         "benhavn</subfield>\n"
368         "  </datafield>\n"
369         "</record>\n"
370         "  </bibliographicRecord>\n"
371         "<holdings>\n"
372         " <holding>\n"
373         "  <typeOfRecord>u</typeOfRecord>\n"
374         "  <encodingLevel>U</encodingLevel>\n"
375         "  <receiptAcqStatus>0</receiptAcqStatus>\n"
376         "  <dateOfReport>000000</dateOfReport>\n"
377         "  <nucCode>s-FM/GC</nucCode>\n"
378         "  <localLocation>Main or Science/Business Reading Rms - STORED OFFSITE</localLocation>\n"
379         "  <callNumber>MLCM 89/00602 (N)</callNumber>\n"
380         "  <shelvingData>FT MEADE</shelvingData>\n"
381         "  <copyNumber>Copy 1</copyNumber>\n"
382         "  <volumes>\n"
383         "   <volume>\n"
384         "    <enumeration>1</enumeration>\n"
385         "    <chronology>2</chronology>\n"
386         "    <enumAndChron>3</enumAndChron>\n"
387         "   </volume>\n"
388         "   <volume>\n"
389         "    <enumeration>1</enumeration>\n"
390         "    <chronology>2</chronology>\n"
391         "    <enumAndChron>3</enumAndChron>\n"
392         "   </volume>\n"
393         "  </volumes>\n"
394         "  <circulations>\n"
395         "   <circulation>\n"
396         "    <availableNow value=\"1\"/>\n"
397         "    <availabilityDate>20130129</availabilityDate>\n"
398         "    <itemId>1226176</itemId>\n"
399         "    <renewable value=\"0\"/>\n"
400         "    <onHold value=\"0\"/>\n"
401         "   </circulation>\n"
402         "  </circulations>\n"
403         " </holding>\n"
404         "</holdings>\n"
405         "</opacRecord>\n";
406
407     Z_OPACRecord *z_opac = nmem_malloc(nmem, sizeof(*z_opac));
408     Z_HoldingsAndCircData *h;
409     Z_CircRecord *circ;
410
411     z_opac->bibliographicRecord =
412         z_ext_record_oid_nmem(nmem, yaz_oid_recsyn_usmarc,
413                               iso2709_rec, strlen(iso2709_rec));
414     z_opac->num_holdingsData = 1;
415     z_opac->holdingsData = (Z_HoldingsRecord **)
416         nmem_malloc(nmem, sizeof(Z_HoldingsRecord *) * 1);
417     z_opac->holdingsData[0] = (Z_HoldingsRecord *)
418         nmem_malloc(nmem, sizeof(Z_HoldingsRecord));
419     z_opac->holdingsData[0]->which = Z_HoldingsRecord_holdingsAndCirc;
420     h = z_opac->holdingsData[0]->u.holdingsAndCirc = (Z_HoldingsAndCircData *)
421          nmem_malloc(nmem, sizeof(*h));
422     h->typeOfRecord = nmem_strdup(nmem, "u");
423     h->encodingLevel = nmem_strdup(nmem, "U");
424     h->format = 0;
425     h->receiptAcqStatus = nmem_strdup(nmem, "0");
426     h->generalRetention = 0;
427     h->completeness = 0;
428     h->dateOfReport = nmem_strdup(nmem, "000000");
429     h->nucCode = nmem_strdup(nmem, "s-FM/GC");
430     h->localLocation = nmem_strdup(nmem,
431                                    "Main or Science/Business Reading "
432                                    "Rms - STORED OFFSITE");
433     h->shelvingLocation = 0;
434     h->callNumber = nmem_strdup(nmem, "MLCM 89/00602 (N)");
435     h->shelvingData = nmem_strdup(nmem, "FT MEADE");
436     h->copyNumber = nmem_strdup(nmem, "Copy 1");
437     h->publicNote = 0;
438     h->reproductionNote = 0;
439     h->termsUseRepro = 0;
440     h->enumAndChron = 0;
441     h->num_volumes = 2;
442     h->volumes = 0;
443
444     h->volumes = (Z_Volume **)
445         nmem_malloc(nmem, 2 * sizeof(Z_Volume *));
446
447     h->volumes[0] = (Z_Volume *)
448         nmem_malloc(nmem, sizeof(Z_Volume));
449     h->volumes[1] = h->volumes[0];
450
451     h->volumes[0]->enumeration = nmem_strdup(nmem, "1");
452     h->volumes[0]->chronology = nmem_strdup(nmem, "2");
453     h->volumes[0]->enumAndChron = nmem_strdup(nmem, "3");
454
455     h->num_circulationData = 1;
456     h->circulationData = (Z_CircRecord **)
457         nmem_malloc(nmem, 1 * sizeof(Z_CircRecord *));
458     circ = h->circulationData[0] = (Z_CircRecord *)
459         nmem_malloc(nmem, sizeof(Z_CircRecord));
460     circ->availableNow = nmem_booldup(nmem, 1);
461     circ->availablityDate = nmem_strdup(nmem, "20130129");
462     circ->availableThru = 0;
463     circ->restrictions = 0;
464     circ->itemId = nmem_strdup(nmem, "1226176");
465     circ->renewable = nmem_booldup(nmem, 0);
466     circ->onHold = nmem_booldup(nmem, 0);
467     circ->enumAndChron = 0;
468     circ->midspine = 0;
469     circ->temporaryLocation = 0;
470
471     YAZ_CHECK(conv_configure_test("<backend>"
472                                   "<marc"
473                                   " inputcharset=\"marc-8\""
474                                   " outputcharset=\"utf-8\""
475                                   " inputformat=\"marc\""
476                                   " outputformat=\"marcxml\""
477                                   "/>"
478                                   "</backend>",
479                                   0, &p));
480
481     if (p)
482     {
483         WRBUF output_record = wrbuf_alloc();
484         ret = yaz_record_conv_opac_record(p, z_opac, output_record);
485         YAZ_CHECK(ret == 0);
486         if (ret == 0)
487         {
488             ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
489             YAZ_CHECK(ret == 0);
490             if (ret)
491             {
492                 printf("got-output_record len=%ld: %s\n",
493                        (long) wrbuf_len(output_record),
494                        wrbuf_cstr(output_record));
495                 printf("output_expect_record len=%ld %s\n",
496                        (long) strlen(opacxml_rec),
497                        opacxml_rec);
498             }
499         }
500         yaz_record_conv_destroy(p);
501         wrbuf_destroy(output_record);
502     }
503     {
504         Z_OPACRecord *opac = 0;
505         yaz_marc_t mt =  yaz_marc_create();
506         ret = yaz_xml_to_opac(mt, opacxml_rec, strlen(opacxml_rec),
507                               &opac, 0 /* iconv */, nmem);
508         YAZ_CHECK(ret);
509         YAZ_CHECK(opac);
510
511         if (opac)
512         {
513             WRBUF output_record = wrbuf_alloc();
514             char *p;
515
516             yaz_marc_xml(mt, YAZ_MARC_MARCXML);
517             yaz_opac_decode_wrbuf(mt, opac, output_record);
518
519             /* change MARC size to 00077 from 00078, due to
520                encoding of the aring (two bytes in UTF-8) */
521             p = strstr(wrbuf_buf(output_record), "00078");
522             YAZ_CHECK(p);
523             if (p)
524                 p[4] = '7';
525
526             ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
527             YAZ_CHECK(ret == 0);
528             if (ret)
529             {
530                 printf("got-output_record len=%ld: %s\n",
531                        (long) wrbuf_len(output_record),
532                        wrbuf_cstr(output_record));
533                 printf("output_expect_record len=%ld %s\n",
534                        (long) strlen(opacxml_rec),
535                        opacxml_rec);
536             }
537             wrbuf_destroy(output_record);
538         }
539         yaz_marc_destroy(mt);
540     }
541     nmem_destroy(nmem);
542 }
543
544 #endif
545
546 int main(int argc, char **argv)
547 {
548     YAZ_CHECK_INIT(argc, argv);
549     yaz_log_xml_errors(0, 0 /* disable log */);
550 #if YAZ_HAVE_XML2
551     tst_configure();
552 #endif
553 #if YAZ_HAVE_XSLT
554     tst_convert1();
555     tst_convert2();
556     tst_convert3();
557     xsltCleanupGlobals();
558 #endif
559 #if YAZ_HAVE_XML2
560     xmlCleanupParser();
561 #endif
562     YAZ_CHECK_TERM;
563 }
564
565 /*
566  * Local variables:
567  * c-basic-offset: 4
568  * c-file-style: "Stroustrup"
569  * indent-tabs-mode: nil
570  * End:
571  * vim: shiftwidth=4 tabstop=8 expandtab
572  */
573