/* ZOOM: new setting apdufile
 * [yaz-moved-to-github.git] / test / test_record_conv.c
 */
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <yaz/record_conv.h>
#include <yaz/test.h>
#include <yaz/wrbuf.h>
#include <stdlib.h>
#include <string.h>
#include <yaz/log.h>
#include <yaz/proto.h>
#include <yaz/prt-ext.h>
#include <yaz/oid_db.h>
#if YAZ_HAVE_XML2

#include <libxml/parser.h>
#include <libxml/tree.h>

#if YAZ_HAVE_XSLT
#include <libxslt/xslt.h>
#endif
25
26 yaz_record_conv_t conv_configure(const char *xmlstring, WRBUF w)
27 {
28     xmlDocPtr doc = xmlParseMemory(xmlstring, strlen(xmlstring));
29     if (!doc)
30     {
31         wrbuf_printf(w, "xmlParseMemory");
32         return 0;
33     }
34     else
35     {
36         xmlNodePtr ptr = xmlDocGetRootElement(doc);
37         yaz_record_conv_t p = yaz_record_conv_create();
38
39         if (p)
40         {
41             const char *srcdir = getenv("srcdir");
42             if (srcdir)
43                 yaz_record_conv_set_path(p, srcdir);
44         }
45         if (!ptr)
46         {
47             wrbuf_printf(w, "xmlDocGetRootElement");
48             yaz_record_conv_destroy(p);
49             p = 0;
50         }
51         else if (!p)
52         {
53             wrbuf_printf(w, "yaz_record_conv_create");
54         }
55         else
56         {
57
58
59             int r = yaz_record_conv_configure(p, ptr);
60
61             if (r)
62             {
63                 wrbuf_puts(w, yaz_record_conv_get_error(p));
64                 yaz_record_conv_destroy(p);
65                 p = 0;
66             }
67         }
68         xmlFreeDoc(doc);
69         return p;
70     }
71 }
72
73 int conv_configure_test(const char *xmlstring, const char *expect_error,
74                         yaz_record_conv_t *pt)
75 {
76     WRBUF w = wrbuf_alloc();
77     int ret;
78
79     yaz_record_conv_t p = conv_configure(xmlstring, w);
80
81     if (!p)
82     {
83         if (expect_error && !strcmp(wrbuf_cstr(w), expect_error))
84             ret = 1;
85         else
86         {
87             ret = 0;
88             printf("%s\n", wrbuf_cstr(w));
89         }
90     }
91     else
92     {
93         if (expect_error)
94             ret = 0;
95         else
96             ret = 1;
97     }
98
99     if (pt)
100         *pt = p;
101     else
102         if (p)
103             yaz_record_conv_destroy(p);
104
105     wrbuf_destroy(w);
106     return ret;
107 }
108
/* Checks configuration parsing: each case feeds an XML configuration to
 * conv_configure_test() together with the exact error text expected, or
 * 0 where the configuration must be accepted.
 */
static void tst_configure(void)
{
    /* text that is not well-formed XML fails at the parse step */
    YAZ_CHECK(conv_configure_test("<bad", "xmlParseMemory", 0));

    /* an unknown child element of <backend> is rejected */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
                                  "<bad/></backend>",
                                  "Element <backend>: expected <marc> or "
                                  "<xslt> element, got <bad>", 0));

#if YAZ_HAVE_XSLT
    /* <marc> without an inputformat attribute is rejected */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
                                  "<xslt stylesheet=\"test_record_conv.xsl\"/>"
                                  "<marc"
                                  " inputcharset=\"marc-8\""
                                  " outputcharset=\"marc-8\""
                                  "/>"
                                  "</backend>",
                                  "Element <marc>: attribute 'inputformat' "
                                  "required", 0));
    /* <xslt> without a stylesheet attribute is rejected */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
                                  "<xslt/>"
                                  "</backend>",
                                  "Element <xslt>: attribute 'stylesheet' "
                                  "expected", 0));
    /* a complete <marc> step followed by an <xslt> step is accepted */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
                                  "<marc"
                                  " inputcharset=\"utf-8\""
                                  " outputcharset=\"marc-8\""
                                  " inputformat=\"xml\""
                                  " outputformat=\"marc\""
                                  "/>"
                                  "<xslt stylesheet=\"test_record_conv.xsl\"/>"
                                  "</backend>",
                                  0, 0));
#else
    /* without XSLT support any <xslt> step must be refused */
    YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
                                  "<xslt stylesheet=\"test_record_conv.xsl\"/>"
                                  "</backend>",
                                  "xslt unsupported."
                                  " YAZ compiled without XSLT support", 0));
#endif
}
155
156 static int conv_convert_test(yaz_record_conv_t p,
157                              const char *input_record,
158                              const char *output_expect_record)
159 {
160     int ret = 0;
161     if (!p)
162     {
163         YAZ_CHECK(ret);
164     }
165     else
166     {
167         WRBUF output_record = wrbuf_alloc();
168         int r = yaz_record_conv_record(p, input_record, strlen(input_record),
169                                        output_record);
170         if (r)
171         {
172             if (output_expect_record)
173             {
174                 printf("yaz_record_conv error=%s\n",
175                        yaz_record_conv_get_error(p));
176                 ret = 0;
177             }
178             else
179                 ret = 1;
180         }
181         else
182         {
183             if (!output_expect_record)
184             {
185                 ret = 0;
186             }
187             else if (strcmp(output_expect_record, wrbuf_cstr(output_record)))
188             {
189                 ret = 0;
190                 printf("got-output_record len=%ld: %s\n",
191                        (long) wrbuf_len(output_record),
192                        wrbuf_cstr(output_record));
193                 printf("output_expect_record len=%ld %s\n",
194                        (long) strlen(output_expect_record),
195                        output_expect_record);
196             }
197             else
198             {
199                 ret = 1;
200             }
201         }
202         wrbuf_destroy(output_record);
203     }
204     return ret;
205 }
206
207 static int conv_convert_test_iter(yaz_record_conv_t p,
208                                   const char *input_record,
209                                   const char *output_expect_record,
210                                   int num_iter)
211 {
212     int i;
213     int ret;
214     for (i = 0; i < num_iter; i++)
215     {
216         ret = conv_convert_test(p, input_record, output_expect_record);
217         if (!ret)
218             break;
219     }
220     return ret;
221 }
222
223 static void tst_convert1(void)
224 {
225     yaz_record_conv_t p = 0;
226     const char *marcxml_rec =
227         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
228         "  <leader>00080nam a22000498a 4500</leader>\n"
229         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
230         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
231         "    <subfield code=\"a\">   11224466 </subfield>\n"
232         "  </datafield>\n"
233         "</record>\n";
234     const char *tmarcxml_rec =
235         "<r xmlns=\"http://www.indexdata.com/MARC21/turboxml\">\n"
236         "  <l>00080nam a22000498a 4500</l>\n"
237         "  <c001>   11224466 </c001>\n"
238         "  <d010 i1=\" \" i2=\" \">\n"
239         "    <sa>   11224466 </sa>\n"
240         "  </d010>\n"
241         "</r>\n";
242     const char *iso2709_rec =
243         "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
244         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
245         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
246         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
247         "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
248
249     const char *solrmarc_rec =
250         "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
251         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
252         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
253         "#30;\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;\x20\x20"
254         "#31;\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;#29;";
255
256     YAZ_CHECK(conv_configure_test("<backend>"
257                                   "<marc"
258                                   " inputcharset=\"utf-8\""
259                                   " outputcharset=\"marc-8\""
260                                   " inputformat=\"xml\""
261                                   " outputformat=\"marc\""
262                                   "/>"
263                                   "</backend>",
264                                   0, &p));
265     YAZ_CHECK(conv_convert_test(p, marcxml_rec, iso2709_rec));
266     YAZ_CHECK(conv_convert_test(p, tmarcxml_rec, iso2709_rec));
267     yaz_record_conv_destroy(p);
268
269     YAZ_CHECK(conv_configure_test("<backend>"
270                                   "<marc"
271                                   " outputcharset=\"utf-8\""
272                                   " inputcharset=\"marc-8\""
273                                   " outputformat=\"marcxml\""
274                                   " inputformat=\"marc\""
275                                   "/>"
276                                   "</backend>",
277                                   0, &p));
278     YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
279     yaz_record_conv_destroy(p);
280
281     YAZ_CHECK(conv_configure_test("<backend>"
282                                   "<solrmarc/>"
283                                   "<marc"
284                                   " outputcharset=\"utf-8\""
285                                   " inputcharset=\"marc-8\""
286                                   " outputformat=\"marcxml\""
287                                   " inputformat=\"marc\""
288                                   "/>"
289                                   "</backend>",
290                                   0, &p));
291     YAZ_CHECK(conv_convert_test(p, solrmarc_rec, marcxml_rec));
292     yaz_record_conv_destroy(p);
293
294     YAZ_CHECK(conv_configure_test("<backend>"
295                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
296                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
297                                   "<marc"
298                                   " inputcharset=\"utf-8\""
299                                   " outputcharset=\"marc-8\""
300                                   " inputformat=\"xml\""
301                                   " outputformat=\"marc\""
302                                   "/>"
303                                   "<marc"
304                                   " outputcharset=\"utf-8\""
305                                   " inputcharset=\"marc-8\""
306                                   " outputformat=\"marcxml\""
307                                   " inputformat=\"marc\""
308                                   "/>"
309                                   "</backend>",
310                                   0, &p));
311     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
312     yaz_record_conv_destroy(p);
313
314
315     YAZ_CHECK(conv_configure_test("<backend>"
316                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
317                                   "<xslt stylesheet=\"test_record_conv.xsl\"/>"
318                                   "<marc"
319                                   " outputcharset=\"marc-8\""
320                                   " inputformat=\"xml\""
321                                   " outputformat=\"marc\""
322                                   "/>"
323                                   "<marc"
324                                   " inputcharset=\"marc-8\""
325                                   " outputformat=\"marcxml\""
326                                   " inputformat=\"marc\""
327                                   "/>"
328                                   "</backend>",
329                                   0, &p));
330     YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
331     yaz_record_conv_destroy(p);
332 }
333
334 static void tst_convert2(void)
335 {
336     yaz_record_conv_t p = 0;
337     const char *marcxml_rec =
338         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
339         "  <leader>00080nam a22000498a 4500</leader>\n"
340         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
341         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
342         "    <subfield code=\"a\">k&#xf8;benhavn</subfield>\n"
343         "  </datafield>\n"
344         "</record>\n";
345     const char *iso2709_rec =
346         "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
347         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
348         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
349         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
350         "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
351
352     YAZ_CHECK(conv_configure_test("<backend>"
353                                   "<marc"
354                                   " inputcharset=\"utf-8\""
355                                   " outputcharset=\"marc-8\""
356                                   " inputformat=\"xml\""
357                                   " outputformat=\"marc\""
358                                   "/>"
359                                   "</backend>",
360                                   0, &p));
361     YAZ_CHECK(conv_convert_test_iter(p, marcxml_rec, iso2709_rec, 100));
362     yaz_record_conv_destroy(p);
363 }
364
365 static void tst_convert3(void)
366 {
367     NMEM nmem = nmem_create();
368     int ret;
369     yaz_record_conv_t p = 0;
370
371     const char *iso2709_rec =
372         "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
373         "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
374         "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
375         "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
376         "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
377
378     const char *opacxml_rec =
379         "<opacRecord>\n"
380         "  <bibliographicRecord>\n"
381         "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
382         "  <leader>00077nam a22000498a 4500</leader>\n"
383         "  <controlfield tag=\"001\">   11224466 </controlfield>\n"
384         "  <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
385         "    <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */
386         "benhavn</subfield>\n"
387         "  </datafield>\n"
388         "</record>\n"
389         "  </bibliographicRecord>\n"
390         "<holdings>\n"
391         " <holding>\n"
392         "  <typeOfRecord>u</typeOfRecord>\n"
393         "  <encodingLevel>U</encodingLevel>\n"
394         "  <receiptAcqStatus>0</receiptAcqStatus>\n"
395         "  <dateOfReport>000000</dateOfReport>\n"
396         "  <nucCode>s-FM/GC</nucCode>\n"
397         "  <localLocation>Main or Science/Business Reading Rms - STORED OFFSITE</localLocation>\n"
398         "  <callNumber>MLCM 89/00602 (N)</callNumber>\n"
399         "  <shelvingData>FT MEADE</shelvingData>\n"
400         "  <copyNumber>Copy 1</copyNumber>\n"
401         "  <volumes>\n"
402         "   <volume>\n"
403         "    <enumeration>1</enumeration>\n"
404         "    <chronology>2</chronology>\n"
405         "    <enumAndChron>3</enumAndChron>\n"
406         "   </volume>\n"
407         "   <volume>\n"
408         "    <enumeration>1</enumeration>\n"
409         "    <chronology>2</chronology>\n"
410         "    <enumAndChron>3</enumAndChron>\n"
411         "   </volume>\n"
412         "  </volumes>\n"
413         "  <circulations>\n"
414         "   <circulation>\n"
415         "    <availableNow value=\"1\"/>\n"
416         "    <availabilityDate>20130129</availabilityDate>\n"
417         "    <itemId>1226176</itemId>\n"
418         "    <renewable value=\"0\"/>\n"
419         "    <onHold value=\"0\"/>\n"
420         "   </circulation>\n"
421         "  </circulations>\n"
422         " </holding>\n"
423         "</holdings>\n"
424         "</opacRecord>\n";
425
426     Z_OPACRecord *z_opac = nmem_malloc(nmem, sizeof(*z_opac));
427     Z_HoldingsAndCircData *h;
428     Z_CircRecord *circ;
429
430     z_opac->bibliographicRecord =
431         z_ext_record_oid_nmem(nmem, yaz_oid_recsyn_usmarc,
432                               iso2709_rec, strlen(iso2709_rec));
433     z_opac->num_holdingsData = 1;
434     z_opac->holdingsData = (Z_HoldingsRecord **)
435         nmem_malloc(nmem, sizeof(Z_HoldingsRecord *) * 1);
436     z_opac->holdingsData[0] = (Z_HoldingsRecord *)
437         nmem_malloc(nmem, sizeof(Z_HoldingsRecord));
438     z_opac->holdingsData[0]->which = Z_HoldingsRecord_holdingsAndCirc;
439     h = z_opac->holdingsData[0]->u.holdingsAndCirc = (Z_HoldingsAndCircData *)
440          nmem_malloc(nmem, sizeof(*h));
441     h->typeOfRecord = nmem_strdup(nmem, "u");
442     h->encodingLevel = nmem_strdup(nmem, "U");
443     h->format = 0;
444     h->receiptAcqStatus = nmem_strdup(nmem, "0");
445     h->generalRetention = 0;
446     h->completeness = 0;
447     h->dateOfReport = nmem_strdup(nmem, "000000");
448     h->nucCode = nmem_strdup(nmem, "s-FM/GC");
449     h->localLocation = nmem_strdup(nmem,
450                                    "Main or Science/Business Reading "
451                                    "Rms - STORED OFFSITE");
452     h->shelvingLocation = 0;
453     h->callNumber = nmem_strdup(nmem, "MLCM 89/00602 (N)");
454     h->shelvingData = nmem_strdup(nmem, "FT MEADE");
455     h->copyNumber = nmem_strdup(nmem, "Copy 1");
456     h->publicNote = 0;
457     h->reproductionNote = 0;
458     h->termsUseRepro = 0;
459     h->enumAndChron = 0;
460     h->num_volumes = 2;
461     h->volumes = 0;
462
463     h->volumes = (Z_Volume **)
464         nmem_malloc(nmem, 2 * sizeof(Z_Volume *));
465
466     h->volumes[0] = (Z_Volume *)
467         nmem_malloc(nmem, sizeof(Z_Volume));
468     h->volumes[1] = h->volumes[0];
469
470     h->volumes[0]->enumeration = nmem_strdup(nmem, "1");
471     h->volumes[0]->chronology = nmem_strdup(nmem, "2");
472     h->volumes[0]->enumAndChron = nmem_strdup(nmem, "3");
473
474     h->num_circulationData = 1;
475     h->circulationData = (Z_CircRecord **)
476         nmem_malloc(nmem, 1 * sizeof(Z_CircRecord *));
477     circ = h->circulationData[0] = (Z_CircRecord *)
478         nmem_malloc(nmem, sizeof(Z_CircRecord));
479     circ->availableNow = nmem_booldup(nmem, 1);
480     circ->availablityDate = nmem_strdup(nmem, "20130129");
481     circ->availableThru = 0;
482     circ->restrictions = 0;
483     circ->itemId = nmem_strdup(nmem, "1226176");
484     circ->renewable = nmem_booldup(nmem, 0);
485     circ->onHold = nmem_booldup(nmem, 0);
486     circ->enumAndChron = 0;
487     circ->midspine = 0;
488     circ->temporaryLocation = 0;
489
490     YAZ_CHECK(conv_configure_test("<backend>"
491                                   "<marc"
492                                   " inputcharset=\"marc-8\""
493                                   " outputcharset=\"utf-8\""
494                                   " inputformat=\"marc\""
495                                   " outputformat=\"marcxml\""
496                                   "/>"
497                                   "</backend>",
498                                   0, &p));
499
500     if (p)
501     {
502         WRBUF output_record = wrbuf_alloc();
503         ret = yaz_record_conv_opac_record(p, z_opac, output_record);
504         YAZ_CHECK(ret == 0);
505         if (ret == 0)
506         {
507             ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
508             YAZ_CHECK(ret == 0);
509             if (ret)
510             {
511                 printf("got-output_record len=%ld: %s\n",
512                        (long) wrbuf_len(output_record),
513                        wrbuf_cstr(output_record));
514                 printf("output_expect_record len=%ld %s\n",
515                        (long) strlen(opacxml_rec),
516                        opacxml_rec);
517             }
518         }
519         yaz_record_conv_destroy(p);
520         wrbuf_destroy(output_record);
521     }
522     {
523         Z_OPACRecord *opac = 0;
524         yaz_marc_t mt =  yaz_marc_create();
525         ret = yaz_xml_to_opac(mt, opacxml_rec, strlen(opacxml_rec),
526                               &opac, 0 /* iconv */, nmem, 0);
527         YAZ_CHECK(ret);
528         YAZ_CHECK(opac);
529
530         if (opac)
531         {
532             WRBUF output_record = wrbuf_alloc();
533             char *p;
534
535             yaz_marc_xml(mt, YAZ_MARC_MARCXML);
536             yaz_opac_decode_wrbuf(mt, opac, output_record);
537
538             /* change MARC size to 00077 from 00078, due to
539                encoding of the aring (two bytes in UTF-8) */
540             p = strstr(wrbuf_buf(output_record), "00078");
541             YAZ_CHECK(p);
542             if (p)
543                 p[4] = '7';
544
545             ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
546             YAZ_CHECK(ret == 0);
547             if (ret)
548             {
549                 printf("got-output_record len=%ld: %s\n",
550                        (long) wrbuf_len(output_record),
551                        wrbuf_cstr(output_record));
552                 printf("output_expect_record len=%ld %s\n",
553                        (long) strlen(opacxml_rec),
554                        opacxml_rec);
555             }
556             wrbuf_destroy(output_record);
557         }
558         yaz_marc_destroy(mt);
559     }
560     nmem_destroy(nmem);
561 }
562
563 #endif
564
565 int main(int argc, char **argv)
566 {
567     YAZ_CHECK_INIT(argc, argv);
568     yaz_log_xml_errors(0, 0 /* disable log */);
569 #if YAZ_HAVE_XML2
570     tst_configure();
571 #endif
572 #if YAZ_HAVE_XSLT
573     tst_convert1();
574     tst_convert2();
575     tst_convert3();
576     xsltCleanupGlobals();
577 #endif
578 #if YAZ_HAVE_XML2
579     xmlCleanupParser();
580 #endif
581     YAZ_CHECK_TERM;
582 }
583
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
592