1 /* $Id: xmlread.c,v 1.19 2005-03-31 12:42:07 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
35 #include <idzebra/recgrs.h>
38 #include <yaz/xmalloc.h>
42 #define XML_CHUNK 1024
45 data1_node *d1_stack[256];
/* Expat start-element callback.
 * `user` is the struct user_info registered with the parser, `el` is the
 * element name and `attr` its attribute list.  The visible lines call
 * data1_set_root() with the element name on d1_stack[0], create a tag node
 * for the element with data1_mk_tag() under d1_stack[level-1], store it in
 * d1_stack[level], and log the element name at ui->loglevel.
 * NOTE(review): this listing omits lines of the function (the embedded
 * numbers skip); any guard around data1_set_root() and any update of
 * ui->level are not visible here - confirm against the full source. */
52 static void cb_start (void *user, const char *el, const char **attr)
54 struct user_info *ui = (struct user_info*) user;
56 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
/* the new tag node is created as a child of the node one level up */
57 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
58 ui->d1_stack[ui->level-1]);
60 yaz_log (ui->loglevel, "cb_start %s", el);
/* Expat end-element callback.
 * The visible lines recover the struct user_info from `user` and log the
 * element name `el` at ui->loglevel.
 * NOTE(review): lines between the ones shown are missing from this listing;
 * any change to ui->level made here is not visible - confirm. */
63 static void cb_end (void *user, const char *el)
65 struct user_info *ui = (struct user_info*) user;
68 yaz_log (ui->loglevel, "cb_end %s", el);
/* Expat character-data callback.
 * `s` points at `len` bytes of text (length is passed explicitly to both
 * the log format and data1_mk_text_n, so no terminator is relied upon).
 * The text is logged, then a text node is created under d1_stack[level-1]
 * and stored in d1_stack[level]. */
71 static void cb_chardata (void *user, const char *s, int len)
73 struct user_info *ui = (struct user_info*) user;
75 yaz_log (ui->loglevel, "cb_chardata %.*s", len, s);
77 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
78 ui->d1_stack[ui->level -1]);
/* Expat XML-declaration callback.
 * Builds a name/value attribute list (version, encoding, standalone) and
 * passes it to data1_mk_preprocess() with target "xml" under
 * d1_stack[level-1].  The encoding attribute is always recorded as "UTF-8",
 * independent of the `encoding` argument; the argument is only used in the
 * log message.  Version and encoding are logged at YLOG_LOG, with "null"
 * substituted for NULL pointers.
 * NOTE(review): the remainder of the parameter list (including the
 * `standalone` used below) and the termination of attr_list fall on lines
 * omitted from this listing - confirm against the full source. */
81 static void cb_decl (void *user, const char *version, const char *encoding,
84 struct user_info *ui = (struct user_info*) user;
/* seven slots; the visible code fills indices 0..5 */
85 const char *attr_list[7];
87 attr_list[0] = "version";
88 attr_list[1] = version;
90 attr_list[2] = "encoding";
91 attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */
93 attr_list[4] = "standalone";
94 attr_list[5] = standalone ? "yes" : "no";
98 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
99 ui->d1_stack[ui->level-1]);
101 yaz_log (YLOG_LOG, "decl version=%s encoding=%s",
102 version ? version : "null",
103 encoding ? encoding : "null");
/* Expat processing-instruction callback.
 * Creates a preprocess node for `target` with no attributes (0) under
 * d1_stack[level-1], attaches `data` as text to `res` via
 * data1_mk_text_nf(), and logs target and data at ui->loglevel.
 * NOTE(review): the declaration of `res` and the rest of the parameter list
 * are on lines omitted from this listing; `res` is presumably the node
 * returned by data1_mk_preprocess() - confirm.
 * NOTE(review): strlen(data) is evaluated unconditionally, while the log
 * call below guards `data` against NULL - confirm whether NULL can occur. */
107 static void cb_processing (void *user, const char *target,
110 struct user_info *ui = (struct user_info*) user;
112 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
113 ui->d1_stack[ui->level-1]);
114 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
116 yaz_log (ui->loglevel, "decl processing target=%s data=%s",
117 target ? target : "null",
118 data ? data : "null");
/* Expat comment callback.
 * Logs the comment text ("null" if `data` is NULL) and then creates a
 * comment node for it with data1_mk_comment() under d1_stack[level-1].
 * Note that `data` is passed to data1_mk_comment() without a NULL check. */
121 static void cb_comment (void *user, const char *data)
123 struct user_info *ui = (struct user_info*) user;
124 yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null");
125 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
/* Expat doctype-declaration start callback.
 * Only logs the doctype name, system id and public id at ui->loglevel;
 * `has_internal_subset` is not used in the visible lines.
 * NOTE(review): sysid and pubid are handed to %s without a NULL check,
 * unlike the "null" substitution used by other callbacks in this file -
 * confirm whether the parser can pass NULL here. */
128 static void cb_doctype_start (void *userData, const char *doctypeName,
129 const char *sysid, const char *pubid,
130 int has_internal_subset)
132 struct user_info *ui = (struct user_info*) userData;
133 yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s",
134 doctypeName, sysid, pubid);
/* Expat doctype-declaration end callback.
 * The visible lines only log "doctype end" at ui->loglevel. */
137 static void cb_doctype_end (void *userData)
139 struct user_info *ui = (struct user_info*) userData;
140 yaz_log (ui->loglevel, "doctype end");
/* Expat entity-declaration callback.
 * Logs every argument at ui->loglevel.  `value` is printed with an explicit
 * length (value_length), so it is not required to be NUL-terminated; the
 * remaining strings are printed with plain %s.
 * NOTE(review): base, systemId, publicId and notationName are not
 * NULL-checked before being formatted - confirm whether NULL can occur. */
144 static void cb_entity_decl (void *userData, const char *entityName,
145 int is_parameter_entity,
146 const char *value, int value_length,
147 const char *base, const char *systemId,
148 const char *publicId, const char *notationName)
150 struct user_info *ui = (struct user_info*) userData;
151 yaz_log (ui->loglevel,
152 "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s"
153 " publicId=%s notationName=%s",
154 entityName, is_parameter_entity, value_length, value,
155 base, systemId, publicId, notationName);
/* Expat external-entity reference callback.
 * Retrieves the struct user_info from the parent parser, logs the request,
 * opens the file named by systemId in binary mode, creates a child parser
 * with XML_ExternalEntityParserCreate(), feeds it the file in XML_CHUNK-byte
 * pieces (XML_GetBuffer / fread / XML_ParseBuffer), logs any parse error
 * with line and column, and frees the child parser.
 * NOTE(review): the `context` and `base` parameters, the local
 * declarations, the loop structure, the return values and any fclose() of
 * `inf` fall on lines omitted from this listing - confirm against the full
 * source, in particular that the file is closed on every path. */
159 static int cb_external_entity (XML_Parser pparser,
162 const char *systemId,
163 const char *publicId)
165 struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
170 yaz_log (ui->loglevel,
171 "external entity context=%s base=%s systemid=%s publicid=%s",
172 context, base, systemId, publicId);
/* systemId is used directly as a file name */
176 if (!(inf = fopen (systemId, "rb")))
178 yaz_log (YLOG_WARN|YLOG_ERRNO, "fopen %s", systemId);
182 parser = XML_ExternalEntityParserCreate (pparser, "", 0);
/* read straight into a buffer owned by the child parser */
186 void *buf = XML_GetBuffer (parser, XML_CHUNK);
189 yaz_log (YLOG_WARN, "XML_GetBuffer fail");
192 r = fread (buf, 1, XML_CHUNK, inf);
197 yaz_log (YLOG_WARN|YLOG_ERRNO, "fread %s", systemId);
202 if (!XML_ParseBuffer (parser, r, done))
205 yaz_log (YLOG_WARN, "%s:%d:%d:XML error: %s",
207 XML_GetCurrentLineNumber(parser),
208 XML_GetCurrentColumnNumber(parser),
209 XML_ErrorString(XML_GetErrorCode(parser)));
213 XML_ParserFree (parser);
/* Multi-byte convert callback installed by cb_encoding_handler.
 * `data` is the iconv_t handle and `s` the input bytes.  The input is
 * converted with iconv() into the two-byte buffer outbuf_, and
 * sizeof(short) bytes of that buffer are then copied into `code`.
 * A failure with errno other than E2BIG is checked for, and the visible
 * lines show an iconv(t, 0, 0, 0, 0) state reset following that check.
 * NOTE(review): the declarations of ret, inleft, outleft and code, and the
 * return statements, are on lines omitted from this listing - confirm. */
219 static int cb_encoding_convert (void *data, const char *s)
221 iconv_t t = (iconv_t) data;
224 char outbuf_[2], *outbuf = outbuf_;
/* iconv() takes a non-const input pointer, hence the cast */
226 char *inbuf = (char *) s;
230 yaz_log(YLOG_LOG, "------------------------- cb_encoding_convert --- ");
232 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
233 if (ret == (size_t) (-1) && errno != E2BIG)
235 iconv (t, 0, 0, 0, 0);
240 memcpy (&code, outbuf_, sizeof(short));
/* Release callback installed by cb_encoding_handler.
 * `data` is interpreted as the iconv_t handle.
 * NOTE(review): the rest of the body is not visible in this listing;
 * presumably it closes the handle - confirm. */
244 static void cb_encoding_release (void *data)
246 iconv_t t = (iconv_t) data;
/* Expat unknown-encoding callback.
 * Opens an iconv conversion from the encoding `name` to "UNICODE" and fills
 * info->map for each of the 256 possible lead bytes by attempting a
 * conversion of that byte.  The visible lines show these outcomes per byte:
 * an invalid sequence is mapped to -1; bytes that start a multi-byte
 * sequence are probed further and cause info->data to be set to the iconv
 * handle; a byte that converts fully has its code copied out of outbuf_.
 * If at least one multi-byte lead was found, info->convert and
 * info->release are pointed at cb_encoding_convert / cb_encoding_release;
 * otherwise the iconv handle is no longer needed.
 * NOTE(review): most of the branch structure, the local declarations, the
 * remaining parameters (including `info`) and the return values fall on
 * lines omitted from this listing - the summary above is limited to what
 * the visible lines and their comments show; confirm against the full
 * source before relying on it. */
250 static int cb_encoding_handler (void *userData, const char *name,
255 struct user_info *ui = (struct user_info*) userData;
257 iconv_t t = iconv_open ("UNICODE", name);
258 if (t == (iconv_t) (-1))
261 info->data = 0; /* signal that multibyte is not in use */
262 yaz_log (ui->loglevel, "Encoding handler of %s", name);
/* probe every possible first byte */
263 for (i = 0; i<256; i++)
268 char *inbuf = inbuf_;
269 char *outbuf = outbuf_;
274 iconv (t, 0, 0, 0, 0); /* reset iconv */
276 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
277 if (ret == (size_t) (-1))
281 yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i);
282 info->map[i] = -1; /* invalid sequence */
285 { /* multi byte input */
/* NOTE(review): the outer loop runs i up to 255 but this assert requires
 * i<255; the enclosing condition is not visible here - confirm. */
302 assert (i >= 0 && i<255);
305 for (k = 0; k<len; k++)
/* NOTE(review): appends to sbuf without a visible bound; the size of sbuf
 * is declared on an omitted line - confirm it cannot overflow. */
307 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
309 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
310 if (ret == (size_t) (-1))
312 if (errno == EILSEQ || errno == E2BIG)
318 else if (errno == EINVAL)
324 else if (outleft == 0)
327 info->data = t; /* signal that multibyte is in use */
335 if (info->map[i] < -1)
336 yaz_log (ui->loglevel, "Encoding %d: multibyte input %d",
339 yaz_log (ui->loglevel, "Encoding %d: multibyte input failed",
344 info->map[i] = -1; /* no room for output */
346 yaz_log (YLOG_WARN, "Encoding %d: no room for output",
350 else if (outleft == 0)
353 memcpy (&code, outbuf_, sizeof(short));
358 { /* should never happen */
360 yaz_log (YLOG_DEBUG, "Encoding %d: bad state", i);
364 { /* at least one multi byte */
365 info->convert = cb_encoding_convert;
366 info->release = cb_encoding_release;
370 /* no multi byte - we no longer need iconv handler */
/* Expat namespace-declaration start callback.
 * The visible lines only log the prefix and URI at ui->loglevel.
 * NOTE(review): prefix and uri are formatted with %s without a NULL check -
 * confirm whether the parser can pass NULL (e.g. for a default namespace). */
382 static void cb_ns_start(void *userData, const char *prefix, const char *uri)
384 struct user_info *ui = (struct user_info*) userData;
386 yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri);
/* Expat namespace-declaration end callback.
 * The visible lines only log the prefix at ui->loglevel.
 * NOTE(review): prefix is formatted with %s without a NULL check - confirm
 * whether the parser can pass NULL here. */
389 static void cb_ns_end(void *userData, const char *prefix)
391 struct user_info *ui = (struct user_info*) userData;
393 yaz_log(ui->loglevel, "cb_ns_end %s", prefix);
/* Read an XML document through the reader callback `rf` and build a data1
 * node tree for it.
 *   dh  - data1 handle used for node creation
 *   rf  - read function, called as rf(fh, buf, XML_CHUNK)
 *   fh  - opaque handle passed to rf
 * Returns uinfo.d1_stack[0], the root node created at the start.
 * Steps shown in the visible lines: create the root node, create an Expat
 * parser, register all callbacks of this file with &uinfo as user data,
 * feed the input in XML_CHUNK-byte pieces, log parse errors with line and
 * column, free the parser, and finally insert an "xml" preprocess node
 * (version 1.0, encoding UTF-8) if the first child of the root is not
 * already one.
 * NOTE(review): the remaining parameter(s) (the `m` used below), local
 * declarations, the loop structure, the initialisation of the other uinfo
 * members and the early-exit paths are on lines omitted from this listing -
 * confirm against the full source. */
395 data1_node *zebra_read_xml (data1_handle dh,
396 int (*rf)(void *, char *, size_t), void *fh,
400 struct user_info uinfo;
402 data1_node *first_node;
404 uinfo.loglevel = YLOG_DEBUG;
408 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
409 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
411 parser = XML_ParserCreate (0 /* encoding */);
/* every callback receives &uinfo as its user-data pointer */
413 XML_SetElementHandler (parser, cb_start, cb_end);
414 XML_SetCharacterDataHandler (parser, cb_chardata);
415 XML_SetXmlDeclHandler (parser, cb_decl);
416 XML_SetProcessingInstructionHandler (parser, cb_processing);
417 XML_SetUserData (parser, &uinfo);
418 XML_SetCommentHandler (parser, cb_comment);
419 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
420 XML_SetEntityDeclHandler (parser, cb_entity_decl);
421 XML_SetExternalEntityRefHandler (parser, cb_external_entity);
422 XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end);
424 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo);
/* read directly into a buffer owned by the parser */
429 void *buf = XML_GetBuffer (parser, XML_CHUNK);
433 yaz_log (YLOG_WARN, "XML_GetBuffer fail");
436 r = (*rf)(fh, buf, XML_CHUNK);
440 yaz_log (YLOG_WARN, "XML read fail");
445 if (!XML_ParseBuffer (parser, r, done))
448 yaz_log (YLOG_WARN, "%d:%d:XML error: %s",
449 XML_GetCurrentLineNumber(parser),
450 XML_GetCurrentColumnNumber(parser),
451 XML_ErrorString(XML_GetErrorCode(parser)));
454 XML_ParserFree (parser);
/* d1_stack[1] still 0 means no element was ever pushed; the action taken
 * for this condition is on an omitted line */
455 if (!uinfo.d1_stack[1] || !done)
457 /* insert XML header if not present .. */
458 first_node = uinfo.d1_stack[0]->child;
459 if (first_node->which != DATA1N_preprocess ||
460 strcmp(first_node->u.preprocess.target, "xml"))
462 const char *attr_list[5];
464 attr_list[0] = "version";
465 attr_list[1] = "1.0";
467 attr_list[2] = "encoding";
468 attr_list[3] = "UTF-8"; /* encoding */
472 data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list,
475 return uinfo.d1_stack[0];
479 XML_Expat_Version expat_version;
/* Adapter from the grs reader interface to zebra_read_xml():
 * forwards the data1 handle, read function, file handle and memory handle
 * taken from the grs_read_info structure `p`, and returns the resulting
 * node tree. */
482 static data1_node *grs_read_xml (struct grs_read_info *p)
484 return zebra_read_xml (p->dh, p->readf, p->fh, p->mem);
/* Filter initialisation.
 * Allocates a struct xml_info with xmalloc() and records the Expat version
 * information returned by XML_ExpatVersionInfo() in it.  `res` and
 * `recType` are not used in the visible lines.
 * NOTE(review): the return statement is on a line omitted from this
 * listing; presumably the allocated structure is returned - confirm. */
487 static void *filter_init(Res res, RecType recType)
489 struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p));
491 p->expat_version = XML_ExpatVersionInfo();
/* Filter teardown.
 * `clientData` is interpreted as the struct xml_info pointer.
 * NOTE(review): the rest of the body is not visible in this listing;
 * presumably it frees the structure allocated by filter_init - confirm. */
496 static void filter_destroy(void *clientData)
498 struct xml_info *p = (struct xml_info *) clientData;
/* Filter extract entry point: delegates to zebra_grs_extract(), passing
 * grs_read_xml as the reader, and returns its result unchanged. */
503 static int filter_extract(void *clientData, struct recExtractCtrl *ctrl)
505 return zebra_grs_extract(clientData, ctrl, grs_read_xml);
/* Filter retrieve entry point: delegates to zebra_grs_retrieve(), passing
 * grs_read_xml as the reader, and returns its result unchanged. */
508 static int filter_retrieve(void *clientData, struct recRetrieveCtrl *ctrl)
510 return zebra_grs_retrieve(clientData, ctrl, grs_read_xml);
/* Record-type descriptor for this filter.
 * NOTE(review): the initializer contents are on lines omitted from this
 * listing; presumably it references the filter_* functions above - confirm.
 * The conditional below mentions idzebra_filter_grs_xml when
 * IDZEBRA_STATIC_GRS_XML is defined; the surrounding declaration is not
 * visible here. */
513 static struct recType filter_type = {
524 #ifdef IDZEBRA_STATIC_GRS_XML
525 idzebra_filter_grs_xml