1 /* $Id: xmlread.c,v 1.9 2003-09-08 09:30:17 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
37 #include <yaz/xmalloc.h>
/* Number of bytes requested from XML_GetBuffer and read per iteration
   when feeding input to Expat (used by the main reader and by the
   external entity handler). */
43 #define XML_CHUNK 1024
/* Stack of data1 nodes shared by the parser callbacks: slot 0 receives
   the root node, and the callbacks hand the entry at level-1 to the
   data1_mk_* constructors when creating a new node.
   NOTE(review): fixed at 256 entries and no bounds check is visible in
   this listing -- confirm that deeply nested input cannot overflow it. */
47 data1_node *d1_stack[256];
/* Expat start-element callback.
     user - the struct user_info registered as parser user data
     el   - element name
     attr - attribute list from Expat, forwarded unchanged to data1_mk_tag
   Creates a tag node for the element (passing d1_stack[level-1] as the
   last argument), stores it in d1_stack[level] and logs the element name.
   NOTE(review): this listing omits original lines 57 and 61, so any
   condition guarding data1_set_root and any update of ui->level are not
   visible here -- check the complete file. */
54 static void cb_start (void *user, const char *el, const char **attr)
56 struct user_info *ui = (struct user_info*) user;
/* Hands the element name and the root node (d1_stack[0]) to data1_set_root. */
58 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
59 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
60 ui->d1_stack[ui->level-1]);
62 yaz_log (ui->loglevel, "cb_start %s", el);
/* Expat end-element callback: logs the name of the element being closed
   at the user's log level.
   NOTE(review): original lines 68-69 are missing from this listing; any
   stack-level bookkeeping would be there -- check the complete file. */
65 static void cb_end (void *user, const char *el)
67 struct user_info *ui = (struct user_info*) user;
70 yaz_log (ui->loglevel, "cb_end %s", el);
/* Expat character-data callback.
     user - the struct user_info registered as parser user data
     s    - start of the text
     len  - number of bytes at s (the text is printed with %.*s and passed
            with an explicit length, so no terminating NUL is relied on)
   Creates a text node from the bytes (passing d1_stack[level-1] as the
   last argument) and stores it in d1_stack[level].
   NOTE(review): original lines 76 and 78, on either side of the log call,
   are missing from this listing -- check whether the call is conditional. */
73 static void cb_chardata (void *user, const char *s, int len)
75 struct user_info *ui = (struct user_info*) user;
77 yaz_log (ui->loglevel, "cb_chardata %.*s", len, s);
79 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
80 ui->d1_stack[ui->level -1]);
/* XML declaration callback: builds a name/value attribute list
   (version, encoding, standalone), creates an "xml" preprocess node from
   it via data1_mk_preprocess and logs version and encoding.
   NOTE(review): the end of the parameter list (which declares
   `standalone`) and original lines 97-99 are missing from this listing;
   attr_list has 7 slots but only 6 assignments are visible, so the list
   terminator is presumably set on one of the missing lines -- confirm. */
83 static void cb_decl (void *user, const char *version, const char*encoding,
86 struct user_info *ui = (struct user_info*) user;
87 const char *attr_list[7];
89 attr_list[0] = "version";
/* NOTE(review): version is stored unguarded here although the log call
   below treats it as possibly NULL -- confirm a NULL value is acceptable
   inside the attribute list. */
90 attr_list[1] = version;
92 attr_list[2] = "encoding";
/* The encoding attribute is always written as UTF-8; the encoding named
   in the declaration is only logged. */
93 attr_list[3] = "UTF-8"; /* encoding */
95 attr_list[4] = "standalone";
/* NOTE(review): Expat documents standalone as -1 when the declaration has
   no standalone attribute; this expression reports that case as "yes" --
   confirm this is intended. */
96 attr_list[5] = standalone ? "yes" : "no";
100 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
101 ui->d1_stack[ui->level-1]);
102 yaz_log (ui->loglevel, "decl version=%s encoding=%s",
103 version ? version : "null",
104 encoding ? encoding : "null");
/* Processing-instruction callback: creates a preprocess node named after
   the target (with no attribute list), adds the instruction data as a
   text node using `res`, and logs target and data.
   NOTE(review): the declarations of `data` and `res` are on lines missing
   from this listing; `res` is presumably the node returned by
   data1_mk_preprocess -- confirm. */
107 static void cb_processing (void *user, const char *target,
110 struct user_info *ui = (struct user_info*) user;
112 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
113 ui->d1_stack[ui->level-1]);
/* NOTE(review): data goes to strlen unguarded here, while the log call
   below allows for data being NULL -- one of the two is inconsistent. */
114 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
116 yaz_log (ui->loglevel, "decl processing target=%s data=%s",
117 target ? target : "null",
118 data ? data : "null");
123 static void cb_comment (void *user, const char *data)
125 struct user_info *ui = (struct user_info*) user;
126 yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null");
127 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
130 static void cb_doctype_start (void *userData, const char *doctypeName,
131 const char *sysid, const char *pubid,
132 int has_internal_subset)
134 struct user_info *ui = (struct user_info*) userData;
135 yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s",
136 doctypeName, sysid, pubid);
139 static void cb_doctype_end (void *userData)
141 struct user_info *ui = (struct user_info*) userData;
142 yaz_log (ui->loglevel, "doctype end");
146 static void cb_entity_decl (void *userData, const char *entityName,
147 int is_parameter_entity,
148 const char *value, int value_length,
149 const char *base, const char *systemId,
150 const char *publicId, const char *notationName)
152 struct user_info *ui = (struct user_info*) userData;
153 yaz_log (ui->loglevel,
154 "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s"
155 " publicId=%s notationName=%s",
156 entityName, is_parameter_entity, value_length, value,
157 base, systemId, publicId, notationName);
/* External-entity reference handler: opens the file named by systemId,
   parses it with a sub-parser created from pparser and logs any read or
   parse failure.
   NOTE(review): many original lines (two parameters, local declarations,
   loop header, return statements) are missing from this listing, so the
   return values and the exact control flow cannot be read from here. */
161 static int cb_external_entity (XML_Parser pparser,
164 const char *systemId,
165 const char *publicId)
/* The user_info is recovered from the parent parser's user data. */
167 struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
/* NOTE(review): the four strings are formatted with %s without a NULL
   guard; confirm Expat never passes NULL for any of them here. */
172 yaz_log (ui->loglevel,
173 "external entity context=%s base=%s systemid=%s publicid=%s",
174 context, base, systemId, publicId);
/* NOTE(review): systemId is used directly as a local path. If records
   can come from untrusted sources this lets a document name arbitrary
   files to be read -- flagging, not changing. */
178 if (!(inf = fopen (systemId, "rb")))
180 yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId);
184 parser = XML_ExternalEntityParserCreate (pparser, "", 0);
/* Input is fed to the sub-parser in pieces of XML_CHUNK bytes, read
   straight into a buffer obtained from Expat. */
188 void *buf = XML_GetBuffer (parser, XML_CHUNK);
191 yaz_log (LOG_WARN, "XML_GetBuffer fail");
194 r = fread (buf, 1, XML_CHUNK, inf);
199 yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId);
204 if (!XML_ParseBuffer (parser, r, done))
/* Line and column are reported when full_error_info is set; the other
   message reports the line only. */
206 if (ui->full_error_info)
207 yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s",
209 XML_GetCurrentLineNumber(parser),
210 XML_GetCurrentColumnNumber(parser),
211 XML_ErrorString(XML_GetErrorCode(parser)));
213 yaz_log (LOG_WARN, "%s:%d:XML error: %s",
215 XML_GetCurrentLineNumber(parser),
216 XML_ErrorString(XML_GetErrorCode(parser)));
/* NOTE(review): no fclose of inf is visible in this listing -- confirm
   the file is closed on every path. */
220 XML_ParserFree (parser);
/* Conversion callback installed as info->convert by cb_encoding_handler.
     data - the iconv descriptor, passed as an opaque pointer
     s    - input bytes to convert
   Runs iconv from s into a two-byte output buffer and copies a
   short-sized code out of that buffer.
   NOTE(review): the declarations of inleft, outleft, ret and code, and
   the return statements, are on lines missing from this listing. */
226 static int cb_encoding_convert (void *data, const char *s)
228 iconv_t t = (iconv_t) data;
231 char outbuf_[2], *outbuf = outbuf_;
233 char *inbuf = (char *) s;
/* Logged at LOG_LOG (not the user's log level) on every call. */
237 yaz_log(LOG_LOG, "------------------------- cb_encoding_convert --- ");
239 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
/* A failure other than E2BIG resets the iconv conversion state. */
240 if (ret == (size_t) (-1) && errno != E2BIG)
242 iconv (t, 0, 0, 0, 0);
247 memcpy (&code, outbuf_, sizeof(short));
/* Release callback installed as info->release by cb_encoding_handler;
   data is the iconv descriptor stored in info->data.
   NOTE(review): the statement that disposes of t is on a line missing
   from this listing (presumably iconv_close) -- confirm. */
251 static void cb_encoding_release (void *data)
253 iconv_t t = (iconv_t) data;
/* Unknown-encoding handler: opens an iconv conversion from the named
   encoding to "UNICODE" and probes it for each byte value 0..255 to fill
   info->map. When a multi-byte lead byte is found, info->data is set to
   the iconv descriptor and cb_encoding_convert / cb_encoding_release are
   installed.
   NOTE(review): a large share of the original lines (parameter list
   tail, local declarations, braces, return statements) is missing from
   this listing; the comments below describe only the visible statements. */
257 static int cb_encoding_handler (void *userData, const char *name,
262 struct user_info *ui = (struct user_info*) userData;
264 iconv_t t = iconv_open ("UNICODE", name);
265 if (t == (iconv_t) (-1))
268 info->data = 0; /* signal that multibyte is not in use */
269 yaz_log (ui->loglevel, "Encoding handler of %s", name);
/* One probe per possible first byte of the input encoding. */
270 for (i = 0; i<256; i++)
275 char *inbuf = inbuf_;
276 char *outbuf = outbuf_;
281 iconv (t, 0, 0, 0, 0); /* reset iconv */
283 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
284 if (ret == (size_t) (-1))
288 yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i);
289 info->map[i] = -1; /* invalid sequence */
292 { /* multi byte input */
/* NOTE(review): if this assert sits inside the 0..255 loop above, it
   rejects i == 255 although the loop can reach that value -- confirm
   whether byte 255 can ever get to this branch. */
309 assert (i >= 0 && i<255);
312 for (k = 0; k<len; k++)
/* NOTE(review): unbounded sprintf append; the size of sbuf is not
   visible in this listing -- confirm it holds len entries. */
314 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
316 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
317 if (ret == (size_t) (-1))
319 if (errno == EILSEQ || errno == E2BIG)
325 else if (errno == EINVAL)
331 else if (outleft == 0)
334 info->data = t; /* signal that multibyte is in use */
342 if (info->map[i] < -1)
343 yaz_log (ui->loglevel, "Encoding %d: multibyte input %d",
346 yaz_log (ui->loglevel, "Encoding %d: multibyte input failed",
351 info->map[i] = -1; /* no room for output */
353 yaz_log (LOG_WARN, "Encoding %d: no room for output",
357 else if (outleft == 0)
360 memcpy (&code, outbuf_, sizeof(short));
365 { /* should never happen */
367 yaz_log (LOG_DEBUG, "Encoding %d: bad state", i);
371 { /* at least one multi byte */
/* Multi-byte input was seen: install the convert and release callbacks. */
372 info->convert = cb_encoding_convert;
373 info->release = cb_encoding_release;
377 /* no multi byte - we no longer need iconv handler */
/* Namespace-declaration start callback: logs the prefix and URI at the
   user's log level.
   NOTE(review): original line 392, directly before the log call, is
   missing from this listing. Expat can pass NULL for prefix (default
   namespace) and for uri, so confirm that line guards the %s arguments. */
389 static void cb_ns_start(void *userData, const char *prefix, const char *uri)
391 struct user_info *ui = (struct user_info*) userData;
393 yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri);
/* Namespace-declaration end callback: logs the prefix going out of scope
   at the user's log level.
   NOTE(review): original line 399, directly before the log call, is
   missing from this listing. Expat can pass NULL for prefix (default
   namespace), so confirm that line guards the %s argument. */
396 static void cb_ns_end(void *userData, const char *prefix)
398 struct user_info *ui = (struct user_info*) userData;
400 yaz_log(ui->loglevel, "cb_ns_end %s", prefix);
/* Reads one XML record through the callback rf and builds a data1 tree
   from it with Expat.
     dh - data1 handle used for every node that is created
     rf - read callback, called as rf(fh, buffer, XML_CHUNK)
     fh - opaque handle handed to rf
   The body also uses `m` (memory handle given to data1_mk_node2) and
   `full_error_info`, which are declared on parameter lines missing from
   this listing.
   Returns the root node stored in d1_stack[0]. The check just before the
   return tests for "no children" or an unfinished parse; the statement it
   guards is on a missing line (presumably a null return -- confirm). */
402 data1_node *zebra_read_xml (data1_handle dh,
403 int (*rf)(void *, char *, size_t), void *fh,
408 struct user_info uinfo;
411 uinfo.full_error_info = full_error_info;
412 uinfo.loglevel = LOG_DEBUG;
/* Slot 0 holds the root node. Slot 1 starts out null so the end of the
   routine can tell whether any child was created. */
416 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
417 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
419 parser = XML_ParserCreate (0 /* encoding */);
/* Install the callbacks; each one receives &uinfo as its user data. */
421 XML_SetElementHandler (parser, cb_start, cb_end);
422 XML_SetCharacterDataHandler (parser, cb_chardata);
423 XML_SetXmlDeclHandler (parser, cb_decl);
424 XML_SetProcessingInstructionHandler (parser, cb_processing);
425 XML_SetUserData (parser, &uinfo);
426 XML_SetCommentHandler (parser, cb_comment);
427 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
428 XML_SetEntityDeclHandler (parser, cb_entity_decl);
429 XML_SetExternalEntityRefHandler (parser, cb_external_entity);
430 XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end);
432 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo);
/* Input is read in pieces of XML_CHUNK bytes straight into a buffer
   obtained from Expat, then parsed from that buffer. */
437 void *buf = XML_GetBuffer (parser, XML_CHUNK);
441 yaz_log (LOG_WARN, "XML_GetBuffer fail");
444 r = (*rf)(fh, buf, XML_CHUNK);
448 yaz_log (LOG_WARN, "XML read fail");
453 if (!XML_ParseBuffer (parser, r, done))
456 yaz_log (LOG_WARN, "%d:%d:XML error: %s",
457 XML_GetCurrentLineNumber(parser),
458 XML_GetCurrentColumnNumber(parser),
459 XML_ErrorString(XML_GetErrorCode(parser)));
461 yaz_log (LOG_WARN, "XML error: %s",
462 XML_ErrorString(XML_GetErrorCode(parser)));
465 XML_ParserFree (parser);
/* Nothing was added below the root, or the parse never finished. */
466 if (!uinfo.d1_stack[1] || !done)
468 return uinfo.d1_stack[0];
/* Version of the Expat library in use, filled from XML_ExpatVersionInfo()
   by grs_init_xml. */
472 XML_Expat_Version expat_version;
473 int full_error_info; /* true if we can safely use Expat's
474 XML_GetCurrent{Line,Column}Number */
/* Allocates the per-filter struct xml_info with xmalloc, records the
   Expat version and sets full_error_info when that version is 1.95.3 or
   later.
   NOTE(review): the return statement is on a line missing from this
   listing; presumably the new struct is returned -- confirm. */
477 static void *grs_init_xml(void)
479 struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p));
481 p->expat_version = XML_ExpatVersionInfo();
483 /* determine if we can use XML_GetCurrent{Line,Column}Number */
484 p->full_error_info = 0;
/* Any 2.x or later release qualifies ... */
485 if (p->expat_version.major > 1)
486 p->full_error_info = 1;
/* ... as does 1.96 and later within the 1.x series ... */
487 else if (p->expat_version.major == 1 && p->expat_version.minor > 95)
488 p->full_error_info = 1;
/* ... and 1.95.3 onwards. */
489 else if (p->expat_version.major == 1 && p->expat_version.minor == 95
490 && p->expat_version.micro >= 3)
491 p->full_error_info = 1;
495 static data1_node *grs_read_xml (struct grs_read_info *p)
497 struct xml_info *x = (struct xml_info *) p->clientData;
498 return zebra_read_xml (p->dh, p->readf, p->fh, p->mem, x->full_error_info);
/* Destroy entry point of the XML filter; clientData is the struct
   xml_info created by grs_init_xml.
   NOTE(review): the statement that releases p is on a line missing from
   this listing -- confirm the xmalloc'ed struct is freed. */
501 static void grs_destroy_xml(void *clientData)
503 struct xml_info *p = (struct xml_info *) clientData;
/* Record type descriptor for this filter.
   NOTE(review): the initializer entries and the closing brace are on
   lines missing from this listing; presumably they reference
   grs_init_xml, grs_destroy_xml and grs_read_xml -- confirm. */
508 static struct recTypeGrs xml_type = {
/* Exported handle that points at the descriptor above. */
515 RecTypeGrs recTypeGrs_xml = &xml_type;