2 * Copyright (c) 2002, Index Data.
3 * See the file LICENSE for details.
5 * $Id: d1_expat.c,v 1.7 2002-08-23 14:24:05 adam Exp $
19 #include <yaz/xmalloc.h>
21 #include <yaz/data1.h>
/* Size in bytes of each buffer requested from XML_GetBuffer and of each
   read (fread / the caller-supplied read function) fed to the parser. */
25 #define XML_CHUNK 1024
/* Stack of data1 nodes used by the parser callbacks: slot 0 holds the root
   node, and callbacks store the node they create at index ui->level with
   the entry at ui->level-1 as its parent. */
28 data1_node *d1_stack[256];
/* Expat start-element callback (installed with XML_SetElementHandler).
   user is the struct user_info set with XML_SetUserData; el is the element
   name and attr its attribute list as delivered by Expat. */
35 static void cb_start (void *user, const char *el, const char **attr)
37 struct user_info *ui = (struct user_info*) user;
/* Record the element name on the root node (d1_stack[0]).
   NOTE(review): any condition guarding this call is not visible here. */
39 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
/* Create the tag node as a child of the node one level up and remember it
   at the current level. */
40 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
41 ui->d1_stack[ui->level-1]);
43 yaz_log (ui->loglevel, "cb_start %s", el);
/* Expat end-element callback (installed with XML_SetElementHandler).
   Logs the name of the element being closed at ui->loglevel.
   NOTE(review): presumably also adjusts ui->level; that statement is not
   visible here - confirm. */
46 static void cb_end (void *user, const char *el)
48 struct user_info *ui = (struct user_info*) user;
51 yaz_log (ui->loglevel, "cb_end %s", el);
/* Expat character-data callback (installed with
   XML_SetCharacterDataHandler). s is passed together with an explicit
   length len, so it is logged with %.*s and handed to data1_mk_text_n
   with that length. */
54 static void cb_chardata (void *user, const char *s, int len)
56 struct user_info *ui = (struct user_info*) user;
58 yaz_log (ui->loglevel, "cb_chardata %.*s", len, s);
/* Create a text node under the node one level up and store it at the
   current stack level. */
60 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
61 ui->d1_stack[ui->level -1]);
/* Expat XML-declaration callback (installed with XML_SetXmlDeclHandler).
   Builds a name/value attribute list (version, encoding, standalone) and
   creates an "xml" preprocess node from it under the node one level up,
   then logs the declared version and encoding. */
64 static void cb_decl (void *user, const char *version, const char*encoding,
67 struct user_info *ui = (struct user_info*) user;
/* Three name/value pairs are stored in slots 0..5.
   NOTE(review): slot 6 is presumably the list terminator; its assignment
   is not visible here. */
68 const char *attr_list[7];
70 attr_list[0] = "version";
/* NOTE(review): the log call below guards version against NULL, but it is
   stored here unguarded - confirm a NULL value is acceptable to
   data1_mk_preprocess. */
71 attr_list[1] = version;
73 attr_list[2] = "encoding";
/* The recorded encoding is always "UTF-8", not the declared encoding. */
74 attr_list[3] = "UTF-8"; /* encoding */
76 attr_list[4] = "standalone";
/* NOTE(review): Expat documents standalone as -1 when the declaration has
   no standalone parameter; any non-zero value yields "yes" here - confirm
   this is intended. */
77 attr_list[5] = standalone ? "yes" : "no";
81 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
82 ui->d1_stack[ui->level-1]);
83 yaz_log (ui->loglevel, "decl version=%s encoding=%s",
84 version ? version : "null",
85 encoding ? encoding : "null");
/* Expat processing-instruction callback (installed with
   XML_SetProcessingInstructionHandler). Creates a preprocess node named
   after target (no attributes) under the node one level up, attaches the
   instruction data to res as text, and logs target and data. */
88 static void cb_processing (void *user, const char *target,
91 struct user_info *ui = (struct user_info*) user;
93 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
94 ui->d1_stack[ui->level-1]);
/* NOTE(review): res is presumably the node returned by
   data1_mk_preprocess above; its declaration/assignment is not visible
   here. strlen(data) is called unguarded although the log call below
   treats data as possibly NULL - confirm. */
95 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
97 yaz_log (ui->loglevel, "decl processing target=%s data=%s",
98 target ? target : "null",
99 data ? data : "null");
/* Expat comment callback (installed with XML_SetCommentHandler).
   Logs the comment text and creates a comment node for it under the node
   one level up on the stack. */
104 static void cb_comment (void *user, const char *data)
106 struct user_info *ui = (struct user_info*) user;
107 yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null");
108 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
/* Expat DOCTYPE-start callback (installed with
   XML_SetDoctypeDeclHandler). Only logs the doctype name and identifiers;
   has_internal_subset is not used in the visible code.
   NOTE(review): Expat documents sysid and pubid as NULL when absent, and
   they are passed to %s unguarded (other callbacks in this file substitute
   "null") - confirm yaz_log tolerates NULL string arguments. */
111 static void cb_doctype_start (void *userData, const char *doctypeName,
112 const char *sysid, const char *pubid,
113 int has_internal_subset)
115 struct user_info *ui = (struct user_info*) userData;
116 yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s",
117 doctypeName, sysid, pubid);
/* Expat DOCTYPE-end callback (installed with XML_SetDoctypeDeclHandler).
   Logs "doctype end" at ui->loglevel. */
120 static void cb_doctype_end (void *userData)
122 struct user_info *ui = (struct user_info*) userData;
123 yaz_log (ui->loglevel, "doctype end");
/* Expat entity-declaration callback (installed with
   XML_SetEntityDeclHandler). Only logs the declaration; value is logged
   with %.*s using value_length.
   NOTE(review): Expat documents several of these arguments (value, base,
   systemId, publicId, notationName) as NULL depending on the kind of
   entity; they are passed to %s / %.*s unguarded - confirm yaz_log
   tolerates NULL string arguments. */
127 static void cb_entity_decl (void *userData, const char *entityName,
128 int is_parameter_entity,
129 const char *value, int value_length,
130 const char *base, const char *systemId,
131 const char *publicId, const char *notationName)
133 struct user_info *ui = (struct user_info*) userData;
134 yaz_log (ui->loglevel,
135 "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s"
136 " publicId=%s notationName=%s",
137 entityName, is_parameter_entity, value_length, value,
138 base, systemId, publicId, notationName);
/* Expat external-entity-reference callback (installed with
   XML_SetExternalEntityRefHandler). Opens systemId as a local file,
   creates an external-entity sub-parser from pparser, and feeds the file
   to it in XML_CHUNK-sized pieces.
   NOTE(review): the return statements are not visible here, so the exact
   return values cannot be stated from this view. */
142 static int cb_external_entity (XML_Parser pparser,
145 const char *systemId,
146 const char *publicId)
/* The user_info is fetched from the parent parser's user data. */
148 struct user_info *ui = (struct user_info*) XML_GetUserData(pparser);
/* NOTE(review): context, base and publicId may be NULL per the Expat
   documentation and are passed to %s unguarded - confirm. */
153 yaz_log (ui->loglevel,
154 "external entity context=%s base=%s systemid=%s publicid=%s",
155 context, base, systemId, publicId);
/* systemId is used directly as a file name, opened in binary mode. */
159 if (!(inf = fopen (systemId, "rb")))
161 yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId);
/* NOTE(review): an empty string is passed as the context rather than the
   context argument received by this handler - confirm this is intended. */
165 parser = XML_ExternalEntityParserCreate (pparser, "", 0);
/* Ask Expat for a buffer, fill it from the file, then parse it. */
169 void *buf = XML_GetBuffer (parser, XML_CHUNK);
172 yaz_log (LOG_WARN, "XML_GetBuffer fail");
175 r = fread (buf, 1, XML_CHUNK, inf);
180 yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId);
/* done is passed as the is-final flag for this chunk. */
185 if (!XML_ParseBuffer (parser, r, done))
187 yaz_log (LOG_WARN, "XML_ParseBuffer failed %s",
188 XML_ErrorString(XML_GetErrorCode(parser)));
/* NOTE(review): no fclose (inf) is visible in this view - confirm the
   file is closed on every path. */
192 XML_ParserFree (parser);
/* Convert callback stored in info->convert by cb_encoding_handler.
   data is the iconv_t descriptor; s points at the input bytes to convert.
   The conversion output goes into a 2-byte buffer which is then copied
   into code.
   NOTE(review): the initial values of inleft/outleft and the return
   statements are not visible here. */
198 static int cb_encoding_convert (void *data, const char *s)
200 iconv_t t = (iconv_t) data;
203 char outbuf_[2], *outbuf = outbuf_;
/* iconv takes a non-const input pointer, hence the cast. */
205 char *inbuf = (char *) s;
208 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
/* A failure other than E2BIG resets the descriptor's conversion state. */
209 if (ret == (size_t) (-1) && errno != E2BIG)
211 iconv (t, 0, 0, 0, 0);
/* Copies sizeof(short) bytes out of the 2-byte output buffer; this matches
   the buffer size only where short is 2 bytes wide. */
216 memcpy (&code, outbuf_, sizeof(short));
/* Release callback stored in info->release by cb_encoding_handler.
   data is the iconv_t descriptor kept in info->data.
   NOTE(review): presumably closes the descriptor; the call is not visible
   here - confirm. */
220 static void cb_encoding_release (void *data)
222 iconv_t t = (iconv_t) data;
/* Expat unknown-encoding callback (installed with
   XML_SetUnknownEncodingHandler). Opens an iconv conversion from the
   encoding called name to "UNICODE", probes each of the 256 possible
   leading byte values, and fills info->map[] with the result. When at
   least one byte value starts a multi-byte sequence, the iconv descriptor
   is kept in info->data and the convert/release callbacks are installed.
   NOTE(review): the return statements and several declarations are not
   visible here. */
226 static int cb_encoding_handler (void *userData, const char *name,
231 struct user_info *ui = (struct user_info*) userData;
233 iconv_t t = iconv_open ("UNICODE", name);
/* iconv_open reports failure as (iconv_t) -1. */
234 if (t == (iconv_t) (-1))
237 info->data = 0; /* signal that multibyte is not in use */
238 yaz_log (ui->loglevel, "Encoding handler of %s", name);
/* Probe every possible first byte value. */
239 for (i = 0; i<256; i++)
244 char *inbuf = inbuf_;
245 char *outbuf = outbuf_;
250 iconv (t, 0, 0, 0, 0); /* reset iconv */
252 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
253 if (ret == (size_t) (-1))
257 yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i);
258 info->map[i] = -1; /* invalid sequence */
261 { /* multi byte input */
/* NOTE(review): the enclosing loop runs i up to 255, but this assertion
   requires i < 255 - confirm i == 255 cannot reach this branch. */
278 assert (i >= 0 && i<255);
/* Append each probed input byte, in decimal, to sbuf.
   NOTE(review): sbuf's size is not visible here; the sprintf is
   unbounded. */
281 for (k = 0; k<len; k++)
283 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
285 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
286 if (ret == (size_t) (-1))
288 if (errno == EILSEQ || errno == E2BIG)
294 else if (errno == EINVAL)
300 else if (outleft == 0)
303 info->data = t; /* signal that multibyte is in use */
/* The log call below treats map values below -1 as a multi-byte entry. */
311 if (info->map[i] < -1)
312 yaz_log (ui->loglevel, "Encoding %d: multibyte input %d",
315 yaz_log (ui->loglevel, "Encoding %d: multibyte input failed",
320 info->map[i] = -1; /* no room for output */
321 yaz_log (LOG_WARN, "Encoding %d: no room for output",
/* outleft == 0 means the output buffer was filled completely. */
325 else if (outleft == 0)
328 memcpy (&code, outbuf_, sizeof(short));
333 { /* should never happen */
335 yaz_log (LOG_DEBUG, "Encoding %d: bad state", i);
339 { /* at least one multi byte */
340 info->convert = cb_encoding_convert;
341 info->release = cb_encoding_release;
345 /* no multi byte - we no longer need iconv handler */
/* Read an XML document through the caller-supplied read function and build
   a data1 node tree from it using Expat.
   dh  data1 handle passed on to the node-construction calls.
   rf  read function, called as (*rf)(fh, buf, XML_CHUNK) to fill a buffer.
   fh  opaque handle passed through to rf.
   Returns the root node (d1_stack[0]) on success.
   NOTE(review): the remaining parameters and the failure return value are
   not visible here. */
358 data1_node *data1_read_xml (data1_handle dh,
359 int (*rf)(void *, char *, size_t), void *fh,
363 struct user_info uinfo;
366 uinfo.loglevel = LOG_LOG;
/* Slot 0 is the root node; every parsed node hangs below it. */
370 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
371 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
/* NOTE(review): no check of the XML_ParserCreate result is visible here -
   confirm. */
373 parser = XML_ParserCreate (0 /* encoding */);
/* Install the callbacks; all of them receive &uinfo as user data. */
375 XML_SetElementHandler (parser, cb_start, cb_end);
376 XML_SetCharacterDataHandler (parser, cb_chardata);
377 XML_SetXmlDeclHandler (parser, cb_decl);
378 XML_SetProcessingInstructionHandler (parser, cb_processing);
379 XML_SetUserData (parser, &uinfo);
380 XML_SetCommentHandler (parser, cb_comment);
381 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
382 XML_SetEntityDeclHandler (parser, cb_entity_decl);
383 XML_SetExternalEntityRefHandler (parser, cb_external_entity);
385 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo);
/* Ask Expat for a buffer, fill it through rf, then parse it. */
390 void *buf = XML_GetBuffer (parser, XML_CHUNK);
394 yaz_log (LOG_WARN, "XML_GetBuffer fail");
397 r = (*rf)(fh, buf, XML_CHUNK);
401 yaz_log (LOG_WARN, "XML read fail");
/* done is passed as the is-final flag for this chunk. */
406 if (!XML_ParseBuffer (parser, r, done))
408 yaz_log (LOG_WARN, "XML_ParseBuffer (1) failed %s",
409 XML_ErrorString(XML_GetErrorCode(parser)));
412 XML_ParserFree (parser);
/* A still-empty slot 1 means no child node was created under the root;
   together with !done this is the failure condition. */
413 if (!uinfo.d1_stack[1] || !done)
415 return uinfo.d1_stack[0];