2 * Copyright (c) 2002, Index Data.
3 * See the file LICENSE for details.
5 * $Id: d1_expat.c,v 1.5 2002-07-11 10:39:49 adam Exp $
19 #include <yaz/xmalloc.h>
21 #include <yaz/data1.h>
/* Stack of data1 nodes, 256 slots. The callbacks index it with ui->level
 * and ui->level-1; data1_read_xml stores the root node in slot 0 and
 * clears slot 1 before parsing. */
26 data1_node *d1_stack[256];
/* Expat start-element handler (first handler given to XML_SetElementHandler).
 * user is the struct user_info registered as parser user data; el is the
 * element name and attr its attribute list, both handed on to data1_mk_tag.
 * NOTE(review): the lines between the visible statements are not shown, so
 * any condition on data1_set_root or update of ui->level cannot be
 * confirmed from here. */
32 static void cb_start (void *user, const char *el, const char **attr)
34 struct user_info *ui = (struct user_info*) user;
/* The element name is passed to data1_set_root along with stack slot 0. */
36 data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el);
/* The node returned by data1_mk_tag is stored at d1_stack[level];
 * d1_stack[level-1] is passed as the last argument (presumably the parent
 * node - confirm against the data1 API). */
37 ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
38 ui->d1_stack[ui->level-1]);
40 yaz_log (LOG_DEBUG, "cb_start %s", el);
/* Expat end-element handler (second handler given to XML_SetElementHandler).
 * The visible lines fetch the user_info from user and log the closing
 * element name at debug level; any other work is on lines not shown. */
43 static void cb_end (void *user, const char *el)
45 struct user_info *ui = (struct user_info*) user;
48 yaz_log (LOG_DEBUG, "cb_end %s", el);
/* Expat character-data handler (registered with XML_SetCharacterDataHandler).
 * s points at len bytes of text; it is logged with a length-bounded %.*s
 * and passed together with len to data1_mk_text_n, so the visible lines
 * never treat it as NUL-terminated.
 * NOTE(review): two data1_mk_text_n calls are visible; the lines that
 * select between them (conditionals or preprocessor directives) are not
 * shown. */
51 static void cb_chardata (void *user, const char *s, int len)
53 struct user_info *ui = (struct user_info*) user;
55 yaz_log (LOG_DEBUG, "cb_chardata %.*s", len, s);
56 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
57 ui->d1_stack[ui->level -1]);
/* Test each of the len characters against the set "\n\n " (newline, space).
 * NOTE(review): newline is listed twice - possibly '\r' or '\t' was
 * intended; confirm before changing. */
61 for (i = 0; i<len; i++)
62 if (!strchr ("\n\n ", s[i]))
66 ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len,
67 ui->d1_stack[ui->level -1]);
/* Expat XML-declaration handler (registered with XML_SetXmlDeclHandler).
 * Collects the declaration's fields as name/value pairs in attr_list and
 * passes them to data1_mk_preprocess with the target name "xml". */
72 static void cb_decl (void *user, const char *version, const char*encoding,
75 struct user_info *ui = (struct user_info*) user;
/* Seven slots: three name/value pairs plus one spare entry, presumably for
 * a terminator stored on a line not shown here. */
76 const char *attr_list[7];
78 attr_list[0] = "version";
79 attr_list[1] = version;
81 attr_list[2] = "encoding";
82 attr_list[3] = encoding;
84 attr_list[4] = "standalone";
/* standalone is rendered as "yes" when non-zero and "no" otherwise. */
85 attr_list[5] = standalone ? "yes" : "no";
89 data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list,
90 ui->d1_stack[ui->level-1]);
/* NULL version/encoding are replaced by the text "null" before logging. */
91 yaz_log (LOG_DEBUG, "decl version=%s encoding=%s",
92 version ? version : "null",
93 encoding ? encoding : "null");
/* Expat processing-instruction handler (registered with
 * XML_SetProcessingInstructionHandler). Creates a preprocess node named
 * after target with no attribute list (0), then adds the instruction data
 * as text to res via data1_mk_text_nf. */
96 static void cb_processing (void *user, const char *target,
99 struct user_info *ui = (struct user_info*) user;
101 data1_mk_preprocess (ui->dh, ui->nmem, target, 0,
102 ui->d1_stack[ui->level-1]);
/* res is presumably the node returned by data1_mk_preprocess; the
 * assignment is on a line not shown here.
 * NOTE(review): strlen(data) is called without a NULL check, while the log
 * call below guards data against NULL - confirm data cannot be NULL. */
103 data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res);
105 yaz_log (LOG_DEBUG, "decl processing target=%s data=%s",
106 target ? target : "null",
107 data ? data : "null");
/* Expat comment handler (registered with XML_SetCommentHandler). Logs the
 * comment text (or "null" when data is NULL) and passes it to
 * data1_mk_comment together with d1_stack[level-1]. */
112 static void cb_comment (void *user, const char *data)
114 struct user_info *ui = (struct user_info*) user;
115 yaz_log (LOG_DEBUG, "decl comment data=%s", data ? data : "null");
116 data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]);
/* Expat DOCTYPE-start handler (first handler given to
 * XML_SetDoctypeDeclHandler). The visible lines only log the doctype name,
 * system id and public id; userData and has_internal_subset are not used
 * in them.
 * NOTE(review): doctypeName, sysid and pubid are passed to %s without a
 * NULL guard, unlike the logging in cb_decl and cb_processing - confirm
 * whether Expat can pass NULL here. */
119 static void cb_doctype_start (void *userData, const char *doctypeName,
120 const char *sysid, const char *pubid,
121 int has_internal_subset)
123 yaz_log (LOG_DEBUG, "doctype start doctype=%s sysid=%s pubid=%s",
124 doctypeName, sysid, pubid);
/* Expat DOCTYPE-end handler (second handler given to
 * XML_SetDoctypeDeclHandler). The visible body only logs "doctype end" at
 * debug level. */
127 static void cb_doctype_end (void *userData)
129 yaz_log (LOG_DEBUG, "doctype end");
/* Expat entity-declaration handler (registered with
 * XML_SetEntityDeclHandler). The visible body is the format string and
 * argument list that report every field of the declaration; value is
 * printed with %.*s bounded by value_length rather than as a C string.
 * NOTE(review): base, systemId, publicId and notationName are passed to %s
 * without NULL guards - confirm whether Expat can pass NULL for them. */
133 static void cb_entity_decl (void *userData, const char *entityName,
134 int is_parameter_entity,
135 const char *value, int value_length,
136 const char *base, const char *systemId,
137 const char *publicId, const char *notationName)
140 "entity %s is_para_entry=%d value=%.*s base=%s systemId=%s"
141 " publicId=%s notationName=%s",
142 entityName, is_parameter_entity, value_length, value,
143 base, systemId, publicId, notationName);
/* Conversion callback installed as info->convert by cb_encoding_handler.
 * data carries the iconv descriptor and s the input bytes; the input is
 * converted with iconv into the two-byte buffer outbuf_, whose contents are
 * then copied into code. The value returned is on a line not shown here. */
148 static int cb_encoding_convert (void *data, const char *s)
150 iconv_t t = (iconv_t) data;
153 char outbuf_[2], *outbuf = outbuf_;
/* const is cast away because the pointer's address is passed to iconv. */
155 char *inbuf = (char *) s;
158 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
/* A failure other than E2BIG leads to the reset call below; E2BIG is
 * tolerated, presumably because filling the two-byte output buffer is the
 * expected outcome - confirm. */
159 if (ret == (size_t) (-1) && errno != E2BIG)
161 iconv (t, 0, 0, 0, 0);
/* Copies sizeof(short) bytes out of the two-byte buffer; this assumes
 * sizeof(short) == 2 - TODO confirm for all target platforms. */
166 memcpy (&code, outbuf_, sizeof(short));
/* Release callback installed as info->release by cb_encoding_handler.
 * data is interpreted as the iconv descriptor; what is done with it is on
 * lines not shown here (presumably it is closed - confirm). */
170 static void cb_encoding_release (void *data)
172 iconv_t t = (iconv_t) data;
/* Unknown-encoding handler (registered with XML_SetUnknownEncodingHandler).
 * Opens an iconv descriptor converting from the encoding called name to
 * "UNICODE" and walks the byte values 0..255, recording a result per value
 * in info->map. On the multibyte path the descriptor is kept in info->data
 * and cb_encoding_convert / cb_encoding_release are installed in info.
 * NOTE(review): many lines of this function are not shown here (braces,
 * declarations, several branches), so the comments below describe only
 * the visible statements. */
176 static int cb_encoding_handler (void *userData, const char *name,
182 iconv_t t = iconv_open ("UNICODE", name);
/* iconv_open reports failure as (iconv_t) -1. */
183 if (t == (iconv_t) (-1))
186 info->data = 0; /* signal that multibyte is not in use */
187 yaz_log (LOG_DEBUG, "Encoding handler of %s", name);
/* One iteration per possible byte value i. */
188 for (i = 0; i<256; i++)
193 char *inbuf = inbuf_;
194 char *outbuf = outbuf_;
199 iconv (t, 0, 0, 0, 0); /* reset iconv */
201 ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft);
202 if (ret == (size_t) (-1))
206 yaz_log (LOG_DEBUG, "Encoding %d: invalid sequence", i);
207 info->map[i] = -1; /* invalid sequence */
210 { /* multi byte input */
/* NOTE(review): the enclosing loop runs i up to 255, but this assert
 * requires i < 255 - confirm whether i == 255 can reach this point. */
227 assert (i >= 0 && i<255);
230 for (k = 0; k<len; k++)
/* Appends each input byte in decimal to sbuf. NOTE(review): no bound
 * check is visible - confirm sbuf is large enough for len entries. */
232 sprintf (sbuf+strlen(sbuf), "%d ", inbuf_[k]&255);
234 ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft);
235 if (ret == (size_t) (-1))
237 if (errno == EILSEQ || errno == E2BIG)
243 else if (errno == EINVAL)
249 else if (outleft == 0)
252 info->data = t; /* signal that multibyte is in use */
/* Map values below -1 are reported as multibyte input by the log call
 * that follows. */
260 if (info->map[i] < -1)
261 yaz_log (LOG_DEBUG, "Encoding %d: multibyte input %d",
264 yaz_log (LOG_DEBUG, "Encoding %d: multibyte input failed",
269 info->map[i] = -1; /* no room for output */
270 yaz_log (LOG_WARN, "Encoding %d: no room for output",
274 else if (outleft == 0)
/* Copies sizeof(short) bytes of converted output from outbuf_ into code;
 * what is done with code afterwards is on a line not shown here. */
277 memcpy (&code, outbuf_, sizeof(short));
282 { /* should never happen */
284 yaz_log (LOG_DEBUG, "Encoding %d: bad state", i);
288 { /* at least one multi byte */
289 info->convert = cb_encoding_convert;
290 info->release = cb_encoding_release;
294 /* no multi byte - we no longer need iconv handler */
/* Size requested from XML_GetBuffer and passed to the read callback as the
 * buffer size in data1_read_xml. */
306 #define XML_CHUNK 1024
/* Reads an XML document through the callback rf and builds a data1 tree
 * from it using an Expat parser.
 *   dh - data1 handle passed to the node constructors
 *   rf - read callback, called as rf(fh, buf, XML_CHUNK); its result r is
 *        passed to XML_ParseBuffer as the byte count
 *   fh - opaque handle forwarded to rf
 * The remaining parameters are declared on lines not shown here.
 * The visible return path yields d1_stack[0], the root created below; the
 * statement executed when d1_stack[1] is still 0 is not shown. */
308 data1_node *data1_read_xml (data1_handle dh,
309 int (*rf)(void *, char *, size_t), void *fh,
313 struct user_info uinfo;
/* Slot 0 holds a fresh DATA1N_root node. */
319 uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0);
320 uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */
/* Encoding argument 0: no encoding is specified to Expat here. */
322 parser = XML_ParserCreate (0 /* encoding */);
/* Register the callbacks; uinfo reaches them as the user-data pointer. */
324 XML_SetElementHandler (parser, cb_start, cb_end);
325 XML_SetCharacterDataHandler (parser, cb_chardata);
326 XML_SetXmlDeclHandler (parser, cb_decl);
327 XML_SetProcessingInstructionHandler (parser, cb_processing);
328 XML_SetUserData (parser, &uinfo);
329 XML_SetCommentHandler (parser, cb_comment);
330 XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end);
331 XML_SetEntityDeclHandler (parser, cb_entity_decl);
333 XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, 0);
/* Ask Expat for a buffer of XML_CHUNK bytes to read into. */
339 void *buf = XML_GetBuffer (parser, XML_CHUNK);
343 yaz_log (LOG_FATAL, "XML_GetBuffer fail");
/* Fill the Expat-owned buffer through the caller's read callback. */
346 r = (*rf)(fh, buf, XML_CHUNK);
350 yaz_log (LOG_FATAL, "XML read fail");
/* Parse the r bytes just read; done is passed as the final-chunk flag. A
 * zero result is logged with Expat's error string. */
355 if (!XML_ParseBuffer (parser, r, done))
357 yaz_log (LOG_FATAL, "XML_ParseBuffer failed %s",
358 XML_ErrorString(XML_GetErrorCode(parser)));
361 XML_ParserFree (parser);
/* Slot 1 was cleared before parsing; this checks whether it is still 0. */
362 if (!uinfo.d1_stack[1])
364 return uinfo.d1_stack[0];