X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=retrieval%2Fd1_expat.c;fp=retrieval%2Fd1_expat.c;h=0000000000000000000000000000000000000000;hp=1aa7538661c1b00fa5c189875694186415e5fee2;hb=8f3f8256752d09f66cd2c7a412cc8c30a0fc90ea;hpb=f3fa2f64b69427aef4dbb191fc870865bb87c9b5 diff --git a/retrieval/d1_expat.c b/retrieval/d1_expat.c deleted file mode 100644 index 1aa7538..0000000 --- a/retrieval/d1_expat.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright (c) 2002, Index Data. - * See the file LICENSE for details. - * - * $Id: d1_expat.c,v 1.7 2002-08-23 14:24:05 adam Exp $ - */ - -#if HAVE_EXPAT_H - -#include -#include -#include - -#if HAVE_ICONV_H -#include -#include -#endif - -#include -#include -#include - -#include - -#define XML_CHUNK 1024 - -struct user_info { - data1_node *d1_stack[256]; - int level; - data1_handle dh; - NMEM nmem; - int loglevel; -}; - -static void cb_start (void *user, const char *el, const char **attr) -{ - struct user_info *ui = (struct user_info*) user; - if (ui->level == 1) - data1_set_root (ui->dh, ui->d1_stack[0], ui->nmem, el); - ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr, - ui->d1_stack[ui->level-1]); - ui->level++; - yaz_log (ui->loglevel, "cb_start %s", el); -} - -static void cb_end (void *user, const char *el) -{ - struct user_info *ui = (struct user_info*) user; - - ui->level--; - yaz_log (ui->loglevel, "cb_end %s", el); -} - -static void cb_chardata (void *user, const char *s, int len) -{ - struct user_info *ui = (struct user_info*) user; -#if 0 - yaz_log (ui->loglevel, "cb_chardata %.*s", len, s); -#endif - ui->d1_stack[ui->level] = data1_mk_text_n (ui->dh, ui->nmem, s, len, - ui->d1_stack[ui->level -1]); -} - -static void cb_decl (void *user, const char *version, const char*encoding, - int standalone) -{ - struct user_info *ui = (struct user_info*) user; - const char *attr_list[7]; - - attr_list[0] = "version"; - attr_list[1] = version; - - attr_list[2] = "encoding"; - attr_list[3] = "UTF-8"; /* encoding */ - - attr_list[4] = "standalone"; - attr_list[5] = standalone ? "yes" : "no"; - - attr_list[6] = 0; - - data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list, - ui->d1_stack[ui->level-1]); - yaz_log (ui->loglevel, "decl version=%s encoding=%s", - version ? version : "null", - encoding ? encoding : "null"); -} - -static void cb_processing (void *user, const char *target, - const char *data) -{ - struct user_info *ui = (struct user_info*) user; - data1_node *res = - data1_mk_preprocess (ui->dh, ui->nmem, target, 0, - ui->d1_stack[ui->level-1]); - data1_mk_text_nf (ui->dh, ui->nmem, data, strlen(data), res); - - yaz_log (ui->loglevel, "decl processing target=%s data=%s", - target ? target : "null", - data ? data : "null"); - - -} - -static void cb_comment (void *user, const char *data) -{ - struct user_info *ui = (struct user_info*) user; - yaz_log (ui->loglevel, "decl comment data=%s", data ? data : "null"); - data1_mk_comment (ui->dh, ui->nmem, data, ui->d1_stack[ui->level-1]); -} - -static void cb_doctype_start (void *userData, const char *doctypeName, - const char *sysid, const char *pubid, - int has_internal_subset) -{ - struct user_info *ui = (struct user_info*) userData; - yaz_log (ui->loglevel, "doctype start doctype=%s sysid=%s pubid=%s", - doctypeName, sysid, pubid); -} - -static void cb_doctype_end (void *userData) -{ - struct user_info *ui = (struct user_info*) userData; - yaz_log (ui->loglevel, "doctype end"); -} - - -static void cb_entity_decl (void *userData, const char *entityName, - int is_parameter_entity, - const char *value, int value_length, - const char *base, const char *systemId, - const char *publicId, const char *notationName) -{ - struct user_info *ui = (struct user_info*) userData; - yaz_log (ui->loglevel, - "entity decl %s is_para_entry=%d value=%.*s base=%s systemId=%s" - " publicId=%s notationName=%s", - entityName, is_parameter_entity, value_length, value, - base, systemId, publicId, notationName); - -} - -static int cb_external_entity (XML_Parser pparser, - const char *context, - const char *base, - const char *systemId, - const char *publicId) -{ - struct user_info *ui = (struct user_info*) XML_GetUserData(pparser); - FILE *inf; - int done = 0; - XML_Parser parser; - - yaz_log (ui->loglevel, - "external entity context=%s base=%s systemid=%s publicid=%s", - context, base, systemId, publicId); - if (!systemId) - return 1; - - if (!(inf = fopen (systemId, "rb"))) - { - yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId); - return 0; - } - - parser = XML_ExternalEntityParserCreate (pparser, "", 0); - while (!done) - { - int r; - void *buf = XML_GetBuffer (parser, XML_CHUNK); - if (!buf) - { - yaz_log (LOG_WARN, "XML_GetBuffer fail"); - break; - } - r = fread (buf, 1, XML_CHUNK, inf); - if (r == 0) - { - if (ferror(inf)) - { - yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId); - break; - } - done = 1; - } - if (!XML_ParseBuffer (parser, r, done)) - { - yaz_log (LOG_WARN, "XML_ParseBuffer failed %s", - XML_ErrorString(XML_GetErrorCode(parser))); - } - } - fclose (inf); - XML_ParserFree (parser); - return done; -} - - -#if HAVE_ICONV_H -static int cb_encoding_convert (void *data, const char *s) -{ - iconv_t t = (iconv_t) data; - size_t ret; - size_t outleft = 2; - char outbuf_[2], *outbuf = outbuf_; - size_t inleft = 4; - char *inbuf = (char *) s; - unsigned short code; - - ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft); - if (ret == (size_t) (-1) && errno != E2BIG) - { - iconv (t, 0, 0, 0, 0); - return -1; - } - if (outleft != 0) - return -1; - memcpy (&code, outbuf_, sizeof(short)); - return code; -} - -static void cb_encoding_release (void *data) -{ - iconv_t t = (iconv_t) data; - iconv_close (t); -} - -static int cb_encoding_handler (void *userData, const char *name, - XML_Encoding *info) -{ - int i = 0; - int no_ok = 0; - struct user_info *ui = (struct user_info*) userData; - - iconv_t t = iconv_open ("UNICODE", name); - if (t == (iconv_t) (-1)) - return 0; - - info->data = 0; /* signal that multibyte is not in use */ - yaz_log (ui->loglevel, "Encoding handler of %s", name); - for (i = 0; i<256; i++) - { - size_t ret; - char outbuf_[5]; - char inbuf_[5]; - char *inbuf = inbuf_; - char *outbuf = outbuf_; - size_t inleft = 1; - size_t outleft = 2; - inbuf_[0] = i; - - iconv (t, 0, 0, 0, 0); /* reset iconv */ - - ret = iconv(t, &inbuf, &inleft, &outbuf, &outleft); - if (ret == (size_t) (-1)) - { - if (errno == EILSEQ) - { - yaz_log (ui->loglevel, "Encoding %d: invalid sequence", i); - info->map[i] = -1; /* invalid sequence */ - } - if (errno == EINVAL) - { /* multi byte input */ - int len = 2; - int j = 0; - info->map[i] = -1; - - while (len <= 4) - { - char sbuf[80]; - int k; - inbuf = inbuf_; - inleft = len; - outbuf = outbuf_; - outleft = 2; - - inbuf_[len-1] = j; - iconv (t, 0,0,0,0); - - assert (i >= 0 && i<255); - - *sbuf = 0; - for (k = 0; k 255) - break; - } - else if (errno == EINVAL) - { - len++; - j = 7; - } - } - else if (outleft == 0) - { - info->map[i] = -len; - info->data = t; /* signal that multibyte is in use */ - break; - } - else - { - break; - } - } - if (info->map[i] < -1) - yaz_log (ui->loglevel, "Encoding %d: multibyte input %d", - i, -info->map[i]); - else - yaz_log (ui->loglevel, "Encoding %d: multibyte input failed", - i); - } - if (errno == E2BIG) - { - info->map[i] = -1; /* no room for output */ - yaz_log (LOG_WARN, "Encoding %d: no room for output", - i); - } - } - else if (outleft == 0) - { - unsigned short code; - memcpy (&code, outbuf_, sizeof(short)); - info->map[i] = code; - no_ok++; - } - else - { /* should never happen */ - info->map[i] = -1; - yaz_log (LOG_DEBUG, "Encoding %d: bad state", i); - } - } - if (info->data) - { /* at least one multi byte */ - info->convert = cb_encoding_convert; - info->release = cb_encoding_release; - } - else - { - /* no multi byte - we no longer need iconv handler */ - iconv_close(t); - info->convert = 0; - info->release = 0; - } - if (!no_ok) - return 0; - return 1; -} -/* HAVE_ICONV_H */ -#endif - - -data1_node *data1_read_xml (data1_handle dh, - int (*rf)(void *, char *, size_t), void *fh, - NMEM m) -{ - XML_Parser parser; - struct user_info uinfo; - int done = 0; - - uinfo.loglevel = LOG_LOG; - uinfo.level = 1; - uinfo.dh = dh; - uinfo.nmem = m; - uinfo.d1_stack[0] = data1_mk_node2 (dh, m, DATA1N_root, 0); - uinfo.d1_stack[1] = 0; /* indicate no children (see end of routine) */ - - parser = XML_ParserCreate (0 /* encoding */); - - XML_SetElementHandler (parser, cb_start, cb_end); - XML_SetCharacterDataHandler (parser, cb_chardata); - XML_SetXmlDeclHandler (parser, cb_decl); - XML_SetProcessingInstructionHandler (parser, cb_processing); - XML_SetUserData (parser, &uinfo); - XML_SetCommentHandler (parser, cb_comment); - XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end); - XML_SetEntityDeclHandler (parser, cb_entity_decl); - XML_SetExternalEntityRefHandler (parser, cb_external_entity); -#if HAVE_ICONV_H - XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo); -#endif - while (!done) - { - int r; - void *buf = XML_GetBuffer (parser, XML_CHUNK); - if (!buf) - { - /* error */ - yaz_log (LOG_WARN, "XML_GetBuffer fail"); - break; - } - r = (*rf)(fh, buf, XML_CHUNK); - if (r < 0) - { - /* error */ - yaz_log (LOG_WARN, "XML read fail"); - break; - } - else if (r == 0) - done = 1; - if (!XML_ParseBuffer (parser, r, done)) - { - yaz_log (LOG_WARN, "XML_ParseBuffer (1) failed %s", - XML_ErrorString(XML_GetErrorCode(parser))); - } - } - XML_ParserFree (parser); - if (!uinfo.d1_stack[1] || !done) - return 0; - return uinfo.d1_stack[0]; -} - -/* HAVE_EXPAT_H */ -#endif