From 651ccb74d12f2968e791b3f411a6c55bca68428a Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 13 May 2002 14:13:37 +0000 Subject: [PATCH] XML reader for data1 (EXPAT) --- CHANGELOG | 3 ++ buildconf.sh | 4 +- configure.in | 32 ++++++++++++++-- include/yaz/data1.h | 15 ++++---- retrieval/Makefile.am | 4 +- retrieval/d1_expat.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ retrieval/d1_grs.c | 7 +++- retrieval/d1_prtree.c | 10 ++--- retrieval/d1_read.c | 45 +++++++++++----------- retrieval/d1_write.c | 7 +--- yaz-config.in | 4 +- 11 files changed, 180 insertions(+), 51 deletions(-) create mode 100644 retrieval/d1_expat.c diff --git a/CHANGELOG b/CHANGELOG index 59cfb09..472384b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,10 @@ Possible compatibility problems with earlier versions marked with '*'. + --- 1.8.7 2002/MM/DD +XML reader for data1 (EXPAT). + Fixed bug in ZOOM connect that occurred on OpenBSD (maybe others). Patch by Oleg Kolobov. diff --git a/buildconf.sh b/buildconf.sh index 5373ba8..6c75e17 100755 --- a/buildconf.sh +++ b/buildconf.sh @@ -1,5 +1,5 @@ #!/bin/sh -# $Id: buildconf.sh,v 1.10 2002-03-16 11:46:18 adam Exp $ +# $Id: buildconf.sh,v 1.11 2002-05-13 14:13:37 adam Exp $ aclocal libtoolize --automake --force automake -a @@ -7,4 +7,4 @@ autoconf if [ -f config.cache ]; then rm config.cache fi -util/cvs-date.tcl include/yaz/yaz-date.h +#util/cvs-date.tcl include/yaz/yaz-date.h diff --git a/configure.in b/configure.in index ea5551e..c5b3a91 100644 --- a/configure.in +++ b/configure.in @@ -1,10 +1,11 @@ dnl YAZ Toolkit, Index Data 1994-2001 dnl See the file LICENSE for details. 
-dnl $Id: configure.in,v 1.72 2002-05-03 13:47:57 adam Exp $ +dnl $Id: configure.in,v 1.73 2002-05-13 14:13:37 adam Exp $ AC_INIT(include/yaz/yaz-version.h) AM_INIT_AUTOMAKE(yaz, 1.8.7) dnl AC_SUBST(READLINE_LIBS) +AC_SUBST(YAZ_CONF_CFLAGS) dnl ------ Checking programs AC_PROG_CC AC_PROG_CPP @@ -93,6 +94,30 @@ if test "$checkBoth" = "1"; then fi AC_CHECK_FUNC(gethostbyname, , AC_CHECK_LIB(nsl, main, [LIBS="$LIBS -lnsl"])) dnl +dnl ------ EXPAT +expat=yes +AC_ARG_WITH(expat, [ --with-expat[=DIR] EXPAT library in DIR],[expat=$withval]) +if test "$expat" != "no"; then + xLIBS="$LIBS"; + xCFLAGS="$CFLAGS"; + if test "$expat" != "yes"; then + EXPAT_CFLAGS="-I$expat/include" + EXPAT_LIBS="-L$expat/lib" + CFLAGS="$EXPAT_CFLAGS $CFLAGS" + LIBS="$EXPAT_LIBS $LIBS" + fi + AC_CHECK_LIB(expat,XML_ParserCreate,[LIBS="$LIBS -lexpat"]) + if test "$ac_cv_lib_expat_XML_ParserCreate" = "yes"; then + AC_CHECK_HEADERS(expat.h) + if test "$ac_cv_header_expat_h" = "yes"; then + YAZ_CONF_CFLAGS="$YAZ_CONF_CFLAGS -DYAZ_HAVE_EXPAT=1" + fi + else + LIBS="$xLIBS" + CFLAGS="$xCFLAGS" + fi +fi +dnl dnl ------ Open SSL openssl=no AC_ARG_WITH(openssl, [ --with-openssl[=DIR] OpenSSL library in DIR], [openssl=$withval]) @@ -125,6 +150,7 @@ if test "$openssl" != "no"; then CFLAGS="$xCFLAGS" fi AM_CONDITIONAL(ISSSL, test $USE_SSL = "1") +dnl dnl ------ GNU Readline READLINE_SHARED_LIBADD="" AC_CHECK_LIB(ncurses, tgetent, [READLINE_SHARED_LIBADD="-lncurses"], @@ -135,7 +161,7 @@ AC_CHECK_LIB(readline, readline, [READLINE_LIBS="$READLINE_LIBS -lreadline $READ AC_CHECK_LIB(history, add_history, [READLINE_LIBS="$READLINE_LIBS -lhistory"]) if test "$ac_cv_lib_readline_readline" = "yes"; then AC_CHECK_HEADERS(readline/readline.h readline/history.h) - OLDLIBS=$LIBS + xLIBS=$LIBS LIBS="$LIBS $READLINE_LIBS" AC_TRY_LINK([ #include @@ -155,7 +181,7 @@ if test "$ac_cv_lib_readline_readline" = "yes"; then rl_completion_matches (0, 0); } ],AC_DEFINE(HAVE_READLINE_RL_COMPLETION_MATCHES)) - LIBS=$OLDLIBS + 
LIBS=$xLIBS fi dnl ------ various functions AC_CHECK_FUNCS(vsnprintf gettimeofday poll) diff --git a/include/yaz/data1.h b/include/yaz/data1.h index 769b512..430d413 100644 --- a/include/yaz/data1.h +++ b/include/yaz/data1.h @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: data1.h,v 1.9 2002-05-07 11:02:56 adam Exp $ + * $Id: data1.h,v 1.10 2002-05-13 14:13:37 adam Exp $ */ #ifndef DATA1_H @@ -43,8 +43,6 @@ #define d1_isspace(c) strchr(" \r\n\t\f", c) #define d1_isdigit(c) ((c) <= '9' && (c) >= '0') -#define DATA1_USING_XATTR 1 - YAZ_BEGIN_CDECL #define data1_matchstr(s1, s2) yaz_matchstr(s1, s2) @@ -195,13 +193,11 @@ typedef struct data1_sub_elements { data1_element *elements; } data1_sub_elements; -#if DATA1_USING_XATTR typedef struct data1_xattr { char *name; char *value; struct data1_xattr *next; } data1_xattr; -#endif typedef struct data1_absyn { @@ -249,9 +245,7 @@ typedef struct data1_node int get_bytes; unsigned node_selected : 1; unsigned make_variantlist : 1; -#if DATA1_USING_XATTR data1_xattr *attributes; -#endif } tag; struct @@ -342,9 +336,11 @@ YAZ_EXPORT data1_node *data1_mk_node2(data1_handle dh, NMEM m, int type, data1_node *parent); YAZ_EXPORT data1_node *data1_mk_tag (data1_handle dh, NMEM nmem, - const char *tag, data1_node *at); + const char *tag, const char **attr, + data1_node *at); YAZ_EXPORT data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem, const char *tag, size_t len, + const char **attr, data1_node *at); YAZ_EXPORT data1_node *data1_mk_text_n (data1_handle dh, NMEM mem, @@ -414,6 +410,9 @@ YAZ_EXPORT char *data1_insert_string_n (data1_handle dh, data1_node *res, NMEM m, const char *str, size_t len); YAZ_EXPORT data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf); +YAZ_EXPORT data1_node *data1_read_xml (data1_handle dh, + int (*rf)(void *, char *, size_t), + void *fh, NMEM m); YAZ_EXPORT void data1_absyn_trav (data1_handle dh, void *handle, 
void (*fh)(data1_handle dh, void *h, data1_absyn *a));
diff --git a/retrieval/Makefile.am b/retrieval/Makefile.am
index c95ff59..3076065 100644
--- a/retrieval/Makefile.am
+++ b/retrieval/Makefile.am
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.5 2002-04-15 09:44:44 adam Exp $
+## $Id: Makefile.am,v 1.6 2002-05-13 14:13:37 adam Exp $
 
 noinst_LTLIBRARIES=libret.la
 
@@ -6,7 +6,7 @@ LIBS =
 
 libret_la_SOURCES = d1_handle.c d1_read.c d1_attset.c d1_tagset.c d1_absyn.c \
  d1_grs.c d1_sutrs.c d1_varset.c d1_espec.c d1_doespec.c d1_map.c d1_marc.c \
- d1_write.c d1_expout.c d1_sumout.c d1_soif.c d1_prtree.c d1_if.c
+ d1_write.c d1_expout.c d1_sumout.c d1_soif.c d1_prtree.c d1_if.c d1_expat.c
 
 AM_CPPFLAGS=-I$(top_srcdir)/include
 
diff --git a/retrieval/d1_expat.c b/retrieval/d1_expat.c
new file mode 100644
index 0000000..099a922
--- /dev/null
+++ b/retrieval/d1_expat.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2002, Index Data.
+ * See the file LICENSE for details.
+ *
+ * $Id: d1_expat.c,v 1.1 2002-05-13 14:13:37 adam Exp $
+ */
+
+#if HAVE_EXPAT_H
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+#define D1_EXPAT_MAX_DEPTH 256
+
+/* Parse state shared by the expat callbacks below. */
+struct user_info {
+    data1_node *d1_stack[D1_EXPAT_MAX_DEPTH]; /* open elements, root first */
+    int level;                                /* number of open elements */
+    data1_handle dh;
+    NMEM nmem;
+};
+
+/* Start tag: the first element becomes the root, later ones become tags
+   under their parent.  Elements nested too deep are counted, not stored. */
+static void cb_start (void *user, const char *el, const char **attr)
+{
+    struct user_info *ui = (struct user_info*) user;
+    if (ui->level == 0)
+        ui->d1_stack[0] = data1_mk_root (ui->dh, ui->nmem, el);
+    else if (ui->level < D1_EXPAT_MAX_DEPTH)
+        ui->d1_stack[ui->level] = data1_mk_tag (ui->dh, ui->nmem, el, attr,
+                                                 ui->d1_stack[ui->level-1]);
+    ui->level++;
+}
+
+/* End tag: leave the current element. */
+static void cb_end (void *user, const char *el)
+{
+    struct user_info *ui = (struct user_info*) user;
+    ui->level--;
+}
+
+/* Character data: add a text node below the innermost stored element. */
+static void cb_chardata (void *user, const char *s, int len)
+{
+    struct user_info *ui = (struct user_info*) user;
+    if (ui->level > 0 && ui->level <= D1_EXPAT_MAX_DEPTH)
+        data1_mk_text_n (ui->dh, ui->nmem, s, len, ui->d1_stack[ui->level-1]);
+}
+
+#define XML_CHUNK 1024
+
+/* Reads XML through rf(fh, buf, size), which returns the number of bytes
+   read, 0 at end of input or a negative value on error, and builds a data1
+   tree in m.  Returns the root node, or 0 on read or parse failure. */
+data1_node *data1_read_xml (data1_handle dh,
+                            int (*rf)(void *, char *, size_t), void *fh,
+                            NMEM m)
+{
+    XML_Parser parser;
+    struct user_info uinfo;
+    int done = 0;
+
+    uinfo.level = 0;
+    uinfo.dh = dh;
+    uinfo.d1_stack[0] = 0;
+    uinfo.nmem = m;
+
+    parser = XML_ParserCreate (0 /* encoding */);
+    if (!parser)
+        return 0;
+    XML_SetElementHandler (parser, cb_start, cb_end);
+    XML_SetCharacterDataHandler (parser, cb_chardata);
+    XML_SetUserData (parser, &uinfo);
+
+    while (!done)
+    {
+        int r;
+        void *buf = XML_GetBuffer (parser, XML_CHUNK);
+        r = buf ? (*rf)(fh, buf, XML_CHUNK) : -1;
+        done = (r == 0);
+        if (r < 0 || !XML_ParseBuffer (parser, r, done))
+        {
+            /* give up, but fall through so the parser is freed */
+            uinfo.d1_stack[0] = 0;
+            done = 1;
+        }
+    }
+    XML_ParserFree (parser);
+    return uinfo.d1_stack[0];
+}
+
+#endif
diff --git a/retrieval/d1_grs.c b/retrieval/d1_grs.c
index 882758b..774b188 100644
--- a/retrieval/d1_grs.c
+++ b/retrieval/d1_grs.c
@@ -4,7 +4,10 @@
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: d1_grs.c,v $
- * Revision 1.19 2002-04-15 09:06:30 adam
+ * Revision 1.20 2002-05-13 14:13:37 adam
+ * XML reader for data1 (EXPAT)
+ *
+ * Revision 1.19 2002/04/15 09:06:30 adam
  * Fix explain tags for XML writer
  *
  * Revision 1.18 2002/04/12 14:40:07 adam
@@ -204,7 +207,7 @@ static Z_ElementData *nodetoelementdata(data1_handle dh, data1_node *n,
             break;
         case DATA1I_text:
             toget = n->u.data.len;
-            if (p->u.tag.get_bytes > 0 && p->u.tag.get_bytes < toget)
+            if (p && p->u.tag.get_bytes > 0 && p->u.tag.get_bytes < toget)
                 toget = p->u.tag.get_bytes;
             res->which = Z_ElementData_string;
             res->u.string = (char *)odr_malloc(o, toget+1);
diff --git a/retrieval/d1_prtree.c b/retrieval/d1_prtree.c
index e5df6fe..6f04412 100644
--- a/retrieval/d1_prtree.c
+++ b/retrieval/d1_prtree.c
@@ -3,7 +3,7 @@
  * See the file LICENSE for details.
* Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_prtree.c,v 1.7 2002-01-26 19:20:25 adam Exp $ + * $Id: d1_prtree.c,v 1.8 2002-05-13 14:13:37 adam Exp $ */ #include @@ -27,12 +27,11 @@ static void pr_tree (data1_handle dh, data1_node *n, FILE *out, int level) fprintf (out, "%*s", level, ""); switch (n->which) { - case DATA1N_root: - fprintf (out, "root abstract syntax=%s\n", n->u.root.type); - break; + case DATA1N_root: + fprintf (out, "root abstract syntax=%s\n", n->u.root.type); + break; case DATA1N_tag: fprintf (out, "tag type=%s\n", n->u.tag.tag); -#if DATA1_USING_XATTR if (n->u.tag.attributes) { data1_xattr *xattr = n->u.tag.attributes; @@ -41,7 +40,6 @@ static void pr_tree (data1_handle dh, data1_node *n, FILE *out, int level) fprintf (out, " %s=%s ", xattr->name, xattr->value); fprintf (out, "\n"); } -#endif break; case DATA1N_data: fprintf (out, "data type="); diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index 21290c4..ac19a9f 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. 
* Sebastian Hammer, Adam Dickmeiss
  *
- * $Id: d1_read.c,v 1.41 2002-05-07 11:02:56 adam Exp $
+ * $Id: d1_read.c,v 1.42 2002-05-13 14:13:37 adam Exp $
  */
 
 #include
@@ -69,9 +69,7 @@ data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type,
         r->u.tag.node_selected = 0;
         r->u.tag.make_variantlist = 0;
         r->u.tag.get_bytes = -1;
-#if DATA1_USING_XATTR
         r->u.tag.attributes = 0;
-#endif
         break;
     case DATA1N_root:
         r->u.root.type = 0;
@@ -124,11 +122,13 @@ data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name)
 }
 
 data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem,
-                            const char *tag, size_t len, data1_node *at)
+                            const char *tag, size_t len, const char **attr,
+                            data1_node *at)
 {
     data1_node *partag = get_parent_tag(dh, at);
     data1_node *res = data1_mk_node2 (dh, nmem, DATA1N_tag, at);
     data1_element *e = NULL;
+    data1_xattr **p;
 
     res->u.tag.tag = data1_insert_string_n (dh, res, nmem, tag, len);
 
@@ -137,13 +137,22 @@ data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem,
     res->u.tag.element =
         data1_getelementbytagname (dh, at->root->u.root.absyn,
                                    e, res->u.tag.tag);
+    p = &res->u.tag.attributes;
+    for (; attr && *attr; attr += 2) /* attr is name, value, ..., 0 */
+    {
+        *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p));
+        (*p)->name = nmem_strdup (nmem, attr[0]);
+        (*p)->value = nmem_strdup (nmem, attr[1]);
+        p = &(*p)->next;
+    }
+    *p = 0;
     return res;
 }
 
 data1_node *data1_mk_tag (data1_handle dh, NMEM nmem,
-                          const char *tag, data1_node *at)
+                          const char *tag, const char **attr, data1_node *at)
 {
-    return data1_mk_tag_n (dh, nmem, tag, strlen(tag), at);
+    return data1_mk_tag_n (dh, nmem, tag, strlen(tag), attr, at);
 }
 
 data1_node *data1_search_tag (data1_handle dh, data1_node *n,
@@ -163,7 +172,7 @@ data1_node *data1_mk_tag_uni (data1_handle dh, NMEM nmem,
 {
     data1_node *node = data1_search_tag (dh, at->child, tag);
     if (!node)
-        node = data1_mk_tag (dh, nmem, tag, at);
+        node = data1_mk_tag (dh, nmem, tag, 0 /* attr */, at);
     else
         node->child = node->last_child = 0;
     return node;
@@ -341,7 +350,6 @@
data1_node *data1_mk_tag_data_text_uni (data1_handle dh, data1_node *at, } -#if DATA1_USING_XATTR data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, int (*get_byte)(void *fh), void *fh, WRBUF wrbuf, int *ch) @@ -405,7 +413,6 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, *ch = c; return p_first; } -#endif /* * Ugh. Sometimes functions just grow and grow on you. This one reads a @@ -436,9 +443,8 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, if (c == '<') /* beginning of tag */ { -#if DATA1_USING_XATTR data1_xattr *xattr; -#endif + char tag[64]; char args[256]; int null_tag = 0; @@ -466,17 +472,8 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, c = (*get_byte)(fh); } tag[i] = '\0'; -#if DATA1_USING_XATTR xattr = data1_read_xattr (dh, m, get_byte, fh, wrbuf, &c); args[0] = '\0'; -#else - while (d1_isspace(c)) - c = (*get_byte)(fh); - for (i = 0; c && c != '>' && c != '/'; c = (*get_byte)(fh)) - if (i < (sizeof(args)-1)) - args[i++] = c; - args[i] = '\0'; -#endif if (c == '/') { /* or */ null_tag = 1; @@ -568,8 +565,12 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, data1_insert_string (dh, res, m, args + val_offset); } } - else /* tag.. acquire our element in the abstract syntax */ - res = data1_mk_tag (dh, m, tag, parent); + else + { + /* tag.. acquire our element in the abstract syntax */ + res = data1_mk_tag (dh, m, tag, 0 /* attr */, parent); + res->u.tag.attributes = xattr; + } d1_stack[level] = res; d1_stack[level+1] = 0; if (level < 250 && !null_tag) diff --git a/retrieval/d1_write.c b/retrieval/d1_write.c index 51f2ff3..bf8b8fe 100644 --- a/retrieval/d1_write.c +++ b/retrieval/d1_write.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_write.c,v 1.10 2002-04-15 09:06:30 adam Exp $ + * $Id: d1_write.c,v 1.11 2002-05-13 14:13:37 adam Exp $ */ #include @@ -43,13 +43,11 @@ static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col) } else { -#if DATA1_USING_XATTR data1_xattr *p; -#endif + sprintf (line, "%*s<", col, ""); wrbuf_puts (b, line); wrbuf_puts (b, tag); -#if DATA1_USING_XATTR for (p = c->u.tag.attributes; p; p = p->next) { wrbuf_putc (b, ' '); @@ -59,7 +57,6 @@ static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col) wrbuf_puts (b, p->value); wrbuf_putc (b, '"'); } -#endif wrbuf_puts(b, ">\n"); if (nodetoidsgml(c, select, b, (col > 40) ? 40 : col+2) < 0) return -1; diff --git a/yaz-config.in b/yaz-config.in index 2c5f686..f7117be 100644 --- a/yaz-config.in +++ b/yaz-config.in @@ -1,5 +1,5 @@ #!/bin/sh -# $Id: yaz-config.in,v 1.12 2002-03-18 12:22:00 adam Exp $ +# $Id: yaz-config.in,v 1.13 2002-05-13 14:13:37 adam Exp $ yazprefix=@prefix@ yaz_echo_cflags=no yaz_echo_libs=no @@ -137,6 +137,8 @@ else fi fi +YAZINC="$YAZINC @YAZ_CONF_CFLAGS@" + if test "$lib_thread" = "yes"; then YAZINC="$YAZINC @CFLAGSTHREADS@" fi -- 1.7.10.4