X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fxmlread.c;h=08c3e239c4ffccdcc35fa02fb5fc07e7be5af794;hb=9b9f570a2960c2c8a7026b2faee943794b08ce49;hp=90b17d3868ee2dccd0b28895e16e200d687e26a9;hpb=c63292356fbdef7b575efbf027ba8574482b0320;p=idzebra-moved-to-github.git diff --git a/recctrl/xmlread.c b/recctrl/xmlread.c index 90b17d3..08c3e23 100644 --- a/recctrl/xmlread.c +++ b/recctrl/xmlread.c @@ -1,6 +1,6 @@ -/* $Id: xmlread.c,v 1.3 2002-08-28 12:47:10 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 - Index Data Aps +/* $Id: xmlread.c,v 1.19 2005-03-31 12:42:07 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -32,11 +32,10 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include -#include "grsread.h" +#include -#include #include -#include +#include #include @@ -79,7 +78,7 @@ static void cb_chardata (void *user, const char *s, int len) ui->d1_stack[ui->level -1]); } -static void cb_decl (void *user, const char *version, const char*encoding, +static void cb_decl (void *user, const char *version, const char *encoding, int standalone) { struct user_info *ui = (struct user_info*) user; @@ -89,7 +88,7 @@ static void cb_decl (void *user, const char *version, const char*encoding, attr_list[1] = version; attr_list[2] = "encoding"; - attr_list[3] = "UTF-8"; /* encoding */ + attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */ attr_list[4] = "standalone"; attr_list[5] = standalone ? "yes" : "no"; @@ -98,9 +97,11 @@ static void cb_decl (void *user, const char *version, const char*encoding, data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list, ui->d1_stack[ui->level-1]); - yaz_log (ui->loglevel, "decl version=%s encoding=%s", +#if 0 + yaz_log (YLOG_LOG, "decl version=%s encoding=%s", version ? version : "null", encoding ? encoding : "null"); +#endif } static void cb_processing (void *user, const char *target, @@ -115,8 +116,6 @@ static void cb_processing (void *user, const char *target, yaz_log (ui->loglevel, "decl processing target=%s data=%s", target ? target : "null", data ? data : "null"); - - } static void cb_comment (void *user, const char *data) @@ -176,7 +175,7 @@ static int cb_external_entity (XML_Parser pparser, if (!(inf = fopen (systemId, "rb"))) { - yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId); + yaz_log (YLOG_WARN|YLOG_ERRNO, "fopen %s", systemId); return 0; } @@ -187,7 +186,7 @@ static int cb_external_entity (XML_Parser pparser, void *buf = XML_GetBuffer (parser, XML_CHUNK); if (!buf) { - yaz_log (LOG_WARN, "XML_GetBuffer fail"); + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); break; } r = fread (buf, 1, XML_CHUNK, inf); @@ -195,14 +194,18 @@ static int cb_external_entity (XML_Parser pparser, { if (ferror(inf)) { - yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId); + yaz_log (YLOG_WARN|YLOG_ERRNO, "fread %s", systemId); break; } done = 1; } if (!XML_ParseBuffer (parser, r, done)) { - yaz_log (LOG_WARN, "XML_ParseBuffer failed %s", + done = 1; + yaz_log (YLOG_WARN, "%s:%d:%d:XML error: %s", + systemId, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), XML_ErrorString(XML_GetErrorCode(parser))); } } @@ -223,6 +226,9 @@ static int cb_encoding_convert (void *data, const char *s) char *inbuf = (char *) s; unsigned short code; +#if 1 + yaz_log(YLOG_LOG, "------------------------- cb_encoding_convert --- "); +#endif ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t) (-1) && errno != E2BIG) { @@ -336,8 +342,9 @@ static int cb_encoding_handler (void *userData, const char *name, if (errno == E2BIG) { info->map[i] = -1; /* no room for output */ - yaz_log (LOG_WARN, "Encoding %d: no room for output", - i); + if (i != 0) + yaz_log (YLOG_WARN, "Encoding %d: no room for output", + i); } } else if (outleft == 0) @@ -350,7 +357,7 @@ static int cb_encoding_handler (void *userData, const char *name, else { /* should never happen */ info->map[i] = -1; - yaz_log (LOG_DEBUG, "Encoding %d: bad state", i); + yaz_log (YLOG_DEBUG, "Encoding %d: bad state", i); } } if (info->data) @@ -372,7 +379,19 @@ static int cb_encoding_handler (void *userData, const char *name, /* HAVE_ICONV_H */ #endif +static void cb_ns_start(void *userData, const char *prefix, const char *uri) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix && uri) + yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri); +} +static void cb_ns_end(void *userData, const char *prefix) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix) + yaz_log(ui->loglevel, "cb_ns_end %s", prefix); +} data1_node *zebra_read_xml (data1_handle dh, int (*rf)(void *, char *, size_t), void *fh, NMEM m) @@ -380,8 +399,9 @@ data1_node *zebra_read_xml (data1_handle dh, XML_Parser parser; struct user_info uinfo; int done = 0; + data1_node *first_node; - uinfo.loglevel = LOG_LOG; + uinfo.loglevel = YLOG_DEBUG; uinfo.level = 1; uinfo.dh = dh; uinfo.nmem = m; @@ -399,6 +419,7 @@ data1_node *zebra_read_xml (data1_handle dh, XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end); XML_SetEntityDeclHandler (parser, cb_entity_decl); XML_SetExternalEntityRefHandler (parser, cb_external_entity); + XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end); #if HAVE_ICONV_H XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo); #endif @@ -409,61 +430,107 @@ data1_node *zebra_read_xml (data1_handle dh, if (!buf) { /* error */ - yaz_log (LOG_WARN, "XML_GetBuffer fail"); + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); break; } r = (*rf)(fh, buf, XML_CHUNK); if (r < 0) { /* error */ - yaz_log (LOG_WARN, "XML read fail"); + yaz_log (YLOG_WARN, "XML read fail"); break; } else if (r == 0) done = 1; if (!XML_ParseBuffer (parser, r, done)) { - yaz_log (LOG_WARN, "XML_ParseBuffer (1) failed %s", + done = 1; + yaz_log (YLOG_WARN, "%d:%d:XML error: %s", + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), XML_ErrorString(XML_GetErrorCode(parser))); } } XML_ParserFree (parser); if (!uinfo.d1_stack[1] || !done) return 0; + /* insert XML header if not present .. */ + first_node = uinfo.d1_stack[0]->child; + if (first_node->which != DATA1N_preprocess || + strcmp(first_node->u.preprocess.target, "xml")) + { + const char *attr_list[5]; + + attr_list[0] = "version"; + attr_list[1] = "1.0"; + + attr_list[2] = "encoding"; + attr_list[3] = "UTF-8"; /* encoding */ + + attr_list[4] = 0; + + data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list, + uinfo.d1_stack[0]); + } return uinfo.d1_stack[0]; } struct xml_info { - int dummy; + XML_Expat_Version expat_version; }; -static void *grs_init_xml(void) +static data1_node *grs_read_xml (struct grs_read_info *p) +{ + return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); +} + +static void *filter_init(Res res, RecType recType) { struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p)); + + p->expat_version = XML_ExpatVersionInfo(); + return p; } -static data1_node *grs_read_xml (struct grs_read_info *p) +static void filter_destroy(void *clientData) { - return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); + struct xml_info *p = (struct xml_info *) clientData; + + xfree (p); } -static void grs_destroy_xml(void *clientData) +static int filter_extract(void *clientData, struct recExtractCtrl *ctrl) { - struct sgml_getc_info *p = (struct sgml_getc_info *) clientData; + return zebra_grs_extract(clientData, ctrl, grs_read_xml); +} - xfree (p); +static int filter_retrieve(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_xml); } -static struct recTypeGrs xml_type = { - "xml", - grs_init_xml, - grs_destroy_xml, - grs_read_xml +static struct recType filter_type = { + 0, + "grs.xml", + filter_init, + 0, + filter_destroy, + filter_extract, + filter_retrieve, }; -RecTypeGrs recTypeGrs_xml = &xml_type; +RecType +#ifdef IDZEBRA_STATIC_GRS_XML +idzebra_filter_grs_xml +#else +idzebra_filter +#endif -/* HAVE_EXPAT_H */ +[] = { + &filter_type, + 0, +}; + #endif