X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fxmlread.c;h=a5bd0069d547652efe8d5c6f95372fb26e38cdfc;hb=b673999f9e517726d6e3789f3cefd194ce89ef15;hp=0cb5a75edcf10d69e98631fb0c15cea4e9109e8a;hpb=73ee6ebf9388c712be03c0523f527681984d5cbb;p=idzebra-moved-to-github.git diff --git a/recctrl/xmlread.c b/recctrl/xmlread.c index 0cb5a75..a5bd006 100644 --- a/recctrl/xmlread.c +++ b/recctrl/xmlread.c @@ -1,5 +1,5 @@ -/* $Id: xmlread.c,v 1.5 2002-08-29 09:59:25 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: xmlread.c,v 1.15 2004-09-28 10:15:03 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -32,11 +32,10 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include -#include "grsread.h" +#include #include #include -#include #include @@ -79,7 +78,7 @@ static void cb_chardata (void *user, const char *s, int len) ui->d1_stack[ui->level -1]); } -static void cb_decl (void *user, const char *version, const char*encoding, +static void cb_decl (void *user, const char *version, const char *encoding, int standalone) { struct user_info *ui = (struct user_info*) user; @@ -89,7 +88,7 @@ static void cb_decl (void *user, const char *version, const char*encoding, attr_list[1] = version; attr_list[2] = "encoding"; - attr_list[3] = "UTF-8"; /* encoding */ + attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */ attr_list[4] = "standalone"; attr_list[5] = standalone ? "yes" : "no"; @@ -98,9 +97,11 @@ static void cb_decl (void *user, const char *version, const char*encoding, data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list, ui->d1_stack[ui->level-1]); - yaz_log (ui->loglevel, "decl version=%s encoding=%s", +#if 0 + yaz_log (LOG_LOG, "decl version=%s encoding=%s", version ? version : "null", encoding ? encoding : "null"); +#endif } static void cb_processing (void *user, const char *target, @@ -115,8 +116,6 @@ static void cb_processing (void *user, const char *target, yaz_log (ui->loglevel, "decl processing target=%s data=%s", target ? target : "null", data ? data : "null"); - - } static void cb_comment (void *user, const char *data) @@ -202,10 +201,11 @@ static int cb_external_entity (XML_Parser pparser, } if (!XML_ParseBuffer (parser, r, done)) { - yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s", - systemId, - XML_GetCurrentLineNumber(parser), - XML_GetCurrentColumnNumber(parser), + done = 1; + yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s", + systemId, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), XML_ErrorString(XML_GetErrorCode(parser))); } } @@ -226,6 +226,9 @@ static int cb_encoding_convert (void *data, const char *s) char *inbuf = (char *) s; unsigned short code; +#if 1 + yaz_log(LOG_LOG, "------------------------- cb_encoding_convert --- "); +#endif ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t) (-1) && errno != E2BIG) { @@ -339,8 +342,9 @@ static int cb_encoding_handler (void *userData, const char *name, if (errno == E2BIG) { info->map[i] = -1; /* no room for output */ - yaz_log (LOG_WARN, "Encoding %d: no room for output", - i); + if (i != 0) + yaz_log (LOG_WARN, "Encoding %d: no room for output", + i); } } else if (outleft == 0) @@ -375,7 +379,19 @@ static int cb_encoding_handler (void *userData, const char *name, /* HAVE_ICONV_H */ #endif +static void cb_ns_start(void *userData, const char *prefix, const char *uri) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix && uri) + yaz_log(ui->loglevel, "cb_ns_start %s %s", prefix, uri); +} +static void cb_ns_end(void *userData, const char *prefix) +{ + struct user_info *ui = (struct user_info*) userData; + if (prefix) + yaz_log(ui->loglevel, "cb_ns_end %s", prefix); +} data1_node *zebra_read_xml (data1_handle dh, int (*rf)(void *, char *, size_t), void *fh, NMEM m) @@ -383,6 +399,7 @@ data1_node *zebra_read_xml (data1_handle dh, XML_Parser parser; struct user_info uinfo; int done = 0; + data1_node *first_node; uinfo.loglevel = LOG_DEBUG; uinfo.level = 1; @@ -402,6 +419,7 @@ data1_node *zebra_read_xml (data1_handle dh, XML_SetDoctypeDeclHandler (parser, cb_doctype_start, cb_doctype_end); XML_SetEntityDeclHandler (parser, cb_entity_decl); XML_SetExternalEntityRefHandler (parser, cb_external_entity); + XML_SetNamespaceDeclHandler(parser, cb_ns_start, cb_ns_end); #if HAVE_ICONV_H XML_SetUnknownEncodingHandler (parser, cb_encoding_handler, &uinfo); #endif @@ -426,49 +444,92 @@ data1_node *zebra_read_xml (data1_handle dh, done = 1; if (!XML_ParseBuffer (parser, r, done)) { - yaz_log (LOG_WARN, "%d:%d:XML error: %s", - XML_GetCurrentLineNumber(parser), - XML_GetCurrentColumnNumber(parser), + done = 1; + yaz_log (LOG_WARN, "%d:%d:XML error: %s", + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), XML_ErrorString(XML_GetErrorCode(parser))); } } XML_ParserFree (parser); if (!uinfo.d1_stack[1] || !done) return 0; + /* insert XML header if not present .. */ + first_node = uinfo.d1_stack[0]->child; + if (first_node->which != DATA1N_preprocess || + strcmp(first_node->u.preprocess.target, "xml")) + { + const char *attr_list[5]; + + attr_list[0] = "version"; + attr_list[1] = "1.0"; + + attr_list[2] = "encoding"; + attr_list[3] = "UTF-8"; /* encoding */ + + attr_list[4] = 0; + + data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list, + uinfo.d1_stack[0]); + } return uinfo.d1_stack[0]; } struct xml_info { - int dummy; + XML_Expat_Version expat_version; }; -static void *grs_init_xml(void) +static data1_node *grs_read_xml (struct grs_read_info *p) +{ + return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); +} + +static void *init_xml(Res res, RecType recType) { struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p)); + + p->expat_version = XML_ExpatVersionInfo(); + return p; } -static data1_node *grs_read_xml (struct grs_read_info *p) +static void destroy_xml(void *clientData) { - return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); + struct xml_info *p = (struct xml_info *) clientData; + + xfree (p); } -static void grs_destroy_xml(void *clientData) +static int extract_xml(void *clientData, struct recExtractCtrl *ctrl) { - struct sgml_getc_info *p = (struct sgml_getc_info *) clientData; + return zebra_grs_extract(clientData, ctrl, grs_read_xml); +} - xfree (p); +static int retrieve_xml(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_xml); } -static struct recTypeGrs xml_type = { - "xml", - grs_init_xml, - grs_destroy_xml, - grs_read_xml +static struct recType xml_type = { + "grs.xml", + init_xml, + 0, + destroy_xml, + extract_xml, + retrieve_xml, }; -RecTypeGrs recTypeGrs_xml = &xml_type; +RecType +#ifdef IDZEBRA_STATIC_GRS_XML +idzebra_filter_grs_xml +#else +idzebra_filter +#endif -/* HAVE_EXPAT_H */ +[] = { + &xml_type, + 0, +}; + #endif