X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fxmlread.c;h=4a05b577a0f99043f51e6274c151c9df93a7f29e;hb=6dfee19bc1cec29ff5ec5e9cd8021d1354a6126e;hp=99951c6751ad4978ce6423e5d17948429772d7df;hpb=025215fcab86e8ea7c143238da9ffab5afdd1067;p=idzebra-moved-to-github.git diff --git a/recctrl/xmlread.c b/recctrl/xmlread.c index 99951c6..4a05b57 100644 --- a/recctrl/xmlread.c +++ b/recctrl/xmlread.c @@ -1,6 +1,6 @@ -/* $Id: xmlread.c,v 1.9 2003-09-08 09:30:17 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 - Index Data Aps +/* $Id: xmlread.c,v 1.20 2005-06-14 18:27:23 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -32,18 +32,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include -#include "grsread.h" +#include -#include #include -#include +#include #include #define XML_CHUNK 1024 struct user_info { - int full_error_info; data1_node *d1_stack[256]; int level; data1_handle dh; @@ -80,7 +78,7 @@ static void cb_chardata (void *user, const char *s, int len) ui->d1_stack[ui->level -1]); } -static void cb_decl (void *user, const char *version, const char*encoding, +static void cb_decl (void *user, const char *version, const char *encoding, int standalone) { struct user_info *ui = (struct user_info*) user; @@ -90,7 +88,7 @@ static void cb_decl (void *user, const char *version, const char*encoding, attr_list[1] = version; attr_list[2] = "encoding"; - attr_list[3] = "UTF-8"; /* encoding */ + attr_list[3] = "UTF-8"; /* internally it's always UTF-8 */ attr_list[4] = "standalone"; attr_list[5] = standalone ? "yes" : "no"; @@ -99,9 +97,11 @@ static void cb_decl (void *user, const char *version, const char*encoding, data1_mk_preprocess (ui->dh, ui->nmem, "xml", attr_list, ui->d1_stack[ui->level-1]); - yaz_log (ui->loglevel, "decl version=%s encoding=%s", +#if 0 + yaz_log (YLOG_LOG, "decl version=%s encoding=%s", version ? version : "null", encoding ? encoding : "null"); +#endif } static void cb_processing (void *user, const char *target, @@ -116,8 +116,6 @@ static void cb_processing (void *user, const char *target, yaz_log (ui->loglevel, "decl processing target=%s data=%s", target ? target : "null", data ? data : "null"); - - } static void cb_comment (void *user, const char *data) @@ -177,7 +175,7 @@ static int cb_external_entity (XML_Parser pparser, if (!(inf = fopen (systemId, "rb"))) { - yaz_log (LOG_WARN|LOG_ERRNO, "fopen %s", systemId); + yaz_log (YLOG_WARN|YLOG_ERRNO, "fopen %s", systemId); return 0; } @@ -188,7 +186,7 @@ static int cb_external_entity (XML_Parser pparser, void *buf = XML_GetBuffer (parser, XML_CHUNK); if (!buf) { - yaz_log (LOG_WARN, "XML_GetBuffer fail"); + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); break; } r = fread (buf, 1, XML_CHUNK, inf); @@ -196,24 +194,19 @@ static int cb_external_entity (XML_Parser pparser, { if (ferror(inf)) { - yaz_log (LOG_WARN|LOG_ERRNO, "fread %s", systemId); + yaz_log (YLOG_WARN|YLOG_ERRNO, "fread %s", systemId); break; } done = 1; } if (!XML_ParseBuffer (parser, r, done)) { - if (ui->full_error_info) - yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s", - systemId, - XML_GetCurrentLineNumber(parser), - XML_GetCurrentColumnNumber(parser), - XML_ErrorString(XML_GetErrorCode(parser))); - else - yaz_log (LOG_WARN, "%s:%d:XML error: %s", - systemId, - XML_GetCurrentLineNumber(parser), - XML_ErrorString(XML_GetErrorCode(parser))); + done = 1; + yaz_log (YLOG_WARN, "%s:%d:%d:XML error: %s", + systemId, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); } } fclose (inf); @@ -234,7 +227,7 @@ static int cb_encoding_convert (void *data, const char *s) unsigned short code; #if 1 - yaz_log(LOG_LOG, "------------------------- cb_encoding_convert --- "); + yaz_log(YLOG_LOG, "------------------------- cb_encoding_convert --- "); #endif ret = iconv (t, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t) (-1) && errno != E2BIG) @@ -350,7 +343,7 @@ static int cb_encoding_handler (void *userData, const char *name, { info->map[i] = -1; /* no room for output */ if (i != 0) - yaz_log (LOG_WARN, "Encoding %d: no room for output", + yaz_log (YLOG_WARN, "Encoding %d: no room for output", i); } } @@ -364,7 +357,7 @@ static int cb_encoding_handler (void *userData, const char *name, else { /* should never happen */ info->map[i] = -1; - yaz_log (LOG_DEBUG, "Encoding %d: bad state", i); + yaz_log (YLOG_DEBUG, "Encoding %d: bad state", i); } } if (info->data) @@ -401,15 +394,15 @@ static void cb_ns_end(void *userData, const char *prefix) } data1_node *zebra_read_xml (data1_handle dh, int (*rf)(void *, char *, size_t), void *fh, - NMEM m, - int full_error_info) + NMEM m) { XML_Parser parser; struct user_info uinfo; int done = 0; + data1_node *first_node; + int no_read = 0; - uinfo.full_error_info = full_error_info; - uinfo.loglevel = LOG_DEBUG; + uinfo.loglevel = YLOG_DEBUG; uinfo.level = 1; uinfo.dh = dh; uinfo.nmem = m; @@ -438,82 +431,111 @@ data1_node *zebra_read_xml (data1_handle dh, if (!buf) { /* error */ - yaz_log (LOG_WARN, "XML_GetBuffer fail"); + yaz_log (YLOG_WARN, "XML_GetBuffer fail"); break; } r = (*rf)(fh, buf, XML_CHUNK); if (r < 0) { /* error */ - yaz_log (LOG_WARN, "XML read fail"); + yaz_log (YLOG_WARN, "XML read fail"); break; } else if (r == 0) done = 1; - if (!XML_ParseBuffer (parser, r, done)) + else + no_read += r; + if (no_read && !XML_ParseBuffer (parser, r, done)) { - if (full_error_info) - yaz_log (LOG_WARN, "%d:%d:XML error: %s", - XML_GetCurrentLineNumber(parser), - XML_GetCurrentColumnNumber(parser), - XML_ErrorString(XML_GetErrorCode(parser))); - else - yaz_log (LOG_WARN, "XML error: %s", - XML_ErrorString(XML_GetErrorCode(parser))); + done = 1; + yaz_log (YLOG_WARN, "%d:%d:XML error: %s", + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); } } XML_ParserFree (parser); + if (no_read == 0) + return 0; if (!uinfo.d1_stack[1] || !done) return 0; + /* insert XML header if not present .. */ + first_node = uinfo.d1_stack[0]->child; + if (first_node->which != DATA1N_preprocess || + strcmp(first_node->u.preprocess.target, "xml")) + { + const char *attr_list[5]; + + attr_list[0] = "version"; + attr_list[1] = "1.0"; + + attr_list[2] = "encoding"; + attr_list[3] = "UTF-8"; /* encoding */ + + attr_list[4] = 0; + + data1_insert_preprocess (uinfo.dh, uinfo.nmem, "xml", attr_list, + uinfo.d1_stack[0]); + } return uinfo.d1_stack[0]; } struct xml_info { XML_Expat_Version expat_version; - int full_error_info; /* true if we can safely use Expat's - XML_GetCurrent{Line,Column}Number */ }; -static void *grs_init_xml(void) +static data1_node *grs_read_xml (struct grs_read_info *p) +{ + return zebra_read_xml (p->dh, p->readf, p->fh, p->mem); +} + +static void *filter_init(Res res, RecType recType) { struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p)); p->expat_version = XML_ExpatVersionInfo(); - /* determine if we can use XML_GetCurrent{Line,Column}Number */ - p->full_error_info = 0; - if (p->expat_version.major > 1) - p->full_error_info = 1; - else if (p->expat_version.major == 1 && p->expat_version.minor > 95) - p->full_error_info = 1; - else if (p->expat_version.major == 1 && p->expat_version.minor == 95 - && p->expat_version.micro >= 3) - p->full_error_info = 1; return p; } -static data1_node *grs_read_xml (struct grs_read_info *p) +static void filter_destroy(void *clientData) { - struct xml_info *x = (struct xml_info *) p->clientData; - return zebra_read_xml (p->dh, p->readf, p->fh, p->mem, x->full_error_info); + struct xml_info *p = (struct xml_info *) clientData; + + xfree (p); } -static void grs_destroy_xml(void *clientData) +static int filter_extract(void *clientData, struct recExtractCtrl *ctrl) { - struct xml_info *p = (struct xml_info *) clientData; + return zebra_grs_extract(clientData, ctrl, grs_read_xml); +} - xfree (p); +static int filter_retrieve(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_xml); } -static struct recTypeGrs xml_type = { - "xml", - grs_init_xml, - grs_destroy_xml, - grs_read_xml +static struct recType filter_type = { + 0, + "grs.xml", + filter_init, + 0, + filter_destroy, + filter_extract, + filter_retrieve, }; -RecTypeGrs recTypeGrs_xml = &xml_type; +RecType +#ifdef IDZEBRA_STATIC_GRS_XML +idzebra_filter_grs_xml +#else +idzebra_filter +#endif -/* HAVE_EXPAT_H */ +[] = { + &filter_type, + 0, +}; + #endif