Try to avoid SEGV in Expat when XML_Parse fails
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 8 Sep 2003 09:30:17 +0000 (09:30 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 8 Sep 2003 09:30:17 +0000 (09:30 +0000)
CHANGELOG
configure.in
recctrl/xmlread.c
test/Makefile.am
test/malxml/Makefile.am [new file with mode: 0644]
test/malxml/f1.xml [new file with mode: 0644]
test/malxml/test1.sh [new file with mode: 0755]
test/malxml/zebra.cfg [new file with mode: 0644]

index 5f6ba5c..8d004d5 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,8 @@
 
+Attempt to avoid SEGV in older versions of Expat by not calling 
+XML_GetCurrent{Line,Column}Number when XML_Parse fails. New
+test case: malxml.
+
 Zebra ignores "unsupported use attribute" for individual databases
 when search multiple databases (unless all databases fail).
 
index abae2c7..3c048ef 100644 (file)
--- a/configure.in
+++ b/configure.in
@@ -1,5 +1,5 @@
 dnl Zebra, Index Data Aps, 1995-2003
-dnl $Id: configure.in,v 1.81 2003-07-04 14:25:51 heikki Exp $
+dnl $Id: configure.in,v 1.82 2003-09-08 09:30:17 adam Exp $
 dnl
 AC_INIT(include/zebraver.h)
 AM_INIT_AUTOMAKE(idzebra,1.3.12)
@@ -391,7 +391,7 @@ AC_OUTPUT([
   doc/zebraphp.dsl
   doc/tkl.xsl
   test/Makefile test/gils/Makefile test/usmarc/Makefile test/api/Makefile
-  test/rusmarc/Makefile test/cddb/Makefile
+  test/rusmarc/Makefile test/cddb/Makefile test/malxml/Makefile
   perl/Makefile.PL test/xelm/Makefile
   test/dmoz/Makefile test/xpath/Makefile test/sort/Makefile test/zsh/Makefile
   examples/Makefile examples/gils/Makefile examples/zthes/Makefile
index 5071fc5..99951c6 100644 (file)
--- a/recctrl/xmlread.c
+++ b/recctrl/xmlread.c
@@ -1,4 +1,4 @@
-/* $Id: xmlread.c,v 1.8 2003-08-21 10:29:00 adam Exp $
+/* $Id: xmlread.c,v 1.9 2003-09-08 09:30:17 adam Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -43,6 +43,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #define XML_CHUNK 1024
 
 struct user_info {
+    int full_error_info;
     data1_node *d1_stack[256];
     int level;
     data1_handle dh;
@@ -202,11 +203,17 @@ static int cb_external_entity (XML_Parser pparser,
         }
         if (!XML_ParseBuffer (parser, r, done))
         {
-            yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s",
-                     systemId,
-                     XML_GetCurrentLineNumber(parser),
-                     XML_GetCurrentColumnNumber(parser),
-                    XML_ErrorString(XML_GetErrorCode(parser)));
+           /* older Expat may SEGV in XML_GetCurrent{Line,Column}Number here */
+           if (ui->full_error_info)
+               yaz_log (LOG_WARN, "%s:%d:%d:XML error: %s",
+                        systemId,
+                        XML_GetCurrentLineNumber(parser),
+                        XML_GetCurrentColumnNumber(parser),
+                        XML_ErrorString(XML_GetErrorCode(parser)));
+           else
+               yaz_log (LOG_WARN, "%s:XML error: %s",
+                        systemId,
+                        XML_ErrorString(XML_GetErrorCode(parser)));
        }
     }
     fclose (inf);
@@ -394,12 +401,14 @@ static void cb_ns_end(void *userData, const char *prefix)
 }
 data1_node *zebra_read_xml (data1_handle dh,
                             int (*rf)(void *, char *, size_t), void *fh,
-                            NMEM m)
+                            NMEM m,
+                           int full_error_info)
 {
     XML_Parser parser;
     struct user_info uinfo;
     int done = 0;
 
+    uinfo.full_error_info = full_error_info;
     uinfo.loglevel = LOG_DEBUG;
     uinfo.level = 1;
     uinfo.dh = dh;
@@ -443,10 +452,14 @@ data1_node *zebra_read_xml (data1_handle dh,
             done = 1;
         if (!XML_ParseBuffer (parser, r, done))
         {
-            yaz_log (LOG_WARN, "%d:%d:XML error: %s",
-                     XML_GetCurrentLineNumber(parser),
-                     XML_GetCurrentColumnNumber(parser),
-                    XML_ErrorString(XML_GetErrorCode(parser)));
+           if (full_error_info)
+               yaz_log (LOG_WARN, "%d:%d:XML error: %s",
+                        XML_GetCurrentLineNumber(parser),
+                        XML_GetCurrentColumnNumber(parser),
+                        XML_ErrorString(XML_GetErrorCode(parser)));
+           else
+               yaz_log (LOG_WARN, "XML error: %s",
+                        XML_ErrorString(XML_GetErrorCode(parser)));
        }
     }
     XML_ParserFree (parser);
@@ -456,23 +469,38 @@ data1_node *zebra_read_xml (data1_handle dh,
 }
 
 struct xml_info {
-    int dummy;
+    XML_Expat_Version expat_version; /* set from XML_ExpatVersionInfo() in grs_init_xml */
+    int full_error_info;   /* true if we can safely use Expat's
+                             XML_GetCurrent{Line,Column}Number */
 };
 
 static void *grs_init_xml(void)
 {
     struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p));
+
+    p->expat_version = XML_ExpatVersionInfo();
+
+    /* determine if we can use XML_GetCurrent{Line,Column}Number: only Expat 1.95.3 or later qualifies */
+    p->full_error_info = 0;   /* default: treat Expat as too old */
+    if (p->expat_version.major > 1)   /* any 2.x or later */
+       p->full_error_info = 1;
+    else if (p->expat_version.major == 1 && p->expat_version.minor > 95)   /* 1.96 or later */
+       p->full_error_info = 1;
+    else if (p->expat_version.major == 1 && p->expat_version.minor == 95
+            && p->expat_version.micro >= 3)   /* 1.95.3 or a later 1.95.x */
+       p->full_error_info = 1;
     return p;
 }
 
 static data1_node *grs_read_xml (struct grs_read_info *p)
 {
-    return zebra_read_xml (p->dh, p->readf, p->fh, p->mem);
+    struct xml_info *x = (struct xml_info *) p->clientData; /* presumably the pointer returned by grs_init_xml -- confirm */
+    return zebra_read_xml (p->dh, p->readf, p->fh, p->mem, x->full_error_info); /* pass the Expat capability flag on to the parser loop */
 }
 
 static void grs_destroy_xml(void *clientData)
 {
-    struct sgml_getc_info *p = (struct sgml_getc_info *) clientData;
+    struct xml_info *p = (struct xml_info *) clientData; /* same struct type that grs_init_xml allocates */
 
     xfree (p);
 }
index 4796200..ce2eb2f 100644 (file)
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,3 +1,3 @@
 
-SUBDIRS=api gils usmarc dmoz xpath sort xelm cddb rusmarc zsh
+SUBDIRS=api gils malxml usmarc dmoz xpath sort xelm cddb rusmarc zsh
 
diff --git a/test/malxml/Makefile.am b/test/malxml/Makefile.am
new file mode 100644 (file)
index 0000000..b461166
--- /dev/null
@@ -0,0 +1,6 @@
+# $Id: Makefile.am,v 1.1 2003-09-08 09:30:17 adam Exp $
+
+check_SCRIPTS = test1.sh
+TESTS = test1.sh
+
+EXTRA_DIST = zebra.cfg $(check_SCRIPTS) f1.xml
diff --git a/test/malxml/f1.xml b/test/malxml/f1.xml
new file mode 100644 (file)
index 0000000..200a0b3
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<metadata creator="tkl-oai" created="Wed Aug 20 14:49:04 2003" hidden="0">
+<oai_dc:dc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<dc:identifier>http://www.byplanlab.dk/index.html</dc:identifier>
+<dc:title>Dansk Byplanlaboratorium</dc:title>
+<dc:type>ORSO 06 Institutioners og foreningers hjemmesider</dc:type>
+<dc:type>BIBL 02 Opslagsværker og bibliografier</dc:type>
+<dc:type>ORSO 06 Institutioners og foreningers hjemmesider</dc:type>
+<dc:subject>2 Arkitektur. Fysisk planlægning</dc:subject>
+<dc:description xml:lang="da">Dansk Byplanlaboratorium er en selvejende uafhængig institution, stiftet i 1921, hvis formål er at fremme by- og regionplanlægningen i Danmark. Byplanlaboratoriet henvender sig til planlæggere, studerende, offentlige myndigheder og interesserede borgere, som ønsker at vide noget mere om planlægning i Danmark.Byplanlaboratoriet er med til at skabe debat om aktuelle planlægningsspørgsmål og formidle ny viden indenfor området. Det sker gennem biblioteket, forlagsvirksomhed, deltagelse i udviklingsprojekter, afholdelse af kurser, seminarer, konferencer, studierejser samt løbende deltagelse i den faglige debat.</dc:description>
+<dc:publisher>Dansk Byplanlaboratorium</dc:publisher>
+<dc:subject>20/240 Byplanlægning</dc:subject>
+<dc:subject>20/220/2207/22072 Regionplanlægning</dc:subject>
+<dc:subject>20/220/2207/22075 Kommuneplanlægning</dc:subject>
+<dc:subject>20/220/2207/22077 Lokalplanlægning</dc:subject>
+<dc:coverage xml:lang="da">Danmark</dc:coverage>
+<dc:language>dan Dansk</dc:language>
+<dc:language>eng Engelsk</dc:language>
+<dc:rights>Ubegrænset adgang</dc:rights>
+</oai_dc:dc>
+</metadata>
\ No newline at end of file
diff --git a/test/malxml/test1.sh b/test/malxml/test1.sh
new file mode 100755 (executable)
index 0000000..eb12cd1
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+../../index/zebraidx init
+../../index/zebraidx update f1.xml
diff --git a/test/malxml/zebra.cfg b/test/malxml/zebra.cfg
new file mode 100644 (file)
index 0000000..1ceb59a
--- /dev/null
@@ -0,0 +1,17 @@
+# Simple Zebra configuration file
+# $Id: zebra.cfg,v 1.1 2003-09-08 09:30:17 adam Exp $
+#
+# Where the schema files, attribute files, etc are located.
+profilePath: .:../../tab
+
+# Files that describe the attribute sets supported.
+attset: bib1.att
+attset: gils.att
+attset: explain.att
+
+recordtype: grs.xml
+
+#storekeys: 1
+#storedata: 1
+#recordId: (bib1,identifier-standard)
+isam: b