Fix Metaproxy stops logging after check config failed MP-590
[metaproxy-moved-to-github.git] / src / test_html_parser.cpp
index 5230117..5ba7e5a 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Metaproxy.
-   Copyright (C) 2005-2013 Index Data
+   Copyright (C) Index Data
 
 Metaproxy is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -35,59 +35,267 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 using namespace boost::unit_test;
 namespace mp = metaproxy_1;
 
-class MyEvent : public mp::HTMLParserEvent {
-    public:
-        std::string out;
-        void openTagStart(const char *name)
+class MyEvent : public mp::HTMLParserEvent // test sink: rebuilds markup text from the parser callbacks
+{
+public:
+    std::string out; // everything the callbacks below have appended so far
+    void openTagStart(const char *tag, int tag_len) { // appends "<" followed by the tag name
+        out += "<";
+        out.append(tag, tag_len); // copies exactly tag_len bytes; does not rely on a terminator
+    }
+
+    void attribute(const char *tag, int tag_len, // tag and tag_len are not used here
+                   const char *attr, int attr_len,
+                   const char *value, int val_len, const char *sep) {
+        out += " "; // always a single space before the attribute name
+        out.append(attr, attr_len);
+        if (value) // null value: only the bare attribute name is written
         {
-            out += "<";
-            out += name;
-        } 
-        
-        void attribute(const char *tagName, 
-                const char *name, const char *value)
+            out += "=";
+            out += sep; // sep is written on both sides of the value; presumably the quote char or "" - confirm in HTMLParser
+            out.append(value, val_len);
+            out += sep; // NOTE(review): sep is dereferenced unchecked; assumes the parser never passes null with a value
+        }
+    }
+    void anyTagEnd(const char *tag, int tag_len, int close_it) { // appends ">" or, when close_it is nonzero, "/>"
+        if (close_it)
+            out += "/";
+        out += ">";
+    }
+    void closeTag(const char *tag, int tag_len) { // appends "</" and the name only; no ">" is written here
+        out += "</";
+        out.append(tag, tag_len);
+    }
+    void text(const char *value, int len) { // appends the len bytes at value unchanged
+        out.append(value, len);
+    }
+};
+
+BOOST_AUTO_TEST_CASE( test_html_parser_1 ) // unquoted, single- and double-quoted attributes plus a "/>" tag
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html =
+            "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
+            "<hr><table ></table  ><a href=\"x\"/></body></html>";
+        const char* expected = // differs from html only in "<table >" becoming "<table>"; "</table  >" is kept
+            "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
+            "<hr><table></table  ><a href=\"x\"/></body></html>";
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
         {
-            out += " ";
-            out += name;
-            out += "=\"";
-            out += value;
-            out += "\"";
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
         }
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
 
-        void anyTagEnd(const char *name)
+BOOST_AUTO_TEST_CASE( test_html_parser_2 ) // DOCTYPE, upper-case tags and newlines must come back unchanged
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html =
+            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+            "<HTML>\n"
+            " <HEAD>\n"
+            "  <TITLE>YAZ 4.2.60</TITLE>\n"
+            " </HEAD>\n"
+            " <BODY>\n"
+            "  <P><A HREF=\"http://www.indexdata.com/yaz/\">YAZ</A> 4.2.60</P>\n"
+            "  <P>Error: 404</P>\n"
+            "  <P>Description: Not Found</P>\n"
+            " </BODY>\n"
+            "</HTML>";
+
+        const char* expected = html; // the rebuilt output must equal the input exactly
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
+        {
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
+        }
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_3 ) // XML declaration and DOCTYPE with a bracketed subset must come back unchanged
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html = // "strandalone" (sic) is only test input; it is expected to be echoed verbatim
+            "<?xml version=\"1.0\" strandalone=\"no\"?>\n"
+            "<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\"\n"
+            "  \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"\n"
+            "[\n"
+            " <!ENTITY % local SYSTEM \"local.ent\">\n"
+            " %local;\n"
+            "]>\n"
+            "<book></book>";
+
+        const char* expected = html; // the rebuilt output must equal the input exactly
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
         {
-            out += ">";
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
         }
-        
-        void closeTag(const char *name)
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_4 ) // stray "?" inside tags and a ">" inside a comment must come back unchanged
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html = // "strandalone" (sic) is only test input; it is expected to be echoed verbatim
+            "<?xml version=\"1.0\" strandalone=\"no\"?  ax>\n"
+            "<book><x ? href/><!-- hello > --></book>";
+
+        const char* expected = html; // the rebuilt output must equal the input exactly
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
         {
-            out += "</";
-            out += name;
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
         }
-        
-        void text(const char *value, int len)
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_5 ) // a valueless attribute directly before "/>" must come back unchanged
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html =
+            "<x link/>";
+
+        const char* expected = html; // the rebuilt output must equal the input exactly
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
         {
-            out.append(value, len);
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
         }
-};
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
 
+BOOST_AUTO_TEST_CASE( test_html_parser_6 ) // a "<" inside script content must come back unchanged
+{
+    try
+    {
+        mp::HTMLParser hp;
+        const char* html =
+            "<html><script><x;</script></html>";
 
-BOOST_AUTO_TEST_CASE( test_html_parser_1 )
+        const char* expected = html; // the rebuilt output must equal the input exactly
+        MyEvent e;
+        hp.set_verbose(0);
+        hp.parse(e, html); // the parser drives e's callbacks, which accumulate into e.out
+
+        BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out) // on mismatch, additionally print both strings on their own lines
+        {
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
+        }
+    }
+    catch (std::exception & e) // a std::exception is printed and then recorded as a failed check
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_7 )
 {
     try
     {
         mp::HTMLParser hp;
-        const char* html = 
-            "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
-            "<hr><table ></table  ></body></html";
-        const char* expected = 
-            "<html><body><a t1=\"v1\" t2=\"v2\" t3=\"v3\">some text</a>"
-            "<hr><table></table></body></html";
+        const char* html =
+            "<html><Script>x=1; for (i=0;i<x;i++) </y>;"
+            "</SCRIPT ;>"
+            "</1>\nx=2;\n</Script></html>";
+
+        const char* expected = html;
         MyEvent e;
+        hp.set_verbose(0);
         hp.parse(e, html);
+
         BOOST_CHECK_EQUAL(std::string(expected), e.out);
+        if (std::string(expected) != e.out)
+        {
+            std::cout << "Expected" << std::endl;
+            std::cout << expected << std::endl;
+            std::cout << "Got" << std::endl;
+            std::cout << e.out << std::endl;
+        }
     }
-    catch (std::exception & e) 
+    catch (std::exception & e)
     {
         std::cout << e.what();
         std::cout << std::endl;