/* This file is part of Metaproxy.
- Copyright (C) 2005-2013 Index Data
+ Copyright (C) Index Data
Metaproxy is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
using namespace boost::unit_test;
namespace mp = metaproxy_1;
-class MyEvent : public mp::HTMLParserEvent {
- public:
- std::string out;
- void openTagStart(const char *name)
+class MyEvent : public mp::HTMLParserEvent // test handler: re-serializes every parser event into `out`
+{
+public:
+ std::string out; // markup rebuilt from the events received so far
+ void openTagStart(const char *tag, int tag_len) { // writes "<" followed by tag_len bytes of tag
+ out += "<";
+ out.append(tag, tag_len);
+ }
+
+ void attribute(const char *tag, int tag_len, // tag / tag_len are not used by this handler
+ const char *attr, int attr_len,
+ const char *value, int val_len, const char *sep) { // writes " attr", plus "=" sep value sep when value is non-null
+ out += " ";
+ out.append(attr, attr_len);
+ if (value) // a null value means only the attribute name is written
 {
- out += "<";
- out += name;
- }
-
- void attribute(const char *tagName,
- const char *name, const char *value)
+ out += "=";
+ out += sep; // sep is written on both sides of the value; presumably the original quote character (confirm against HTMLParser)
+ out.append(value, val_len);
+ out += sep;
+ }
+ }
+ void anyTagEnd(const char *tag, int tag_len, int close_it) { // writes "/>" when close_it is non-zero, otherwise ">"
+ if (close_it)
+ out += "/";
+ out += ">";
+ }
+ void closeTag(const char *tag, int tag_len) { // writes "</" and the tag name; no ">" is written here
+ out += "</";
+ out.append(tag, tag_len);
+ }
+ void text(const char *value, int len) { // appends len bytes of value unchanged
+ out.append(value, len);
+ }
+};
+
+BOOST_AUTO_TEST_CASE( test_html_parser_1 ) // unquoted, single-quoted and double-quoted attributes plus a "/>" tag
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
+ "<hr><table ></table ><a href=\"x\"/></body></html>";
+ const char* expected = // differs from html only in "<table >" becoming "<table>"
+ "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
+ "<hr><table></table ><a href=\"x\"/></body></html>";
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
 {
- out += " ";
- out += name;
- out += "=\"";
- out += value;
- out += "\"";
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
 }
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
- void anyTagEnd(const char *name)
+BOOST_AUTO_TEST_CASE( test_html_parser_2 ) // multi-line HTML 4.01 page with DOCTYPE and upper-case tags; output must equal input
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+ "<HTML>\n"
+ " <HEAD>\n"
+ " <TITLE>YAZ 4.2.60</TITLE>\n"
+ " </HEAD>\n"
+ " <BODY>\n"
+ " <P><A HREF=\"http://www.indexdata.com/yaz/\">YAZ</A> 4.2.60</P>\n"
+ " <P>Error: 404</P>\n"
+ " <P>Description: Not Found</P>\n"
+ " </BODY>\n"
+ "</HTML>";
+
+ const char* expected = html; // the rebuilt output is compared against the unmodified input
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
+ {
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
+ }
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_3 ) // XML declaration plus DOCTYPE with a bracketed internal subset; output must equal input
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<?xml version=\"1.0\" strandalone=\"no\"?>\n" // NOTE(review): "strandalone" is presumably a typo for "standalone"; harmless since expected == html
+ "<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\"\n"
+ " \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"\n"
+ "[\n"
+ " <!ENTITY % local SYSTEM \"local.ent\">\n"
+ " %local;\n"
+ "]>\n"
+ "<book></book>";
+
+ const char* expected = html; // the rebuilt output is compared against the unmodified input
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
 {
- out += ">";
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
 }
-
- void closeTag(const char *name)
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_4 ) // irregular input: "? ax>" ending, "?" inside a tag, ">" inside a comment; output must equal input
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<?xml version=\"1.0\" strandalone=\"no\"? ax>\n"
+ "<book><x ? href/><!-- hello > --></book>";
+
+ const char* expected = html; // the rebuilt output is compared against the unmodified input
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
 {
- out += "</";
- out += name;
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
 }
-
- void text(const char *value, int len)
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_5 ) // attribute without a value inside a "/>" tag; output must equal input
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<x link/>";
+
+ const char* expected = html; // the rebuilt output is compared against the unmodified input
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
 {
- out.append(value, len);
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
 }
-};
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
+BOOST_AUTO_TEST_CASE( test_html_parser_6 ) // "<" appearing in the content of a script element; output must equal input
+{
+ try
+ {
+ mp::HTMLParser hp;
+ const char* html =
+ "<html><script><x;</script></html>";
-BOOST_AUTO_TEST_CASE( test_html_parser_1 )
+ const char* expected = html; // the rebuilt output is compared against the unmodified input
+ MyEvent e;
+ hp.set_verbose(0);
+ hp.parse(e, html); // events are collected in e.out by MyEvent
+
+ BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out) // on mismatch also print both strings, each on its own line
+ {
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
+ }
+ }
+ catch (std::exception & e)
+ {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false); // a std::exception reaching this handler fails the test
+ }
+}
+
+BOOST_AUTO_TEST_CASE( test_html_parser_7 )
{
try
{
mp::HTMLParser hp;
- const char* html =
- "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
- "<hr><table ></table ></body></html";
- const char* expected =
- "<html><body><a t1=\"v1\" t2=\"v2\" t3=\"v3\">some text</a>"
- "<hr><table></table></body></html";
+ const char* html =
+ "<html><Script>x=1; for (i=0;i<x;i++) </y>;"
+ "</SCRIPT ;>"
+ "</1>\nx=2;\n</Script></html>";
+
+ const char* expected = html;
MyEvent e;
+ hp.set_verbose(0);
hp.parse(e, html);
+
BOOST_CHECK_EQUAL(std::string(expected), e.out);
+ if (std::string(expected) != e.out)
+ {
+ std::cout << "Expected" << std::endl;
+ std::cout << expected << std::endl;
+ std::cout << "Got" << std::endl;
+ std::cout << e.out << std::endl;
+ }
}
- catch (std::exception & e)
+ catch (std::exception & e)
{
std::cout << e.what();
std::cout << std::endl;