More appropriate HTML comment handling
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 8 Jul 2013 13:23:49 +0000 (15:23 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 8 Jul 2013 13:23:49 +0000 (15:23 +0200)
src/html_parser.cpp
src/test_html_parser.cpp

index 22d4a31..a720c56 100644 (file)
@@ -222,9 +222,22 @@ void mp::HTMLParser::Rep::parse_str(HTMLParserEvent &event, const char *cp)
         {
             int i;
             tagText(event, text_start, cp - 1);
-            for (i = 1; cp[i] && cp[i] != '>'; i++)
-                ;
-            event.openTagStart(cp, i);
+            if (cp[1] == '-' && cp[2] == '-')
+            {
+                for (i = 3; cp[i]; i++)
+                    if (cp[i] == '-' && cp[i+1] == '-' && cp[i+2] == '>')
+                    {
+                        i+= 2;
+                        event.openTagStart(cp, i);
+                        break;
+                    }
+            }
+            else
+            {
+                for (i = 1; cp[i] && cp[i] != '>'; i++)
+                    ;
+                event.openTagStart(cp, i);
+            }
             if (m_verbose)
                 printf("------ dtd %.*s\n", i, cp);
             i += tagEnd(event, cp, i, cp + i);
index 370ff72..3604436 100644 (file)
@@ -187,7 +187,7 @@ BOOST_AUTO_TEST_CASE( test_html_parser_4 )
         mp::HTMLParser hp;
         const char* html =
             "<?xml version=\"1.0\" strandalone=\"no\"?  ax>\n"
-            "<book><x ? href/></book>";
+            "<book><x ? href/><!-- hello > --></book>";
 
         const char* expected = html;
         MyEvent e;