#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
+#include <yaz/matchstr.h>
#define SPACECHR " \t\r\n\f"
Rep();
~Rep();
int m_verbose;
+ bool nest;
};
}
// Initializes the Rep members to their default values.
mp::HTMLParser::Rep::Rep()
{
// Verbose tracing is off by default; the parse code prints tag open/close
// events only when this is non-zero.
m_verbose = 0;
// Start with normal markup recognition enabled. The parse code clears this
// flag after a "script" open tag and sets it again once a matching
// "</script>" close tag is seen.
+ nest = true;
}
mp::HTMLParser::Rep::~Rep()
if (*cp++ != '<')
continue;
- if (*cp == '!')
+ if (nest && *cp == '!')
{
int i;
tagText(event, text_start, cp - 1);
cp += i;
text_start = cp;
}
- else if (*cp == '?')
+ else if (nest && *cp == '?')
{
int i;
tagText(event, text_start, cp - 1);
else if (*cp == '/' && isAlpha(cp[1]))
{
int i;
- tagText(event, text_start, cp - 1);
-
+
i = skipName(++cp);
+
+ if (!nest)
+ {
+ if (i == 6 && !yaz_strncasecmp(cp, "script", i))
+ {
+ int ws = skipSpace(cp + 6);
+ if (cp[ws + 6] == '>')
+ nest = true; /* really terminated */
+ }
+ if (!nest)
+ continue;
+ }
+ tagText(event, text_start, cp - 2);
event.closeTag(cp, i);
if (m_verbose)
printf("------ tag close %.*s\n", i, cp);
cp += i;
text_start = cp;
}
- else if (isAlpha(*cp))
+ else if (nest && isAlpha(*cp))
{
int i, j;
tagText(event, text_start, cp - 1);
printf("------ tag open %.*s\n", i, cp);
j = tagAttrs(event, cp, i, cp + i);
j += tagEnd(event, cp, i, cp + i + j);
+
+ if (i == 6 && !yaz_strncasecmp(cp, "script", i))
+ nest = false;
+
cp += i + j;
text_start = cp;
}