#include <metaproxy/package.hpp>
#include <metaproxy/util.hpp>
#include "filter_http_rewrite.hpp"
+#include "html_parser.hpp"
#include <yaz/zgdu.h>
#include <yaz/log.h>
#include <boost/regex.hpp>
#include <boost/lexical_cast.hpp>
+#include <boost/algorithm/string.hpp>
#include <map>
-#if HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
namespace mp = metaproxy_1;
namespace yf = mp::filter;
RulePtr rule;
};
- class HttpRewrite::Section {
+ class HttpRewrite::Phase {
public:
std::list<Within> within_list;
void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
char **content_buf, int *content_len,
std::map<std::string, std::string> & vars) const;
};
+ /**
+  * HTML parser event handler that re-serializes the markup it is fed
+  * into a WRBUF, tagging the attribute values and text nodes that a
+  * <within> rule of the associated Phase selects with a SUBST marker.
+  *
+  * The object owns its WRBUF (allocated in the constructor, destroyed in
+  * the destructor). Copying is therefore disabled: a copy would share the
+  * same buffer and destroy it a second time.
+  */
+ class HttpRewrite::Event : public HTMLParserEvent {
+     // Parser callbacks. NOTE(review): presumably these override the
+     // HTMLParserEvent interface -- confirm against html_parser.hpp.
+     void openTagStart(const char *name);
+     void anyTagEnd(const char *name);
+     void attribute(const char *tagName,
+                    const char *name,
+                    const char *value,
+                    int val_len);
+     void closeTag(const char *name);
+     void text(const char *value, int len);
+     // Phase whose within_list is consulted; not released by this class.
+     const Phase *m_phase;
+     // Output buffer holding the re-serialized markup; owned.
+     WRBUF m_w;
+     // The currently active <within> rule, or within_list.end() if none.
+     std::list<Within>::const_iterator enabled_within;
+     // Not copyable: m_w is a raw owning handle. Declared, never defined.
+     Event(const Event &);
+     Event &operator=(const Event &);
+ public:
+     explicit Event(const Phase *p);
+     ~Event();
+     // Returns the output produced so far as a C string.
+     const char *result();
+ };
}
}
yf::HttpRewrite::HttpRewrite() :
- req_section(new Section), res_section(new Section)
+ req_phase(new Phase), res_phase(new Phase) // one Phase for requests, one for responses
{
}
{
Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
mp::odr o;
- req_section->rewrite_reqline(o, hreq, vars);
+ req_phase->rewrite_reqline(o, hreq, vars);
yaz_log(YLOG_LOG, ">> Request headers");
- req_section->rewrite_headers(o, hreq->headers, vars);
- req_section->rewrite_body(o,
+ req_phase->rewrite_headers(o, hreq->headers, vars);
+ req_phase->rewrite_body(o,
&hreq->content_buf, &hreq->content_len, vars);
package.request() = gdu;
}
yaz_log(YLOG_LOG, "Response code %d", hres->code);
mp::odr o;
yaz_log(YLOG_LOG, "<< Respose headers");
- res_section->rewrite_headers(o, hres->headers, vars);
- res_section->rewrite_body(o, &hres->content_buf,
+ res_phase->rewrite_headers(o, hres->headers, vars);
+ res_phase->rewrite_body(o, &hres->content_buf,
&hres->content_len, vars);
package.response() = gdu;
}
}
-void yf::HttpRewrite::Section::rewrite_reqline (mp::odr & o,
+void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
Z_HTTP_Request *hreq,
std::map<std::string, std::string> & vars) const
{
}
}
-void yf::HttpRewrite::Section::rewrite_headers(mp::odr & o,
+void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
Z_HTTP_Header *headers,
std::map<std::string, std::string> & vars) const
{
}
}
-void yf::HttpRewrite::Section::rewrite_body(mp::odr & o,
+void yf::HttpRewrite::Phase::rewrite_body(mp::odr & o,
char **content_buf,
int *content_len,
std::map<std::string, std::string> & vars) const
{
if (*content_buf)
{
+ HTMLParser parser;
+ Event ev(this);
+ std::string buf(*content_buf, *content_len);
+
+ parser.parse(ev, buf.c_str());
+ std::cout << "RES\n" << ev.result() << std::endl;
+ std::cout << "-----" << std::endl;
+
std::list<Within>::const_iterator it = within_list.begin();
if (it != within_list.end())
}
}
+/**
+ * Binds the handler to phase p, allocates a fresh output buffer and
+ * starts with no <within> rule active (enabled_within == end()).
+ *
+ * @param p  phase whose within_list drives the callbacks; dereferenced
+ *           here, so it must not be null
+ */
+yf::HttpRewrite::Event::Event(const Phase *p)
+    : m_phase(p),
+      m_w(wrbuf_alloc()),
+      enabled_within(p->within_list.end())
+{
+}
+
+/** Destroys the output buffer m_w held by this handler. */
+yf::HttpRewrite::Event::~Event()
+{
+    wrbuf_destroy(m_w);
+}
+
+/**
+ * Returns the markup written to the output buffer so far, as obtained
+ * from wrbuf_cstr().
+ *
+ * NOTE(review): the pointer refers to storage managed by m_w, so it
+ * presumably stays valid only until the buffer is written to again or
+ * this object is destroyed -- confirm against the YAZ WRBUF docs.
+ */
+const char *yf::HttpRewrite::Event::result()
+{
+    const char *serialized = wrbuf_cstr(m_w);
+    return serialized;
+}
+
+/**
+ * Handles the start of an opening tag: possibly activates a <within>
+ * rule for it, then writes "<" followed by the tag name.
+ *
+ * @param name  tag name, NUL-terminated
+ */
+void yf::HttpRewrite::Event::openTagStart(const char *name)
+{
+    const std::list<Within> &rules = m_phase->within_list;
+
+    // Only search for a <within tag="x" .. /> rule while none is active.
+    if (enabled_within == rules.end())
+    {
+        std::list<Within>::const_iterator rule = rules.begin();
+        while (rule != rules.end())
+        {
+            const bool has_tag = rule->tag.length() != 0;
+            // No early exit: if several rules name this tag, the last
+            // one in the list ends up active.
+            if (has_tag && rule->tag.compare(name) == 0)
+                enabled_within = rule;
+            ++rule;
+        }
+    }
+    wrbuf_putc(m_w, '<');
+    wrbuf_puts(m_w, name);
+}
+
+/**
+ * Handles the end of a tag: deactivates the active <within> rule when
+ * its tag equals name, then writes ">".
+ *
+ * @param name  tag name, NUL-terminated
+ */
+void yf::HttpRewrite::Event::anyTagEnd(const char *name)
+{
+    const std::list<Within>::const_iterator none =
+        m_phase->within_list.end();
+
+    if (enabled_within != none && enabled_within->tag.compare(name) == 0)
+        enabled_within = none;
+    wrbuf_putc(m_w, '>');
+}
+
+/**
+ * Writes one attribute to the output buffer as ` name="value"`, adding
+ * the marker " SUBST" inside the quotes when a <within> rule selects it.
+ *
+ * The governing rule is the active <within> (enabled_within) if there is
+ * one; otherwise the first rule that has an attr list but no tag. The
+ * attribute is selected when its name occurs in that rule's
+ * comma-separated attr list. The "#text" entry is never treated as an
+ * attribute name (text() checks for it on text nodes).
+ *
+ * @param tagName  name of the element carrying the attribute; not used
+ *                 for matching
+ * @param name     attribute name, NUL-terminated
+ * @param value    attribute value; exactly val_len bytes are written
+ * @param val_len  number of bytes of value to write
+ */
+void yf::HttpRewrite::Event::attribute(const char *tagName,
+                                       const char *name,
+                                       const char *value,
+                                       int val_len)
+{
+    (void) tagName; // the element is already reflected in enabled_within
+
+    std::list<Within>::const_iterator it = enabled_within;
+    bool subst = false;
+
+    if (it == m_phase->within_list.end())
+    {
+        // no active within tag.. see if an attr rule without tag applies
+        it = m_phase->within_list.begin();
+        for (; it != m_phase->within_list.end(); ++it)
+        {
+            if (it->attr.length() > 0 && it->tag.length() == 0)
+                break;
+        }
+    }
+    if (it != m_phase->within_list.end())
+    {
+        std::vector<std::string> attr;
+        boost::split(attr, it->attr, boost::is_any_of(","));
+        for (size_t i = 0; i < attr.size() && !subst; i++)
+        {
+            // The list holds attribute names, so match against the
+            // attribute name rather than the element's tag name.
+            if (attr[i].compare("#text") != 0 && attr[i].compare(name) == 0)
+                subst = true;
+        }
+    }
+
+    wrbuf_putc(m_w, ' ');
+    wrbuf_puts(m_w, name);
+    wrbuf_puts(m_w, "=\""); // '=' was missing, yielding name"value"
+    wrbuf_write(m_w, value, val_len);
+    if (subst)
+        wrbuf_puts(m_w, " SUBST");
+    wrbuf_puts(m_w, "\"");
+}
+
+/**
+ * Handles a closing tag: deactivates the active <within> rule when its
+ * tag equals name, then writes "</" followed by the tag name. The
+ * terminating ">" is not written here.
+ *
+ * @param name  tag name, NUL-terminated
+ */
+void yf::HttpRewrite::Event::closeTag(const char *name)
+{
+    const std::list<Within>::const_iterator end =
+        m_phase->within_list.end();
+    const std::list<Within>::const_iterator active = enabled_within;
+    const bool closes_active =
+        active != end && active->tag.compare(name) == 0;
+
+    if (closes_active)
+        enabled_within = end;
+    wrbuf_puts(m_w, "</");
+    wrbuf_puts(m_w, name);
+}
+
+/**
+ * Copies a text node to the output buffer and appends the marker
+ * "<!-- SUBST -->" when the active <within> rule covers text: either the
+ * rule has no attr list at all, or its comma-separated attr list
+ * contains "#text". Without an active rule the text is copied unmarked.
+ *
+ * @param value  text data; exactly len bytes are written
+ * @param len    number of bytes of value to write
+ */
+void yf::HttpRewrite::Event::text(const char *value, int len)
+{
+    const std::list<Within>::const_iterator active = enabled_within;
+    bool mark = false;
+
+    if (active != m_phase->within_list.end())
+    {
+        if (active->attr.length() == 0)
+        {
+            // A rule without an attr list applies to text as well.
+            mark = true;
+        }
+        else
+        {
+            std::vector<std::string> wanted;
+            boost::split(wanted, active->attr, boost::is_any_of(","));
+            std::vector<std::string>::const_iterator entry = wanted.begin();
+            for (; entry != wanted.end(); ++entry)
+            {
+                if (entry->compare("#text") == 0)
+                    mark = true;
+            }
+        }
+    }
+    wrbuf_write(m_w, value, len);
+    if (mark)
+        wrbuf_puts(m_w, "<!-- SUBST -->");
+}
+
+
/**
* Tests pattern from the vector in order and executes recipe on
the first match.
}
-void yf::HttpRewrite::configure_section(const xmlNode *ptr,
- Section §ion)
+void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
{
std::map<std::string, RulePtr > rules;
for (ptr = ptr->children; ptr; ptr = ptr->next)
("Reference to non-existing rule '" + values[3] +
"' in http_rewrite filter");
w.rule = it->second;
- section.within_list.push_back(w);
+ phase.within_list.push_back(w);
}
else
{
continue;
else if (!strcmp((const char *) ptr->name, "request"))
{
- configure_section(ptr, *req_section);
+ configure_phase(ptr, *req_phase);
}
else if (!strcmp((const char *) ptr->name, "response"))
{
- configure_section(ptr, *res_section);
+ configure_phase(ptr, *res_phase);
}
else
{