X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_http_rewrite.cpp;h=1badb44cee656f8a0744288b30307b207cb3a38e;hb=586d78659d671683f33ec55f4a7d32b28e345ccd;hp=90e3152b0362195a0b221c37a60bd10ae16c8650;hpb=41ddc790e888e2b0cfe5da66a34e26e737e20b54;p=metaproxy-moved-to-github.git diff --git a/src/filter_http_rewrite.cpp b/src/filter_http_rewrite.cpp index 90e3152..1badb44 100644 --- a/src/filter_http_rewrite.cpp +++ b/src/filter_http_rewrite.cpp @@ -1,5 +1,5 @@ /* This file is part of Metaproxy. - Copyright (C) 2005-2013 Index Data + Copyright (C) Index Data Metaproxy is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include #include #include #include @@ -39,60 +40,85 @@ namespace metaproxy_1 { namespace filter { class HttpRewrite::Replace { public: - std::string regex; + bool start_anchor; + boost::regex re; std::string recipe; std::map group_index; - const std::string search_replace( - std::map & vars, - const std::string & txt) const; - std::string sub_vars ( + std::string sub_vars( const std::map & vars) const; - void parse_groups(); + void parse_groups(std::string pattern); }; class HttpRewrite::Rule { public: std::list replace_list; - const std::string test_patterns( - std::map & vars, - const std::string & txt) const; + bool test_patterns( + std::map &vars, + std::string &txt, bool anchor, + std::list &skip_list); }; class HttpRewrite::Within { public: - std::string header; - std::string attr; - std::string tag; + boost::regex header; + boost::regex attr; + boost::regex tag; + std::string type; bool reqline; RulePtr rule; + bool exec(std::map &vars, + std::string &txt, bool anchor, + std::list &skip_list) const; }; + class HttpRewrite::Content { + public: + std::string type; + boost::regex content_re; + std::list within_list; + void configure(const xmlNode *ptr, + std::map &rules); + void quoted_literal(std::string &content, + std::map &vars, + std::list & skip_list) const; + void parse(int verbose, std::string &content, + std::map & vars, + std::list & skip_list ) const; + }; class HttpRewrite::Phase { public: Phase(); - std::list within_list; int m_verbose; + std::list content_list; + void read_skip_headers(Z_HTTP_Request *hreq, + std::list &skip_list); void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq, std::map & vars) const; void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, std::map & vars) const; void rewrite_body(mp::odr & o, - char **content_buf, int *content_len, - std::map & vars) const; + const char *content_type, + char **content_buf, int *content_len, + std::map & vars, + std::list & skip_list ) const; }; class HttpRewrite::Event : public HTMLParserEvent { void openTagStart(const char *tag, int tag_len); void anyTagEnd(const char *tag, int tag_len, int close_it); void attribute(const char *tag, int tag_len, const char *attr, int attr_len, - const char *value, int val_len); + const char *value, int val_len, + const char *sep); void closeTag(const char *tag, int tag_len); void text(const char *value, int len); - const Phase *m_phase; + const Content *m_content; WRBUF m_w; - std::list::const_iterator enabled_within; + std::stack::const_iterator> s_within; std::map &m_vars; + std::list & m_skips; public: - Event(const Phase *p, std::map &vars); + Event(const Content *p, + std::map &vars, + std::list & skip_list ); ~Event(); const char *result(); }; @@ -115,15 +141,22 @@ void yf::HttpRewrite::process(mp::Package & package) const //map of request/response vars std::map vars; //we have an http req + + std::list skip_list; + if (gdu && gdu->which == Z_GDU_HTTP_Request) { Z_HTTP_Request *hreq = gdu->u.HTTP_Request; mp::odr o; req_phase->rewrite_reqline(o, hreq, vars); + res_phase->read_skip_headers(hreq, skip_list); yaz_log(YLOG_LOG, ">> Request headers"); req_phase->rewrite_headers(o, hreq->headers, vars); req_phase->rewrite_body(o, - &hreq->content_buf, &hreq->content_len, vars); + z_HTTP_header_lookup(hreq->headers, + "Content-Type"), + &hreq->content_buf, &hreq->content_len, + vars, skip_list); package.request() = gdu; } package.move(); @@ -135,12 +168,53 @@ void yf::HttpRewrite::process(mp::Package & package) const mp::odr o; yaz_log(YLOG_LOG, "<< Respose headers"); res_phase->rewrite_headers(o, hres->headers, vars); - res_phase->rewrite_body(o, &hres->content_buf, - &hres->content_len, vars); + res_phase->rewrite_body(o, + z_HTTP_header_lookup(hres->headers, + "Content-Type"), + &hres->content_buf, &hres->content_len, + vars, skip_list); package.response() = gdu; } } +// Read (and remove) the X-Metaproxy-SkipLink headers +void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq, + std::list &skip_list ) +{ + std::string url(hreq->path); + if ( url.substr(0,7) != "http://" ) + { // path was relative, as it usually is + const char *host = z_HTTP_header_lookup(hreq->headers, "Host"); + if (host) + url = "http://" + std::string(host) + hreq->path ; + } + + while ( const char *hv = z_HTTP_header_remove( &(hreq->headers), + "X-Metaproxy-SkipLink") ) + { + yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv ); + const char *p = strchr(hv,' '); + if (!p) + continue; // should not happen + std::string page(hv,p); + std::string link(p+1); + boost::regex pagere(page); + if ( boost::regex_search(url, pagere) ) + { + yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s", + page.c_str(), url.c_str() ); + boost::regex linkre(link); + skip_list.push_back(linkre); + } + else + { + yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'", + url.c_str(), page.c_str() ); + } + } +} + + void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, std::map & vars) const @@ -156,95 +230,117 @@ void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o, else { //TODO what about proto + const char *host = z_HTTP_header_lookup(hreq->headers, "Host"); + if (!host) + return; + path += "http://"; - path += z_HTTP_header_lookup(hreq->headers, "Host"); + path += host; path += hreq->path; } + std::list::const_iterator cit = content_list.begin(); + for (; cit != content_list.end(); cit++) + if (cit->type == "headers") + break; - std::list::const_iterator it = within_list.begin(); - for (; it != within_list.end(); it++) + if (cit == content_list.end()) + return; + + std::list::const_iterator it = cit->within_list.begin(); + for (; it != cit->within_list.end(); it++) if (it->reqline) { - RulePtr rule = it->rule; yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); - std::string npath = rule->test_patterns(vars, path); - if (!npath.empty()) + std::list dummy_skip_list; // no skips here! + if (it->exec(vars, path, true, dummy_skip_list)) { - yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str()); - hreq->path = odr_strdup(o, npath.c_str()); + yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str()); + hreq->path = odr_strdup(o, path.c_str()); } } } void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, - std::map & vars) const + std::map & vars ) const { + std::list::const_iterator cit = content_list.begin(); + for (; cit != content_list.end(); cit++) + if (cit->type == "headers") + break; + + if (cit == content_list.end()) + return; + for (Z_HTTP_Header *header = headers; header; header = header->next) { - std::list::const_iterator it = within_list.begin(); - for (; it != within_list.end(); it++) + std::list::const_iterator it = cit->within_list.begin(); + for (; it != cit->within_list.end(); it++) { - if (it->header.length() > 0 && - yaz_strcasecmp(it->header.c_str(), header->name) == 0) + if (!it->header.empty() && + regex_match(header->name, it->header)) { - std::string sheader(header->name); - sheader += ": "; - sheader += header->value; - - RulePtr rule = it->rule; - std::string out = rule->test_patterns(vars, sheader); - if (!out.empty()) + // Match and replace only the header value + std::string hval(header->value); + std::list dummy_skip_list; // no skips here! + if (it->exec(vars, hval, true, dummy_skip_list)) { - size_t pos = out.find(": "); - if (pos == std::string::npos) - { - yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring"); - continue; - } - header->name = odr_strdup(o, out.substr(0, pos).c_str()); - header->value = odr_strdup(o, - out.substr(pos + 2, - std::string::npos).c_str()); + header->value = odr_strdup(o, hval.c_str()); } } } } } -void yf::HttpRewrite::Phase::rewrite_body(mp::odr & o, - char **content_buf, - int *content_len, - std::map & vars) const +void yf::HttpRewrite::Phase::rewrite_body( + mp::odr &o, + const char *content_type, + char **content_buf, + int *content_len, + std::map & vars, + std::list & skip_list ) const { - if (*content_buf) + if (*content_len == 0) + return; + if (!content_type) { + yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite"); + return; + } + std::list::const_iterator cit = content_list.begin(); + for (; cit != content_list.end(); cit++) { - int i; - for (i = 0; i < *content_len; i++) - if ((*content_buf)[i] == 0) - return; // binary content. skip - - HTMLParser parser; - Event ev(this, vars); - - parser.set_verbose(m_verbose); + yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s", + content_type, cit->type.c_str()); + if (cit->type != "headers" + && regex_match(content_type, cit->content_re)) + break; + } + if (cit == content_list.end()) { + yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting", + content_type ); + return; + } - std::string buf(*content_buf, *content_len); + int i; + for (i = 0; i < *content_len; i++) + if ((*content_buf)[i] == 0) { + yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting"); + return; // binary content. skip + } - parser.parse(ev, buf.c_str()); - const char *res = ev.result(); - *content_buf = odr_strdup(o, res); - *content_len = strlen(res); - } + std::string content(*content_buf, *content_len); + cit->parse(m_verbose, content, vars, skip_list); + *content_buf = odr_strdup(o, content.c_str()); + *content_len = strlen(*content_buf); } -yf::HttpRewrite::Event::Event(const Phase *p, - std::map & vars - ) : m_phase(p), m_vars(vars) +yf::HttpRewrite::Event::Event(const Content *p, + std::map & vars, + std::list & skip_list + ) : m_content(p), m_vars(vars), m_skips(skip_list) { m_w = wrbuf_alloc(); - enabled_within = m_phase->within_list.end(); } yf::HttpRewrite::Event::~Event() @@ -259,33 +355,22 @@ const char *yf::HttpRewrite::Event::result() void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len) { - // check if there is - if (enabled_within == m_phase->within_list.end()) + wrbuf_putc(m_w, '<'); + wrbuf_write(m_w, tag, tag_len); + + std::string t(tag, tag_len); + std::list::const_iterator it = m_content->within_list.begin(); + for (; it != m_content->within_list.end(); it++) { - std::string t(tag, tag_len); - std::list::const_iterator it = - m_phase->within_list.begin(); - for (; it != m_phase->within_list.end(); it++) + if (!it->tag.empty() && regex_match(t, it->tag)) { - if (it->tag.length() > 0 && yaz_strcasecmp(it->tag.c_str(), - t.c_str()) == 0) + if (!it->attr.empty() && regex_match("#text", it->attr)) { - std::vector attr; - boost::split(attr, it->attr, boost::is_any_of(",")); - size_t i; - for (i = 0; i < attr.size(); i++) - { - if (attr[i].compare("#text") == 0) - { - enabled_within = it; - break; - } - } + s_within.push(it); + return; } } } - wrbuf_putc(m_w, '<'); - wrbuf_write(m_w, tag, tag_len); } void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len, @@ -293,14 +378,12 @@ void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len, { if (close_it) { - std::list::const_iterator it = enabled_within; - if (it != m_phase->within_list.end()) + if (!s_within.empty()) { + std::list::const_iterator it = s_within.top(); std::string t(tag, tag_len); - if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) - { - enabled_within = m_phase->within_list.end(); - } + if (regex_match(t, it->tag)) + s_within.pop(); } } if (close_it) @@ -310,27 +393,20 @@ void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len, void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len, const char *attr, int attr_len, - const char *value, int val_len) + const char *value, int val_len, + const char *sep) { - std::list::const_iterator it = m_phase->within_list.begin(); + std::list::const_iterator it = m_content->within_list.begin(); bool subst = false; - for (; it != m_phase->within_list.end(); it++) + for (; it != m_content->within_list.end(); it++) { std::string t(tag, tag_len); - if (it->tag.length() == 0 || - yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + if (it->tag.empty() || regex_match(t, it->tag)) { std::string a(attr, attr_len); - std::vector attr; - boost::split(attr, it->attr, boost::is_any_of(",")); - size_t i; - for (i = 0; i < attr.size(); i++) - { - if (attr[i].compare("#text") && - yaz_strcasecmp(attr[i].c_str(), a.c_str()) == 0) - subst = true; - } + if (!it->attr.empty() && regex_match(a, it->attr)) + subst = true; } if (subst) break; @@ -338,31 +414,32 @@ void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len, wrbuf_putc(m_w, ' '); wrbuf_write(m_w, attr, attr_len); - wrbuf_puts(m_w, "=\""); - - std::string output; - if (subst) + if (value) { - std::string input(value, val_len); - output = it->rule->test_patterns(m_vars, input); + wrbuf_puts(m_w, "="); + wrbuf_puts(m_w, sep); + + std::string output; + if (subst) + { + std::string s(value, val_len); + it->exec(m_vars, s, true, m_skips); + wrbuf_puts(m_w, s.c_str()); + } + else + wrbuf_write(m_w, value, val_len); + wrbuf_puts(m_w, sep); } - if (output.empty()) - wrbuf_write(m_w, value, val_len); - else - wrbuf_puts(m_w, output.c_str()); - wrbuf_puts(m_w, "\""); } void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len) { - std::list::const_iterator it = enabled_within; - if (it != m_phase->within_list.end()) + if (!s_within.empty()) { + std::list::const_iterator it = s_within.top(); std::string t(tag, tag_len); - if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) - { - enabled_within = m_phase->within_list.end(); - } + if (regex_match(t, it->tag)) + s_within.pop(); } wrbuf_puts(m_w, "::const_iterator it = enabled_within; - bool subst = false; + std::list::const_iterator it = m_content->within_list.end(); + if (!s_within.empty()) + it = s_within.top(); + if (it != m_content->within_list.end()) + { + std::string s(value, len); + it->exec(m_vars, s, false, m_skips); + wrbuf_puts(m_w, s.c_str()); + } + else + wrbuf_write(m_w, value, len); +} - if (it != m_phase->within_list.end()) +static bool embed_quoted_literal( + std::string &content, + std::map &vars, + mp::filter::HttpRewrite::RulePtr ruleptr, + bool html_context, + std::list &skip_list) +{ + bool replace = false; + std::string res; + const char *cp = content.c_str(); + const char *cp0 = cp; + while (*cp) { - subst = true; - if (it->attr.length() > 0) + if (html_context && !strncmp(cp, """, 6)) { - subst = false; - std::vector attr; - boost::split(attr, it->attr, boost::is_any_of(",")); - size_t i; - for (i = 0; i < attr.size(); i++) + cp += 6; + res.append(cp0, cp - cp0); + cp0 = cp; + while (*cp) { - if (attr[i].compare("#text") == 0) - { - subst = true; - } + if (!strncmp(cp, """, 6)) + break; + if (*cp == '\n') + break; + cp++; } + if (!*cp) + break; + std::string s(cp0, cp - cp0); + if (ruleptr->test_patterns(vars, s, true, skip_list)) + replace = true; + cp0 = cp; + res.append(s); } + else if (*cp == '"' || *cp == '\'') + { + int m = *cp; + cp++; + res.append(cp0, cp - cp0); + cp0 = cp; + while (*cp) + { + if (cp[-1] != '\\' && *cp == m) + break; + if (*cp == '\n') + break; + cp++; + } + if (!*cp) + break; + std::string s(cp0, cp - cp0); + if (ruleptr->test_patterns(vars, s, true, skip_list)) + replace = true; + cp0 = cp; + res.append(s); + } + else if (*cp == '/' && cp[1] == '/') + { + while (cp[1] && cp[1] != '\n') + cp++; + } + cp++; } - std::string output; - if (subst) - { - std::string input(value, len); - output = it->rule->test_patterns(m_vars, input); - } - if (output.empty()) - wrbuf_write(m_w, value, len); - else - wrbuf_puts(m_w, output.c_str()); + res.append(cp0, cp - cp0); + content = res; + return replace; } - -/** - * Tests pattern from the vector in order and executes recipe on - the first match. - */ -const std::string yf::HttpRewrite::Rule::test_patterns( - std::map & vars, - const std::string & txt) const +bool yf::HttpRewrite::Within::exec( + std::map & vars, + std::string & txt, bool anchor, + std::list & skip_list) const { - std::list::const_iterator it = replace_list.begin(); - - for (; it != replace_list.end(); it++) + if (type == "quoted-literal") { - std::string out = it->search_replace(vars, txt); - if (!out.empty()) return out; + return embed_quoted_literal(txt, vars, rule, true, skip_list); + } + else + { + return rule->test_patterns(vars, txt, anchor, skip_list); } - return ""; } -const std::string yf::HttpRewrite::Replace::search_replace( - std::map & vars, - const std::string & txt) const +bool yf::HttpRewrite::Rule::test_patterns( + std::map & vars, + std::string & txt, bool anchor, + std::list & skip_list ) { - //exec regex against value - boost::regex re(regex); - boost::smatch what; + bool replaces = false; + bool first = anchor; + std::string out; std::string::const_iterator start, end; start = txt.begin(); end = txt.end(); - std::string out; - while (regex_search(start, end, what, re)) //find next full match + while (1) { + std::list::iterator bit = replace_list.end(); + boost::smatch bwhat; + bool match_one = false; + { + std::list::iterator it = replace_list.begin(); + for (; it != replace_list.end(); it++) + { + if (it->start_anchor && !first) + continue; + boost::smatch what; + if (regex_search(start, end, what, it->re)) + { + if (!match_one || what[0].first < bwhat[0].first) + { + bwhat = what; + bit = it; + } + match_one = true; + } + } + if (!match_one) + break; + } + first = false; + replaces = true; size_t i; - for (i = 1; i < what.size(); ++i) + for (i = 1; i < bwhat.size(); ++i) { //check if the group is named - std::map::const_iterator it - = group_index.find(i); - if (it != group_index.end()) + std::map::const_iterator git + = bit->group_index.find(i); + if (git != bit->group_index.end()) { //it is - if (!what[i].str().empty()) - vars[it->second] = what[i]; + vars[git->second] = bwhat[i]; } } + // Compare against skip_list + bool skipthis = false; + std::list::iterator si = skip_list.begin(); + for ( ; si != skip_list.end(); si++) { + if ( boost::regex_search(bwhat.str(0), *si) ) + { + skipthis = true; + break; + } + } //prepare replacement string - std::string rvalue = sub_vars(vars); - yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", - what.str(0).c_str(), rvalue.c_str()); - out.append(start, what[0].first); - out.append(rvalue); - start = what[0].second; //move search forward + std::string rvalue = bit->sub_vars(vars); + out.append(start, bwhat[0].first); + if ( skipthis ) + { + yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match", + bwhat.str(0).c_str() ); + out.append(bwhat.str(0).c_str()); + } + else + { + yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", + bwhat.str(0).c_str(), rvalue.c_str()); + out.append(rvalue); + } + start = bwhat[0].second; //move search forward } - //if we had a match cat the last part - if (start != txt.begin()) - out.append(start, end); - return out; + out.append(start, end); + txt = out; + return replaces; } -void yf::HttpRewrite::Replace::parse_groups() +void yf::HttpRewrite::Replace::parse_groups(std::string pattern) { int gnum = 0; bool esc = false; - const std::string & str = regex; + const std::string &str = pattern; std::string res; + start_anchor = str[0] == '^'; yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str()); for (size_t i = 0; i < str.size(); ++i) { @@ -522,11 +686,11 @@ void yf::HttpRewrite::Replace::parse_groups() } esc = false; } - regex = res; + re = res; } -std::string yf::HttpRewrite::Replace::sub_vars ( - const std::map & vars) const +std::string yf::HttpRewrite::Replace::sub_vars( + const std::map & vars) const { std::string out; bool esc = false; @@ -579,6 +743,111 @@ yf::HttpRewrite::Phase::Phase() : m_verbose(0) { } +void yf::HttpRewrite::Content::parse( + int verbose, + std::string &content, + std::map &vars, + std::list & skip_list ) const +{ + if (type == "html") + { + HTMLParser parser; + Event ev(this, vars, skip_list); + + parser.set_verbose(verbose); + + parser.parse(ev, content.c_str()); + content = ev.result(); + } + if (type == "quoted-literal") + { + quoted_literal(content, vars, skip_list); + } +} + +void yf::HttpRewrite::Content::quoted_literal( + std::string &content, + std::map &vars, + std::list & skip_list ) const +{ + std::list::const_iterator it = within_list.begin(); + if (it != within_list.end()) + embed_quoted_literal(content, vars, it->rule, false, skip_list); +} + +void yf::HttpRewrite::Content::configure( + const xmlNode *ptr, std::map &rules) +{ + for (; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) ptr->name, "within")) + { + static const char *names[7] = + { "header", "attr", "tag", "rule", "reqline", "type", 0 }; + std::string values[6]; + mp::xml::parse_attr(ptr, names, values); + Within w; + if (values[0].length() > 0) + w.header.assign(values[0], boost::regex_constants::icase); + if (values[1].length() > 0) + w.attr.assign(values[1], boost::regex_constants::icase); + if (values[2].length() > 0) + w.tag.assign(values[2], boost::regex_constants::icase); + + std::vector rulenames; + boost::split(rulenames, values[3], boost::is_any_of(",")); + if (rulenames.size() == 0) + { + throw mp::filter::FilterException + ("Empty rule in '" + values[3] + + "' in http_rewrite filter"); + } + else if (rulenames.size() == 1) + { + std::map::const_iterator it = + rules.find(rulenames[0]); + if (it == rules.end()) + throw mp::filter::FilterException + ("Reference to non-existing rule '" + rulenames[0] + + "' in http_rewrite filter"); + w.rule = it->second; + + } + else + { + RulePtr rule(new Rule); + size_t i; + for (i = 0; i < rulenames.size(); i++) + { + std::map::const_iterator it = + rules.find(rulenames[i]); + if (it == rules.end()) + throw mp::filter::FilterException + ("Reference to non-existing rule '" + rulenames[i] + + "' in http_rewrite filter"); + RulePtr subRule = it->second; + std::list::iterator rit = + subRule->replace_list.begin(); + for (; rit != subRule->replace_list.end(); rit++) + rule->replace_list.push_back(*rit); + } + w.rule = rule; + } + w.reqline = values[4] == "1"; + w.type = values[5]; + if (w.type.empty() || w.type == "quoted-literal") + ; + else + throw mp::filter::FilterException + ("within type must be quoted-literal or none in " + " in http_rewrite filter"); + within_list.push_back(w); + } + } +} + void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) { static const char *names[2] = { "verbose", 0 }; @@ -608,11 +877,12 @@ void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) if (!strcmp((const char *) p->name, "rewrite")) { Replace replace; + std::string from; const struct _xmlAttr *attr; for (attr = p->properties; attr; attr = attr->next) { if (!strcmp((const char *) attr->name, "from")) - replace.regex = mp::xml::get_text(attr->children); + from = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "to")) replace.recipe = mp::xml::get_text(attr->children); else @@ -622,10 +892,12 @@ void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) + " in rewrite section of http_rewrite"); } yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", - replace.regex.c_str(), replace.recipe.c_str()); - replace.parse_groups(); - if (!replace.regex.empty()) + from.c_str(), replace.recipe.c_str()); + if (!from.empty()) + { + replace.parse_groups(from); rule->replace_list.push_back(replace); + } } else throw mp::filter::FilterException @@ -635,25 +907,26 @@ void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) } rules[values[0]] = rule; } - else if (!strcmp((const char *) ptr->name, "within")) + else if (!strcmp((const char *) ptr->name, "content")) { - static const char *names[6] = - { "header", "attr", "tag", "rule", "reqline", 0 }; - std::string values[5]; + static const char *names[3] = + { "type", "mime", 0 }; + std::string values[2]; mp::xml::parse_attr(ptr, names, values); - Within w; - w.header = values[0]; - w.attr = values[1]; - w.tag = values[2]; - std::map::const_iterator it = - rules.find(values[3]); - if (it == rules.end()) - throw mp::filter::FilterException - ("Reference to non-existing rule '" + values[3] + - "' in http_rewrite filter"); - w.rule = it->second; - w.reqline = values[4] == "1"; - phase.within_list.push_back(w); + if (values[0].empty()) + { + throw mp::filter::FilterException + ("Missing attribute, type for for element " + + std::string((const char *) ptr->name) + + " in http_rewrite filter"); + } + Content c; + + c.type = values[0]; + if (!values[1].empty()) + c.content_re.assign(values[1], boost::regex::icase); + c.configure(ptr->children, rules); + phase.content_list.push_back(c); } else {