X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_http_rewrite.cpp;h=58243bfb2aae694cad56f1a7bf84c6ced463da96;hb=c271eb1db3286cd6c309f2cde33dc90d7c3c4dfe;hp=501c87e9eb3fb21baf61f0983ff19954570c3eda;hpb=34b33150854c015d84bcec7d85b808387cc74d67;p=metaproxy-moved-to-github.git diff --git a/src/filter_http_rewrite.cpp b/src/filter_http_rewrite.cpp index 501c87e..58243bf 100644 --- a/src/filter_http_rewrite.cpp +++ b/src/filter_http_rewrite.cpp @@ -59,11 +59,14 @@ namespace metaproxy_1 { }; class HttpRewrite::Within { public: - std::string header; - std::string attr; - std::string tag; + boost::regex header; + boost::regex attr; + boost::regex tag; + std::string type; bool reqline; RulePtr rule; + bool exec(std::map &vars, + std::string &txt, bool anchor) const; }; class HttpRewrite::Content { @@ -198,7 +201,7 @@ void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o, if (it->reqline) { yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); - if (it->rule->test_patterns(vars, path, true)) + if (it->exec(vars, path, true)) { yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str()); hreq->path = odr_strdup(o, path.c_str()); @@ -223,14 +226,20 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o, std::list::const_iterator it = cit->within_list.begin(); for (; it != cit->within_list.end(); it++) { - if (it->header.length() > 0 && - yaz_strcasecmp(it->header.c_str(), header->name) == 0) + if (!it->header.empty() && + regex_match(header->name, it->header)) { +#ifdef OLDHEADERMATCH + // Matches and replaces the whole header line. + // This is good if you want to play with the header name too, + // but useless for patterns that want to anchor to the beginning + // or end of the header value, as we want to do with host-relative + // links. This code should probably be removed. std::string sheader(header->name); sheader += ": "; sheader += header->value; - if (it->rule->test_patterns(vars, sheader, true)) + if (it->exec(vars, sheader, true)) { size_t pos = sheader.find(": "); if (pos == std::string::npos) @@ -243,6 +252,15 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o, header->value = odr_strdup( o, sheader.substr(pos + 2, std::string::npos).c_str()); } +#else + // Match and replace only the header value + std::string hval(header->value); + if (it->exec(vars, hval, true)) + { + header->value = odr_strdup(o, hval.c_str()); + } + +#endif } } } @@ -255,6 +273,12 @@ void yf::HttpRewrite::Phase::rewrite_body( int *content_len, std::map & vars) const { + if (*content_len == 0) + return; + if (!content_type) { + yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite"); + return; + } std::list::const_iterator cit = content_list.begin(); for (; cit != content_list.end(); cit++) { @@ -264,21 +288,23 @@ void yf::HttpRewrite::Phase::rewrite_body( && regex_match(content_type, cit->content_re)) break; } - if (cit == content_list.end()) + if (cit == content_list.end()) { + yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting", + content_type ); return; - - if (*content_buf) - { - int i; - for (i = 0; i < *content_len; i++) - if ((*content_buf)[i] == 0) - return; // binary content. skip - - std::string content(*content_buf, *content_len); - cit->parse(m_verbose, content, vars); - *content_buf = odr_strdup(o, content.c_str()); - *content_len = strlen(*content_buf); } + + int i; + for (i = 0; i < *content_len; i++) + if ((*content_buf)[i] == 0) { + yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting"); + return; // binary content. skip + } + + std::string content(*content_buf, *content_len); + cit->parse(m_verbose, content, vars); + *content_buf = odr_strdup(o, content.c_str()); + *content_len = strlen(*content_buf); } yf::HttpRewrite::Event::Event(const Content *p, @@ -307,19 +333,12 @@ void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len) std::list::const_iterator it = m_content->within_list.begin(); for (; it != m_content->within_list.end(); it++) { - if (it->tag.length() > 0 && yaz_strcasecmp(it->tag.c_str(), - t.c_str()) == 0) + if (!it->tag.empty() && regex_match(t, it->tag)) { - std::vector attr; - boost::split(attr, it->attr, boost::is_any_of(",")); - size_t i; - for (i = 0; i < attr.size(); i++) + if (!it->attr.empty() && regex_match("#text", it->attr)) { - if (attr[i].compare("#text") == 0) - { - s_within.push(it); - return; - } + s_within.push(it); + return; } } } @@ -334,7 +353,7 @@ void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len, { std::list::const_iterator it = s_within.top(); std::string t(tag, tag_len); - if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + if (regex_match(t, it->tag)) s_within.pop(); } } @@ -354,19 +373,11 @@ void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len, for (; it != m_content->within_list.end(); it++) { std::string t(tag, tag_len); - if (it->tag.length() == 0 || - yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + if (it->tag.empty() || regex_match(t, it->tag)) { std::string a(attr, attr_len); - std::vector attr; - boost::split(attr, it->attr, boost::is_any_of(",")); - size_t i; - for (i = 0; i < attr.size(); i++) - { - if (attr[i].compare("#text") && - yaz_strcasecmp(attr[i].c_str(), a.c_str()) == 0) - subst = true; - } + if (!it->attr.empty() && regex_match(a, it->attr)) + subst = true; } if (subst) break; @@ -383,7 +394,7 @@ void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len, if (subst) { std::string s(value, val_len); - it->rule->test_patterns(m_vars, s, true); + it->exec(m_vars, s, true); wrbuf_puts(m_w, s.c_str()); } else @@ -398,7 +409,7 @@ void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len) { std::list::const_iterator it = s_within.top(); std::string t(tag, tag_len); - if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0) + if (regex_match(t, it->tag)) s_within.pop(); } wrbuf_puts(m_w, "within_list.end()) { std::string s(value, len); - it->rule->test_patterns(m_vars, s, false); + it->exec(m_vars, s, false); wrbuf_puts(m_w, s.c_str()); } else wrbuf_write(m_w, value, len); } +static bool embed_quoted_literal( + std::string &content, + std::map &vars, + mp::filter::HttpRewrite::RulePtr ruleptr, + bool html_context) +{ + bool replace = false; + std::string res; + const char *cp = content.c_str(); + const char *cp0 = cp; + while (*cp) + { + if (html_context && !strncmp(cp, """, 6)) + { + cp += 6; + res.append(cp0, cp - cp0); + cp0 = cp; + while (*cp) + { + if (!strncmp(cp, """, 6)) + break; + if (*cp == '\n') + break; + cp++; + } + if (!*cp) + break; + std::string s(cp0, cp - cp0); + if (ruleptr->test_patterns(vars, s, true)) + replace = true; + cp0 = cp; + res.append(s); + } + else if (*cp == '"' || *cp == '\'') + { + int m = *cp; + cp++; + res.append(cp0, cp - cp0); + cp0 = cp; + while (*cp) + { + if (cp[-1] != '\\' && *cp == m) + break; + if (*cp == '\n') + break; + cp++; + } + if (!*cp) + break; + std::string s(cp0, cp - cp0); + if (ruleptr->test_patterns(vars, s, true)) + replace = true; + cp0 = cp; + res.append(s); + } + else if (*cp == '/' && cp[1] == '/') + { + while (cp[1] && cp[1] != '\n') + cp++; + } + cp++; + } + res.append(cp0, cp - cp0); + content = res; + return replace; +} + +bool yf::HttpRewrite::Within::exec( + std::map & vars, + std::string & txt, bool anchor) const +{ + if (type == "quoted-literal") + { + return embed_quoted_literal(txt, vars, rule, true); + } + else + { + return rule->test_patterns(vars, txt, anchor); + } +} + bool yf::HttpRewrite::Rule::test_patterns( std::map & vars, std::string & txt, bool anchor) @@ -622,43 +714,9 @@ void yf::HttpRewrite::Content::quoted_literal( std::string &content, std::map &vars) const { - std::string res; - const char *cp = content.c_str(); - const char *cp0 = cp; - while (*cp) - { - if (*cp == '"' || *cp == '\'') - { - int m = *cp; - cp++; - res.append(cp0, cp - cp0); - cp0 = cp; - while (*cp) - { - if (cp[-1] != '\\' && *cp == m) - break; - if (*cp == '\n') - break; - cp++; - } - if (!*cp) - break; - std::list::const_iterator it = within_list.begin(); - std::string s(cp0, cp - cp0); - if (it != within_list.end()) - it->rule->test_patterns(vars, s, true); - cp0 = cp; - res.append(s); - } - else if (*cp == '/' && cp[1] == '/') - { - while (cp[1] && cp[1] != '\n') - cp++; - } - cp++; - } - res.append(cp0, cp - cp0); - content = res; + std::list::const_iterator it = within_list.begin(); + if (it != within_list.end()) + embed_quoted_literal(content, vars, it->rule, false); } void yf::HttpRewrite::Content::configure( @@ -670,22 +728,65 @@ void yf::HttpRewrite::Content::configure( continue; if (!strcmp((const char *) ptr->name, "within")) { - static const char *names[6] = - { "header", "attr", "tag", "rule", "reqline", 0 }; - std::string values[5]; + static const char *names[7] = + { "header", "attr", "tag", "rule", "reqline", "type", 0 }; + std::string values[6]; mp::xml::parse_attr(ptr, names, values); Within w; - w.header = values[0]; - w.attr = values[1]; - w.tag = values[2]; - std::map::const_iterator it = - rules.find(values[3]); - if (it == rules.end()) + if (values[0].length() > 0) + w.header.assign(values[0], boost::regex_constants::icase); + if (values[1].length() > 0) + w.attr.assign(values[1], boost::regex_constants::icase); + if (values[2].length() > 0) + w.tag.assign(values[2], boost::regex_constants::icase); + + std::vector rulenames; + boost::split(rulenames, values[3], boost::is_any_of(",")); + if (rulenames.size() == 0) + { throw mp::filter::FilterException - ("Reference to non-existing rule '" + values[3] + + ("Empty rule in '" + values[3] + "' in http_rewrite filter"); - w.rule = it->second; + } + else if (rulenames.size() == 1) + { + std::map::const_iterator it = + rules.find(rulenames[0]); + if (it == rules.end()) + throw mp::filter::FilterException + ("Reference to non-existing rule '" + rulenames[0] + + "' in http_rewrite filter"); + w.rule = it->second; + + } + else + { + RulePtr rule(new Rule); + size_t i; + for (i = 0; i < rulenames.size(); i++) + { + std::map::const_iterator it = + rules.find(rulenames[i]); + if (it == rules.end()) + throw mp::filter::FilterException + ("Reference to non-existing rule '" + rulenames[i] + + "' in http_rewrite filter"); + RulePtr subRule = it->second; + std::list::iterator rit = + subRule->replace_list.begin(); + for (; rit != subRule->replace_list.end(); rit++) + rule->replace_list.push_back(*rit); + } + w.rule = rule; + } w.reqline = values[4] == "1"; + w.type = values[5]; + if (w.type.empty() || w.type == "quoted-literal") + ; + else + throw mp::filter::FilterException + ("within type must be quoted-literal or none in " + " in http_rewrite filter"); within_list.push_back(w); } } @@ -766,8 +867,8 @@ void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase) Content c; c.type = values[0]; - // if (!values[1].empty()) - c.content_re = values[1]; + if (!values[1].empty()) + c.content_re.assign(values[1], boost::regex::icase); c.configure(ptr->children, rules); phase.content_list.push_back(c); }