1 /* This file is part of Metaproxy.
2 Copyright (C) 2005-2013 Index Data
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24 #include "html_parser.hpp"
30 #include <boost/regex.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <boost/algorithm/string.hpp>
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
39 namespace metaproxy_1 {
// One rewrite step: a pattern plus a replacement recipe.
// Only part of the class body is visible in this listing. Other code in this
// file also reads the members start_anchor, re, what and recipe.
41 class HttpRewrite::Replace {
// Maps a capture-group number to the name given to it in the pattern;
// filled by parse_groups and read by Rule::test_patterns.
47 std::map<int, std::string> group_index;
// Parameter list of a declaration whose first line is not visible here
// (presumably sub_vars, whose definition takes exactly this parameter).
49 const std::map<std::string, std::string> & vars) const;
// Scans a pattern for named groups and records them in group_index.
50 void parse_groups(std::string pattern);
// A rule: a list of Replace steps that are applied together to a text.
53 class HttpRewrite::Rule {
// Steps considered by test_patterns; appended to in configure_phase.
55 std::list<Replace> replace_list;
// Tail of a declaration whose first line is not visible here (presumably
// test_patterns, whose definition takes these parameters).
57 std::map<std::string, std::string> &vars,
58 std::string &txt, bool anchor);
// Binds a rule to a place where it applies. The member declarations are not
// visible in this listing; other code in this file reads header, attr, tag,
// reqline and rule from Within objects.
60 class HttpRewrite::Within {
// One content section of a phase: which bodies it applies to and the list of
// Within bindings to use. Other code in this file also reads a string member
// named type, whose declaration is not visible here.
69 class HttpRewrite::Content {
// Regex matched against the Content-Type value in Phase::rewrite_body.
72 boost::regex content_re;
// Bindings in configuration order.
73 std::list<Within> within_list;
// Reads "within" child elements and resolves their rule references
// against the supplied rule table.
74 void configure(const xmlNode *ptr,
75 std::map<std::string, RulePtr > &rules);
// Rewrites the contents of quoted string literals inside content.
76 void quoted_literal(std::string &content,
77 std::map<std::string, std::string> &vars) const;
// Rewrites content in place according to this section's type.
78 void parse(int verbose, std::string &content,
79 std::map<std::string, std::string> & vars) const;
// Rewrite configuration for one direction (request or response). Other code
// in this file also uses a constructor and an m_verbose member whose
// declarations are not visible here.
81 class HttpRewrite::Phase {
// Content sections in configuration order.
85 std::list<Content> content_list;
// Rewrites the request path.
86 void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
87 std::map<std::string, std::string> & vars) const;
// Rewrites matching headers in the given header list.
88 void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
89 std::map<std::string, std::string> & vars) const;
// Rewrites a message body; content_buf and content_len are in/out.
90 void rewrite_body(mp::odr & o,
91 const char *content_type,
92 char **content_buf, int *content_len,
93 std::map<std::string, std::string> & vars) const;
// HTML parser callback object. Each callback writes the markup it receives
// to an output buffer (m_w, declaration not visible here), applying rewrite
// rules to selected attribute values and text.
95 class HttpRewrite::Event : public HTMLParserEvent {
// Callbacks overriding HTMLParserEvent (presumably; the base class is
// declared in html_parser.hpp, which is not part of this listing).
96 void openTagStart(const char *tag, int tag_len);
97 void anyTagEnd(const char *tag, int tag_len, int close_it);
98 void attribute(const char *tag, int tag_len,
99 const char *attr, int attr_len,
100 const char *value, int val_len,
102 void closeTag(const char *tag, int tag_len);
103 void text(const char *value, int len);
// Content section whose within_list drives the rewriting.
104 const Content *m_content;
// Stack of Within entries; its top is consulted by anyTagEnd, closeTag
// and text.
106 std::stack<std::list<Within>::const_iterator> s_within;
// Variables shared with the caller; handed to Rule::test_patterns.
107 std::map<std::string, std::string> &m_vars;
109 Event(const Content *p, std::map<std::string, std::string> &vars);
// Returns the accumulated output as a C string.
111 const char *result();
// Constructs the filter with a freshly allocated Phase for requests and
// another for responses.
116 yf::HttpRewrite::HttpRewrite() :
117 req_phase(new Phase), res_phase(new Phase)
// Destructor; its body is not visible in this listing.
121 yf::HttpRewrite::~HttpRewrite()
125 void yf::HttpRewrite::process(mp::Package & package) const
127 yaz_log(YLOG_LOG, "HttpRewrite begins....");
128 Z_GDU *gdu = package.request().get();
129 //map of request/response vars
130 std::map<std::string, std::string> vars;
131 //we have an http req
132 if (gdu && gdu->which == Z_GDU_HTTP_Request)
134 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
136 req_phase->rewrite_reqline(o, hreq, vars);
137 yaz_log(YLOG_LOG, ">> Request headers");
138 req_phase->rewrite_headers(o, hreq->headers, vars);
139 req_phase->rewrite_body(o,
140 z_HTTP_header_lookup(hreq->headers,
142 &hreq->content_buf, &hreq->content_len,
144 package.request() = gdu;
147 gdu = package.response().get();
148 if (gdu && gdu->which == Z_GDU_HTTP_Response)
150 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
151 yaz_log(YLOG_LOG, "Response code %d", hres->code);
153 yaz_log(YLOG_LOG, "<< Respose headers");
154 res_phase->rewrite_headers(o, hres->headers, vars);
155 res_phase->rewrite_body(o,
156 z_HTTP_header_lookup(hres->headers,
158 &hres->content_buf, &hres->content_len,
160 package.response() = gdu;
// Rewrites the path of the request line.
// Builds a URL string (the local `path`, whose declaration and assembly are
// on lines not visible in this listing), runs it through the rule of each
// Within entry of the "headers" content section, and on a successful rewrite
// stores the new text in hreq->path, allocated from `o`.
164 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
165 Z_HTTP_Request *hreq,
166 std::map<std::string, std::string> & vars) const
168 //rewrite the request line
// True when hreq->path itself starts with "http://".
170 if (strstr(hreq->path, "http://") == hreq->path)
172 yaz_log(YLOG_LOG, "Path in the method line is absolute, "
173 "possibly a proxy request");
178 //TODO what about proto
// Otherwise the Host header is looked up (presumably to build an absolute
// URL; the lines doing so are not visible here).
179 const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
// Locate the content section of type "headers"; nothing to do without one.
188 std::list<Content>::const_iterator cit = content_list.begin();
189 for (; cit != content_list.end(); cit++)
190 if (cit->type == "headers")
193 if (cit == content_list.end())
196 std::list<Within>::const_iterator it = cit->within_list.begin();
197 for (; it != cit->within_list.end(); it++)
200 yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
// test_patterns edits `path` in place and reports whether it changed.
201 if (it->rule->test_patterns(vars, path, true))
203 yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
204 hreq->path = odr_strdup(o, path.c_str());
// Rewrites HTTP headers in place.
// For every header whose name matches (case-insensitively) the `header`
// attribute of a Within entry in the "headers" content section, a combined
// "name...value" string is run through that entry's rule; if it was
// rewritten, it is split at the first ": " and name and value are replaced
// with copies allocated from `o`.
209 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
210 Z_HTTP_Header *headers,
211 std::map<std::string, std::string> & vars) const
// Locate the content section of type "headers".
213 std::list<Content>::const_iterator cit = content_list.begin();
214 for (; cit != content_list.end(); cit++)
215 if (cit->type == "headers")
218 if (cit == content_list.end())
221 for (Z_HTTP_Header *header = headers; header; header = header->next)
223 std::list<Within>::const_iterator it = cit->within_list.begin();
224 for (; it != cit->within_list.end(); it++)
// Entries without a header name never match here.
226 if (it->header.length() > 0 &&
227 yaz_strcasecmp(it->header.c_str(), header->name) == 0)
// The separator appended between name and value is on a line not
// visible here (presumably ": ", matching the find() below).
229 std::string sheader(header->name);
231 sheader += header->value;
233 if (it->rule->test_patterns(vars, sheader, true))
235 size_t pos = sheader.find(": ");
236 if (pos == std::string::npos)
238 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
241 header->name = odr_strdup(
242 o, sheader.substr(0, pos).c_str());
// pos + 2 skips the two-character ": " separator.
243 header->value = odr_strdup(
244 o, sheader.substr(pos + 2, std::string::npos).c_str());
251 void yf::HttpRewrite::Phase::rewrite_body(
253 const char *content_type,
256 std::map<std::string, std::string> & vars) const
258 std::list<Content>::const_iterator cit = content_list.begin();
259 for (; cit != content_list.end(); cit++)
261 yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
262 content_type, cit->type.c_str());
263 if (cit->type != "headers"
264 && regex_match(content_type, cit->content_re))
267 if (cit == content_list.end())
273 for (i = 0; i < *content_len; i++)
274 if ((*content_buf)[i] == 0)
275 return; // binary content. skip
277 std::string content(*content_buf, *content_len);
278 cit->parse(m_verbose, content, vars);
279 *content_buf = odr_strdup(o, content.c_str());
280 *content_len = strlen(*content_buf);
// Stores the content section and the shared variable map. The constructor
// body is not visible in this listing (presumably it creates m_w).
284 yf::HttpRewrite::Event::Event(const Content *p,
285 std::map<std::string, std::string> & vars
286 ) : m_content(p), m_vars(vars)
// Destructor; its body is not visible in this listing (presumably it
// releases m_w).
291 yf::HttpRewrite::Event::~Event()
// Returns the output accumulated in m_w as a C string.
296 const char *yf::HttpRewrite::Event::result()
298 return wrbuf_cstr(m_w);
// Called at the start of an opening tag. Writes "<" and the tag name to the
// output, then looks for a Within entry for this tag whose comma-separated
// attr list contains "#text".
// What happens on a hit is on lines not visible here (presumably the entry is
// pushed on s_within so that text() rewrites the element's text).
301 void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
303 wrbuf_putc(m_w, '<');
304 wrbuf_write(m_w, tag, tag_len);
306 std::string t(tag, tag_len);
307 std::list<Within>::const_iterator it = m_content->within_list.begin();
308 for (; it != m_content->within_list.end(); it++)
// Only entries with a tag name take part; the rest of this comparison is
// on a line not visible here.
310 if (it->tag.length() > 0 && yaz_strcasecmp(it->tag.c_str(),
313 std::vector<std::string> attr;
314 boost::split(attr, it->attr, boost::is_any_of(","));
316 for (i = 0; i < attr.size(); i++)
318 if (attr[i].compare("#text") == 0)
// Called when a tag ends. Compares the tag name case-insensitively with the
// entry on top of s_within, then writes the closing "/" and/or ">".
// The conditions guarding the two writes, and what is done when the names
// match, are on lines not visible here (presumably "/" depends on close_it).
328 void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
333 if (!s_within.empty())
335 std::list<Within>::const_iterator it = s_within.top();
336 std::string t(tag, tag_len);
337 if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0)
342 wrbuf_putc(m_w, '/');
343 wrbuf_putc(m_w, '>');
// Called for each attribute of a tag. Writes the attribute name, "=", the
// separator and the value to the output; the value is first passed through a
// rule when a Within entry selects this tag/attribute pair.
// The last parameter (declared on a line not visible here) is used as `sep`.
346 void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
347 const char *attr, int attr_len,
348 const char *value, int val_len,
351 std::list<Within>::const_iterator it = m_content->within_list.begin();
354 for (; it != m_content->within_list.end(); it++)
356 std::string t(tag, tag_len);
// An entry without a tag name applies to every tag.
357 if (it->tag.length() == 0 ||
358 yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0)
360 std::string a(attr, attr_len);
// NOTE(review): this local vector shadows the `attr` parameter for the
// rest of the inner scope; `a` above holds the parameter's text.
361 std::vector<std::string> attr;
362 boost::split(attr, it->attr, boost::is_any_of(","));
364 for (i = 0; i < attr.size(); i++)
// compare() is non-zero when the strings differ, so this selects list
// items other than "#text" that equal the attribute name
// case-insensitively.
366 if (attr[i].compare("#text") &&
367 yaz_strcasecmp(attr[i].c_str(), a.c_str()) == 0)
375 wrbuf_putc(m_w, ' ');
376 wrbuf_write(m_w, attr, attr_len);
379 wrbuf_puts(m_w, "=");
380 wrbuf_puts(m_w, sep);
// Rewritten value; the branch condition is on a line not visible here.
385 std::string s(value, val_len);
386 it->rule->test_patterns(m_vars, s, true);
387 wrbuf_puts(m_w, s.c_str());
// Unmodified value (presumably the else branch).
390 wrbuf_write(m_w, value, val_len);
391 wrbuf_puts(m_w, sep);
// Called for a closing tag. Compares the tag name case-insensitively with the
// entry on top of s_within (the action on a match is on a line not visible
// here, presumably a pop), then writes "</" followed by the tag name.
395 void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
397 if (!s_within.empty())
399 std::list<Within>::const_iterator it = s_within.top();
400 std::string t(tag, tag_len);
401 if (yaz_strcasecmp(it->tag.c_str(), t.c_str()) == 0)
404 wrbuf_puts(m_w, "</");
405 wrbuf_write(m_w, tag, tag_len);
// Called for character data. If a Within entry is selected, the text is run
// through its rule (unanchored) and the result is written; otherwise the text
// is written unchanged.
408 void yf::HttpRewrite::Event::text(const char *value, int len)
// end() serves as the "no entry selected" marker.
410 std::list<Within>::const_iterator it = m_content->within_list.end();
// The assignment made when the stack is non-empty is on a line not visible
// here (presumably it = s_within.top()).
411 if (!s_within.empty())
413 if (it != m_content->within_list.end())
415 std::string s(value, len);
416 it->rule->test_patterns(m_vars, s, false);
417 wrbuf_puts(m_w, s.c_str());
420 wrbuf_write(m_w, value, len);
// Applies the rule's Replace steps to txt.
// In each round every step is searched from the current position and the one
// whose match starts earliest is chosen. Its named groups are copied into
// vars, the recipe is expanded with sub_vars, and the search continues after
// the match. Unmatched text is copied through to `out`.
// The return statement and the write-back to txt are on lines not visible
// here (presumably returns `replaces`).
// NOTE(review): match results are stored in the Replace objects (it->what),
// which belong to the shared Rule; concurrent calls on the same rule would
// share that state - confirm how callers serialise access.
423 bool yf::HttpRewrite::Rule::test_patterns(
424 std::map<std::string, std::string> & vars,
425 std::string & txt, bool anchor)
427 bool replaces = false;
430 std::string::const_iterator start, end;
// bit is the best step found in this round; end() means none yet.
435 std::list<Replace>::iterator bit = replace_list.end();
437 std::string::const_iterator best_pos = txt.end();
438 std::list<Replace>::iterator it = replace_list.begin();
439 for (; it != replace_list.end(); it++)
// Steps whose pattern begins with '^' are only considered while
// `first` is set (presumably the first round; `first` is declared on a
// line not visible here).
441 if (it->start_anchor && !first)
443 if (regex_search(start, end, it->what, it->re))
// Keep the match that starts earliest.
445 if (it->what[0].first < best_pos)
447 best_pos = it->what[0].first;
// No step matched (presumably ends the loop).
452 if (bit == replace_list.end())
// Group 0 is the whole match, so capture groups start at 1.
458 for (i = 1; i < bit->what.size(); ++i)
460 //check if the group is named
461 std::map<int, std::string>::const_iterator git
462 = bit->group_index.find(i);
463 if (git != bit->group_index.end())
465 vars[git->second] = bit->what[i];
469 //prepare replacement string
470 std::string rvalue = bit->sub_vars(vars);
471 yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
472 bit->what.str(0).c_str(), rvalue.c_str());
// Copy the text preceding the match.
473 out.append(start, bit->what[0].first);
475 start = bit->what[0].second; //move search forward
// Copy whatever follows the last match.
477 out.append(start, end);
// Scans a regex pattern and records its named capture groups in group_index
// (group number -> name). Also sets start_anchor when the pattern begins
// with '^'.
// Throws FilterException for a group name that contains a non-alphanumeric
// character or is not terminated by '>'.
// The bookkeeping for esc, gnum, gname and term is on lines not visible here.
482 void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
486 const std::string &str = pattern;
488 start_anchor = str[0] == '^';
489 yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
490 for (size_t i = 0; i < str.size(); ++i)
// A backslash escapes the following character.
493 if (!esc && str[i] == '\\')
498 if (!esc && str[i] == '(') //group starts
501 if (i+1 < str.size() && str[i+1] == '?') //group with attrs
504 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
// A non-capturing group does not get a number.
506 if (gnum > 0) gnum--;
512 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
514 if (i+1 < str.size() && str[i+1] == '<') //named
// Read the name up to the closing '>'.
519 while (++i < str.size())
521 if (str[i] == '>') { term = true; break; }
// NOTE(review): isalnum receives a plain char; a negative value
// (non-ASCII byte) is outside its defined domain - confirm.
522 if (!isalnum(str[i]))
523 throw mp::filter::FilterException
524 ("Only alphanumeric chars allowed, found "
528 + boost::lexical_cast<std::string>(i));
532 throw mp::filter::FilterException
533 ("Unterminated group name '" + gname
534 + " in '" + str +"'");
535 group_index[gnum] = gname;
536 yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
537 gname.c_str(), gnum);
// Expands the recipe: scans it for "${name}" references and looks each name
// up in vars. Returns the expanded string (the output accumulation and the
// return statement are on lines not visible here).
// Throws FilterException for a reference without a closing '}' and for a '$'
// that is not followed by '{'.
546 std::string yf::HttpRewrite::Replace::sub_vars(
547 const std::map<std::string, std::string> & vars) const
551 const std::string & in = recipe;
552 for (size_t i = 0; i < in.size(); ++i)
// A backslash escapes the following character.
554 if (!esc && in[i] == '\\')
559 if (!esc && in[i] == '$') //var
561 if (i+1 < in.size() && in[i+1] == '{') //ref prefix
// Read the variable name up to the closing '}'.
566 while (++i < in.size())
568 if (in[i] == '}') { term = true; break; }
571 if (!term) throw mp::filter::FilterException
572 ("Unterminated var ref in '"+in+"' at "
573 + boost::lexical_cast<std::string>(i));
574 std::map<std::string, std::string>::const_iterator it
// Handling of a name missing from vars is on a line not visible here.
576 if (it != vars.end())
583 throw mp::filter::FilterException
584 ("Malformed or trimmed var ref in '"
585 +in+"' at "+boost::lexical_cast<std::string>(i));
// Starts with verbosity level 0; configure_phase overwrites it from the
// "verbose" attribute.
596 yf::HttpRewrite::Phase::Phase() : m_verbose(0)
// Rewrites content in place according to the section type.
// One branch feeds content through the HTML parser with an Event object and
// takes the event's output as the new content; the "quoted-literal" branch
// calls quoted_literal. The condition selecting the parser branch is on a
// line not visible here.
600 void yf::HttpRewrite::Content::parse(
602 std::string &content,
603 std::map<std::string, std::string> &vars) const
608 Event ev(this, vars);
610 parser.set_verbose(verbose);
612 parser.parse(ev, content.c_str());
613 content = ev.result();
615 if (type == "quoted-literal")
617 quoted_literal(content, vars);
// Scans content for string literals delimited by " or ' and runs the text of
// each through a rule, copying everything else through unchanged. "//" line
// comments are skipped up to the end of the line.
// The loop structure, the result string `res`, the quote character `m` and
// the final assignment to content are on lines not visible here.
621 void yf::HttpRewrite::Content::quoted_literal(
622 std::string &content,
623 std::map<std::string, std::string> &vars) const
// cp is the scan position; cp0 marks the start of text not yet copied.
626 const char *cp = content.c_str();
627 const char *cp0 = cp;
630 if (*cp == '"' || *cp == '\'')
// Flush the text that precedes the literal.
634 res.append(cp0, cp - cp0);
// The literal ends at the matching quote unless a backslash precedes it.
// NOTE(review): only one character of look-back, so a quote that follows
// an escaped backslash is treated as escaped - confirm this is intended.
638 if (cp[-1] != '\\' && *cp == m)
// Only the first Within entry of this section is used.
646 std::list<Within>::const_iterator it = within_list.begin();
647 std::string s(cp0, cp - cp0);
648 if (it != within_list.end())
649 it->rule->test_patterns(vars, s, true);
653 else if (*cp == '/' && cp[1] == '/')
655 while (cp[1] && cp[1] != '\n')
// Flush the remaining text.
660 res.append(cp0, cp - cp0);
// Reads the "within" child elements of a content section.
// Each element's header, attr, tag, rule and reqline attributes are parsed
// into a Within object that is appended to within_list. The rule attribute
// must name an entry of `rules`, otherwise FilterException is thrown.
// The assignments of attr, tag and rule are on lines not visible here.
664 void yf::HttpRewrite::Content::configure(
665 const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
667 for (; ptr; ptr = ptr->next)
669 if (ptr->type != XML_ELEMENT_NODE)
671 if (!strcmp((const char *) ptr->name, "within"))
// Attribute names; values[] is filled in the same order.
673 static const char *names[6] =
674 { "header", "attr", "tag", "rule", "reqline", 0 };
675 std::string values[5];
676 mp::xml::parse_attr(ptr, names, values);
678 w.header = values[0];
// values[3] is the rule name.
681 std::map<std::string,RulePtr>::const_iterator it =
682 rules.find(values[3]);
683 if (it == rules.end())
684 throw mp::filter::FilterException
685 ("Reference to non-existing rule '" + values[3] +
686 "' in http_rewrite filter");
// reqline is enabled only by the exact value "1".
688 w.reqline = values[4] == "1";
689 within_list.push_back(w);
// Configures one phase from its XML element.
// Reads the "verbose" attribute, then walks the children: "rule" elements
// are collected into a local table keyed by name, and "content" elements are
// turned into Content objects (resolving rule references against that table)
// and appended to phase.content_list.
// Throws FilterException on an unexpected element or attribute and on a
// content element without a type.
694 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
696 static const char *names[2] = { "verbose", 0 };
697 std::string values[1];
699 mp::xml::parse_attr(ptr, names, values);
701 phase.m_verbose = atoi(values[0].c_str());
// Rules seen so far in this phase. Content sections look rules up when
// they are configured, so a rule must precede the content that uses it.
703 std::map<std::string, RulePtr > rules;
704 for (ptr = ptr->children; ptr; ptr = ptr->next)
706 if (ptr->type != XML_ELEMENT_NODE)
708 else if (!strcmp((const char *) ptr->name, "rule"))
710 static const char *names[2] = { "name", 0 };
711 std::string values[1];
// A rule without a name attribute is stored as "default".
712 values[0] = "default";
713 mp::xml::parse_attr(ptr, names, values);
715 RulePtr rule(new Rule);
716 for (xmlNode *p = ptr->children; p; p = p->next)
718 if (p->type != XML_ELEMENT_NODE)
720 if (!strcmp((const char *) p->name, "rewrite"))
// "from" is the pattern, "to" the replacement recipe.
724 const struct _xmlAttr *attr;
725 for (attr = p->properties; attr; attr = attr->next)
727 if (!strcmp((const char *) attr->name, "from"))
728 from = mp::xml::get_text(attr->children);
729 else if (!strcmp((const char *) attr->name, "to"))
730 replace.recipe = mp::xml::get_text(attr->children);
732 throw mp::filter::FilterException
734 + std::string((const char *) attr->name)
735 + " in rewrite section of http_rewrite");
737 yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
738 from.c_str(), replace.recipe.c_str());
// The condition guarding these two calls is on a line not visible
// here.
741 replace.parse_groups(from);
742 rule->replace_list.push_back(replace);
746 throw mp::filter::FilterException
748 + std::string((const char *) p->name)
749 + " in http_rewrite filter");
// A later rule with the same name replaces the earlier one.
751 rules[values[0]] = rule;
753 else if (!strcmp((const char *) ptr->name, "content"))
755 static const char *names[3] =
756 { "type", "mime", 0 };
757 std::string values[2];
758 mp::xml::parse_attr(ptr, names, values);
759 if (values[0].empty())
761 throw mp::filter::FilterException
// NOTE(review): the message below contains a doubled word ("for for").
762 ("Missing attribute, type for for element "
763 + std::string((const char *) ptr->name)
764 + " in http_rewrite filter");
// The mime attribute becomes the regex matched against Content-Type;
// it is assigned even when empty.
769 // if (!values[1].empty())
770 c.content_re = values[1];
771 c.configure(ptr->children, rules);
772 phase.content_list.push_back(c);
776 throw mp::filter::FilterException
778 + std::string((const char *) ptr->name)
779 + " in http_rewrite filter");
784 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
787 for (ptr = ptr->children; ptr; ptr = ptr->next)
789 if (ptr->type != XML_ELEMENT_NODE)
791 else if (!strcmp((const char *) ptr->name, "request"))
793 configure_phase(ptr, *req_phase);
795 else if (!strcmp((const char *) ptr->name, "response"))
797 configure_phase(ptr, *res_phase);
801 throw mp::filter::FilterException
803 + std::string((const char *) ptr->name)
804 + " in http_rewrite1 filter");
// Factory function: returns a newly allocated HttpRewrite filter.
809 static mp::filter::Base* filter_creator()
811 return new mp::filter::HttpRewrite;
// Filter registration record for http_rewrite. Its initializer is on lines
// not visible in this listing (presumably it references filter_creator).
815 struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
826 * c-file-style: "Stroustrup"
827 * indent-tabs-mode: nil
829 * vim: shiftwidth=4 tabstop=8 expandtab