1 /* This file is part of Metaproxy.
2 Copyright (C) 2005-2013 Index Data
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
34 #include <sys/types.h>
// Shorthand aliases: mp for metaproxy_1, yf for metaproxy_1::filter.
37 namespace mp = metaproxy_1;
38 namespace yf = mp::filter;
// Opens the metaproxy_1 namespace in which the HttpRewrite helper classes
// are defined.
40 namespace metaproxy_1 {
// One search/replace step of a rule: a regex to look for and a recipe from
// which the replacement text is built.
42 class HttpRewrite::Replace {
// Capture-group number -> group name; entries are added by parse_groups().
46 std::map<int, std::string> group_index;
// Runs the regex over txt. Non-empty matches of named groups are stored in
// vars, and each match is replaced by the expanded recipe.
// Rule::test_patterns() skips an empty return value and tries the next step.
47 const std::string search_replace(
48 std::map<std::string, std::string> & vars,
49 const std::string & txt) const;
// Builds the replacement text from the recipe, resolving ${...} references
// against vars.
50 std::string sub_vars (
51 const std::map<std::string, std::string> & vars) const;
// A named rule: an ordered list of Replace steps.
55 class HttpRewrite::Rule {
// Steps are tried in list order by test_patterns().
57 std::list<Replace> replace_list;
// Applies the steps to txt in order and returns the first non-empty result.
// Named-group captures made along the way are written to vars.
58 const std::string test_patterns(
59 std::map<std::string, std::string> & vars,
60 const std::string & txt) const;
// Ties a rule to a location in the HTTP message. Instances are created from
// <within> elements in configure_section() and are read through their
// `header` and `rule` members elsewhere in this file.
62 class HttpRewrite::Within {
// Rewrite configuration for one direction of traffic. HttpRewrite holds one
// Section for requests and one for responses.
70 class HttpRewrite::Section {
// Within entries in configuration order; the rewrite_* methods consult the
// first entry.
72 std::list<Within> within_list;
// Rewrites the request line (hreq->path); captures go into vars.
73 void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
74 std::map<std::string, std::string> & vars) const;
// Rewrites each header of the linked list starting at headers.
75 void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
76 std::map<std::string, std::string> & vars) const;
// Rewrites the message body in place through the content_buf / content_len
// pointers.
77 void rewrite_body(mp::odr & o,
78 char **content_buf, int *content_len,
79 std::map<std::string, std::string> & vars) const;
// Constructs the filter with a freshly allocated, empty Section for requests
// and another for responses.
84 yf::HttpRewrite::HttpRewrite() :
85 req_section(new Section), res_section(new Section)
// Destructor.
89 yf::HttpRewrite::~HttpRewrite()
// Filter entry point. Applies the request-section rules to the package's
// HTTP request and the response-section rules to its HTTP response.
93 void yf::HttpRewrite::process(mp::Package & package) const
95 yaz_log(YLOG_LOG, "HttpRewrite begins....");
96 Z_GDU *gdu = package.request().get();
97 //map of request/response vars
// Declared once and passed to both the request and the response rewriting
// below, so values captured from the request are visible to response rules.
98 std::map<std::string, std::string> vars;
// Rewriting is attempted only when the request GDU is an HTTP request.
100 if (gdu && gdu->which == Z_GDU_HTTP_Request)
102 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
104 req_section->rewrite_reqline(o, hreq, vars);
105 yaz_log(YLOG_LOG, ">> Request headers");
106 req_section->rewrite_headers(o, hreq->headers, vars);
107 req_section->rewrite_body(o,
108 &hreq->content_buf, &hreq->content_len, vars);
// Store the modified GDU back as the package's request.
109 package.request() = gdu;
112 gdu = package.response().get();
// Likewise, only an HTTP response is rewritten.
113 if (gdu && gdu->which == Z_GDU_HTTP_Response)
115 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
116 yaz_log(YLOG_LOG, "Response code %d", hres->code);
// NOTE(review): "Respose" in the log text below looks like a typo for
// "Response".
118 yaz_log(YLOG_LOG, "<< Respose headers");
119 res_section->rewrite_headers(o, hres->headers, vars);
120 res_section->rewrite_body(o, &hres->content_buf,
121 &hres->content_len, vars);
// Store the modified GDU back as the package's response.
122 package.response() = gdu;
// Rewrites the request line of hreq. The URL is run through the rule of the
// first within_list entry and the result replaces hreq->path. Text captured
// by named groups is recorded in vars.
126 void yf::HttpRewrite::Section::rewrite_reqline (mp::odr & o,
127 Z_HTTP_Request *hreq,
128 std::map<std::string, std::string> & vars) const
130 //rewrite the request line
// strstr() returning hreq->path itself means the path starts with "http://".
132 if (strstr(hreq->path, "http://") == hreq->path)
134 yaz_log(YLOG_LOG, "Path in the method line is absolute, "
135 "possibly a proxy request");
140 //TODO what about proto
// NOTE(review): z_HTTP_header_lookup() presumably returns NULL when the
// request has no Host header; appending NULL to a std::string is undefined.
// Confirm that a Host header is guaranteed here.
142 path += z_HTTP_header_lookup(hreq->headers, "Host");
// Only the first Within entry is looked at; later entries are not visited.
146 std::list<Within>::const_iterator it = within_list.begin();
147 if (it != within_list.end())
149 RulePtr rule = it->rule;
151 yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
152 std::string npath = rule->test_patterns(vars, path);
155 yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
// The new path is duplicated with odr_strdup() using o.
156 hreq->path = odr_strdup(o, npath.c_str());
// Rewrites HTTP headers. Each header in the linked list is joined into one
// string from its name and value and run through the rule of the first
// within_list entry. The result is split back into name and value at the
// first ": ". Text captured by named groups is recorded in vars.
161 void yf::HttpRewrite::Section::rewrite_headers(mp::odr & o,
162 Z_HTTP_Header *headers,
163 std::map<std::string, std::string> & vars) const
165 for (Z_HTTP_Header *header = headers;
167 header = header->next)
169 std::string sheader(header->name);
171 sheader += header->value;
172 yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
// Only the first Within entry is looked at, for every header.
174 std::list<Within>::const_iterator it = within_list.begin();
175 if (it == within_list.end())
177 RulePtr rule = it->rule;
179 std::string out = rule->test_patterns(vars, sheader);
// A rewritten header must still contain the ": " separator so it can be
// split into name and value.
182 size_t pos = out.find(": ");
183 if (pos == std::string::npos)
185 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
// Name is the text before the separator, value the text after it; both are
// duplicated with odr_strdup() using o.
188 header->name = odr_strdup(o, out.substr(0, pos).c_str());
189 header->value = odr_strdup(o, out.substr(pos+2,
190 std::string::npos).c_str());
// Rewrites the message body. The body is run through the rule of the first
// within_list entry, and *content_buf / *content_len are replaced with the
// rewritten text and its size. Captures are recorded in vars.
195 void yf::HttpRewrite::Section::rewrite_body(mp::odr & o,
198 std::map<std::string, std::string> & vars) const
// Only the first Within entry is looked at.
203 std::list<Within>::const_iterator it = within_list.begin();
204 if (it != within_list.end())
206 RulePtr rule = it->rule;
// NOTE(review): the body is read as a NUL-terminated C string and
// *content_len is not used on input. Confirm the buffer is always
// terminated and that bodies with embedded NUL bytes need not be supported.
208 std::string body(*content_buf);
209 std::string nbody = rule->test_patterns(vars, body);
// The new body is duplicated with odr_strdup() using o.
212 *content_buf = odr_strdup(o, nbody.c_str());
213 *content_len = nbody.size();
220 * Tests pattern from the vector in order and executes recipe on
// Tries each Replace step of this rule against txt, in list order, and
// returns the first non-empty result. An empty result from a step means the
// next step is tried. Captures made by the steps are stored in vars.
223 const std::string yf::HttpRewrite::Rule::test_patterns(
224 std::map<std::string, std::string> & vars,
225 const std::string & txt) const
227 std::list<Replace>::const_iterator it = replace_list.begin();
229 for (; it != replace_list.end(); it++)
231 std::string out = it->search_replace(vars, txt);
// First step that produces output wins.
232 if (!out.empty()) return out;
// Searches txt for every match of this step's regex. For each match, named
// groups with non-empty text are stored in vars, the recipe is expanded with
// sub_vars(), and the text before the match is appended to the output.
// After the last match the remaining tail of txt is appended.
237 const std::string yf::HttpRewrite::Replace::search_replace(
238 std::map<std::string, std::string> & vars,
239 const std::string & txt) const
241 //exec regex against value
// The regex object is constructed from the pattern string on every call.
242 boost::regex re(regex);
244 std::string::const_iterator start, end;
// NOTE(review): a pattern that can match the empty string would leave
// `start` unchanged after a match, so this loop would not advance. Confirm
// such patterns cannot be configured.
248 while (regex_search(start, end, what, re)) //find next full match
// Index 0 is the whole match, so group numbering starts at 1.
251 for (i = 1; i < what.size(); ++i)
253 //check if the group is named
254 std::map<int, std::string>::const_iterator it
255 = group_index.find(i);
256 if (it != group_index.end())
// Groups that matched nothing do not overwrite an earlier value in vars.
258 if (!what[i].str().empty())
259 vars[it->second] = what[i];
263 //prepare replacement string
264 std::string rvalue = sub_vars(vars);
265 yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
266 what.str(0).c_str(), rvalue.c_str());
// Copy the unmatched text between the previous match and this one.
267 out.append(start, what[0].first);
269 start = what[0].second; //move search forward
271 //if we had a match cat the last part
// `start` has moved away from txt.begin() only if a non-empty prefix was
// consumed by a match.
272 if (start != txt.begin())
273 out.append(start, end);
// Scans the regex text for named capture groups and records each one in
// group_index under its group number. Throws mp::filter::FilterException for
// a group name with a non-alphanumeric character or an unterminated name.
277 void yf::HttpRewrite::Replace::parse_groups()
281 const std::string & str = regex;
283 yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
284 for (size_t i = 0; i < str.size(); ++i)
// Backslash handling: `esc` tracks whether the current character is escaped.
287 if (!esc && str[i] == '\\')
// An unescaped '(' opens a group.
292 if (!esc && str[i] == '(') //group starts
295 if (i+1 < str.size() && str[i+1] == '?') //group with attrs
298 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
// A non-capturing group has no number of its own, so the group counter is
// stepped back (never below zero).
300 if (gnum > 0) gnum--;
306 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
308 if (i+1 < str.size() && str[i+1] == '<') //named
// Read the group name up to the closing '>'.
313 while (++i < str.size())
315 if (str[i] == '>') { term = true; break; }
// NOTE(review): isalnum() is given a plain char; for negative values (bytes
// >= 0x80 where char is signed) the behavior is undefined. Consider casting
// to unsigned char.
316 if (!isalnum(str[i]))
317 throw mp::filter::FilterException
318 ("Only alphanumeric chars allowed, found "
322 + boost::lexical_cast<std::string>(i));
// NOTE(review): the message below opens a quote before gname but never
// closes it.
326 throw mp::filter::FilterException
327 ("Unterminated group name '" + gname
328 + " in '" + str +"'");
// Remember the name for this group number; search_replace() uses it to
// store the group's match in the vars map.
329 group_index[gnum] = gname;
330 yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
331 gname.c_str(), gnum);
// Expands this step's recipe into the replacement text, resolving ${...}
// variable references against vars. Throws mp::filter::FilterException for
// an unterminated or malformed reference.
340 std::string yf::HttpRewrite::Replace::sub_vars (
341 const std::map<std::string, std::string> & vars) const
345 const std::string & in = recipe;
346 for (size_t i = 0; i < in.size(); ++i)
// Backslash handling: `esc` tracks whether the current character is escaped.
348 if (!esc && in[i] == '\\')
// An unescaped '$' introduces a variable reference.
353 if (!esc && in[i] == '$') //var
355 if (i+1 < in.size() && in[i+1] == '{') //ref prefix
// Scan forward to the closing '}'.
360 while (++i < in.size())
362 if (in[i] == '}') { term = true; break; }
365 if (!term) throw mp::filter::FilterException
366 ("Unterminated var ref in '"+in+"' at "
367 + boost::lexical_cast<std::string>(i));
368 std::map<std::string, std::string>::const_iterator it
// Handling of a successful lookup follows this check.
370 if (it != vars.end())
377 throw mp::filter::FilterException
378 ("Malformed or trimmed var ref in '"
379 +in+"' at "+boost::lexical_cast<std::string>(i));
// Parses the children of one section element of the filter configuration.
// <rule> elements define named rules made of <rewrite from=".." to=".."/>
// steps. <within> elements refer to a previously defined rule by name and
// are appended to the section's within_list. Any other element, or an
// unknown attribute on <rewrite>, raises mp::filter::FilterException.
391 void yf::HttpRewrite::configure_section(const xmlNode *ptr,
// Rules defined so far in this section, keyed by name. A <within> can only
// refer to a rule that appears before it.
394 std::map<std::string, RulePtr > rules;
395 for (ptr = ptr->children; ptr; ptr = ptr->next)
397 if (ptr->type != XML_ELEMENT_NODE)
399 else if (!strcmp((const char *) ptr->name, "rule"))
401 static const char *names[2] = { "name", 0 };
402 std::string values[1];
// A rule without a name attribute is called "default".
403 values[0] = "default";
404 mp::xml::parse_attr(ptr, names, values);
406 RulePtr rule(new Rule);
407 for (xmlNode *p = ptr->children; p; p = p->next)
409 if (p->type != XML_ELEMENT_NODE)
411 if (!strcmp((const char *) p->name, "rewrite"))
414 const struct _xmlAttr *attr;
// "from" supplies the regex, "to" the replacement recipe.
415 for (attr = p->properties; attr; attr = attr->next)
417 if (!strcmp((const char *) attr->name, "from"))
418 replace.regex = mp::xml::get_text(attr->children);
419 else if (!strcmp((const char *) attr->name, "to"))
420 replace.recipe = mp::xml::get_text(attr->children);
422 throw mp::filter::FilterException
424 + std::string((const char *) attr->name)
425 + " in rewrite section of http_rewrite");
427 yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
428 replace.regex.c_str(), replace.recipe.c_str());
// Named groups are indexed before the empty-regex check below.
429 replace.parse_groups();
// A step with an empty regex is dropped.
430 if (!replace.regex.empty())
431 rule->replace_list.push_back(replace);
434 throw mp::filter::FilterException
436 + std::string((const char *) p->name)
437 + " in http_rewrite filter");
// A rule with no usable steps is not registered, so a later <within> that
// names it fails with "Reference to non-existing rule".
439 if (!rule->replace_list.empty())
440 rules[values[0]] = rule;
442 else if (!strcmp((const char *) ptr->name, "within"))
444 static const char *names[5] =
445 { "header", "attr", "tag", "rule", 0 };
446 std::string values[4];
447 mp::xml::parse_attr(ptr, names, values);
449 w.header = values[0];
// values[3] is the "rule" attribute: the name of the rule to attach.
452 std::map<std::string,RulePtr>::const_iterator it =
453 rules.find(values[3]);
454 if (it == rules.end())
455 throw mp::filter::FilterException
456 ("Reference to non-existing rule '" + values[3] +
457 "' in http_rewrite filter");
459 section.within_list.push_back(w);
463 throw mp::filter::FilterException
465 + std::string((const char *) ptr->name)
466 + " in http_rewrite filter");
// Reads the filter configuration. A <request> child configures req_section
// and a <response> child configures res_section. Any other element raises
// mp::filter::FilterException.
// NOTE(review): test_only is not referenced in the lines shown here.
471 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
474 for (ptr = ptr->children; ptr; ptr = ptr->next)
476 if (ptr->type != XML_ELEMENT_NODE)
478 else if (!strcmp((const char *) ptr->name, "request"))
480 configure_section(ptr, *req_section);
482 else if (!strcmp((const char *) ptr->name, "response"))
484 configure_section(ptr, *res_section);
// NOTE(review): "http_rewrite1" below is probably a typo; the other error
// messages in this file say "http_rewrite".
488 throw mp::filter::FilterException
490 + std::string((const char *) ptr->name)
491 + " in http_rewrite1 filter");
// Factory function: returns a newly allocated HttpRewrite filter as a
// mp::filter::Base pointer.
496 static mp::filter::Base* filter_creator()
498 return new mp::filter::HttpRewrite;
// Global descriptor object for the http_rewrite filter.
// NOTE(review): presumably this is what Metaproxy looks up to reach
// filter_creator() -- confirm against the initializer.
502 struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
513 * c-file-style: "Stroustrup"
514 * indent-tabs-mode: nil
516 * vim: shiftwidth=4 tabstop=8 expandtab