http_rewrite: rename Section -> Phase
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
#include "config.hpp"
#include <metaproxy/filter.hpp>
#include <metaproxy/package.hpp>
#include <metaproxy/util.hpp>
#include "filter_http_rewrite.hpp"

#include <yaz/zgdu.h>
#include <yaz/log.h>

#include <boost/regex.hpp>
#include <boost/lexical_cast.hpp>

#include <cstring>
#include <map>

#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
36
37 namespace mp = metaproxy_1;
38 namespace yf = mp::filter;
39
40 namespace metaproxy_1 {
41     namespace filter {
42         class HttpRewrite::Replace {
43         public:
44             std::string regex;
45             std::string recipe;
46             std::map<int, std::string> group_index;
47             const std::string search_replace(
48                 std::map<std::string, std::string> & vars,
49                 const std::string & txt) const;
50             std::string sub_vars (
51                 const std::map<std::string, std::string> & vars) const;
52             void parse_groups();
53         };
54
55         class HttpRewrite::Rule {
56         public:
57             std::list<Replace> replace_list;
58             const std::string test_patterns(
59                 std::map<std::string, std::string> & vars,
60                 const std::string & txt) const;
61         };
62         class HttpRewrite::Within {
63         public:
64             std::string header;
65             std::string attr;
66             std::string tag;
67             RulePtr rule;
68         };
69
70         class HttpRewrite::Phase {
71         public:
72             std::list<Within> within_list;
73             void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
74                 std::map<std::string, std::string> & vars) const;
75             void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
76                 std::map<std::string, std::string> & vars) const;
77             void rewrite_body(mp::odr & o,
78                 char **content_buf, int *content_len,
79                 std::map<std::string, std::string> & vars) const;
80         };
81     }
82 }
83
84 yf::HttpRewrite::HttpRewrite() :
85     req_phase(new Phase), res_phase(new Phase)
86 {
87 }
88
89 yf::HttpRewrite::~HttpRewrite()
90 {
91 }
92
93 void yf::HttpRewrite::process(mp::Package & package) const
94 {
95     yaz_log(YLOG_LOG, "HttpRewrite begins....");
96     Z_GDU *gdu = package.request().get();
97     //map of request/response vars
98     std::map<std::string, std::string> vars;
99     //we have an http req
100     if (gdu && gdu->which == Z_GDU_HTTP_Request)
101     {
102         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
103         mp::odr o;
104         req_phase->rewrite_reqline(o, hreq, vars);
105         yaz_log(YLOG_LOG, ">> Request headers");
106         req_phase->rewrite_headers(o, hreq->headers, vars);
107         req_phase->rewrite_body(o,
108                 &hreq->content_buf, &hreq->content_len, vars);
109         package.request() = gdu;
110     }
111     package.move();
112     gdu = package.response().get();
113     if (gdu && gdu->which == Z_GDU_HTTP_Response)
114     {
115         Z_HTTP_Response *hres = gdu->u.HTTP_Response;
116         yaz_log(YLOG_LOG, "Response code %d", hres->code);
117         mp::odr o;
118         yaz_log(YLOG_LOG, "<< Respose headers");
119         res_phase->rewrite_headers(o, hres->headers, vars);
120         res_phase->rewrite_body(o, &hres->content_buf,
121                 &hres->content_len, vars);
122         package.response() = gdu;
123     }
124 }
125
126 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
127         Z_HTTP_Request *hreq,
128         std::map<std::string, std::string> & vars) const
129 {
130     //rewrite the request line
131     std::string path;
132     if (strstr(hreq->path, "http://") == hreq->path)
133     {
134         yaz_log(YLOG_LOG, "Path in the method line is absolute, "
135             "possibly a proxy request");
136         path += hreq->path;
137     }
138     else
139     {
140         //TODO what about proto
141         path += "http://";
142         path += z_HTTP_header_lookup(hreq->headers, "Host");
143         path += hreq->path;
144     }
145
146     std::list<Within>::const_iterator it = within_list.begin();
147     if (it != within_list.end())
148     {
149         RulePtr rule = it->rule;
150
151         yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
152         std::string npath = rule->test_patterns(vars, path);
153         if (!npath.empty())
154         {
155             yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
156             hreq->path = odr_strdup(o, npath.c_str());
157         }
158     }
159 }
160
161 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
162         Z_HTTP_Header *headers,
163         std::map<std::string, std::string> & vars) const
164 {
165     for (Z_HTTP_Header *header = headers;
166             header != 0;
167             header = header->next)
168     {
169         std::string sheader(header->name);
170         sheader += ": ";
171         sheader += header->value;
172         yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
173
174         std::list<Within>::const_iterator it = within_list.begin();
175         if (it == within_list.end())
176             continue;
177         RulePtr rule = it->rule;
178
179         std::string out = rule->test_patterns(vars, sheader);
180         if (!out.empty())
181         {
182             size_t pos = out.find(": ");
183             if (pos == std::string::npos)
184             {
185                 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
186                 continue;
187             }
188             header->name = odr_strdup(o, out.substr(0, pos).c_str());
189             header->value = odr_strdup(o, out.substr(pos+2,
190                                                      std::string::npos).c_str());
191         }
192     }
193 }
194
195 void yf::HttpRewrite::Phase::rewrite_body(mp::odr & o,
196         char **content_buf,
197         int *content_len,
198         std::map<std::string, std::string> & vars) const
199 {
200     if (*content_buf)
201     {
202
203         std::list<Within>::const_iterator it = within_list.begin();
204         if (it != within_list.end())
205         {
206             RulePtr rule = it->rule;
207
208             std::string body(*content_buf);
209             std::string nbody = rule->test_patterns(vars, body);
210             if (!nbody.empty())
211             {
212                 *content_buf = odr_strdup(o, nbody.c_str());
213                 *content_len = nbody.size();
214             }
215         }
216     }
217 }
218
219 /**
220  * Tests pattern from the vector in order and executes recipe on
221  the first match.
222  */
223 const std::string yf::HttpRewrite::Rule::test_patterns(
224         std::map<std::string, std::string> & vars,
225         const std::string & txt) const
226 {
227     std::list<Replace>::const_iterator it = replace_list.begin();
228
229     for (; it != replace_list.end(); it++)
230     {
231         std::string out = it->search_replace(vars, txt);
232         if (!out.empty()) return out;
233     }
234     return "";
235 }
236
237 const std::string yf::HttpRewrite::Replace::search_replace(
238         std::map<std::string, std::string> & vars,
239         const std::string & txt) const
240 {
241     //exec regex against value
242     boost::regex re(regex);
243     boost::smatch what;
244     std::string::const_iterator start, end;
245     start = txt.begin();
246     end = txt.end();
247     std::string out;
248     while (regex_search(start, end, what, re)) //find next full match
249     {
250         size_t i;
251         for (i = 1; i < what.size(); ++i)
252         {
253             //check if the group is named
254             std::map<int, std::string>::const_iterator it
255                 = group_index.find(i);
256             if (it != group_index.end())
257             {   //it is
258                 if (!what[i].str().empty())
259                     vars[it->second] = what[i];
260             }
261
262         }
263         //prepare replacement string
264         std::string rvalue = sub_vars(vars);
265         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
266                 what.str(0).c_str(), rvalue.c_str());
267         out.append(start, what[0].first);
268         out.append(rvalue);
269         start = what[0].second; //move search forward
270     }
271     //if we had a match cat the last part
272     if (start != txt.begin())
273         out.append(start, end);
274     return out;
275 }
276
277 void yf::HttpRewrite::Replace::parse_groups()
278 {
279     int gnum = 0;
280     bool esc = false;
281     const std::string & str = regex;
282     std::string res;
283     yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
284     for (size_t i = 0; i < str.size(); ++i)
285     {
286         res += str[i];
287         if (!esc && str[i] == '\\')
288         {
289             esc = true;
290             continue;
291         }
292         if (!esc && str[i] == '(') //group starts
293         {
294             gnum++;
295             if (i+1 < str.size() && str[i+1] == '?') //group with attrs
296             {
297                 i++;
298                 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
299                 {
300                     if (gnum > 0) gnum--;
301                     res += str[i];
302                     i++;
303                     res += str[i];
304                     continue;
305                 }
306                 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
307                     i++;
308                 if (i+1 < str.size() && str[i+1] == '<') //named
309                 {
310                     i++;
311                     std::string gname;
312                     bool term = false;
313                     while (++i < str.size())
314                     {
315                         if (str[i] == '>') { term = true; break; }
316                         if (!isalnum(str[i]))
317                             throw mp::filter::FilterException
318                                 ("Only alphanumeric chars allowed, found "
319                                  " in '"
320                                  + str
321                                  + "' at "
322                                  + boost::lexical_cast<std::string>(i));
323                         gname += str[i];
324                     }
325                     if (!term)
326                         throw mp::filter::FilterException
327                             ("Unterminated group name '" + gname
328                              + " in '" + str +"'");
329                     group_index[gnum] = gname;
330                     yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
331                             gname.c_str(), gnum);
332                 }
333             }
334         }
335         esc = false;
336     }
337     regex = res;
338 }
339
340 std::string yf::HttpRewrite::Replace::sub_vars (
341         const std::map<std::string, std::string> & vars) const
342 {
343     std::string out;
344     bool esc = false;
345     const std::string & in = recipe;
346     for (size_t i = 0; i < in.size(); ++i)
347     {
348         if (!esc && in[i] == '\\')
349         {
350             esc = true;
351             continue;
352         }
353         if (!esc && in[i] == '$') //var
354         {
355             if (i+1 < in.size() && in[i+1] == '{') //ref prefix
356             {
357                 ++i;
358                 std::string name;
359                 bool term = false;
360                 while (++i < in.size())
361                 {
362                     if (in[i] == '}') { term = true; break; }
363                     name += in[i];
364                 }
365                 if (!term) throw mp::filter::FilterException
366                     ("Unterminated var ref in '"+in+"' at "
367                      + boost::lexical_cast<std::string>(i));
368                 std::map<std::string, std::string>::const_iterator it
369                     = vars.find(name);
370                 if (it != vars.end())
371                 {
372                     out += it->second;
373                 }
374             }
375             else
376             {
377                 throw mp::filter::FilterException
378                     ("Malformed or trimmed var ref in '"
379                      +in+"' at "+boost::lexical_cast<std::string>(i));
380             }
381             continue;
382         }
383         //passthru
384         out += in[i];
385         esc = false;
386     }
387     return out;
388 }
389
390
391 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
392 {
393     std::map<std::string, RulePtr > rules;
394     for (ptr = ptr->children; ptr; ptr = ptr->next)
395     {
396         if (ptr->type != XML_ELEMENT_NODE)
397             continue;
398         else if (!strcmp((const char *) ptr->name, "rule"))
399         {
400             static const char *names[2] = { "name", 0 };
401             std::string values[1];
402             values[0] = "default";
403             mp::xml::parse_attr(ptr, names, values);
404
405             RulePtr rule(new Rule);
406             for (xmlNode *p = ptr->children; p; p = p->next)
407             {
408                 if (p->type != XML_ELEMENT_NODE)
409                     continue;
410                 if (!strcmp((const char *) p->name, "rewrite"))
411                 {
412                     Replace replace;
413                     const struct _xmlAttr *attr;
414                     for (attr = p->properties; attr; attr = attr->next)
415                     {
416                         if (!strcmp((const char *) attr->name,  "from"))
417                             replace.regex = mp::xml::get_text(attr->children);
418                         else if (!strcmp((const char *) attr->name,  "to"))
419                             replace.recipe = mp::xml::get_text(attr->children);
420                         else
421                             throw mp::filter::FilterException
422                                 ("Bad attribute "
423                                  + std::string((const char *) attr->name)
424                                  + " in rewrite section of http_rewrite");
425                     }
426                     yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
427                             replace.regex.c_str(), replace.recipe.c_str());
428                     replace.parse_groups();
429                     if (!replace.regex.empty())
430                         rule->replace_list.push_back(replace);
431                 }
432                 else
433                     throw mp::filter::FilterException
434                         ("Bad element "
435                          + std::string((const char *) p->name)
436                          + " in http_rewrite filter");
437             }
438             if (!rule->replace_list.empty())
439                 rules[values[0]] = rule;
440         }
441         else if (!strcmp((const char *) ptr->name, "within"))
442         {
443             static const char *names[5] =
444                 { "header", "attr", "tag", "rule", 0 };
445             std::string values[4];
446             mp::xml::parse_attr(ptr, names, values);
447             Within w;
448             w.header = values[0];
449             w.attr = values[1];
450             w.tag = values[2];
451             std::map<std::string,RulePtr>::const_iterator it =
452                 rules.find(values[3]);
453             if (it == rules.end())
454                 throw mp::filter::FilterException
455                     ("Reference to non-existing rule '" + values[3] +
456                      "' in http_rewrite filter");
457             w.rule = it->second;
458             phase.within_list.push_back(w);
459         }
460         else
461         {
462             throw mp::filter::FilterException
463                 ("Bad element "
464                  + std::string((const char *) ptr->name)
465                  + " in http_rewrite filter");
466         }
467     }
468 }
469
470 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
471         const char *path)
472 {
473     for (ptr = ptr->children; ptr; ptr = ptr->next)
474     {
475         if (ptr->type != XML_ELEMENT_NODE)
476             continue;
477         else if (!strcmp((const char *) ptr->name, "request"))
478         {
479             configure_phase(ptr, *req_phase);
480         }
481         else if (!strcmp((const char *) ptr->name, "response"))
482         {
483             configure_phase(ptr, *res_phase);
484         }
485         else
486         {
487             throw mp::filter::FilterException
488                 ("Bad element "
489                  + std::string((const char *) ptr->name)
490                  + " in http_rewrite1 filter");
491         }
492     }
493 }
494
495 static mp::filter::Base* filter_creator()
496 {
497     return new mp::filter::HttpRewrite;
498 }
499
500 extern "C" {
501     struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
502         0,
503         "http_rewrite",
504         filter_creator
505     };
506 }
507
508
509 /*
510  * Local variables:
511  * c-basic-offset: 4
512  * c-file-style: "Stroustrup"
513  * indent-tabs-mode: nil
514  * End:
515  * vim: shiftwidth=4 tabstop=8 expandtab
516  */
517