Remove redundant include
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24
25 #include <yaz/zgdu.h>
26 #include <yaz/log.h>
27
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
30
31 #include <map>
32
33 namespace mp = metaproxy_1;
34 namespace yf = mp::filter;
35
36 namespace metaproxy_1 {
37     namespace filter {
38         class HttpRewrite::Replace {
39         public:
40             std::string regex;
41             std::string recipe;
42             std::map<int, std::string> group_index;
43             const std::string search_replace(
44                 std::map<std::string, std::string> & vars,
45                 const std::string & txt) const;
46             std::string sub_vars (
47                 const std::map<std::string, std::string> & vars) const;
48             void parse_groups();
49         };
50
51         class HttpRewrite::Rule {
52         public:
53             std::list<Replace> replace_list;
54             const std::string test_patterns(
55                 std::map<std::string, std::string> & vars,
56                 const std::string & txt) const;
57         };
58         class HttpRewrite::Within {
59         public:
60             std::string header;
61             std::string attr;
62             std::string tag;
63             RulePtr rule;
64         };
65
66         class HttpRewrite::Phase {
67         public:
68             std::list<Within> within_list;
69             void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
70                 std::map<std::string, std::string> & vars) const;
71             void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
72                 std::map<std::string, std::string> & vars) const;
73             void rewrite_body(mp::odr & o,
74                 char **content_buf, int *content_len,
75                 std::map<std::string, std::string> & vars) const;
76         };
77     }
78 }
79
80 yf::HttpRewrite::HttpRewrite() :
81     req_phase(new Phase), res_phase(new Phase)
82 {
83 }
84
85 yf::HttpRewrite::~HttpRewrite()
86 {
87 }
88
89 void yf::HttpRewrite::process(mp::Package & package) const
90 {
91     yaz_log(YLOG_LOG, "HttpRewrite begins....");
92     Z_GDU *gdu = package.request().get();
93     //map of request/response vars
94     std::map<std::string, std::string> vars;
95     //we have an http req
96     if (gdu && gdu->which == Z_GDU_HTTP_Request)
97     {
98         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
99         mp::odr o;
100         req_phase->rewrite_reqline(o, hreq, vars);
101         yaz_log(YLOG_LOG, ">> Request headers");
102         req_phase->rewrite_headers(o, hreq->headers, vars);
103         req_phase->rewrite_body(o,
104                 &hreq->content_buf, &hreq->content_len, vars);
105         package.request() = gdu;
106     }
107     package.move();
108     gdu = package.response().get();
109     if (gdu && gdu->which == Z_GDU_HTTP_Response)
110     {
111         Z_HTTP_Response *hres = gdu->u.HTTP_Response;
112         yaz_log(YLOG_LOG, "Response code %d", hres->code);
113         mp::odr o;
114         yaz_log(YLOG_LOG, "<< Respose headers");
115         res_phase->rewrite_headers(o, hres->headers, vars);
116         res_phase->rewrite_body(o, &hres->content_buf,
117                 &hres->content_len, vars);
118         package.response() = gdu;
119     }
120 }
121
122 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
123         Z_HTTP_Request *hreq,
124         std::map<std::string, std::string> & vars) const
125 {
126     //rewrite the request line
127     std::string path;
128     if (strstr(hreq->path, "http://") == hreq->path)
129     {
130         yaz_log(YLOG_LOG, "Path in the method line is absolute, "
131             "possibly a proxy request");
132         path += hreq->path;
133     }
134     else
135     {
136         //TODO what about proto
137         path += "http://";
138         path += z_HTTP_header_lookup(hreq->headers, "Host");
139         path += hreq->path;
140     }
141
142     std::list<Within>::const_iterator it = within_list.begin();
143     if (it != within_list.end())
144     {
145         RulePtr rule = it->rule;
146
147         yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
148         std::string npath = rule->test_patterns(vars, path);
149         if (!npath.empty())
150         {
151             yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
152             hreq->path = odr_strdup(o, npath.c_str());
153         }
154     }
155 }
156
157 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
158         Z_HTTP_Header *headers,
159         std::map<std::string, std::string> & vars) const
160 {
161     for (Z_HTTP_Header *header = headers;
162             header != 0;
163             header = header->next)
164     {
165         std::string sheader(header->name);
166         sheader += ": ";
167         sheader += header->value;
168         yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
169
170         std::list<Within>::const_iterator it = within_list.begin();
171         if (it == within_list.end())
172             continue;
173         RulePtr rule = it->rule;
174
175         std::string out = rule->test_patterns(vars, sheader);
176         if (!out.empty())
177         {
178             size_t pos = out.find(": ");
179             if (pos == std::string::npos)
180             {
181                 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
182                 continue;
183             }
184             header->name = odr_strdup(o, out.substr(0, pos).c_str());
185             header->value = odr_strdup(o, out.substr(pos+2,
186                                                      std::string::npos).c_str());
187         }
188     }
189 }
190
191 void yf::HttpRewrite::Phase::rewrite_body(mp::odr & o,
192         char **content_buf,
193         int *content_len,
194         std::map<std::string, std::string> & vars) const
195 {
196     if (*content_buf)
197     {
198
199         std::list<Within>::const_iterator it = within_list.begin();
200         if (it != within_list.end())
201         {
202             RulePtr rule = it->rule;
203
204             std::string body(*content_buf);
205             std::string nbody = rule->test_patterns(vars, body);
206             if (!nbody.empty())
207             {
208                 *content_buf = odr_strdup(o, nbody.c_str());
209                 *content_len = nbody.size();
210             }
211         }
212     }
213 }
214
215 /**
216  * Tests pattern from the vector in order and executes recipe on
217  the first match.
218  */
219 const std::string yf::HttpRewrite::Rule::test_patterns(
220         std::map<std::string, std::string> & vars,
221         const std::string & txt) const
222 {
223     std::list<Replace>::const_iterator it = replace_list.begin();
224
225     for (; it != replace_list.end(); it++)
226     {
227         std::string out = it->search_replace(vars, txt);
228         if (!out.empty()) return out;
229     }
230     return "";
231 }
232
233 const std::string yf::HttpRewrite::Replace::search_replace(
234         std::map<std::string, std::string> & vars,
235         const std::string & txt) const
236 {
237     //exec regex against value
238     boost::regex re(regex);
239     boost::smatch what;
240     std::string::const_iterator start, end;
241     start = txt.begin();
242     end = txt.end();
243     std::string out;
244     while (regex_search(start, end, what, re)) //find next full match
245     {
246         size_t i;
247         for (i = 1; i < what.size(); ++i)
248         {
249             //check if the group is named
250             std::map<int, std::string>::const_iterator it
251                 = group_index.find(i);
252             if (it != group_index.end())
253             {   //it is
254                 if (!what[i].str().empty())
255                     vars[it->second] = what[i];
256             }
257
258         }
259         //prepare replacement string
260         std::string rvalue = sub_vars(vars);
261         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
262                 what.str(0).c_str(), rvalue.c_str());
263         out.append(start, what[0].first);
264         out.append(rvalue);
265         start = what[0].second; //move search forward
266     }
267     //if we had a match cat the last part
268     if (start != txt.begin())
269         out.append(start, end);
270     return out;
271 }
272
273 void yf::HttpRewrite::Replace::parse_groups()
274 {
275     int gnum = 0;
276     bool esc = false;
277     const std::string & str = regex;
278     std::string res;
279     yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
280     for (size_t i = 0; i < str.size(); ++i)
281     {
282         res += str[i];
283         if (!esc && str[i] == '\\')
284         {
285             esc = true;
286             continue;
287         }
288         if (!esc && str[i] == '(') //group starts
289         {
290             gnum++;
291             if (i+1 < str.size() && str[i+1] == '?') //group with attrs
292             {
293                 i++;
294                 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
295                 {
296                     if (gnum > 0) gnum--;
297                     res += str[i];
298                     i++;
299                     res += str[i];
300                     continue;
301                 }
302                 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
303                     i++;
304                 if (i+1 < str.size() && str[i+1] == '<') //named
305                 {
306                     i++;
307                     std::string gname;
308                     bool term = false;
309                     while (++i < str.size())
310                     {
311                         if (str[i] == '>') { term = true; break; }
312                         if (!isalnum(str[i]))
313                             throw mp::filter::FilterException
314                                 ("Only alphanumeric chars allowed, found "
315                                  " in '"
316                                  + str
317                                  + "' at "
318                                  + boost::lexical_cast<std::string>(i));
319                         gname += str[i];
320                     }
321                     if (!term)
322                         throw mp::filter::FilterException
323                             ("Unterminated group name '" + gname
324                              + " in '" + str +"'");
325                     group_index[gnum] = gname;
326                     yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
327                             gname.c_str(), gnum);
328                 }
329             }
330         }
331         esc = false;
332     }
333     regex = res;
334 }
335
336 std::string yf::HttpRewrite::Replace::sub_vars (
337         const std::map<std::string, std::string> & vars) const
338 {
339     std::string out;
340     bool esc = false;
341     const std::string & in = recipe;
342     for (size_t i = 0; i < in.size(); ++i)
343     {
344         if (!esc && in[i] == '\\')
345         {
346             esc = true;
347             continue;
348         }
349         if (!esc && in[i] == '$') //var
350         {
351             if (i+1 < in.size() && in[i+1] == '{') //ref prefix
352             {
353                 ++i;
354                 std::string name;
355                 bool term = false;
356                 while (++i < in.size())
357                 {
358                     if (in[i] == '}') { term = true; break; }
359                     name += in[i];
360                 }
361                 if (!term) throw mp::filter::FilterException
362                     ("Unterminated var ref in '"+in+"' at "
363                      + boost::lexical_cast<std::string>(i));
364                 std::map<std::string, std::string>::const_iterator it
365                     = vars.find(name);
366                 if (it != vars.end())
367                 {
368                     out += it->second;
369                 }
370             }
371             else
372             {
373                 throw mp::filter::FilterException
374                     ("Malformed or trimmed var ref in '"
375                      +in+"' at "+boost::lexical_cast<std::string>(i));
376             }
377             continue;
378         }
379         //passthru
380         out += in[i];
381         esc = false;
382     }
383     return out;
384 }
385
386
387 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
388 {
389     std::map<std::string, RulePtr > rules;
390     for (ptr = ptr->children; ptr; ptr = ptr->next)
391     {
392         if (ptr->type != XML_ELEMENT_NODE)
393             continue;
394         else if (!strcmp((const char *) ptr->name, "rule"))
395         {
396             static const char *names[2] = { "name", 0 };
397             std::string values[1];
398             values[0] = "default";
399             mp::xml::parse_attr(ptr, names, values);
400
401             RulePtr rule(new Rule);
402             for (xmlNode *p = ptr->children; p; p = p->next)
403             {
404                 if (p->type != XML_ELEMENT_NODE)
405                     continue;
406                 if (!strcmp((const char *) p->name, "rewrite"))
407                 {
408                     Replace replace;
409                     const struct _xmlAttr *attr;
410                     for (attr = p->properties; attr; attr = attr->next)
411                     {
412                         if (!strcmp((const char *) attr->name,  "from"))
413                             replace.regex = mp::xml::get_text(attr->children);
414                         else if (!strcmp((const char *) attr->name,  "to"))
415                             replace.recipe = mp::xml::get_text(attr->children);
416                         else
417                             throw mp::filter::FilterException
418                                 ("Bad attribute "
419                                  + std::string((const char *) attr->name)
420                                  + " in rewrite section of http_rewrite");
421                     }
422                     yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
423                             replace.regex.c_str(), replace.recipe.c_str());
424                     replace.parse_groups();
425                     if (!replace.regex.empty())
426                         rule->replace_list.push_back(replace);
427                 }
428                 else
429                     throw mp::filter::FilterException
430                         ("Bad element "
431                          + std::string((const char *) p->name)
432                          + " in http_rewrite filter");
433             }
434             if (!rule->replace_list.empty())
435                 rules[values[0]] = rule;
436         }
437         else if (!strcmp((const char *) ptr->name, "within"))
438         {
439             static const char *names[5] =
440                 { "header", "attr", "tag", "rule", 0 };
441             std::string values[4];
442             mp::xml::parse_attr(ptr, names, values);
443             Within w;
444             w.header = values[0];
445             w.attr = values[1];
446             w.tag = values[2];
447             std::map<std::string,RulePtr>::const_iterator it =
448                 rules.find(values[3]);
449             if (it == rules.end())
450                 throw mp::filter::FilterException
451                     ("Reference to non-existing rule '" + values[3] +
452                      "' in http_rewrite filter");
453             w.rule = it->second;
454             phase.within_list.push_back(w);
455         }
456         else
457         {
458             throw mp::filter::FilterException
459                 ("Bad element "
460                  + std::string((const char *) ptr->name)
461                  + " in http_rewrite filter");
462         }
463     }
464 }
465
466 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
467         const char *path)
468 {
469     for (ptr = ptr->children; ptr; ptr = ptr->next)
470     {
471         if (ptr->type != XML_ELEMENT_NODE)
472             continue;
473         else if (!strcmp((const char *) ptr->name, "request"))
474         {
475             configure_phase(ptr, *req_phase);
476         }
477         else if (!strcmp((const char *) ptr->name, "response"))
478         {
479             configure_phase(ptr, *res_phase);
480         }
481         else
482         {
483             throw mp::filter::FilterException
484                 ("Bad element "
485                  + std::string((const char *) ptr->name)
486                  + " in http_rewrite1 filter");
487         }
488     }
489 }
490
491 static mp::filter::Base* filter_creator()
492 {
493     return new mp::filter::HttpRewrite;
494 }
495
496 extern "C" {
497     struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
498         0,
499         "http_rewrite",
500         filter_creator
501     };
502 }
503
504
505 /*
506  * Local variables:
507  * c-basic-offset: 4
508  * c-file-style: "Stroustrup"
509  * indent-tabs-mode: nil
510  * End:
511  * vim: shiftwidth=4 tabstop=8 expandtab
512  */
513