http_rewrite: parsing of new configuration
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24
25 #include <yaz/zgdu.h>
26 #include <yaz/log.h>
27
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
30
31 #include <map>
32
33 #if HAVE_SYS_TYPES_H
34 #include <sys/types.h>
35 #endif
36
37 namespace mp = metaproxy_1;
38 namespace yf = mp::filter;
39
40 namespace metaproxy_1 {
41     namespace filter {
42         class HttpRewrite::Replace {
43         public:
44             std::string regex;
45             std::string recipe;
46             std::map<int, std::string> group_index;
47             const std::string search_replace(
48                 std::map<std::string, std::string> & vars,
49                 const std::string & txt) const;
50             std::string sub_vars (
51                 const std::map<std::string, std::string> & vars) const;
52             void parse_groups();
53         };
54
55         class HttpRewrite::Rule {
56         public:
57             std::list<Replace> replace_list;
58             const std::string test_patterns(
59                 std::map<std::string, std::string> & vars,
60                 const std::string & txt) const;
61         };
62         class HttpRewrite::Within {
63         public:
64             std::string header;
65             std::string attr;
66             std::string tag;
67             RulePtr rule;
68         };
69
70         class HttpRewrite::Section {
71         public:
72             std::list<Within> within_list;
73             void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
74                 std::map<std::string, std::string> & vars) const;
75             void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
76                 std::map<std::string, std::string> & vars) const;
77             void rewrite_body(mp::odr & o,
78                 char **content_buf, int *content_len,
79                 std::map<std::string, std::string> & vars) const;
80         };
81     }
82 }
83
84 yf::HttpRewrite::HttpRewrite() :
85     req_section(new Section), res_section(new Section)
86 {
87 }
88
89 yf::HttpRewrite::~HttpRewrite()
90 {
91 }
92
93 void yf::HttpRewrite::process(mp::Package & package) const
94 {
95     yaz_log(YLOG_LOG, "HttpRewrite begins....");
96     Z_GDU *gdu = package.request().get();
97     //map of request/response vars
98     std::map<std::string, std::string> vars;
99     //we have an http req
100     if (gdu && gdu->which == Z_GDU_HTTP_Request)
101     {
102         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
103         mp::odr o;
104         req_section->rewrite_reqline(o, hreq, vars);
105         yaz_log(YLOG_LOG, ">> Request headers");
106         req_section->rewrite_headers(o, hreq->headers, vars);
107         req_section->rewrite_body(o,
108                 &hreq->content_buf, &hreq->content_len, vars);
109         package.request() = gdu;
110     }
111     package.move();
112     gdu = package.response().get();
113     if (gdu && gdu->which == Z_GDU_HTTP_Response)
114     {
115         Z_HTTP_Response *hres = gdu->u.HTTP_Response;
116         yaz_log(YLOG_LOG, "Response code %d", hres->code);
117         mp::odr o;
118         yaz_log(YLOG_LOG, "<< Respose headers");
119         res_section->rewrite_headers(o, hres->headers, vars);
120         res_section->rewrite_body(o, &hres->content_buf,
121                 &hres->content_len, vars);
122         package.response() = gdu;
123     }
124 }
125
126 void yf::HttpRewrite::Section::rewrite_reqline (mp::odr & o,
127         Z_HTTP_Request *hreq,
128         std::map<std::string, std::string> & vars) const
129 {
130     //rewrite the request line
131     std::string path;
132     if (strstr(hreq->path, "http://") == hreq->path)
133     {
134         yaz_log(YLOG_LOG, "Path in the method line is absolute, "
135             "possibly a proxy request");
136         path += hreq->path;
137     }
138     else
139     {
140         //TODO what about proto
141         path += "http://";
142         path += z_HTTP_header_lookup(hreq->headers, "Host");
143         path += hreq->path;
144     }
145
146     std::list<Within>::const_iterator it = within_list.begin();
147     if (it != within_list.end())
148     {
149         RulePtr rule = it->rule;
150
151         yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
152         std::string npath = rule->test_patterns(vars, path);
153         if (!npath.empty())
154         {
155             yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
156             hreq->path = odr_strdup(o, npath.c_str());
157         }
158     }
159 }
160
161 void yf::HttpRewrite::Section::rewrite_headers(mp::odr & o,
162         Z_HTTP_Header *headers,
163         std::map<std::string, std::string> & vars) const
164 {
165     for (Z_HTTP_Header *header = headers;
166             header != 0;
167             header = header->next)
168     {
169         std::string sheader(header->name);
170         sheader += ": ";
171         sheader += header->value;
172         yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
173
174         std::list<Within>::const_iterator it = within_list.begin();
175         if (it == within_list.end())
176             continue;
177         RulePtr rule = it->rule;
178
179         std::string out = rule->test_patterns(vars, sheader);
180         if (!out.empty())
181         {
182             size_t pos = out.find(": ");
183             if (pos == std::string::npos)
184             {
185                 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
186                 continue;
187             }
188             header->name = odr_strdup(o, out.substr(0, pos).c_str());
189             header->value = odr_strdup(o, out.substr(pos+2,
190                                                      std::string::npos).c_str());
191         }
192     }
193 }
194
195 void yf::HttpRewrite::Section::rewrite_body(mp::odr & o,
196         char **content_buf,
197         int *content_len,
198         std::map<std::string, std::string> & vars) const
199 {
200     if (*content_buf)
201     {
202
203         std::list<Within>::const_iterator it = within_list.begin();
204         if (it != within_list.end())
205         {
206             RulePtr rule = it->rule;
207
208             std::string body(*content_buf);
209             std::string nbody = rule->test_patterns(vars, body);
210             if (!nbody.empty())
211             {
212                 *content_buf = odr_strdup(o, nbody.c_str());
213                 *content_len = nbody.size();
214             }
215         }
216     }
217 }
218
219 /**
220  * Tests pattern from the vector in order and executes recipe on
221  the first match.
222  */
223 const std::string yf::HttpRewrite::Rule::test_patterns(
224         std::map<std::string, std::string> & vars,
225         const std::string & txt) const
226 {
227     std::list<Replace>::const_iterator it = replace_list.begin();
228
229     for (; it != replace_list.end(); it++)
230     {
231         std::string out = it->search_replace(vars, txt);
232         if (!out.empty()) return out;
233     }
234     return "";
235 }
236
237 const std::string yf::HttpRewrite::Replace::search_replace(
238         std::map<std::string, std::string> & vars,
239         const std::string & txt) const
240 {
241     //exec regex against value
242     boost::regex re(regex);
243     boost::smatch what;
244     std::string::const_iterator start, end;
245     start = txt.begin();
246     end = txt.end();
247     std::string out;
248     while (regex_search(start, end, what, re)) //find next full match
249     {
250         size_t i;
251         for (i = 1; i < what.size(); ++i)
252         {
253             //check if the group is named
254             std::map<int, std::string>::const_iterator it
255                 = group_index.find(i);
256             if (it != group_index.end())
257             {   //it is
258                 if (!what[i].str().empty())
259                     vars[it->second] = what[i];
260             }
261
262         }
263         //prepare replacement string
264         std::string rvalue = sub_vars(vars);
265         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
266                 what.str(0).c_str(), rvalue.c_str());
267         out.append(start, what[0].first);
268         out.append(rvalue);
269         start = what[0].second; //move search forward
270     }
271     //if we had a match cat the last part
272     if (start != txt.begin())
273         out.append(start, end);
274     return out;
275 }
276
277 void yf::HttpRewrite::Replace::parse_groups()
278 {
279     int gnum = 0;
280     bool esc = false;
281     const std::string & str = regex;
282     std::string res;
283     yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
284     for (size_t i = 0; i < str.size(); ++i)
285     {
286         res += str[i];
287         if (!esc && str[i] == '\\')
288         {
289             esc = true;
290             continue;
291         }
292         if (!esc && str[i] == '(') //group starts
293         {
294             gnum++;
295             if (i+1 < str.size() && str[i+1] == '?') //group with attrs
296             {
297                 i++;
298                 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
299                 {
300                     if (gnum > 0) gnum--;
301                     res += str[i];
302                     i++;
303                     res += str[i];
304                     continue;
305                 }
306                 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
307                     i++;
308                 if (i+1 < str.size() && str[i+1] == '<') //named
309                 {
310                     i++;
311                     std::string gname;
312                     bool term = false;
313                     while (++i < str.size())
314                     {
315                         if (str[i] == '>') { term = true; break; }
316                         if (!isalnum(str[i]))
317                             throw mp::filter::FilterException
318                                 ("Only alphanumeric chars allowed, found "
319                                  " in '"
320                                  + str
321                                  + "' at "
322                                  + boost::lexical_cast<std::string>(i));
323                         gname += str[i];
324                     }
325                     if (!term)
326                         throw mp::filter::FilterException
327                             ("Unterminated group name '" + gname
328                              + " in '" + str +"'");
329                     group_index[gnum] = gname;
330                     yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
331                             gname.c_str(), gnum);
332                 }
333             }
334         }
335         esc = false;
336     }
337     regex = res;
338 }
339
340 std::string yf::HttpRewrite::Replace::sub_vars (
341         const std::map<std::string, std::string> & vars) const
342 {
343     std::string out;
344     bool esc = false;
345     const std::string & in = recipe;
346     for (size_t i = 0; i < in.size(); ++i)
347     {
348         if (!esc && in[i] == '\\')
349         {
350             esc = true;
351             continue;
352         }
353         if (!esc && in[i] == '$') //var
354         {
355             if (i+1 < in.size() && in[i+1] == '{') //ref prefix
356             {
357                 ++i;
358                 std::string name;
359                 bool term = false;
360                 while (++i < in.size())
361                 {
362                     if (in[i] == '}') { term = true; break; }
363                     name += in[i];
364                 }
365                 if (!term) throw mp::filter::FilterException
366                     ("Unterminated var ref in '"+in+"' at "
367                      + boost::lexical_cast<std::string>(i));
368                 std::map<std::string, std::string>::const_iterator it
369                     = vars.find(name);
370                 if (it != vars.end())
371                 {
372                     out += it->second;
373                 }
374             }
375             else
376             {
377                 throw mp::filter::FilterException
378                     ("Malformed or trimmed var ref in '"
379                      +in+"' at "+boost::lexical_cast<std::string>(i));
380             }
381             continue;
382         }
383         //passthru
384         out += in[i];
385         esc = false;
386     }
387     return out;
388 }
389
390
391 void yf::HttpRewrite::configure_section(const xmlNode *ptr,
392         Section &section)
393 {
394     std::map<std::string, RulePtr > rules;
395     for (ptr = ptr->children; ptr; ptr = ptr->next)
396     {
397         if (ptr->type != XML_ELEMENT_NODE)
398             continue;
399         else if (!strcmp((const char *) ptr->name, "rule"))
400         {
401             static const char *names[2] = { "name", 0 };
402             std::string values[1];
403             values[0] = "default";
404             mp::xml::parse_attr(ptr, names, values);
405
406             RulePtr rule(new Rule);
407             for (xmlNode *p = ptr->children; p; p = p->next)
408             {
409                 if (p->type != XML_ELEMENT_NODE)
410                     continue;
411                 if (!strcmp((const char *) p->name, "rewrite"))
412                 {
413                     Replace replace;
414                     const struct _xmlAttr *attr;
415                     for (attr = p->properties; attr; attr = attr->next)
416                     {
417                         if (!strcmp((const char *) attr->name,  "from"))
418                             replace.regex = mp::xml::get_text(attr->children);
419                         else if (!strcmp((const char *) attr->name,  "to"))
420                             replace.recipe = mp::xml::get_text(attr->children);
421                         else
422                             throw mp::filter::FilterException
423                                 ("Bad attribute "
424                                  + std::string((const char *) attr->name)
425                                  + " in rewrite section of http_rewrite");
426                     }
427                     yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
428                             replace.regex.c_str(), replace.recipe.c_str());
429                     replace.parse_groups();
430                     if (!replace.regex.empty())
431                         rule->replace_list.push_back(replace);
432                 }
433                 else
434                     throw mp::filter::FilterException
435                         ("Bad element "
436                          + std::string((const char *) p->name)
437                          + " in http_rewrite filter");
438             }
439             if (!rule->replace_list.empty())
440                 rules[values[0]] = rule;
441         }
442         else if (!strcmp((const char *) ptr->name, "within"))
443         {
444             static const char *names[5] =
445                 { "header", "attr", "tag", "rule", 0 };
446             std::string values[4];
447             mp::xml::parse_attr(ptr, names, values);
448             Within w;
449             w.header = values[0];
450             w.attr = values[1];
451             w.tag = values[2];
452             std::map<std::string,RulePtr>::const_iterator it =
453                 rules.find(values[3]);
454             if (it == rules.end())
455                 throw mp::filter::FilterException
456                     ("Reference to non-existing rule '" + values[3] +
457                      "' in http_rewrite filter");
458             w.rule = it->second;
459             section.within_list.push_back(w);
460         }
461         else
462         {
463             throw mp::filter::FilterException
464                 ("Bad element "
465                  + std::string((const char *) ptr->name)
466                  + " in http_rewrite filter");
467         }
468     }
469 }
470
471 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
472         const char *path)
473 {
474     for (ptr = ptr->children; ptr; ptr = ptr->next)
475     {
476         if (ptr->type != XML_ELEMENT_NODE)
477             continue;
478         else if (!strcmp((const char *) ptr->name, "request"))
479         {
480             configure_section(ptr, *req_section);
481         }
482         else if (!strcmp((const char *) ptr->name, "response"))
483         {
484             configure_section(ptr, *res_section);
485         }
486         else
487         {
488             throw mp::filter::FilterException
489                 ("Bad element "
490                  + std::string((const char *) ptr->name)
491                  + " in http_rewrite1 filter");
492         }
493     }
494 }
495
496 static mp::filter::Base* filter_creator()
497 {
498     return new mp::filter::HttpRewrite;
499 }
500
501 extern "C" {
502     struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
503         0,
504         "http_rewrite",
505         filter_creator
506     };
507 }
508
509
510 /*
511  * Local variables:
512  * c-basic-offset: 4
513  * c-file-style: "Stroustrup"
514  * indent-tabs-mode: nil
515  * End:
516  * vim: shiftwidth=4 tabstop=8 expandtab
517  */
518