Using log level
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24
25 #include <yaz/zgdu.h>
26 #include <yaz/log.h>
27
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
30
31 #include <list>
32 #include <map>
33
34 #if HAVE_SYS_TYPES_H
35 #include <sys/types.h>
36 #endif
37
38 namespace mp = metaproxy_1;
39 namespace yf = mp::filter;
40
/**
 * Default constructor. Performs no work of its own; the rewrite rules
 * are installed later through configure().
 */
yf::HttpRewrite::HttpRewrite()
{
}
44
/**
 * Destructor. The body is empty: nothing is released explicitly here.
 */
yf::HttpRewrite::~HttpRewrite()
{
}
48
49 void yf::HttpRewrite::process(mp::Package & package) const 
50 {
51     yaz_log(YLOG_LOG, "HttpRewrite begins....");
52     Z_GDU *gdu = package.request().get();
53     //map of request/response vars
54     std::map<std::string, std::string> vars;
55     //we have an http req
56     if (gdu && gdu->which == Z_GDU_HTTP_Request)
57     {
58         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
59         mp::odr o;
60         rewrite_reqline(o, hreq, vars);
61         yaz_log(YLOG_LOG, ">> Request headers");
62         rewrite_headers(o, hreq->headers, vars);
63         rewrite_body(o, &hreq->content_buf, &hreq->content_len, vars);
64         package.request() = gdu;
65     }
66     package.move();
67     gdu = package.response().get();
68     if (gdu && gdu->which == Z_GDU_HTTP_Response)
69     {
70         Z_HTTP_Response *hres = gdu->u.HTTP_Response;
71         yaz_log(YLOG_LOG, "Response code %d", hres->code);
72         mp::odr o;
73         yaz_log(YLOG_LOG, "<< Respose headers");
74         rewrite_headers(o, hres->headers, vars);
75         rewrite_body(o, &hres->content_buf, &hres->content_len, vars);
76         package.response() = gdu;
77     }
78 }
79
80 void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
81         std::map<std::string, std::string> & vars) const 
82 {
83     //rewrite the request line
84     std::string path;
85     if (strstr(hreq->path, "http://") == hreq->path)
86     {
87         yaz_log(YLOG_LOG, "Path in the method line is absolute, " 
88             "possibly a proxy request");
89         path += hreq->path;
90     }
91     else
92     {
93         //TODO what about proto
94         path += "http://";
95         path += z_HTTP_header_lookup(hreq->headers, "Host");
96         path += hreq->path; 
97     }
98     yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
99     std::string npath = 
100         test_patterns(vars, path, req_uri_pats, req_groups_bynum);
101     if (!npath.empty())
102     {
103         yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
104         hreq->path = odr_strdup(o, npath.c_str());
105     }
106 }
107
108 void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
109         std::map<std::string, std::string> & vars) const 
110 {
111     for (Z_HTTP_Header *header = headers;
112             header != 0; 
113             header = header->next) 
114     {
115         std::string sheader(header->name);
116         sheader += ": ";
117         sheader += header->value;
118         yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
119         std::string out = test_patterns(vars, 
120                 sheader, 
121                 req_uri_pats, req_groups_bynum);
122         if (!out.empty()) 
123         {
124             size_t pos = out.find(": ");
125             if (pos == std::string::npos)
126             {
127                 yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
128                 continue;
129             }
130             header->name = odr_strdup(o, out.substr(0, pos).c_str());
131             header->value = odr_strdup(o, out.substr(pos+2, 
132                         std::string::npos).c_str());
133         }
134     }
135 }
136
137 void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *content_len,
138         std::map<std::string, std::string> & vars) const 
139 {
140     if (*content_buf)
141     {
142         std::string body(*content_buf);
143         std::string nbody = 
144             test_patterns(vars, body, req_uri_pats, req_groups_bynum);
145         if (!nbody.empty())
146         {
147             *content_buf = odr_strdup(o, nbody.c_str());
148             *content_len = nbody.size();
149         }
150     }
151 }
152
153 /**
154  * Tests pattern from the vector in order and executes recipe on
155  the first match.
156  */
157 const std::string yf::HttpRewrite::test_patterns(
158         std::map<std::string, std::string> & vars,
159         const std::string & txt, 
160         const spair_vec & uri_pats,
161         const std::vector<std::map<int, std::string> > & groups_bynum_vec)
162     const
163 {
164     for (unsigned i = 0; i < uri_pats.size(); i++) 
165     {
166         std::string out = search_replace(vars, txt, 
167                 uri_pats[i].first, uri_pats[i].second,
168                 groups_bynum_vec[i]);
169         if (!out.empty()) return out;
170     }
171     return "";
172 }
173
174
175 const std::string yf::HttpRewrite::search_replace(
176         std::map<std::string, std::string> & vars,
177         const std::string & txt,
178         const std::string & uri_re,
179         const std::string & uri_pat,
180         const std::map<int, std::string> & groups_bynum) const
181 {
182     //exec regex against value
183     boost::regex re(uri_re);
184     boost::smatch what;
185     std::string::const_iterator start, end;
186     start = txt.begin();
187     end = txt.end();
188     std::string out;
189     while (regex_search(start, end, what, re)) //find next full match
190     {
191         unsigned i;
192         for (i = 1; i < what.size(); ++i)
193         {
194             //check if the group is named
195             std::map<int, std::string>::const_iterator it
196                 = groups_bynum.find(i);
197             if (it != groups_bynum.end()) 
198             {   //it is
199                 std::string name = it->second;
200                 if (!what[i].str().empty())
201                     vars[name] = what[i];
202             }
203
204         }
205         //prepare replacement string
206         std::string rvalue = sub_vars(uri_pat, vars);
207         //rewrite value
208         std::string rhvalue = what.prefix().str() 
209             + rvalue + what.suffix().str();
210         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", 
211                 what.str(0).c_str(), rvalue.c_str());
212         out += rhvalue;
213         start = what[0].second; //move search forward
214     }
215     return out;
216 }
217
218 void yf::HttpRewrite::parse_groups(
219         const spair_vec & uri_pats,
220         std::vector<std::map<int, std::string> > & groups_bynum_vec)
221 {
222     for (unsigned h = 0; h < uri_pats.size(); h++) 
223     {
224         int gnum = 0;
225         bool esc = false;
226         //regex is first, subpat is second
227         std::string str = uri_pats[h].first;
228         //for each pair we have an indexing map
229         std::map<int, std::string> groups_bynum;
230         for (unsigned i = 0; i < str.size(); ++i)
231         {
232             if (!esc && str[i] == '\\')
233             {
234                 esc = true;
235                 continue;
236             }
237             if (!esc && str[i] == '(') //group starts
238             {
239                 gnum++;
240                 if (i+1 < str.size() && str[i+1] == '?') //group with attrs 
241                 {
242                     i++;
243                     if (i+1 < str.size() && str[i+1] == ':') //non-capturing
244                     {
245                         if (gnum > 0) gnum--;
246                         i++;
247                         continue;
248                     }
249                     if (i+1 < str.size() && str[i+1] == 'P') //optional, python
250                         i++;
251                     if (i+1 < str.size() && str[i+1] == '<') //named
252                     {
253                         i++;
254                         std::string gname;
255                         bool term = false;
256                         while (++i < str.size())
257                         {
258                             if (str[i] == '>') { term = true; break; }
259                             if (!isalnum(str[i])) 
260                                 throw mp::filter::FilterException
261                                     ("Only alphanumeric chars allowed, found "
262                                      " in '" 
263                                      + str 
264                                      + "' at " 
265                                      + boost::lexical_cast<std::string>(i)); 
266                             gname += str[i];
267                         }
268                         if (!term)
269                             throw mp::filter::FilterException
270                                 ("Unterminated group name '" + gname 
271                                  + " in '" + str +"'");
272                         groups_bynum[gnum] = gname;
273                         yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
274                                 gname.c_str(), gnum);
275                     }
276                 }
277             }
278             esc = false;
279         }
280         groups_bynum_vec.push_back(groups_bynum);
281     }
282 }
283
284 std::string yf::HttpRewrite::sub_vars (const std::string & in, 
285         const std::map<std::string, std::string> & vars)
286 {
287     std::string out;
288     bool esc = false;
289     for (unsigned i = 0; i < in.size(); ++i)
290     {
291         if (!esc && in[i] == '\\')
292         {
293             esc = true;
294             continue;
295         }
296         if (!esc && in[i] == '$') //var
297         {
298             if (i+1 < in.size() && in[i+1] == '{') //ref prefix
299             {
300                 ++i;
301                 std::string name;
302                 bool term = false;
303                 while (++i < in.size()) 
304                 {
305                     if (in[i] == '}') { term = true; break; }
306                     name += in[i];
307                 }
308                 if (!term) throw mp::filter::FilterException
309                     ("Unterminated var ref in '"+in+"' at "
310                      + boost::lexical_cast<std::string>(i));
311                 std::map<std::string, std::string>::const_iterator it
312                     = vars.find(name);
313                 if (it != vars.end())
314                 {
315                     out += it->second;
316                 }
317             }
318             else
319             {
320                 throw mp::filter::FilterException
321                     ("Malformed or trimmed var ref in '"
322                      +in+"' at "+boost::lexical_cast<std::string>(i)); 
323             }
324             continue;
325         }
326         //passthru
327         out += in[i];
328         esc = false;
329     }
330     return out;
331 }
332
333 void yf::HttpRewrite::configure(
334         const spair_vec req_uri_pats,
335         const spair_vec res_uri_pats)
336 {
337     //TODO should we really copy them out?
338     this->req_uri_pats = req_uri_pats;
339     this->res_uri_pats = res_uri_pats;
340     //pick up names
341     parse_groups(req_uri_pats, req_groups_bynum);
342     parse_groups(res_uri_pats, res_groups_bynum);
343 }
344
345
346 static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & dest)
347 {
348     for (ptr = ptr->children; ptr; ptr = ptr->next)
349     {
350         if (ptr->type != XML_ELEMENT_NODE)
351             continue;
352         else if (!strcmp((const char *) ptr->name, "rewrite"))
353         {
354             std::string from, to;
355             const struct _xmlAttr *attr;
356             for (attr = ptr->properties; attr; attr = attr->next)
357             {
358                 if (!strcmp((const char *) attr->name,  "from"))
359                     from = mp::xml::get_text(attr->children);
360                 else if (!strcmp((const char *) attr->name,  "to"))
361                     to = mp::xml::get_text(attr->children);
362                 else
363                     throw mp::filter::FilterException
364                         ("Bad attribute "
365                          + std::string((const char *) attr->name)
366                          + " in rewrite section of http_rewrite");
367             }
368             yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", 
369                     from.c_str(), to.c_str());
370             if (!from.empty())
371                 dest.push_back(std::make_pair(from, to));
372         }
373         else
374         {
375             throw mp::filter::FilterException
376                 ("Bad element o"
377                  + std::string((const char *) ptr->name)
378                  + " in http_rewrite1 filter");
379         }
380     }
381 }
382
383 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
384         const char *path)
385 {
386     spair_vec req_uri_pats;
387     spair_vec res_uri_pats;
388     for (ptr = ptr->children; ptr; ptr = ptr->next)
389     {
390         if (ptr->type != XML_ELEMENT_NODE)
391             continue;
392         else if (!strcmp((const char *) ptr->name, "request"))
393         {
394             configure_rules(ptr, req_uri_pats);
395         }
396         else if (!strcmp((const char *) ptr->name, "response"))
397         {
398             configure_rules(ptr, res_uri_pats);
399         }
400         else
401         {
402             throw mp::filter::FilterException
403                 ("Bad element "
404                  + std::string((const char *) ptr->name)
405                  + " in http_rewrite1 filter");
406         }
407     }
408     configure(req_uri_pats, res_uri_pats);
409 }
410
411 static mp::filter::Base* filter_creator()
412 {
413     return new mp::filter::HttpRewrite;
414 }
415
// Registration record for this filter, exported with C linkage so that
// the symbol name is not mangled.
extern "C" {
    struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
        0,              // NOTE(review): presumably a version/flags field -- confirm against metaproxy/filter.hpp
        "http_rewrite", // presumably the filter type name used in configuration -- confirm
        filter_creator  // factory defined above, creates the HttpRewrite instance
    };
}
423
424
425 /*
426  * Local variables:
427  * c-basic-offset: 4
428  * c-file-style: "Stroustrup"
429  * indent-tabs-mode: nil
430  * End:
431  * vim: shiftwidth=4 tabstop=8 expandtab
432  */
433