Handle multiple req/resp patterns
[metaproxy-moved-to-github.git] / src / test_filter_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <iostream>
21 #include <stdexcept>
22
23 #include "filter_http_client.hpp"
24 #include <metaproxy/util.hpp>
25 #include "router_chain.hpp"
26 #include <metaproxy/package.hpp>
27
28 #include <boost/regex.hpp>
29 #include <boost/lexical_cast.hpp>
30
31 #define BOOST_AUTO_TEST_MAIN
32 #define BOOST_TEST_DYN_LINK
33
34 #include <boost/test/auto_unit_test.hpp>
35
36 using namespace boost::unit_test;
37 namespace mp = metaproxy_1;
38
39 typedef std::pair<std::string, std::string> string_pair;
40 typedef std::vector<string_pair> spair_vec;
41 typedef spair_vec::iterator spv_iter;
42
43 class FilterHeaderRewrite: public mp::filter::Base {
44 public:
45     void process(mp::Package & package) const {
46         Z_GDU *gdu = package.request().get();
47         //map of request/response vars
48         std::map<std::string, std::string> vars;
49         //we have an http req
50         if (gdu && gdu->which == Z_GDU_HTTP_Request)
51         {
52             Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
53             mp::odr o;
54             //rewrite the request line
55             std::string path;
56             if (strstr(hreq->path, "http://") == hreq->path)
57             {
58                 std::cout << "Path in the method line is absolute, " 
59                     "possibly a proxy request\n";
60                 path += hreq->path;
61             }
62             else
63             {
64                 //TODO what about proto
65                path += z_HTTP_header_lookup(hreq->headers, "Host");
66                path += hreq->path; 
67             }
68             std::cout << "Proxy request URL is " << path << std::endl;
69             std::string npath = 
70                 test_patterns(vars, path, req_uri_pats, req_groups_bynum);
71             std::cout << "Resp request URL is " << npath << std::endl;
72             if (!npath.empty())
73                 hreq->path = odr_strdup(o, npath.c_str());
74             std::cout << ">> Request headers" << std::endl;
75             //iterate headers
76             for (Z_HTTP_Header *header = hreq->headers;
77                     header != 0; 
78                     header = header->next) 
79             {
80                 std::cout << header->name << ": " << header->value << std::endl;
81                 std::string out = test_patterns(vars, 
82                         std::string(header->value), 
83                         req_uri_pats, req_groups_bynum);
84                 if (!out.empty())
85                     header->value = odr_strdup(o, out.c_str());
86             }
87             package.request() = gdu;
88         }
89         package.move();
90         gdu = package.response().get();
91         if (gdu && gdu->which == Z_GDU_HTTP_Response)
92         {
93             Z_HTTP_Response *hr = gdu->u.HTTP_Response;
94             std::cout << "Response " << hr->code;
95             std::cout << "<< Respose headers" << std::endl;
96             mp::odr o;
97             //iterate headers
98             for (Z_HTTP_Header *header = hr->headers;
99                     header != 0; 
100                     header = header->next) 
101             {
102                 std::cout << header->name << ": " << header->value << std::endl;
103                 std::string out = test_patterns(vars,
104                         std::string(header->value), 
105                         res_uri_pats, res_groups_bynum); 
106                 if (!out.empty())
107                     header->value = odr_strdup(o, out.c_str());
108             }
109             package.response() = gdu;
110         }
111     };
112
113     void configure(const xmlNode* ptr, bool test_only, const char *path) {};
114
115     /**
116      * Tests pattern from the vector in order and executes recipe on
117        the first match.
118      */
119     const std::string test_patterns(
120             std::map<std::string, std::string> & vars,
121             const std::string & txt, 
122             const spair_vec & uri_pats,
123             const std::vector<std::map<int, std::string> > & groups_bynum_vec)
124         const
125     {
126         for (int i = 0; i < uri_pats.size(); i++) 
127         {
128             std::string out = search_replace(vars, txt, 
129                     uri_pats[i].first, uri_pats[i].second,
130                     groups_bynum_vec[i]);
131             if (!out.empty()) return out;
132         }
133         return "";
134     }
135
136
137     const std::string search_replace(
138             std::map<std::string, std::string> & vars,
139             const std::string & txt,
140             const std::string & uri_re,
141             const std::string & uri_pat,
142             const std::map<int, std::string> & groups_bynum) const
143     {
144         //exec regex against value
145         boost::regex re(uri_re);
146         boost::smatch what;
147         std::string::const_iterator start, end;
148         start = txt.begin();
149         end = txt.end();
150         std::string out;
151         while (regex_search(start, end, what, re)) //find next full match
152         {
153             unsigned i;
154             for (i = 1; i < what.size(); ++i)
155             {
156                 //check if the group is named
157                 std::map<int, std::string>::const_iterator it
158                     = groups_bynum.find(i);
159                 if (it != groups_bynum.end()) 
160                 {   //it is
161                     std::string name = it->second;
162                     if (!what[i].str().empty())
163                         vars[name] = what[i];
164                 }
165
166             }
167             //prepare replacement string
168             std::string rvalue = sub_vars(uri_pat, vars);
169             //rewrite value
170             std::string rhvalue = what.prefix().str() 
171                 + rvalue + what.suffix().str();
172             std::cout << "! Rewritten '"+what.str(0)+"' to '"+rvalue+"'\n";
173             out += rhvalue;
174             start = what[0].second; //move search forward
175         }
176         return out;
177     }
178
179     static void parse_groups(
180             const spair_vec & uri_pats,
181             std::vector<std::map<int, std::string> > & groups_bynum_vec)
182     {
183         for (int h = 0; h < uri_pats.size(); h++) 
184         {
185             int gnum = 0;
186             bool esc = false;
187             //regex is first, subpat is second
188             std::string str = uri_pats[h].first;
189             //for each pair we have an indexing map
190             std::map<int, std::string> groups_bynum;
191             for (int i = 0; i < str.size(); ++i)
192             {
193                 if (!esc && str[i] == '\\')
194                 {
195                     esc = true;
196                     continue;
197                 }
198                 if (!esc && str[i] == '(') //group starts
199                 {
200                     gnum++;
201                     if (i+1 < str.size() && str[i+1] == '?') //group with attrs 
202                     {
203                         i++;
204                         if (i+1 < str.size() && str[i+1] == ':') //non-capturing
205                         {
206                             if (gnum > 0) gnum--;
207                             i++;
208                             continue;
209                         }
210                         if (i+1 < str.size() && str[i+1] == 'P') //optional, python
211                             i++;
212                         if (i+1 < str.size() && str[i+1] == '<') //named
213                         {
214                             i++;
215                             std::string gname;
216                             bool term = false;
217                             while (++i < str.size())
218                             {
219                                 if (str[i] == '>') { term = true; break; }
220                                 if (!isalnum(str[i])) 
221                                     throw mp::filter::FilterException
222                                         ("Only alphanumeric chars allowed, found "
223                                          " in '" 
224                                          + str 
225                                          + "' at " 
226                                          + boost::lexical_cast<std::string>(i)); 
227                                 gname += str[i];
228                             }
229                             if (!term)
230                                 throw mp::filter::FilterException
231                                     ("Unterminated group name '" + gname 
232                                      + " in '" + str +"'");
233                             groups_bynum[gnum] = gname;
234                             std::cout << "Found named group '" << gname 
235                                 << "' at $" << gnum << std::endl;
236                         }
237                     }
238                 }
239                 esc = false;
240             }
241             groups_bynum_vec.push_back(groups_bynum);
242         }
243     }
244
245     static std::string sub_vars (const std::string & in, 
246             const std::map<std::string, std::string> & vars)
247     {
248         std::string out;
249         bool esc = false;
250         for (int i = 0; i < in.size(); ++i)
251         {
252             if (!esc && in[i] == '\\')
253             {
254                 esc = true;
255                 continue;
256             }
257             if (!esc && in[i] == '$') //var
258             {
259                 if (i+1 < in.size() && in[i+1] == '{') //ref prefix
260                 {
261                     ++i;
262                     std::string name;
263                     bool term = false;
264                     while (++i < in.size()) 
265                     {
266                         if (in[i] == '}') { term = true; break; }
267                         name += in[i];
268                     }
269                     if (!term) throw mp::filter::FilterException
270                         ("Unterminated var ref in '"+in+"' at "
271                          + boost::lexical_cast<std::string>(i));
272                     std::map<std::string, std::string>::const_iterator it
273                         = vars.find(name);
274                     if (it != vars.end())
275                     {
276                         out += it->second;
277                     }
278                 }
279                 else
280                 {
281                     throw mp::filter::FilterException
282                         ("Malformed or trimmed var ref in '"
283                          +in+"' at "+boost::lexical_cast<std::string>(i)); 
284                 }
285                 continue;
286             }
287             //passthru
288             out += in[i];
289             esc = false;
290         }
291         return out;
292     }
293     
294     void configure(
295             const spair_vec req_uri_pats,
296             const spair_vec res_uri_pats)
297     {
298        //TODO should we really copy them out?
299        this->req_uri_pats = req_uri_pats;
300        this->res_uri_pats = res_uri_pats;
301        //pick up names
302        parse_groups(req_uri_pats, req_groups_bynum);
303        parse_groups(res_uri_pats, res_groups_bynum);
304     };
305
306 private:
307     std::map<std::string, std::string> vars;
308     spair_vec req_uri_pats;
309     spair_vec res_uri_pats;
310     std::vector<std::map<int, std::string> > req_groups_bynum;
311     std::vector<std::map<int, std::string> > res_groups_bynum;
312
313 };
314
315
316 BOOST_AUTO_TEST_CASE( test_filter_rewrite_1 )
317 {
318     try
319     {
320        FilterHeaderRewrite fhr;
321     }
322     catch ( ... ) {
323         BOOST_CHECK (false);
324     }
325 }
326
327 BOOST_AUTO_TEST_CASE( test_filter_rewrite_2 )
328 {
329     try
330     {
331         mp::RouterChain router;
332
333         FilterHeaderRewrite fhr;
334         
335         spair_vec vec_req;
336         vec_req.push_back(std::make_pair(
337         "(?<proto>http\\:\\/\\/s?)(?<pxhost>[^\\/?#]+)\\/(?<pxpath>[^\\/]+)"
338         "\\/(?<target>.+)",
339         "${proto}${target}"
340         ));
341         vec_req.push_back(std::make_pair(
342         "proxyhost",
343         "localhost"
344         ));
345
346         spair_vec vec_res;
347         
348         fhr.configure(vec_req, vec_res);
349
350         mp::filter::HTTPClient hc;
351         
352         router.append(fhr);
353         router.append(hc);
354
355         // create an http request
356         mp::Package pack;
357
358         mp::odr odr;
359         Z_GDU *gdu_req = z_get_HTTP_Request_uri(odr, 
360         "http://proxyhost/proxypath/localhost:80/~jakub/targetsite.php", 0, 1);
361
362         pack.request() = gdu_req;
363
364         //feed to the router
365         pack.router(router).move();
366
367         //analyze the response
368         Z_GDU *gdu_res = pack.response().get();
369         BOOST_CHECK(gdu_res);
370         BOOST_CHECK_EQUAL(gdu_res->which, Z_GDU_HTTP_Response);
371         
372         Z_HTTP_Response *hres = gdu_res->u.HTTP_Response;
373         BOOST_CHECK(hres);
374
375     }
376     catch (std::exception & e) {
377         std::cout << e.what();
378         BOOST_CHECK (false);
379     }
380 }
381
382 /*
383  * Local variables:
384  * c-basic-offset: 4
385  * c-file-style: "Stroustrup"
386  * indent-tabs-mode: nil
387  * End:
388  * vim: shiftwidth=4 tabstop=8 expandtab
389  */
390