Refactor HTTP rewrite filter and tests
author Jakub Skoczen <jakub@indexdata.dk>
Mon, 10 Jun 2013 15:28:10 +0000 (17:28 +0200)
committer Jakub Skoczen <jakub@indexdata.dk>
Tue, 11 Jun 2013 09:29:31 +0000 (11:29 +0200)
src/filter_http_rewrite.cpp
src/filter_http_rewrite.hpp
src/test_filter_rewrite.cpp

index 249a757..55ae35c 100644 (file)
@@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <boost/regex.hpp>
 #include <boost/lexical_cast.hpp>
 
-#include <list>
+#include <vector>
 #include <map>
 
 #if HAVE_SYS_TYPES_H
@@ -38,7 +38,47 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 namespace mp = metaproxy_1;
 namespace yf = mp::filter;
 
-yf::HttpRewrite::HttpRewrite()
+namespace metaproxy_1 {
+    namespace filter {
+        class HttpRewrite::RuleScope {
+        public:
+            std::vector<std::string> tags;
+            std::vector<std::string> attrs;
+            std::string content_type;
+        };
+        class HttpRewrite::Rule {
+        public:
+            enum Section { METHOD, HEADER, BODY };
+            std::string regex;
+            std::string recipe;
+            std::map<int, std::string> group_index;
+            std::vector<RuleScope> scopes;
+            Section section;
+            const std::string search_replace(
+                std::map<std::string, std::string> & vars,
+                const std::string & txt) const;
+            std::string sub_vars (
+                const std::map<std::string, std::string> & vars) const;
+            void parse_groups();
+        };
+        class HttpRewrite::Rules {
+        public:
+            std::vector<Rule> rules;
+            void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
+                std::map<std::string, std::string> & vars) const;
+            void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
+                std::map<std::string, std::string> & vars) const;
+            void rewrite_body (mp::odr & o, 
+                char **content_buf, int *content_len,
+                std::map<std::string, std::string> & vars) const;
+            const std::string test_patterns(
+                std::map<std::string, std::string> & vars,
+                const std::string & txt) const;
+        };
+    }
+}
+
+yf::HttpRewrite::HttpRewrite() : req_rules(new Rules), res_rules(new Rules)
 {
 }
 
@@ -57,11 +97,12 @@ void yf::HttpRewrite::process(mp::Package & package) const
     {
         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
         mp::odr o;
-        rewrite_reqline(o, hreq, vars);
+        req_rules->rewrite_reqline(o, hreq, vars);
         yaz_log(YLOG_LOG, ">> Request headers");
-        rewrite_headers(o, hreq->headers, vars, req_uri_pats, req_groups_bynum);
-        rewrite_body(o, &hreq->content_buf, &hreq->content_len, vars,
-                req_uri_pats, req_groups_bynum);
+        req_rules->rewrite_headers(o, hreq->headers, vars);
+        req_rules->rewrite_body(o, 
+                &hreq->content_buf, &hreq->content_len, 
+                vars);
         package.request() = gdu;
     }
     package.move();
@@ -72,14 +113,15 @@ void yf::HttpRewrite::process(mp::Package & package) const
         yaz_log(YLOG_LOG, "Response code %d", hres->code);
         mp::odr o;
         yaz_log(YLOG_LOG, "<< Respose headers");
-        rewrite_headers(o, hres->headers, vars, res_uri_pats, res_groups_bynum);
-        rewrite_body(o, &hres->content_buf, &hres->content_len, vars,
-                res_uri_pats, res_groups_bynum);
+        res_rules->rewrite_headers(o, hres->headers, vars);
+        res_rules->rewrite_body(o, &hres->content_buf, 
+                &hres->content_len, vars);
         package.response() = gdu;
     }
 }
 
-void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
+void yf::HttpRewrite::Rules::rewrite_reqline (mp::odr & o, 
+        Z_HTTP_Request *hreq,
         std::map<std::string, std::string> & vars) const 
 {
     //rewrite the request line
@@ -99,7 +141,7 @@ void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
     }
     yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
     std::string npath = 
-        test_patterns(vars, path, req_uri_pats, req_groups_bynum);
+        test_patterns(vars, path);
     if (!npath.empty())
     {
         yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
@@ -107,10 +149,9 @@ void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
     }
 }
 
-void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
-        std::map<std::string, std::string> & vars, 
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum) const 
+void yf::HttpRewrite::Rules::rewrite_headers(mp::odr & o, 
+        Z_HTTP_Header *headers,
+        std::map<std::string, std::string> & vars) const 
 {
     for (Z_HTTP_Header *header = headers;
             header != 0; 
@@ -120,7 +161,7 @@ void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
         sheader += ": ";
         sheader += header->value;
         yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
-        std::string out = test_patterns(vars, sheader, uri_pats, groups_bynum);
+        std::string out = test_patterns(vars, sheader);
         if (!out.empty()) 
         {
             size_t pos = out.find(": ");
@@ -136,16 +177,16 @@ void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
     }
 }
 
-void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *content_len,
-        std::map<std::string, std::string> & vars,
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum) const 
+void yf::HttpRewrite::Rules::rewrite_body (mp::odr & o, 
+        char **content_buf, 
+        int *content_len,
+        std::map<std::string, std::string> & vars) const 
 {
     if (*content_buf)
     {
         std::string body(*content_buf);
         std::string nbody = 
-            test_patterns(vars, body, uri_pats, groups_bynum);
+            test_patterns(vars, body);
         if (!nbody.empty())
         {
             *content_buf = odr_strdup(o, nbody.c_str());
@@ -158,33 +199,24 @@ void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *conten
  * Tests pattern from the vector in order and executes recipe on
  the first match.
  */
-const std::string yf::HttpRewrite::test_patterns(
+const std::string yf::HttpRewrite::Rules::test_patterns(
         std::map<std::string, std::string> & vars,
-        const std::string & txt, 
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum_vec)
-    const
+        const std::string & txt) const
 {
-    for (unsigned i = 0; i < uri_pats.size(); i++) 
+    for (unsigned i = 0; i < rules.size(); i++) 
     {
-        std::string out = search_replace(vars, txt, 
-                uri_pats[i].first, uri_pats[i].second,
-                groups_bynum_vec[i]);
+        std::string out = rules[i].search_replace(vars, txt);
         if (!out.empty()) return out;
     }
     return "";
 }
 
-
-const std::string yf::HttpRewrite::search_replace(
+const std::string yf::HttpRewrite::Rule::search_replace(
         std::map<std::string, std::string> & vars,
-        const std::string & txt,
-        const std::string & uri_re,
-        const std::string & uri_pat,
-        const std::map<int, std::string> & groups_bynum) const
+        const std::string & txt) const
 {
     //exec regex against value
-    boost::regex re(uri_re);
+    boost::regex re(regex);
     boost::smatch what;
     std::string::const_iterator start, end;
     start = txt.begin();
@@ -197,8 +229,8 @@ const std::string yf::HttpRewrite::search_replace(
         {
             //check if the group is named
             std::map<int, std::string>::const_iterator it
-                = groups_bynum.find(i);
-            if (it != groups_bynum.end()) 
+                = group_index.find(i);
+            if (it != group_index.end()) 
             {   //it is
                 if (!what[i].str().empty())
                     vars[it->second] = what[i];
@@ -206,7 +238,7 @@ const std::string yf::HttpRewrite::search_replace(
 
         }
         //prepare replacement string
-        std::string rvalue = sub_vars(uri_pat, vars);
+        std::string rvalue = sub_vars(vars);
         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", 
                 what.str(0).c_str(), rvalue.c_str());
         out.append(start, what[0].first);
@@ -219,77 +251,70 @@ const std::string yf::HttpRewrite::search_replace(
     return out;
 }
 
-void yf::HttpRewrite::parse_groups(
-        const spair_vec & uri_pats,
-        std::vector<std::map<int, std::string> > & groups_bynum_vec)
+void yf::HttpRewrite::Rule::parse_groups()
 {
-    for (unsigned h = 0; h < uri_pats.size(); h++) 
+    int gnum = 0;
+    bool esc = false;
+    const std::string & str = regex;
+    yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
+    for (unsigned i = 0; i < str.size(); ++i)
     {
-        int gnum = 0;
-        bool esc = false;
-        //regex is first, subpat is second
-        std::string str = uri_pats[h].first;
-        //for each pair we have an indexing map
-        std::map<int, std::string> groups_bynum;
-        for (unsigned i = 0; i < str.size(); ++i)
+        if (!esc && str[i] == '\\')
         {
-            if (!esc && str[i] == '\\')
-            {
-                esc = true;
-                continue;
-            }
-            if (!esc && str[i] == '(') //group starts
+            esc = true;
+            continue;
+        }
+        if (!esc && str[i] == '(') //group starts
+        {
+            gnum++;
+            if (i+1 < str.size() && str[i+1] == '?') //group with attrs 
             {
-                gnum++;
-                if (i+1 < str.size() && str[i+1] == '?') //group with attrs 
+                i++; 
+                if (i+1 < str.size() && str[i+1] == ':') //non-capturing
                 {
+                    if (gnum > 0) gnum--;
                     i++;
-                    if (i+1 < str.size() && str[i+1] == ':') //non-capturing
-                    {
-                        if (gnum > 0) gnum--;
-                        i++;
-                        continue;
-                    }
-                    if (i+1 < str.size() && str[i+1] == 'P') //optional, python
-                        i++;
-                    if (i+1 < str.size() && str[i+1] == '<') //named
+                    continue;
+                }
+                if (i+1 < str.size() && str[i+1] == 'P') //optional, python
+                    i++;
+                if (i+1 < str.size() && str[i+1] == '<') //named
+                {
+                    i++;
+                    std::string gname;
+                    bool term = false;
+                    while (++i < str.size())
                     {
-                        i++;
-                        std::string gname;
-                        bool term = false;
-                        while (++i < str.size())
-                        {
-                            if (str[i] == '>') { term = true; break; }
-                            if (!isalnum(str[i])) 
-                                throw mp::filter::FilterException
-                                    ("Only alphanumeric chars allowed, found "
-                                     " in '" 
-                                     + str 
-                                     + "' at " 
-                                     + boost::lexical_cast<std::string>(i)); 
-                            gname += str[i];
-                        }
-                        if (!term)
+                        if (str[i] == '>') { term = true; break; }
+                        if (!isalnum(str[i])) 
                             throw mp::filter::FilterException
-                                ("Unterminated group name '" + gname 
-                                 + " in '" + str +"'");
-                        groups_bynum[gnum] = gname;
-                        yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
-                                gname.c_str(), gnum);
+                                ("Only alphanumeric chars allowed, found "
+                                 " in '" 
+                                 + str 
+                                 + "' at " 
+                                 + boost::lexical_cast<std::string>(i)); 
+                        gname += str[i];
                     }
+                    if (!term)
+                        throw mp::filter::FilterException
+                            ("Unterminated group name '" + gname 
+                             + " in '" + str +"'");
+                    group_index[gnum] = gname;
+                    yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
+                            gname.c_str(), gnum);
                 }
             }
-            esc = false;
         }
-        groups_bynum_vec.push_back(groups_bynum);
+        esc = false;
     }
 }
 
-std::string yf::HttpRewrite::sub_vars (const std::string & in, 
-        const std::map<std::string, std::string> & vars)
+std::string yf::HttpRewrite::Rule::sub_vars (
+        const std::map<std::string, std::string> & vars) const
 {
     std::string out;
     bool esc = false;
+    const std::string & in = recipe;
     for (unsigned i = 0; i < in.size(); ++i)
     {
         if (!esc && in[i] == '\\')
@@ -334,20 +359,8 @@ std::string yf::HttpRewrite::sub_vars (const std::string & in,
     return out;
 }
 
-void yf::HttpRewrite::configure(
-        const spair_vec req_uri_pats,
-        const spair_vec res_uri_pats)
-{
-    //TODO should we really copy them out?
-    this->req_uri_pats = req_uri_pats;
-    this->res_uri_pats = res_uri_pats;
-    //pick up names
-    parse_groups(req_uri_pats, req_groups_bynum);
-    parse_groups(res_uri_pats, res_groups_bynum);
-}
-
-
-static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & dest)
+void yf::HttpRewrite::configure_rules(const xmlNode *ptr, 
+        Rules & rules)
 {
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
@@ -355,14 +368,14 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des
             continue;
         else if (!strcmp((const char *) ptr->name, "rewrite"))
         {
-            std::string from, to;
+            Rule rule;
             const struct _xmlAttr *attr;
             for (attr = ptr->properties; attr; attr = attr->next)
             {
                 if (!strcmp((const char *) attr->name,  "from"))
-                    from = mp::xml::get_text(attr->children);
+                    rule.regex = mp::xml::get_text(attr->children);
                 else if (!strcmp((const char *) attr->name,  "to"))
-                    to = mp::xml::get_text(attr->children);
+                    rule.recipe = mp::xml::get_text(attr->children);
                 else
                     throw mp::filter::FilterException
                         ("Bad attribute "
@@ -370,9 +383,10 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des
                          + " in rewrite section of http_rewrite");
             }
             yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", 
-                    from.c_str(), to.c_str());
-            if (!from.empty())
-                dest.push_back(std::make_pair(from, to));
+                    rule.regex.c_str(), rule.recipe.c_str());
+            rule.parse_groups();
+            if (!rule.regex.empty())
+                rules.rules.push_back(rule);
         }
         else
         {
@@ -387,19 +401,17 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des
 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
         const char *path)
 {
-    spair_vec req_uri_pats;
-    spair_vec res_uri_pats;
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
         else if (!strcmp((const char *) ptr->name, "request"))
         {
-            configure_rules(ptr, req_uri_pats);
+            configure_rules(ptr, *req_rules);
         }
         else if (!strcmp((const char *) ptr->name, "response"))
         {
-            configure_rules(ptr, res_uri_pats);
+            configure_rules(ptr, *res_rules);
         }
         else
         {
@@ -409,7 +421,6 @@ void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
                  + " in http_rewrite1 filter");
         }
     }
-    configure(req_uri_pats, res_uri_pats);
 }
 
 static mp::filter::Base* filter_creator()
index 11b43e6..d611142 100644 (file)
@@ -20,59 +20,25 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #define FILTER_HTTP_REWRITE_HPP
 
 #include <metaproxy/filter.hpp>
-#include <vector>
-#include <map>
-#include <metaproxy/util.hpp>
+#include <boost/scoped_ptr.hpp>
 
 namespace mp = metaproxy_1;
 
 namespace metaproxy_1 {
     namespace filter {
         class HttpRewrite : public Base {
+            class Rules;
+            class Rule;
+            class RuleScope;
+            boost::scoped_ptr<Rules> req_rules;
+            boost::scoped_ptr<Rules> res_rules;
+            void configure_rules(const xmlNode *ptr, Rules & rules);
         public:
-            typedef std::pair<std::string, std::string> string_pair;
-            typedef std::vector<string_pair> spair_vec;
-            typedef spair_vec::iterator spv_iter;
             HttpRewrite();
             ~HttpRewrite();
             void process(metaproxy_1::Package & package) const;
-            void configure(const xmlNode * ptr, bool test_only,
-                           const char *path);
-            void configure(const spair_vec req_uri_pats,
-                           const spair_vec res_uri_pats); 
-        private:
-            spair_vec req_uri_pats;
-            spair_vec res_uri_pats;
-            std::vector<std::map<int, std::string> > req_groups_bynum;
-            std::vector<std::map<int, std::string> > res_groups_bynum;
-            void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
-                    std::map<std::string, std::string> & vars) const;
-            void rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
-                    std::map<std::string, std::string> & vars,
-                    const spair_vec & uri_pats,
-                    const std::vector<std::map<int, std::string> > & groups_bynum_vec) const;
-            void rewrite_body (mp::odr & o, char **content_buf, int *content_len,
-                    std::map<std::string, std::string> & vars,
-                    const spair_vec & uri_pats,
-                    const std::vector<std::map<int, std::string> > 
-                    & groups_bynum) const; 
-            const std::string test_patterns(
-                    std::map<std::string, std::string> & vars,
-                    const std::string & txt, 
-                    const spair_vec & uri_pats,
-                    const std::vector<std::map<int, std::string> > 
-                    & groups_bynum) const;
-            const std::string search_replace(
-                    std::map<std::string, std::string> & vars,
-                    const std::string & txt,
-                    const std::string & uri_re,
-                    const std::string & uri_pat,
-                    const std::map<int, std::string> & groups_bynum) const;
-            static void parse_groups(
-                    const spair_vec & uri_pats,
-                    std::vector<std::map<int, std::string> > & groups_bynum_vec);
-            static std::string sub_vars (const std::string & in, 
-                    const std::map<std::string, std::string> & vars);
+            void configure(const xmlNode * ptr, 
+                    bool test_only, const char *path);
         };
     }
 }
index d266274..cb9c47c 100644 (file)
@@ -60,27 +60,36 @@ BOOST_AUTO_TEST_CASE( test_filter_rewrite_1 )
         std::cout << "Running non-xml config test case" << std::endl;
         mp::RouterChain router;
         mp::filter::HttpRewrite fhr;
-        
-        //configure the filter
-        mp::filter::HttpRewrite::spair_vec vec_req;
-        vec_req.push_back(std::make_pair(
-        "(?<proto>http\\:\\/\\/s?)(?<pxhost>[^\\/?#]+)\\/(?<pxpath>[^\\/]+)"
-        "\\/(?<host>[^\\/]+)(?<path>.*)",
-        "${proto}${host}${path}"
-        ));
-        vec_req.push_back(std::make_pair(
-        "(?:Host\\: )(.*)",
-        "Host: ${host}"
-        ));
-
-        mp::filter::HttpRewrite::spair_vec vec_res;
-        vec_res.push_back(std::make_pair(
-        "(?<proto>http\\:\\/\\/s?)(?<host>[^\\/?# \"'>]+)\\/(?<path>[^ \"'>]+)",
-        "${proto}${pxhost}/${pxpath}/${host}/${path}"
-        ));
-        
-        fhr.configure(vec_req, vec_res);
-        
+         
+        std::string xmlconf =
+            "<?xml version='1.0'?>\n"
+            "<filter xmlns='http://indexdata.com/metaproxy'\n"
+            "        id='rewrite1' type='http_rewrite'>\n"
+            " <request>\n"
+            "   <rewrite from='"
+    "(?&lt;proto>https?://)(?&lt;pxhost>[^ /?#]+)/(?&lt;pxpath>[^ /]+)"
+    "/(?&lt;host>[^ /]+)(?&lt;path>[^ ]*)'\n"
+            "            to='${proto}${host}${path}' />\n"
+            "   <rewrite from='(?:Host: )(.*)'\n"
+            "            to='Host: ${host}' />\n" 
+            " </request>\n"
+            " <response>\n"
+            "   <rewrite from='"
+    "(?&lt;proto>https?://)(?&lt;host>[^/?# &quot;&apos;>]+)/(?&lt;path>[^  &quot;&apos;>]+)'\n"
+            "            to='${proto}${pxhost}/${pxpath}/${host}/${path}' />\n" 
+            " </response>\n"
+            "</filter>\n"
+        ;
+
+        std::cout << xmlconf;
+
+        // reading and parsing XML conf
+        xmlDocPtr doc = xmlParseMemory(xmlconf.c_str(), xmlconf.size());
+        BOOST_CHECK(doc);
+        xmlNode *root_element = xmlDocGetRootElement(doc);
+        fhr.configure(root_element, true, "");
+        xmlFreeDoc(doc);
+       
         router.append(fhr);
 
         // create an http request