Windows: use Boost 1.59, msvc 14.0
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
index 249a757..1badb44 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Metaproxy.
-   Copyright (C) 2005-2013 Index Data
+   Copyright (C) Index Data
 
 Metaproxy is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -21,24 +21,112 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
 #include "filter_http_rewrite.hpp"
+#include "html_parser.hpp"
 
 #include <yaz/zgdu.h>
 #include <yaz/log.h>
 
+#include <stack>
 #include <boost/regex.hpp>
 #include <boost/lexical_cast.hpp>
+#include <boost/algorithm/string.hpp>
 
-#include <list>
 #include <map>
 
-#if HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
 namespace mp = metaproxy_1;
 namespace yf = mp::filter;
 
-yf::HttpRewrite::HttpRewrite()
+namespace metaproxy_1 {
+    namespace filter {
+        class HttpRewrite::Replace { // one pattern together with the recipe used to build its replacement
+        public:
+            bool start_anchor; // set by parse_groups: true when the pattern begins with '^'
+            boost::regex re; // regex assigned at the end of parse_groups
+            std::string recipe; // replacement template scanned by sub_vars
+            std::map<int, std::string> group_index; // capture-group number -> group name, filled by parse_groups
+            std::string sub_vars( // builds the replacement text from recipe and the given variables
+                const std::map<std::string, std::string> & vars) const;
+            void parse_groups(std::string pattern); // records named groups, sets start_anchor and re
+        };
+
+        class HttpRewrite::Rule { // a list of replacements that are tried together on one piece of text
+        public:
+            std::list<Replace> replace_list; // candidates examined by test_patterns
+            bool test_patterns( // rewrites txt in place; returns true when at least one pattern matched
+                std::map<std::string, std::string> &vars, // receives the values of named groups
+                std::string &txt, bool anchor, // anchor: whether start-anchored patterns may match at the beginning
+                std::list<boost::regex> &skip_list); // matches that hit one of these are logged and left unchanged
+        };
+        class HttpRewrite::Within { // selects where a rule applies: a header, a tag/attribute, or the request line
+        public:
+            boost::regex header; // matched against header names in rewrite_headers
+            boost::regex attr; // matched against attribute names, or against "#text" for element text
+            boost::regex tag; // matched against tag names by the Event callbacks
+            std::string type; // "quoted-literal" makes exec go through embed_quoted_literal
+            bool reqline; // when true, rewrite_reqline applies this entry to the request URL
+            RulePtr rule; // the rule exec runs
+            bool exec(std::map<std::string, std::string> &vars, // rewrites txt in place; returns what the rule reports
+                      std::string &txt, bool anchor,
+                      std::list<boost::regex> &skip_list) const;
+        };
+
+        class HttpRewrite::Content { // rewrite configuration for one kind of content
+        public:
+            std::string type; // values tested in this file: "headers", "html", "quoted-literal"
+            boost::regex content_re; // matched against the Content-Type value in rewrite_body
+            std::list<Within> within_list; // entries consulted when rewriting this content
+            void configure(const xmlNode *ptr, // walks the XML siblings starting at ptr, handling "within" elements
+                           std::map<std::string, RulePtr > &rules);
+            void quoted_literal(std::string &content, // applies the first within entry's rule via embed_quoted_literal
+                                std::map<std::string, std::string> &vars,
+                                std::list<boost::regex> & skip_list) const;
+            void parse(int verbose, std::string &content, // rewrites content in place according to type
+                       std::map<std::string, std::string> & vars,
+                       std::list<boost::regex> & skip_list ) const;
+        };
+        class HttpRewrite::Phase { // rewrite settings for one direction; HttpRewrite creates req_phase and res_phase
+        public:
+            Phase(); // initialises m_verbose to 0
+            int m_verbose; // forwarded to Content::parse by rewrite_body
+            std::list<Content> content_list; // searched for the "headers" entry or a Content-Type match
+            void read_skip_headers(Z_HTTP_Request *hreq, // removes X-Metaproxy-SkipLink headers, filling skip_list
+                                   std::list<boost::regex> &skip_list);
+            void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq, // rewrites hreq->path using reqline entries
+                std::map<std::string, std::string> & vars) const;
+            void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, // rewrites values of headers whose name matches
+                std::map<std::string, std::string> & vars) const;
+            void rewrite_body(mp::odr & o, // rewrites the body in place when a content entry matches content_type
+                              const char *content_type,
+                              char **content_buf, int *content_len, // both are updated when the body is rewritten
+                              std::map<std::string, std::string> & vars,
+                              std::list<boost::regex> & skip_list ) const;
+        };
+        class HttpRewrite::Event : public HTMLParserEvent { // parser callbacks that write the (rewritten) markup to m_w
+            void openTagStart(const char *tag, int tag_len); // writes "<tag"; may push a within entry for text rewriting
+            void anyTagEnd(const char *tag, int tag_len, int close_it); // writes ">" or "/>"
+            void attribute(const char *tag, int tag_len, // writes the attribute, rewriting its value on a within match
+                           const char *attr, int attr_len,
+                           const char *value, int val_len, // value may be null, in which case only the name is written
+                           const char *sep); // written before and after the value
+            void closeTag(const char *tag, int tag_len); // writes "</tag"; may pop the active within entry
+            void text(const char *value, int len); // writes text, rewritten when a within entry is active
+            const Content *m_content; // content entry whose within_list drives the rewriting
+            WRBUF m_w; // output buffer: allocated in the constructor, destroyed in the destructor
+            std::stack<std::list<Within>::const_iterator> s_within; // entries pushed by openTagStart, read by text()
+            std::map<std::string, std::string> &m_vars; // caller's variable map, passed to Within::exec
+            std::list<boost::regex> & m_skips; // caller's skip list, passed to Within::exec
+        public:
+            Event(const Content *p,
+                  std::map<std::string, std::string> &vars,
+                  std::list<boost::regex> & skip_list );
+            ~Event();
+            const char *result(); // returns the contents of m_w as a C string
+        };
+    }
+}
+
+yf::HttpRewrite::HttpRewrite() :
+    req_phase(new Phase), res_phase(new Phase)
 {
 }
 
@@ -46,22 +134,29 @@ yf::HttpRewrite::~HttpRewrite()
 {
 }
 
-void yf::HttpRewrite::process(mp::Package & package) const 
+void yf::HttpRewrite::process(mp::Package & package) const
 {
     yaz_log(YLOG_LOG, "HttpRewrite begins....");
     Z_GDU *gdu = package.request().get();
     //map of request/response vars
     std::map<std::string, std::string> vars;
     //we have an http req
+
+    std::list<boost::regex> skip_list;
+    
     if (gdu && gdu->which == Z_GDU_HTTP_Request)
     {
         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
         mp::odr o;
-        rewrite_reqline(o, hreq, vars);
+        req_phase->rewrite_reqline(o, hreq, vars);
+        res_phase->read_skip_headers(hreq, skip_list);  
         yaz_log(YLOG_LOG, ">> Request headers");
-        rewrite_headers(o, hreq->headers, vars, req_uri_pats, req_groups_bynum);
-        rewrite_body(o, &hreq->content_buf, &hreq->content_len, vars,
-                req_uri_pats, req_groups_bynum);
+        req_phase->rewrite_headers(o, hreq->headers, vars);
+        req_phase->rewrite_body(o,
+                                z_HTTP_header_lookup(hreq->headers,
+                                                     "Content-Type"),
+                                &hreq->content_buf, &hreq->content_len,
+                                vars, skip_list);
         package.request() = gdu;
     }
     package.move();
@@ -72,225 +167,535 @@ void yf::HttpRewrite::process(mp::Package & package) const
         yaz_log(YLOG_LOG, "Response code %d", hres->code);
         mp::odr o;
         yaz_log(YLOG_LOG, "<< Respose headers");
-        rewrite_headers(o, hres->headers, vars, res_uri_pats, res_groups_bynum);
-        rewrite_body(o, &hres->content_buf, &hres->content_len, vars,
-                res_uri_pats, res_groups_bynum);
+        res_phase->rewrite_headers(o, hres->headers, vars);
+        res_phase->rewrite_body(o,
+                                z_HTTP_header_lookup(hres->headers,
+                                                     "Content-Type"),
+                                &hres->content_buf, &hres->content_len,
+                                vars, skip_list);
         package.response() = gdu;
     }
 }
 
-void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
-        std::map<std::string, std::string> & vars) const 
+// Read (and remove) the X-Metaproxy-SkipLink headers.
+//
+// Each header value has the form "<page-regex> <link-regex>". When the page
+// regex matches the request URL, the link regex is appended to skip_list.
+// Every X-Metaproxy-SkipLink header is removed from hreq, matched or not.
+//
+// The patterns arrive in request headers, so a malformed regular expression
+// must not escape from here as an exception: such a header is logged and
+// ignored, and the remaining headers are still processed.
+void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
+                                 std::list<boost::regex> &skip_list )
+{
+    // Constructing std::string from a null pointer is undefined behaviour.
+    const char *req_path = hreq->path ? hreq->path : "";
+    std::string url(req_path);
+    if ( url.compare(0, 7, "http://") != 0 )
+    { // path was relative, as it usually is
+        const char *host =  z_HTTP_header_lookup(hreq->headers, "Host");
+        if (host)
+          url = "http://" + std::string(host) + req_path ;
+    }
+
+    while ( const char *hv = z_HTTP_header_remove( &(hreq->headers),
+        "X-Metaproxy-SkipLink") )
+    {
+        yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv );
+        const char *p = strchr(hv,' ');
+        if (!p)
+            continue; // should not happen
+        std::string page(hv,p);
+        std::string link(p+1);
+        try
+        {
+            boost::regex pagere(page);
+            if ( !boost::regex_search(url, pagere) )
+            {
+                yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'",
+                        url.c_str(), page.c_str() );
+                continue;
+            }
+            yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s",
+                    page.c_str(), url.c_str() );
+            // Compile the link pattern before touching skip_list so that a
+            // bad pattern leaves the list unchanged.
+            boost::regex linkre(link);
+            skip_list.push_back(linkre);
+        }
+        catch (const boost::regex_error &e)
+        {
+            yaz_log(YLOG_WARN,"SkipLink '%s' ignored, invalid regex: %s",
+                    hv, e.what() );
+        }
+    }
+}
+
+
+void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
+        Z_HTTP_Request *hreq,
+        std::map<std::string, std::string> & vars) const
 {
     //rewrite the request line
     std::string path;
     if (strstr(hreq->path, "http://") == hreq->path)
     {
-        yaz_log(YLOG_LOG, "Path in the method line is absolute, " 
+        yaz_log(YLOG_LOG, "Path in the method line is absolute, "
             "possibly a proxy request");
         path += hreq->path;
     }
     else
     {
         //TODO what about proto
+        const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
+        if (!host)
+            return;
+
         path += "http://";
-        path += z_HTTP_header_lookup(hreq->headers, "Host");
-        path += hreq->path; 
+        path += host;
+        path += hreq->path;
     }
-    yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
-    std::string npath = 
-        test_patterns(vars, path, req_uri_pats, req_groups_bynum);
-    if (!npath.empty())
+
+    std::list<Content>::const_iterator cit = content_list.begin();
+    for (; cit != content_list.end(); cit++)
+        if (cit->type == "headers")
+            break;
+
+    if (cit == content_list.end())
+        return;
+
+    std::list<Within>::const_iterator it = cit->within_list.begin();
+    for (; it != cit->within_list.end(); it++)
+        if (it->reqline)
+        {
+            yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
+            std::list<boost::regex> dummy_skip_list; // no skips here!
+            if (it->exec(vars, path, true, dummy_skip_list))
+            {
+                yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
+                hreq->path = odr_strdup(o, path.c_str());
+            }
+        }
+}
+
+void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
+        Z_HTTP_Header *headers,
+        std::map<std::string, std::string> & vars ) const
+{
+    std::list<Content>::const_iterator cit = content_list.begin();
+    for (; cit != content_list.end(); cit++)
+        if (cit->type == "headers")
+            break;
+
+    if (cit == content_list.end())
+        return;
+
+    for (Z_HTTP_Header *header = headers; header; header = header->next)
     {
-        yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
-        hreq->path = odr_strdup(o, npath.c_str());
+        std::list<Within>::const_iterator it = cit->within_list.begin();
+        for (; it != cit->within_list.end(); it++)
+        {
+            if (!it->header.empty() &&
+                regex_match(header->name, it->header))
+            {
+                // Match and replace only the header value
+                std::string hval(header->value);
+                std::list<boost::regex> dummy_skip_list; // no skips here!
+                if (it->exec(vars, hval, true, dummy_skip_list))
+                {
+                    header->value = odr_strdup(o, hval.c_str());
+                }
+            }
+        }
     }
 }
 
-void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
-        std::map<std::string, std::string> & vars, 
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum) const 
+void yf::HttpRewrite::Phase::rewrite_body(
+    mp::odr &o,
+    const char *content_type,
+    char **content_buf,
+    int *content_len,
+    std::map<std::string, std::string> & vars,
+    std::list<boost::regex> & skip_list ) const
 {
-    for (Z_HTTP_Header *header = headers;
-            header != 0; 
-            header = header->next) 
+    if (*content_len == 0)
+        return;
+    if (!content_type) {
+        yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
+        return;
+    }
+    std::list<Content>::const_iterator cit = content_list.begin();
+    for (; cit != content_list.end(); cit++)
     {
-        std::string sheader(header->name);
-        sheader += ": ";
-        sheader += header->value;
-        yaz_log(YLOG_LOG, "%s: %s", header->name, header->value);
-        std::string out = test_patterns(vars, sheader, uri_pats, groups_bynum);
-        if (!out.empty()) 
+        yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
+                content_type, cit->type.c_str());
+        if (cit->type != "headers"
+            && regex_match(content_type, cit->content_re))
+            break;
+    }
+    if (cit == content_list.end()) {
+        yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
+                content_type );  
+        return;
+    }
+
+    int i;
+    for (i = 0; i < *content_len; i++)
+        if ((*content_buf)[i] == 0) {
+            yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
+            return;  // binary content. skip
+        }
+
+    std::string content(*content_buf, *content_len);
+    cit->parse(m_verbose, content, vars, skip_list);
+    *content_buf = odr_strdup(o, content.c_str());
+    *content_len = strlen(*content_buf);
+}
+
+// Binds the handler to a content entry and to the caller's variable map and
+// skip list (both kept by reference), and allocates the output buffer that
+// the parser callbacks write into.
+yf::HttpRewrite::Event::Event(const Content *p,
+                              std::map<std::string, std::string> & vars,
+                              std::list<boost::regex> & skip_list)
+    : m_content(p),
+      m_w(wrbuf_alloc()),
+      m_vars(vars),
+      m_skips(skip_list)
+{
+}
+
+yf::HttpRewrite::Event::~Event() // releases the buffer obtained from wrbuf_alloc() in the constructor
+{
+    wrbuf_destroy(m_w);
+}
+
+const char *yf::HttpRewrite::Event::result() // exposes what the callbacks have written to m_w so far
+{
+    return wrbuf_cstr(m_w); // NOTE(review): pointer presumably stays valid only while this Event lives - confirm
+}
+
+void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
+{
+    wrbuf_putc(m_w, '<');
+    wrbuf_write(m_w, tag, tag_len);
+
+    std::string t(tag, tag_len);
+    std::list<Within>::const_iterator it = m_content->within_list.begin();
+    for (; it != m_content->within_list.end(); it++)
+    {
+        if (!it->tag.empty() && regex_match(t, it->tag))
         {
-            size_t pos = out.find(": ");
-            if (pos == std::string::npos)
+            if (!it->attr.empty() && regex_match("#text", it->attr))
             {
-                yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
-                continue;
+                s_within.push(it);
+                return;
             }
-            header->name = odr_strdup(o, out.substr(0, pos).c_str());
-            header->value = odr_strdup(o, out.substr(pos+2, 
-                        std::string::npos).c_str());
         }
     }
 }
 
-void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *content_len,
-        std::map<std::string, std::string> & vars,
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum) const 
+void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
+                                       int close_it)
 {
-    if (*content_buf)
+    if (close_it)
     {
-        std::string body(*content_buf);
-        std::string nbody = 
-            test_patterns(vars, body, uri_pats, groups_bynum);
-        if (!nbody.empty())
+        if (!s_within.empty())
         {
-            *content_buf = odr_strdup(o, nbody.c_str());
-            *content_len = nbody.size();
+            std::list<Within>::const_iterator it = s_within.top();
+            std::string t(tag, tag_len);
+            if (regex_match(t, it->tag))
+                s_within.pop();
         }
     }
+    if (close_it)
+        wrbuf_putc(m_w, '/');
+    wrbuf_putc(m_w, '>');
 }
 
-/**
- * Tests pattern from the vector in order and executes recipe on
- the first match.
- */
-const std::string yf::HttpRewrite::test_patterns(
-        std::map<std::string, std::string> & vars,
-        const std::string & txt, 
-        const spair_vec & uri_pats,
-        const std::vector<std::map<int, std::string> > & groups_bynum_vec)
-    const
+void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
+                                       const char *attr, int attr_len,
+                                       const char *value, int val_len,
+                                       const char *sep)
 {
-    for (unsigned i = 0; i < uri_pats.size(); i++) 
+    std::list<Within>::const_iterator it = m_content->within_list.begin();
+    bool subst = false;
+
+    for (; it != m_content->within_list.end(); it++)
+    {
+        std::string t(tag, tag_len);
+        if (it->tag.empty() || regex_match(t, it->tag))
+        {
+            std::string a(attr, attr_len);
+            if (!it->attr.empty() && regex_match(a, it->attr))
+                subst = true;
+        }
+        if (subst)
+            break;
+    }
+
+    wrbuf_putc(m_w, ' ');
+    wrbuf_write(m_w, attr, attr_len);
+    if (value)
     {
-        std::string out = search_replace(vars, txt, 
-                uri_pats[i].first, uri_pats[i].second,
-                groups_bynum_vec[i]);
-        if (!out.empty()) return out;
+        wrbuf_puts(m_w, "=");
+        wrbuf_puts(m_w, sep);
+
+        std::string output;
+        if (subst)
+        {
+            std::string s(value, val_len);
+            it->exec(m_vars, s, true, m_skips);
+            wrbuf_puts(m_w, s.c_str());
+        }
+        else
+            wrbuf_write(m_w, value, val_len);
+        wrbuf_puts(m_w, sep);
+    }
+}
+
+// Handles a closing tag: if the within entry on top of the stack matches
+// this tag name it is popped, then "</" and the tag name are written to the
+// output buffer.
+void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
+{
+    const bool have_active = !s_within.empty();
+    if (have_active)
+    {
+        const std::string name(tag, tag_len);
+        if (regex_match(name, s_within.top()->tag))
+            s_within.pop();
+    }
+    wrbuf_puts(m_w, "</");
+    wrbuf_write(m_w, tag, tag_len);
+}
+
+// Handles a run of text. With no within entry active the text is copied to
+// the output unchanged; otherwise the entry on top of the stack rewrites a
+// copy of it (unanchored) and the result is written instead.
+void yf::HttpRewrite::Event::text(const char *value, int len)
+{
+    if (s_within.empty())
+    {
+        wrbuf_write(m_w, value, len);
+        return;
+    }
+    // openTagStart only pushes iterators that point at real list elements,
+    // so the top of the stack can always be dereferenced.
+    std::string rewritten(value, len);
+    s_within.top()->exec(m_vars, rewritten, false, m_skips);
+    wrbuf_puts(m_w, rewritten.c_str());
+}
+
+static bool embed_quoted_literal(
+    std::string &content,
+    std::map<std::string, std::string> &vars,
+    mp::filter::HttpRewrite::RulePtr ruleptr,
+    bool html_context,
+    std::list<boost::regex> &skip_list)
+{
+    bool replace = false;
+    std::string res;
+    const char *cp = content.c_str();
+    const char *cp0 = cp;
+    while (*cp)
+    {
+        if (html_context && !strncmp(cp, "&quot;", 6))
+        {
+            cp += 6;
+            res.append(cp0, cp - cp0);
+            cp0 = cp;
+            while (*cp)
+            {
+                if (!strncmp(cp, "&quot;", 6))
+                    break;
+                if (*cp == '\n')
+                    break;
+                cp++;
+            }
+            if (!*cp)
+                break;
+            std::string s(cp0, cp - cp0);
+            if (ruleptr->test_patterns(vars, s, true, skip_list))
+                replace = true;
+            cp0 = cp;
+            res.append(s);
+        }
+        else if (*cp == '"' || *cp == '\'')
+        {
+            int m = *cp;
+            cp++;
+            res.append(cp0, cp - cp0);
+            cp0 = cp;
+            while (*cp)
+            {
+                if (cp[-1] != '\\' && *cp == m)
+                    break;
+                if (*cp == '\n')
+                    break;
+                cp++;
+            }
+            if (!*cp)
+                break;
+            std::string s(cp0, cp - cp0);
+            if (ruleptr->test_patterns(vars, s, true, skip_list))
+                replace = true;
+            cp0 = cp;
+            res.append(s);
+        }
+        else if (*cp == '/' && cp[1] == '/')
+        {
+            while (cp[1] && cp[1] != '\n')
+                cp++;
+        }
+        cp++;
     }
-    return "";
+    res.append(cp0, cp - cp0);
+    content = res;
+    return replace;
 }
 
+// Applies this entry's rule to txt, modifying it in place.
+// For type "quoted-literal" the rule is applied to quoted strings found
+// inside txt (HTML context, so &quot; counts as a quote); for any other type
+// the rule is applied to txt as a whole, honouring anchor.
+// Returns whatever the rule application reports.
+bool yf::HttpRewrite::Within::exec(
+    std::map<std::string, std::string> & vars,
+    std::string & txt, bool anchor,
+    std::list<boost::regex> & skip_list) const
+{
+    if (type != "quoted-literal")
+        return rule->test_patterns(vars, txt, anchor, skip_list);
+
+    return embed_quoted_literal(txt, vars, rule, true, skip_list);
+}
 
-const std::string yf::HttpRewrite::search_replace(
-        std::map<std::string, std::string> & vars,
-        const std::string & txt,
-        const std::string & uri_re,
-        const std::string & uri_pat,
-        const std::map<int, std::string> & groups_bynum) const
+bool yf::HttpRewrite::Rule::test_patterns(
+    std::map<std::string, std::string> & vars,
+    std::string & txt, bool anchor,
+    std::list<boost::regex> & skip_list )
 {
-    //exec regex against value
-    boost::regex re(uri_re);
-    boost::smatch what;
+    bool replaces = false;
+    bool first = anchor;
+    std::string out;
     std::string::const_iterator start, end;
     start = txt.begin();
     end = txt.end();
-    std::string out;
-    while (regex_search(start, end, what, re)) //find next full match
+    while (1)
     {
-        unsigned i;
-        for (i = 1; i < what.size(); ++i)
+        std::list<Replace>::iterator bit = replace_list.end();
+        boost::smatch bwhat;
+        bool match_one = false;
+        {
+            std::list<Replace>::iterator it = replace_list.begin();
+            for (; it != replace_list.end(); it++)
+            {
+                if (it->start_anchor && !first)
+                    continue;
+                boost::smatch what;
+                if (regex_search(start, end, what, it->re))
+                {
+                    if (!match_one || what[0].first < bwhat[0].first)
+                    {
+                        bwhat = what;
+                        bit = it;
+                    }
+                    match_one = true;
+                }
+            }
+            if (!match_one)
+                break;
+        }
+        first = false;
+        replaces = true;
+        size_t i;
+        for (i = 1; i < bwhat.size(); ++i)
         {
             //check if the group is named
-            std::map<int, std::string>::const_iterator it
-                = groups_bynum.find(i);
-            if (it != groups_bynum.end()) 
+            std::map<int, std::string>::const_iterator git
+                = bit->group_index.find(i);
+            if (git != bit->group_index.end())
             {   //it is
-                if (!what[i].str().empty())
-                    vars[it->second] = what[i];
+                vars[git->second] = bwhat[i];
             }
 
         }
+        // Compare against skip_list
+        bool skipthis = false;
+        std::list<boost::regex>::iterator si = skip_list.begin();
+        for ( ; si != skip_list.end(); si++) {
+            if ( boost::regex_search(bwhat.str(0), *si) )
+            {
+                skipthis = true;
+                break;
+            }
+        }
         //prepare replacement string
-        std::string rvalue = sub_vars(uri_pat, vars);
-        yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", 
-                what.str(0).c_str(), rvalue.c_str());
-        out.append(start, what[0].first);
-        out.append(rvalue);
-        start = what[0].second; //move search forward
+        std::string rvalue = bit->sub_vars(vars);
+        out.append(start, bwhat[0].first);
+        if ( skipthis )
+        {
+            yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match",
+                    bwhat.str(0).c_str() );
+            out.append(bwhat.str(0).c_str());
+        }
+        else
+        {
+            yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
+                    bwhat.str(0).c_str(), rvalue.c_str());
+            out.append(rvalue);
+        }
+        start = bwhat[0].second; //move search forward
     }
-    //if we had a match cat the last part
-    if (start != txt.begin())
-        out.append(start, end);
-    return out;
+    out.append(start, end);
+    txt = out;
+    return replaces;
 }
 
-void yf::HttpRewrite::parse_groups(
-        const spair_vec & uri_pats,
-        std::vector<std::map<int, std::string> > & groups_bynum_vec)
+void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
 {
-    for (unsigned h = 0; h < uri_pats.size(); h++) 
+    int gnum = 0;
+    bool esc = false;
+    const std::string &str = pattern;
+    std::string res;
+    start_anchor = str[0] == '^';
+    yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
+    for (size_t i = 0; i < str.size(); ++i)
     {
-        int gnum = 0;
-        bool esc = false;
-        //regex is first, subpat is second
-        std::string str = uri_pats[h].first;
-        //for each pair we have an indexing map
-        std::map<int, std::string> groups_bynum;
-        for (unsigned i = 0; i < str.size(); ++i)
+        res += str[i];
+        if (!esc && str[i] == '\\')
         {
-            if (!esc && str[i] == '\\')
-            {
-                esc = true;
-                continue;
-            }
-            if (!esc && str[i] == '(') //group starts
+            esc = true;
+            continue;
+        }
+        if (!esc && str[i] == '(') //group starts
+        {
+            gnum++;
+            if (i+1 < str.size() && str[i+1] == '?') //group with attrs
             {
-                gnum++;
-                if (i+1 < str.size() && str[i+1] == '?') //group with attrs 
+                i++;
+                if (i+1 < str.size() && str[i+1] == ':') //non-capturing
                 {
+                    if (gnum > 0) gnum--;
+                    res += str[i];
                     i++;
-                    if (i+1 < str.size() && str[i+1] == ':') //non-capturing
-                    {
-                        if (gnum > 0) gnum--;
-                        i++;
-                        continue;
-                    }
-                    if (i+1 < str.size() && str[i+1] == 'P') //optional, python
-                        i++;
-                    if (i+1 < str.size() && str[i+1] == '<') //named
+                    res += str[i];
+                    continue;
+                }
+                if (i+1 < str.size() && str[i+1] == 'P') //optional, python
+                    i++;
+                if (i+1 < str.size() && str[i+1] == '<') //named
+                {
+                    i++;
+                    std::string gname;
+                    bool term = false;
+                    while (++i < str.size())
                     {
-                        i++;
-                        std::string gname;
-                        bool term = false;
-                        while (++i < str.size())
-                        {
-                            if (str[i] == '>') { term = true; break; }
-                            if (!isalnum(str[i])) 
-                                throw mp::filter::FilterException
-                                    ("Only alphanumeric chars allowed, found "
-                                     " in '" 
-                                     + str 
-                                     + "' at " 
-                                     + boost::lexical_cast<std::string>(i)); 
-                            gname += str[i];
-                        }
-                        if (!term)
+                        if (str[i] == '>') { term = true; break; }
+                        if (!isalnum(str[i]))
                             throw mp::filter::FilterException
-                                ("Unterminated group name '" + gname 
-                                 + " in '" + str +"'");
-                        groups_bynum[gnum] = gname;
-                        yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
-                                gname.c_str(), gnum);
+                                ("Only alphanumeric chars allowed, found "
+                                 " in '"
+                                 + str
+                                 + "' at "
+                                 + boost::lexical_cast<std::string>(i));
+                        gname += str[i];
                     }
+                    if (!term)
+                        throw mp::filter::FilterException
+                            ("Unterminated group name '" + gname
+                             + " in '" + str +"'");
+                    group_index[gnum] = gname;
+                    yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
+                            gname.c_str(), gnum);
                 }
             }
-            esc = false;
         }
-        groups_bynum_vec.push_back(groups_bynum);
+        esc = false;
     }
+    re = res;
 }
 
-std::string yf::HttpRewrite::sub_vars (const std::string & in, 
-        const std::map<std::string, std::string> & vars)
+std::string yf::HttpRewrite::Replace::sub_vars(
+    const std::map<std::string, std::string> & vars) const
 {
     std::string out;
     bool esc = false;
-    for (unsigned i = 0; i < in.size(); ++i)
+    const std::string & in = recipe;
+    for (size_t i = 0; i < in.size(); ++i)
     {
         if (!esc && in[i] == '\\')
         {
@@ -304,7 +709,7 @@ std::string yf::HttpRewrite::sub_vars (const std::string & in,
                 ++i;
                 std::string name;
                 bool term = false;
-                while (++i < in.size()) 
+                while (++i < in.size())
                 {
                     if (in[i] == '}') { term = true; break; }
                     name += in[i];
@@ -323,7 +728,7 @@ std::string yf::HttpRewrite::sub_vars (const std::string & in,
             {
                 throw mp::filter::FilterException
                     ("Malformed or trimmed var ref in '"
-                     +in+"' at "+boost::lexical_cast<std::string>(i)); 
+                     +in+"' at "+boost::lexical_cast<std::string>(i));
             }
             continue;
         }
@@ -334,52 +739,201 @@ std::string yf::HttpRewrite::sub_vars (const std::string & in,
     return out;
 }
 
-void yf::HttpRewrite::configure(
-        const spair_vec req_uri_pats,
-        const spair_vec res_uri_pats)
+// A freshly built phase has verbosity 0; configure_phase() later assigns
+// m_verbose from the phase element's "verbose" attribute.
+yf::HttpRewrite::Phase::Phase()
+    : m_verbose(0)
 {
-    //TODO should we really copy them out?
-    this->req_uri_pats = req_uri_pats;
-    this->res_uri_pats = res_uri_pats;
-    //pick up names
-    parse_groups(req_uri_pats, req_groups_bynum);
-    parse_groups(res_uri_pats, res_groups_bynum);
 }
 
+// Rewrite CONTENT in place according to this section's type.
+//   "quoted-literal": delegated to quoted_literal().
+//   "html":           fed through HTMLParser with an Event handler; the
+//                     handler's result replaces CONTENT.
+// Any other type leaves CONTENT untouched.  VERBOSE is passed on to the
+// HTML parser only.
+void yf::HttpRewrite::Content::parse(
+    int verbose,
+    std::string &content,
+    std::map<std::string, std::string> &vars,
+    std::list<boost::regex> & skip_list ) const
+{
+    if (type == "quoted-literal")
+    {
+        quoted_literal(content, vars, skip_list);
+        return;
+    }
+    if (type != "html")
+        return;
 
-static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & dest)
+    HTMLParser html;
+    Event handler(this, vars, skip_list);
+
+    html.set_verbose(verbose);
+    html.parse(handler, content.c_str());
+    content = handler.result();
+}
+
+// Apply quoted-literal rewriting to CONTENT using the rule of the first
+// <within> entry only; later entries are not consulted.  Does nothing
+// when no <within> entry was configured.
+void yf::HttpRewrite::Content::quoted_literal(
+    std::string &content,
+    std::map<std::string, std::string> &vars,
+    std::list<boost::regex> & skip_list ) const
+{
+    if (within_list.empty())
+        return;
+
+    const Within &first = within_list.front();
+    embed_quoted_literal(content, vars, first.rule, false, skip_list);
+}
+
+// Build within_list from the <within> elements in the sibling chain that
+// starts at PTR.  Non-element nodes and elements with other names are
+// skipped.
+//
+// For each <within>:
+//   header / attr / tag  optional case-insensitive regexes
+//   rule                 comma-separated list of names looked up in RULES;
+//                        a single name shares the existing rule object,
+//                        several names get a new rule holding the
+//                        concatenated replace lists in the order given
+//   reqline              true only when the attribute is exactly "1"
+//   type                 empty or "quoted-literal"
+//
+// Throws mp::filter::FilterException when a rule name is not present in
+// RULES or when type has any other value.
+void yf::HttpRewrite::Content::configure(
+    const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
 {
+    for (; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) ptr->name, "within"))
+            continue;
+
+        static const char *names[7] =
+            { "header", "attr", "tag", "rule", "reqline", "type", 0 };
+        std::string values[6];
+        mp::xml::parse_attr(ptr, names, values);
+
+        Within w;
+        if (values[0].length() > 0)
+            w.header.assign(values[0], boost::regex_constants::icase);
+        if (values[1].length() > 0)
+            w.attr.assign(values[1], boost::regex_constants::icase);
+        if (values[2].length() > 0)
+            w.tag.assign(values[2], boost::regex_constants::icase);
+
+        std::vector<std::string> rulenames;
+        boost::split(rulenames, values[3], boost::is_any_of(","));
+        // Defensive only: boost::split is expected to yield at least one
+        // (possibly empty) token, in which case the lookup below reports
+        // the problem instead.
+        if (rulenames.size() == 0)
+        {
+            throw mp::filter::FilterException
+                ("Empty rule in '" + values[3] +
+                 "' in http_rewrite filter");
+        }
+
+        // Resolve every name first so that single and multiple references
+        // share one lookup and one error path.
+        std::vector<RulePtr> found;
+        for (size_t i = 0; i < rulenames.size(); ++i)
+        {
+            std::map<std::string,RulePtr>::const_iterator it =
+                rules.find(rulenames[i]);
+            if (it == rules.end())
+                throw mp::filter::FilterException
+                    ("Reference to non-existing rule '" + rulenames[i] +
+                     "' in http_rewrite filter");
+            found.push_back(it->second);
+        }
+
+        if (found.size() == 1)
+        {
+            // A single reference shares the rule object itself.
+            w.rule = found.front();
+        }
+        else
+        {
+            // Several references: concatenate their replace lists into a
+            // fresh rule, preserving the order of the names.
+            RulePtr merged(new Rule);
+            for (size_t i = 0; i < found.size(); ++i)
+                merged->replace_list.insert(
+                    merged->replace_list.end(),
+                    found[i]->replace_list.begin(),
+                    found[i]->replace_list.end());
+            w.rule = merged;
+        }
+
+        w.reqline = values[4] == "1";
+        w.type = values[5];
+        if (!w.type.empty() && w.type != "quoted-literal")
+            throw mp::filter::FilterException
+                ("within type must be quoted-literal or none"
+                 " in http_rewrite filter");
+        within_list.push_back(w);
+    }
+}
+
+// Configure one phase from its XML element PTR.
+//
+// The optional "verbose" attribute (default "0") is stored in
+// phase.m_verbose.  Children are then processed in document order:
+//   <rule name="...">  (name defaults to "default") collects its <rewrite
+//                      from="..." to="..."> children into a rule; a
+//                      <rewrite> with an empty "from" is logged but not
+//                      added.  A later rule with the same name replaces
+//                      the earlier one.
+//   <content type="..." mime="...">  appends a Content entry to
+//                      phase.content_list.  Its <within> children are
+//                      resolved against the rules seen so far, so rules
+//                      must precede the content sections that use them.
+//
+// Throws mp::filter::FilterException for an unknown element, an unknown
+// <rewrite> attribute, or a <content> without a type.
+void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
+{
+    static const char *phase_attrs[2] = { "verbose", 0 };
+    std::string phase_values[1];
+    phase_values[0] = "0";
+    mp::xml::parse_attr(ptr, phase_attrs, phase_values);
+
+    phase.m_verbose = atoi(phase_values[0].c_str());
+
+    std::map<std::string, RulePtr > rules;
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
-        else if (!strcmp((const char *) ptr->name, "rewrite"))
+        else if (!strcmp((const char *) ptr->name, "rule"))
         {
-            std::string from, to;
-            const struct _xmlAttr *attr;
-            for (attr = ptr->properties; attr; attr = attr->next)
+            static const char *rule_attrs[2] = { "name", 0 };
+            std::string rule_values[1];
+            rule_values[0] = "default";
+            mp::xml::parse_attr(ptr, rule_attrs, rule_values);
+
+            RulePtr rule(new Rule);
+            for (xmlNode *p = ptr->children; p; p = p->next)
             {
-                if (!strcmp((const char *) attr->name,  "from"))
-                    from = mp::xml::get_text(attr->children);
-                else if (!strcmp((const char *) attr->name,  "to"))
-                    to = mp::xml::get_text(attr->children);
+                if (p->type != XML_ELEMENT_NODE)
+                    continue;
+                if (!strcmp((const char *) p->name, "rewrite"))
+                {
+                    Replace replace;
+                    std::string from;
+                    const struct _xmlAttr *attr;
+                    for (attr = p->properties; attr; attr = attr->next)
+                    {
+                        if (!strcmp((const char *) attr->name,  "from"))
+                            from = mp::xml::get_text(attr->children);
+                        else if (!strcmp((const char *) attr->name,  "to"))
+                            replace.recipe = mp::xml::get_text(attr->children);
+                        else
+                            throw mp::filter::FilterException
+                                ("Bad attribute "
+                                 + std::string((const char *) attr->name)
+                                 + " in rewrite section of http_rewrite");
+                    }
+                    yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
+                            from.c_str(), replace.recipe.c_str());
+                    // A rewrite without a pattern cannot match anything,
+                    // so it is dropped after being logged.
+                    if (!from.empty())
+                    {
+                        replace.parse_groups(from);
+                        rule->replace_list.push_back(replace);
+                    }
+                }
                 else
                     throw mp::filter::FilterException
-                        ("Bad attribute "
-                         + std::string((const char *) attr->name)
-                         + " in rewrite section of http_rewrite");
+                        ("Bad element "
+                         + std::string((const char *) p->name)
+                         + " in http_rewrite filter");
             }
-            yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", 
-                    from.c_str(), to.c_str());
-            if (!from.empty())
-                dest.push_back(std::make_pair(from, to));
+            rules[rule_values[0]] = rule;
+        }
+        else if (!strcmp((const char *) ptr->name, "content"))
+        {
+            static const char *content_attrs[3] =
+                { "type", "mime", 0 };
+            std::string content_values[2];
+            mp::xml::parse_attr(ptr, content_attrs, content_values);
+            if (content_values[0].empty())
+            {
+                throw mp::filter::FilterException
+                    ("Missing attribute, type for element "
+                     + std::string((const char *) ptr->name)
+                     + " in http_rewrite filter");
+            }
+            Content c;
+
+            c.type = content_values[0];
+            // "mime" is optional; when given it is compiled as a
+            // case-insensitive regex.
+            if (!content_values[1].empty())
+                c.content_re.assign(content_values[1], boost::regex::icase);
+            c.configure(ptr->children, rules);
+            phase.content_list.push_back(c);
         }
         else
         {
             throw mp::filter::FilterException
-                ("Bad element o"
+                ("Bad element "
                  + std::string((const char *) ptr->name)
-                 + " in http_rewrite1 filter");
+                 + " in http_rewrite filter");
         }
     }
 }
@@ -387,19 +941,17 @@ static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & des
 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
         const char *path)
 {
-    spair_vec req_uri_pats;
-    spair_vec res_uri_pats;
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
         else if (!strcmp((const char *) ptr->name, "request"))
         {
-            configure_rules(ptr, req_uri_pats);
+            configure_phase(ptr, *req_phase);
         }
         else if (!strcmp((const char *) ptr->name, "response"))
         {
-            configure_rules(ptr, res_uri_pats);
+            configure_phase(ptr, *res_phase);
         }
         else
         {
@@ -409,7 +961,6 @@ void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
                  + " in http_rewrite1 filter");
         }
     }
-    configure(req_uri_pats, res_uri_pats);
 }
 
 static mp::filter::Base* filter_creator()