Use libyazpp6, libyaz5 and provide libmetaproxy5
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
index 855ee95..5cab5de 100644 (file)
@@ -42,7 +42,6 @@ namespace metaproxy_1 {
         public:
             bool start_anchor;
             boost::regex re;
-            boost::smatch what;
             std::string recipe;
             std::map<int, std::string> group_index;
             std::string sub_vars(
@@ -59,8 +58,8 @@ namespace metaproxy_1 {
         };
         class HttpRewrite::Within {
         public:
-            std::string header;
-            std::string attr;
+            boost::regex header;
+            boost::regex attr;
             boost::regex tag;
             std::string type;
             bool reqline;
@@ -226,25 +225,14 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
         std::list<Within>::const_iterator it = cit->within_list.begin();
         for (; it != cit->within_list.end(); it++)
         {
-            if (it->header.length() > 0 &&
-                yaz_strcasecmp(it->header.c_str(), header->name) == 0)
+            if (!it->header.empty() &&
+                regex_match(header->name, it->header))
             {
-                std::string sheader(header->name);
-                sheader += ": ";
-                sheader += header->value;
-
-                if (it->exec(vars, sheader, true))
+                // Match and replace only the header value
+                std::string hval(header->value);
+                if (it->exec(vars, hval, true))
                 {
-                    size_t pos = sheader.find(": ");
-                    if (pos == std::string::npos)
-                    {
-                        yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
-                        continue;
-                    }
-                    header->name = odr_strdup(
-                        o, sheader.substr(0, pos).c_str());
-                    header->value = odr_strdup(
-                        o, sheader.substr(pos + 2, std::string::npos).c_str());
+                    header->value = odr_strdup(o, hval.c_str());
                 }
             }
         }
@@ -260,6 +248,10 @@ void yf::HttpRewrite::Phase::rewrite_body(
 {
     if (*content_len == 0)
         return;
+    if (!content_type) {
+        yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
+        return;
+    }
     std::list<Content>::const_iterator cit = content_list.begin();
     for (; cit != content_list.end(); cit++)
     {
@@ -269,13 +261,18 @@ void yf::HttpRewrite::Phase::rewrite_body(
             && regex_match(content_type, cit->content_re))
             break;
     }
-    if (cit == content_list.end())
+    if (cit == content_list.end()) {
+        yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
+                content_type );  
         return;
+    }
 
     int i;
     for (i = 0; i < *content_len; i++)
-        if ((*content_buf)[i] == 0)
+        if ((*content_buf)[i] == 0) {
+            yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
             return;  // binary content. skip
+        }
 
     std::string content(*content_buf, *content_len);
     cit->parse(m_verbose, content, vars);
@@ -311,16 +308,10 @@ void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
     {
         if (!it->tag.empty() && regex_match(t, it->tag))
         {
-            std::vector<std::string> attr;
-            boost::split(attr, it->attr, boost::is_any_of(","));
-            size_t i;
-            for (i = 0; i < attr.size(); i++)
+            if (!it->attr.empty() && regex_match("#text", it->attr))
             {
-                if (attr[i].compare("#text") == 0)
-                {
-                    s_within.push(it);
-                    return;
-                }
+                s_within.push(it);
+                return;
             }
         }
     }
@@ -358,15 +349,8 @@ void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
         if (it->tag.empty() || regex_match(t, it->tag))
         {
             std::string a(attr, attr_len);
-            std::vector<std::string> attr;
-            boost::split(attr, it->attr, boost::is_any_of(","));
-            size_t i;
-            for (i = 0; i < attr.size(); i++)
-            {
-                if (attr[i].compare("#text") &&
-                    yaz_strcasecmp(attr[i].c_str(), a.c_str()) == 0)
-                    subst = true;
-            }
+            if (!it->attr.empty() && regex_match(a, it->attr))
+                subst = true;
         }
         if (subst)
             break;
@@ -514,46 +498,49 @@ bool yf::HttpRewrite::Rule::test_patterns(
     while (1)
     {
         std::list<Replace>::iterator bit = replace_list.end();
+        boost::smatch bwhat;
+        bool match_one = false;
         {
-            std::string::const_iterator best_pos = txt.end();
             std::list<Replace>::iterator it = replace_list.begin();
             for (; it != replace_list.end(); it++)
             {
                 if (it->start_anchor && !first)
                     continue;
-                if (regex_search(start, end, it->what, it->re))
+                boost::smatch what;
+                if (regex_search(start, end, what, it->re))
                 {
-                    if (it->what[0].first < best_pos)
+                    if (!match_one || what[0].first < bwhat[0].first)
                     {
-                        best_pos = it->what[0].first;
+                        bwhat = what;
                         bit = it;
                     }
+                    match_one = true;
                 }
             }
-            if (bit == replace_list.end())
+            if (!match_one)
                 break;
         }
         first = false;
         replaces = true;
         size_t i;
-        for (i = 1; i < bit->what.size(); ++i)
+        for (i = 1; i < bwhat.size(); ++i)
         {
             //check if the group is named
             std::map<int, std::string>::const_iterator git
                 = bit->group_index.find(i);
             if (git != bit->group_index.end())
             {   //it is
-                vars[git->second] = bit->what[i];
+                vars[git->second] = bwhat[i];
             }
 
         }
         //prepare replacement string
         std::string rvalue = bit->sub_vars(vars);
         yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
-                bit->what.str(0).c_str(), rvalue.c_str());
-        out.append(start, bit->what[0].first);
+                bwhat.str(0).c_str(), rvalue.c_str());
+        out.append(start, bwhat[0].first);
         out.append(rvalue);
-        start = bit->what[0].second; //move search forward
+        start = bwhat[0].second; //move search forward
     }
     out.append(start, end);
     txt = out;
@@ -722,17 +709,52 @@ void yf::HttpRewrite::Content::configure(
             std::string values[6];
             mp::xml::parse_attr(ptr, names, values);
             Within w;
-            w.header = values[0];
-            w.attr = values[1];
+            if (values[0].length() > 0)
+                w.header.assign(values[0], boost::regex_constants::icase);
+            if (values[1].length() > 0)
+                w.attr.assign(values[1], boost::regex_constants::icase);
             if (values[2].length() > 0)
-                w.tag = values[2];
-            std::map<std::string,RulePtr>::const_iterator it =
-                rules.find(values[3]);
-            if (it == rules.end())
+                w.tag.assign(values[2], boost::regex_constants::icase);
+
+            std::vector<std::string> rulenames;
+            boost::split(rulenames, values[3], boost::is_any_of(","));
+            if (rulenames.size() == 0)
+            {
                 throw mp::filter::FilterException
-                    ("Reference to non-existing rule '" + values[3] +
+                    ("Empty rule in '" + values[3] +
                      "' in http_rewrite filter");
-            w.rule = it->second;
+            }
+            else if (rulenames.size() == 1)
+            {
+                std::map<std::string,RulePtr>::const_iterator it =
+                    rules.find(rulenames[0]);
+                if (it == rules.end())
+                    throw mp::filter::FilterException
+                        ("Reference to non-existing rule '" + rulenames[0] +
+                         "' in http_rewrite filter");
+                w.rule = it->second;
+
+            }
+            else
+            {
+                RulePtr rule(new Rule);
+                size_t i;
+                for (i = 0; i < rulenames.size(); i++)
+                {
+                    std::map<std::string,RulePtr>::const_iterator it =
+                        rules.find(rulenames[i]);
+                    if (it == rules.end())
+                        throw mp::filter::FilterException
+                            ("Reference to non-existing rule '" + rulenames[i] +
+                             "' in http_rewrite filter");
+                    RulePtr subRule = it->second;
+                    std::list<Replace>::iterator rit =
+                        subRule->replace_list.begin();
+                    for (; rit != subRule->replace_list.end(); rit++)
+                        rule->replace_list.push_back(*rit);
+                }
+                w.rule = rule;
+            }
             w.reqline = values[4] == "1";
             w.type = values[5];
             if (w.type.empty() || w.type == "quoted-literal")
@@ -821,8 +843,8 @@ void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
             Content c;
 
             c.type = values[0];
-            // if (!values[1].empty())
-                c.content_re = values[1];
+            if (!values[1].empty())
+                c.content_re.assign(values[1], boost::regex::icase);
             c.configure(ptr->children, rules);
             phase.content_list.push_back(c);
         }