HTML parser stops eating tag when <-char is met
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
index 9ef4de2..58243bf 100644 (file)
@@ -229,6 +229,12 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
             if (!it->header.empty() &&
                 regex_match(header->name, it->header))
             {
+#ifdef OLDHEADERMATCH                
+                // Legacy path: matches and replaces the whole "Name: value" line.
+                // Useful when a rule must rewrite the header name as well, but it
+                // cannot serve patterns anchored to the start or end of the header
+                // value, which is what rewriting host-relative links requires.
+                // TODO: this code should probably be removed.
                 std::string sheader(header->name);
                 sheader += ": ";
                 sheader += header->value;
@@ -246,6 +252,15 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
                     header->value = odr_strdup(
                         o, sheader.substr(pos + 2, std::string::npos).c_str());
                 }
+#else
+                // Match and replace only the header value
+                std::string hval(header->value);
+                if (it->exec(vars, hval, true))
+                {
+                    header->value = odr_strdup(o, hval.c_str());
+                }
+                    
+#endif
             }
         }
     }
@@ -260,6 +275,10 @@ void yf::HttpRewrite::Phase::rewrite_body(
 {
     if (*content_len == 0)
         return;
+    if (!content_type) {
+        yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
+        return;
+    }
     std::list<Content>::const_iterator cit = content_list.begin();
     for (; cit != content_list.end(); cit++)
     {
@@ -269,13 +288,18 @@ void yf::HttpRewrite::Phase::rewrite_body(
             && regex_match(content_type, cit->content_re))
             break;
     }
-    if (cit == content_list.end())
+    if (cit == content_list.end()) {
+        yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
+                content_type );  
         return;
+    }
 
     int i;
     for (i = 0; i < *content_len; i++)
-        if ((*content_buf)[i] == 0)
+        if ((*content_buf)[i] == 0) {
+            yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
             return;  // binary content. skip
+        }
 
     std::string content(*content_buf, *content_len);
     cit->parse(m_verbose, content, vars);
@@ -724,17 +748,36 @@ void yf::HttpRewrite::Content::configure(
                     ("Empty rule in '" + values[3] +
                      "' in http_rewrite filter");
             }
-            size_t i;
-            for (i = 0; i < rulenames.size(); i++)
+            else if (rulenames.size() == 1)
             {
                 std::map<std::string,RulePtr>::const_iterator it =
-                    rules.find(rulenames[i]);
+                    rules.find(rulenames[0]);
                 if (it == rules.end())
                     throw mp::filter::FilterException
-                        ("Reference to non-existing rule '" + rulenames[i] +
+                        ("Reference to non-existing rule '" + rulenames[0] +
                          "' in http_rewrite filter");
-                if (i == 0)
-                    w.rule = it->second;
+                w.rule = it->second;
+
+            }
+            else
+            {
+                RulePtr rule(new Rule);
+                size_t i;
+                for (i = 0; i < rulenames.size(); i++)
+                {
+                    std::map<std::string,RulePtr>::const_iterator it =
+                        rules.find(rulenames[i]);
+                    if (it == rules.end())
+                        throw mp::filter::FilterException
+                            ("Reference to non-existing rule '" + rulenames[i] +
+                             "' in http_rewrite filter");
+                    RulePtr subRule = it->second;
+                    std::list<Replace>::iterator rit =
+                        subRule->replace_list.begin();
+                    for (; rit != subRule->replace_list.end(); rit++)
+                        rule->replace_list.push_back(*rit);
+                }
+                w.rule = rule;
             }
             w.reqline = values[4] == "1";
             w.type = values[5];