HTML parser stops consuming a tag when a '<' character is encountered
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
index def74b1..58243bf 100644 (file)
@@ -229,6 +229,12 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
             if (!it->header.empty() &&
                 regex_match(header->name, it->header))
             {
+#ifdef OLDHEADERMATCH                
+                // Matches and replaces the whole header line.
+                // This is good if you want to play with the header name too,
+                // but useless for patterns that want to anchor to the beginning
+                // or end of the header value, as we want to do with host-relative
+                // links. This code should probably be removed.
                 std::string sheader(header->name);
                 sheader += ": ";
                 sheader += header->value;
@@ -246,6 +252,15 @@ void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
                     header->value = odr_strdup(
                         o, sheader.substr(pos + 2, std::string::npos).c_str());
                 }
+#else
+                // Match and replace only the header value
+                std::string hval(header->value);
+                if (it->exec(vars, hval, true))
+                {
+                    header->value = odr_strdup(o, hval.c_str());
+                }
+                    
+#endif
             }
         }
     }
@@ -260,6 +275,10 @@ void yf::HttpRewrite::Phase::rewrite_body(
 {
     if (*content_len == 0)
         return;
+    if (!content_type) {
+        yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
+        return;
+    }
     std::list<Content>::const_iterator cit = content_list.begin();
     for (; cit != content_list.end(); cit++)
     {
@@ -269,13 +288,18 @@ void yf::HttpRewrite::Phase::rewrite_body(
             && regex_match(content_type, cit->content_re))
             break;
     }
-    if (cit == content_list.end())
+    if (cit == content_list.end()) {
+        yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
+                content_type );  
         return;
+    }
 
     int i;
     for (i = 0; i < *content_len; i++)
-        if ((*content_buf)[i] == 0)
+        if ((*content_buf)[i] == 0) {
+            yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
             return;  // binary content. skip
+        }
 
     std::string content(*content_buf, *content_len);
     cit->parse(m_verbose, content, vars);