allow base attribute in configuration file tags
[metaproxy-moved-to-github.git] / src / filter_http_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24 #include "html_parser.hpp"
25
26 #include <yaz/zgdu.h>
27 #include <yaz/log.h>
28
29 #include <stack>
30 #include <boost/regex.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <boost/algorithm/string.hpp>
33
34 #include <map>
35
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
38
namespace metaproxy_1 {
    namespace filter {
        // A single rewrite instruction: a pattern plus the replacement
        // recipe that is substituted for every match of that pattern.
        class HttpRewrite::Replace {
        public:
            // true when the configured pattern starts with '^'
            // (set by parse_groups)
            bool start_anchor;
            // compiled pattern (assigned by parse_groups)
            boost::regex re;
            // replacement text; ${name} references are expanded by sub_vars
            std::string recipe;
            // capture group number -> group name, filled by parse_groups
            std::map<int, std::string> group_index;
            // Returns recipe with ${name} references replaced from vars.
            std::string sub_vars(
                const std::map<std::string, std::string> & vars) const;
            // Scans pattern for named groups and compiles it into re.
            void parse_groups(std::string pattern);
        };

        // An ordered list of Replace instructions that are tried together.
        class HttpRewrite::Rule {
        public:
            std::list<Replace> replace_list;
            // Rewrites txt in place using replace_list; returns true when
            // at least one pattern matched. Named groups are stored in vars.
            bool test_patterns(
                std::map<std::string, std::string> &vars,
                std::string &txt, bool anchor,
                std::list<boost::regex> &skip_list);
        };
        // Selects where a Rule is applied (configured by a "within" element).
        class HttpRewrite::Within {
        public:
            // regexes from the header/attr/tag attributes; left empty
            // when the corresponding attribute is not given
            boost::regex header;
            boost::regex attr;
            boost::regex tag;
            // either empty or "quoted-literal"
            std::string type;
            // true when the reqline attribute is "1"
            bool reqline;
            RulePtr rule;
            // Applies rule to txt; returns what the rule application returns.
            bool exec(std::map<std::string, std::string> &vars,
                      std::string &txt, bool anchor,
                      std::list<boost::regex> &skip_list) const;
        };

        // One "content" section of the configuration.
        class HttpRewrite::Content {
        public:
            // content handler name: "headers", "html" and "quoted-literal"
            // are the values acted upon in this file
            std::string type;
            // regex from the mime attribute, matched against Content-Type
            boost::regex content_re;
            std::list<Within> within_list;
            // Reads the "within" child elements starting at ptr.
            void configure(const xmlNode *ptr,
                           std::map<std::string, RulePtr > &rules);
            void quoted_literal(std::string &content,
                                std::map<std::string, std::string> &vars,
                                std::list<boost::regex> & skip_list) const;
            // Rewrites content in place according to type.
            void parse(int verbose, std::string &content,
                       std::map<std::string, std::string> & vars,
                       std::list<boost::regex> & skip_list ) const;
        };
        // Configuration and rewrite logic for one direction
        // (request or response).
        class HttpRewrite::Phase {
        public:
            Phase();
            // verbosity passed on to the HTML parser
            int m_verbose;
            std::list<Content> content_list;
            // link patterns collected from X-Metaproxy-SkipLink headers
            std::list<boost::regex> skip_list;
            void read_skip_headers(Z_HTTP_Request *hreq);
            void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
                std::map<std::string, std::string> & vars) const;
            void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
                std::map<std::string, std::string> & vars) const;
            void rewrite_body(mp::odr & o,
                              const char *content_type,
                              char **content_buf, int *content_len,
                              std::map<std::string, std::string> & vars,
                              std::list<boost::regex> & skip_list ) const;
        };
        // HTML parser callbacks that re-serialize the document into m_w,
        // rewriting attribute values and text along the way.
        class HttpRewrite::Event : public HTMLParserEvent {
            void openTagStart(const char *tag, int tag_len);
            void anyTagEnd(const char *tag, int tag_len, int close_it);
            void attribute(const char *tag, int tag_len,
                           const char *attr, int attr_len,
                           const char *value, int val_len,
                           const char *sep);
            void closeTag(const char *tag, int tag_len);
            void text(const char *value, int len);
            const Content *m_content;
            // output buffer; allocated in the constructor and destroyed in
            // the destructor
            WRBUF m_w;
            // "within" entries whose tag is currently open and whose attr
            // pattern matches "#text"
            std::stack<std::list<Within>::const_iterator> s_within;
            std::map<std::string, std::string> &m_vars;
            std::list<boost::regex> & m_skips;
        public:
            Event(const Content *p,
                  std::map<std::string, std::string> &vars,
                  std::list<boost::regex> & skip_list );
            ~Event();
            // Returns the rewritten document collected so far.
            const char *result();
        };
    }
}
127
// Creates the filter with one Phase object for requests and one for
// responses; both are filled in later by configure().
yf::HttpRewrite::HttpRewrite() :
    req_phase(new Phase), res_phase(new Phase)
{
}
132
// Nothing to release explicitly here.
yf::HttpRewrite::~HttpRewrite()
{
}
136
137 void yf::HttpRewrite::process(mp::Package & package) const
138 {
139     yaz_log(YLOG_LOG, "HttpRewrite begins....");
140     Z_GDU *gdu = package.request().get();
141     //map of request/response vars
142     std::map<std::string, std::string> vars;
143     //we have an http req
144     if (gdu && gdu->which == Z_GDU_HTTP_Request)
145     {
146         Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
147         mp::odr o;
148         req_phase->rewrite_reqline(o, hreq, vars);
149         res_phase->read_skip_headers(hreq);  
150         yaz_log(YLOG_LOG, ">> Request headers");
151         req_phase->rewrite_headers(o, hreq->headers, vars);
152         req_phase->rewrite_body(o,
153                                 z_HTTP_header_lookup(hreq->headers,
154                                                      "Content-Type"),
155                                 &hreq->content_buf, &hreq->content_len,
156                                 vars, res_phase->skip_list);
157         // TODO skip_list does not really belong in the phase. More like
158         // HttpRewrite itself!
159         package.request() = gdu;
160     }
161     package.move();
162     gdu = package.response().get();
163     if (gdu && gdu->which == Z_GDU_HTTP_Response)
164     {
165         Z_HTTP_Response *hres = gdu->u.HTTP_Response;
166         yaz_log(YLOG_LOG, "Response code %d", hres->code);
167         mp::odr o;
168         yaz_log(YLOG_LOG, "<< Respose headers");
169         res_phase->rewrite_headers(o, hres->headers, vars);
170         res_phase->rewrite_body(o,
171                                 z_HTTP_header_lookup(hres->headers,
172                                                      "Content-Type"),
173                                 &hres->content_buf, &hres->content_len,
174                                 vars, res_phase->skip_list);
175         package.response() = gdu;
176     }
177 }
178
179 // Read (and remove) the X-Metaproxy-SkipLink headers
180 void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq)
181 {
182     skip_list.clear();
183     std::string url(hreq->path);
184     if ( url.substr(0,7) != "http://" )
185     { // path was relative, as it usually is
186         const char *host =  z_HTTP_header_lookup(hreq->headers, "Host");
187         if (host)
188           url = "http://" + std::string(host) + hreq->path ;
189     }
190
191     while ( const char *hv = z_HTTP_header_remove( &(hreq->headers),
192         "X-Metaproxy-SkipLink") )
193     {
194         yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv );
195         const char *p = strchr(hv,' ');
196         if (!p)
197             continue; // should not happen
198         std::string page(hv,p);
199         std::string link(p+1);
200         boost::regex pagere(page);
201         if ( boost::regex_search(url, pagere) )
202         {
203             yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s",
204                     page.c_str(), url.c_str() );
205             boost::regex linkre(link);
206             skip_list.push_back(linkre);
207         }
208         else
209         {
210             yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'",
211                     url.c_str(), page.c_str() );
212         }
213     }
214 }
215
216
217 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
218         Z_HTTP_Request *hreq,
219         std::map<std::string, std::string> & vars) const
220 {
221     //rewrite the request line
222     std::string path;
223     if (strstr(hreq->path, "http://") == hreq->path)
224     {
225         yaz_log(YLOG_LOG, "Path in the method line is absolute, "
226             "possibly a proxy request");
227         path += hreq->path;
228     }
229     else
230     {
231         //TODO what about proto
232         const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
233         if (!host)
234             return;
235
236         path += "http://";
237         path += host;
238         path += hreq->path;
239     }
240
241     std::list<Content>::const_iterator cit = content_list.begin();
242     for (; cit != content_list.end(); cit++)
243         if (cit->type == "headers")
244             break;
245
246     if (cit == content_list.end())
247         return;
248
249     std::list<Within>::const_iterator it = cit->within_list.begin();
250     for (; it != cit->within_list.end(); it++)
251         if (it->reqline)
252         {
253             yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
254             std::list<boost::regex> dummy_skip_list; // no skips here!
255             if (it->exec(vars, path, true, dummy_skip_list))
256             {
257                 yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
258                 hreq->path = odr_strdup(o, path.c_str());
259             }
260         }
261 }
262
263 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
264         Z_HTTP_Header *headers,
265         std::map<std::string, std::string> & vars ) const
266 {
267     std::list<Content>::const_iterator cit = content_list.begin();
268     for (; cit != content_list.end(); cit++)
269         if (cit->type == "headers")
270             break;
271
272     if (cit == content_list.end())
273         return;
274
275     for (Z_HTTP_Header *header = headers; header; header = header->next)
276     {
277         std::list<Within>::const_iterator it = cit->within_list.begin();
278         for (; it != cit->within_list.end(); it++)
279         {
280             if (!it->header.empty() &&
281                 regex_match(header->name, it->header))
282             {
283                 // Match and replace only the header value
284                 std::string hval(header->value);
285                 std::list<boost::regex> dummy_skip_list; // no skips here!
286                 if (it->exec(vars, hval, true, dummy_skip_list))
287                 {
288                     header->value = odr_strdup(o, hval.c_str());
289                 }
290             }
291         }
292     }
293 }
294
295 void yf::HttpRewrite::Phase::rewrite_body(
296     mp::odr &o,
297     const char *content_type,
298     char **content_buf,
299     int *content_len,
300     std::map<std::string, std::string> & vars,
301     std::list<boost::regex> & skip_list ) const
302 {
303     if (*content_len == 0)
304         return;
305     if (!content_type) {
306         yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
307         return;
308     }
309     std::list<Content>::const_iterator cit = content_list.begin();
310     for (; cit != content_list.end(); cit++)
311     {
312         yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
313                 content_type, cit->type.c_str());
314         if (cit->type != "headers"
315             && regex_match(content_type, cit->content_re))
316             break;
317     }
318     if (cit == content_list.end()) {
319         yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
320                 content_type );  
321         return;
322     }
323
324     int i;
325     for (i = 0; i < *content_len; i++)
326         if ((*content_buf)[i] == 0) {
327             yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
328             return;  // binary content. skip
329         }
330
331     std::string content(*content_buf, *content_len);
332     cit->parse(m_verbose, content, vars, skip_list);
333     *content_buf = odr_strdup(o, content.c_str());
334     *content_len = strlen(*content_buf);
335 }
336
337 yf::HttpRewrite::Event::Event(const Content *p,
338                               std::map<std::string, std::string> & vars,
339                               std::list<boost::regex> & skip_list 
340     ) : m_content(p), m_vars(vars), m_skips(skip_list)
341 {
342     m_w = wrbuf_alloc();
343 }
344
// Releases the output buffer allocated in the constructor.
yf::HttpRewrite::Event::~Event()
{
    wrbuf_destroy(m_w);
}
349
// Returns the contents of the output buffer as a C string.
const char *yf::HttpRewrite::Event::result()
{
    return wrbuf_cstr(m_w);
}
354
355 void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
356 {
357     wrbuf_putc(m_w, '<');
358     wrbuf_write(m_w, tag, tag_len);
359
360     std::string t(tag, tag_len);
361     std::list<Within>::const_iterator it = m_content->within_list.begin();
362     for (; it != m_content->within_list.end(); it++)
363     {
364         if (!it->tag.empty() && regex_match(t, it->tag))
365         {
366             if (!it->attr.empty() && regex_match("#text", it->attr))
367             {
368                 s_within.push(it);
369                 return;
370             }
371         }
372     }
373 }
374
375 void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
376                                        int close_it)
377 {
378     if (close_it)
379     {
380         if (!s_within.empty())
381         {
382             std::list<Within>::const_iterator it = s_within.top();
383             std::string t(tag, tag_len);
384             if (regex_match(t, it->tag))
385                 s_within.pop();
386         }
387     }
388     if (close_it)
389         wrbuf_putc(m_w, '/');
390     wrbuf_putc(m_w, '>');
391 }
392
393 void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
394                                        const char *attr, int attr_len,
395                                        const char *value, int val_len,
396                                        const char *sep)
397 {
398     std::list<Within>::const_iterator it = m_content->within_list.begin();
399     bool subst = false;
400
401     for (; it != m_content->within_list.end(); it++)
402     {
403         std::string t(tag, tag_len);
404         if (it->tag.empty() || regex_match(t, it->tag))
405         {
406             std::string a(attr, attr_len);
407             if (!it->attr.empty() && regex_match(a, it->attr))
408                 subst = true;
409         }
410         if (subst)
411             break;
412     }
413
414     wrbuf_putc(m_w, ' ');
415     wrbuf_write(m_w, attr, attr_len);
416     if (value)
417     {
418         wrbuf_puts(m_w, "=");
419         wrbuf_puts(m_w, sep);
420
421         std::string output;
422         if (subst)
423         {
424             std::string s(value, val_len);
425             it->exec(m_vars, s, true, m_skips);
426             wrbuf_puts(m_w, s.c_str());
427         }
428         else
429             wrbuf_write(m_w, value, val_len);
430         wrbuf_puts(m_w, sep);
431     }
432 }
433
434 void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
435 {
436     if (!s_within.empty())
437     {
438         std::list<Within>::const_iterator it = s_within.top();
439         std::string t(tag, tag_len);
440         if (regex_match(t, it->tag))
441             s_within.pop();
442     }
443     wrbuf_puts(m_w, "</");
444     wrbuf_write(m_w, tag, tag_len);
445 }
446
447 void yf::HttpRewrite::Event::text(const char *value, int len)
448 {
449     std::list<Within>::const_iterator it = m_content->within_list.end();
450     if (!s_within.empty())
451         it = s_within.top();
452     if (it != m_content->within_list.end())
453     {
454         std::string s(value, len);
455         it->exec(m_vars, s, false, m_skips);
456         wrbuf_puts(m_w, s.c_str());
457     }
458     else
459         wrbuf_write(m_w, value, len);
460 }
461
462 static bool embed_quoted_literal(
463     std::string &content,
464     std::map<std::string, std::string> &vars,
465     mp::filter::HttpRewrite::RulePtr ruleptr,
466     bool html_context,
467     std::list<boost::regex> &skip_list)
468 {
469     bool replace = false;
470     std::string res;
471     const char *cp = content.c_str();
472     const char *cp0 = cp;
473     while (*cp)
474     {
475         if (html_context && !strncmp(cp, "&quot;", 6))
476         {
477             cp += 6;
478             res.append(cp0, cp - cp0);
479             cp0 = cp;
480             while (*cp)
481             {
482                 if (!strncmp(cp, "&quot;", 6))
483                     break;
484                 if (*cp == '\n')
485                     break;
486                 cp++;
487             }
488             if (!*cp)
489                 break;
490             std::string s(cp0, cp - cp0);
491             if (ruleptr->test_patterns(vars, s, true, skip_list))
492                 replace = true;
493             cp0 = cp;
494             res.append(s);
495         }
496         else if (*cp == '"' || *cp == '\'')
497         {
498             int m = *cp;
499             cp++;
500             res.append(cp0, cp - cp0);
501             cp0 = cp;
502             while (*cp)
503             {
504                 if (cp[-1] != '\\' && *cp == m)
505                     break;
506                 if (*cp == '\n')
507                     break;
508                 cp++;
509             }
510             if (!*cp)
511                 break;
512             std::string s(cp0, cp - cp0);
513             if (ruleptr->test_patterns(vars, s, true, skip_list))
514                 replace = true;
515             cp0 = cp;
516             res.append(s);
517         }
518         else if (*cp == '/' && cp[1] == '/')
519         {
520             while (cp[1] && cp[1] != '\n')
521                 cp++;
522         }
523         cp++;
524     }
525     res.append(cp0, cp - cp0);
526     content = res;
527     return replace;
528 }
529
530 bool yf::HttpRewrite::Within::exec(
531     std::map<std::string, std::string> & vars,
532     std::string & txt, bool anchor,
533     std::list<boost::regex> & skip_list) const
534 {
535     if (type == "quoted-literal")
536     {
537         return embed_quoted_literal(txt, vars, rule, true, skip_list);
538     }
539     else
540     {
541         return rule->test_patterns(vars, txt, anchor, skip_list);
542     }
543 }
544
545 bool yf::HttpRewrite::Rule::test_patterns(
546     std::map<std::string, std::string> & vars,
547     std::string & txt, bool anchor,
548     std::list<boost::regex> & skip_list )
549 {
550     bool replaces = false;
551     bool first = anchor;
552     std::string out;
553     std::string::const_iterator start, end;
554     start = txt.begin();
555     end = txt.end();
556     while (1)
557     {
558         std::list<Replace>::iterator bit = replace_list.end();
559         boost::smatch bwhat;
560         bool match_one = false;
561         {
562             std::list<Replace>::iterator it = replace_list.begin();
563             for (; it != replace_list.end(); it++)
564             {
565                 if (it->start_anchor && !first)
566                     continue;
567                 boost::smatch what;
568                 if (regex_search(start, end, what, it->re))
569                 {
570                     if (!match_one || what[0].first < bwhat[0].first)
571                     {
572                         bwhat = what;
573                         bit = it;
574                     }
575                     match_one = true;
576                 }
577             }
578             if (!match_one)
579                 break;
580         }
581         first = false;
582         replaces = true;
583         size_t i;
584         for (i = 1; i < bwhat.size(); ++i)
585         {
586             //check if the group is named
587             std::map<int, std::string>::const_iterator git
588                 = bit->group_index.find(i);
589             if (git != bit->group_index.end())
590             {   //it is
591                 vars[git->second] = bwhat[i];
592             }
593
594         }
595         // Compare against skip_list
596         bool skipthis = false;
597         std::list<boost::regex>::iterator si = skip_list.begin();
598         for ( ; si != skip_list.end(); si++) {
599             if ( boost::regex_search(bwhat.str(0), *si) )
600             {
601                 skipthis = true;
602                 break;
603             }
604         }
605         //prepare replacement string
606         std::string rvalue = bit->sub_vars(vars);
607         out.append(start, bwhat[0].first);
608         if ( skipthis )
609         {
610             yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match",
611                     bwhat.str(0).c_str() );
612             out.append(bwhat.str(0).c_str());
613         }
614         else
615         {
616             yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
617                     bwhat.str(0).c_str(), rvalue.c_str());
618             out.append(rvalue);
619         }
620         start = bwhat[0].second; //move search forward
621     }
622     out.append(start, end);
623     txt = out;
624     return replaces;
625 }
626
627 void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
628 {
629     int gnum = 0;
630     bool esc = false;
631     const std::string &str = pattern;
632     std::string res;
633     start_anchor = str[0] == '^';
634     yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
635     for (size_t i = 0; i < str.size(); ++i)
636     {
637         res += str[i];
638         if (!esc && str[i] == '\\')
639         {
640             esc = true;
641             continue;
642         }
643         if (!esc && str[i] == '(') //group starts
644         {
645             gnum++;
646             if (i+1 < str.size() && str[i+1] == '?') //group with attrs
647             {
648                 i++;
649                 if (i+1 < str.size() && str[i+1] == ':') //non-capturing
650                 {
651                     if (gnum > 0) gnum--;
652                     res += str[i];
653                     i++;
654                     res += str[i];
655                     continue;
656                 }
657                 if (i+1 < str.size() && str[i+1] == 'P') //optional, python
658                     i++;
659                 if (i+1 < str.size() && str[i+1] == '<') //named
660                 {
661                     i++;
662                     std::string gname;
663                     bool term = false;
664                     while (++i < str.size())
665                     {
666                         if (str[i] == '>') { term = true; break; }
667                         if (!isalnum(str[i]))
668                             throw mp::filter::FilterException
669                                 ("Only alphanumeric chars allowed, found "
670                                  " in '"
671                                  + str
672                                  + "' at "
673                                  + boost::lexical_cast<std::string>(i));
674                         gname += str[i];
675                     }
676                     if (!term)
677                         throw mp::filter::FilterException
678                             ("Unterminated group name '" + gname
679                              + " in '" + str +"'");
680                     group_index[gnum] = gname;
681                     yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
682                             gname.c_str(), gnum);
683                 }
684             }
685         }
686         esc = false;
687     }
688     re = res;
689 }
690
691 std::string yf::HttpRewrite::Replace::sub_vars(
692     const std::map<std::string, std::string> & vars) const
693 {
694     std::string out;
695     bool esc = false;
696     const std::string & in = recipe;
697     for (size_t i = 0; i < in.size(); ++i)
698     {
699         if (!esc && in[i] == '\\')
700         {
701             esc = true;
702             continue;
703         }
704         if (!esc && in[i] == '$') //var
705         {
706             if (i+1 < in.size() && in[i+1] == '{') //ref prefix
707             {
708                 ++i;
709                 std::string name;
710                 bool term = false;
711                 while (++i < in.size())
712                 {
713                     if (in[i] == '}') { term = true; break; }
714                     name += in[i];
715                 }
716                 if (!term) throw mp::filter::FilterException
717                     ("Unterminated var ref in '"+in+"' at "
718                      + boost::lexical_cast<std::string>(i));
719                 std::map<std::string, std::string>::const_iterator it
720                     = vars.find(name);
721                 if (it != vars.end())
722                 {
723                     out += it->second;
724                 }
725             }
726             else
727             {
728                 throw mp::filter::FilterException
729                     ("Malformed or trimmed var ref in '"
730                      +in+"' at "+boost::lexical_cast<std::string>(i));
731             }
732             continue;
733         }
734         //passthru
735         out += in[i];
736         esc = false;
737     }
738     return out;
739 }
740
// Starts with verbosity 0; configure_phase() may override it.
yf::HttpRewrite::Phase::Phase() : m_verbose(0)
{
}
744
745 void yf::HttpRewrite::Content::parse(
746     int verbose,
747     std::string &content,
748     std::map<std::string, std::string> &vars,
749     std::list<boost::regex> & skip_list ) const
750 {
751     if (type == "html")
752     {
753         HTMLParser parser;
754         Event ev(this, vars, skip_list);
755
756         parser.set_verbose(verbose);
757
758         parser.parse(ev, content.c_str());
759         content = ev.result();
760     }
761     if (type == "quoted-literal")
762     {
763         quoted_literal(content, vars, skip_list);
764     }
765 }
766
767 void yf::HttpRewrite::Content::quoted_literal(
768     std::string &content,
769     std::map<std::string, std::string> &vars,
770     std::list<boost::regex> & skip_list ) const
771 {
772     std::list<Within>::const_iterator it = within_list.begin();
773     if (it != within_list.end())
774         embed_quoted_literal(content, vars, it->rule, false, skip_list);
775 }
776
777 void yf::HttpRewrite::Content::configure(
778     const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
779 {
780     for (; ptr; ptr = ptr->next)
781     {
782         if (ptr->type != XML_ELEMENT_NODE)
783             continue;
784         if (!strcmp((const char *) ptr->name, "within"))
785         {
786             static const char *names[7] =
787                 { "header", "attr", "tag", "rule", "reqline", "type", 0 };
788             std::string values[6];
789             mp::xml::parse_attr(ptr, names, values);
790             Within w;
791             if (values[0].length() > 0)
792                 w.header.assign(values[0], boost::regex_constants::icase);
793             if (values[1].length() > 0)
794                 w.attr.assign(values[1], boost::regex_constants::icase);
795             if (values[2].length() > 0)
796                 w.tag.assign(values[2], boost::regex_constants::icase);
797
798             std::vector<std::string> rulenames;
799             boost::split(rulenames, values[3], boost::is_any_of(","));
800             if (rulenames.size() == 0)
801             {
802                 throw mp::filter::FilterException
803                     ("Empty rule in '" + values[3] +
804                      "' in http_rewrite filter");
805             }
806             else if (rulenames.size() == 1)
807             {
808                 std::map<std::string,RulePtr>::const_iterator it =
809                     rules.find(rulenames[0]);
810                 if (it == rules.end())
811                     throw mp::filter::FilterException
812                         ("Reference to non-existing rule '" + rulenames[0] +
813                          "' in http_rewrite filter");
814                 w.rule = it->second;
815
816             }
817             else
818             {
819                 RulePtr rule(new Rule);
820                 size_t i;
821                 for (i = 0; i < rulenames.size(); i++)
822                 {
823                     std::map<std::string,RulePtr>::const_iterator it =
824                         rules.find(rulenames[i]);
825                     if (it == rules.end())
826                         throw mp::filter::FilterException
827                             ("Reference to non-existing rule '" + rulenames[i] +
828                              "' in http_rewrite filter");
829                     RulePtr subRule = it->second;
830                     std::list<Replace>::iterator rit =
831                         subRule->replace_list.begin();
832                     for (; rit != subRule->replace_list.end(); rit++)
833                         rule->replace_list.push_back(*rit);
834                 }
835                 w.rule = rule;
836             }
837             w.reqline = values[4] == "1";
838             w.type = values[5];
839             if (w.type.empty() || w.type == "quoted-literal")
840                 ;
841             else
842                 throw mp::filter::FilterException
843                     ("within type must be quoted-literal or none in "
844                      " in http_rewrite filter");
845             within_list.push_back(w);
846         }
847     }
848 }
849
850 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
851 {
852     static const char *names[2] = { "verbose", 0 };
853     std::string values[1];
854     values[0] = "0";
855     mp::xml::parse_attr(ptr, names, values);
856
857     phase.m_verbose = atoi(values[0].c_str());
858
859     std::map<std::string, RulePtr > rules;
860     for (ptr = ptr->children; ptr; ptr = ptr->next)
861     {
862         if (ptr->type != XML_ELEMENT_NODE)
863             continue;
864         else if (!strcmp((const char *) ptr->name, "rule"))
865         {
866             static const char *names[2] = { "name", 0 };
867             std::string values[1];
868             values[0] = "default";
869             mp::xml::parse_attr(ptr, names, values);
870
871             RulePtr rule(new Rule);
872             for (xmlNode *p = ptr->children; p; p = p->next)
873             {
874                 if (p->type != XML_ELEMENT_NODE)
875                     continue;
876                 if (!strcmp((const char *) p->name, "rewrite"))
877                 {
878                     Replace replace;
879                     std::string from;
880                     const struct _xmlAttr *attr;
881                     for (attr = p->properties; attr; attr = attr->next)
882                     {
883                         if (!strcmp((const char *) attr->name,  "from"))
884                             from = mp::xml::get_text(attr->children);
885                         else if (!strcmp((const char *) attr->name,  "to"))
886                             replace.recipe = mp::xml::get_text(attr->children);
887                         else
888                             throw mp::filter::FilterException
889                                 ("Bad attribute "
890                                  + std::string((const char *) attr->name)
891                                  + " in rewrite section of http_rewrite");
892                     }
893                     yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
894                             from.c_str(), replace.recipe.c_str());
895                     if (!from.empty())
896                     {
897                         replace.parse_groups(from);
898                         rule->replace_list.push_back(replace);
899                     }
900                 }
901                 else
902                     throw mp::filter::FilterException
903                         ("Bad element "
904                          + std::string((const char *) p->name)
905                          + " in http_rewrite filter");
906             }
907             rules[values[0]] = rule;
908         }
909         else if (!strcmp((const char *) ptr->name, "content"))
910         {
911             static const char *names[3] =
912                 { "type", "mime", 0 };
913             std::string values[2];
914             mp::xml::parse_attr(ptr, names, values);
915             if (values[0].empty())
916             {
917                     throw mp::filter::FilterException
918                         ("Missing attribute, type for for element "
919                          + std::string((const char *) ptr->name)
920                          + " in http_rewrite filter");
921             }
922             Content c;
923
924             c.type = values[0];
925             if (!values[1].empty())
926                 c.content_re.assign(values[1], boost::regex::icase);
927             c.configure(ptr->children, rules);
928             phase.content_list.push_back(c);
929         }
930         else
931         {
932             throw mp::filter::FilterException
933                 ("Bad element "
934                  + std::string((const char *) ptr->name)
935                  + " in http_rewrite filter");
936         }
937     }
938 }
939
940 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
941         const char *path)
942 {
943     for (ptr = ptr->children; ptr; ptr = ptr->next)
944     {
945         if (ptr->type != XML_ELEMENT_NODE)
946             continue;
947         else if (!strcmp((const char *) ptr->name, "request"))
948         {
949             configure_phase(ptr, *req_phase);
950         }
951         else if (!strcmp((const char *) ptr->name, "response"))
952         {
953             configure_phase(ptr, *res_phase);
954         }
955         else
956         {
957             throw mp::filter::FilterException
958                 ("Bad element "
959                  + std::string((const char *) ptr->name)
960                  + " in http_rewrite1 filter");
961         }
962     }
963 }
964
// Factory referenced by the filter registration struct below; returns a
// newly allocated HttpRewrite filter.
static mp::filter::Base* filter_creator()
{
    return new mp::filter::HttpRewrite;
}
969
// Registration record for this filter: the name "http_rewrite" and the
// factory function that creates it.
extern "C" {
    struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
        0,
        "http_rewrite",
        filter_creator
    };
}
977
978
979 /*
980  * Local variables:
981  * c-basic-offset: 4
982  * c-file-style: "Stroustrup"
983  * indent-tabs-mode: nil
984  * End:
985  * vim: shiftwidth=4 tabstop=8 expandtab
986  */
987