Add facility to record HTTP requests to Pazpar2
[pazpar2-moved-to-github.git] / src / http.c
1 /* This file is part of Pazpar2.
2    Copyright (C) 2006-2011 Index Data
3
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #if HAVE_SYS_TIME_H
25 #include <sys/time.h>
26 #endif
27
28 #include <stdio.h>
29 #ifdef WIN32
30 #include <winsock.h>
31 typedef int socklen_t;
32 #endif
33
34 #if HAVE_SYS_SOCKET_H
35 #include <sys/socket.h>
36 #endif
37
38 #include <sys/types.h>
39
40 #include <yaz/snprintf.h>
41 #if HAVE_UNISTD_H
42 #include <unistd.h>
43 #endif
44
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <fcntl.h>
49 #if HAVE_NETDB_H
50 #include <netdb.h>
51 #endif
52
53 #include <errno.h>
54 #include <assert.h>
55 #include <string.h>
56
57 #if HAVE_NETINET_IN_H
58 #include <netinet/in.h>
59 #endif
60
61 #if HAVE_ARPA_INET_H
62 #include <arpa/inet.h>
63 #endif
64
65 #include <yaz/yaz-util.h>
66 #include <yaz/comstack.h>
67 #include <yaz/nmem.h>
68 #include <yaz/mutex.h>
69
70 #include "ppmutex.h"
71 #include "session.h"
72 #include "http.h"
73
74 #define MAX_HTTP_HEADER 4096
75
76 #ifdef WIN32
77 #define strncasecmp _strnicmp
78 #define strcasecmp _stricmp
79 #endif
80
/* Capacity in bytes of the data area of a single http_buf. */
#define HTTP_BUF_SIZE 4096

/*
 * One link in a singly linked chain of fixed-size I/O buffers.
 * The valid data of a buffer is the len bytes starting at buf + offset.
 */
struct http_buf
{
    char buf[HTTP_BUF_SIZE];
    int offset;              /* index of the first unconsumed byte in buf */
    int len;                 /* number of valid bytes starting at offset */
    struct http_buf *next;   /* following buffer in the chain, or 0 */
};
89
90
/*
 * Forward declarations of file-local functions, so that they can be
 * referenced before the point where they are defined.
 */
static void proxy_io(IOCHAN i, int event);
static struct http_channel *http_channel_create(http_server_t http_server,
                                                const char *addr,
                                                struct conf_server *server);
static void http_channel_destroy(IOCHAN i);
static http_server_t http_server_create(void);
static void http_server_incref(http_server_t hs);
98
/*
 * State of one HTTP server instance. Channels reach it through their
 * http_server pointer.
 */
struct http_server
{
    // Recycled buffers: http_buf_create pops from here, http_buf_destroy pushes
    struct http_buf *http_buf_freelist;
    // Number of buffers currently held on http_buf_freelist
    int http_buf_freelist_count;
    // When > 0: once the count reaches this value, http_buf_destroy
    // releases the whole free list instead of growing it
    int http_buf_freelist_max;

    // NOTE(review): presumably the same recycling scheme for channels;
    // not used in the code visible here - confirm
    struct http_channel *http_channel_freelist;
    int http_channel_freelist_count;
    int http_channel_freelist_max;
    // Held by http_buf_create/http_buf_destroy while touching the buffer free list
    YAZ_MUTEX mutex;
    int listener_socket;
    // NOTE(review): presumably maintained by http_server_incref - confirm
    int ref_count;
    http_sessions_t http_sessions;
    // When non-null, http_weshouldproxy selects requests whose path does not
    // contain "search.pz2" and http_proxy connects to this address
    struct sockaddr_in *proxy_addr;
    // When non-null, http_io writes a log of incoming requests to this stream
    FILE *record_file;
};
115
/*
 * Node in a linked list of observers attached to an HTTP channel.
 * NOTE(review): presumably 'destroy' is called with 'data'/'data2' when
 * 'chan' goes away; the code using this struct is not visible here - confirm.
 */
struct http_channel_observer_s {
    void *data;
    void *data2;
    http_channel_destroy_t destroy;
    struct http_channel_observer_s *next;  // next observer in the list
    struct http_channel *chan;             // channel this observer belongs to
};
123
124
125 const char *http_lookup_header(struct http_header *header,
126                                const char *name)
127 {
128     for (; header; header = header->next)
129         if (!strcasecmp(name, header->name))
130             return header->value;
131     return 0;
132 }
133
134 static struct http_buf *http_buf_create(http_server_t hs)
135 {
136     struct http_buf *r = 0;
137
138     yaz_mutex_enter(hs->mutex);
139     if (hs->http_buf_freelist)
140     {
141         r = hs->http_buf_freelist;
142         hs->http_buf_freelist = hs->http_buf_freelist->next;
143         hs->http_buf_freelist_count--;
144     }
145     yaz_mutex_leave(hs->mutex);
146     if (!r)
147         r = xmalloc(sizeof(struct http_buf));
148     r->offset = 0;
149     r->len = 0;
150     r->next = 0;
151     return r;
152 }
153
154 static void http_buf_destroy(http_server_t hs, struct http_buf *b)
155 {
156     yaz_mutex_enter(hs->mutex);
157     if (hs->http_buf_freelist_max > 0 && hs->http_buf_freelist_count >= hs->http_buf_freelist_max) {
158         xfree(b);
159         while ((b = hs->http_buf_freelist)) {
160             xfree(b);
161             hs->http_buf_freelist = hs->http_buf_freelist->next;
162         }
163         hs->http_buf_freelist_count = 0;
164     }
165     else {
166         b->next = hs->http_buf_freelist;
167         hs->http_buf_freelist = b;
168         hs->http_buf_freelist_count++;
169 #if 0 
170         yaz_log(YLOG_DEBUG, "Free %d http buffers on server.", hs->http_buf_freelist_count);
171 #endif
172     }
173     yaz_mutex_leave(hs->mutex);
174 }
175
176 static void http_buf_destroy_queue(http_server_t hs, struct http_buf *b)
177 {
178     struct http_buf *p;
179     while (b)
180     {
181         p = b->next;
182         http_buf_destroy(hs, b);
183         b = p;
184     }
185 }
186
187 static struct http_buf *http_buf_bybuf(http_server_t hs, char *b, int len)
188 {
189     struct http_buf *res = 0;
190     struct http_buf **p = &res;
191
192     while (len)
193     {
194         int tocopy = len;
195         if (tocopy > HTTP_BUF_SIZE)
196             tocopy = HTTP_BUF_SIZE;
197         *p = http_buf_create(hs);
198         memcpy((*p)->buf, b, tocopy);
199         (*p)->len = tocopy;
200         len -= tocopy;
201         b += tocopy;
202         p = &(*p)->next;
203     }
204     return res;
205 }
206
207 // Add a (chain of) buffers to the end of an existing queue.
208 static void http_buf_enqueue(struct http_buf **queue, struct http_buf *b)
209 {
210     while (*queue)
211         queue = &(*queue)->next;
212     *queue = b;
213 }
214
215 static struct http_buf *http_buf_bywrbuf(http_server_t hs, WRBUF wrbuf)
216 {
217     // Heavens to Betsy (buf)!
218     return http_buf_bybuf(hs, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
219 }
220
221 // Non-destructively collapse chain of buffers into a string (max *len)
222 // Return
223 static void http_buf_peek(struct http_buf *b, char *buf, int len)
224 {
225     int rd = 0;
226     while (b && rd < len)
227     {
228         int toread = len - rd;
229         if (toread > b->len)
230             toread = b->len;
231         memcpy(buf + rd, b->buf + b->offset, toread);
232         rd += toread;
233         b = b->next;
234     }
235     buf[rd] = '\0';
236 }
237
238 static int http_buf_size(struct http_buf *b)
239 {
240     int sz = 0;
241     for (; b; b = b->next)
242         sz += b->len;
243     return sz;
244 }
245
246 // Ddestructively munch up to len  from head of queue.
247 static int http_buf_read(http_server_t hs,
248                          struct http_buf **b, char *buf, int len)
249 {
250     int rd = 0;
251     while ((*b) && rd < len)
252     {
253         int toread = len - rd;
254         if (toread > (*b)->len)
255             toread = (*b)->len;
256         memcpy(buf + rd, (*b)->buf + (*b)->offset, toread);
257         rd += toread;
258         if (toread < (*b)->len)
259         {
260             (*b)->len -= toread;
261             (*b)->offset += toread;
262             break;
263         }
264         else
265         {
266             struct http_buf *n = (*b)->next;
267             http_buf_destroy(hs, *b);
268             *b = n;
269         }
270     }
271     buf[rd] = '\0';
272     return rd;
273 }
274
// Value of a hexadecimal digit character, or -1 if c is not one.
static int urldecode_hexval(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

// Decode a URL-encoded string from i into o.
// '+' becomes a space and "%XX" (two hex digits) becomes the byte XX.
// A '%' that is not followed by two hex digits is copied unchanged.
// The output is never longer than the input, so the buffers may overlap
// (in-place decoding with o == i is fine).
static void urldecode(char *i, char *o)
{
    while (*i)
    {
        int hi, lo;

        if (*i == '+')
        {
            *(o++) = ' ';
            i++;
        }
        else if (*i == '%' && (hi = urldecode_hexval(i[1])) >= 0
                 && (lo = urldecode_hexval(i[2])) >= 0)
        {
            // i[2] is only inspected when i[1] is a hex digit, so we never
            // read beyond the terminating NUL
            *(o++) = (char) (hi * 16 + lo);
            i += 3;
        }
        else
            *(o++) = *(i++);
    }
    *o = '\0';
}
298
// Encode the string i into o, replacing ' ', '/' and ':' by their
// "%XX" form (upper-case hex); all other bytes are copied as-is.
// Warning: the buffers must not overlap, and o needs room for up to
// three output bytes per input byte plus the terminating NUL.
void urlencode(const char *i, char *o)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    for (; *i; i++)
    {
        switch (*i)
        {
        case ' ':
        case '/':
        case ':':
            *(o++) = '%';
            *(o++) = hexdigits[(*i >> 4) & 0x0F];
            *(o++) = hexdigits[*i & 0x0F];
            break;
        default:
            *(o++) = *i;
            break;
        }
    }
    *o = '\0';
}
315
316 void http_addheader(struct http_response *r, const char *name, const char *value)
317 {
318     struct http_channel *c = r->channel;
319     struct http_header *h = nmem_malloc(c->nmem, sizeof *h);
320     h->name = nmem_strdup(c->nmem, name);
321     h->value = nmem_strdup(c->nmem, value);
322     h->next = r->headers;
323     r->headers = h;
324 }
325
326 const char *http_argbyname(struct http_request *r, const char *name)
327 {
328     struct http_argument *p;
329     if (!name)
330         return 0;
331     for (p = r->arguments; p; p = p->next)
332         if (!strcmp(p->name, name))
333             return p->value;
334     return 0;
335 }
336
337 const char *http_headerbyname(struct http_header *h, const char *name)
338 {
339     for (; h; h = h->next)
340         if (!strcmp(h->name, name))
341             return h->value;
342     return 0;
343 }
344
345 struct http_response *http_create_response(struct http_channel *c)
346 {
347     struct http_response *r = nmem_malloc(c->nmem, sizeof(*r));
348     strcpy(r->code, "200");
349     r->msg = "OK";
350     r->channel = c;
351     r->headers = 0;
352     r->payload = 0;
353     r->content_type = "text/xml";
354     return r;
355 }
356
357
/*
 * Find the end of the line starting at cp.
 *
 * The line ends at the first '\n'; a '\r' immediately before it is
 * treated as part of the terminator. On success *skipped receives the
 * length of the line without its terminator and the return value points
 * just past the '\n'. If there is no '\n', 0 is returned and *skipped
 * is left untouched.
 */
static const char *next_crlf(const char *cp, size_t *skipped)
{
    const char *nl = strchr(cp, '\n');

    if (!nl)
        return 0;
    *skipped = (size_t) (nl - cp);
    if (nl != cp && nl[-1] == '\r')
        --*skipped;
    return nl + 1;
}
371
372 // Check if buf contains a package (minus payload)
373 static int package_check(const char *buf, int sz)
374 {
375     int content_len = 0;
376     int len = 0;
377
378     while (*buf)
379     {
380         size_t skipped = 0;
381         const char *b = next_crlf(buf, &skipped);
382
383         if (!b)
384         {
385             // we did not find CRLF.. See if buffer is too large..
386             if (sz >= MAX_HTTP_HEADER-1)
387                 return MAX_HTTP_HEADER-1; // yes. Return that (will fail later)
388             break;
389         }
390         len += (b - buf);
391         if (skipped == 0)
392         {
393             // CRLF CRLF , i.e. end of header
394             if (len + content_len <= sz)
395                 return len + content_len;
396             break;
397         }
398         buf = b;
399         // following first skip of \r\n so that we don't consider Method
400         if (!strncasecmp(buf, "Content-Length:", 15))
401         {
402             const char *cp = buf+15;
403             while (*cp == ' ')
404                 cp++;
405             content_len = 0;
406             while (*cp && isdigit(*(const unsigned char *)cp))
407                 content_len = content_len*10 + (*cp++ - '0');
408             if (content_len < 0) /* prevent negative offsets */
409                 content_len = 0;
410         }
411     }
412     return 0;     // incomplete request
413 }
414
415 // Check if we have a request. Return 0 or length
416 static int request_check(struct http_buf *queue)
417 {
418     char tmp[MAX_HTTP_HEADER];
419
420     // only peek at the header..
421     http_buf_peek(queue, tmp, MAX_HTTP_HEADER-1);
422     // still we only return non-zero if the complete request is received..
423     return package_check(tmp, http_buf_size(queue));
424 }
425
426 struct http_response *http_parse_response_buf(struct http_channel *c, const char *buf, int len)
427 {
428     char tmp[MAX_HTTP_HEADER];
429     struct http_response *r = http_create_response(c);
430     char *p, *p2;
431     struct http_header **hp = &r->headers;
432
433     if (len >= MAX_HTTP_HEADER)
434         return 0;
435     memcpy(tmp, buf, len);
436     for (p = tmp; *p && *p != ' '; p++) // Skip HTTP version
437         ;
438     p++;
439     // Response code
440     for (p2 = p; *p2 && *p2 != ' ' && p2 - p < 3; p2++)
441         r->code[p2 - p] = *p2;
442     if (!(p = strstr(tmp, "\r\n")))
443         return 0;
444     p += 2;
445     while (*p)
446     {
447         if (!(p2 = strstr(p, "\r\n")))
448             return 0;
449         if (p == p2) // End of headers
450             break;
451         else
452         {
453             struct http_header *h = *hp = nmem_malloc(c->nmem, sizeof(*h));
454             char *value = strchr(p, ':');
455             if (!value)
456                 return 0;
457             *(value++) = '\0';
458             h->name = nmem_strdup(c->nmem, p);
459             while (isspace(*(const unsigned char *) value))
460                 value++;
461             if (value >= p2)  // Empty header;
462             {
463                 h->value = "";
464                 p = p2 + 2;
465                 continue;
466             }
467             *p2 = '\0';
468             h->value = nmem_strdup(c->nmem, value);
469             h->next = 0;
470             hp = &h->next;
471             p = p2 + 2;
472         }
473     }
474     return r;
475 }
476
477 static int http_parse_arguments(struct http_request *r, NMEM nmem,
478                                 const char *args)
479 {
480     const char *p2 = args;
481
482     while (*p2)
483     {
484         struct http_argument *a;
485         const char *equal = strchr(p2, '=');
486         const char *eoa = strchr(p2, '&');
487         if (!equal)
488         {
489             yaz_log(YLOG_WARN, "Expected '=' in argument");
490             return -1;
491         }
492         if (!eoa)
493             eoa = equal + strlen(equal); // last argument
494         else if (equal > eoa)
495         {
496             yaz_log(YLOG_WARN, "Missing '&' in argument");
497             return -1;
498         }
499         a = nmem_malloc(nmem, sizeof(struct http_argument));
500         a->name = nmem_strdupn(nmem, p2, equal - p2);
501         a->value = nmem_strdupn(nmem, equal+1, eoa - equal - 1);
502         urldecode(a->name, a->name);
503         urldecode(a->value, a->value);
504         a->next = r->arguments;
505         r->arguments = a;
506         p2 = eoa;
507         while (*p2 == '&')
508             p2++;
509     }
510     return 0;
511 }
512
513 struct http_request *http_parse_request(struct http_channel *c,
514                                         struct http_buf **queue,
515                                         int len)
516 {
517     struct http_request *r = nmem_malloc(c->nmem, sizeof(*r));
518     char *p, *p2;
519     char *start = nmem_malloc(c->nmem, len+1);
520     char *buf = start;
521
522     if (http_buf_read(c->http_server, queue, buf, len) < len)
523     {
524         yaz_log(YLOG_WARN, "http_buf_read < len (%d)", len);
525         return 0;
526     }
527     r->search = "";
528     r->channel = c;
529     r->arguments = 0;
530     r->headers = 0;
531     r->content_buf = 0;
532     r->content_len = 0;
533     // Parse first line
534     for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++)
535         *(p2++) = *p;
536     if (*p != ' ')
537     {
538         yaz_log(YLOG_WARN, "Unexpected HTTP method in request");
539         return 0;
540     }
541     *p2 = '\0';
542
543     if (!(buf = strchr(buf, ' ')))
544     {
545         yaz_log(YLOG_WARN, "Missing Request-URI in HTTP request");
546         return 0;
547     }
548     buf++;
549     if (!(p = strchr(buf, ' ')))
550     {
551         yaz_log(YLOG_WARN, "HTTP Request-URI not terminated (too long?)");
552         return 0;
553     }
554     *(p++) = '\0';
555     if ((p2 = strchr(buf, '?'))) // Do we have arguments?
556         *(p2++) = '\0';
557     r->path = nmem_strdup(c->nmem, buf);
558     if (p2)
559     {
560         r->search = nmem_strdup(c->nmem, p2);
561         // Parse Arguments
562         http_parse_arguments(r, c->nmem, p2);
563     }
564     buf = p;
565
566     if (strncmp(buf, "HTTP/", 5))
567         strcpy(r->http_version, "1.0");
568     else
569     {
570         size_t skipped;
571         buf += 5; // strlen("HTTP/")
572
573         p = (char*) next_crlf(buf, &skipped);
574         if (!p || skipped < 3 || skipped > 5)
575             return 0;
576
577         memcpy(r->http_version, buf, skipped);
578         r->http_version[skipped] = '\0';
579         buf = p;
580     }
581     strcpy(c->version, r->http_version);
582
583     r->headers = 0;
584     while (*buf)
585     {
586         size_t skipped;
587
588         p = (char *) next_crlf(buf, &skipped);
589         if (!p)
590         {
591             return 0;
592         }
593         else if (skipped == 0)
594         {
595             buf = p;
596             break;
597         }
598         else
599         {
600             char *cp;
601             char *n_v = nmem_malloc(c->nmem, skipped+1);
602             struct http_header *h = nmem_malloc(c->nmem, sizeof(*h));
603
604             memcpy(n_v, buf, skipped);
605             n_v[skipped] = '\0';
606
607             if (!(cp = strchr(n_v, ':')))
608                 return 0;
609             h->name = nmem_strdupn(c->nmem, n_v, cp - n_v);
610             cp++;
611             while (isspace(*cp))
612                 cp++;
613             h->value = nmem_strdup(c->nmem, cp);
614             h->next = r->headers;
615             r->headers = h;
616             buf = p;
617         }
618     }
619
620     // determine if we do keep alive
621     if (!strcmp(c->version, "1.0"))
622     {
623         const char *v = http_lookup_header(r->headers, "Connection");
624         if (v && !strcmp(v, "Keep-Alive"))
625             c->keep_alive = 1;
626         else
627             c->keep_alive = 0;
628     }
629     else
630     {
631         const char *v = http_lookup_header(r->headers, "Connection");
632         if (v && !strcmp(v, "close"))
633             c->keep_alive = 0;
634         else
635             c->keep_alive = 1;
636     }
637     if (buf < start + len)
638     {
639         const char *content_type = http_lookup_header(r->headers,
640                                                       "Content-Type");
641         r->content_len = start + len - buf;
642         r->content_buf = buf;
643
644         if (!yaz_strcmp_del("application/x-www-form-urlencoded",
645                             content_type, "; "))
646         {
647             http_parse_arguments(r, c->nmem, r->content_buf);
648         }
649     }
650     return r;
651 }
652
653 static struct http_buf *http_serialize_response(struct http_channel *c,
654         struct http_response *r)
655 {
656     struct http_header *h;
657
658     wrbuf_rewind(c->wrbuf);
659     wrbuf_printf(c->wrbuf, "HTTP/%s %s %s\r\n", c->version, r->code, r->msg);
660     for (h = r->headers; h; h = h->next)
661         wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value);
662     if (r->payload)
663     {
664         wrbuf_printf(c->wrbuf, "Content-Length: %d\r\n", r->payload ?
665                 (int) strlen(r->payload) : 0);
666         wrbuf_printf(c->wrbuf, "Content-Type: %s\r\n", r->content_type);
667         if (!strcmp(r->content_type, "text/xml"))
668         {
669             xmlDoc *doc = xmlParseMemory(r->payload, strlen(r->payload));
670             if (doc)
671             {
672                 xmlFreeDoc(doc);
673             }
674             else
675             {
676                 yaz_log(YLOG_WARN, "Sending non-wellformed "
677                         "response (bug #1162");
678                 yaz_log(YLOG_WARN, "payload: %s", r->payload);
679             }
680         }
681     }
682     wrbuf_puts(c->wrbuf, "\r\n");
683
684     if (r->payload)
685         wrbuf_puts(c->wrbuf, r->payload);
686
687     return http_buf_bywrbuf(c->http_server, c->wrbuf);
688 }
689
690 // Serialize a HTTP request
691 static struct http_buf *http_serialize_request(struct http_request *r)
692 {
693     struct http_channel *c = r->channel;
694     struct http_header *h;
695
696     wrbuf_rewind(c->wrbuf);
697     wrbuf_printf(c->wrbuf, "%s %s%s%s", r->method, r->path,
698                  *r->search ? "?" : "", r->search);
699
700     wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version);
701
702     for (h = r->headers; h; h = h->next)
703         wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value);
704
705     wrbuf_puts(c->wrbuf, "\r\n");
706
707     if (r->content_buf)
708         wrbuf_write(c->wrbuf, r->content_buf, r->content_len);
709
710 #if 0
711     yaz_log(YLOG_LOG, "WRITING TO PROXY:\n%s\n----",
712             wrbuf_cstr(c->wrbuf));
713 #endif
714     return http_buf_bywrbuf(c->http_server, c->wrbuf);
715 }
716
717
718 static int http_weshouldproxy(struct http_request *rq)
719 {
720     struct http_channel *c = rq->channel;
721     if (c->server->http_server->proxy_addr && !strstr(rq->path, "search.pz2"))
722         return 1;
723     return 0;
724 }
725
726
727 struct http_header * http_header_append(struct http_channel *ch, 
728                                         struct http_header * hp, 
729                                         const char *name, 
730                                         const char *value)
731 {
732     struct http_header *hpnew = 0; 
733
734     if (!hp | !ch)
735         return 0;
736
737     while (hp && hp->next)
738         hp = hp->next;
739
740     if(name && strlen(name)&& value && strlen(value)){
741         hpnew = nmem_malloc(ch->nmem, sizeof *hpnew);
742         hpnew->name = nmem_strdup(ch->nmem, name);
743         hpnew->value = nmem_strdup(ch->nmem, value);
744         
745         hpnew->next = 0;
746         hp->next = hpnew;
747         hp = hp->next;
748         
749         return hpnew;
750     }
751
752     return hp;
753 }
754
755    
/*
 * Tell whether the most recent socket error only means that a
 * non-blocking operation has not completed yet.
 * Returns 1 in that case (WSAEWOULDBLOCK on Windows, EINPROGRESS
 * elsewhere) and 0 for any other error state.
 */
static int is_inprogress(void)
{
#ifdef WIN32
    if (WSAGetLastError() == WSAEWOULDBLOCK)
        return 1;
#else
    if (errno == EINPROGRESS)
        return 1;
#endif
    return 0;
}
767
768 static void enable_nonblock(int sock)
769 {
770     int flags;
771 #ifdef WIN32
772     flags = (flags & CS_FLAGS_BLOCKING) ? 0 : 1;
773     if (ioctlsocket(sock, FIONBIO, &flags) < 0)
774         yaz_log(YLOG_FATAL|YLOG_ERRNO, "ioctlsocket");
775 #else
776     if ((flags = fcntl(sock, F_GETFL, 0)) < 0) 
777         yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl");
778     if (fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0)
779         yaz_log(YLOG_FATAL|YLOG_ERRNO, "fcntl2");
780 #endif
781 }
782
783 static int http_proxy(struct http_request *rq)
784 {
785     struct http_channel *c = rq->channel;
786     struct http_proxy *p = c->proxy;
787     struct http_header *hp;
788     struct http_buf *requestbuf;
789     char server_port[16] = "";
790     struct conf_server *ser = c->server;
791
792     if (!p) // This is a new connection. Create a proxy channel
793     {
794         int sock;
795         struct protoent *pe;
796         int one = 1;
797
798         if (!(pe = getprotobyname("tcp"))) {
799             abort();
800         }
801         if ((sock = socket(PF_INET, SOCK_STREAM, pe->p_proto)) < 0)
802         {
803             yaz_log(YLOG_WARN|YLOG_ERRNO, "socket");
804             return -1;
805         }
806         if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*)
807                         &one, sizeof(one)) < 0)
808             abort();
809         enable_nonblock(sock);
810         if (connect(sock, (struct sockaddr *)
811                     c->server->http_server->proxy_addr, 
812                     sizeof(*c->server->http_server->proxy_addr)) < 0)
813         {
814             if (!is_inprogress()) 
815             {
816                 yaz_log(YLOG_WARN|YLOG_ERRNO, "Proxy connect");
817                 return -1;
818             }
819         }
820         p = xmalloc(sizeof(struct http_proxy));
821         p->oqueue = 0;
822         p->channel = c;
823         p->first_response = 1;
824         c->proxy = p;
825         // We will add EVENT_OUTPUT below
826         p->iochan = iochan_create(sock, proxy_io, EVENT_INPUT, "http_proxy");
827         iochan_setdata(p->iochan, p);
828
829         iochan_add(ser->iochan_man, p->iochan);
830     }
831
832     // Do _not_ modify Host: header, just checking it's existence
833
834     if (!http_lookup_header(rq->headers, "Host"))
835     {
836         yaz_log(YLOG_WARN, "Failed to find Host header in proxy");
837         return -1;
838     }
839     
840     // Add new header about paraz2 version, host, remote client address, etc.
841     {
842         char server_via[128];
843
844         hp = rq->headers;
845         hp = http_header_append(c, hp, 
846                                 "X-Pazpar2-Version", PACKAGE_VERSION);
847         hp = http_header_append(c, hp, 
848                                 "X-Pazpar2-Server-Host", ser->host);
849         sprintf(server_port, "%d",  ser->port);
850         hp = http_header_append(c, hp, 
851                                 "X-Pazpar2-Server-Port", server_port);
852         yaz_snprintf(server_via, sizeof(server_via), 
853                      "1.1 %s:%s (%s/%s)",  
854                      ser->host ? ser->host : "@",
855                      server_port, PACKAGE_NAME, PACKAGE_VERSION);
856         hp = http_header_append(c, hp, "Via" , server_via);
857         hp = http_header_append(c, hp, "X-Forwarded-For", c->addr);
858     }
859     
860     requestbuf = http_serialize_request(rq);
861
862     http_buf_enqueue(&p->oqueue, requestbuf);
863     iochan_setflag(p->iochan, EVENT_OUTPUT);
864     return 0;
865 }
866
867 void http_send_response(struct http_channel *ch)
868 {
869     struct http_response *rs = ch->response;
870     struct http_buf *hb;
871
872     assert(rs);
873     hb = http_serialize_response(ch, rs);
874     if (!hb)
875     {
876         yaz_log(YLOG_WARN, "Failed to serialize HTTP response");
877         http_channel_destroy(ch->iochan);
878     }
879     else
880     {
881         http_buf_enqueue(&ch->oqueue, hb);
882         iochan_setflag(ch->iochan, EVENT_OUTPUT);
883         ch->state = Http_Idle;
884     }
885 }
886
887 static void http_error(struct http_channel *hc, int no, const char *msg)
888 {
889     struct http_response *rs = http_create_response(hc);
890
891     hc->response = rs;
892     hc->keep_alive = 0;  // not keeping this HTTP session alive
893
894     sprintf(rs->code, "%d", no);
895
896     rs->msg = nmem_strdup(hc->nmem, msg);
897     rs->payload = nmem_malloc(hc->nmem, 100);
898     yaz_snprintf(rs->payload, 99, "<error>HTTP Error %d: %s</error>\n",
899                  no, msg);
900     http_send_response(hc);
901 }
902
903 static void http_io(IOCHAN i, int event)
904 {
905     struct http_channel *hc = iochan_getdata(i);
906     while (event)
907     {
908         if (event == EVENT_INPUT)
909         {
910             int res, reqlen;
911             struct http_buf *htbuf;
912             
913             htbuf = http_buf_create(hc->http_server);
914             res = recv(iochan_getfd(i), htbuf->buf, HTTP_BUF_SIZE -1, 0);
915             if (res == -1 && errno == EAGAIN)
916             {
917                 http_buf_destroy(hc->http_server, htbuf);
918                 return;
919             }
920             if (res <= 0)
921             {
922 #if HAVE_SYS_TIME_H
923                 if (hc->http_server->record_file)
924                 {
925                     struct timeval tv;
926                     gettimeofday(&tv, 0);
927                     fprintf(hc->http_server->record_file, "%lld %lld %lld 0\n",
928                             (long long) tv.tv_sec, (long long) tv.tv_usec,
929                             (long long) iochan_getfd(i));
930                 }
931 #endif
932                 http_buf_destroy(hc->http_server, htbuf);
933                 fflush(hc->http_server->record_file);
934                 http_channel_destroy(i);
935                 return;
936             }
937             htbuf->buf[res] = '\0';
938             htbuf->len = res;
939             http_buf_enqueue(&hc->iqueue, htbuf);
940
941             while (1)
942             {
943                 if (hc->state == Http_Busy)
944                     return;
945                 reqlen = request_check(hc->iqueue);
946                 if (reqlen <= 2)
947                     return;
948                 // we have a complete HTTP request
949                 nmem_reset(hc->nmem);
950 #if HAVE_SYS_TIME_H
951                 if (hc->http_server->record_file)
952                 {
953                     struct timeval tv;
954                     int sz = 0;
955                     struct http_buf *hb;
956                     for (hb = hc->iqueue; hb; hb = hb->next)
957                         sz += hb->len;
958                     gettimeofday(&tv, 0);
959                     fprintf(hc->http_server->record_file, "%lld %lld %lld %d\n",
960                             (long long) tv.tv_sec, (long long) tv.tv_usec,
961                             (long long) iochan_getfd(i), sz);
962                     for (hb = hc->iqueue; hb; hb = hb->next)
963                         fwrite(hb->buf, 1, hb->len, hc->http_server->record_file);
964                 }
965  #endif
966                 if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen)))
967                 {
968                     yaz_log(YLOG_WARN, "Failed to parse request");
969                     http_error(hc, 400, "Bad Request");
970                     return;
971                 }
972                 hc->response = 0;
973                 yaz_log(YLOG_LOG, "Request: %s %s%s%s", hc->request->method,
974                         hc->request->path,
975                         *hc->request->search ? "?" : "",
976                         hc->request->search);
977                 if (hc->request->content_buf)
978                     yaz_log(YLOG_LOG, "%s", hc->request->content_buf);
979                 if (http_weshouldproxy(hc->request))
980                     http_proxy(hc->request);
981                 else
982                 {
983                     // Execute our business logic!
984                     hc->state = Http_Busy;
985                     http_command(hc);
986                 }
987             }
988         }
989         else if (event == EVENT_OUTPUT)
990         {
991             event = 0;
992             if (hc->oqueue)
993             {
994                 struct http_buf *wb = hc->oqueue;
995                 int res;
996                 res = send(iochan_getfd(hc->iochan),
997                            wb->buf + wb->offset, wb->len, 0);
998                 if (res <= 0)
999                 {
1000                     yaz_log(YLOG_WARN|YLOG_ERRNO, "write");
1001                     http_channel_destroy(i);
1002                     return;
1003                 }
1004                 if (res == wb->len)
1005                 {
1006                     hc->oqueue = hc->oqueue->next;
1007                     http_buf_destroy(hc->http_server, wb);
1008                 }
1009                 else
1010                 {
1011                     wb->len -= res;
1012                     wb->offset += res;
1013                 }
1014                 if (!hc->oqueue)
1015                 {
1016                     if (!hc->keep_alive)
1017                     {
1018                         http_channel_destroy(i);
1019                         return;
1020                     }
1021                     else
1022                     {
1023                         iochan_clearflag(i, EVENT_OUTPUT);
1024                         if (hc->iqueue)
1025                             event = EVENT_INPUT;
1026                     }
1027                 }
1028             }
1029             if (!hc->oqueue && hc->proxy && !hc->proxy->iochan) 
1030                 http_channel_destroy(i); // Server closed; we're done
1031         }
1032         else
1033         {
1034             yaz_log(YLOG_WARN, "Unexpected event on connection");
1035             http_channel_destroy(i);
1036             event = 0;
1037         }
1038     }
1039 }
1040
1041 // Handles I/O on a client connection to a backend web server (proxy mode)
1042 static void proxy_io(IOCHAN pi, int event)
1043 {
1044     struct http_proxy *pc = iochan_getdata(pi);
1045     struct http_channel *hc = pc->channel;
1046
1047     switch (event)
1048     {
1049         int res;
1050         struct http_buf *htbuf;
1051
1052         case EVENT_INPUT:
1053             htbuf = http_buf_create(hc->http_server);
1054             res = recv(iochan_getfd(pi), htbuf->buf, HTTP_BUF_SIZE -1, 0);
1055             if (res == 0 || (res < 0 && !is_inprogress()))
1056             {
1057                 if (hc->oqueue)
1058                 {
1059                     yaz_log(YLOG_WARN, "Proxy read came up short");
1060                     // Close channel and alert client HTTP channel that we're gone
1061                     http_buf_destroy(hc->http_server, htbuf);
1062 #ifdef WIN32
1063                     closesocket(iochan_getfd(pi));
1064 #else
1065                     close(iochan_getfd(pi));
1066 #endif
1067                     iochan_destroy(pi);
1068                     pc->iochan = 0;
1069                 }
1070                 else
1071                 {
1072                     http_channel_destroy(hc->iochan);
1073                     return;
1074                 }
1075             }
1076             else
1077             {
1078                 htbuf->buf[res] = '\0';
1079                 htbuf->offset = 0;
1080                 htbuf->len = res;
1081                 // Write any remaining payload
1082                 if (htbuf->len - htbuf->offset > 0)
1083                     http_buf_enqueue(&hc->oqueue, htbuf);
1084             }
1085             iochan_setflag(hc->iochan, EVENT_OUTPUT);
1086             break;
1087         case EVENT_OUTPUT:
1088             if (!(htbuf = pc->oqueue))
1089             {
1090                 iochan_clearflag(pi, EVENT_OUTPUT);
1091                 return;
1092             }
1093             res = send(iochan_getfd(pi), htbuf->buf + htbuf->offset, htbuf->len, 0);
1094             if (res <= 0)
1095             {
1096                 yaz_log(YLOG_WARN|YLOG_ERRNO, "write");
1097                 http_channel_destroy(hc->iochan);
1098                 return;
1099             }
1100             if (res == htbuf->len)
1101             { 
1102                 struct http_buf *np = htbuf->next;
1103                 http_buf_destroy(hc->http_server, htbuf);
1104                 pc->oqueue = np;
1105             }
1106             else
1107             {
1108                 htbuf->len -= res;
1109                 htbuf->offset += res;
1110             }
1111
1112             if (!pc->oqueue) {
1113                 iochan_setflags(pi, EVENT_INPUT); // Turns off output flag
1114             }
1115             break;
1116         default:
1117             yaz_log(YLOG_WARN, "Unexpected event on connection");
1118             http_channel_destroy(hc->iochan);
1119     }
1120 }
1121
1122 static void http_fire_observers(struct http_channel *c);
1123 static void http_destroy_observers(struct http_channel *c);
1124
1125 // Cleanup channel
1126 static void http_channel_destroy(IOCHAN i)
1127 {
1128     struct http_channel *s = iochan_getdata(i);
1129     http_server_t http_server;
1130
1131     if (s->proxy)
1132     {
1133         if (s->proxy->iochan)
1134         {
1135 #ifdef WIN32
1136             closesocket(iochan_getfd(s->proxy->iochan));
1137 #else
1138             close(iochan_getfd(s->proxy->iochan));
1139 #endif
1140             iochan_destroy(s->proxy->iochan);
1141         }
1142         http_buf_destroy_queue(s->http_server, s->proxy->oqueue);
1143         xfree(s->proxy);
1144     }
1145     http_buf_destroy_queue(s->http_server, s->iqueue);
1146     http_buf_destroy_queue(s->http_server, s->oqueue);
1147     http_fire_observers(s);
1148     http_destroy_observers(s);
1149
1150     http_server = s->http_server; /* save it for destroy (decref) */
1151
1152     yaz_mutex_enter(s->http_server->mutex);
1153     if (s->http_server->http_channel_freelist_max > 0 && s->http_server->http_channel_freelist_count >= s->http_server->http_channel_freelist_max) {
1154         while ((s->next = s->http_server->http_channel_freelist)) {
1155             nmem_destroy(s->next->nmem);
1156             wrbuf_destroy(s->next->wrbuf);
1157             xfree(s->next);
1158             s->http_server->http_channel_freelist = s->http_server->http_channel_freelist->next;
1159         }
1160         s->http_server->http_channel_freelist_count = 0;
1161     }
1162     else {
1163         s->next = s->http_server->http_channel_freelist;
1164         s->http_server->http_channel_freelist = s;
1165         s->http_server->http_channel_freelist_count++;
1166         yaz_log(YLOG_DEBUG, "Free %d channels on server.", s->http_server->http_channel_freelist_count);
1167     }
1168     yaz_mutex_leave(s->http_server->mutex);
1169
1170     http_server_destroy(http_server);
1171
1172 #ifdef WIN32
1173     closesocket(iochan_getfd(i));
1174 #else
1175     close(iochan_getfd(i));
1176 #endif
1177     iochan_destroy(i);
1178 }
1179
1180 static struct http_channel *http_channel_create(http_server_t hs,
1181                                                 const char *addr,
1182                                                 struct conf_server *server)
1183 {
1184     struct http_channel *r;
1185
1186     yaz_mutex_enter(hs->mutex);
1187     r = hs->http_channel_freelist;
1188     if (r) {
1189         hs->http_channel_freelist = r->next;
1190         hs->http_channel_freelist_count--;
1191     }
1192     yaz_mutex_leave(hs->mutex);
1193
1194     if (r)
1195     {
1196         nmem_reset(r->nmem);
1197         wrbuf_rewind(r->wrbuf);
1198     }
1199     else
1200     {
1201         r = xmalloc(sizeof(struct http_channel));
1202         r->nmem = nmem_create();
1203         r->wrbuf = wrbuf_alloc();
1204     }
1205     http_server_incref(hs);
1206     r->http_server = hs;
1207     r->http_sessions = hs->http_sessions;
1208     assert(r->http_sessions);
1209     r->server = server;
1210     r->proxy = 0;
1211     r->iochan = 0;
1212     r->iqueue = r->oqueue = 0;
1213     r->state = Http_Idle;
1214     r->keep_alive = 0;
1215     r->request = 0;
1216     r->response = 0;
1217     if (!addr)
1218     {
1219         yaz_log(YLOG_WARN, "Invalid HTTP forward address");
1220         exit(1);
1221     }
1222     strcpy(r->addr, addr);
1223     r->observers = 0;
1224     return r;
1225 }
1226
1227
1228 /* Accept a new command connection */
1229 static void http_accept(IOCHAN i, int event)
1230 {
1231     struct sockaddr_in addr;
1232     int fd = iochan_getfd(i);
1233     socklen_t len;
1234     int s;
1235     IOCHAN c;
1236     struct http_channel *ch;
1237     struct conf_server *server = iochan_getdata(i);
1238
1239     len = sizeof addr;
1240     if ((s = accept(fd, (struct sockaddr *) &addr, &len)) < 0)
1241     {
1242         yaz_log(YLOG_WARN|YLOG_ERRNO, "accept");
1243         return;
1244     }
1245     enable_nonblock(s);
1246
1247     yaz_log(YLOG_DEBUG, "New command connection");
1248     c = iochan_create(s, http_io, EVENT_INPUT | EVENT_EXCEPT, "http_session_socket");
1249     
1250     ch = http_channel_create(server->http_server, inet_ntoa(addr.sin_addr),
1251                              server);
1252     ch->iochan = c;
1253     iochan_setdata(c, ch);
1254     iochan_add(server->iochan_man, c);
1255 }
1256
1257 /* Create a http-channel listener, syntax [host:]port */
1258 int http_init(const char *addr, struct conf_server *server,
1259               const char *record_fname)
1260 {
1261     IOCHAN c;
1262     int l;
1263     struct protoent *p;
1264     struct sockaddr_in myaddr;
1265     int one = 1;
1266     const char *pp;
1267     short port;
1268     FILE *record_file = 0;
1269
1270     yaz_log(YLOG_LOG, "HTTP listener %s", addr);
1271
1272
1273     if (record_fname)
1274     {
1275         record_file = fopen(record_fname, "wb");
1276         if (!record_file)
1277         {
1278             yaz_log(YLOG_FATAL|YLOG_ERRNO, "fopen %s", record_fname);
1279             return 1;
1280         }
1281     }
1282
1283     memset(&myaddr, 0, sizeof myaddr);
1284     myaddr.sin_family = AF_INET;
1285     pp = strchr(addr, ':');
1286     if (pp)
1287     {
1288         WRBUF w = wrbuf_alloc();
1289         struct hostent *he;
1290
1291         wrbuf_write(w, addr, pp - addr);
1292         wrbuf_puts(w, "");
1293
1294         he = gethostbyname(wrbuf_cstr(w));
1295         wrbuf_destroy(w);
1296         if (!he)
1297         {
1298             yaz_log(YLOG_FATAL, "Unable to resolve '%s'", addr);
1299             return 1;
1300         }
1301         memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length);
1302         port = atoi(pp + 1);
1303     }
1304     else
1305     {
1306         port = atoi(addr);
1307         myaddr.sin_addr.s_addr = INADDR_ANY;
1308     }
1309
1310     myaddr.sin_port = htons(port);
1311
1312     if (!(p = getprotobyname("tcp"))) {
1313         return 1;
1314     }
1315     if ((l = socket(PF_INET, SOCK_STREAM, p->p_proto)) < 0)
1316         yaz_log(YLOG_FATAL|YLOG_ERRNO, "socket");
1317     if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*)
1318                     &one, sizeof(one)) < 0)
1319         return 1;
1320
1321     if (bind(l, (struct sockaddr *) &myaddr, sizeof myaddr) < 0) 
1322     {
1323         yaz_log(YLOG_FATAL|YLOG_ERRNO, "bind");
1324         return 1;
1325     }
1326     if (listen(l, SOMAXCONN) < 0) 
1327     {
1328         yaz_log(YLOG_FATAL|YLOG_ERRNO, "listen");
1329         return 1;
1330     }
1331
1332     server->http_server = http_server_create();
1333
1334     server->http_server->record_file = record_file;
1335     server->http_server->listener_socket = l;
1336
1337     c = iochan_create(l, http_accept, EVENT_INPUT | EVENT_EXCEPT, "http_server");
1338     iochan_setdata(c, server);
1339
1340     iochan_add(server->iochan_man, c);
1341     return 0;
1342 }
1343
1344 void http_close_server(struct conf_server *server)
1345 {
1346     /* break the event_loop (select) by closing down the HTTP listener sock */
1347     if (server->http_server->listener_socket)
1348     {
1349 #ifdef WIN32
1350         closesocket(server->http_server->listener_socket);
1351 #else
1352         close(server->http_server->listener_socket);
1353 #endif
1354     }
1355 }
1356
1357 void http_set_proxyaddr(const char *host, struct conf_server *server)
1358 {
1359     const char *p;
1360     short port;
1361     struct hostent *he;
1362     WRBUF w = wrbuf_alloc();
1363
1364     yaz_log(YLOG_LOG, "HTTP backend  %s", host);
1365
1366     p = strchr(host, ':');
1367     if (p)
1368     {
1369         port = atoi(p + 1);
1370         wrbuf_write(w, host, p - host);
1371         wrbuf_puts(w, "");
1372     }
1373     else
1374     {
1375         port = 80;
1376         wrbuf_puts(w, host);
1377     }
1378     if (!(he = gethostbyname(wrbuf_cstr(w))))
1379     {
1380         fprintf(stderr, "Failed to lookup '%s'\n", wrbuf_cstr(w));
1381         exit(1);
1382     }
1383     wrbuf_destroy(w);
1384
1385     server->http_server->proxy_addr = xmalloc(sizeof(struct sockaddr_in));
1386     server->http_server->proxy_addr->sin_family = he->h_addrtype;
1387     memcpy(&server->http_server->proxy_addr->sin_addr.s_addr,
1388            he->h_addr_list[0], he->h_length);
1389     server->http_server->proxy_addr->sin_port = htons(port);
1390 }
1391
1392 static void http_fire_observers(struct http_channel *c)
1393 {
1394     http_channel_observer_t p = c->observers;
1395     while (p)
1396     {
1397         p->destroy(p->data, c, p->data2);
1398         p = p->next;
1399     }
1400 }
1401
1402 static void http_destroy_observers(struct http_channel *c)
1403 {
1404     while (c->observers)
1405     {
1406         http_channel_observer_t obs = c->observers;
1407         c->observers = obs->next;
1408         xfree(obs);
1409     }
1410 }
1411
1412 http_channel_observer_t http_add_observer(struct http_channel *c, void *data,
1413                                           http_channel_destroy_t des)
1414 {
1415     http_channel_observer_t obs = xmalloc(sizeof(*obs));
1416     obs->chan = c;
1417     obs->data = data;
1418     obs->data2 = 0;
1419     obs->destroy= des;
1420     obs->next = c->observers;
1421     c->observers = obs;
1422     return obs;
1423 }
1424
1425 void http_remove_observer(http_channel_observer_t obs)
1426 {
1427     struct http_channel *c = obs->chan;
1428     http_channel_observer_t found, *p = &c->observers;
1429     while (*p != obs)
1430         p = &(*p)->next;
1431     found = *p;
1432     assert(found);
1433     *p = (*p)->next;
1434     xfree(found);
1435 }
1436
1437 struct http_channel *http_channel_observer_chan(http_channel_observer_t obs)
1438 {
1439     return obs->chan;
1440 }
1441
1442 void http_observer_set_data2(http_channel_observer_t obs, void *data2)
1443 {
1444     obs->data2 = data2;
1445 }
1446
1447 http_server_t http_server_create(void)
1448 {
1449     http_server_t hs = xmalloc(sizeof(*hs));
1450     hs->mutex = 0;
1451     hs->proxy_addr = 0;
1452     hs->ref_count = 1;
1453     hs->http_sessions = 0;
1454
1455     hs->http_channel_freelist = 0;
1456     hs->http_channel_freelist_count = 0;
1457     /* Disable max check */
1458     hs->http_channel_freelist_max   = 0;
1459
1460     hs->http_buf_freelist = 0;
1461     hs->http_buf_freelist_count = 0;
1462     /* Disable max check */
1463     hs->http_buf_freelist_max = 0;
1464     hs->record_file = 0;
1465     return hs;
1466 }
1467
1468 void http_server_destroy(http_server_t hs)
1469 {
1470     if (hs)
1471     {
1472         int r;
1473
1474         yaz_mutex_enter(hs->mutex); /* OK: hs->mutex may be NULL */
1475         r = --(hs->ref_count);
1476         yaz_mutex_leave(hs->mutex);
1477
1478         if (r == 0)
1479         {
1480             struct http_buf *b = hs->http_buf_freelist;
1481             struct http_channel *c = hs->http_channel_freelist;
1482             while (b)
1483             {
1484                 struct http_buf *b_next = b->next;
1485                 xfree(b);
1486                 b = b_next;
1487             }
1488             while (c)
1489             {
1490                 struct http_channel *c_next = c->next;
1491                 nmem_destroy(c->nmem);
1492                 wrbuf_destroy(c->wrbuf);
1493                 xfree(c);
1494                 c = c_next;
1495             }
1496             http_sessions_destroy(hs->http_sessions);
1497             xfree(hs->proxy_addr);
1498             yaz_mutex_destroy(&hs->mutex);
1499             if (hs->record_file)
1500                 fclose(hs->record_file);
1501             xfree(hs);
1502         }
1503     }
1504 }
1505
1506 void http_server_incref(http_server_t hs)
1507 {
1508     assert(hs);
1509     yaz_mutex_enter(hs->mutex);
1510     (hs->ref_count)++;
1511     yaz_mutex_leave(hs->mutex);
1512 }
1513
1514 void http_mutex_init(struct conf_server *server)
1515 {
1516     assert(server);
1517
1518     assert(server->http_server->mutex == 0);
1519     pazpar2_mutex_create(&server->http_server->mutex, "http_server");
1520     server->http_server->http_sessions = http_sessions_create();
1521 }
1522
1523 /*
1524  * Local variables:
1525  * c-basic-offset: 4
1526  * c-file-style: "Stroustrup"
1527  * indent-tabs-mode: nil
1528  * End:
1529  * vim: shiftwidth=4 tabstop=8 expandtab
1530  */
1531