X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fhttp.c;h=f4d8d697574daf18abfbe4dbc7a8312bfd9a44d2;hb=f89a08d579f270d78b6e48a04ec63cef23539c88;hp=a9a9054c6955aa10d46903439b510258e176b78e;hpb=3c94e0ffce0f67640094e9448fcd1d2dba6ffba4;p=pazpar2-moved-to-github.git diff --git a/src/http.c b/src/http.c index a9a9054..f4d8d69 100644 --- a/src/http.c +++ b/src/http.c @@ -1,7 +1,5 @@ -/* $Id: http.c,v 1.34 2007-06-15 19:35:17 adam Exp $ - Copyright (c) 2006-2007, Index Data. - -This file is part of Pazpar2. +/* This file is part of Pazpar2. + Copyright (C) 2006-2008 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -14,15 +12,16 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Pazpar2; see the file LICENSE. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. - */ +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ #include #include #include #include +#include #include #include #include @@ -34,7 +33,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #if HAVE_CONFIG_H -#include +#include #endif #include @@ -45,13 +44,14 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include "cconfig.h" #include "util.h" #include "eventl.h" #include "pazpar2.h" #include "http.h" #include "http_command.h" +#define MAX_HTTP_HEADER 4096 + static void proxy_io(IOCHAN i, int event); static struct http_channel *http_create(const char *addr); static void http_destroy(IOCHAN i); @@ -65,11 +65,22 @@ static struct http_channel *http_channel_freelist = 0; struct http_channel_observer_s { void *data; - void (*destroy)(void *data, struct http_channel *chan); + void *data2; + http_channel_destroy_t destroy; struct http_channel_observer_s *next; struct http_channel *chan; }; + +static const char *http_lookup_header(struct http_header *header, + const char *name) +{ + for (; header; header = header->next) + if (!strcasecmp(name, header->name)) + return header->value; + return 0; +} + static struct http_buf *http_buf_create() { struct http_buf *r; @@ -104,17 +115,6 @@ static void http_buf_destroy_queue(struct http_buf *b) } } -#ifdef GAGA -// Calculate length of chain -static int http_buf_len(struct http_buf *b) -{ - int sum = 0; - for (; b; b = b->next) - sum += b->len; - return sum; -} -#endif - static struct http_buf *http_buf_bybuf(char *b, int len) { struct http_buf *res = 0; @@ -151,7 +151,7 @@ static struct http_buf *http_buf_bywrbuf(WRBUF wrbuf) // Non-destructively collapse chain of buffers into a string (max *len) // Return -static int http_buf_peek(struct http_buf *b, char *buf, int len) +static void http_buf_peek(struct http_buf *b, char *buf, int len) { int rd = 0; while (b && rd < len) @@ -164,7 +164,14 @@ static int http_buf_peek(struct http_buf *b, char *buf, int len) b = b->next; } buf[rd] = '\0'; - return rd; +} + +static int http_buf_size(struct http_buf *b) +{ + int sz = 0; + for (; b; b = b->next) + sz += b->len; + return sz; } // Ddestructively munch up to len from head of queue. @@ -272,48 +279,87 @@ struct http_response *http_create_response(struct http_channel *c) r->channel = c; r->headers = 0; r->payload = 0; + r->content_type = "text/xml"; return r; } + +static const char *next_crlf(const char *cp, size_t *skipped) +{ + const char *next_cp = strchr(cp, '\n'); + if (next_cp) + { + if (next_cp > cp && next_cp[-1] == '\r') + *skipped = next_cp - cp - 1; + else + *skipped = next_cp - cp; + next_cp++; + } + return next_cp; +} + // Check if buf contains a package (minus payload) -static int package_check(const char *buf) +static int package_check(const char *buf, int sz) { + int content_len = 0; int len = 0; - while (*buf) // Check if we have a sequence of lines terminated by an empty line + + while (*buf) { - char *b = strstr(buf, "\r\n"); + size_t skipped = 0; + const char *b = next_crlf(buf, &skipped); if (!b) - return 0; - - len += (b - buf) + 2; - if (b == buf) - return len; - buf = b + 2; + { + // we did not find CRLF.. See if buffer is too large.. + if (sz >= MAX_HTTP_HEADER-1) + return MAX_HTTP_HEADER-1; // yes. Return that (will fail later) + break; + } + len += (b - buf); + if (skipped == 0) + { + // CRLF CRLF , i.e. end of header + if (len + content_len <= sz) + return len + content_len; + break; + } + buf = b; + // following first skip of \r\n so that we don't consider Method + if (!strncasecmp(buf, "Content-Length:", 15)) + { + const char *cp = buf+15; + while (*cp == ' ') + cp++; + content_len = 0; + while (*cp && isdigit(*cp)) + content_len = content_len*10 + (*cp++ - '0'); + if (content_len < 0) /* prevent negative offsets */ + content_len = 0; + } } - return 0; + return 0; // incomplete request } // Check if we have a request. Return 0 or length -// (including trailing CRNL) FIXME: Does not deal gracefully with requests -// carrying payload but this is kind of OK since we will reject anything -// other than an empty GET static int request_check(struct http_buf *queue) { - char tmp[4096]; + char tmp[MAX_HTTP_HEADER]; - http_buf_peek(queue, tmp, 4096); - return package_check(tmp); + // only peek at the header.. + http_buf_peek(queue, tmp, MAX_HTTP_HEADER-1); + // still we only return non-zero if the complete request is received.. + return package_check(tmp, http_buf_size(queue)); } struct http_response *http_parse_response_buf(struct http_channel *c, const char *buf, int len) { - char tmp[4096]; + char tmp[MAX_HTTP_HEADER]; struct http_response *r = http_create_response(c); char *p, *p2; struct http_header **hp = &r->headers; - if (len >= 4096) + if (len >= MAX_HTTP_HEADER) return 0; memcpy(tmp, buf, len); for (p = tmp; *p && *p != ' '; p++) // Skip HTTP version @@ -357,24 +403,62 @@ struct http_response *http_parse_response_buf(struct http_channel *c, const char return r; } +static int http_parse_arguments(struct http_request *r, NMEM nmem, + const char *args) +{ + const char *p2 = args; + + while (*p2) + { + struct http_argument *a; + const char *equal = strchr(p2, '='); + const char *eoa = strchr(p2, '&'); + if (!equal) + { + yaz_log(YLOG_WARN, "Expected '=' in argument"); + return -1; + } + if (!eoa) + eoa = equal + strlen(equal); // last argument + else if (equal > eoa) + { + yaz_log(YLOG_WARN, "Missing '&' in argument"); + return -1; + } + a = nmem_malloc(nmem, sizeof(struct http_argument)); + a->name = nmem_strdupn(nmem, p2, equal - p2); + a->value = nmem_strdupn(nmem, equal+1, eoa - equal - 1); + urldecode(a->name, a->name); + urldecode(a->value, a->value); + a->next = r->arguments; + r->arguments = a; + p2 = eoa; + while (*p2 == '&') + p2++; + } + return 0; +} + struct http_request *http_parse_request(struct http_channel *c, struct http_buf **queue, int len) { struct http_request *r = nmem_malloc(c->nmem, sizeof(*r)); char *p, *p2; - char tmp[4096]; - char *buf = tmp; + char *start = nmem_malloc(c->nmem, len+1); + char *buf = start; - if (len > 4096) - return 0; if (http_buf_read(queue, buf, len) < len) + { + yaz_log(YLOG_WARN, "http_buf_read < len (%d)", len); return 0; - + } r->search = ""; r->channel = c; r->arguments = 0; r->headers = 0; + r->content_buf = 0; + r->content_len = 0; // Parse first line for (p = buf, p2 = r->method; *p && *p != ' ' && p - buf < 19; p++) *(p2++) = *p; @@ -387,13 +471,13 @@ struct http_request *http_parse_request(struct http_channel *c, if (!(buf = strchr(buf, ' '))) { - yaz_log(YLOG_WARN, "Syntax error in request (1)"); + yaz_log(YLOG_WARN, "Missing Request-URI in HTTP request"); return 0; } buf++; if (!(p = strchr(buf, ' '))) { - yaz_log(YLOG_WARN, "Syntax error in request (2)"); + yaz_log(YLOG_WARN, "HTTP Request-URI not terminated (too long?)"); return 0; } *(p++) = '\0'; @@ -404,29 +488,7 @@ struct http_request *http_parse_request(struct http_channel *c, { r->search = nmem_strdup(c->nmem, p2); // Parse Arguments - while (*p2) - { - struct http_argument *a; - char *equal = strchr(p2, '='); - char *eoa = strchr(p2, '&'); - if (!equal) - { - yaz_log(YLOG_WARN, "Expected '=' in argument"); - return 0; - } - if (!eoa) - eoa = equal + strlen(equal); // last argument - else - *(eoa++) = '\0'; - a = nmem_malloc(c->nmem, sizeof(struct http_argument)); - *(equal++) = '\0'; - a->name = nmem_strdup(c->nmem, p2); - urldecode(equal, equal); - a->value = nmem_strdup(c->nmem, equal); - a->next = r->arguments; - r->arguments = a; - p2 = eoa; - } + http_parse_arguments(r, c->nmem, p2); } buf = p; @@ -434,12 +496,15 @@ struct http_request *http_parse_request(struct http_channel *c, strcpy(r->http_version, "1.0"); else { - buf += 5; - if (!(p = strstr(buf, "\r\n"))) + size_t skipped; + buf += 5; // strlen("HTTP/") + + p = (char*) next_crlf(buf, &skipped); + if (!p || skipped < 3 || skipped > 5) return 0; - *(p++) = '\0'; - p++; - strcpy(r->http_version, buf); + + memcpy(r->http_version, buf, skipped); + r->http_version[skipped] = '\0'; buf = p; } strcpy(c->version, r->http_version); @@ -447,32 +512,69 @@ struct http_request *http_parse_request(struct http_channel *c, r->headers = 0; while (*buf) { - if (!(p = strstr(buf, "\r\n"))) + size_t skipped; + + p = (char *) next_crlf(buf, &skipped); + if (!p) + { return 0; - if (p == buf) + } + else if (skipped == 0) + { + buf = p; break; + } else { + char *cp; + char *n_v = nmem_malloc(c->nmem, skipped+1); struct http_header *h = nmem_malloc(c->nmem, sizeof(*h)); - if (!(p2 = strchr(buf, ':'))) + + memcpy(n_v, buf, skipped); + n_v[skipped] = '\0'; + + if (!(cp = strchr(n_v, ':'))) return 0; - *(p2++) = '\0'; - h->name = nmem_strdup(c->nmem, buf); - while (isspace(*p2)) - p2++; - if (p2 >= p) // Empty header? - { - buf = p + 2; - continue; - } - *p = '\0'; - h->value = nmem_strdup(c->nmem, p2); + h->name = nmem_strdupn(c->nmem, n_v, cp - n_v); + cp++; + while (isspace(*cp)) + cp++; + h->value = nmem_strdup(c->nmem, cp); h->next = r->headers; r->headers = h; - buf = p + 2; + buf = p; } } + // determine if we do keep alive + if (!strcmp(c->version, "1.0")) + { + const char *v = http_lookup_header(r->headers, "Connection"); + if (v && !strcmp(v, "Keep-Alive")) + c->keep_alive = 1; + else + c->keep_alive = 0; + } + else + { + const char *v = http_lookup_header(r->headers, "Connection"); + if (v && !strcmp(v, "close")) + c->keep_alive = 0; + else + c->keep_alive = 1; + } + if (buf < start + len) + { + const char *content_type = http_lookup_header(r->headers, + "Content-Type"); + r->content_len = start + len - buf; + r->content_buf = buf; + + if (!strcmp(content_type, "application/x-www-form-urlencoded")) + { + http_parse_arguments(r, c->nmem, r->content_buf); + } + } return r; } @@ -482,15 +584,15 @@ static struct http_buf *http_serialize_response(struct http_channel *c, struct http_header *h; wrbuf_rewind(c->wrbuf); - wrbuf_printf(c->wrbuf, "HTTP/1.1 %s %s\r\n", r->code, r->msg); + wrbuf_printf(c->wrbuf, "HTTP/%s %s %s\r\n", c->version, r->code, r->msg); for (h = r->headers; h; h = h->next) wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); if (r->payload) { - wrbuf_printf(c->wrbuf, "Content-length: %d\r\n", r->payload ? + wrbuf_printf(c->wrbuf, "Content-Length: %d\r\n", r->payload ? (int) strlen(r->payload) : 0); - wrbuf_printf(c->wrbuf, "Content-type: text/xml\r\n"); - if (1) + wrbuf_printf(c->wrbuf, "Content-Type: %s\r\n", r->content_type); + if (!strcmp(r->content_type, "text/xml")) { xmlDoc *doc = xmlParseMemory(r->payload, strlen(r->payload)); if (doc) @@ -518,20 +620,10 @@ static struct http_buf *http_serialize_request(struct http_request *r) { struct http_channel *c = r->channel; struct http_header *h; - struct http_argument *a; wrbuf_rewind(c->wrbuf); - wrbuf_printf(c->wrbuf, "%s %s", r->method, r->path); - - if (r->arguments) - { - wrbuf_putc(c->wrbuf, '?'); - for (a = r->arguments; a; a = a->next) { - if (a != r->arguments) - wrbuf_putc(c->wrbuf, '&'); - wrbuf_printf(c->wrbuf, "%s=%s", a->name, a->value); - } - } + wrbuf_printf(c->wrbuf, "%s %s%s%s", r->method, r->path, + *r->search ? "?" : "", r->search); wrbuf_printf(c->wrbuf, " HTTP/%s\r\n", r->http_version); @@ -539,7 +631,14 @@ static struct http_buf *http_serialize_request(struct http_request *r) wrbuf_printf(c->wrbuf, "%s: %s\r\n", h->name, h->value); wrbuf_puts(c->wrbuf, "\r\n"); - + + if (r->content_buf) + wrbuf_write(c->wrbuf, r->content_buf, r->content_len); + +#if 0 + yaz_log(YLOG_LOG, "WRITING TO PROXY:\n%s\n----", + wrbuf_cstr(c->wrbuf)); +#endif return http_buf_bywrbuf(c->wrbuf); } @@ -634,10 +733,8 @@ static int http_proxy(struct http_request *rq) } // Do _not_ modify Host: header, just checking it's existence - for (hp = rq->headers; hp; hp = hp->next) - if (!strcmp(hp->name, "Host")) - break; - if (!hp) + + if (!http_lookup_header(rq->headers, "Host")) { yaz_log(YLOG_WARN, "Failed to find Host header in proxy"); return -1; @@ -660,6 +757,7 @@ static int http_proxy(struct http_request *rq) } requestbuf = http_serialize_request(rq); + http_buf_enqueue(&p->oqueue, requestbuf); iochan_setflag(p->iochan, EVENT_OUTPUT); return 0; @@ -685,6 +783,22 @@ void http_send_response(struct http_channel *ch) } } +static void http_error(struct http_channel *hc, int no, const char *msg) +{ + struct http_response *rs = http_create_response(hc); + + hc->response = rs; + hc->keep_alive = 0; // not keeping this HTTP session alive + + sprintf(rs->code, "%d", no); + + rs->msg = nmem_strdup(hc->nmem, msg); + rs->payload = nmem_malloc(hc->nmem, 100); + yaz_snprintf(rs->payload, 99, "HTTP Error %d: %s\n", + no, msg); + http_send_response(hc); +} + static void http_io(IOCHAN i, int event) { struct http_channel *hc = iochan_getdata(i); @@ -712,39 +826,36 @@ static void http_io(IOCHAN i, int event) htbuf->len = res; http_buf_enqueue(&hc->iqueue, htbuf); - if (hc->state == Http_Busy) - return; - if ((reqlen = request_check(hc->iqueue)) <= 2) - return; - - nmem_reset(hc->nmem); - if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) + while (1) { - yaz_log(YLOG_WARN, "Failed to parse request"); - http_destroy(i); - return; - } - hc->response = 0; - yaz_log(YLOG_LOG, "Request: %s %s%s%s", hc->request->method, - hc->request->path, - *hc->request->search ? "?" : "", - hc->request->search); - if (http_weshouldproxy(hc->request)) - http_proxy(hc->request); - else - { - // Execute our business logic! - hc->state = Http_Busy; - http_command(hc); - } - if (hc->iqueue) - { - yaz_log(YLOG_DEBUG, "We think we have more input to read. Forcing event"); - iochan_setevent(i, EVENT_INPUT); + if (hc->state == Http_Busy) + return; + reqlen = request_check(hc->iqueue); + if (reqlen <= 2) + return; + // we have a complete HTTP request + nmem_reset(hc->nmem); + if (!(hc->request = http_parse_request(hc, &hc->iqueue, reqlen))) + { + yaz_log(YLOG_WARN, "Failed to parse request"); + http_error(hc, 400, "Bad Request"); + return; + } + hc->response = 0; + yaz_log(YLOG_LOG, "Request: %s %s%s%s", hc->request->method, + hc->request->path, + *hc->request->search ? "?" : "", + hc->request->search); + if (http_weshouldproxy(hc->request)) + http_proxy(hc->request); + else + { + // Execute our business logic! + hc->state = Http_Busy; + http_command(hc); + } } - break; - case EVENT_OUTPUT: if (hc->oqueue) { @@ -767,7 +878,7 @@ static void http_io(IOCHAN i, int event) wb->offset += res; } if (!hc->oqueue) { - if (!strcmp(hc->version, "1.0")) + if (!hc->keep_alive) { http_destroy(i); return; @@ -790,25 +901,6 @@ static void http_io(IOCHAN i, int event) } } -#ifdef GAGA -// If this hostname contains our proxy host as a prefix, replace with myurl -static char *sub_hostname(struct http_channel *c, char *buf) -{ - char tmp[1024]; - if (strlen(buf) > 1023) - return buf; - if (strncmp(buf, "http://", 7)) - return buf; - if (!strncmp(buf + 7, proxy_url, strlen(proxy_url))) - { - strcpy(tmp, myurl); - strcat(tmp, buf + strlen(proxy_url) + 7); - return nmem_strdup(c->nmem, tmp); - } - return buf; -} -#endif - // Handles I/O on a client connection to a backend web server (proxy mode) static void proxy_io(IOCHAN pi, int event) { @@ -845,33 +937,6 @@ static void proxy_io(IOCHAN pi, int event) htbuf->buf[res] = '\0'; htbuf->offset = 0; htbuf->len = res; -#ifdef GAGA - if (pc->first_response) // Check if this is a redirect - { - int len; - if ((len = package_check(htbuf->buf))) - { - struct http_response *res = http_parse_response_buf(hc, htbuf->buf, len); - if (res) - { - struct http_header *h; - for (h = res->headers; h; h = h->next) - if (!strcmp(h->name, "Location")) - { - // We found a location header. Rewrite it. - struct http_buf *buf; - h->value = sub_hostname(hc, h->value); - buf = http_serialize_response(hc, res); - yaz_log(YLOG_LOG, "Proxy rewrite"); - http_buf_enqueue(&hc->oqueue, buf); - htbuf->offset = len; - break; - } - } - } - pc->first_response = 0; - } -#endif // Write any remaining payload if (htbuf->len - htbuf->offset > 0) http_buf_enqueue(&hc->oqueue, htbuf); @@ -961,6 +1026,7 @@ static struct http_channel *http_create(const char *addr) r->iochan = 0; r->iqueue = r->oqueue = 0; r->state = Http_Idle; + r->keep_alive = 0; r->request = 0; r->response = 0; if (!addr) @@ -1037,11 +1103,6 @@ void http_init(const char *addr) memcpy(&myaddr.sin_addr.s_addr, he->h_addr_list[0], he->h_length); port = atoi(pp + 1); - - yaz_log(YLOG_LOG, "HTTP address %s:%d", - "" == he->h_addr_list[0] ? he->h_addr_list[0] : "127.0.0.1" , - port); - } else { @@ -1108,7 +1169,7 @@ static void http_fire_observers(struct http_channel *c) http_channel_observer_t p = c->observers; while (p) { - p->destroy(p->data, c); + p->destroy(p->data, c, p->data2); p = p->next; } } @@ -1129,6 +1190,7 @@ http_channel_observer_t http_add_observer(struct http_channel *c, void *data, http_channel_observer_t obs = xmalloc(sizeof(*obs)); obs->chan = c; obs->data = data; + obs->data2 = 0; obs->destroy= des; obs->next = c->observers; c->observers = obs; @@ -1152,6 +1214,12 @@ struct http_channel *http_channel_observer_chan(http_channel_observer_t obs) return obs->chan; } +void http_observer_set_data2(http_channel_observer_t obs, void *data2) +{ + obs->data2 = data2; +} + + /* * Local variables: * c-basic-offset: 4