From d6862f25e98c81258ac86ee865b82ba610fd0b8f Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 11 Jan 2007 10:30:40 +0000 Subject: [PATCH] Implemented bug #806: Deal with HTTP clients sending LF in HTTP headers. --- NEWS | 4 + include/yaz/comstack.h | 3 +- src/comstack.c | 266 +++++++++++++++++++++++++++--------------------- src/zgdu.c | 38 +++---- test/Makefile.am | 6 +- test/tst_comstack.c | 207 +++++++++++++++++++++++++++++++++++++ 6 files changed, 385 insertions(+), 139 deletions(-) create mode 100644 test/tst_comstack.c diff --git a/NEWS b/NEWS index 908ffd8..556e1f0 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Implemented bug #806: Deal with HTTP clients sending LF in HTTP headers. + +Added ZOOM_connection_peek_event. + Implemented Generic select hook for ZOOM (bug #803). This is achieved with the following new functions: ZOOM_process_event, diff --git a/include/yaz/comstack.h b/include/yaz/comstack.h index d011259..216d28d 100644 --- a/include/yaz/comstack.h +++ b/include/yaz/comstack.h @@ -24,7 +24,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* $Id: comstack.h,v 1.25 2007-01-03 08:42:14 adam Exp $ */ +/* $Id: comstack.h,v 1.26 2007-01-11 10:30:40 adam Exp $ */ /** * \file comstack.h @@ -132,6 +132,7 @@ YAZ_EXPORT int cs_set_ssl_ctx(COMSTACK cs, void *ctx); YAZ_EXPORT int cs_set_ssl_certificate_file(COMSTACK cs, const char *fname); YAZ_EXPORT int cs_get_peer_certificate_x509(COMSTACK cs, char **buf, int *len); YAZ_EXPORT void cs_set_max_recv_bytes(COMSTACK cs, int max_recv_bytes); +YAZ_EXPORT int cs_complete_http(const char *buf, int len); /* * error management. diff --git a/src/comstack.c b/src/comstack.c index 7bd72d3..db3b002 100644 --- a/src/comstack.c +++ b/src/comstack.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: comstack.c,v 1.17 2007-01-03 08:42:15 adam Exp $ + * $Id: comstack.c,v 1.18 2007-01-11 10:30:41 adam Exp $ */ /** @@ -146,148 +146,180 @@ int cs_look (COMSTACK cs) return cs->event; } -#define CHUNK_DEBUG 0 -int cs_complete_auto(const unsigned char *buf, int len) +static int skip_crlf(const char *buf, int len, int *i) { - if (len > 5 && buf[0] >= 0x20 && buf[0] < 0x7f - && buf[1] >= 0x20 && buf[1] < 0x7f - && buf[2] >= 0x20 && buf[2] < 0x7f) + if (*i < len) { - /* deal with HTTP request/response */ - int i = 2, content_len = 0, chunked = 0; + if (buf[*i] == '\r' && *i < len-1 && buf[*i + 1] == '\n') + { + (*i) += 2; + return 1; + } + else if (buf[*i] == '\n') + { + (*i)++; + return 1; + } + } + return 0; +} - /* if dealing with HTTP responses - then default - content length is unlimited (socket close) */ - if (!memcmp(buf, "HTTP/", 5)) - content_len = -1; +#define CHUNK_DEBUG 0 - while (i <= len-4) +int cs_complete_http(const char *buf, int len) +{ + /* deal with HTTP request/response */ + int i = 2, content_len = 0, chunked = 0; + + if (len < 6) + return 0; + + /* if dealing with HTTP responses - then default + content length is unlimited (socket close) */ + if (!memcmp(buf, "HTTP/", 5)) + content_len = -1; + +#if 0 + printf("len = %d\n", len); + fwrite (buf, 1, len, stdout); + printf("----------\n"); +#endif + while (i <= len-2) + { + if (i > 8192) { - if (i > 8192) - { - return i; /* do not allow more than 8K HTTP header */ - } - if (buf[i] == '\r' && buf[i+1] == '\n') + return i; /* do not allow more than 8K HTTP header */ + } + if (skip_crlf(buf, len, &i)) + { + if (skip_crlf(buf, len, &i)) { - i += 2; - if (buf[i] == '\r' && buf[i+1] == '\n') - { - if (chunked) - { - /* inside chunked body .. */ - while(1) - { - int j, chunk_len = 0; - i += 2; + /* inside content */ + if (chunked) + { + /* inside chunked body .. */ + while(1) + { + int chunk_len = 0; #if CHUNK_DEBUG -/* debugging */ - if (i >>\n"); - } + if (i < len-2) + { + printf ("\n<<<"); + int j; + for (j = i; j <= i+3; j++) + printf ("%c", buf[j]); + printf (">>>\n"); + } #endif - /* read chunk length */ - while (1) - if (i >= len-2) { + /* read chunk length */ + while (1) + if (i >= len-2) { #if CHUNK_DEBUG -/* debugging */ - printf ("XXXXXXXX not there yet 1\n"); - printf ("i=%d len=%d\n", i, len); + printf ("returning incomplete read at 1\n"); + printf ("i=%d len=%d\n", i, len); #endif - return 0; - } else if (isdigit(buf[i])) - chunk_len = chunk_len * 16 + - (buf[i++] - '0'); - else if (isupper(buf[i])) - chunk_len = chunk_len * 16 + - (buf[i++] - ('A'-10)); - else if (islower(buf[i])) - chunk_len = chunk_len * 16 + - (buf[i++] - ('a'-10)); - else - break; - /* move forward until CRLF - skip chunk ext */ - j = 0; - while (buf[i] != '\r' && buf[i+1] != '\n') - { - if (i >= len-2) - return 0; /* need more buffer .. */ - if (++j > 1000) - return i; /* enough.. stop */ - i++; - } - /* got CRLF */ -#if CHUNK_DEBUG - printf ("XXXXXX chunk_len=%d\n", chunk_len); -#endif - if (chunk_len < 0) - return i+2; /* bad chunk_len */ - if (chunk_len == 0) + return 0; + } else if (isdigit(buf[i])) + chunk_len = chunk_len * 16 + + (buf[i++] - '0'); + else if (isupper(buf[i])) + chunk_len = chunk_len * 16 + + (buf[i++] - ('A'-10)); + else if (islower(buf[i])) + chunk_len = chunk_len * 16 + + (buf[i++] - ('a'-10)); + else break; - i += chunk_len+2; - } - /* consider trailing headers .. */ - while(i <= len-4) + if (chunk_len == 0) + break; + if (chunk_len < 0) + return i; + + while (1) { - if (buf[i] == '\r' && buf[i+1] == '\n' && - buf[i+2] == '\r' && buf[i+3] == '\n') - if (len >= i+4) - return i+4; + if (i >= len -1) + return 0; + if (skip_crlf(buf, len, &i)) + break; i++; } + /* got CRLF */ #if CHUNK_DEBUG -/* debugging */ - printf ("XXXXXXXXX not there yet 2\n"); - printf ("i=%d len=%d\n", i, len); -#endif - return 0; + printf ("chunk_len=%d\n", chunk_len); +#endif + i += chunk_len; + if (i >= len-2) + return 0; + if (!skip_crlf(buf, len, &i)) + return 0; } - else - { /* not chunked ; inside body */ - /* i += 2 seems not to work with GCC -O2 .. - so i+2 is used instead .. */ - if (content_len == -1) - return 0; /* no content length */ - else if (len >= (i+2)+ content_len) + /* consider trailing headers .. */ + while (i < len) + { + if (skip_crlf(buf, len, &i)) { - return (i+2)+ content_len; + if (skip_crlf(buf, len, &i)) + return i; } + else + i++; } - break; - } - else if (i < len - 20 && - !strncasecmp((const char *) buf+i, "Transfer-Encoding:", 18)) - { - i+=18; - while (buf[i] == ' ') - i++; - if (i < len - 8) - if (!strncasecmp((const char *) buf+i, "chunked", 7)) - chunked = 1; - } - else if (i < len - 17 && - !strncasecmp((const char *)buf+i, "Content-Length:", 15)) - { - i+= 15; - while (buf[i] == ' ') - i++; - content_len = 0; - while (i <= len-4 && isdigit(buf[i])) - content_len = content_len*10 + (buf[i++] - '0'); - if (content_len < 0) /* prevent negative offsets */ - content_len = 0; +#if CHUNK_DEBUG + printf ("returning incomplete read at 2\n"); + printf ("i=%d len=%d\n", i, len); +#endif + return 0; } else + { /* not chunked ; inside body */ + if (content_len == -1) + return 0; /* no content length */ + else if (len >= i + content_len) + { + return i + content_len; + } + } + break; + } + else if (i < len - 20 && + !strncasecmp((const char *) buf+i, "Transfer-Encoding:", 18)) + { + i+=18; + while (buf[i] == ' ') + i++; + if (i < len - 8) + if (!strncasecmp((const char *) buf+i, "chunked", 7)) + chunked = 1; + } + else if (i < len - 17 && + !strncasecmp((const char *)buf+i, "Content-Length:", 15)) + { + i+= 15; + while (buf[i] == ' ') i++; + content_len = 0; + while (i <= len-4 && isdigit(buf[i])) + content_len = content_len*10 + (buf[i++] - '0'); + if (content_len < 0) /* prevent negative offsets */ + content_len = 0; } else i++; } - return 0; + else + i++; + } + return 0; +} + +int cs_complete_auto(const unsigned char *buf, int len) +{ + if (len > 5 && buf[0] >= 0x20 && buf[0] < 0x7f + && buf[1] >= 0x20 && buf[1] < 0x7f + && buf[2] >= 0x20 && buf[2] < 0x7f) + { + int r = cs_complete_http((const char *) buf, len); + return r; } return completeBER(buf, len); } diff --git a/src/zgdu.c b/src/zgdu.c index 3634aeb..49f1ea3 100644 --- a/src/zgdu.c +++ b/src/zgdu.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2007, Index Data ApS * See the file LICENSE for details. * - * $Id: zgdu.c,v 1.17 2007-01-03 08:42:15 adam Exp $ + * $Id: zgdu.c,v 1.18 2007-01-11 10:30:41 adam Exp $ */ /** @@ -28,17 +28,16 @@ static int decode_headers_content(ODR o, int off, Z_HTTP_Header **headers, int chunked = 0; *headers = 0; - while (i < o->size-1 && o->buf[i] == '\r') + while (i < o->size-1 && o->buf[i] == '\n') { int po; i++; - if (o->buf[i] != '\n') + if (o->buf[i] == '\r' && i < o->size-1 && o->buf[i+1] == '\n') { - o->error = OHTTP; - return 0; + i++; + break; } - i++; - if (o->buf[i] == '\r') + if (o->buf[i] == '\n') break; for (po = i; ; i++) { @@ -57,7 +56,7 @@ static int decode_headers_content(ODR o, int off, Z_HTTP_Header **headers, i++; while (i < o->size-1 && o->buf[i] == ' ') i++; - for (po = i; i < o->size-1 && o->buf[i] != '\r' ; i++) + for (po = i; i < o->size-1 && !strchr("\r\n", o->buf[i]); i++) ; (*headers)->value = (char*) odr_malloc(o, i - po + 1); @@ -69,9 +68,10 @@ static int decode_headers_content(ODR o, int off, Z_HTTP_Header **headers, !strcasecmp((*headers)->value, "chunked")) chunked = 1; headers = &(*headers)->next; + if (i < o->size-1 && o->buf[i] == '\r') + i++; } *headers = 0; - i++; if (o->buf[i] != '\n') { o->error = OHTTP; @@ -312,7 +312,7 @@ int z_GDU (ODR o, Z_GDU **p, int opt, const char *name) hr->content_len = 0; po = i = 5; - while (i < o->size-2 && o->buf[i] != ' ' && o->buf[i] != '\r') + while (i < o->size-2 && !strchr(" \r\n", o->buf[i])) i++; hr->version = (char *) odr_malloc(o, i - po + 1); if (i - po) @@ -330,7 +330,7 @@ int z_GDU (ODR o, Z_GDU **p, int opt, const char *name) hr->code = hr->code*10 + (o->buf[i] - '0'); i++; } - while (i < o->size-1 && o->buf[i] != '\r') + while (i < o->size-1 && o->buf[i] != '\n') i++; return decode_headers_content(o, i, &hr->headers, &hr->content_buf, &hr->content_len); @@ -378,19 +378,19 @@ int z_GDU (ODR o, Z_GDU **p, int opt, const char *name) } i+= 5; po = i; - while (o->buf[i] != '\r') - { - if (i >= o->size-1) - { - o->error = OHTTP; - return 0; - } + while (i < o->size && !strchr("\r\n", o->buf[i])) i++; - } hr->version = (char *) odr_malloc(o, i - po + 1); memcpy(hr->version, o->buf + po, i - po); hr->version[i - po] = '\0'; /* headers */ + if (i < o->size-1 && o->buf[i] == '\r') + i++; + if (o->buf[i] != '\n') + { + o->error = OHTTP; + return 0; + } return decode_headers_content(o, i, &hr->headers, &hr->content_buf, &hr->content_len); diff --git a/test/Makefile.am b/test/Makefile.am index 5d19146..7d9e7d9 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,10 +1,11 @@ ## Copyright (C) 1995-2007, Index Data ApS ## All rights reserved. -## $Id: Makefile.am,v 1.30 2007-01-05 11:45:11 adam Exp $ +## $Id: Makefile.am,v 1.31 2007-01-11 10:30:41 adam Exp $ check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf tstodr tstccl tstlog \ tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery \ - tst_filepath tst_record_conv tst_retrieval tst_tpath tst_timing + tst_comstack tst_filepath tst_record_conv tst_retrieval tst_tpath \ + tst_timing check_SCRIPTS = tstcql.sh tstmarciso.sh tstmarcxml.sh tstmarccol.sh TESTS = $(check_PROGRAMS) $(check_SCRIPTS) @@ -56,6 +57,7 @@ tstsoap2_SOURCES = tstsoap2.c tstlogthread_SOURCES = tstlogthread.c tstxmlquery_SOURCES = tstxmlquery.c tstpquery_SOURCES = tstpquery.c +tst_comstack_SOURCES = tst_comstack.c tst_filepath_SOURCES = tst_filepath.c tst_record_conv_SOURCES = tst_record_conv.c tst_retrieval_SOURCES = tst_retrieval.c diff --git a/test/tst_comstack.c b/test/tst_comstack.c new file mode 100644 index 0000000..c2e2597 --- /dev/null +++ b/test/tst_comstack.c @@ -0,0 +1,207 @@ +/* + * Copyright (C) 1995-2007, Index Data ApS + * See the file LICENSE for details. + * + * $Id: tst_comstack.c,v 1.1 2007-01-11 10:30:41 adam Exp $ + */ + +#include +#include + +#include +#include + +static void tst_http_request(void) +{ + { + /* no content, no headers */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\r\n" + "\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 16), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 17), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 18), 18); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 19), 18); + } + { + /* one header, no content */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\r\n" + "Content-Type: x\r\n" + "\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 34), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 35), 35); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 36), 35); + } + { + /* one content-length header, length 0 */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\r\n" + "Content-Length: 0\r\n" + "\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 35), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 37), 37); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 38), 37); + } + { + /* one content-length header, length 5 */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\r\n" + "Content-Length: 5\r\n" + "\r\n" + "ABCDE" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 41), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 42), 42); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 43), 42); + } + { + /* LF only in GET, one content-length header, length 5 */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\n" + "Content-Length: 5\r\n" + "\r\n" + "ABCDE" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 40), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 41), 41); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 42), 41); + } + { + /* LF only in all places, one content-length header, length 5 */ + const char *http_buf = + /*123456789012345678 */ + "GET / HTTP/1.1\n" + "Content-Length: 5\n" + "\n" + "ABCDE" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 38), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 39), 39); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 40), 39); + } + + { + /* one header, unknown transfer-encoding (no content) */ + const char *http_buf = + /*12345678901234567890123456789 */ + "GET / HTTP/1.1\r\n" + "Transfer-Encoding: chunke_\r\n" + "\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 45), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 46), 46); + } + + { + /* one header, one chunk */ + const char *http_buf = + /*12345678901234567890123456789 */ + "GET / HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "3\r\n" + "123\r\n" + "0\r\n\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 58), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 59), 59); + } + + { + /* one header, two chunks */ + const char *http_buf = + /*12345678901234567890123456789 */ + "GET / HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "3\r\n" + "123\r\n" + "2\r\n" + "12\n" + "0\r\n\r\n" + "GET / HTTP/1.0\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 64), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 65), 65); + } +} + +static void tst_http_response(void) +{ + { + /* unlimited content, no headers */ + const char *http_buf = + /*123456789012345678 */ + "HTTP/1.1 200 OK\r\n" + "\r\n" + "HTTP/1.1 200 OK\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 24), 0); + } + { + /* response, content */ + const char *http_buf = + /*123456789012345678 */ + "HTTP/1.1 200 OK\r\n" + "Content-Length: 2\r\n" + "\r\n" + "12" + "HTTP/1.1 200 OK\r\n"; + + YAZ_CHECK_EQ(cs_complete_http(http_buf, 1), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 2), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 39), 0); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 40), 40); + YAZ_CHECK_EQ(cs_complete_http(http_buf, 41), 40); + } +} + + +int main (int argc, char **argv) +{ + YAZ_CHECK_INIT(argc, argv); + YAZ_CHECK_LOG(); + tst_http_request(); + tst_http_response(); + YAZ_CHECK_TERM; +} + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + -- 1.7.10.4