cs_get_host_args: allow unix:path:args notation
[yaz-moved-to-github.git] / src / comstack.c
index 7ed6665..306ec6d 100644 (file)
@@ -1,22 +1,25 @@
-/*
- * Copyright (c) 1995-2004, Index Data
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  * See the file LICENSE for details.
- *
- * $Id: comstack.c,v 1.4 2004-01-06 20:21:37 adam Exp $
  */
  */
+/**
+ * \file comstack.c
+ * \brief Implements Generic COMSTACK functions
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <string.h>
 
 #include <string.h>
-#include <ctype.h>
 #include <errno.h>
 
 #include <errno.h>
 
+#include <yaz/yaz-iconv.h>
+#include <yaz/log.h>
 #include <yaz/comstack.h>
 #include <yaz/tcpip.h>
 #include <yaz/unix.h>
 #include <yaz/odr.h>
 #include <yaz/comstack.h>
 #include <yaz/tcpip.h>
 #include <yaz/unix.h>
 #include <yaz/odr.h>
-
-#ifdef WIN32
-#define strncasecmp _strnicmp
-#endif
+#include <yaz/matchstr.h>
 
 static const char *cs_errlist[] =
 {
 
 static const char *cs_errlist[] =
 {
@@ -26,21 +29,14 @@ static const char *cs_errlist[] =
     "No data (operation would block)",
     "New data while half of old buffer is on the line (flow control)",
     "Permission denied",
     "No data (operation would block)",
     "New data while half of old buffer is on the line (flow control)",
     "Permission denied",
-    "SSL error"
+    "SSL error",
+    "Too large incoming buffer"
 };
 
 const char *cs_errmsg(int n)
 {
 };
 
 const char *cs_errmsg(int n)
 {
-    static char buf[250];
-
-    if (n < CSNONE || n > CSLASTERROR) {
-       sprintf(buf, "unknown comstack error %d", n);
-       return buf;
-    }
-    if (n == CSYSERR) {
-       sprintf(buf, "%s: %s", cs_errlist[n], strerror(errno));
-       return buf;
-    }
+    if (n < CSNONE || n > CSLASTERROR)
+        n = CSNONE;
     return cs_errlist[n];
 }
 
     return cs_errlist[n];
 }
 
@@ -51,87 +47,210 @@ const char *cs_strerror(COMSTACK h)
 
 void cs_get_host_args(const char *type_and_host, const char **args)
 {
 
 void cs_get_host_args(const char *type_and_host, const char **args)
 {
-    
     *args = "";
     *args = "";
-    if (*type_and_host && strncmp(type_and_host, "unix:", 5))
+    if (!strncmp(type_and_host, "unix:", 5))
     {
     {
-        const char *cp;
-        cp = strstr(type_and_host, "://");
+        const char *cp = strchr(type_and_host + 5, ':');
+        if (!cp)
+            return;
+        type_and_host = cp + 1;
+        if (!strchr(type_and_host, ':'))
+        {
+            *args = type_and_host;  /* unix:path:args */
+            return;
+        }
+    }
+    if (*type_and_host)
+    {
+        const char *cp = strchr(type_and_host, '/');
         if (cp)
         if (cp)
-            cp = cp+3;
-        else
-            cp = type_and_host;
-        cp = strchr(cp, '/');
+        {
+            if (cp > type_and_host && !memcmp(cp - 1, "://", 3))
+                cp = strchr(cp + 2, '/');
+        }
         if (cp)
         if (cp)
-            *args = cp+1;
+            *args = cp + 1;
     }
 }
 
     }
 }
 
-COMSTACK cs_create_host(const char *type_and_host, int blocking, void **vp)
+int cs_parse_host(const char *uri, const char **host,
+                  CS_TYPE *t, enum oid_proto *proto,
+                  char **connect_host)
 {
 {
-    enum oid_proto proto = PROTO_Z3950;
-    const char *host = 0;
-    COMSTACK cs;
-    CS_TYPE t;
+    *connect_host = 0;
 
 
-    if (strncmp (type_and_host, "tcp:", 4) == 0)
+    *t = tcpip_type;
+    if (strncmp(uri, "connect:", 8) == 0)
     {
     {
-       t = tcpip_type;
-        host = type_and_host + 4;
+        const char *cp = strchr(uri, ',');
+        if (cp)
+        {
+            size_t len;
+
+            uri += 8;
+            len = cp - uri;
+            *connect_host = (char *) xmalloc(len + 1);
+            memcpy(*connect_host, uri, len);
+            (*connect_host)[len] = '\0';
+            uri = cp + 1;
+        }
     }
     }
-    else if (strncmp (type_and_host, "ssl:", 4) == 0)
+    else if (strncmp(uri, "unix:", 5) == 0)
     {
     {
-#if HAVE_OPENSSL_SSL_H
-       t = ssl_type;
-        host = type_and_host + 4;
+        const char *cp;
+
+        uri += 5;
+        cp = strchr(uri, ':');
+        if (cp)
+        {
+            size_t len = cp - uri;
+            *connect_host = (char *) xmalloc(len + 1);
+            memcpy(*connect_host, uri, len);
+            (*connect_host)[len] = '\0';
+            uri = cp + 1;
+        }
+#ifdef WIN32
+        xfree(*connect_host);
+        *connect_host = 0;
+        return 0;
 #else
 #else
-       return 0;
+        *t = unix_type;
 #endif
     }
 #endif
     }
-    else if (strncmp (type_and_host, "unix:", 5) == 0)
+
+    if (strncmp (uri, "tcp:", 4) == 0)
     {
     {
-#ifndef WIN32
-       t = unix_type;
-        host = type_and_host + 5;
+        *host = uri + 4;
+        *proto = PROTO_Z3950;
+    }
+    else if (strncmp (uri, "ssl:", 4) == 0)
+    {
+#if HAVE_GNUTLS_H
+        *t = ssl_type;
+        *host = uri + 4;
+        *proto = PROTO_Z3950;
 #else
 #else
-       return 0;
+        xfree(*connect_host);
+        *connect_host = 0;
+        return 0;
 #endif
     }
 #endif
     }
-    else if (strncmp(type_and_host, "http:", 5) == 0)
+    else if (strncmp(uri, "http:", 5) == 0)
     {
     {
-       t = tcpip_type;
-        host = type_and_host + 5;
-        if (host[0] == '/' && host[1] == '/')
-            host = host + 2;
-        proto = PROTO_HTTP;
+        *host = uri + 5;
+        while (**host == '/')
+            (*host)++;
+        *proto = PROTO_HTTP;
     }
     }
-    else if (strncmp(type_and_host, "https:", 6) == 0)
+    else if (strncmp(uri, "https:", 6) == 0)
     {
     {
-#if HAVE_OPENSSL_SSL_H
-       t = ssl_type;
-        host = type_and_host + 6;
-        if (host[0] == '/' && host[1] == '/')
-            host = host + 2;
-        proto = PROTO_HTTP;
+#if HAVE_GNUTLS_H
+        *t = ssl_type;
+        *host = uri + 6;
+        while (**host == '/')
+            (*host)++;
+        *proto = PROTO_HTTP;
 #else
 #else
-       return 0;
+        xfree(*connect_host);
+        *connect_host = 0;
+        return 0;
 #endif
     }
     else
     {
 #endif
     }
     else
     {
-       t = tcpip_type;
-       host = type_and_host;
-        
+        *host = uri;
+        *proto = PROTO_Z3950;
     }
     }
-    cs = cs_create (t, blocking, proto);
-    if (!cs)
-       return 0;
+    return 1;
+}
 
 
-    if (!(*vp = cs_straddr(cs, host)))
+COMSTACK cs_create_host(const char *vhost, int blocking, void **vp)
+{
+    return cs_create_host_proxy(vhost, blocking, vp, 0);
+}
+
+COMSTACK cs_create_host_proxy(const char *vhost, int blocking, void **vp,
+                              const char *proxy_host)
+{
+    int proxy_mode;
+    return cs_create_host2(vhost, blocking, vp, proxy_host, &proxy_mode);
+}
+
+COMSTACK cs_create_host2(const char *vhost, int blocking, void **vp,
+                         const char *proxy_host, int *proxy_mode)
+{
+    enum oid_proto proto = PROTO_Z3950;
+    const char *host = 0;
+    COMSTACK cs;
+    CS_TYPE t;
+    char *connect_host = 0;
+
+    const char *bind_host = strchr(vhost, ' ');
+    if (bind_host && bind_host[1])
+        bind_host++;
+    else
+        bind_host = 0;
+
+    *proxy_mode = 0;
+    if (!cs_parse_host(vhost, &host, &t, &proto, &connect_host))
+        return 0;
+
+    /*  vhost      proxy       proxy method  proxy-flag */
+    /*  TCP+Z3950  TCP+Z3950   TCP+Z3950      1 */
+    /*  TCP+Z3950  TCP+HTTP    CONNECT        0 */
+    /*  TCP+HTTP   TCP+Z3950   TCP+HTTP       1 */
+    /*  TCP+HTTP   TCP+HTTP    TCP+HTTP       1 */
+    /*  SSL+*      TCP+*       CONNECT        0 */
+    /*  ?          SSL         error */
+
+    if (proxy_host && !connect_host)
     {
     {
-       cs_close (cs);
-       return 0;
-    }    
+        enum oid_proto proto1;
+        CS_TYPE t1;
+        const char *host1 = 0;
+
+        if (!cs_parse_host(proxy_host, &host1, &t1, &proto1, &connect_host))
+            return 0;
+        if (connect_host)
+        {
+            xfree(connect_host);
+            return 0;
+        }
+        if (t1 != tcpip_type)
+            return 0;
+
+        if (t == ssl_type || (proto == PROTO_Z3950 && proto1 == PROTO_HTTP))
+            connect_host = xstrdup(host1);
+        else
+        {
+            *proxy_mode = 1;
+            host = host1;
+        }
+    }
+
+    if (t == tcpip_type)
+    {
+        cs = yaz_tcpip_create3(-1, blocking, proto, connect_host ? host : 0,
+                               0 /* user:pass */, bind_host);
+    }
+    else if (t == ssl_type)
+    {
+        cs = yaz_ssl_create(-1, blocking, proto, connect_host ? host : 0,
+                            0 /* user:pass */, bind_host);
+    }
+    else
+    {
+        cs = cs_create(t, blocking, proto);
+    }
+    if (cs)
+    {
+        if (!(*vp = cs_straddr(cs, connect_host ? connect_host : host)))
+        {
+            cs_close (cs);
+            cs = 0;
+        }
+    }
+    xfree(connect_host);
     return cs;
 }
 
     return cs;
 }
 
@@ -140,121 +259,229 @@ int cs_look (COMSTACK cs)
     return cs->event;
 }
 
     return cs->event;
 }
 
-int cs_complete_auto(const unsigned char *buf, int len)
+static int skip_crlf(const char *buf, int len, int *i)
 {
 {
-    if (len > 5 && buf[0] >= 0x20 && buf[0] < 0x7f
-               && buf[1] >= 0x20 && buf[1] < 0x7f
-               && buf[2] >= 0x20 && buf[2] < 0x7f)
+    if (*i < len)
     {
     {
-        /* deal with HTTP request/response */
-       int i = 2, content_len = 0, chunked = 0;
+        if (buf[*i] == '\r' && *i < len-1 && buf[*i + 1] == '\n')
+        {
+            (*i) += 2;
+            return 1;
+        }
+        else if (buf[*i] == '\n')
+        {
+            (*i)++;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+#define CHUNK_DEBUG 0
 
 
-        while (i <= len-4)
+static int cs_read_chunk(const char *buf, int i, int len)
+{
+    /* inside chunked body .. */
+    while (1)
+    {
+        int chunk_len = 0;
+#if CHUNK_DEBUG
+        if (i < len-2)
         {
         {
-           if (i > 8192)
-               return i;  /* do not allow more than 8K HTTP header */
-            if (buf[i] == '\r' && buf[i+1] == '\n')
-            {
-                i += 2;
-                if (buf[i] == '\r' && buf[i+1] == '\n')
-                {
-                    if (chunked)
-                    { 
-                        while(1)
-                        {
-                            int chunk_len = 0;
-                            i += 2;
-#if 0
-/* debugging */
-                            if (i <len-2)
-                            {
-                                printf ("\n>>>");
-                                for (j = i; j <= i+4; j++)
-                                    printf ("%c", buf[j]);
-                                printf ("<<<\n");
-                            }
+            int j;
+            printf ("\n<<<");
+            for (j = i; j <= i+3; j++)
+                printf ("%c", buf[j]);
+            printf (">>>\n");
+        }
 #endif
 #endif
-                            while (1)
-                                if (i >= len-2) {
-#if 0
-/* debugging */                                    
-                                    printf ("XXXXXXXX not there yet 1\n");
-                                    printf ("i=%d len=%d\n", i, len);
+        /* read chunk length */
+        while (1)
+            if (i >= len-2) {
+#if CHUNK_DEBUG
+                printf ("returning incomplete read at 1\n");
+                printf ("i=%d len=%d\n", i, len);
+#endif
+                return 0;
+            } else if (yaz_isdigit(buf[i]))
+                chunk_len = chunk_len * 16 +
+                    (buf[i++] - '0');
+            else if (yaz_isupper(buf[i]))
+                chunk_len = chunk_len * 16 +
+                    (buf[i++] - ('A'-10));
+            else if (yaz_islower(buf[i]))
+                chunk_len = chunk_len * 16 +
+                    (buf[i++] - ('a'-10));
+            else
+                break;
+        if (chunk_len == 0)
+            break;
+        if (chunk_len < 0)
+            return i;
+
+        while (1)
+        {
+            if (i >= len -1)
+                return 0;
+            if (skip_crlf(buf, len, &i))
+                break;
+            i++;
+        }
+        /* got CRLF */
+#if CHUNK_DEBUG
+        printf ("chunk_len=%d\n", chunk_len);
 #endif
 #endif
-                                    return 0;
-                                } else if (isdigit(buf[i]))
-                                    chunk_len = chunk_len * 16 + 
-                                        (buf[i++] - '0');
-                                else if (isupper(buf[i]))
-                                    chunk_len = chunk_len * 16 + 
-                                        (buf[i++] - ('A'-10));
-                                else if (islower(buf[i]))
-                                    chunk_len = chunk_len * 16 + 
-                                        (buf[i++] - ('a'-10));
-                                else
-                                    break;
-                            if (buf[i] != '\r' || buf[i+1] != '\n' ||
-                                chunk_len < 0)
-                                return i+2;    /* bad. stop now */
-                            if (chunk_len == 0)
-                            {
-                                /* consider trailing headers .. */
-                                while(i <= len-4)
-                                {
-                                    if (buf[i] == '\r' &&  buf[i+1] == '\n' &&
-                                        buf[i+2] == '\r' && buf[i+3] == '\n')
-                                        if (len >= i+4)
-                                            return i+4;
-                                    i++;
-                                }
+        i += chunk_len;
+        if (i >= len-2)
+            return 0;
+        if (!skip_crlf(buf, len, &i))
+            return 0;
+    }
+    /* consider trailing headers .. */
+    while (i < len)
+    {
+        if (skip_crlf(buf, len, &i))
+        {
+            if (skip_crlf(buf, len, &i))
+                return i;
+        }
+        else
+            i++;
+    }
+#if CHUNK_DEBUG
+    printf ("returning incomplete read at 2\n");
+    printf ("i=%d len=%d\n", i, len);
+#endif
+    return 0;
+}
+
+static int cs_complete_http(const char *buf, int len, int head_only)
+{
+    /* deal with HTTP request/response */
+    int i, content_len = 0, chunked = 0;
+
+    /* need at least one line followed by \n or \r .. */
+    for (i = 0; ; i++)
+        if (i == len)
+            return 0; /* incomplete */
+        else if (buf[i] == '\n' || buf[i] == '\r')
+            break;
+
+    /* check to see if it's a response with content */
+    if (!head_only && !memcmp(buf, "HTTP/", 5))
+    {
+        int j;
+        for (j = 5; j < i; j++)
+            if (buf[j] == ' ')
+            {
+                ++j;
+                if (buf[j] == '1') /* 1XX */
+                    ;
+                else if (!memcmp(buf + j, "204", 3))
+                    ;
+                else if (!memcmp(buf + j, "304", 3))
+                    ;
+                else
+                    content_len = -1;
+                break;
+            }
+    }
 #if 0
 #if 0
-/* debugging */
-                                printf ("XXXXXXXXX not there yet 2\n");
-                                printf ("i=%d len=%d\n", i, len);
+    printf("len = %d\n", len);
+    fwrite (buf, 1, len, stdout);
+    printf("----------\n");
 #endif
 #endif
-                                return 0;
-                            }
-                            i += chunk_len+2;
-                        }
-                    }
-                    else
-                    {   /* not chunked ; inside body */
-                        /* i += 2 seems not to work with GCC -O2 .. 
-                           so i+2 is used instead .. */
-                        if (len >= (i+2)+ content_len)
-                            return (i+2)+ content_len;
+    for (i = 2; i <= len-2; )
+    {
+        if (i > 8192)
+        {
+            return i;  /* do not allow more than 8K HTTP header */
+        }
+        if (skip_crlf(buf, len, &i))
+        {
+            if (skip_crlf(buf, len, &i))
+            {
+                /* inside content */
+                if (chunked)
+                    return cs_read_chunk(buf, i, len);
+                else
+                {   /* not chunked ; inside body */
+                    if (content_len == -1)
+                        return 0;   /* no content length */
+                    else if (len >= i + content_len)
+                    {
+                        return i + content_len;
                     }
                     }
-                    break;
                 }
                 }
-                else if (i < len - 20 && 
-                         !strncasecmp(buf+i, "Transfer-Encoding:", 18))
-                {
-                    i+=18;
-                    while (buf[i] == ' ')
-                        i++;
-                    if (i < len - 8)
-                        if (!strncasecmp(buf+i, "chunked", 7))
-                            chunked = 1;
-                }
-                else if (i < len - 17 &&
-                         !strncasecmp(buf+i, "Content-Length:", 15))
-                {
-                    i+= 15;
-                    while (buf[i] == ' ')
-                        i++;
-                    content_len = 0;
-                    while (i <= len-4 && isdigit(buf[i]))
-                        content_len = content_len*10 + (buf[i++] - '0');
-                    if (content_len < 0) /* prevent negative offsets */
-                        content_len = 0;
-                }
-                else
+                break;
+            }
+            else if (i < len - 20 &&
+                     !yaz_strncasecmp((const char *) buf+i,
+                                      "Transfer-Encoding:", 18))
+            {
+                i+=18;
+                while (buf[i] == ' ')
                     i++;
                     i++;
+                if (i < len - 8)
+                    if (!yaz_strncasecmp((const char *) buf+i, "chunked", 7))
+                        chunked = 1;
+            }
+            else if (i < len - 17 &&
+                     !yaz_strncasecmp((const char *)buf+i,
+                                      "Content-Length:", 15))
+            {
+                i+= 15;
+                while (buf[i] == ' ')
+                    i++;
+                content_len = 0;
+                while (i <= len-4 && yaz_isdigit(buf[i]))
+                    content_len = content_len*10 + (buf[i++] - '0');
+                if (content_len < 0) /* prevent negative offsets */
+                    content_len = 0;
             }
             else
                 i++;
         }
             }
             else
                 i++;
         }
-        return 0;
+        else
+            i++;
+    }
+    return 0;
+}
+
+static int cs_complete_auto_x(const char *buf, int len, int head_only)
+{
+    if (len > 5 && buf[0] >= 0x20 && buf[0] < 0x7f
+                && buf[1] >= 0x20 && buf[1] < 0x7f
+                && buf[2] >= 0x20 && buf[2] < 0x7f)
+    {
+        int r = cs_complete_http(buf, len, head_only);
+        return r;
     }
     return completeBER(buf, len);
 }
     }
     return completeBER(buf, len);
 }
+
+
+int cs_complete_auto(const char *buf, int len)
+{
+    return cs_complete_auto_x(buf, len, 0);
+}
+
+int cs_complete_auto_head(const char *buf, int len)
+{
+    return cs_complete_auto_x(buf, len, 1);
+}
+
+void cs_set_max_recv_bytes(COMSTACK cs, int max_recv_bytes)
+{
+    cs->max_recv_bytes = max_recv_bytes;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+