Fix problem with decoding of HTTP responses with no Content Length
[yaz-moved-to-github.git] / src / comstack.c
1 /*
2  * Copyright (c) 1995-2004, Index Data
3  * See the file LICENSE for details.
4  *
5  * $Id: comstack.c,v 1.11 2004-09-21 14:59:01 adam Exp $
6  */
7
8 #include <string.h>
9 #include <ctype.h>
10 #include <errno.h>
11
12 #include <yaz/comstack.h>
13 #include <yaz/tcpip.h>
14 #include <yaz/unix.h>
15 #include <yaz/odr.h>
16
17 #ifdef WIN32
18 #define strncasecmp _strnicmp
19 #endif
20
21 static const char *cs_errlist[] =
22 {
23     "No error or unspecified error",
24     "System (lower-layer) error",
25     "Operation out of state",
26     "No data (operation would block)",
27     "New data while half of old buffer is on the line (flow control)",
28     "Permission denied",
29     "SSL error"
30 };
31
32 const char *cs_errmsg(int n)
33 {
34     static char buf[250];
35
36     if (n < CSNONE || n > CSLASTERROR) {
37         sprintf(buf, "unknown comstack error %d", n);
38         return buf;
39     }
40     if (n == CSYSERR) {
41         sprintf(buf, "%s: %s", cs_errlist[n], strerror(errno));
42         return buf;
43     }
44     return cs_errlist[n];
45 }
46
47 const char *cs_strerror(COMSTACK h)
48 {
49     return cs_errmsg(h->cerrno);
50 }
51
/*
 * cs_get_host_args: locate the "arguments" part of a host specification.
 *
 * *args is set to point just past the first '/' that follows the host
 * part of type_and_host (a leading "scheme://" is skipped before the
 * search).  *args is set to "" when type_and_host is empty, starts with
 * "unix:", or contains no such '/'.  The result points into
 * type_and_host (or at a string literal); nothing is allocated.
 */
void cs_get_host_args(const char *type_and_host, const char **args)
{
    const char *start;
    const char *slash;

    *args = "";
    if (*type_and_host == '\0')
        return;
    /* unix: specs are never split on '/' */
    if (strncmp(type_and_host, "unix:", 5) == 0)
        return;

    start = strstr(type_and_host, "://");
    start = start ? start + 3 : type_and_host;

    slash = strchr(start, '/');
    if (slash)
        *args = slash + 1;
}
69
70 COMSTACK cs_create_host(const char *type_and_host, int blocking, void **vp)
71 {
72     enum oid_proto proto = PROTO_Z3950;
73     const char *host = 0;
74     COMSTACK cs;
75     CS_TYPE t;
76
77     if (strncmp (type_and_host, "tcp:", 4) == 0)
78     {
79         t = tcpip_type;
80         host = type_and_host + 4;
81     }
82     else if (strncmp (type_and_host, "ssl:", 4) == 0)
83     {
84 #if HAVE_OPENSSL_SSL_H
85         t = ssl_type;
86         host = type_and_host + 4;
87 #else
88         return 0;
89 #endif
90     }
91     else if (strncmp (type_and_host, "unix:", 5) == 0)
92     {
93 #ifndef WIN32
94         t = unix_type;
95         host = type_and_host + 5;
96 #else
97         return 0;
98 #endif
99     }
100     else if (strncmp(type_and_host, "http:", 5) == 0)
101     {
102         t = tcpip_type;
103         host = type_and_host + 5;
104         while (host[0] == '/')
105             host++;
106         proto = PROTO_HTTP;
107     }
108     else if (strncmp(type_and_host, "https:", 6) == 0)
109     {
110 #if HAVE_OPENSSL_SSL_H
111         t = ssl_type;
112         host = type_and_host + 6;
113         while (host[0] == '/')
114             host++;
115         proto = PROTO_HTTP;
116 #else
117         return 0;
118 #endif
119     }
120     else
121     {
122         t = tcpip_type;
123         host = type_and_host;
124     }
125     cs = cs_create (t, blocking, proto);
126     if (!cs)
127         return 0;
128
129     if (!(*vp = cs_straddr(cs, host)))
130     {
131         cs_close (cs);
132         return 0;
133     }    
134     return cs;
135 }
136
137 int cs_look (COMSTACK cs)
138 {
139     return cs->event;
140 }
141
/* Set to 1 to get printf tracing of the chunked-body scanner below. */
#define CHUNK_DEBUG 0

/*
 * cs_complete_auto: decide whether buf[0..len-1] holds a complete package.
 *
 * If len > 5 and the first three bytes are printable ASCII (0x20..0x7e)
 * the data is scanned as an HTTP request/response; otherwise the
 * decision is delegated to completeBER().
 *
 * HTTP scan, return values:
 *   0        more data is needed.  This is also returned when the end of
 *            the header is reached and neither "Transfer-Encoding:
 *            chunked" nor "Content-Length:" was seen.
 *   > 0      offset just past the message: header + Content-Length bytes,
 *            or header + all chunks + trailer terminated by an empty
 *            line.  A positive offset is also returned when the scanner
 *            gives up: scan position beyond 8192 before the end of the
 *            header, a chunk extension longer than 1000 bytes, or a
 *            chunk size too large to represent.
 *
 * The function keeps no state between calls; each call rescans buf from
 * the start.  It never reads outside buf[0..len-1].
 */
int cs_complete_auto(const unsigned char *buf, int len)
{
    if (len > 5 && buf[0] >= 0x20 && buf[0] < 0x7f
                && buf[1] >= 0x20 && buf[1] < 0x7f
                && buf[2] >= 0x20 && buf[2] < 0x7f)
    {
        /* deal with HTTP request/response */
        int i = 2, content_len = -1, chunked = 0;

        /* every test below looks at up to 4 bytes from buf[i] */
        while (i <= len-4)
        {
            if (i > 8192)
            {
                return i;  /* do not allow more than 8K HTTP header */
            }
            if (buf[i] == '\r' && buf[i+1] == '\n')
            {
                i += 2;   /* i is now at the start of the next line */
                if (buf[i] == '\r' && buf[i+1] == '\n')
                {
                    /* empty line: end of header, i is at its CRLF */
                    if (chunked)
                    {
                        /* inside chunked body .. */
                        while (1)
                        {
                            int j, chunk_len = 0;

                            /* skip CRLF ending the header / previous chunk */
                            i += 2;
#if CHUNK_DEBUG
/* debugging */
                            if (i < len-2)
                            {
                                printf ("\n<<<");
                                for (j = i; j <= i+4 && j < len; j++)
                                    printf ("%c", buf[j]);
                                printf (">>>\n");
                            }
#endif
                            /* read chunk length: hexadecimal digits only */
                            while (1)
                            {
                                int digit;

                                if (i >= len-2)
                                {
#if CHUNK_DEBUG
/* debugging */
                                    printf ("XXXXXXXX not there yet 1\n");
                                    printf ("i=%d len=%d\n", i, len);
#endif
                                    return 0;
                                }
                                if (buf[i] >= '0' && buf[i] <= '9')
                                    digit = buf[i] - '0';
                                else if (buf[i] >= 'A' && buf[i] <= 'F')
                                    digit = buf[i] - ('A'-10);
                                else if (buf[i] >= 'a' && buf[i] <= 'f')
                                    digit = buf[i] - ('a'-10);
                                else
                                    break;
                                /* one more hex digit must still fit in 31
                                   bits; otherwise mark the length as bad
                                   (-1) instead of overflowing */
                                if (chunk_len < 0 || chunk_len > 0x07ffffff)
                                    chunk_len = -1;
                                else
                                    chunk_len = chunk_len * 16 + digit;
                                i++;
                            }
                            /* move forward until CRLF - skip chunk ext.
                               Here i <= len-2, so buf[i+1] is in range. */
                            j = 0;
                            while (buf[i] != '\r' || buf[i+1] != '\n')
                            {
                                if (i >= len-2)
                                    return 0;   /* need more buffer .. */
                                if (++j > 1000)
                                    return i; /* enough.. stop */
                                i++;
                            }
                            /* got CRLF */
#if CHUNK_DEBUG
                            printf ("XXXXXX chunk_len=%d\n", chunk_len);
#endif
                            if (chunk_len < 0)
                                return i+2;    /* bad chunk_len */
                            if (chunk_len == 0)
                                break;         /* last chunk */
                            /* chunk data reaches beyond what is buffered;
                               checked first so the addition cannot
                               overflow */
                            if (chunk_len > len - i)
                                return 0;
                            /* skip CRLF + chunk data; i ends up at the
                               CRLF that follows the data */
                            i += chunk_len+2;
                        }
                        /* consider trailing headers .. the message ends
                           at the first empty line after the last chunk */
                        while (i <= len-4)
                        {
                            if (buf[i] == '\r' &&  buf[i+1] == '\n' &&
                                buf[i+2] == '\r' && buf[i+3] == '\n')
                                return i+4;
                            i++;
                        }
#if CHUNK_DEBUG
/* debugging */
                        printf ("XXXXXXXXX not there yet 2\n");
                        printf ("i=%d len=%d\n", i, len);
#endif
                        return 0;
                    }
                    /* not chunked ; inside body.  The body starts at
                       i+2, just past the empty line. */
                    if (content_len == -1)
                        return 0;   /* no content length */
                    /* same as len >= (i+2)+content_len, but written so
                       the sum cannot overflow */
                    if (content_len <= len - (i+2))
                        return (i+2) + content_len;
                    return 0;       /* body not fully buffered yet */
                }
                else if (i < len - 20 &&
                         !strncasecmp((const char *) buf+i, "Transfer-Encoding:", 18))
                {
                    i += 18;
                    while (i < len && buf[i] == ' ')
                        i++;
                    if (i < len - 8)
                        if (!strncasecmp((const char *) buf+i, "chunked", 7))
                            chunked = 1;
                }
                else if (i < len - 17 &&
                         !strncasecmp((const char *)buf+i, "Content-Length:", 15))
                {
                    i += 15;
                    while (i < len && buf[i] == ' ')
                        i++;
                    content_len = 0;
                    while (i <= len-4 && isdigit(buf[i]))
                    {
                        int digit = buf[i++] - '0';

                        /* saturate instead of overflowing; a saturated
                           value can never be satisfied by an int-sized
                           buffer, so the message stays incomplete */
                        if (content_len > (0x7fffffff - 9) / 10)
                            content_len = 0x7fffffff;
                        else
                            content_len = content_len*10 + digit;
                    }
                }
                else
                    i++;
            }
            else
                i++;
        }
        return 0;
    }
    return completeBER(buf, len);
}