Fix proxy mode for url library YAZ-825
[yaz-moved-to-github.git] / src / url.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file url.c
7  * \brief URL fetch utility
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <yaz/url.h>
14 #include <yaz/comstack.h>
15 #include <yaz/log.h>
16 #include <yaz/wrbuf.h>
17 #include <yaz/cookie.h>
18
19 struct yaz_url {
20     ODR odr_in;
21     ODR odr_out;
22     char *proxy;
23     int max_redirects;
24     WRBUF w_error;
25     int verbose;
26     yaz_cookies_t cookies;
27 };
28
29 yaz_url_t yaz_url_create(void)
30 {
31     yaz_url_t p = xmalloc(sizeof(*p));
32     p->odr_in = odr_createmem(ODR_DECODE);
33     p->odr_out = odr_createmem(ODR_ENCODE);
34     p->proxy = 0;
35     p->max_redirects = 10;
36     p->w_error = wrbuf_alloc();
37     p->verbose = 0;
38     p->cookies = yaz_cookies_create();
39     return p;
40 }
41
42 void yaz_url_destroy(yaz_url_t p)
43 {
44     if (p)
45     {
46         odr_destroy(p->odr_in);
47         odr_destroy(p->odr_out);
48         xfree(p->proxy);
49         wrbuf_destroy(p->w_error);
50         yaz_cookies_destroy(p->cookies);
51         xfree(p);
52     }
53 }
54
55 void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
56 {
57     xfree(p->proxy);
58     p->proxy = 0;
59     if (proxy && *proxy)
60         p->proxy = xstrdup(proxy);
61 }
62
63 void yaz_url_set_max_redirects(yaz_url_t p, int num)
64 {
65     p->max_redirects = num;
66 }
67
68 void yaz_url_set_verbose(yaz_url_t p, int num)
69 {
70     p->verbose = num;
71 }
72
73 static void extract_user_pass(NMEM nmem,
74                               const char *uri,
75                               char **uri_lean, char **http_user,
76                               char **http_pass)
77 {
78     const char *cp1 = strchr(uri, '/');
79     *uri_lean = 0;
80     *http_user = 0;
81     *http_pass = 0;
82     if (cp1 && cp1 > uri)
83     {
84         cp1--;
85
86         if (!strncmp(cp1, "://", 3))
87         {
88             const char *cp3 = 0;
89             const char *cp2 = cp1 + 3;
90             while (*cp2 && *cp2 != '/' && *cp2 != '@')
91             {
92                 if (*cp2 == ':')
93                     cp3 = cp2;
94                 cp2++;
95             }
96             if (*cp2 == '@' && cp3)
97             {
98                 *uri_lean = nmem_malloc(nmem, strlen(uri) + 1);
99                 memcpy(*uri_lean, uri, cp1 + 3 - uri);
100                 strcpy(*uri_lean + (cp1 + 3 - uri), cp2 + 1);
101
102                 *http_user = nmem_strdupn(nmem, cp1 + 3, cp3 - (cp1 + 3));
103                 *http_pass = nmem_strdupn(nmem, cp3 + 1, cp2 - (cp3 + 1));
104             }
105         }
106     }
107     if (*uri_lean == 0)
108         *uri_lean = nmem_strdup(nmem, uri);
109 }
110
111 const char *yaz_url_get_error(yaz_url_t p)
112 {
113     return wrbuf_cstr(p->w_error);
114 }
115
116 static void log_warn(yaz_url_t p)
117 {
118     yaz_log(YLOG_WARN, "yaz_url: %s", wrbuf_cstr(p->w_error));
119 }
120
121 Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
122                               const char *method,
123                               Z_HTTP_Header *user_headers,
124                               const char *buf, size_t len)
125 {
126     Z_HTTP_Response *res = 0;
127     int number_of_redirects = 0;
128
129     yaz_cookies_reset(p->cookies);
130     wrbuf_rewind(p->w_error);
131     while (1)
132     {
133         void *add;
134         COMSTACK conn = 0;
135         int code;
136         const char *location = 0;
137         char *http_user = 0;
138         char *http_pass = 0;
139         char *uri_lean = 0;
140         int proxy_mode = 0;
141
142         extract_user_pass(p->odr_out->mem, uri, &uri_lean,
143                           &http_user, &http_pass);
144         conn = cs_create_host2(uri_lean, 1, &add, p->proxy, &proxy_mode);
145         if (!conn)
146         {
147             wrbuf_printf(p->w_error, "Can not resolve URL %s", uri);
148             log_warn(p);
149         }
150         else
151         {
152             Z_GDU *gdu =
153                 z_get_HTTP_Request_uri(p->odr_out, uri_lean, 0, proxy_mode);
154             gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
155             yaz_cookies_request(p->cookies, p->odr_out, gdu->u.HTTP_Request);
156             for ( ; user_headers; user_headers = user_headers->next)
157             {
158                 /* prefer new Host over user-supplied Host */
159                 if (!strcmp(user_headers->name, "Host"))
160                     ;
161                 /* prefer user-supplied User-Agent over YAZ' own */
162                 else if (!strcmp(user_headers->name, "User-Agent"))
163                     z_HTTP_header_set(p->odr_out, &gdu->u.HTTP_Request->headers,
164                                       user_headers->name, user_headers->value);
165                 else
166                     z_HTTP_header_add(p->odr_out, &gdu->u.HTTP_Request->headers,
167                                       user_headers->name, user_headers->value);
168             }
169             if (http_user && http_pass)
170                 z_HTTP_header_add_basic_auth(p->odr_out,
171                                              &gdu->u.HTTP_Request->headers,
172                                              http_user, http_pass);
173             res = 0;
174             if (buf && len)
175             {
176                 gdu->u.HTTP_Request->content_buf = (char *) buf;
177                 gdu->u.HTTP_Request->content_len = len;
178             }
179             if (!z_GDU(p->odr_out, &gdu, 0, 0))
180             {
181                 wrbuf_printf(p->w_error, "Can not encode HTTP request for URL %s",
182                              uri);
183                 log_warn(p);
184                 return 0;
185             }
186             if (cs_connect(conn, add) < 0)
187             {
188                 wrbuf_printf(p->w_error, "Can not connect to URL %s", uri);
189                 log_warn(p);
190             }
191             else
192             {
193                 int len;
194                 char *buf = odr_getbuf(p->odr_out, &len, 0);
195                 if (p->verbose)
196                     fwrite(buf, 1, len, stdout);
197                 if (cs_put(conn, buf, len) < 0)
198                 {
199                     wrbuf_printf(p->w_error, "cs_put fail for URL %s", uri);
200                     log_warn(p);
201                 }
202                 else
203                 {
204                     char *netbuffer = 0;
205                     int netlen = 0;
206                     int cs_res = cs_get(conn, &netbuffer, &netlen);
207                     if (cs_res <= 0)
208                     {
209                         wrbuf_printf(p->w_error, "cs_get failed for URL %s", uri);
210                         log_warn(p);
211                     }
212                     else
213                     {
214                         Z_GDU *gdu;
215                         if (p->verbose)
216                             fwrite(netbuffer, 1, cs_res, stdout);
217                         odr_setbuf(p->odr_in, netbuffer, cs_res, 0);
218                         if (!z_GDU(p->odr_in, &gdu, 0, 0)
219                             || gdu->which != Z_GDU_HTTP_Response)
220                         {
221                             wrbuf_printf(p->w_error, "HTTP decoding fail for "
222                                          "URL %s", uri);
223                             log_warn(p);
224                         }
225                         else
226                         {
227                             res = gdu->u.HTTP_Response;
228                         }
229                     }
230                     xfree(netbuffer);
231                 }
232             }
233             cs_close(conn);
234         }
235         if (!res)
236             break;
237         code = res->code;
238         location = z_HTTP_header_lookup(res->headers, "Location");
239         if (++number_of_redirects <= p->max_redirects &&
240             location && (code == 301 || code == 302 || code == 307))
241         {
242             int host_change = 0;
243             const char *nlocation = yaz_check_location(p->odr_in, uri,
244                                                        location, &host_change);
245
246             odr_reset(p->odr_out);
247             uri = odr_strdup(p->odr_out, nlocation);
248         }
249         else
250             break;
251         yaz_cookies_response(p->cookies, res);
252         odr_reset(p->odr_in);
253     }
254     return res;
255 }
256
257 /*
258  * Local variables:
259  * c-basic-offset: 4
260  * c-file-style: "Stroustrup"
261  * indent-tabs-mode: nil
262  * End:
263  * vim: shiftwidth=4 tabstop=8 expandtab
264  */
265