From 8cd8912e4ff99fd53ff78a028f7d84418e494b0d Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 10 Aug 2011 15:03:53 +0200 Subject: [PATCH] New utility yaz_url: fetches HTTP content This is a simple wrapper around COMSTACK and HTTP utilities; deals with proxy'ing. --- include/yaz/Makefile.am | 2 +- include/yaz/url.h | 87 ++++++++++++++++++++++++++ src/Makefile.am | 2 +- src/url.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++ util/Makefile.am | 6 +- util/yaz-url.c | 136 +++++++++++++++++++++++++++++++++++++++++ win/makefile | 11 +++- 7 files changed, 395 insertions(+), 5 deletions(-) create mode 100644 include/yaz/url.h create mode 100644 src/url.c create mode 100644 util/yaz-url.c diff --git a/include/yaz/Makefile.am b/include/yaz/Makefile.am index 823a8dd..d391988 100644 --- a/include/yaz/Makefile.am +++ b/include/yaz/Makefile.am @@ -24,7 +24,7 @@ pkginclude_HEADERS= backend.h base64.h \ zes-psched.h zes-admin.h zes-pset.h zes-update.h zes-update0.h \ zoom.h z-charneg.h charneg.h soap.h srw.h zgdu.h matchstr.h json.h \ file_glob.h dirent.h thread_id.h gettimeofday.h shptr.h thread_create.h \ - spipe.h stemmer.h + spipe.h stemmer.h url.h EXTRA_DIST = yaz-version.h.in diff --git a/include/yaz/url.h b/include/yaz/url.h new file mode 100644 index 0000000..ac26727 --- /dev/null +++ b/include/yaz/url.h @@ -0,0 +1,87 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2011 Index Data. + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *     * Neither the name of Index Data nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file url.h
+ * \brief Fetch URL utility
+ */
+
+#ifndef YAZ_URL_H
+#define YAZ_URL_H
+
+#include <yaz/zgdu.h>
+
+YAZ_BEGIN_CDECL
+
+/** \brief handle for our URL fetcher */
+typedef struct yaz_url *yaz_url_t;
+
+/** \brief creates a URL fetcher handle
+    \returns handle (release it with yaz_url_destroy)
+*/
+YAZ_EXPORT yaz_url_t yaz_url_create(void);
+
+/** \brief destroys a URL fetcher
+    \param p handle
+
+    Note: OK to pass NULL as p
+*/
+YAZ_EXPORT void yaz_url_destroy(yaz_url_t p);
+
+/** \brief sets proxy for URL fetcher
+    \param p handle
+    \param proxy proxy address, e.g. "localhost:3128"
+
+    Passing a proxy of NULL (or an empty string) disables proxy use.
+*/
+YAZ_EXPORT void yaz_url_set_proxy(yaz_url_t p, const char *proxy);
+
+/** \brief executes the actual HTTP request (including redirects, etc)
+    \param p handle
+    \param uri URL
+    \param method HTTP method
+    \param headers HTTP headers to be used (NULL for no custom headers)
+    \param buf content buffer for HTTP request, NULL for empty content
+    \param len content length for HTTP request
+    \returns HTTP response; NULL on ERROR.
+*/
+YAZ_EXPORT Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
+                                         const char *method,
+                                         Z_HTTP_Header *headers,
+                                         const char *buf, size_t len);
+YAZ_END_CDECL
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/src/Makefile.am b/src/Makefile.am index 8f66427..b7d4369 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -107,7 +107,7 @@ libyaz_la_SOURCES=base64.c version.c options.c log.c \ iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \ iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c \ json.c xml_include.c file_glob.c dirent.c mutex-p.h mutex.c condvar.c \ - thread_id.c gettimeofday.c thread_create.c spipe.c + thread_id.c gettimeofday.c thread_create.c spipe.c url.c libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
diff --git a/src/url.c b/src/url.c
new file mode 100644
index 0000000..eb0ed58
--- /dev/null
+++ b/src/url.c
@@ -0,0 +1,171 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file url.c
+ * \brief URL fetch utility
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <yaz/url.h>
+#include <yaz/comstack.h>
+#include <yaz/log.h>
+
+/** \brief state kept behind a yaz_url_t handle */
+struct yaz_url {
+    ODR odr_in;   /* ODR_DECODE stream: responses are decoded with it */
+    ODR odr_out;  /* ODR_ENCODE stream: requests are encoded with it */
+    char *proxy;  /* private copy of the proxy address; 0 if no proxy */
+};
+
+/* Allocates a handle with two fresh ODR streams and no proxy. */
+yaz_url_t yaz_url_create(void)
+{
+    yaz_url_t p = xmalloc(sizeof(*p));
+    p->odr_in = odr_createmem(ODR_DECODE);
+    p->odr_out = odr_createmem(ODR_ENCODE);
+    p->proxy = 0;
+    return p;
+}
+
+/* Destroys both ODR streams and frees the proxy copy and the handle.
+   Does nothing when p is 0. */
+void yaz_url_destroy(yaz_url_t p)
+{
+    if (p)
+    {
+        odr_destroy(p->odr_in);
+        odr_destroy(p->odr_out);
+        xfree(p->proxy);
+        xfree(p);
+    }
+}
+
+/* Drops the current proxy and stores a copy of the new one. A proxy of
+   0 or "" leaves the handle without proxy. */
+void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
+{
+    xfree(p->proxy);
+    p->proxy = 0;
+    if (proxy && *proxy)
+        p->proxy = xstrdup(proxy);
+}
+
+/* Sends the request to uri and reads one response. A 301, 302 or 307
+   response carrying a Location header makes the loop repeat the request
+   against that location; the counter check stops this after 9 redirects.
+   Returns the last decoded response, or 0 when encoding, connecting,
+   sending, receiving or decoding failed (each failure is logged).
+   The response is decoded with p->odr_in, which yaz_url_destroy()
+   destroys, so it should presumably not be used after that call. */
+Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
+                              const char *method,
+                              Z_HTTP_Header *headers,
+                              const char *buf, size_t len)
+{
+    Z_HTTP_Response *res = 0;
+    int number_of_redirects = 0;
+
+    while (1)
+    {
+        void *add;
+        COMSTACK conn = 0;
+        int code;
+        struct Z_HTTP_Header **last_header_entry;
+        const char *location = 0;
+        Z_GDU *req_gdu = z_get_HTTP_Request_uri(p->odr_out, uri, 0,
+                                                p->proxy ? 1 : 0);
+        req_gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
+
+        res = 0;
+        last_header_entry = &req_gdu->u.HTTP_Request->headers;
+        while (*last_header_entry)
+            last_header_entry = &(*last_header_entry)->next;
+        *last_header_entry = headers; /* attach user headers */
+
+        if (buf && len)
+        {
+            req_gdu->u.HTTP_Request->content_buf = (char *) buf;
+            req_gdu->u.HTTP_Request->content_len = len;
+        }
+        if (!z_GDU(p->odr_out, &req_gdu, 0, 0))
+        {
+            yaz_log(YLOG_WARN, "Can not encode HTTP request URL:%s", uri);
+            return 0;
+        }
+        conn = cs_create_host_proxy(uri, 1, &add, p->proxy);
+        if (!conn)
+        {
+            yaz_log(YLOG_WARN, "Bad address for URL:%s", uri);
+        }
+        else if (cs_connect(conn, add) < 0)
+        {
+            yaz_log(YLOG_WARN, "Can not connect to URL:%s", uri);
+        }
+        else
+        {
+            /* distinct names: do not shadow the buf/len parameters */
+            int out_len;
+            char *out_buf = odr_getbuf(p->odr_out, &out_len, 0);
+
+            if (cs_put(conn, out_buf, out_len) < 0)
+                yaz_log(YLOG_WARN, "cs_put failed URL:%s", uri);
+            else
+            {
+                char *netbuffer = 0;
+                int netlen = 0;
+                int cs_res = cs_get(conn, &netbuffer, &netlen);
+                if (cs_res <= 0)
+                {
+                    yaz_log(YLOG_WARN, "cs_get failed URL:%s", uri);
+                }
+                else
+                {
+                    Z_GDU *rsp_gdu;
+                    odr_setbuf(p->odr_in, netbuffer, cs_res, 0);
+                    if (!z_GDU(p->odr_in, &rsp_gdu, 0, 0)
+                        || rsp_gdu->which != Z_GDU_HTTP_Response)
+                    {
+                        yaz_log(YLOG_WARN, "HTTP decoding failed "
+                                "URL:%s", uri);
+                    }
+                    else
+                    {
+                        res = rsp_gdu->u.HTTP_Response;
+                    }
+                }
+                xfree(netbuffer);
+            }
+        }
+        if (conn)
+            cs_close(conn);
+        if (!res)
+            break;
+        code = res->code;
+        location = z_HTTP_header_lookup(res->headers, "Location");
+        if (++number_of_redirects < 10 &&
+            location && (code == 301 || code == 302 || code == 307))
+        {
+            /* location comes from the response decoded with odr_in, so
+               it is copied before that stream is reset */
+            odr_reset(p->odr_out);
+            uri = odr_strdup(p->odr_out, location);
+            odr_reset(p->odr_in);
+        }
+        else
+            break;
+    }
+    return res;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/util/Makefile.am b/util/Makefile.am index e2b519d..30029b3 100644 ---
a/util/Makefile.am +++ b/util/Makefile.am @@ -11,7 +11,7 @@ AM_CPPFLAGS=-I$(top_srcdir)/include $(XML2_CFLAGS) $(ICU_CPPFLAGS) bin_PROGRAMS = yaz-marcdump yaz-iconv yaz-illclient yaz-icu yaz-json-parse noinst_PROGRAMS = cclsh cql2pqf cql2xcql srwtst yaz-benchmark \ - yaz-xmlquery yaz-record-conv + yaz-xmlquery yaz-record-conv yaz-url # MARC dumper utility yaz_marcdump_SOURCES = marcdump.c @@ -51,3 +51,7 @@ yaz_json_parse_LDADD = ../src/libyaz.la yaz_record_conv_SOURCES = yaz-record-conv.c yaz_record_conv_LDADD = ../src/libyaz.la +yaz_url_SOURCES = yaz-url.c +yaz_url_LDADD =../src/libyaz.la + +
diff --git a/util/yaz-url.c b/util/yaz-url.c
new file mode 100644
index 0000000..bdc912c
--- /dev/null
+++ b/util/yaz-url.c
@@ -0,0 +1,163 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <yaz/url.h>
+#include <yaz/options.h>
+#include <yaz/log.h>
+
+/* Prints the option summary on stdout and exits with status 1. */
+static void usage(void)
+{
+    printf("yaz-url [options] url ..\n");
+    printf(" -H name=value HTTP header\n");
+    printf(" -p file POST content of file\n");
+    printf(" -u user/password Basic HTTP auth\n");
+    printf(" -x proxy HTTP proxy\n");
+    exit(1);
+}
+
+/* Reads all of file fname into a buffer obtained from xmalloc and stores
+   the byte count in *len. The caller releases the buffer with xfree.
+   Any failure is logged as fatal and ends the program with status 1. */
+static char *get_file(const char *fname, size_t *len)
+{
+    char *buf = 0;
+    long size;
+    FILE *inf = fopen(fname, "rb");
+    if (!inf)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "Could not open %s", fname);
+        exit(1);
+    }
+    if (fseek(inf, 0L, SEEK_END))
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "fseek of %s failed", fname);
+        exit(1);
+    }
+    size = ftell(inf);  /* -1 on failure; must not end up in a size_t */
+    if (size < 0)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "ftell of %s failed", fname);
+        exit(1);
+    }
+    if (fseek(inf, 0L, SEEK_SET))
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "fseek of %s failed", fname);
+        exit(1);
+    }
+    *len = (size_t) size;
+    buf = xmalloc(*len + 1); /* + 1 avoids a zero-sized request */
+    if (fread(buf, 1, *len, inf) != *len)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "Could not read %s", fname);
+        exit(1);
+    }
+    fclose(inf);
+    return buf;
+}
+
+/* Handles options in command line order: -H, -u, -p and -x change the
+   state used for every URL that follows; an argument reported with code 0
+   is fetched as a URL and its content written to stdout. Exits with 1 if
+   a header option was malformed or a fetch returned no response. */
+int main(int argc, char **argv)
+{
+    int ret;
+    char *arg;
+    yaz_url_t p = yaz_url_create();
+    char *post_buf = 0;
+    size_t post_len = 0;
+    const char *method = "GET";
+    Z_HTTP_Response *http_response;
+    Z_HTTP_Header *http_headers = 0;
+    ODR odr = odr_createmem(ODR_ENCODE);
+    int exit_code = 0;
+
+    while ((ret = options("hH:p:u:x:", argv, argc, &arg))
+           != YAZ_OPTIONS_EOF)
+    {
+        char *cp; /* separator position within arg, if any */
+
+        switch (ret)
+        {
+        case 'h':
+            usage();
+            break;
+        case 'H':
+            cp = strchr(arg, '=');
+            if (!cp)
+            {
+                yaz_log(YLOG_FATAL, "bad header option (missing =): %s\n",
+                        arg);
+                exit_code = 1;
+            }
+            else
+            {
+                char *name = odr_malloc(odr, 1 + cp - arg);
+                char *value = cp + 1;
+                memcpy(name, arg, cp - arg);
+                name[cp - arg] = '\0';
+                z_HTTP_header_add(odr, &http_headers, name, value);
+            }
+            break;
+        case 'p':
+            xfree(post_buf);
+            post_buf = get_file(arg, &post_len);
+            method = "POST";
+            break;
+        case 'u':
+            cp = strchr(arg, '/');
+            if (cp)
+            {
+                char *user = odr_malloc(odr, 1 + cp - arg);
+                char *password = cp + 1;
+                memcpy(user, arg, cp - arg);
+                user[cp - arg] = '\0';
+                z_HTTP_header_add_basic_auth(odr, &http_headers, user,
+                                             password);
+            }
+            else
+                z_HTTP_header_add_basic_auth(odr, &http_headers, arg, 0);
+            break;
+        case 'x':
+            yaz_url_set_proxy(p, arg);
+            break;
+        case 0:
+            http_response = yaz_url_exec(p, arg, method, http_headers,
+                                         post_buf, post_len);
+            if (!http_response)
+                exit_code = 1;
+            else
+            {
+                fwrite(http_response->content_buf, 1,
+                       http_response->content_len, stdout);
+            }
+            break;
+        default:
+            usage();
+        }
+    }
+    xfree(post_buf); /* last POST buffer is still owned here */
+    yaz_url_destroy(p);
+    odr_destroy(odr);
+    exit(exit_code);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/win/makefile b/win/makefile index 3f8cc6f..51e6d1c 100644 --- a/win/makefile +++ b/win/makefile @@ -42,7 +42,8 @@ BISON=bison default: all -all: dirs generate dll sc_test client ztest yazicu zoomsh utilprog testprog iconv icu libxml2 libxslt +all: dirs generate dll sc_test client ztest yazicu zoomsh utilprog \ + testprog iconv icu libxml2 libxslt $(YAZ_URL) NSIS="c:\program files\nsis\makensis.exe" HHC="c:\program files\html help workshop\hhc.exe" @@ -119,6 +120,7 @@
YAZ_COND_IMPLIB=$(LIBDIR)\yaz_cond4.lib CLIENT=$(BINDIR)\yaz-client.exe YAZ_ICU=$(BINDIR)\yaz-icu.exe +YAZ_URL=$(BINDIR)\yaz-url.exe ZOOMSH=$(BINDIR)\zoomsh.exe ZTEST=$(BINDIR)\yaz-ztest.exe SC_TEST=$(BINDIR)\sc_test.exe @@ -342,6 +344,7 @@ YAZ_CLIENT_OBJS= \ $(OBJDIR)\fhistory.obj YAZ_ICU_OBJS= $(OBJDIR)\yaz-icu.obj +YAZ_URL_OBJS= $(OBJDIR)\yaz-url.obj COND_DLL_OBJS= $(OBJDIR)\condvar.obj ZTEST_OBJS= \ @@ -540,7 +543,8 @@ MISC_OBJS= \ $(OBJDIR)\xml_include.obj \ $(OBJDIR)\file_glob.obj \ $(OBJDIR)\thread_id.obj \ - $(OBJDIR)\dirent.obj + $(OBJDIR)\dirent.obj \ + $(OBJDIR)\url.obj Z3950_OBJS= \ $(OBJDIR)\z-date.obj\ @@ -925,6 +929,9 @@ $(YAZ_ICU) : "$(BINDIR)" $(YAZ_ICU_OBJS) $(YAZ_ICU_DLL) $(LINK_PROGRAM) $(ICU_LIBS) $(YAZ_ICU_IMPLIB) $(YAZ_ICU_OBJS) /out:$@ $(MT) -manifest $@.manifest -outputresource:$@;1 +$(YAZ_URL) : "$(BINDIR)" $(YAZ_URL_OBJS) $(YAZ_DLL) + $(LINK_PROGRAM) $(YAZ_URL_OBJS) /out:$@ + $(MT) -manifest $@.manifest -outputresource:$@;1 $(SC_TEST) : "$(BINDIR)" $(SC_TEST_OBJS) $(YAZ_DLL) $(LINK_PROGRAM) $(SC_TEST_OBJS) /out:$@ -- 1.7.10.4