From 5554ddf9c4d9670aaaa8f8b9ce6def1dadff3c96 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 10 Oct 2003 17:58:28 +0000 Subject: [PATCH] USMARC to MARCXML conversion. Check for reconfigure in more places --- TODO | 2 + doc/proxy.xml | 66 ++++++++++++++------- include/yaz++/proxy.h | 10 ++-- src/config.xml | 6 +- src/yaz-proxy-config.cpp | 19 +++--- src/yaz-proxy.cpp | 146 +++++++++++++++++++++++++++++++++++++++------- src/yaz-z-cache.cpp | 27 +++++++-- 7 files changed, 213 insertions(+), 63 deletions(-) diff --git a/TODO b/TODO index 184382f..066e9f9 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,6 @@ +Control the various optimizations with config.. + Handle invalidate sessions (-1 hit, control-number search). MARCXML support. diff --git a/doc/proxy.xml b/doc/proxy.xml index 90529f0..9d04a65 100644 --- a/doc/proxy.xml +++ b/doc/proxy.xml @@ -157,33 +157,36 @@
- Keep-alive Facility for Stateless Clients + Keep-alive Facility - Stateless clients such as web gateways may generate a cookie for a Z39.50 - session which is sent to the proxy as part of PDUs. - In this case, the proxy will keep alive its Z39.50 session - to the backend target even when the connection from the client - to the proxy is closed. When the client contacts the - proxy again, and re-issues the same cookie, the proxy reuses the - Z39.50 connection with the backend target. + The keep-alive is a facility where the proxy keeps the connection to the + backend - even if the client closes the connection to the proxy. - There is no - guarantee that the Z39.50 connection to the backend - target is kept forever: the proxy will shut it down after certain - idle time. - So in effect, the connection from the client's - point of view should be considered stateless, and the keep-alive - facility should be treated only as a performance booster. + If a new or another client connects to the proxy again and requests the + same backend it will be reassigned to this backend. In this case, the + proxy sends an initialize response directly to the client and an + initialize handshake with the backend is omitted. - Cookies may be passed in an + When a client reconnects, query and record caching works better, if the + proxy assigns it to the same backend as before. And the result set + (if any) is re-used. To achieve this, Index Data defined a session + cookie which identifies the backend session. + + + The cookie is defined by the client and is sent as part of the + Initialize Request and passed in an otherInfo element with OID 1.2.840.10003.10.1000.81.2. + + Clients that do not send a cookie as part of the initialize request + may still get better performance, since the init handshake is saved. +
-
+
Query Caching Simple stateless clients often send identical Z39.50 searches @@ -208,11 +211,34 @@ You can enable/disable query caching using option -o.
+ -
+
+ Record Caching + + As an option, the proxy may also cache result set records for the + last search. + The proxy takes into account the Record Syntax and CompSpec. + The CompSpec includes simple element set names as well. + +
+ +
+ Query Validation + + +
+ +
+ Record Syntax Validation + + +
+ +
Other Optimizations - - We've had some plans to support caching of result set records, + + We've had some plans to support global caching of result set records, but this has not yet been implemented.
diff --git a/include/yaz++/proxy.h b/include/yaz++/proxy.h index 4cd619c..1d0c12d 100644 --- a/include/yaz++/proxy.h +++ b/include/yaz++/proxy.h @@ -2,7 +2,7 @@ * Copyright (c) 1998-2003, Index Data. * See the file LICENSE for details. * - * $Id: proxy.h,v 1.14 2003-10-09 12:11:09 adam Exp $ + * $Id: proxy.h,v 1.15 2003-10-10 17:58:29 adam Exp $ */ #include @@ -30,11 +30,11 @@ public: int *target_idletime, int *client_idletime, int *max_clients, int *keepalive_limit_bw, int *keepalive_limit_pdu); - void operator=(const Yaz_ProxyConfig &conf); int check_query(ODR odr, const char *name, Z_Query *query, char **addinfo); int check_syntax(ODR odr, const char *name, Odr_oid *syntax, char **addinfo); private: + void operator=(const Yaz_ProxyConfig &conf); #if HAVE_XML2 xmlDocPtr m_docPtr; xmlNodePtr m_proxyPtr; @@ -159,7 +159,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { char *m_optimize; int m_session_no; // sequence for each client session char m_session_str[30]; // session string (time:session_no) - Yaz_ProxyConfig m_config; + Yaz_ProxyConfig *m_config; char *m_config_fname; int m_bytes_sent; int m_bytes_recv; @@ -178,9 +178,11 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { Z_APDU *handle_syntax_validation(Z_APDU *apdu); const char *load_balance(const char **url); int m_reconfig_flag; - void check_reconfigure(); + Yaz_ProxyConfig *check_reconfigure(); int m_request_no; int m_invalid_session; + int m_marcxml_flag; + void convert_to_marcxml(Z_NamePlusRecordList *p); public: Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable); ~Yaz_Proxy(); diff --git a/src/config.xml b/src/config.xml index 82954da..efe5126 100644 --- a/src/config.xml +++ b/src/config.xml @@ -18,21 +18,21 @@ 180 120 + localhost:9999 - localhost:9998 50000 - 11 + 60 10 - + diff --git a/src/yaz-proxy-config.cpp b/src/yaz-proxy-config.cpp index 84a901a..056354b 100644 --- a/src/yaz-proxy-config.cpp +++ b/src/yaz-proxy-config.cpp @@ -2,7 +2,7 @@ * Copyright (c) 1998-2003, Index Data. 
* See the file LICENSE for details. * - * $Id: yaz-proxy-config.cpp,v 1.7 2003-10-09 12:11:10 adam Exp $ + * $Id: yaz-proxy-config.cpp,v 1.8 2003-10-10 17:58:29 adam Exp $ */ #include @@ -26,15 +26,6 @@ Yaz_ProxyConfig::~Yaz_ProxyConfig() #endif } -void Yaz_ProxyConfig::operator=(const Yaz_ProxyConfig &conf) -{ -#if HAVE_XML2 - m_docPtr = conf.m_docPtr; - m_proxyPtr = conf.m_proxyPtr; -#endif - m_copy = 1; -} - int Yaz_ProxyConfig::read_xml(const char *fname) { #if HAVE_XML2 @@ -348,6 +339,7 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name, int match = 0; // if we match record syntax const char *match_type = 0; const char *match_error = 0; + const char *match_marcxml = 0; struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { @@ -357,6 +349,9 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name, if (!strcmp((const char *) attr->name, "error") && attr->children && attr->children->type == XML_TEXT_NODE) match_error = (const char *) attr->children->content; + if (!strcmp((const char *) attr->name, "marcxml") && + attr->children && attr->children->type == XML_TEXT_NODE) + match_marcxml = (const char *) attr->children->content; } if (match_type) { @@ -377,6 +372,10 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name, } if (match) { + if (match_marcxml) + { + return -1; + } if (match_error) { if (syntax) diff --git a/src/yaz-proxy.cpp b/src/yaz-proxy.cpp index d80d24e..a559383 100644 --- a/src/yaz-proxy.cpp +++ b/src/yaz-proxy.cpp @@ -2,12 +2,14 @@ * Copyright (c) 1998-2003, Index Data. * See the file LICENSE for details. 
* - * $Id: yaz-proxy.cpp,v 1.55 2003-10-10 12:37:26 adam Exp $ + * $Id: yaz-proxy.cpp,v 1.56 2003-10-10 17:58:29 adam Exp $ */ #include #include +#include +#include #include #include #include @@ -79,6 +81,8 @@ Yaz_Proxy::Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable) : m_config_fname = 0; m_request_no = 0; m_invalid_session = 0; + m_config = 0; + m_marcxml_flag = 0; } Yaz_Proxy::~Yaz_Proxy() @@ -89,15 +93,16 @@ Yaz_Proxy::~Yaz_Proxy() xfree (m_default_target); xfree (m_proxy_authentication); xfree (m_optimize); - if (m_parent) - m_parent->check_reconfigure(); + delete m_config; } int Yaz_Proxy::set_config(const char *config) { + delete m_config; + m_config = new Yaz_ProxyConfig(); xfree(m_config_fname); m_config_fname = xstrdup(config); - int r = m_config.read_xml(config); + int r = m_config->read_xml(config); return r; } @@ -117,16 +122,20 @@ void Yaz_Proxy::set_proxy_authentication (const char *auth) m_proxy_authentication = (char *) xstrdup (auth); } -void Yaz_Proxy::check_reconfigure() +Yaz_ProxyConfig *Yaz_Proxy::check_reconfigure() { + if (m_parent) + return m_parent->check_reconfigure(); + + Yaz_ProxyConfig *cfg = m_config; if (m_reconfig_flag) { yaz_log(LOG_LOG, "reconfigure"); yaz_log_reopen(); - if (m_config_fname) + if (m_config_fname && cfg) { yaz_log(LOG_LOG, "reconfigure config %s", m_config_fname); - int r = m_config.read_xml(m_config_fname); + int r = cfg->read_xml(m_config_fname); if (r) yaz_log(LOG_WARN, "reconfigure failed"); } @@ -134,6 +143,7 @@ void Yaz_Proxy::check_reconfigure() yaz_log(LOG_LOG, "reconfigure"); m_reconfig_flag = 0; } + return cfg; } IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable @@ -142,7 +152,7 @@ IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable check_reconfigure(); Yaz_Proxy *new_proxy = new Yaz_Proxy(the_PDU_Observable); new_proxy->m_parent = this; - new_proxy->m_config = m_config; + new_proxy->m_config = 0; new_proxy->m_config_fname = 0; new_proxy->timeout(m_client_idletime); 
new_proxy->m_target_idletime = m_target_idletime; @@ -231,19 +241,21 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu) { const char *url[MAX_ZURL_PLEX]; const char *proxy_host = get_proxy(oi); + Yaz_ProxyConfig *cfg = check_reconfigure(); if (proxy_host) { xfree(m_default_target); m_default_target = xstrdup(proxy_host); proxy_host = m_default_target; } - int client_idletime = -1; - m_config.get_target_info(proxy_host, url, &m_bw_max, + if (cfg) + cfg->get_target_info(proxy_host, url, &m_bw_max, &m_pdu_max, &m_max_record_retrieve, &m_target_idletime, &client_idletime, &parent->m_max_clients, - &m_keepalive_limit_bw, &m_keepalive_limit_pdu); + &m_keepalive_limit_bw, + &m_keepalive_limit_pdu); if (client_idletime != -1) { m_client_idletime = client_idletime; @@ -489,6 +501,67 @@ void Yaz_Proxy::display_diagrecs(Z_DiagRec **pp, int num) } } +void Yaz_Proxy::convert_to_marcxml(Z_NamePlusRecordList *p) +{ + int i; + + yaz_marc_t mt = yaz_marc_create(); + yaz_marc_xml(mt, YAZ_MARC_MARCXML); + for (i = 0; i < p->num_records; i++) + { + Z_NamePlusRecord *npr = p->records[i]; + if (npr->which == Z_NamePlusRecord_databaseRecord) + { + Z_External *r = npr->u.databaseRecord; + if (r->which == Z_External_octet) + { + int rlen; + char *result; + if (yaz_marc_decode_buf(mt, (char*) r->u.octet_aligned->buf, + r->u.octet_aligned->len, + &result, &rlen)) + { + yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC-8"); + WRBUF wrbuf = wrbuf_alloc(); + + char outbuf[120]; + size_t inbytesleft = rlen; + const char *inp = result; + while (cd && inbytesleft) + { + size_t outbytesleft = sizeof(outbuf); + char *outp = outbuf; + size_t r; + + r = yaz_iconv (cd, (char**) &inp, + &inbytesleft, + &outp, &outbytesleft); + if (r == (size_t) (-1)) + { + int e = yaz_iconv_error(cd); + if (e != YAZ_ICONV_E2BIG) + { + yaz_log(LOG_WARN, "conversion failure"); + break; + } + } + wrbuf_write(wrbuf, outbuf, outp - outbuf); + } + if (cd) + yaz_iconv_close(cd); + + npr->u.databaseRecord = 
z_ext_record(odr_encode(), + VAL_TEXT_XML, + wrbuf_buf(wrbuf), + wrbuf_len(wrbuf)); + wrbuf_free(wrbuf, 1); + } + } + } + } + yaz_marc_destroy(mt); +} + int Yaz_Proxy::send_to_client(Z_APDU *apdu) { int len = 0; @@ -506,6 +579,8 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu) } else { + if (m_marcxml_flag && p && p->which == Z_Records_DBOSD) + convert_to_marcxml(p->u.databaseOrSurDiagnostics); if (sr->resultCount) { yaz_log(LOG_LOG, "%s%d hits", m_session_str, @@ -527,6 +602,8 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu) display_diagrecs(&dr_p, 1); } + if (m_marcxml_flag && p && p->which == Z_Records_DBOSD) + convert_to_marcxml(p->u.databaseOrSurDiagnostics); } int r = send_Z_PDU(apdu, &len); yaz_log (LOG_DEBUG, "%sSending %s to client %d bytes", m_session_str, @@ -597,7 +674,7 @@ Z_APDU *Yaz_Proxy::result_set_optimize(Z_APDU *apdu) this_query->set_Z_Query(sr->query); - char query_str[80]; + char query_str[120]; this_query->print(query_str, sizeof(query_str)-1); yaz_log(LOG_LOG, "%sQuery %s", m_session_str, query_str); @@ -845,10 +922,13 @@ Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu) if (apdu->which == Z_APDU_searchRequest) { Z_SearchRequest *sr = apdu->u.searchRequest; - int err; + int err = 0; char *addinfo = 0; - err = m_config.check_query(odr_encode(), m_default_target, sr->query, - &addinfo); + + Yaz_ProxyConfig *cfg = check_reconfigure(); + if (cfg) + err = cfg->check_query(odr_encode(), m_default_target, + sr->query, &addinfo); if (err) { Z_APDU *new_apdu = create_Z_PDU(Z_APDU_searchResponse); @@ -868,17 +948,29 @@ Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu) Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu) { + m_marcxml_flag = 0; if (apdu->which == Z_APDU_searchRequest) { Z_SearchRequest *sr = apdu->u.searchRequest; if (*sr->smallSetUpperBound > 0 || *sr->largeSetLowerBound > 1) { - int err; + int err = 0; char *addinfo = 0; - err = m_config.check_syntax(odr_encode(), m_default_target, + Yaz_ProxyConfig *cfg = 
check_reconfigure(); + + if (cfg) + err = cfg->check_syntax(odr_encode(), + m_default_target, sr->preferredRecordSyntax, &addinfo); - if (err) + if (err == -1) + { + sr->preferredRecordSyntax = + yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN, + VAL_USMARC); + m_marcxml_flag = 1; + } + else if (err) { Z_APDU *new_apdu = create_Z_PDU(Z_APDU_searchResponse); @@ -896,12 +988,22 @@ Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu) else if (apdu->which == Z_APDU_presentRequest) { Z_PresentRequest *pr = apdu->u.presentRequest; - int err; + int err = 0; char *addinfo = 0; - err = m_config.check_syntax(odr_encode(), m_default_target, + Yaz_ProxyConfig *cfg = check_reconfigure(); + + if (cfg) + err = cfg->check_syntax(odr_encode(), m_default_target, pr->preferredRecordSyntax, &addinfo); - if (err) + if (err == -1) + { + pr->preferredRecordSyntax = + yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN, + VAL_USMARC); + m_marcxml_flag = 1; + } + else if (err) { Z_APDU *new_apdu = create_Z_PDU(Z_APDU_presentResponse); @@ -1044,7 +1146,7 @@ void Yaz_Proxy::shutdown() const char *Yaz_ProxyClient::get_session_str() { if (!m_server) - return "0"; + return "0 "; return m_server->get_session_str(); } diff --git a/src/yaz-z-cache.cpp b/src/yaz-z-cache.cpp index 1468b7d..975aee9 100644 --- a/src/yaz-z-cache.cpp +++ b/src/yaz-z-cache.cpp @@ -2,7 +2,7 @@ * Copyright (c) 2002-2003, Index Data. * See the file LICENSE for details. 
* - * $Id: yaz-z-cache.cpp,v 1.6 2003-10-08 08:52:59 adam Exp $ + * $Id: yaz-z-cache.cpp,v 1.7 2003-10-10 17:58:30 adam Exp $ */ #include @@ -87,7 +87,10 @@ void Yaz_RecordCache::add (ODR o, Z_NamePlusRecordList *npr, int start, int hits) { if (nmem_total(m_mem) > m_max_size) + { + yaz_log(LOG_LOG, "cache size"); return; + } // Build appropriate compspec for this response Z_RecordComposition *comp = 0; if (hits == -1 && m_presentRequest) @@ -116,7 +119,11 @@ void Yaz_RecordCache::add (ODR o, Z_NamePlusRecordList *npr, int start, { Yaz_RecordCache_Entry *entry = (Yaz_RecordCache_Entry *) nmem_malloc(m_mem, sizeof(*entry)); - entry->m_record = npr->records[i]; + entry->m_record = (Z_NamePlusRecord *) + nmem_malloc(m_mem, sizeof(*entry->m_record)); + entry->m_record->databaseName = npr->records[i]->databaseName; + entry->m_record->which = npr->records[i]->which; + entry->m_record->u.databaseRecord = npr->records[i]->u.databaseRecord; entry->m_comp = comp; entry->m_offset = i + start; entry->m_next = m_entries; @@ -150,7 +157,6 @@ int Yaz_RecordCache::match (Yaz_RecordCache_Entry *entry, odr_destroy(o2); if (!match) return 0; - if (!syntax) return 0; // See if offset, OID match.. 
@@ -159,6 +165,14 @@ int Yaz_RecordCache::match (Yaz_RecordCache_Entry *entry, !oid_oidcmp(entry->m_record->u.databaseRecord->direct_reference, syntax)) return 1; +#if 0 + char mstr1[100]; + oid_to_dotstring(entry->m_record->u.databaseRecord->direct_reference, mstr1); + char mstr2[100]; + oid_to_dotstring(syntax, mstr2); + yaz_log(LOG_LOG, "match fail 3 d=%s s=%s", mstr1, mstr2); +#endif + return 0; } @@ -191,7 +205,12 @@ int Yaz_RecordCache::lookup (ODR o, Z_NamePlusRecordList **npr, break; if (!entry) return 0; - (*npr)->records[i] = entry->m_record; + (*npr)->records[i] = (Z_NamePlusRecord *) + odr_malloc(o, sizeof(Z_NamePlusRecord)); + (*npr)->records[i]->databaseName = entry->m_record->databaseName; + (*npr)->records[i]->which = entry->m_record->which; + (*npr)->records[i]->u.databaseRecord = + entry->m_record->u.databaseRecord; } return 1; } -- 1.7.10.4