From 2faeba3f61ac423a9c71222e9a15377724d8f0fa Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 18 May 2005 20:15:22 +0000 Subject: [PATCH] Added support for Z39.50 character set negotiation. This allows the proxy to act as a Z39.50 server supporting character set negotiation for backends not supporting it. New config element target-charset specifies encoding for target, and MUST be specified in order for any conversion to take place. Conversion also takes place for SRW/SRU clients that are negotiated implicitly to UTF-8. --- NEWS | 7 ++ etc/config.xml | 9 +- etc/yazproxy.xsd | 4 +- include/yazproxy/proxy.h | 7 +- src/charset-converter.cpp | 24 +++++- src/proxyp.h | 7 +- src/yaz-proxy-config.cpp | 20 ++--- src/yaz-proxy.cpp | 200 ++++++++++++++++++++++++++++++++++++++------- 8 files changed, 229 insertions(+), 49 deletions(-) diff --git a/NEWS b/NEWS index 6e192db..8266f4e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Added support for Z39.50 character set negotiation. This allows +the proxy to act as a Z39.50 server supporting character set negotiation +for backends not supporting it. New config element target-charset +specifies encoding for target, and MUST be specified in order +for any conversion to take place. Conversion also takes place for +SRW/SRU clients that are negotiated implicitly to UTF-8. + --- 1.0 2005/03/14 Added Support OPAC to XML conversion. 
Triggered by backendtype="opac" diff --git a/etc/config.xml b/etc/config.xml index 08985e9..b7721cc 100644 --- a/etc/config.xml +++ b/etc/config.xml @@ -1,5 +1,5 @@ - + - + + 0 + iso-8859-1 pqf.properties - ISO-8859-1 60 30 - 50 + 2 client-requests server-requests diff --git a/etc/yazproxy.xsd b/etc/yazproxy.xsd index 4e30805..b601c54 100644 --- a/etc/yazproxy.xsd +++ b/etc/yazproxy.xsd @@ -1,6 +1,6 @@ + @@ -52,6 +53,7 @@ + diff --git a/include/yazproxy/proxy.h b/include/yazproxy/proxy.h index f15171e..854191c 100644 --- a/include/yazproxy/proxy.h +++ b/include/yazproxy/proxy.h @@ -1,4 +1,4 @@ -/* $Id: proxy.h,v 1.15 2005-05-04 08:31:44 adam Exp $ +/* $Id: proxy.h,v 1.16 2005-05-18 20:15:22 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. @@ -99,7 +99,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { Z_APDU *handle_query_validation(Z_APDU *apdu); Z_APDU *handle_query_transformation(Z_APDU *apdu); - Z_APDU *handle_query_charset_conversion(Z_APDU *apdu); + Z_APDU *handle_target_charset_conversion(Z_APDU *apdu); Z_APDU *handle_syntax_validation(Z_APDU *apdu); @@ -121,6 +121,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { int m_frontend_type; void convert_to_frontend_type(Z_NamePlusRecordList *p); void convert_to_marcxml(Z_NamePlusRecordList *p, const char *charset); + void convert_records_charset(Z_NamePlusRecordList *p, const char *charset); int convert_xsl(Z_NamePlusRecordList *p, Z_APDU *apdu); void convert_xsl_delay(); Z_APDU *m_initRequest_apdu; @@ -189,7 +190,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { const char *option(const char *name, const char *value); void set_default_target(const char *target); void set_proxy_negotiation (const char *charset, const char *lang); - void set_query_charset(const char *charset); + void set_target_charset(const char *charset); char *get_proxy_target() { return m_proxyTarget; }; char *get_session_str() { return m_session_str; }; void set_max_clients(int m) { 
m_max_clients = m; }; diff --git a/src/charset-converter.cpp b/src/charset-converter.cpp index ad615f6..5bb4e03 100644 --- a/src/charset-converter.cpp +++ b/src/charset-converter.cpp @@ -1,4 +1,4 @@ -/* $Id: charset-converter.cpp,v 1.2 2005-05-06 06:55:54 adam Exp $ +/* $Id: charset-converter.cpp,v 1.3 2005-05-18 20:15:22 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. @@ -28,6 +28,7 @@ Yaz_CharsetConverter::Yaz_CharsetConverter() m_wrbuf = wrbuf_alloc(); m_target_query_charset = 0; m_client_query_charset = 0; + m_client_charset_selected = 0; } Yaz_CharsetConverter::~Yaz_CharsetConverter() @@ -37,6 +38,11 @@ Yaz_CharsetConverter::~Yaz_CharsetConverter() xfree(m_client_query_charset); } +const char *Yaz_CharsetConverter::get_target_query_charset() +{ + return m_target_query_charset; +} + void Yaz_CharsetConverter::set_target_query_charset(const char *s) { xfree(m_target_query_charset); @@ -53,6 +59,21 @@ void Yaz_CharsetConverter::set_client_query_charset(const char *s) m_client_query_charset = xstrdup(s); } +const char *Yaz_CharsetConverter::get_client_query_charset() +{ + return m_client_query_charset; +} + +void Yaz_CharsetConverter::set_client_charset_selected(int sel) +{ + m_client_charset_selected = sel; +} + +int Yaz_CharsetConverter::get_client_charset_selected() +{ + return m_client_charset_selected; +} + void Yaz_CharsetConverter::convert_type_1(char *buf_in, int len_in, char **buf_out, int *len_out, ODR o) @@ -111,6 +132,7 @@ void Yaz_CharsetConverter::convert_type_1(Z_RPNStructure *q, ODR o) break; } } + void Yaz_CharsetConverter::convert_type_1(Z_RPNQuery *q, ODR o) { if (m_target_query_charset && m_client_query_charset) diff --git a/src/proxyp.h b/src/proxyp.h index 7455bbd..f36d182 100644 --- a/src/proxyp.h +++ b/src/proxyp.h @@ -1,4 +1,4 @@ -/* $Id: proxyp.h,v 1.4 2005-05-04 08:31:44 adam Exp $ +/* $Id: proxyp.h,v 1.5 2005-05-18 20:15:22 adam Exp $ Copyright (c) 1998-2005, Index Data. 
This file is part of the yaz-proxy. @@ -58,7 +58,11 @@ public: ~Yaz_CharsetConverter(); void set_target_query_charset(const char *s); void set_client_query_charset(const char *org); + const char *get_client_query_charset(void); + const char *get_target_query_charset(void); void convert_type_1(Z_RPNQuery *q, ODR o); + void set_client_charset_selected(int sel); + int get_client_charset_selected(); private: void convert_type_1(char *buf_in, int len_in, char **buf_out, int *len_out, @@ -68,6 +72,7 @@ private: void convert_type_1(Z_Operand *q, ODR o); char *m_target_query_charset; char *m_client_query_charset; + int m_client_charset_selected; yaz_iconv_t m_ct; WRBUF m_wrbuf; }; diff --git a/src/yaz-proxy-config.cpp b/src/yaz-proxy-config.cpp index 11ad189..76259cf 100644 --- a/src/yaz-proxy-config.cpp +++ b/src/yaz-proxy-config.cpp @@ -1,4 +1,4 @@ -/* $Id: yaz-proxy-config.cpp,v 1.18 2005-05-04 08:31:44 adam Exp $ +/* $Id: yaz-proxy-config.cpp,v 1.19 2005-05-18 20:15:22 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. 
@@ -123,7 +123,7 @@ class Yaz_ProxyConfigP { int *pre_init, const char **cql2rpn, const char **negotiation_charset, const char **negotiation_lang, - const char **query_charset); + const char **target_charset); void return_limit(xmlNodePtr ptr, int *limit_bw, int *limit_pdu, int *limit_req); int check_type_1(ODR odr, xmlNodePtr ptr, Z_RPNQuery *query, @@ -330,7 +330,7 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr, const char **cql2rpn, const char **negotiation_charset, const char **negotiation_lang, - const char **query_charset) + const char **target_charset) { *pre_init = 0; int no_url = 0; @@ -395,11 +395,11 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr, *cql2rpn = t; } if (ptr->type == XML_ELEMENT_NODE - && !strcmp((const char *) ptr->name, "query-charset")) + && !strcmp((const char *) ptr->name, "target-charset")) { const char *t = get_text(ptr); - if (t && query_charset) - *query_charset = t; + if (t && target_charset) + *target_charset = t; } if (ptr->type == XML_ELEMENT_NODE && !strcmp((const char *) ptr->name, "negotiation-charset")) @@ -1031,7 +1031,7 @@ int Yaz_ProxyConfig::get_target_no(int no, const char **authentication, const char **negotiation_charset, const char **negotiation_lang, - const char **query_charset) + const char **target_charset) { #if HAVE_XSLT xmlNodePtr ptr; @@ -1059,7 +1059,7 @@ int Yaz_ProxyConfig::get_target_no(int no, target_idletime, client_idletime, keepalive_limit_bw, keepalive_limit_pdu, pre_init, cql2rpn, - negotiation_charset, negotiation_lang, query_charset); + negotiation_charset, negotiation_lang, target_charset); return 1; } i++; @@ -1259,7 +1259,7 @@ void Yaz_ProxyConfig::get_target_info(const char *name, const char **cql2rpn, const char **negotiation_charset, const char **negotiation_lang, - const char **query_charset) + const char **target_charset) { #if HAVE_XSLT xmlNodePtr ptr; @@ -1297,7 +1297,7 @@ void Yaz_ProxyConfig::get_target_info(const char *name, keepalive_limit_bw, 
keepalive_limit_pdu, pre_init, cql2rpn, negotiation_charset, negotiation_lang, - query_charset); + target_charset); } #else *url = name; diff --git a/src/yaz-proxy.cpp b/src/yaz-proxy.cpp index f174362..594435e 100644 --- a/src/yaz-proxy.cpp +++ b/src/yaz-proxy.cpp @@ -1,4 +1,4 @@ -/* $Id: yaz-proxy.cpp,v 1.25 2005-05-04 08:31:44 adam Exp $ +/* $Id: yaz-proxy.cpp,v 1.26 2005-05-18 20:15:23 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. @@ -453,7 +453,6 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie, m_cql2rpn.set_pqf_file(cql2rpn_fname); if (negotiation_charset || negotiation_lang) { - yaz_log(YLOG_LOG, "set_proxy_negotiation..."); set_proxy_negotiation(negotiation_charset, negotiation_lang); } @@ -842,6 +841,74 @@ void Yaz_Proxy::convert_to_frontend_type(Z_NamePlusRecordList *p) } } +void Yaz_Proxy::convert_records_charset(Z_NamePlusRecordList *p, + const char *backend_charset) +{ + yaz_log(YLOG_LOG, "%sconvert_to_marc", m_session_str); + int sel = m_charset_converter->get_client_charset_selected(); + const char *client_record_charset = + m_charset_converter->get_client_query_charset(); + if (sel && backend_charset && client_record_charset && + strcmp(backend_charset, client_record_charset)) + { + int i; + yaz_iconv_t cd = yaz_iconv_open(client_record_charset, + backend_charset); + yaz_marc_t mt = yaz_marc_create(); + yaz_marc_xml(mt, YAZ_MARC_ISO2709); + yaz_marc_iconv(mt, cd); + for (i = 0; i < p->num_records; i++) + { + Z_NamePlusRecord *npr = p->records[i]; + if (npr->which == Z_NamePlusRecord_databaseRecord) + { + Z_External *r = npr->u.databaseRecord; + oident *ent = oid_getentbyoid(r->direct_reference); + if (!ent || ent->value == VAL_NONE) + continue; + + if (ent->value == VAL_SUTRS) + { + WRBUF w = wrbuf_alloc(); + + wrbuf_iconv_write(w, cd, (char*) r->u.octet_aligned->buf, + r->u.octet_aligned->len); + npr->u.databaseRecord = + z_ext_record(odr_encode(), ent->value, wrbuf_buf(w), + 
wrbuf_len(w)); + wrbuf_free(w, 1); + } + else if (ent->value == VAL_TEXT_XML) + { + ; + } + else if (r->which == Z_External_octet) + { + int rlen; + char *result; + if (yaz_marc_decode_buf(mt, + (char*) r->u.octet_aligned->buf, + r->u.octet_aligned->len, + &result, &rlen)) + { + npr->u.databaseRecord = + z_ext_record(odr_encode(), ent->value, result, rlen); + yaz_log(YLOG_LOG, "%sRecoding MARC record", + m_session_str); + } + } + } + } + if (cd) + yaz_iconv_close(cd); + yaz_marc_destroy(mt); + } + else + { + yaz_log(YLOG_LOG, "%sSkipping marc convert", m_session_str); + } +} + void Yaz_Proxy::convert_to_marcxml(Z_NamePlusRecordList *p, const char *backend_charset) { @@ -1200,6 +1267,9 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu) if (m_marcxml_mode == marcxml) convert_to_marcxml(p->u.databaseOrSurDiagnostics, m_backend_charset); + else + convert_records_charset(p->u.databaseOrSurDiagnostics, + m_backend_charset); if (convert_xsl(p->u.databaseOrSurDiagnostics, apdu)) return 0; @@ -1245,6 +1315,9 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu) if (m_marcxml_mode == marcxml) convert_to_marcxml(p->u.databaseOrSurDiagnostics, m_backend_charset); + else + convert_records_charset(p->u.databaseOrSurDiagnostics, + m_backend_charset); if (convert_xsl(p->u.databaseOrSurDiagnostics, apdu)) return 0; } @@ -1635,11 +1708,13 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu) { if (apdu->which == Z_APDU_initRequest) { + yaz_log(YLOG_LOG, "%shandle_charset_lang_negotiation", + m_session_str); if (m_initRequest_options && !ODR_MASK_GET(m_initRequest_options, Z_Options_negotiationModel) && (m_proxy_negotiation_charset || m_proxy_negotiation_lang)) { - // There is not negotiation proposal from + // There is no negotiation proposal from // client's side. OK. The proxy negotiation // in use, only. 
Z_InitRequest *initRequest = apdu->u.initRequest; @@ -1661,6 +1736,66 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu) 1); } } + else if (m_initRequest_options && + ODR_MASK_GET(m_initRequest_options, + Z_Options_negotiationModel) && + m_charset_converter->get_target_query_charset()) + { + yaz_log(YLOG_LOG, "%sManaged charset negotiation: charset=%s", + m_session_str, + m_charset_converter->get_target_query_charset()); + Z_InitRequest *initRequest = apdu->u.initRequest; + Z_CharSetandLanguageNegotiation *negotiation = + yaz_get_charneg_record (initRequest->otherInfo); + if (negotiation && + negotiation->which == Z_CharSetandLanguageNegotiation_proposal) + { + NMEM nmem = nmem_create(); + char **charsets = 0; + int num_charsets = 0; + char **langs = 0; + int num_langs = 0; + int selected = 0; + yaz_get_proposal_charneg (nmem, negotiation, + &charsets, &num_charsets, + &langs, &num_langs, &selected); + int i; + for (i = 0; iget_target_query_charset(); + // sweep through charsets and pick the first supported + // conversion + for (i = 0; iset_client_query_charset(c_charset); + m_charset_converter->set_client_charset_selected(selected); + } + nmem_destroy(nmem); + ODR_MASK_CLEAR(m_initRequest_options, + Z_Options_negotiationModel); + yaz_del_charneg_record(&initRequest->otherInfo); + } + else + { + yaz_log(YLOG_WARN, "%sUnable to decode charset package", + m_session_str); + } + } } else if (apdu->which == Z_APDU_initResponse) { @@ -1708,35 +1843,42 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu) ODR_MASK_CLEAR(initResponse->options, Z_Options_negotiationModel); // Delete negotiation (charneg-3) entry. 
- Z_OtherInformation *p = *otherInfo; - for (int i=0; inum_elements; i++) + yaz_del_charneg_record(otherInfo); + } + } + else + { + if (m_proxy_negotiation_charset || m_proxy_negotiation_lang) + { + yaz_log(YLOG_LOG, "%sTarget did not honor negotiation", + m_session_str); + } + else if (m_charset_converter->get_client_query_charset()) + { + Z_OtherInformation **otherInfo; + Z_OtherInformationUnit *oi; + get_otherInfoAPDU(apdu, &otherInfo); + oi = update_otherInformation(otherInfo, 1, NULL, 0, 0); + if (oi) { - if (p->list[i]->which == Z_OtherInfo_externallyDefinedInfo) - { - Z_External *pext = - p->list[i]->information.externallyDefinedInfo; - struct oident *e = oid_getentbyoid(pext->direct_reference); - - if (e && e->value == VAL_CHARNEG3 && e->oclass == CLASS_NEGOT && - pext->which == Z_External_charSetandLanguageNegotiation) - { - (p->num_elements)--; - if(p->num_elements == 0) - { - *otherInfo = 0; - } - else - { - for (int j=i; jnum_elements;j++) - p->list[j] = p->list[j+1]; - } - } - } - } + ODR_MASK_SET(initResponse->options, + Z_Options_negotiationModel); + ODR_MASK_SET(m_initRequest_options, + Z_Options_negotiationModel); + + oi->which = Z_OtherInfo_externallyDefinedInfo; + oi->information.externallyDefinedInfo = + yaz_set_response_charneg( + odr_encode(), + m_charset_converter->get_client_query_charset(), + 0 /* no lang */, + m_charset_converter->get_client_charset_selected()); + } } } } } + Z_Records *Yaz_Proxy::create_nonSurrogateDiagnostics(ODR odr, int error, const char *addinfo) @@ -1805,7 +1947,7 @@ Z_APDU *Yaz_Proxy::handle_query_transformation(Z_APDU *apdu) return apdu; } -Z_APDU *Yaz_Proxy::handle_query_charset_conversion(Z_APDU *apdu) +Z_APDU *Yaz_Proxy::handle_target_charset_conversion(Z_APDU *apdu) { if (apdu->which == Z_APDU_searchRequest && apdu->u.searchRequest->query) @@ -2636,7 +2778,7 @@ void Yaz_Proxy::handle_incoming_Z_PDU(Z_APDU *apdu) apdu = handle_query_transformation(apdu); if (apdu) - apdu = handle_query_charset_conversion(apdu); 
+ apdu = handle_target_charset_conversion(apdu); if (apdu) apdu = handle_query_validation(apdu); -- 1.7.10.4