Separate OPAC record character set for ZOOM_record
author Adam Dickmeiss <adam@indexdata.dk>
Tue, 8 Mar 2011 12:28:34 +0000 (13:28 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Tue, 8 Mar 2011 12:28:34 +0000 (13:28 +0100)
yaz_record_render allows a character set given as charset=from/opacfrom,to
where 'from' is the character set of the bibliographic record, 'opacfrom'
is the character set of the OPAC record and 'to' is the target character set.

doc/zoom.xml
src/record_render.c

index da8f0a9..8a6bb92 100644 (file)
@@ -750,18 +750,29 @@ ZOOM_query_cql2rpn(ZOOM_query s, const char *str, ZOOM_connection conn)
     The <parameter>type</parameter> is a string of the format:
    </para>
    <para>
-    <replaceable>form</replaceable>[;charset=<replaceable>from</replaceable>[,<replaceable>to</replaceable>]][;format=<replaceable>v</replaceable>]
+    <replaceable>format</replaceable>[;charset=<replaceable>from</replaceable>[/<replaceable>opacfrom</replaceable>][,<replaceable>to</replaceable>]][;format=<replaceable>v</replaceable>]
    </para>
    <para>
-    where <replaceable>form</replaceable> specifies the format of the
+    where <replaceable>format</replaceable> specifies the format of the
     returned record, <replaceable>from</replaceable>
     specifies the character set of the record in its original form
     (as returned by the server), <replaceable>to</replaceable> specifies
     the output (returned)
     character set encoding.
-    If charset is not given, then no character set conversion takes place.
     If <replaceable>to</replaceable> is omitted UTF-8 is assumed.
+    If charset is not given, then no character set conversion takes place.
+   </para>
+   
+   <para>OPAC records may be returned in a different character
+     set from the bibliographic MARC record. If this is the case,
+    <replaceable>opacfrom</replaceable> should be set to the character set
+    of the OPAC record part.
    </para>
+   <note>
+     <para>
+       Specifying the OPAC record character set requires YAZ 4.1.5 or later.
+     </para>
+   </note>
    <para>
     The format argument controls whether record data should be XML
     pretty-printed (post process operation).
index cf306ef..16351a2 100644 (file)
 #include <yaz/proto.h>
 #include <yaz/oid_db.h>
 
-static yaz_iconv_t iconv_create_charset(const char *record_charset)
+static yaz_iconv_t iconv_create_charset(const char *record_charset,
+                                        yaz_iconv_t *cd2)
 {
-    char to[40];
-    char from[40];
+    char charset_buf[40];
     yaz_iconv_t cd = 0;
-
-    *from = '\0';
-    strcpy(to, "UTF-8");
-    if (record_charset && *record_charset)
+    char *from_set1 = 0;
+    char *from_set2 = 0;
+    char *to_set = 0;
+    if (record_charset)
     {
-        /* Use "from,to" or just "from" */
-        const char *cp = strchr(record_charset, ',');
-        size_t clen = strlen(record_charset);
-        if (cp && cp[1])
+        char *cp = charset_buf;
+        
+        strncpy(charset_buf, record_charset, sizeof(charset_buf)-1);
+        charset_buf[sizeof(charset_buf)-1] = '\0';
+        
+        from_set1 = cp;
+        while (*cp && *cp != ',' && *cp != '/')
+            cp++;
+        if (*cp == '/')
         {
-            strncpy( to, cp+1, sizeof(to)-1);
-            to[sizeof(to)-1] = '\0';
-            clen = cp - record_charset;
+            *cp++ = '\0'; /* terminate from_set1 */
+            from_set2 = cp;
+            while (*cp && *cp != ',')
+                cp++;
+        }
+        if (*cp == ',')
+        {
+            *cp++ = '\0';  /* terminate from_set1 or from_set2 */
+            to_set = cp;
+            while (*cp)
+                cp++;
         }
-        if (clen > sizeof(from)-1)
-            clen = sizeof(from)-1;
-        
-        if (clen)
-            strncpy(from, record_charset, clen);
-        from[clen] = '\0';
     }
-    if (*from && *to)
-        cd = yaz_iconv_open(to, from);
+    
+    if (from_set1)
+        cd = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set1);
+    if (cd2)
+    {
+        if (from_set2)
+            *cd2 = yaz_iconv_open(to_set ? to_set : "UTF-8", from_set2);
+        else
+            *cd2 = 0;
+    }
     return cd;
 }
 
@@ -57,7 +72,7 @@ static const char *return_marc_record(WRBUF wrbuf,
                                       const char *buf, int sz,
                                       const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
     yaz_marc_t mt = yaz_marc_create();
     const char *ret_string = 0;
 
@@ -82,18 +97,25 @@ static const char *return_opac_record(WRBUF wrbuf,
                                       Z_OPACRecord *opac_rec,
                                       const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd2;
+    yaz_iconv_t cd = iconv_create_charset(record_charset, &cd2);
     yaz_marc_t mt = yaz_marc_create();
 
     if (cd)
         yaz_marc_iconv(mt, cd);
     yaz_marc_xml(mt, marc_type);
 
-    yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
+    if (cd2)
+        yaz_opac_decode_wrbuf2(mt, opac_rec, wrbuf, cd2);
+    else
+        yaz_opac_decode_wrbuf(mt, opac_rec, wrbuf);
+        
     yaz_marc_destroy(mt);
 
     if (cd)
         yaz_iconv_close(cd);
+    if (cd2)
+        yaz_iconv_close(cd2);
     if (len)
         *len = wrbuf_len(wrbuf);
     return wrbuf_cstr(wrbuf);
@@ -104,7 +126,7 @@ static const char *return_string_record(WRBUF wrbuf,
                                         const char *buf, int sz,
                                         const char *record_charset)
 {
-    yaz_iconv_t cd = iconv_create_charset(record_charset);
+    yaz_iconv_t cd = iconv_create_charset(record_charset, 0);
 
     if (cd)
     {