From 00106dc85fcaa3b02f9a0d471ea90a594bf3a175 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 15 Mar 2004 21:39:06 +0000 Subject: [PATCH] Extend MARC-8 to handle ESC-G0 EACC. Fix conversion order for MARCXML output. Let CQL parser resolve prefixes to identifiers. Updates tests. --- etc/pqf.properties | 15 ++- include/yaz/cql.h | 19 ++-- include/yaz/wrbuf.h | 7 +- src/Makefile.am | 8 +- src/cql.y | 13 +-- src/cqltransform.c | 267 +++++++++++++++++++-------------------------------- src/cqlutil.c | 88 +++++++++++------ src/marcdisp.c | 53 ++-------- src/siconv.c | 91 ++++++++++++++++-- src/wrbuf.c | 37 +++++-- src/xcqlutil.c | 56 ++++++----- test/cql/8.1.out | 6 -- test/cql/8.3.out | 24 ++--- test/cql/9.4.out | 4 - test/tsticonv.c | 176 ++++++++++++++++++++++++++++----- win/makefile | 6 +- win/yaz.nsi | 5 +- 17 files changed, 521 insertions(+), 354 deletions(-) diff --git a/etc/pqf.properties b/etc/pqf.properties index ae534c2..83defcb 100644 --- a/etc/pqf.properties +++ b/etc/pqf.properties @@ -1,4 +1,4 @@ -# $Id: pqf.properties,v 1.7 2004-01-07 14:01:24 adam Exp $ +# $Id: pqf.properties,v 1.8 2004-03-15 21:39:06 adam Exp $ # # Propeties file to drive org.z3950.zing.cql.CQLNode's toPQF() # back-end and the YAZ CQL-to-PQF converter. This specifies the @@ -12,19 +12,16 @@ # attributes. # Identifiers for prefixes used in this file. (index.*) -set.cql = http://www.loc.gov/zing/cql/context-sets/cql/v1.1/ -set.srw = http://www.loc.gov/zing/cql/srw-indexes/v1.0/ - # The "srw" set is deprecated in favour of the "cql" set. -set.rec = http://srw.o-r-g.org/indexSets/rec/1.0/ -set.dc = http://www.loc.gov/zing/cql/dc-indexes/v1.0/ -set.bath = http://www.loc.gov/zing/cql/context-sets/bath/v1.1/ +set.cql = info:srw/cql-context-set/1/cql-v1.1 +set.rec = info:srw/cql-context-set/2/rec-1.0 +set.dc = info:srw/cql-context-set/1/dc-v1.1 +set.bath = http://zing.z3950.org/cql/bath/2.0/ # default set (in query) -set = http://www.loc.gov/zing/cql/dc-indexes/v1.0/ +set = info:srw/cql-context-set/1/dc-v1.1 # The default access point and result-set references index.cql.serverChoice = 1=1016 -index.srw.serverChoice = 1=1016 # srw.serverChoice is deprecated in favour of cql.serverChoice # BIB-1 "any" diff --git a/include/yaz/cql.h b/include/yaz/cql.h index 6d6782c..41c8315 100644 --- a/include/yaz/cql.h +++ b/include/yaz/cql.h @@ -1,4 +1,4 @@ -/* $Id: cql.h,v 1.6 2004-03-10 16:34:29 adam Exp $ +/* $Id: cql.h,v 1.7 2004-03-15 21:39:06 adam Exp $ Copyright (C) 2002-2004 Index Data Aps @@ -88,17 +88,17 @@ struct cql_node { union { struct { char *index; + char *index_uri; char *term; char *relation; + char *relation_uri; struct cql_node *modifiers; - struct cql_node *prefixes; } st; struct { char *value; struct cql_node *left; struct cql_node *right; struct cql_node *modifiers; - struct cql_node *prefixes; } boolean; } u; }; @@ -120,15 +120,17 @@ YAZ_EXPORT struct cql_node *cql_node_mk_sc(const char *index, const char *relation, const char *term); + + +YAZ_EXPORT +struct cql_node *cql_apply_prefix(struct cql_node *cn, + const char *relation, + const char *term); YAZ_EXPORT struct cql_node *cql_node_mk_boolean(const char *op); YAZ_EXPORT void cql_node_destroy(struct cql_node *cn); YAZ_EXPORT -struct cql_node *cql_node_prefix(struct cql_node *n, - const char *prefix, - const char *uri); -YAZ_EXPORT struct cql_node *cql_node_dup (struct cql_node *cp); YAZ_EXPORT struct cql_node *cql_parser_result(CQL_parser cp); @@ -180,6 +182,9 @@ int cql_transform_error(cql_transform_t ct, const char **addinfo); YAZ_EXPORT const char *cql_strerror(int code); +YAZ_EXPORT +const char *cql_uri(); + YAZ_END_CDECL #endif diff --git a/include/yaz/wrbuf.h b/include/yaz/wrbuf.h index f92c395..d98674a 100644 --- a/include/yaz/wrbuf.h +++ b/include/yaz/wrbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2003, Index Data. + * Copyright (c) 1995-2004, Index Data. * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: wrbuf.h,v 1.10 2003-12-11 00:37:21 adam Exp $ + * $Id: wrbuf.h,v 1.11 2004-03-15 21:39:06 adam Exp $ * */ @@ -47,11 +47,14 @@ YAZ_EXPORT void wrbuf_free(WRBUF b, int free_buf); YAZ_EXPORT void wrbuf_rewind(WRBUF b); YAZ_EXPORT int wrbuf_grow(WRBUF b, int minsize); YAZ_EXPORT int wrbuf_write(WRBUF b, const char *buf, int size); +YAZ_EXPORT int wrbuf_write_cdata(WRBUF b, const char *cp, int size); YAZ_EXPORT int wrbuf_puts(WRBUF b, const char *buf); YAZ_EXPORT int wrbuf_xmlputs(WRBUF b, const char *cp); YAZ_EXPORT void wrbuf_printf(WRBUF b, const char *fmt, ...); YAZ_EXPORT int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size); +YAZ_EXPORT int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, + const char *buf, int size); #define wrbuf_len(b) ((b)->pos) #define wrbuf_buf(b) ((b)->buf) diff --git a/src/Makefile.am b/src/Makefile.am index 9f3052d..81b111d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,6 @@ ## Copyright (C) 1994-2003, Index Data ## All rights reserved. -## $Id: Makefile.am,v 1.7 2004-03-10 16:34:29 adam Exp $ +## $Id: Makefile.am,v 1.8 2004-03-15 21:39:06 adam Exp $ if ISTHR thrlib=libyazthread.la @@ -37,9 +37,13 @@ THREADED_FLAGS = @CFLAGSTHREADS@ marc8.c: charconv.sgm charconv.tcl cd $(srcdir); ./charconv.tcl -p marc8 -s 50 charconv.sgm -o marc8.c +# MARC8 conversion is generated from charconv.sgm +marc8_cjk.c: charconv_cjk.xml charconv.tcl + cd $(srcdir); ./charconv.tcl -p marc8_cjk -s 50 charconv_cjk.xml -o marc8_cjk.c + libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \ nmemsdup.c xmalloc.c readconf.c tpath.c nmem.c matchstr.c atoin.c \ - siconv.c marc8.c \ + siconv.c marc8.c marc8_cjk.c \ odr_bool.c ber_bool.c ber_len.c ber_tag.c odr_util.c \ odr_null.c ber_null.c odr_int.c ber_int.c odr_tag.c odr_cons.c \ odr_seq.c odr_oct.c ber_oct.c odr_bit.c ber_bit.c odr_oid.c \ diff --git a/src/cql.y b/src/cql.y index 0a81738..82db22c 100644 --- a/src/cql.y +++ b/src/cql.y @@ -1,4 +1,4 @@ -/* $Id: cql.y,v 1.2 2004-03-10 16:34:29 adam Exp $ +/* $Id: cql.y,v 1.3 2004-03-15 21:39:06 adam Exp $ Copyright (C) 2002-2004 Index Data Aps @@ -13,6 +13,7 @@ See the file LICENSE. #include #include #include +#include #include #include @@ -91,7 +92,7 @@ searchClause: | searchTerm { struct cql_node *st = cql_node_dup ($0.rel); - st->u.st.term = strdup($1.buf); + st->u.st.term = xstrdup($1.buf); $$.cql = st; } | @@ -105,12 +106,12 @@ searchClause: | '>' searchTerm '=' searchTerm { $$.rel = $0.rel; } cqlQuery { - $$.cql = cql_node_prefix($6.cql, $2.buf, $4.buf); + $$.cql = cql_apply_prefix($6.cql, $2.buf, $4.buf); } | '>' searchTerm { $$.rel = $0.rel; } cqlQuery { - $$.cql = cql_node_prefix($4.cql, 0, $2.buf); + $$.cql = cql_apply_prefix($4.cql, 0, $2.buf); } ; @@ -304,7 +305,7 @@ int cql_parser_stream(CQL_parser cp, CQL_parser cql_parser_create(void) { - CQL_parser cp = (CQL_parser) malloc (sizeof(*cp)); + CQL_parser cp = (CQL_parser) xmalloc (sizeof(*cp)); cp->top = 0; cp->getbyte = 0; @@ -320,7 +321,7 @@ void cql_parser_destroy(CQL_parser cp) { cql_node_destroy(cp->top); nmem_destroy(cp->nmem); - free (cp); + xfree (cp); } struct cql_node *cql_parser_result(CQL_parser cp) diff --git a/src/cqltransform.c b/src/cqltransform.c index 52cb21f..104c02b 100644 --- a/src/cqltransform.c +++ b/src/cqltransform.c @@ -1,5 +1,5 @@ -/* $Id: cqltransform.c,v 1.7 2004-03-10 16:34:29 adam Exp $ - Copyright (C) 2002-2003 +/* $Id: cqltransform.c,v 1.8 2004-03-15 21:39:06 adam Exp $ + Copyright (C) 2002-2004 Index Data Aps This file is part of the YAZ toolkit. @@ -10,6 +10,7 @@ See the file LICENSE. #include #include #include +#include struct cql_prop_entry { char *pattern; @@ -26,7 +27,7 @@ struct cql_transform_t_ { cql_transform_t cql_transform_open_FILE(FILE *f) { char line[1024]; - cql_transform_t ct = (cql_transform_t) malloc (sizeof(*ct)); + cql_transform_t ct = (cql_transform_t) xmalloc (sizeof(*ct)); struct cql_prop_entry **pp = &ct->entry; ct->error = 0; @@ -56,12 +57,12 @@ cql_transform_t cql_transform_open_FILE(FILE *f) if (cp_value_end != cp_value_start && strchr(" \t\r\n", cp_value_end[-1])) cp_value_end--; - *pp = (struct cql_prop_entry *) malloc (sizeof(**pp)); - (*pp)->pattern = (char *) malloc (cp_pattern_end - line + 1); + *pp = (struct cql_prop_entry *) xmalloc (sizeof(**pp)); + (*pp)->pattern = (char *) xmalloc (cp_pattern_end - line + 1); memcpy ((*pp)->pattern, line, cp_pattern_end - line); (*pp)->pattern[cp_pattern_end-line] = 0; - (*pp)->value = (char *) malloc (cp_value_end - cp_value_start + 1); + (*pp)->value = (char *) xmalloc (cp_value_end - cp_value_start + 1); if (cp_value_start != cp_value_end) memcpy ((*pp)->value, cp_value_start, cp_value_end-cp_value_start); (*pp)->value[cp_value_end - cp_value_start] = 0; @@ -80,14 +81,14 @@ void cql_transform_close(cql_transform_t ct) while (pe) { struct cql_prop_entry *pe_next = pe->next; - free (pe->pattern); - free (pe->value); - free (pe); + xfree (pe->pattern); + xfree (pe->value); + xfree (pe); pe = pe_next; } if (ct->addinfo) - free (ct->addinfo); - free (ct); + xfree (ct->addinfo); + xfree (ct); } cql_transform_t cql_transform_open_fname(const char *fname) @@ -102,15 +103,23 @@ cql_transform_t cql_transform_open_fname(const char *fname) } static const char *cql_lookup_property(cql_transform_t ct, - const char *pat1, const char *pat2) + const char *pat1, const char *pat2, + const char *pat3) { - char pattern[80]; + char pattern[120]; struct cql_prop_entry *e; - if (pat2) - sprintf (pattern, "%.39s%.39s", pat1, pat2); - else + if (pat1 && pat2 && pat3) + sprintf (pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3); + else if (pat1 && pat2) + sprintf (pattern, "%.39s.%.39s", pat1, pat2); + else if (pat1 && pat3) + sprintf (pattern, "%.39s.%.39s", pat1, pat3); + else if (pat1) sprintf (pattern, "%.39s", pat1); + else + return 0; + for (e = ct->entry; e; e = e->next) { if (!strcmp(e->pattern, pattern)) @@ -119,33 +128,37 @@ static const char *cql_lookup_property(cql_transform_t ct, return 0; } -static const char *cql_lookup_value(cql_transform_t ct, - const char *prefix, - const char *value) +int cql_pr_attr_uri(cql_transform_t ct, const char *category, + const char *uri, const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) { - struct cql_prop_entry *e; - int len = strlen(prefix); - - for (e = ct->entry; e; e = e->next) + const char *res = 0; + const char *eval = val ? val : default_val; + const char *prefix = 0; + + if (uri) { - if (!memcmp(e->pattern, prefix, len) && !strcmp(e->value, value)) - return e->pattern + len; + struct cql_prop_entry *e; + + for (e = ct->entry; e; e = e->next) + if (!memcmp(e->pattern, "set.", 4) && e->value && + !strcmp(e->value, uri)) + { + prefix = e->pattern+4; + break; + } + /* must have a prefix now - if not it's an error */ } - return 0; -} - -int cql_pr_attr(cql_transform_t ct, const char *category, - const char *val, - const char *default_val, - void (*pr)(const char *buf, void *client_data), - void *client_data, - int errcode) -{ - const char *res; - res = cql_lookup_property(ct, category, val ? val : default_val); - if (!res) - res = cql_lookup_property(ct, category, "*"); + if (!uri || prefix) + { + if (!res) + res = cql_lookup_property(ct, category, prefix, eval); + if (!res) + res = cql_lookup_property(ct, category, prefix, "*"); + } if (res) { char buf[64]; @@ -173,13 +186,23 @@ int cql_pr_attr(cql_transform_t ct, const char *category, { ct->error = errcode; if (val) - ct->addinfo = strdup(val); + ct->addinfo = xstrdup(val); else ct->addinfo = 0; } return 0; } +int cql_pr_attr(cql_transform_t ct, const char *category, + const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) +{ + return cql_pr_attr_uri(ct, category, 0 /* uri */, + val, default_val, pr, client_data, errcode); +} + /* Returns location of first wildcard character in the `length' * characters starting at `term', or a null pointer of there are @@ -211,27 +234,27 @@ void emit_term(cql_transform_t ct, { if (length > 1 && term[0] == '^' && term[length-1] == '^') { - cql_pr_attr(ct, "position.", "firstAndLast", 0, + cql_pr_attr(ct, "position", "firstAndLast", 0, pr, client_data, 32); term++; length -= 2; } else if (term[0] == '^') { - cql_pr_attr(ct, "position.", "first", 0, + cql_pr_attr(ct, "position", "first", 0, pr, client_data, 32); term++; length--; } else if (term[length-1] == '^') { - cql_pr_attr(ct, "position.", "last", 0, + cql_pr_attr(ct, "position", "last", 0, pr, client_data, 32); length--; } else { - cql_pr_attr(ct, "position.", "any", 0, + cql_pr_attr(ct, "position", "any", 0, pr, client_data, 32); } } @@ -246,21 +269,21 @@ void emit_term(cql_transform_t ct, */ if (length > 1 && term[0] == '*' && term[length-1] == '*' && wcchar(term+1, length-2) == 0 && - cql_pr_attr(ct, "truncation.", "both", 0, + cql_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) { term++; length -= 2; } else if (term[0] == '*' && wcchar(term+1, length-1) == 0 && - cql_pr_attr(ct, "truncation.", "left", 0, + cql_pr_attr(ct, "truncation", "left", 0, pr, client_data, 0)) { term++; length--; } else if (term[length-1] == '*' && wcchar(term, length-1) == 0 && - cql_pr_attr(ct, "truncation.", "right", 0, + cql_pr_attr(ct, "truncation", "right", 0, pr, client_data, 0)) { length--; } @@ -277,9 +300,9 @@ void emit_term(cql_transform_t ct, */ int i; char *mem; - cql_pr_attr(ct, "truncation.", "z3958", 0, + cql_pr_attr(ct, "truncation", "z3958", 0, pr, client_data, 28); - mem = malloc(length+1); + mem = xmalloc(length+1); for (i = 0; i < length; i++) { if (term[i] == '*') mem[i] = '?'; else if (term[i] == '?') mem[i] = '#'; @@ -297,7 +320,7 @@ void emit_term(cql_transform_t ct, * to differentiate between this case and the previous * one. */ - cql_pr_attr(ct, "truncation.", "none", 0, + cql_pr_attr(ct, "truncation", "none", 0, pr, client_data, 30); } } @@ -346,139 +369,63 @@ void emit_wordlist(cql_transform_t ct, emit_term(ct, last_term, last_length, pr, client_data); } - -static const char *cql_get_ns(cql_transform_t ct, - struct cql_node *cn, - struct cql_node **prefix_ar, int prefix_level, - const char **n_prefix, - const char **n_suffix) -{ - int i; - const char *ns = 0; - char prefix[32]; - const char *cp = cn->u.st.index; - const char *cp_dot = strchr(cp, '.'); - - /* strz current prefix (empty if not given) */ - if (cp_dot && cp_dot-cp < sizeof(prefix)) - { - memcpy (prefix, cp, cp_dot - cp); - prefix[cp_dot - cp] = 0; - } - else - *prefix = 0; - - /* 2. lookup in prefix_ar. and return NS */ - for (i = prefix_level; !ns && --i >= 0; ) - { - struct cql_node *cn_prefix = prefix_ar[i]; - for (; cn_prefix; cn_prefix = cn_prefix->u.st.modifiers) - { - if (*prefix && cn_prefix->u.st.index && - !strcmp(prefix, cn_prefix->u.st.index)) - { - ns = cn_prefix->u.st.term; - break; - } - else if (!*prefix && !cn_prefix->u.st.index) - { - ns = cn_prefix->u.st.term; - break; - } - } - } - if (!ns) - { - if (!ct->error) - { - ct->error = 15; - ct->addinfo = strdup(prefix); - } - return 0; - } - /* 3. lookup in set.NS for new prefix */ - *n_prefix = cql_lookup_value(ct, "set.", ns); - if (!*n_prefix) - { - if (!ct->error) - { - ct->error = 15; - ct->addinfo = strdup(ns); - } - return 0; - } - /* 4. lookup index.prefix. */ - - cp = cn->u.st.index; - cp_dot = strchr(cp, '.'); - - *n_suffix = cp_dot ? cp_dot+1 : cp; - return ns; -} - void cql_transform_r(cql_transform_t ct, struct cql_node *cn, void (*pr)(const char *buf, void *client_data), - void *client_data, - struct cql_node **prefix_ar, int prefix_level) + void *client_data) { - const char *ns, *n_prefix, *n_suffix; + const char *ns; if (!cn) return; switch (cn->which) { case CQL_NODE_ST: - if (cn->u.st.prefixes && prefix_level < 20) - prefix_ar[prefix_level++] = cn->u.st.prefixes; - ns = cql_get_ns(ct, cn, prefix_ar, prefix_level, &n_prefix, &n_suffix); + ns = cn->u.st.index_uri; if (ns) { - char n_full[64]; - sprintf (n_full, "%.20s.%.40s", n_prefix, n_suffix); - - if ((!strcmp(ns, "http://www.loc.gov/zing/cql/context-sets/cql/v1.1/") || - !strcmp(ns, "http://www.loc.gov/zing/cql/srw-indexes/v1.0/")) - && !strcmp(n_suffix, "resultSet")) + if (!strcmp(ns, cql_uri()) + && cn->u.st.index && !strcmp(cn->u.st.index, "resultSet")) { (*pr)("@set \"", client_data); (*pr)(cn->u.st.term, client_data); (*pr)("\" ", client_data); return ; } - /* ### It would be nice if this could fall back to whichever - of cql.serverChoice and srw.serverChoice is defined */ - if (!cql_pr_attr(ct, "index.", n_full, "cql.serverChoice", - pr, client_data, 16)) { - /* No index.foo; reset error and fall back to qualifier.foo */ - if (ct->error == 16) ct->error = 0; - cql_pr_attr(ct, "qualifier.", n_full, "cql.serverChoice", + cql_pr_attr_uri(ct, "index", ns, + cn->u.st.index, "serverChoice", pr, client_data, 16); - } } - + else + { + if (!ct->error) + { + ct->error = 15; + ct->addinfo = 0; + } + } if (cn->u.st.relation && !strcmp(cn->u.st.relation, "=")) - cql_pr_attr(ct, "relation.", "eq", "scr", + cql_pr_attr(ct, "relation", "eq", "scr", pr, client_data, 19); else if (cn->u.st.relation && !strcmp(cn->u.st.relation, "<=")) - cql_pr_attr(ct, "relation.", "le", "scr", + cql_pr_attr(ct, "relation", "le", "scr", pr, client_data, 19); else if (cn->u.st.relation && !strcmp(cn->u.st.relation, ">=")) - cql_pr_attr(ct, "relation.", "ge", "scr", + cql_pr_attr(ct, "relation", "ge", "scr", pr, client_data, 19); else - cql_pr_attr(ct, "relation.", cn->u.st.relation, "eq", + cql_pr_attr(ct, "relation", cn->u.st.relation, "eq", pr, client_data, 19); if (cn->u.st.modifiers) { struct cql_node *mod = cn->u.st.modifiers; for (; mod; mod = mod->u.st.modifiers) { - cql_pr_attr(ct, "relationModifier.", mod->u.st.term, 0, + cql_pr_attr(ct, "relationModifier", mod->u.st.term, 0, pr, client_data, 20); } } - cql_pr_attr(ct, "structure.", cn->u.st.relation, 0, + cql_pr_attr(ct, "structure", cn->u.st.relation, 0, pr, client_data, 24); if (cn->u.st.relation && !strcmp(cn->u.st.relation, "all")) { @@ -495,16 +442,12 @@ void cql_transform_r(cql_transform_t ct, } break; case CQL_NODE_BOOL: - if (cn->u.boolean.prefixes && prefix_level < 20) - prefix_ar[prefix_level++] = cn->u.boolean.prefixes; (*pr)("@", client_data); (*pr)(cn->u.boolean.value, client_data); (*pr)(" ", client_data); - cql_transform_r(ct, cn->u.boolean.left, pr, client_data, - prefix_ar, prefix_level); - cql_transform_r(ct, cn->u.boolean.right, pr, client_data, - prefix_ar, prefix_level); + cql_transform_r(ct, cn->u.boolean.left, pr, client_data); + cql_transform_r(ct, cn->u.boolean.right, pr, client_data); } } @@ -513,31 +456,21 @@ int cql_transform(cql_transform_t ct, void (*pr)(const char *buf, void *client_data), void *client_data) { - struct cql_node *prefix_ar[20], **pp; struct cql_prop_entry *e; ct->error = 0; if (ct->addinfo) - free (ct->addinfo); + xfree (ct->addinfo); ct->addinfo = 0; - prefix_ar[0] = 0; - pp = &prefix_ar[0]; for (e = ct->entry; e ; e = e->next) { if (!memcmp(e->pattern, "set.", 4)) - { - *pp = cql_node_mk_sc(e->pattern+4, "=", e->value); - pp = &(*pp)->u.st.modifiers; - } + cql_apply_prefix(cn, e->pattern+4, e->value); else if (!strcmp(e->pattern, "set")) - { - *pp = cql_node_mk_sc(e->value, 0, 0); - pp = &(*pp)->u.st.modifiers; - } + cql_apply_prefix(cn, 0, e->value); } - cql_transform_r (ct, cn, pr, client_data, prefix_ar, 1); - cql_node_destroy(prefix_ar[0]); + cql_transform_r (ct, cn, pr, client_data); return ct->error; } diff --git a/src/cqlutil.c b/src/cqlutil.c index 1b842b1..d915c59 100644 --- a/src/cqlutil.c +++ b/src/cqlutil.c @@ -1,4 +1,4 @@ -/* $Id: cqlutil.c,v 1.2 2004-03-10 16:34:29 adam Exp $ +/* $Id: cqlutil.c,v 1.3 2004-03-15 21:39:06 adam Exp $ Copyright (C) 2002-2004 Index Data Aps @@ -10,6 +10,7 @@ See the file LICENSE. #include #include +#include #include void cql_fputs(const char *buf, void *client_data) @@ -31,13 +32,15 @@ struct cql_node *cql_node_dup (struct cql_node *cp) cp->u.st.relation, cp->u.st.term); cn->u.st.modifiers = cql_node_dup(cp->u.st.modifiers); - cn->u.st.prefixes = cql_node_dup(cp->u.st.prefixes); + cn->u.st.index_uri = cp->u.st.index_uri ? + xstrdup(cp->u.st.index_uri) : 0; + cn->u.st.relation_uri = cp->u.st.relation_uri ? + xstrdup(cp->u.st.relation_uri) : 0; break; case CQL_NODE_BOOL: cn = cql_node_mk_boolean(cp->u.boolean.value); cn->u.boolean.left = cql_node_dup(cp->u.boolean.left); cn->u.boolean.right = cql_node_dup(cp->u.boolean.right); - cn->u.boolean.prefixes = cql_node_dup(cp->u.boolean.prefixes); } return cn; } @@ -46,53 +49,79 @@ struct cql_node *cql_node_mk_sc(const char *index, const char *relation, const char *term) { - struct cql_node *p = (struct cql_node *) malloc(sizeof(*p)); + struct cql_node *p = (struct cql_node *) xmalloc(sizeof(*p)); p->which = CQL_NODE_ST; p->u.st.index = 0; if (index) - p->u.st.index = strdup(index); + p->u.st.index = xstrdup(index); + p->u.st.index_uri = 0; p->u.st.term = 0; if (term) - p->u.st.term = strdup(term); + p->u.st.term = xstrdup(term); p->u.st.relation = 0; if (relation) - p->u.st.relation = strdup(relation); + p->u.st.relation = xstrdup(relation); + p->u.st.relation_uri = 0; p->u.st.modifiers = 0; - p->u.st.prefixes = 0; return p; } struct cql_node *cql_node_mk_boolean(const char *op) { - struct cql_node *p = (struct cql_node *) malloc(sizeof(*p)); + struct cql_node *p = (struct cql_node *) xmalloc(sizeof(*p)); p->which = CQL_NODE_BOOL; p->u.boolean.value = 0; if (op) - p->u.boolean.value = strdup(op); + p->u.boolean.value = xstrdup(op); p->u.boolean.left = 0; p->u.boolean.right = 0; p->u.boolean.modifiers = 0; - p->u.boolean.prefixes = 0; return p; } -struct cql_node *cql_node_prefix(struct cql_node *n, const char *prefix, - const char *uri) +const char *cql_uri() +{ + return "info:srw/cql-context-set/1/cql-v1.1"; +} + +struct cql_node *cql_apply_prefix(struct cql_node *n, const char *prefix, + const char *uri) { - struct cql_node **cpp = 0; if (n->which == CQL_NODE_ST) { - cpp = &n->u.st.prefixes; + if (!n->u.st.index_uri && n->u.st.index) + { /* not yet resolved.. */ + const char *cp = strchr(n->u.st.index, '.'); + if (prefix && cp && strlen(prefix) == cp - n->u.st.index && + !memcmp(n->u.st.index, prefix, strlen(prefix))) + { + char *nval = xstrdup(cp+1); + n->u.st.index_uri = xstrdup(uri); + xfree (n->u.st.index); + n->u.st.index = nval; + } + else if (!prefix && !cp) + { + n->u.st.index_uri = xstrdup(uri); + } + } + if (!n->u.st.relation_uri && n->u.st.relation) + { + const char *cp = strchr(n->u.st.relation, '.'); + if (prefix && cp && strlen(prefix) == cp - n->u.st.relation && + !memcmp(n->u.st.relation, prefix, strlen(prefix))) + { + char *nval = xstrdup(cp+1); + n->u.st.relation_uri = xstrdup(uri); + xfree (n->u.st.relation); + n->u.st.relation = nval; + } + } } else if (n->which == CQL_NODE_BOOL) { - cpp = &n->u.boolean.prefixes; - } - if (cpp) - { - struct cql_node *cp = cql_node_mk_sc(prefix, "=", uri); - cp->u.st.modifiers = *cpp; - *cpp = cp; + cql_apply_prefix(n->u.boolean.left, prefix, uri); + cql_apply_prefix(n->u.boolean.right, prefix, uri); } return n; } @@ -104,19 +133,18 @@ void cql_node_destroy(struct cql_node *cn) switch (cn->which) { case CQL_NODE_ST: - free (cn->u.st.index); - free (cn->u.st.relation); - free (cn->u.st.term); + xfree (cn->u.st.index); + xfree (cn->u.st.relation); + xfree (cn->u.st.term); + xfree (cn->u.st.index_uri); + xfree (cn->u.st.relation_uri); cql_node_destroy(cn->u.st.modifiers); - cql_node_destroy(cn->u.st.prefixes); break; case CQL_NODE_BOOL: - free (cn->u.boolean.value); + xfree (cn->u.boolean.value); cql_node_destroy(cn->u.boolean.left); cql_node_destroy(cn->u.boolean.right); - cql_node_destroy(cn->u.boolean.prefixes); cql_node_destroy(cn->u.boolean.modifiers); } - free (cn); + xfree (cn); } - diff --git a/src/marcdisp.c b/src/marcdisp.c index 1266d25..652db12 100644 --- a/src/marcdisp.c +++ b/src/marcdisp.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1995-2003, Index Data + * Copyright (c) 1995-2004, Index Data * See the file LICENSE for details. * - * $Id: marcdisp.c,v 1.3 2003-12-17 12:28:07 adam Exp $ + * $Id: marcdisp.c,v 1.4 2004-03-15 21:39:06 adam Exp $ */ #if HAVE_CONFIG_H @@ -45,54 +45,11 @@ static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr) { size_t i; if (mt->xml == YAZ_MARC_ISO2709) - { wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); - } else if (mt->xml == YAZ_MARC_LINE) - { wrbuf_iconv_write(wr, mt->iconv_cd, buf, len); - } else - { - int j = 0; - for (i = 0; i j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "<"); - j=i+1; - break; - case '>': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, ">"); - j=i+1; - break; - case '&': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "&"); - j=i+1; - break; - case '"': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, """); - j=i+1; - break; - case '\'': - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - wrbuf_puts(wr, "'"); - j=i+1; - break; - } - } - if (i > j) - wrbuf_iconv_write(wr, mt->iconv_cd, buf+j, i-j); - } + wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len); } int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) @@ -177,7 +134,9 @@ int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr) wrbuf_printf( wr, "\n" - " %.24s\n", buf); + " "); + marc_cdata(mt, buf, 24, wr); + wrbuf_printf(wr, "\n"); break; } } diff --git a/src/siconv.c b/src/siconv.c index a10fd3d..e351b13 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1997-2003, Index Data + * Copyright (c) 1997-2004, Index Data * See the file LICENSE for details. * - * $Id: siconv.c,v 1.2 2004-03-11 10:09:11 oleg Exp $ + * $Id: siconv.c,v 1.3 2004-03-15 21:39:06 adam Exp $ */ /* mini iconv and wrapper for system iconv library (if present) */ @@ -27,6 +27,9 @@ unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft, size_t *no_read); +unsigned long yaz_marc8_cjk_conv (unsigned char *inp, size_t inbytesleft, + size_t *no_read); + struct yaz_iconv_struct { int my_errno; int init_flag; @@ -36,6 +39,7 @@ struct yaz_iconv_struct { size_t inbytesleft, size_t *no_read); size_t (*write_handle)(yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft); + int marc8_esc_mode; #if HAVE_ICONV_H iconv_t iconv_cd; #endif @@ -216,7 +220,78 @@ static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp, static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp, size_t inbytesleft, size_t *no_read) { - return yaz_marc8_conv(inp, inbytesleft, no_read); + *no_read = 0; + while(inbytesleft >= 1 && inp[0] == 27) + { + size_t inbytesleft0 = inbytesleft; + inp++; + inbytesleft--; + if (inbytesleft <= 1) + { + *no_read = 0; + cd->my_errno = YAZ_ICONV_EINVAL; + return 0; + } + if (*inp == '(' || *inp == ',') /* GO, one bytes */ + { + inbytesleft--; + inp++; + } + else if (*inp == '$') /* G0, multi byte */ + { + inbytesleft--; + inp++; + if (inp[0] == ',') + { + inbytesleft--; + inp++; + } + } + if (inbytesleft <= 0) + { + *no_read = 0; + cd->my_errno = YAZ_ICONV_EINVAL; + return 0; + } + if (*inp == '!') + { + if (inbytesleft <= 1) + { + *no_read = 0; + cd->my_errno = YAZ_ICONV_EINVAL; + return 0; + } + inbytesleft--; + inp++; + } + cd->marc8_esc_mode = *inp++; + inbytesleft--; + (*no_read) += inbytesleft0 - inbytesleft; + } + if (inbytesleft <= 0) + return 0; + else + { + unsigned long x; + size_t no_read_sub = 0; + + switch(cd->marc8_esc_mode) + { + case 'B': + case 'E': + x = yaz_marc8_conv(inp, inbytesleft, &no_read_sub); + *no_read += no_read_sub; + return x; + case '1': + x = yaz_marc8_cjk_conv(inp, inbytesleft, &no_read_sub); + *no_read += no_read_sub; + return x; + default: + *no_read = 0; + cd->my_errno = YAZ_ICONV_EILSEQ; + return 0; + } + } } static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, @@ -379,6 +454,7 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) cd->read_handle = 0; cd->init_handle = 0; cd->my_errno = YAZ_ICONV_UNKNOWN; + cd->marc8_esc_mode = 'B'; /* a useful hack: if fromcode has leading @, the library not use YAZ's own conversions .. */ @@ -514,9 +590,12 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, r = (size_t)(-1); break; } - r = (cd->write_handle)(cd, x, outbuf, outbytesleft); - if (r) - break; + if (x) + { + r = (cd->write_handle)(cd, x, outbuf, outbytesleft); + if (r) + break; + } *inbytesleft -= no_read; (*inbuf) += no_read; } diff --git a/src/wrbuf.c b/src/wrbuf.c index c8a3ecf..bb87507 100644 --- a/src/wrbuf.c +++ b/src/wrbuf.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1995-2003, Index Data. + * Copyright (c) 1995-2004, Index Data. * See the file LICENSE for details. * - * $Id: wrbuf.c,v 1.2 2003-12-11 00:37:22 adam Exp $ + * $Id: wrbuf.c,v 1.3 2004-03-15 21:39:06 adam Exp $ */ /* @@ -82,7 +82,12 @@ int wrbuf_puts(WRBUF b, const char *buf) int wrbuf_xmlputs(WRBUF b, const char *cp) { - while (*cp) + return wrbuf_write_cdata(b, cp, strlen(cp)); +} + +int wrbuf_write_cdata(WRBUF b, const char *cp, int size) +{ + while (--size >= 0) { switch(*cp) { @@ -132,7 +137,8 @@ void wrbuf_printf(WRBUF b, const char *fmt, ...) va_end(ap); } -int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size) +static int wrbuf_iconv_write_x(WRBUF b, yaz_iconv_t cd, const char *buf, + int size, int cdata) { if (cd) { @@ -151,10 +157,29 @@ int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size) if (e != YAZ_ICONV_E2BIG) break; } - wrbuf_write(b, outbuf, outp - outbuf); + if (cdata) + wrbuf_write_cdata(b, outbuf, outp - outbuf); + else + wrbuf_write(b, outbuf, outp - outbuf); } } else - wrbuf_write(b, buf, size); + { + if (cdata) + wrbuf_write_cdata(b, buf, size); + else + wrbuf_write(b, buf, size); + } return wrbuf_len(b); } + +int wrbuf_iconv_write(WRBUF b, yaz_iconv_t cd, const char *buf, int size) +{ + return wrbuf_iconv_write_x(b, cd, buf, size, 0); +} + +int wrbuf_iconv_write_cdata(WRBUF b, yaz_iconv_t cd, const char *buf, int size) +{ + return wrbuf_iconv_write_x(b, cd, buf, size, 1); +} + diff --git a/src/xcqlutil.c b/src/xcqlutil.c index b884b6e..45815b0 100644 --- a/src/xcqlutil.c +++ b/src/xcqlutil.c @@ -1,4 +1,4 @@ -/* $Id: xcqlutil.c,v 1.2 2004-03-10 16:34:29 adam Exp $ +/* $Id: xcqlutil.c,v 1.3 2004-03-15 21:39:06 adam Exp $ Copyright (C) 2002-2004 Index Data Aps @@ -54,28 +54,33 @@ static void prefixes(struct cql_node *cn, void (*pr)(const char *buf, void *client_data), void *client_data, int level) { - if (cn) + int head = 0; + if (cn->u.st.index_uri) { pr_n("\n", pr, client_data, level); - for (; cn; cn = cn->u.st.modifiers) - { - pr_n("\n", pr, client_data, level+2); - if (cn->u.st.index) - { - pr_n("", pr, client_data, level+4); - pr_cdata(cn->u.st.index, pr, client_data); - pr_n("\n", pr, client_data, 0); - } - if (cn->u.st.term) - { - pr_n("", pr, client_data, level+4); - pr_cdata(cn->u.st.term, pr, client_data); - pr_n("\n", pr, client_data, 0); - } - pr_n("\n", pr, client_data, level+2); - } - pr_n("\n", pr, client_data, level); + head = 1; + + pr_n("\n", pr, client_data, level+2); + pr_n("", pr, client_data, level+4); + pr_cdata(cn->u.st.index_uri, pr, client_data); + pr_n("\n", pr, client_data, 0); + pr_n("\n", pr, client_data, level+2); } + if (cn->u.st.relation_uri && cn->u.st.relation) + { + if (!head) + pr_n("\n", pr, client_data, level); + pr_n("\n", pr, client_data, level+2); + pr_n("", pr, client_data, level+4); + pr_cdata("rel", pr, client_data); + pr_n("\n", pr, client_data, 0); + pr_n("", pr, client_data, level+4); + pr_cdata(cn->u.st.relation_uri, pr, client_data); + pr_n("\n", pr, client_data, 0); + pr_n("\n", pr, client_data, level+2); + } + if (head) + pr_n("\n", pr, client_data, level); } static void cql_to_xml_mod(struct cql_node *m, @@ -119,7 +124,7 @@ static void cql_to_xml_r(struct cql_node *cn, { case CQL_NODE_ST: pr_n("\n", pr, client_data, level); - prefixes(cn->u.st.prefixes, pr, client_data, level+2); + prefixes(cn, pr, client_data, level+2); if (cn->u.st.index) { pr_n("", pr, client_data, level+2); @@ -130,9 +135,17 @@ static void cql_to_xml_r(struct cql_node *cn, { pr_n("\n", pr, client_data, level+2); pr_n("", pr, client_data, level+4); + if (cn->u.st.relation_uri) + pr_cdata("rel.", pr, client_data); pr_cdata(cn->u.st.relation, pr, client_data); pr_n("\n", pr, client_data, 0); + if (cn->u.st.relation_uri) + { + pr_n("", pr, client_data, level+4); + pr_cdata(cn->u.st.relation_uri, pr, client_data); + pr_n("\n", pr, client_data, 0); + } cql_to_xml_mod(cn->u.st.modifiers, pr, client_data, level+4); @@ -148,7 +161,6 @@ static void cql_to_xml_r(struct cql_node *cn, break; case CQL_NODE_BOOL: pr_n("\n", pr, client_data, level); - prefixes(cn->u.st.prefixes, pr, client_data, level+2); if (cn->u.boolean.value) { pr_n("\n", pr, client_data, level+2); diff --git a/test/cql/8.1.out b/test/cql/8.1.out index 2b489af..40a273f 100644 --- a/test/cql/8.1.out +++ b/test/cql/8.1.out @@ -1,10 +1,4 @@ - - - foo - info:srw/cql-context-set/1/dc-v1.1 - - dc.title = diff --git a/test/cql/8.3.out b/test/cql/8.3.out index 4eebbd3..68b3a5b 100644 --- a/test/cql/8.3.out +++ b/test/cql/8.3.out @@ -1,20 +1,15 @@ - - - foo - info:srw/cql-context-set/1/dc-v1.1 - - - ccg - http://srw.cheshire3.org/contextSets/ccg/1.1/ - - and - foo.title + + + info:srw/cql-context-set/1/dc-v1.1 + + + title = @@ -23,7 +18,12 @@ - ccg.force + + + http://srw.cheshire3.org/contextSets/ccg/1.1/ + + + force = diff --git a/test/cql/9.4.out b/test/cql/9.4.out index aed8eca..f8701ef 100644 --- a/test/cql/9.4.out +++ b/test/cql/9.4.out @@ -3,10 +3,6 @@ any - - any - exact - any diff --git a/test/tsticonv.c b/test/tsticonv.c index 6410e57..1799002 100644 --- a/test/tsticonv.c +++ b/test/tsticonv.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2002-2003, Index Data + * Copyright (c) 2002-2004, Index Data * See the file LICENSE for details. * - * $Id: tsticonv.c,v 1.1 2003-10-27 12:21:38 adam Exp $ + * $Id: tsticonv.c,v 1.2 2004-03-15 21:39:06 adam Exp $ */ #if HAVE_CONFIG_H @@ -16,7 +16,7 @@ #include /* some test strings in ISO-8859-1 format */ -const char *buf[] = { +static const char *iso_8859_1_a[] = { "ax" , "\330", "eneb\346r", @@ -25,8 +25,8 @@ const char *buf[] = { "\xfbr", 0 }; -/* some test strings in MARC-8 format */ -const char *marc8_strings[] = { +/* same test strings in MARC-8 format */ +static const char *marc8_a[] = { "ax", "\xa2", /* latin capital letter o with stroke */ "eneb\xb5r", /* latin small letter ae */ @@ -36,16 +36,21 @@ const char *marc8_strings[] = { 0 }; -static void marc8_tst() +static void marc8_tst_a() { int i; yaz_iconv_t cd; cd = yaz_iconv_open("ISO-8859-1", "MARC8"); - for (i = 0; buf[i]; i++) + if (!cd) + { + printf("tsticonv 10 yaz_iconv_open failed\n"); + exit(10); + } + for (i = 0; iso_8859_1_a[i]; i++) { size_t r; - char *inbuf= (char*) marc8_strings[i]; + char *inbuf= (char*) marc8_a[i]; size_t inbytesleft = strlen(inbuf); char outbuf0[24]; char *outbuf = outbuf0; @@ -56,15 +61,131 @@ static void marc8_tst() { int e = yaz_iconv_error(cd); - printf ("tsticonv 6 i=%d e=%d\n", i, e); - exit(6); + printf ("tsticonv 11 i=%d e=%d\n", i, e); + exit(11); + } + if ((outbuf - outbuf0) != strlen(iso_8859_1_a[i]) + || memcmp(outbuf0, iso_8859_1_a[i], + strlen(iso_8859_1_a[i]))) + { + printf ("tsticonv 12 i=%d\n", i); + printf ("buf=%s out=%s\n", iso_8859_1_a[i], outbuf0); + exit(12); + } + } + yaz_iconv_close(cd); +} + +static void marc8_tst_b() +{ + static const char *marc8_b[] = { + "\033$1" "\x21\x2B\x3B" /* FF1F */ "\033(B" "o", + "\033$1" "\x6F\x77\x29" /* AE0E */ "\x6F\x52\x7C" /* c0F4 */ "\033(B", + "\033$1" + "\x21\x50\x6E" /* 7CFB */ + "\x21\x51\x31" /* 7D71 */ + "\x21\x3A\x67" /* 5B89 */ + "\x21\x33\x22" /* 5168 */ + "\x21\x33\x53" /* 5206 */ + "\x21\x44\x2B" /* 6790 */ + "\033(B", + 0 + }; + static const char *ucs4_b[] = { + "\x00\x00\xFF\x1F" "\x00\x00\x00o", + "\x00\x00\xAE\x0E" "\x00\x00\xC0\xF4", + "\x00\x00\x7C\xFB" + "\x00\x00\x7D\x71" + "\x00\x00\x5B\x89" + "\x00\x00\x51\x68" + "\x00\x00\x52\x06" + "\x00\x00\x67\x90", + 0 + }; + int i; + yaz_iconv_t cd; + + cd = yaz_iconv_open("UCS4", "MARC8"); + if (!cd) + { + printf ("tsticonv 20 yaz_iconv_open failed\n"); + exit(20); + } + for (i = 0; marc8_b[i]; i++) + { + size_t r; + size_t len; + size_t expect_len = (i == 2 ? 24 : 8); + char *inbuf= (char*) marc8_b[i]; + size_t inbytesleft = strlen(inbuf); + char outbuf0[24]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0); + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t) (-1)) + { + int e = yaz_iconv_error(cd); + + printf ("tsticonv 21 i=%d e=%d\n", i, e); + exit(21); + } + len = outbuf - outbuf0; + if (len != expect_len || memcmp(outbuf0, ucs4_b[i], len)) + { + printf ("tsticonv 22 len=%d gotlen=%d i=%d\n", expect_len, len, i); + exit(22); + } + } + yaz_iconv_close(cd); +} + +static void marc8_tst_c() +{ + static const char *ucs4_c[] = { + "\x00\x00\xFF\x1F\x00\x00\x00o", + "\x00\x00\xAE\x0E\x00\x00\xC0\xF4", + 0 + }; + static const char *utf8_c[] = { + "\xEF\xBC\x9F\x6F", + "\xEA\xB8\x8E\xEC\x83\xB4", + 0 + }; + + int i; + yaz_iconv_t cd; + + cd = yaz_iconv_open("UTF8", "UCS4"); + if (!cd) + { + printf ("tsticonv 30 yaz_iconv_open failed\n"); + exit(30); + } + for (i = 0; ucs4_c[i]; i++) + { + size_t r; + size_t len; + char *inbuf= (char*) ucs4_c[i]; + size_t inbytesleft = 8; + char outbuf0[24]; + char *outbuf = outbuf0; + size_t outbytesleft = sizeof(outbuf0); + + r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (r == (size_t) (-1)) + { + int e = yaz_iconv_error(cd); + + printf ("tsticonv 31 i=%d e=%d\n", i, e); + exit(31); } - if ((outbuf - outbuf0) != strlen(buf[i]) - || memcmp(outbuf0, buf[i], strlen(buf[i]))) + len = outbuf - outbuf0; + if (len != strlen(utf8_c[i]) || memcmp(outbuf0, utf8_c[i], len)) { - printf ("tsticonv 7 i=%d\n", i); - printf ("buf=%s out=%s\n", buf[i], outbuf0); - exit(7); + printf ("tsticonv 32 len=%d gotlen=%d i=%d\n", + strlen(utf8_c[i]), len, i); + exit(32); } } yaz_iconv_close(cd); @@ -74,10 +195,10 @@ static void dconvert(int mandatory, const char *tmpcode) { int i; yaz_iconv_t cd; - for (i = 0; buf[i]; i++) + for (i = 0; iso_8859_1_a[i]; i++) { size_t r; - char *inbuf = (char*) buf[i]; + char *inbuf = (char*) iso_8859_1_a[i]; size_t inbytesleft = strlen(inbuf); char outbuf0[24]; char outbuf1[10]; @@ -89,7 +210,7 @@ static void dconvert(int mandatory, const char *tmpcode) { if (!mandatory) return; - printf ("tsticonv 1\n"); + printf ("tsticonv code=%s 1\n", tmpcode); exit(1); } r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); @@ -97,7 +218,7 @@ static void dconvert(int mandatory, const char *tmpcode) { int e = yaz_iconv_error(cd); - printf ("tsticonv 2 e=%d\n", e); + printf ("tsticonv code=%s 2 e=%d\n", tmpcode, e); exit(2); } yaz_iconv_close(cd); @@ -107,7 +228,7 @@ static void dconvert(int mandatory, const char *tmpcode) { if (!mandatory) return; - printf ("tsticonv 3\n"); + printf ("tsticonv code=%s 3\n", tmpcode); exit(3); } inbuf = outbuf0; @@ -119,13 +240,15 @@ static void dconvert(int mandatory, const char *tmpcode) if (r == (size_t)(-1)) { int e = yaz_iconv_error(cd); - printf ("tsticonv 4 e=%d\n", e); + printf ("tsticonv code=%s 4 e=%d\n", tmpcode, e); exit(4); } - if (strlen(buf[i]) == (sizeof(outbuf1) - outbytesleft) && - memcmp(outbuf1, buf[i], strlen(buf[i]))) + if (strlen(iso_8859_1_a[i]) == + (sizeof(outbuf1) - outbytesleft) && + memcmp(outbuf1, iso_8859_1_a[i], + strlen(iso_8859_1_a[i]))) { - printf ("tsticonv 5\n"); + printf ("tsticonv code=%s 5\n", tmpcode); exit(5); } yaz_iconv_close(cd); @@ -137,7 +260,10 @@ int main (int argc, char **argv) dconvert(1, "UTF-8"); dconvert(1, "ISO-8859-1"); dconvert(1, "UCS4"); + dconvert(1, "UCS4LE"); dconvert(0, "CP865"); - marc8_tst(); + marc8_tst_a(); + marc8_tst_b(); + marc8_tst_c(); exit (0); } diff --git a/win/makefile b/win/makefile index d6b43d2..1979f19 100644 --- a/win/makefile +++ b/win/makefile @@ -1,6 +1,6 @@ # Copyright (C) 1994-2004, Index Data # All rights reserved. -# $Id: makefile,v 1.70 2004-03-10 16:34:31 adam Exp $ +# $Id: makefile,v 1.71 2004-03-15 21:39:07 adam Exp $ # # Programmed by # HL: Heikki Levanto, Index Data @@ -617,6 +617,10 @@ $(SRCDIR)\marc8.c: $(SRCDIR)\charconv.sgm @cd $(SRCDIR) $(TCL) charconv.tcl -p marc8 -s 50 charconv.sgm -o marc8.c +$(SRCDIR)\marc8_cjk.c: $(SRCDIR)\charconv_cjk.xml + @cd $(SRCDIR) + $(TCL) charconv.tcl -p marc8_cjk -s 50 charconv_cjk.xml -o marc8_cjk.c + !endif !if $(HAVE_BISON) diff --git a/win/yaz.nsi b/win/yaz.nsi index ae3a7cd..923c10a 100644 --- a/win/yaz.nsi +++ b/win/yaz.nsi @@ -1,4 +1,4 @@ -; $Id: yaz.nsi,v 1.38 2004-03-01 17:33:03 adam Exp $ +; $Id: yaz.nsi,v 1.39 2004-03-15 21:39:07 adam Exp $ !define VERSION "2.0.15" @@ -113,7 +113,8 @@ Section "YAZ Source" File ..\src\*.y File ..\src\*.tcl File ..\src\*.asn - File ..\src\*.sgm + File ..\src\charconv.sgm + File ..\src\charconv_cjk.xml SetOutPath $INSTDIR\zoom File ..\zoom\*.c SetOutPath $INSTDIR\ztest -- 1.7.10.4