X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=data1%2Fd1_read.c;h=c1f18d7875c9f13f0f3c0b181d66de2b9559adbe;hp=68ee97f06da30f2c9a7b2c5b5107ccd1dfb5f01d;hb=a030c87bc444608639905eca95e29f84a4f1d991;hpb=3e4a78274a6cb7a99f3e90967ea30c830ffbf8c3 diff --git a/data1/d1_read.c b/data1/d1_read.c index 68ee97f..c1f18d7 100644 --- a/data1/d1_read.c +++ b/data1/d1_read.c @@ -1,8 +1,5 @@ -/* $Id: d1_read.c,v 1.23 2007-03-19 21:50:39 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -174,6 +171,7 @@ data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name) { data1_absyn *absyn = data1_get_absyn(dh, name, 1); data1_node *res; + if (!absyn) { yaz_log(YLOG_WARN, "Unable to acquire abstract syntax " "for '%s'", @@ -1003,34 +1001,12 @@ data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf) } -static int conv_item (NMEM m, yaz_iconv_t t, - WRBUF wrbuf, char *inbuf, size_t inlen) +static int conv_item(NMEM m, yaz_iconv_t t, + WRBUF wrbuf, char *inbuf, size_t inlen) { - wrbuf_rewind (wrbuf); - if (wrbuf->size < 10) - wrbuf_grow (wrbuf, 10); - for (;;) - { - char *outbuf = wrbuf->buf + wrbuf->pos; - size_t outlen = wrbuf->size - wrbuf->pos; - if (yaz_iconv (t, &inbuf, &inlen, &outbuf, &outlen) == - (size_t)(-1) && yaz_iconv_error(t) != YAZ_ICONV_E2BIG) - { - /* bad data. stop and skip conversion entirely */ - return -1; - } - else if (inlen == 0) - { /* finished converting */ - wrbuf->pos = wrbuf->size - outlen; - break; - } - else - { - /* buffer too small: make sure we expand buffer */ - wrbuf->pos = wrbuf->size - outlen; - wrbuf_grow(wrbuf, 20); - } - } + wrbuf_rewind(wrbuf); + wrbuf_iconv_write(wrbuf, t, inbuf, inlen); + wrbuf_iconv_reset(wrbuf, t); return 0; } @@ -1107,8 +1083,8 @@ const char *data1_get_encoding (data1_handle dh, data1_node *n) } int data1_iconv (data1_handle dh, NMEM m, data1_node *n, - const char *tocode, - const char *fromcode) + const char *tocode, + const char *fromcode) { if (yaz_matchstr (tocode, fromcode)) { @@ -1126,6 +1102,35 @@ int data1_iconv (data1_handle dh, NMEM m, data1_node *n, return 0; } +void data1_chop_text(data1_handle dh, NMEM m, data1_node *n) +{ + for (; n; n = n->next) + { + if (n->which == DATA1N_data) + { + + int sz = n->u.data.len; + const char *ndata = n->u.data.data; + int off = 0; + + for (off = 0; off < sz; off++) + if (!d1_isspace(ndata[off])) + break; + sz = sz - off; + ndata += off; + + while (sz && d1_isspace(ndata[sz - 1])) + sz--; + + n->u.data.data = nmem_malloc(m, sz); + n->u.data.len = sz; + memcpy(n->u.data.data, ndata, sz); + + } + data1_chop_text(dh, m, n->child); + } +} + void data1_concat_text(data1_handle dh, NMEM m, data1_node *n) { for (; n; n = n->next) @@ -1155,9 +1160,11 @@ void data1_concat_text(data1_handle dh, NMEM m, data1_node *n) data1_concat_text(dh, m, n->child); } } + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab