X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=retrieval%2Fd1_read.c;h=281f7caf5494f9c845e58a50d81548eafc444b64;hb=5b690aebb8dc2d05cad8f668de8fd821a1c231fa;hp=289d35223d4c40ced14ae2bff78bfeea6141022d;hpb=9fa5e9b2a5a3ba0644178d5ed85869a31ffe59f2;p=yaz-moved-to-github.git diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index 289d352..281f7ca 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -3,20 +3,15 @@ * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_read.c,v 1.49 2002-08-23 14:27:18 adam Exp $ + * $Id: d1_read.c,v 1.54 2002-10-08 23:00:09 adam Exp $ */ #include #include #include -#include - -#if HAVE_ICONV_H -#include -#endif - #include +#include #include #include @@ -65,29 +60,8 @@ data1_node *data1_mk_node_type (data1_handle dh, NMEM m, int type) return data1_mk_node2 (dh, m, type, 0); } -data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type, - data1_node *parent) +static void data1_init_node (data1_handle dh, data1_node *r, int type) { - data1_node *r; - - r = (data1_node *)nmem_malloc(m, sizeof(*r)); - r->next = r->child = r->last_child = 0; - r->destroy = 0; - - if (!parent) - { - r->root = r; - } - else - { - r->root = parent->root; - r->parent = parent; - if (!parent->child) - parent->child = parent->last_child = r; - else - parent->last_child->next = r; - parent->last_child = r; - } r->which = type; switch(type) { @@ -127,6 +101,57 @@ data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type, default: logf (LOG_WARN, "data_mk_node_type. bad type = %d\n", type); } +} + +data1_node *data1_append_node (data1_handle dh, NMEM m, int type, + data1_node *parent) +{ + data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r)); + r->next = r->child = r->last_child = 0; + r->destroy = 0; + + if (!parent) + r->root = r; + else + { + r->root = parent->root; + r->parent = parent; + if (!parent->child) + parent->child = parent->last_child = r; + else + parent->last_child->next = r; + parent->last_child = r; + } + data1_init_node(dh, r, type); + return r; +} + +data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type, + data1_node *parent) +{ + return data1_append_node (dh, m, type, parent); +} + +data1_node *data1_insert_node (data1_handle dh, NMEM m, int type, + data1_node *parent) +{ + data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r)); + r->next = r->child = r->last_child = 0; + r->destroy = 0; + + if (!parent) + r->root = r; + else + { + r->root = parent->root; + r->parent = parent; + if (!parent->child) + parent->last_child = r; + else + r->next = parent->child; + parent->child = r; + } + data1_init_node(dh, r, type); return r; } @@ -170,19 +195,30 @@ void data1_set_root(data1_handle dh, data1_node *res, } data1_node *data1_mk_preprocess (data1_handle dh, NMEM nmem, - const char *target, const char **attr, - data1_node *at) + const char *target, + const char **attr, data1_node *at) +{ + return data1_mk_preprocess_n (dh, nmem, target, strlen(target), + attr, at); +} + +data1_node *data1_mk_preprocess_n (data1_handle dh, NMEM nmem, + const char *target, size_t len, + const char **attr, data1_node *at) { data1_xattr **p; data1_node *res = data1_mk_node2 (dh, nmem, DATA1N_preprocess, at); - res->u.preprocess.target = data1_insert_string (dh, res, nmem, target); - + res->u.preprocess.target = data1_insert_string_n (dh, res, nmem, + target, len); + p = &res->u.preprocess.attributes; while (attr && *attr) { *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; + p = &(*p)->next; } *p = 0; @@ -219,6 +255,7 @@ data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem, *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; p = &(*p)->next; } *p = 0; @@ -242,6 +279,7 @@ void data1_tag_add_attr (data1_handle dh, NMEM nmem, *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; p = &(*p)->next; } *p = 0; @@ -331,7 +369,7 @@ char *data1_insert_string_n (data1_handle dh, data1_node *res, { char *b; if (len >= DATA1_LOCALDATA) - b = nmem_malloc (m, len+1); + b = (char *) nmem_malloc (m, len+1); else b = res->lbuf; memcpy (b, str, len); @@ -348,7 +386,8 @@ char *data1_insert_string (data1_handle dh, data1_node *res, static data1_node *data1_add_insert_taggeddata(data1_handle dh, data1_node *at, const char *tagname, NMEM m, - int local_allowed) + int local_allowed, + int insert_mode) { data1_node *root = at->root; data1_node *partag = get_parent_tag (dh, at); @@ -366,7 +405,10 @@ static data1_node *data1_add_insert_taggeddata(data1_handle dh, } if (local_allowed || e) { - tagn = data1_mk_node2 (dh, m, DATA1N_tag, at); + if (insert_mode) + tagn = data1_insert_node (dh, m, DATA1N_tag, at); + else + tagn = data1_append_node (dh, m, DATA1N_tag, at); tagn->u.tag.tag = data1_insert_string (dh, tagn, m, tagname); tagn->u.tag.element = e; datn = data1_mk_node2 (dh, m, DATA1N_data, tagn); @@ -377,7 +419,7 @@ static data1_node *data1_add_insert_taggeddata(data1_handle dh, data1_node *data1_mk_tag_data(data1_handle dh, data1_node *at, const char *tagname, NMEM m) { - return data1_add_insert_taggeddata (dh, at, tagname, m, 1); + return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0); } @@ -389,21 +431,21 @@ data1_node *data1_mk_tag_data(data1_handle dh, data1_node *at, data1_node *data1_mk_tag_data_wd(data1_handle dh, data1_node *at, const char *tagname, NMEM m) { - return data1_add_insert_taggeddata (dh, at, tagname, m, 0); + return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1); } data1_node *data1_insert_taggeddata (data1_handle dh, data1_node *root, data1_node *at, const char *tagname, NMEM m) { - return data1_add_insert_taggeddata (dh, at, tagname, m, 0); + return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1); } data1_node *data1_add_taggeddata (data1_handle dh, data1_node *root, data1_node *at, const char *tagname, NMEM m) { - return data1_add_insert_taggeddata (dh, at, tagname, m, 1); + return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0); } data1_node *data1_mk_tag_data_int (data1_handle dh, data1_node *at, @@ -485,6 +527,11 @@ data1_node *data1_mk_tag_data_text_uni (data1_handle dh, data1_node *at, static int ampr (int (*get_byte)(void *fh), void *fh, int *amp) { +#if 1 + int c = (*get_byte)(fh); + *amp = 0; + return c; +#else int c = (*get_byte)(fh); *amp = 0; if (c == '&') @@ -520,6 +567,7 @@ static int ampr (int (*get_byte)(void *fh), void *fh, int *amp) } } return c; +#endif } data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, @@ -533,14 +581,15 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, { data1_xattr *p; int len; - while (amp || (c && d1_isspace(c))) + while (*amp || (c && d1_isspace(c))) c = ampr (get_byte, fh, amp); - if (amp == 0 && (c == 0 || c == '>' || c == '/')) + if (*amp == 0 && (c == 0 || c == '>' || c == '/')) break; *pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p)); p->next = 0; pp = &p->next; p->value = 0; + p->what = DATA1I_xmltext; wrbuf_rewind(wrbuf); while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c)) @@ -555,11 +604,11 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, if (c == '=') { c = ampr (get_byte, fh, amp); - if (amp == 0 && c == '"') + if (*amp == 0 && c == '"') { c = ampr (get_byte, fh, amp); wrbuf_rewind(wrbuf); - while (amp || (c && c != '"')) + while (*amp || (c && c != '"')) { wrbuf_putc (wrbuf, c); c = ampr (get_byte, fh, amp); @@ -567,11 +616,11 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, if (c) c = ampr (get_byte, fh, amp); } - else if (amp == 0 && c == '\'') + else if (*amp == 0 && c == '\'') { c = ampr (get_byte, fh, amp); wrbuf_rewind(wrbuf); - while (amp || (c && c != '\'')) + while (*amp || (c && c != '\'')) { wrbuf_putc (wrbuf, c); c = ampr (get_byte, fh, amp); @@ -582,7 +631,7 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, else { wrbuf_rewind(wrbuf); - while (amp || (c && c != '>' && c != '/')) + while (*amp || (c && c != '>' && c != '/')) { wrbuf_putc (wrbuf, c); c = ampr (get_byte, fh, amp); @@ -634,13 +683,63 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, c = ampr (get_byte, fh, &); } else if (amp == 0 && c == '!') - /* tags/comments that we don't deal with yet */ { - while (amp || (c && c != '>')) - c = ampr (get_byte, fh, &); - if (c) - c = ampr (get_byte, fh, &); - continue; + int c0, amp0; + + wrbuf_rewind(wrbuf); + + c0 = ampr (get_byte, fh, &0); + if (amp0 == 0 && c0 == '\0') + break; + c = ampr (get_byte, fh, &); + + if (amp0 == 0 && c0 == '-' && amp == 0 && c == '-') + { + /* COMMENT: */ + int no_dash = 0; + + c = ampr (get_byte, fh, &); + while (amp || c) + { + if (amp == 0 && c == '-') + no_dash++; + else if (amp == 0 && c == '>' && no_dash >= 2) + { + if (level > 0) + d1_stack[level] = + data1_mk_comment_n ( + dh, m, + wrbuf_buf(wrbuf), wrbuf_len(wrbuf)-2, + d1_stack[level-1]); + c = ampr (get_byte, fh, &); /* skip > */ + break; + } + else + no_dash = 0; + wrbuf_putc (wrbuf, c); + c = ampr (get_byte, fh, &); + } + continue; + } + else + { /* DIRECTIVE: */ + + int blevel = 0; + while (amp || c) + { + if (amp == 0 && c == '>' && blevel == 0) + { + c = ampr (get_byte, fh, &); + break; + } + if (amp == 0 && c == '[') + blevel++; + if (amp == 0 && c == ']' && blevel > 0) + blevel--; + c = ampr (get_byte, fh, &); + } + continue; + } } while (amp || (c && c != '>' && c != '/' && !d1_isspace(c))) { @@ -783,7 +882,7 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, continue; } res = data1_mk_node2 (dh, m, DATA1N_data, parent); - res->u.data.what = DATA1I_text; + res->u.data.what = DATA1I_xmltext; res->u.data.formatted_text = 0; d1_stack[level] = res; @@ -871,9 +970,7 @@ data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf) } -#if HAVE_ICONV_H - -static int conv_item (NMEM m, iconv_t t, +static int conv_item (NMEM m, yaz_iconv_t t, WRBUF wrbuf, char *inbuf, size_t inlen) { wrbuf_rewind (wrbuf); @@ -883,8 +980,8 @@ static int conv_item (NMEM m, iconv_t t, { char *outbuf = wrbuf->buf + wrbuf->pos; size_t outlen = wrbuf->size - wrbuf->pos; - if (iconv (t, &inbuf, &inlen, &outbuf, &outlen) == - (size_t)(-1) && errno != E2BIG) + if (yaz_iconv (t, &inbuf, &inlen, &outbuf, &outlen) == + (size_t)(-1) && yaz_iconv_error(t) != YAZ_ICONV_E2BIG) { /* bad data. stop and skip conversion entirely */ return -1; @@ -905,7 +1002,7 @@ static int conv_item (NMEM m, iconv_t t, } static void data1_iconv_s (data1_handle dh, NMEM m, data1_node *n, - iconv_t t, WRBUF wrbuf, const char *tocode) + yaz_iconv_t t, WRBUF wrbuf, const char *tocode) { for (; n; n = n->next) { @@ -957,7 +1054,6 @@ static void data1_iconv_s (data1_handle dh, NMEM m, data1_node *n, data1_iconv_s (dh, m, n->child, t, wrbuf, tocode); } } -#endif const char *data1_get_encoding (data1_handle dh, data1_node *n) { @@ -982,19 +1078,15 @@ int data1_iconv (data1_handle dh, NMEM m, data1_node *n, const char *tocode, const char *fromcode) { -#if HAVE_ICONV_H if (strcmp (tocode, fromcode)) { WRBUF wrbuf = wrbuf_alloc(); - iconv_t t = iconv_open (tocode, fromcode); - if (t == (iconv_t) (-1)) + yaz_iconv_t t = yaz_iconv_open (tocode, fromcode); + if (!t) return -1; data1_iconv_s (dh, m, n, t, wrbuf, tocode); - iconv_close (t); + yaz_iconv_close (t); wrbuf_free (wrbuf, 1); } return 0; -#else - return -2; -#endif }