X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=retrieval%2Fd1_read.c;h=e79ad726ec9a0fdaed95def944b511be3fbc8793;hb=080575c92b35597c3beb394c23d2a1d22532bbdc;hp=ac19a9f2292008d128d0212761877e2ad023303e;hpb=651ccb74d12f2968e791b3f411a6c55bca68428a;p=yaz-moved-to-github.git diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index ac19a9f..e79ad72 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -3,17 +3,33 @@ * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_read.c,v 1.42 2002-05-13 14:13:37 adam Exp $ + * $Id: d1_read.c,v 1.45 2002-07-05 16:04:28 adam Exp $ */ #include #include #include +#include + +#if HAVE_ICONV_H +#include +#endif + #include #include #include +data1_node *data1_get_root_tag (data1_handle dh, data1_node *n) +{ + if (!n) + return 0; + n = n->child; + while (n && n->which != DATA1N_tag) + n = n->next; + return n; +} + /* * get the tag which is the immediate parent of this node (this may mean * traversing intermediate things like variants and stuff. @@ -81,10 +97,20 @@ data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type, r->u.data.what = 0; r->u.data.formatted_text = 0; break; + case DATA1N_comment: + r->u.data.data = 0; + r->u.data.len = 0; + r->u.data.what = 0; + r->u.data.formatted_text = 1; + break; case DATA1N_variant: r->u.variant.type = 0; r->u.variant.value = 0; break; + case DATA1N_preprocess: + r->u.preprocess.target = 0; + r->u.preprocess.attributes = 0; + break; default: logf (LOG_WARN, "data_mk_node_type. bad type = %d\n", type); } @@ -105,7 +131,7 @@ void data1_free_tree (data1_handle dh, data1_node *t) (*t->destroy)(t); } -data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name) +data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name) { data1_absyn *absyn = data1_get_absyn (dh, name); data1_node *res; @@ -121,6 +147,35 @@ data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name) return res; } +void data1_set_root(data1_handle dh, data1_node *res, + NMEM nmem, const char *name) +{ + data1_absyn *absyn = data1_get_absyn (dh, name); + + res->u.root.type = data1_insert_string (dh, res, nmem, name); + res->u.root.absyn = absyn; +} + +data1_node *data1_mk_preprocess (data1_handle dh, NMEM nmem, + const char *target, const char **attr, + data1_node *at) +{ + data1_xattr **p; + data1_node *res = data1_mk_node2 (dh, nmem, DATA1N_preprocess, at); + res->u.preprocess.target = data1_insert_string (dh, res, nmem, target); + + p = &res->u.preprocess.attributes; + while (attr && *attr) + { + *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); + (*p)->name = nmem_strdup (nmem, *attr++); + (*p)->value = nmem_strdup (nmem, *attr++); + p = &(*p)->next; + } + *p = 0; + return res; +} + data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem, const char *tag, size_t len, const char **attr, data1_node *at) @@ -141,14 +196,36 @@ data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem, while (attr && *attr) { *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); - (*p)->name = nmem_strdup (nmem, attr[0]); - (*p)->value = nmem_strdup (nmem, attr[1]); + (*p)->name = nmem_strdup (nmem, *attr++); + (*p)->value = nmem_strdup (nmem, *attr++); p = &(*p)->next; } *p = 0; return res; } +void data1_tag_add_attr (data1_handle dh, NMEM nmem, + data1_node *res, const char **attr) +{ + data1_xattr **p; + + if (res->which != DATA1N_tag) + return; + + p = &res->u.tag.attributes; + while (*p) + p = &(*p)->next; + + while (attr && *attr) + { + *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); + (*p)->name = nmem_strdup (nmem, *attr++); + (*p)->value = nmem_strdup (nmem, *attr++); + p = &(*p)->next; + } + *p = 0; +} + data1_node *data1_mk_tag (data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at) { @@ -158,6 +235,13 @@ data1_node *data1_mk_tag (data1_handle dh, NMEM nmem, data1_node *data1_search_tag (data1_handle dh, data1_node *n, const char *tag) { + if (*tag == '/') + { + n = data1_get_root_tag (dh, n); + if (n) + n = n->child; + tag++; + } for (; n; n = n->next) if (n->which == DATA1N_tag && n->u.tag.tag && !yaz_matchstr (tag, n->u.tag.tag)) @@ -178,7 +262,6 @@ data1_node *data1_mk_tag_uni (data1_handle dh, NMEM nmem, return node; } - data1_node *data1_mk_text_n (data1_handle dh, NMEM mem, const char *buf, size_t len, data1_node *parent) { @@ -190,6 +273,13 @@ data1_node *data1_mk_text_n (data1_handle dh, NMEM mem, return res; } +data1_node *data1_mk_text_nf (data1_handle dh, NMEM mem, + const char *buf, size_t len, data1_node *parent) +{ + data1_node *res = data1_mk_text_n (dh, mem, buf, len, parent); + res->u.data.formatted_text = 1; + return res; +} data1_node *data1_mk_text (data1_handle dh, NMEM mem, const char *buf, data1_node *parent) @@ -197,6 +287,24 @@ data1_node *data1_mk_text (data1_handle dh, NMEM mem, return data1_mk_text_n (dh, mem, buf, strlen(buf), parent); } +data1_node *data1_mk_comment_n (data1_handle dh, NMEM mem, + const char *buf, size_t len, + data1_node *parent) +{ + data1_node *res = data1_mk_node2 (dh, mem, DATA1N_comment, parent); + res->u.data.what = DATA1I_text; + res->u.data.len = len; + + res->u.data.data = data1_insert_string_n (dh, res, mem, buf, len); + return res; +} + +data1_node *data1_mk_comment (data1_handle dh, NMEM mem, + const char *buf, data1_node *parent) +{ + return data1_mk_comment_n (dh, mem, buf, strlen(buf), parent); +} + char *data1_insert_string_n (data1_handle dh, data1_node *res, NMEM m, const char *str, size_t len) { @@ -514,14 +622,10 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, break; } } - if (level == 0) + if (level <= 1) return d1_stack[0]; continue; } - if (level == 0) /* root ? */ - { - res = data1_mk_root (dh, m, tag); - } else if (!strcmp(tag, "var")) { char tclass[DATA1_MAX_SYMBOL], type[DATA1_MAX_SYMBOL]; @@ -567,7 +671,13 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, } else { - /* tag.. acquire our element in the abstract syntax */ + + /* tag .. acquire our element in the abstract syntax */ + if (level == 0) + { + parent = data1_mk_root (dh, m, tag); + d1_stack[level++] = parent; + } res = data1_mk_tag (dh, m, tag, 0 /* attr */, parent); res->u.tag.attributes = xattr; } @@ -689,3 +799,99 @@ data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf) return data1_read_node (dh, &bp, m); } + +#if HAVE_ICONV_H + +static int conv_item (NMEM m, iconv_t t, + WRBUF wrbuf, char *inbuf, size_t inlen) +{ + wrbuf_rewind (wrbuf); + if (wrbuf->size < 10) + wrbuf_grow (wrbuf, 10); + for (;;) + { + char *outbuf = wrbuf->buf + wrbuf->pos; + size_t outlen = wrbuf->size - wrbuf->pos; + if (iconv (t, &inbuf, &inlen, &outbuf, &outlen) == + (size_t)(-1) && errno != E2BIG) + { + /* bad data. stop and skip conversion entirely */ + return -1; + } + else if (inlen == 0) + { /* finished converting */ + wrbuf->pos = wrbuf->size - outlen; + break; + } + else + { + /* buffer too small: make sure we expand buffer */ + wrbuf->pos = wrbuf->size - outlen; + wrbuf_grow(wrbuf, 20); + } + } + return 0; +} + +static void data1_iconv_s (data1_handle dh, NMEM m, data1_node *n, + iconv_t t, WRBUF wrbuf) +{ + for (; n; n = n->next) + { + switch (n->which) + { + case DATA1N_data: + case DATA1N_comment: + if (conv_item (m, t, wrbuf, n->u.data.data, n->u.data.len) == 0) + { + n->u.data.data = + data1_insert_string_n (dh, n, m, wrbuf->buf, + wrbuf->pos); + n->u.data.len = wrbuf->pos; + } + break; + case DATA1N_tag: + if (conv_item (m, t, wrbuf, n->u.tag.tag, strlen(n->u.tag.tag)) + == 0) + { + n->u.tag.tag = + data1_insert_string_n (dh, n, m, + wrbuf->buf, wrbuf->pos); + } + if (n->u.tag.attributes) + { + data1_xattr *p; + for (p = n->u.tag.attributes; p; p = p->next) + { + if (conv_item(m, t, wrbuf, p->value, strlen(p->value)) + == 0) + { + wrbuf_puts (wrbuf, ""); + p->value = nmem_strdup (m, wrbuf->buf); + } + } + } + break; + } + data1_iconv_s (dh, m, n->child, t, wrbuf); + } +} +#endif + +int data1_iconv (data1_handle dh, NMEM m, data1_node *n, + const char *tocode, + const char *fromcode) +{ +#if HAVE_ICONV_H + WRBUF wrbuf = wrbuf_alloc(); + iconv_t t = iconv_open (tocode, fromcode); + if (t == (iconv_t) (-1)) + return -1; + data1_iconv_s (dh, m, n, t, wrbuf); + iconv_close (t); + wrbuf_free (wrbuf, 1); + return 0; +#else + return -2; +#endif +}