X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=retrieval%2Fd1_read.c;h=0af749251df77c3602e5943a91ce0708c260e2ee;hp=a4305657383bed57fe69ecba611b717522999df7;hb=09405de098947fc5f359e9bd1225b2747596b513;hpb=e9728b65671cd921aeefddf5474689b0dbaf2010 diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index a430565..0af7492 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -1,10 +1,102 @@ /* - * Copyright (c) 1995, Index Data. + * Copyright (c) 1995-2001, Index Data. * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * * $Log: d1_read.c,v $ - * Revision 1.13 1996-10-29 13:35:38 adam + * Revision 1.38 2001-03-27 23:06:21 adam + * Quotes and slashes may occur within attributes. + * + * Revision 1.37 2001/02/28 09:00:06 adam + * Fixed problem with stack overflow for very nested records. + * + * Revision 1.36 2001/02/21 13:46:53 adam + * C++ fixes. + * + * Revision 1.35 2000/12/05 14:44:25 adam + * Readers skips sections. + * + * Revision 1.34 2000/12/05 10:06:23 adam + * Added support for null-data rules like . + * + * Revision 1.33 2000/11/29 14:22:47 adam + * Implemented XML/SGML attributes for data1 so that d1_read reads them + * and d1_write generates proper attributes for XML/SGML records. Added + * register locking for threaded version. + * + * Revision 1.32 2000/01/06 11:25:59 adam + * Added case to prevent warning. + * + * Revision 1.31 1999/12/21 14:16:20 ian + * Changed retrieval module to allow data1 trees with no associated absyn. + * Also added a simple interface for extracting values from data1 trees using + * a string based tagpath. + * + * Revision 1.30 1999/11/30 13:47:12 adam + * Improved installation. Moved header files to include/yaz. + * + * Revision 1.29 1999/10/21 12:06:29 adam + * Retrieval module no longer uses ctype.h - functions. + * + * Revision 1.28 1999/10/21 09:50:33 adam + * SGML reader uses own isspace - it doesn't do 8-bit on WIN32! + * + * Revision 1.27 1999/08/27 09:40:32 adam + * Renamed logf function to yaz_log. Removed VC++ project files. + * + * Revision 1.26 1999/07/13 13:23:48 adam + * Non-recursive version of data1_read_node. data1_read_nodex reads + * stream of bytes (instead of buffer in memory). + * + * Revision 1.25 1999/04/20 09:56:48 adam + * Added 'name' paramter to encoder/decoder routines (typedef Odr_fun). + * Modified all encoders/decoders to reflect this change. + * + * Revision 1.24 1998/10/28 15:10:09 adam + * Added --with-yc option to configure. For the data1_node in data1.h: + * decreased size of localdata and removed member "line" which wasn't useful. + * + * Revision 1.23 1998/03/12 11:28:45 adam + * Fix: didn't set root member of tagged node in function. + * data1_add_insert_taggeddata. + * + * Revision 1.22 1998/03/05 08:15:32 adam + * Implemented data1_add_insert_taggeddata utility which is more flexible + * than data1_insert_taggeddata. + * + * Revision 1.21 1998/02/27 14:08:05 adam + * Added const to some char pointer arguments. + * Reworked data1_read_node so that it doesn't create a tree with + * pointers to original "SGML"-buffer. + * + * Revision 1.20 1998/02/11 11:53:35 adam + * Changed code so that it compiles as C++. + * + * Revision 1.19 1997/12/09 16:17:09 adam + * Fix bug regarding variants. Tags with prefix "var" was incorrectly + * interpreted as "start of variants". Now, only "var" indicates such + * start. + * Cleaned up data1_read_node so tag names and variant names are + * copied and not pointed to by the generated data1 tree. Data nodes + * still point to old buffer. + * + * Revision 1.18 1997/11/18 09:51:09 adam + * Removed element num_children from data1_node. Minor changes in + * data1 to Explain. + * + * Revision 1.17 1997/11/05 09:20:51 adam + * Minor change. + * + * Revision 1.16 1997/09/17 12:10:37 adam + * YAZ version 1.4. + * + * Revision 1.15 1997/09/05 09:50:57 adam + * Removed global data1_tabpath - uses data1_get_tabpath() instead. + * + * Revision 1.14 1997/05/14 06:54:04 adam + * C++ support. + * + * Revision 1.13 1996/10/29 13:35:38 adam * Implemented data1_set_tabpath and data1_get_tabpath. * * Revision 1.12 1996/10/11 10:35:38 adam @@ -90,44 +182,19 @@ * */ -#include +#include #include #include -#include -#include -#include - -char *data1_tabpath = 0; /* global path for tables */ - -void data1_set_tabpath(const char *p) -{ - if (data1_tabpath) - { - xfree (data1_tabpath); - data1_tabpath = NULL; - } - if (p) - { - data1_tabpath = xmalloc (strlen(p)+1); - strcpy (data1_tabpath, p); - } -} - -const char *data1_get_tabpath (void) -{ - return data1_tabpath; -} - -#if 0 -static data1_node *freelist = 0; -#endif +#include +#include +#include /* * get the tag which is the immediate parent of this node (this may mean * traversing intermediate things like variants and stuff. */ -data1_node *get_parent_tag(data1_node *n) +data1_node *get_parent_tag (data1_handle dh, data1_node *n) { for (; n && n->which != DATA1N_root; n = n->parent) if (n->which == DATA1N_tag) @@ -135,342 +202,526 @@ data1_node *get_parent_tag(data1_node *n) return 0; } -data1_node *data1_mk_node(NMEM m) +data1_node *data1_mk_node (data1_handle dh, NMEM m) { data1_node *r; -#if 0 - if ((r = freelist)) - freelist = r->next; - else - if (!(r = xmalloc(sizeof(*r)))) - abort(); -#else - r = nmem_malloc(m, sizeof(*r)); -#endif + r = (data1_node *)nmem_malloc(m, sizeof(*r)); r->next = r->child = r->last_child = r->parent = 0; - r->num_children = 0; r->destroy = 0; return r; } -#if 0 -static void fr_node(data1_node *n) +data1_node *data1_mk_node_type (data1_handle dh, NMEM m, int type) { - n->next = freelist; - freelist = n; -} + data1_node *r; + + r = data1_mk_node(dh, m); + r->which = type; + switch(type) + { + case DATA1N_tag: + r->u.tag.tag = 0; + r->u.tag.element = 0; + r->u.tag.no_data_requested = 0; + r->u.tag.node_selected = 0; + r->u.tag.make_variantlist = 0; + r->u.tag.get_bytes = -1; +#if DATA1_USING_XATTR + r->u.tag.attributes = 0; #endif + break; + case DATA1N_root: + r->u.root.type = 0; + r->u.root.absyn = 0; + break; + case DATA1N_data: + r->u.data.data = 0; + r->u.data.len = 0; + r->u.data.what = 0; + r->u.data.formatted_text = 0; + break; + default: + logf (LOG_WARN, "data_mk_node_type. bad type = %d\n", type); + } + return r; +} -void data1_free_tree(data1_node *t) +void data1_free_tree (data1_handle dh, data1_node *t) { data1_node *p = t->child, *pn; while (p) { pn = p->next; - data1_free_tree(p); + data1_free_tree (dh, p); p = pn; } if (t->destroy) (*t->destroy)(t); -#if 0 - fr_node(t); -#endif } -/* - * Insert a tagged node into the record root as first child of the node at - * which should be root or tag itself). Returns pointer to the data node, - * which can then be modified. - */ -data1_node *data1_insert_taggeddata(data1_node *root, data1_node *at, - char *tagname, NMEM m) +char *data1_insert_string (data1_handle dh, data1_node *res, + NMEM m, const char *str) +{ + int len = strlen(str); + + if (len >= DATA1_LOCALDATA) + return nmem_strdup (m, str); + else + { + strcpy (res->lbuf, str); + return res->lbuf; + } +} + +data1_node *data1_add_insert_taggeddata(data1_handle dh, data1_node *root, + data1_node *at, + const char *tagname, NMEM m, + int first_flag, int local_allowed) { - data1_node *tagn = data1_mk_node(m); + data1_node *partag = get_parent_tag (dh, at); + data1_node *tagn = data1_mk_node_type (dh, m, DATA1N_tag); + data1_element *e = NULL; data1_node *datn; - tagn->which = DATA1N_tag; - tagn->line = -1; - tagn->u.tag.tag = 0; - tagn->u.tag.node_selected = 0; - tagn->u.tag.make_variantlist = 0; - tagn->u.tag.no_data_requested = 0; - tagn->u.tag.get_bytes = -1; - if (!(tagn->u.tag.element = data1_getelementbytagname(root->u.root.absyn, - 0, tagname))) - return 0; - tagn->child = datn = data1_mk_node(m); - tagn->num_children = 1; + tagn->u.tag.tag = data1_insert_string (dh, tagn, m, tagname); + + if (partag) + e = partag->u.tag.element; + tagn->u.tag.element = + data1_getelementbytagname (dh, root->u.root.absyn, e, tagname); + if (!local_allowed && !tagn->u.tag.element) + return NULL; + tagn->last_child = tagn->child = datn = data1_mk_node_type (dh, m, DATA1N_data); + tagn->root = root; datn->parent = tagn; datn->root = root; - datn->which = DATA1N_data; datn->u.data.formatted_text = 0; - tagn->next = at->child; tagn->parent = at; - at->child = tagn; - at->num_children++; + + if (first_flag) + { + tagn->next = at->child; + if (!tagn->next) + at->last_child = tagn; + at->child = tagn; + } + else + { + if (!at->child) + at->child = tagn; + else + { + assert (at->last_child); + at->last_child->next = tagn; + } + at->last_child = tagn; + } return datn; } +data1_node *data1_add_taggeddata(data1_handle dh, data1_node *root, + data1_node *at, + const char *tagname, NMEM m) +{ + return data1_add_insert_taggeddata (dh, root, at, tagname, m, 0, 1); +} + + /* - * Ugh. Sometimes functions just grow and grow on you. This one reads a - * 'node' and its children. + * Insert a tagged node into the record root as first child of the node at + * which should be root or tag itself). Returns pointer to the data node, + * which can then be modified. */ -data1_node *data1_read_node(char **buf, data1_node *parent, int *line, - data1_absyn *absyn, NMEM m) +data1_node *data1_insert_taggeddata(data1_handle dh, data1_node *root, + data1_node *at, + const char *tagname, NMEM m) { - data1_node *res; - - while (**buf && isspace(**buf)) - { - if (**buf == '\n') - (*line)++; - (*buf)++; - } - if (!**buf) - return 0; + return data1_add_insert_taggeddata (dh, root, at, tagname, m, 1, 0); +} - if (**buf == '<') /* beginning of tag */ +#if DATA1_USING_XATTR +data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, + int (*get_byte)(void *fh), void *fh, + WRBUF wrbuf, int *ch) +{ + data1_xattr *p_first = 0; + data1_xattr **pp = &p_first; + int c = *ch; + for (;;) { - char *tag = (*buf) + 1; - char *args = 0; - char *t = tag; - data1_node **pp; - data1_element *elem = 0; - - for (; *t && *t != '>' && !isspace(*t); t++); - if (*t != '>' && !isspace(*t)) + data1_xattr *p; + int len; + while (c && d1_isspace(c)) + c = (*get_byte)(fh); + if (!c || c == '>' || c == '/') + break; + *pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p)); + p->next = 0; + pp = &p->next; + p->value = 0; + + wrbuf_rewind(wrbuf); + while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c)) { - logf(LOG_WARN, "d1: %d: Malformed tag", *line); - return 0; + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); } - if (isspace(*t)) /* the tag has arguments */ + wrbuf_putc (wrbuf, '\0'); + len = wrbuf_len(wrbuf); + p->name = (char*) nmem_malloc (m, len); + strcpy (p->name, wrbuf_buf(wrbuf)); + if (c == '=') { - while (isspace(*t)) - t++; - if (*t != '>') + c = (*get_byte)(fh); + if (c == '"') { - args = t; - for (; *t && *t != '>'; t++); - if (*t != '>' && !isspace(*t)) + c = (*get_byte)(fh); + wrbuf_rewind(wrbuf); + while (c && c != '"') { - logf(LOG_WARN, "d1: %d: Malformed tag", *line); - return 0; - } + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); + } + if (c) + c = (*get_byte)(fh); } + else + { + wrbuf_rewind(wrbuf); + while (c && c != '>' && c != '/') + { + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); + } + } + wrbuf_putc (wrbuf, '\0'); + len = wrbuf_len(wrbuf); + p->value = (char*) nmem_malloc (m, len); + strcpy (p->value, wrbuf_buf(wrbuf)); } + } + *ch = c; + return p_first; +} +#endif - /* - * if end-tag, see if we terminate parent. If so, consume and return. - * Else, return. - */ - *t = '\0'; - if (*tag == '/') +/* + * Ugh. Sometimes functions just grow and grow on you. This one reads a + * 'node' and its children. + */ +data1_node *data1_read_nodex (data1_handle dh, NMEM m, + int (*get_byte)(void *fh), void *fh, WRBUF wrbuf) +{ + data1_absyn *absyn = 0; + data1_node *d1_stack[256]; + data1_node *res; + int c; + int level = 0; + int line = 1; + + d1_stack[level] = 0; + c = (*get_byte)(fh); + while (1) + { + data1_node *parent = level ? d1_stack[level-1] : 0; + while (c != '\0' && d1_isspace(c)) { - if (!parent) - return 0; - if (!*(tag +1) || (parent->which == DATA1N_root && !strcmp(tag + 1, - parent->u.root.type)) || - (parent->which == DATA1N_tag && !strcmp(tag + 1, - parent->u.tag.tag))) + if (c == '\n') + line++; + c = (*get_byte)(fh); + } + if (c == '\0') + break; + + if (c == '<') /* beginning of tag */ + { +#if DATA1_USING_XATTR + data1_xattr *xattr; +#endif + char tag[64]; + char args[256]; + int null_tag = 0; + int end_tag = 0; + size_t i = 0; + + c = (*get_byte)(fh); + if (c == '/') { - *buf = t + 1; - return 0; + end_tag = 1; + c = (*get_byte)(fh); } - else + else if (c == '!') /* tags/comments that we don't deal with yet */ { - *t = '>'; - return 0; + while (c && c != '>') + c = (*get_byte)(fh); + if (c) + c = (*get_byte)(fh); + continue; } - } - - if (!absyn) /* parent node - what are we? */ - { - if (!(absyn = data1_get_absyn(tag))) + while (c && c != '>' && c != '/' && !d1_isspace(c)) { - logf(LOG_WARN, "Unable to acquire abstract syntax for '%s'", - tag); - return 0; + if (i < (sizeof(tag)-1)) + tag[i++] = c; + c = (*get_byte)(fh); } - res = data1_mk_node(m); - res->which = DATA1N_root; - res->u.root.type = tag; - res->u.root.absyn = absyn; - res->root = res; - *buf = t + 1; - } - else if (!strncmp(tag, "var", 3)) - { - char class[DATA1_MAX_SYMBOL], type[DATA1_MAX_SYMBOL]; - data1_vartype *tp; - int val_offset; - data1_node *p; - - if (sscanf(args, "%s %s %n", class, type, &val_offset) != 2) + tag[i] = '\0'; +#if DATA1_USING_XATTR + xattr = data1_read_xattr (dh, m, get_byte, fh, wrbuf, &c); + args[0] = '\0'; +#else + while (d1_isspace(c)) + c = (*get_byte)(fh); + for (i = 0; c && c != '>' && c != '/'; c = (*get_byte)(fh)) + if (i < (sizeof(args)-1)) + args[i++] = c; + args[i] = '\0'; +#endif + if (c == '/') + { /* or */ + null_tag = 1; + c = (*get_byte)(fh); + } + if (c != '>') { - logf(LOG_WARN, "Malformed variant triple at '%s'", tag); + yaz_log(LOG_WARN, "d1: %d: Malformed tag", line); return 0; } - if (!(tp = data1_getvartypebyct(parent->root->u.root.absyn->varset, - class, type))) - return 0; - - /* - * If we're the first variant in this group, create a parent var, - * and insert it before the current variant. - */ - if (parent->which != DATA1N_variant) + else + c = (*get_byte)(fh); + + /* End tag? */ + if (end_tag) { - res = data1_mk_node(m); - res->which = DATA1N_variant; - res->u.variant.type = 0; - res->u.variant.value = 0; - res->root = parent->root; - *t = '>'; + if (*tag == '\0') + --level; /* */ + else + { /* */ + int i = level; + while (i > 0) + { + parent = d1_stack[--i]; + if ((parent->which == DATA1N_root && + !strcmp(tag, parent->u.root.type)) || + (parent->which == DATA1N_tag && + !strcmp(tag, parent->u.tag.tag))) + { + level = i; + break; + } + } + if (i != level) + { + yaz_log (LOG_WARN, "%d: no begin tag for %s", + line, tag); + break; + } + } + if (level == 0) + return d1_stack[0]; + continue; + } + if (level == 0) /* root ? */ + { + if (!(absyn = data1_get_absyn (dh, tag))) + { + yaz_log(LOG_WARN, "Unable to acquire abstract syntax " "for '%s'", tag); + /* It's now OK for a record not to have an absyn */ + } + res = data1_mk_node_type (dh, m, DATA1N_root); + res->u.root.type = data1_insert_string (dh, res, m, tag); + res->u.root.absyn = absyn; + res->root = res; } - else + else if (!strcmp(tag, "var")) { + char tclass[DATA1_MAX_SYMBOL], type[DATA1_MAX_SYMBOL]; + data1_vartype *tp; + int val_offset; + + if (sscanf(args, "%s %s %n", tclass, type, &val_offset) != 2) + { + yaz_log(LOG_WARN, "Malformed variant triple at '%s'", tag); + continue; + } + if (!(tp = + data1_getvartypebyct(dh, + parent->root->u.root.absyn->varset, + tclass, type))) + continue; /* - * now determine if one of our ancestor triples is of same type. - * If so, we break here. This will make the parser unwind until - * we become a sibling (alternate variant) to the aforementioned - * triple. It stinks that we re-parse these tags on every - * iteration of this. This is a function in need of a rewrite. + * If we're the first variant in this group, create a parent + * variant, and insert it before the current variant. */ - for (p = parent; p->which == DATA1N_variant; p = p->parent) - if (p->u.variant.type == tp) - { - *t = '>'; - return 0; - } - - res = data1_mk_node(m); - res->which = DATA1N_variant; + if (parent->which != DATA1N_variant) + { + res = data1_mk_node (dh, m); + res->which = DATA1N_variant; + res->u.variant.type = 0; + res->u.variant.value = 0; + } + else + { + /* + * now determine if one of our ancestor triples is of + * same type. If so, we break here. + */ + int i; + for (i = level-1; d1_stack[i]->which==DATA1N_variant; --i) + if (d1_stack[i]->u.variant.type == tp) + { + level = i; + break; + } + res = data1_mk_node (dh, m); + res->which = DATA1N_variant; + res->u.variant.type = tp; + res->u.variant.value = + data1_insert_string (dh, res, m, args + val_offset); + } + } + else /* tag.. acquire our element in the abstract syntax */ + { + data1_node *partag = get_parent_tag (dh, parent); + data1_element *elem, *e = 0; + int localtag = 0; + + if (parent->which == DATA1N_variant) + return 0; + if (partag) + if (!(e = partag->u.tag.element)) + localtag = 1; /* our parent is a local tag */ + + elem = data1_getelementbytagname(dh, absyn, e, tag); + res = data1_mk_node_type (dh, m, DATA1N_tag); + res->u.tag.tag = data1_insert_string (dh, res, m, tag); + res->u.tag.element = elem; +#if DATA1_USING_XATTR + res->u.tag.attributes = xattr; +#endif + } + if (parent) + { + parent->last_child = res; res->root = parent->root; - res->u.variant.type = tp; - res->u.variant.value = args + val_offset; - *buf = t + 1; } + res->parent = parent; + if (d1_stack[level]) + d1_stack[level]->next = res; + else if (parent) + parent->child = res; + d1_stack[level] = res; + d1_stack[level+1] = 0; + if (level < 250 && !null_tag) + ++level; } - else /* tag.. acquire our element in the abstract syntax */ + else /* != '<'... this is a body of text */ { - data1_node *partag = get_parent_tag(parent); - data1_element *e = 0; - int localtag = 0; - - if (parent->which == DATA1N_variant) + const char *src; + char *dst; + int len, prev_char = 0; + + if (level == 0) { - *t = '>'; - return 0; + c = (*get_byte)(fh); + continue; } - if (partag) - if (!(e = partag->u.tag.element)) - localtag = 1; /* our parent is a local tag */ + res = data1_mk_node_type (dh, m, DATA1N_data); + res->parent = parent; + res->u.data.what = DATA1I_text; + res->u.data.formatted_text = 0; + res->root = parent->root; + parent->last_child = res; + if (d1_stack[level]) + d1_stack[level]->next = res; + else + parent->child = res; + d1_stack[level] = res; + + wrbuf_rewind(wrbuf); -#if 0 - if (!localtag && !(elem = data1_getelementbytagname(absyn, - e, tag)) && (data1_gettagbyname(absyn->tagset, tag))) + while (c && c != '<') { - if (parent->which == DATA1N_root) - logf(LOG_WARN, "Tag '%s' used out of context", tag); - *t = '>'; - return 0; + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); } -#else - elem = data1_getelementbytagname(absyn, e, tag); -#endif - res = data1_mk_node(m); - res->which = DATA1N_tag; - res->u.tag.element = elem; - res->u.tag.tag = tag; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; - res->u.tag.get_bytes = -1; - res->root = parent->root; - *buf = t + 1; - } - - res->parent = parent; - res->num_children = 0; - - pp = &res->child; - /* - * Read child nodes. - */ - while ((*pp = data1_read_node(buf, res, line, absyn, m))) - { - res->last_child = *pp; - res->num_children++; - pp = &(*pp)->next; - } - } - else /* != '<'... this is a body of text */ - { - int len = 0; - char *data = *buf, *pp = *buf; -#if 0 - data1_node *partag = get_parent_tag(parent); -#endif + len = wrbuf_len(wrbuf); - if (!parent) /* abort if abstract syntax is undefined */ - return 0; - /* Determine length and remove newlines/extra blanks */ - while (**buf && **buf != '<') - { - if (**buf == '\n') - (*line)++; - if (isspace(**buf)) + /* use local buffer of nmem if too large */ + if (len >= DATA1_LOCALDATA) + res->u.data.data = (char*) nmem_malloc (m, len); + else + res->u.data.data = res->lbuf; + + /* read "data" and transfer while removing white space */ + dst = res->u.data.data; + for (src = wrbuf_buf(wrbuf); --len >= 0; src++) { - *(pp++) = ' '; - (*buf)++; - while (isspace(**buf)) - (*buf)++; + if (*src == '\n') + line++; + if (d1_isspace (*src)) + prev_char = ' '; + else + { + if (prev_char) + { + *dst++ = prev_char; + prev_char = 0; + } + *dst++ = *src; + } } - else - *(pp++) = *((*buf)++); - len++; + res->u.data.len = dst - res->u.data.data; } - while (isspace(data[len-1])) - len--; - res = data1_mk_node(m); - res->parent = parent; - res->which = DATA1N_data; - res->u.data.what = DATA1I_text; - res->u.data.len = len; - res->u.data.data = data; - res->u.data.formatted_text = 0; - res->root = parent->root; } - return res; + return 0; +} + +int getc_mem (void *fh) +{ + const char **p = (const char **) fh; + if (**p) + return *(*p)++; + return 0; +} + +data1_node *data1_read_node (data1_handle dh, const char **buf, NMEM m) +{ + WRBUF wrbuf = wrbuf_alloc(); + data1_node *node; + + node = data1_read_nodex(dh, m, getc_mem, (void *) (buf), wrbuf); + wrbuf_free (wrbuf, 1); + return node; } /* * Read a record in the native syntax. */ -data1_node *data1_read_record(int (*rf)(void *, char *, size_t), void *fh, +data1_node *data1_read_record(data1_handle dh, + int (*rf)(void *, char *, size_t), void *fh, NMEM m) { - static char *buf = 0; - char *bp; - static int size; + int *size; + char **buf = data1_get_read_buf (dh, &size); + const char *bp; int rd = 0, res; - int line = 0; - - if (!buf && !(buf = xmalloc(size = 4096))) - abort(); - + + if (!*buf) + *buf = (char *)xmalloc(*size = 4096); + for (;;) { - if (rd + 4096 > size && !(buf =xrealloc(buf, size *= 2))) + if (rd + 2048 >= *size && !(*buf =(char *)xrealloc(*buf, *size *= 2))) abort(); - if ((res = (*rf)(fh, buf + rd, 4096)) <= 0) + if ((res = (*rf)(fh, *buf + rd, 2048)) <= 0) { if (!res) { - bp = buf; - return data1_read_node(&bp, 0, &line, 0, m); + bp = *buf; + (*buf)[rd] = '\0'; + return data1_read_node(dh, &bp, m); } else return 0; @@ -478,3 +729,10 @@ data1_node *data1_read_record(int (*rf)(void *, char *, size_t), void *fh, rd += res; } } + +data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf) +{ + const char *bp = buf; + return data1_read_node (dh, &bp, m); +} +