X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=retrieval%2Fd1_read.c;h=0af749251df77c3602e5943a91ce0708c260e2ee;hb=3ee667935c4cac82445ecd05c8ae421888dfd40f;hp=a2753415569c2eacc237b9493bc07bba2c16f907;hpb=43801085d371c69455730182f72c4e53d8c8218f;p=yaz-moved-to-github.git diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index a275341..0af7492 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -1,10 +1,25 @@ /* - * Copyright (c) 1995-1999, Index Data. + * Copyright (c) 1995-2001, Index Data. * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * * $Log: d1_read.c,v $ - * Revision 1.33 2000-11-29 14:22:47 adam + * Revision 1.38 2001-03-27 23:06:21 adam + * Quotes and slashes may occur within attributes. + * + * Revision 1.37 2001/02/28 09:00:06 adam + * Fixed problem with stack overflow for very nested records. + * + * Revision 1.36 2001/02/21 13:46:53 adam + * C++ fixes. + * + * Revision 1.35 2000/12/05 14:44:25 adam + * Readers skips sections. + * + * Revision 1.34 2000/12/05 10:06:23 adam + * Added support for null-data rules like . + * + * Revision 1.33 2000/11/29 14:22:47 adam * Implemented XML/SGML attributes for data1 so that d1_read reads them * and d1_write generates proper attributes for XML/SGML records. Added * register locking for threaded version. @@ -342,7 +357,7 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, c = (*get_byte)(fh); if (!c || c == '>' || c == '/') break; - *pp = p = nmem_malloc (m, sizeof(*p)); + *pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p)); p->next = 0; pp = &p->next; p->value = 0; @@ -355,25 +370,36 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, } wrbuf_putc (wrbuf, '\0'); len = wrbuf_len(wrbuf); - p->name = nmem_malloc (m, len); + p->name = (char*) nmem_malloc (m, len); strcpy (p->name, wrbuf_buf(wrbuf)); if (c == '=') { c = (*get_byte)(fh); if (c == '"') - c = (*get_byte)(fh); - wrbuf_rewind(wrbuf); - while (c && c != '"' && c != '>' && c != '/') { - wrbuf_putc (wrbuf, c); - c = (*get_byte)(fh); + c = (*get_byte)(fh); + wrbuf_rewind(wrbuf); + while (c && c != '"') + { + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); + } + if (c) + c = (*get_byte)(fh); } + else + { + wrbuf_rewind(wrbuf); + while (c && c != '>' && c != '/') + { + wrbuf_putc (wrbuf, c); + c = (*get_byte)(fh); + } + } wrbuf_putc (wrbuf, '\0'); len = wrbuf_len(wrbuf); - p->value = nmem_malloc (m, len); + p->value = (char*) nmem_malloc (m, len); strcpy (p->value, wrbuf_buf(wrbuf)); - if (c == '"') - c = (*get_byte)(fh); } } *ch = c; @@ -416,10 +442,30 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, #endif char tag[64]; char args[256]; - size_t i; - for (i = 0; (c=(*get_byte)(fh)) && c != '>' && !d1_isspace(c);) + int null_tag = 0; + int end_tag = 0; + size_t i = 0; + + c = (*get_byte)(fh); + if (c == '/') + { + end_tag = 1; + c = (*get_byte)(fh); + } + else if (c == '!') /* tags/comments that we don't deal with yet */ + { + while (c && c != '>') + c = (*get_byte)(fh); + if (c) + c = (*get_byte)(fh); + continue; + } + while (c && c != '>' && c != '/' && !d1_isspace(c)) + { if (i < (sizeof(tag)-1)) tag[i++] = c; + c = (*get_byte)(fh); + } tag[i] = '\0'; #if DATA1_USING_XATTR xattr = data1_read_xattr (dh, m, get_byte, fh, wrbuf, &c); @@ -427,11 +473,16 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, #else while (d1_isspace(c)) c = (*get_byte)(fh); - for (i = 0; c && c != '>'; c = (*get_byte)(fh)) + for (i = 0; c && c != '>' && c != '/'; c = (*get_byte)(fh)) if (i < (sizeof(args)-1)) args[i++] = c; args[i] = '\0'; #endif + if (c == '/') + { /* or */ + null_tag = 1; + c = (*get_byte)(fh); + } if (c != '>') { yaz_log(LOG_WARN, "d1: %d: Malformed tag", line); @@ -441,9 +492,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, c = (*get_byte)(fh); /* End tag? */ - if (*tag == '/') + if (end_tag) { - if (tag[1] == '\0') + if (*tag == '\0') --level; /* */ else { /* */ @@ -452,9 +503,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, { parent = d1_stack[--i]; if ((parent->which == DATA1N_root && - !strcmp(tag+1, parent->u.root.type)) || + !strcmp(tag, parent->u.root.type)) || (parent->which == DATA1N_tag && - !strcmp(tag+1, parent->u.tag.tag))) + !strcmp(tag, parent->u.tag.tag))) { level = i; break; @@ -561,7 +612,9 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, else if (parent) parent->child = res; d1_stack[level] = res; - d1_stack[++level] = 0; + d1_stack[level+1] = 0; + if (level < 250 && !null_tag) + ++level; } else /* != '<'... this is a body of text */ {