From 3eef743c121a36edcf66e4e3651d5ceb8b4e0156 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 8 Oct 2002 20:14:43 +0000 Subject: [PATCH] sgml filter doesn't interpret entities --- debian/changelog | 6 ++++++ debian/rules | 2 +- include/yaz/data1.h | 15 +++++++++------ retrieval/d1_prtree.c | 7 ++++++- retrieval/d1_read.c | 15 +++++++++++++-- retrieval/d1_write.c | 47 ++++++++++++++++++++++++++++------------------- 6 files changed, 63 insertions(+), 29 deletions(-) diff --git a/debian/changelog b/debian/changelog index 8134884..40a2a10 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +yaz (1.9.1-3) unstable; urgency=low + + * don't let grs.sgml filter interpret &-entities. + + -- Adam Dickmeiss Tue, 8 Oct 2002 22:13:30 +0100 + yaz (1.9.1-2) unstable; urgency=low * official release. diff --git a/debian/rules b/debian/rules index ca06c9f..4370644 100755 --- a/debian/rules +++ b/debian/rules @@ -84,7 +84,7 @@ stamp-install: stamp-build binary-indep: build install dh_testdir dh_testroot - dh_installdocs -p yaz-doc doc/*.html doc/*.pdf + dh_installdocs -p yaz-doc doc/*.html doc/*.pdf doc/*.png dh_installchangelogs -p yaz-doc dh_link -p yaz-doc dh_strip -p yaz-doc diff --git a/include/yaz/data1.h b/include/yaz/data1.h index e214fa5..0db650f 100644 --- a/include/yaz/data1.h +++ b/include/yaz/data1.h @@ -23,7 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Id: data1.h,v 1.17 2002-09-24 07:58:59 adam Exp $ + * $Id: data1.h,v 1.18 2002-10-08 20:14:44 adam Exp $ */ #ifndef DATA1_H @@ -197,6 +197,7 @@ typedef struct data1_xattr { char *name; char *value; struct data1_xattr *next; + unsigned short what; /* DATA1I_text, .. see data1_node.u.data */ } data1_xattr; typedef struct data1_absyn @@ -257,16 +258,18 @@ typedef struct data1_node { char *data; /* filename or data */ int len; - /* text inclusion */ + /* text inclusion */ #define DATA1I_inctxt 1 - /* binary data inclusion */ + /* binary data inclusion */ #define DATA1I_incbin 2 /* text data */ #define DATA1I_text 3 - /* numerical data */ + /* numerical data */ #define DATA1I_num 4 - /* object identifier */ -#define DATA1I_oid 5 + /* object identifier */ +#define DATA1I_oid 5 + /* XML text */ +#define DATA1I_xmltext 6 unsigned what:7; unsigned formatted_text : 1; /* newlines are significant */ } data; diff --git a/retrieval/d1_prtree.c b/retrieval/d1_prtree.c index 6984659..4354b30 100644 --- a/retrieval/d1_prtree.c +++ b/retrieval/d1_prtree.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_prtree.c,v 1.10 2002-09-24 13:58:13 adam Exp $ + * $Id: d1_prtree.c,v 1.11 2002-10-08 20:14:44 adam Exp $ */ #include @@ -71,6 +71,11 @@ static void pr_tree (data1_handle dh, data1_node *n, FILE *out, int level) pr_string (out, n->u.data.data, n->u.data.len); fprintf (out, "'\n"); break; + case DATA1I_xmltext: + fprintf (out, "xml text '"); + pr_string (out, n->u.data.data, n->u.data.len); + fprintf (out, "'\n"); + break; default: fprintf (out, "unknown(%d)\n", n->u.data.what); break; diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c index 2660283..934bd87 100644 --- a/retrieval/d1_read.c +++ b/retrieval/d1_read.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_read.c,v 1.52 2002-09-24 07:58:59 adam Exp $ + * $Id: d1_read.c,v 1.53 2002-10-08 20:14:44 adam Exp $ */ #include @@ -187,6 +187,8 @@ data1_node *data1_mk_preprocess_n (data1_handle dh, NMEM nmem, *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; + p = &(*p)->next; } *p = 0; @@ -223,6 +225,7 @@ data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem, *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; p = &(*p)->next; } *p = 0; @@ -246,6 +249,7 @@ void data1_tag_add_attr (data1_handle dh, NMEM nmem, *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p)); (*p)->name = nmem_strdup (nmem, *attr++); (*p)->value = nmem_strdup (nmem, *attr++); + (*p)->what = DATA1I_text; p = &(*p)->next; } *p = 0; @@ -489,6 +493,11 @@ data1_node *data1_mk_tag_data_text_uni (data1_handle dh, data1_node *at, static int ampr (int (*get_byte)(void *fh), void *fh, int *amp) { +#if 1 + int c = (*get_byte)(fh); + *amp = 0; + return c; +#else int c = (*get_byte)(fh); *amp = 0; if (c == '&') @@ -524,6 +533,7 @@ static int ampr (int (*get_byte)(void *fh), void *fh, int *amp) } } return c; +#endif } data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, @@ -545,6 +555,7 @@ data1_xattr *data1_read_xattr (data1_handle dh, NMEM m, p->next = 0; pp = &p->next; p->value = 0; + p->what = DATA1I_xmltext; wrbuf_rewind(wrbuf); while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c)) @@ -837,7 +848,7 @@ data1_node *data1_read_nodex (data1_handle dh, NMEM m, continue; } res = data1_mk_node2 (dh, m, DATA1N_data, parent); - res->u.data.what = DATA1I_text; + res->u.data.what = DATA1I_xmltext; res->u.data.formatted_text = 0; d1_stack[level] = res; diff --git a/retrieval/d1_write.c b/retrieval/d1_write.c index 3114e40..b884248 100644 --- a/retrieval/d1_write.c +++ b/retrieval/d1_write.c @@ -3,7 +3,7 @@ * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Id: d1_write.c,v 1.18 2002-09-24 13:58:13 adam Exp $ + * $Id: d1_write.c,v 1.19 2002-10-08 20:14:44 adam Exp $ */ #include @@ -65,6 +65,28 @@ static void wrbuf_put_cdata(WRBUF b, const char *msg) wrbuf_write_cdata (b, msg, strlen(msg)); } +static void wrbuf_put_xattr(WRBUF b, data1_xattr *p) +{ + for (; p; p = p->next) + { + wrbuf_putc (b, ' '); + if (p->what == DATA1I_xmltext) + wrbuf_puts (b, p->name); + else + wrbuf_put_cdata (b, p->name); + if (p->value) + { + wrbuf_putc (b, '='); + wrbuf_putc (b, '"'); + if (p->what == DATA1I_text) + wrbuf_put_cdata (b, p->value); + else + wrbuf_puts (b, p->value); + wrbuf_putc (b, '"'); + } + } +} + static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col, int pretty_format) { @@ -82,15 +104,7 @@ static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col, indent (b, col); wrbuf_puts (b, "u.preprocess.target); - for (p = c->u.preprocess.attributes; p; p = p->next) - { - wrbuf_putc (b, ' '); - wrbuf_put_cdata (b, p->name); - wrbuf_putc (b, '='); - wrbuf_putc (b, '"'); - wrbuf_put_cdata (b, p->value); - wrbuf_putc (b, '"'); - } + wrbuf_put_xattr (b, c->u.preprocess.attributes); if (c->child) wrbuf_puts(b, " "); if (nodetoidsgml(c, select, b, (col > 40) ? 40 : col+2, @@ -116,15 +130,7 @@ static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col, indent (b, col); wrbuf_puts (b, "<"); wrbuf_put_cdata (b, tag); - for (p = c->u.tag.attributes; p; p = p->next) - { - wrbuf_putc (b, ' '); - wrbuf_put_cdata (b, p->name); - wrbuf_putc (b, '='); - wrbuf_putc (b, '"'); - wrbuf_put_cdata (b, p->value); - wrbuf_putc (b, '"'); - } + wrbuf_put_xattr (b, c->u.tag.attributes); wrbuf_puts(b, ">"); if (pretty_format) wrbuf_puts(b, "\n"); @@ -153,6 +159,9 @@ static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col, wrbuf_puts (b, "