1 /* $Id: d1_read.c,v 1.12 2004-12-10 11:56:21 heikki Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 * This module reads "loose" SGML and converts it to data1 tree
32 /* Old yaz-util includes (FIXME - clean up what is not needed)*/
33 #include <yaz/yconfig.h>
34 #include <yaz/yaz-version.h>
35 #include <yaz/xmalloc.h>
37 #include <yaz/tpath.h>
38 #include <yaz/options.h>
39 #include <yaz/wrbuf.h>
41 #include <yaz/readconf.h>
42 #include <yaz/marcdisp.h>
43 #include <yaz/yaz-iconv.h>
46 data1_node *data1_get_root_tag (data1_handle dh, data1_node *n)
50 if (data1_is_xmlmode(dh))
53 while (n && n->which != DATA1N_tag)
60 * Get the tag which is the immediate parent of this node (this may mean
61 * traversing intermediate things like variants and stuff).
63 data1_node *get_parent_tag (data1_handle dh, data1_node *n)
65 if (data1_is_xmlmode(dh))
67 for (; n && n->which != DATA1N_root; n = n->parent)
68 if (n->which == DATA1N_tag && n->parent &&
69 n->parent->which != DATA1N_root)
74 for (; n && n->which != DATA1N_root; n = n->parent)
75 if (n->which == DATA1N_tag)
81 data1_node *data1_mk_node (data1_handle dh, NMEM m)
83 return data1_mk_node2 (dh, m, DATA1N_root, 0);
86 data1_node *data1_mk_node_type (data1_handle dh, NMEM m, int type)
88 return data1_mk_node2 (dh, m, type, 0);
91 static void data1_init_node (data1_handle dh, data1_node *r, int type)
99 r->u.tag.no_data_requested = 0;
100 r->u.tag.node_selected = 0;
101 r->u.tag.make_variantlist = 0;
102 r->u.tag.get_bytes = -1;
103 r->u.tag.attributes = 0;
113 r->u.data.formatted_text = 0;
119 r->u.data.formatted_text = 1;
122 r->u.variant.type = 0;
123 r->u.variant.value = 0;
125 case DATA1N_preprocess:
126 r->u.preprocess.target = 0;
127 r->u.preprocess.attributes = 0;
130 yaz_log (YLOG_WARN, "data_mk_node_type. bad type = %d\n", type);
134 data1_node *data1_append_node (data1_handle dh, NMEM m, int type,
137 data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r));
138 r->next = r->child = r->last_child = 0;
146 r->root = parent->root;
148 parent->child = parent->last_child = r;
150 parent->last_child->next = r;
151 parent->last_child = r;
153 data1_init_node(dh, r, type);
157 data1_node *data1_mk_node2 (data1_handle dh, NMEM m, int type,
160 return data1_append_node (dh, m, type, parent);
163 data1_node *data1_insert_node (data1_handle dh, NMEM m, int type,
166 data1_node *r = (data1_node *)nmem_malloc(m, sizeof(*r));
167 r->next = r->child = r->last_child = 0;
174 r->root = parent->root;
177 parent->last_child = r;
179 r->next = parent->child;
182 data1_init_node(dh, r, type);
186 void data1_free_tree (data1_handle dh, data1_node *t)
188 data1_node *p = t->child, *pn;
193 data1_free_tree (dh, p);
200 data1_node *data1_mk_root (data1_handle dh, NMEM nmem, const char *name)
202 data1_absyn *absyn = data1_get_absyn (dh, name);
206 yaz_log(YLOG_WARN, "Unable to acquire abstract syntax " "for '%s'",
208 /* It's now OK for a record not to have an absyn */
210 res = data1_mk_node2 (dh, nmem, DATA1N_root, 0);
211 res->u.root.type = data1_insert_string (dh, res, nmem, name);
212 res->u.root.absyn = absyn;
216 void data1_set_root(data1_handle dh, data1_node *res,
217 NMEM nmem, const char *name)
219 data1_absyn *absyn = data1_get_absyn (dh, name);
221 res->u.root.type = data1_insert_string (dh, res, nmem, name);
222 res->u.root.absyn = absyn;
225 void data1_add_attrs(data1_handle dh, NMEM nmem, const char **attr,
231 while (attr && *attr)
233 *p = (data1_xattr*) nmem_malloc (nmem, sizeof(**p));
234 (*p)->name = nmem_strdup (nmem, *attr++);
235 (*p)->value = nmem_strdup (nmem, *attr++);
236 (*p)->what = DATA1I_text;
243 data1_node *data1_mk_preprocess (data1_handle dh, NMEM nmem,
245 const char **attr, data1_node *at)
247 return data1_mk_preprocess_n (dh, nmem, target, strlen(target),
251 data1_node *data1_mk_preprocess_n (data1_handle dh, NMEM nmem,
252 const char *target, size_t len,
253 const char **attr, data1_node *at)
255 data1_node *res = data1_mk_node2 (dh, nmem, DATA1N_preprocess, at);
256 res->u.preprocess.target = data1_insert_string_n (dh, res, nmem,
259 data1_add_attrs(dh, nmem, attr, &res->u.preprocess.attributes);
263 data1_node *data1_insert_preprocess (data1_handle dh, NMEM nmem,
265 const char **attr, data1_node *at)
267 return data1_insert_preprocess_n (dh, nmem, target, strlen(target),
271 data1_node *data1_insert_preprocess_n (data1_handle dh, NMEM nmem,
272 const char *target, size_t len,
273 const char **attr, data1_node *at)
275 data1_node *res = data1_insert_node (dh, nmem, DATA1N_preprocess, at);
276 res->u.preprocess.target = data1_insert_string_n (dh, res, nmem,
279 data1_add_attrs(dh, nmem, attr, &res->u.preprocess.attributes);
283 data1_node *data1_mk_tag_n (data1_handle dh, NMEM nmem,
284 const char *tag, size_t len, const char **attr,
287 data1_node *partag = get_parent_tag(dh, at);
288 data1_node *res = data1_mk_node2 (dh, nmem, DATA1N_tag, at);
289 data1_element *e = 0;
291 res->u.tag.tag = data1_insert_string_n (dh, res, nmem, tag, len);
293 if (!partag) /* top tag? */
294 e = data1_getelementbytagname (dh, at->root->u.root.absyn,
295 0 /* index as local */,
299 /* only set element for known tags */
300 e = partag->u.tag.element;
302 e = data1_getelementbytagname (dh, at->root->u.root.absyn,
305 res->u.tag.element = e;
306 data1_add_attrs(dh, nmem, attr, &res->u.tag.attributes);
310 void data1_tag_add_attr (data1_handle dh, NMEM nmem,
311 data1_node *res, const char **attr)
313 if (res->which != DATA1N_tag)
316 data1_add_attrs(dh, nmem, attr, &res->u.tag.attributes);
319 data1_node *data1_mk_tag (data1_handle dh, NMEM nmem,
320 const char *tag, const char **attr, data1_node *at)
322 return data1_mk_tag_n (dh, nmem, tag, strlen(tag), attr, at);
325 data1_node *data1_search_tag (data1_handle dh, data1_node *n,
330 n = data1_get_root_tag (dh, n);
335 for (; n; n = n->next)
336 if (n->which == DATA1N_tag && n->u.tag.tag &&
337 !yaz_matchstr (n->u.tag.tag, tag))
344 data1_node *data1_mk_tag_uni (data1_handle dh, NMEM nmem,
345 const char *tag, data1_node *at)
347 data1_node *node = data1_search_tag (dh, at->child, tag);
349 node = data1_mk_tag (dh, nmem, tag, 0 /* attr */, at);
351 node->child = node->last_child = 0;
355 data1_node *data1_mk_text_n (data1_handle dh, NMEM mem,
356 const char *buf, size_t len, data1_node *parent)
358 data1_node *res = data1_mk_node2 (dh, mem, DATA1N_data, parent);
359 res->u.data.what = DATA1I_text;
360 res->u.data.len = len;
362 res->u.data.data = data1_insert_string_n (dh, res, mem, buf, len);
366 data1_node *data1_mk_text_nf (data1_handle dh, NMEM mem,
367 const char *buf, size_t len, data1_node *parent)
369 data1_node *res = data1_mk_text_n (dh, mem, buf, len, parent);
370 res->u.data.formatted_text = 1;
374 data1_node *data1_mk_text (data1_handle dh, NMEM mem,
375 const char *buf, data1_node *parent)
377 return data1_mk_text_n (dh, mem, buf, strlen(buf), parent);
380 data1_node *data1_mk_comment_n (data1_handle dh, NMEM mem,
381 const char *buf, size_t len,
384 data1_node *res = data1_mk_node2 (dh, mem, DATA1N_comment, parent);
385 res->u.data.what = DATA1I_text;
386 res->u.data.len = len;
388 res->u.data.data = data1_insert_string_n (dh, res, mem, buf, len);
392 data1_node *data1_mk_comment (data1_handle dh, NMEM mem,
393 const char *buf, data1_node *parent)
395 return data1_mk_comment_n (dh, mem, buf, strlen(buf), parent);
398 char *data1_insert_string_n (data1_handle dh, data1_node *res,
399 NMEM m, const char *str, size_t len)
402 if (len >= DATA1_LOCALDATA)
403 b = (char *) nmem_malloc (m, len+1);
406 memcpy (b, str, len);
411 char *data1_insert_string (data1_handle dh, data1_node *res,
412 NMEM m, const char *str)
414 return data1_insert_string_n (dh, res, m, str, strlen(str));
417 static data1_node *data1_add_insert_taggeddata(data1_handle dh,
419 const char *tagname, NMEM m,
423 data1_node *root = at->root;
424 data1_node *partag = get_parent_tag (dh, at);
425 data1_element *e = NULL;
426 data1_node *datn = 0;
427 data1_node *tagn = 0;
430 e = data1_getelementbytagname (dh, root->u.root.absyn, 0, tagname);
433 e = partag->u.tag.element;
435 e = data1_getelementbytagname (dh, root->u.root.absyn, e, tagname);
437 if (local_allowed || e)
440 tagn = data1_insert_node (dh, m, DATA1N_tag, at);
442 tagn = data1_append_node (dh, m, DATA1N_tag, at);
443 tagn->u.tag.tag = data1_insert_string (dh, tagn, m, tagname);
444 tagn->u.tag.element = e;
445 datn = data1_mk_node2 (dh, m, DATA1N_data, tagn);
450 data1_node *data1_mk_tag_data(data1_handle dh, data1_node *at,
451 const char *tagname, NMEM m)
453 return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0);
458 * Insert a tagged node into the record root as first child of the node at
459 * (which should be root or tag itself). Returns pointer to the data node,
460 * which can then be modified.
462 data1_node *data1_mk_tag_data_wd(data1_handle dh, data1_node *at,
463 const char *tagname, NMEM m)
465 return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1);
468 data1_node *data1_insert_taggeddata (data1_handle dh, data1_node *root,
469 data1_node *at, const char *tagname,
472 return data1_add_insert_taggeddata (dh, at, tagname, m, 0, 1);
475 data1_node *data1_add_taggeddata (data1_handle dh, data1_node *root,
476 data1_node *at, const char *tagname,
479 return data1_add_insert_taggeddata (dh, at, tagname, m, 1, 0);
482 data1_node *data1_mk_tag_data_zint (data1_handle dh, data1_node *at,
483 const char *tag, zint num,
486 data1_node *node_data;
488 node_data = data1_mk_tag_data (dh, at, tag, nmem);
491 node_data->u.data.what = DATA1I_num;
492 node_data->u.data.data = node_data->lbuf;
493 sprintf (node_data->u.data.data, ZINT_FORMAT, num);
494 node_data->u.data.len = strlen (node_data->u.data.data);
498 data1_node *data1_mk_tag_data_int (data1_handle dh, data1_node *at,
499 const char *tag, int num,
502 return data1_mk_tag_data_zint(dh, at, tag, num, nmem);
505 data1_node *data1_mk_tag_data_oid (data1_handle dh, data1_node *at,
506 const char *tag, Odr_oid *oid,
509 data1_node *node_data;
510 char str[128], *p = str;
513 node_data = data1_mk_tag_data (dh, at, tag, nmem);
517 for (ii = oid; *ii >= 0; ii++)
521 sprintf (p, "%d", *ii);
524 node_data->u.data.what = DATA1I_oid;
525 node_data->u.data.len = strlen (str);
526 node_data->u.data.data = data1_insert_string (dh, node_data, nmem, str);
531 data1_node *data1_mk_tag_data_text (data1_handle dh, data1_node *at,
532 const char *tag, const char *str,
535 data1_node *node_data;
537 node_data = data1_mk_tag_data (dh, at, tag, nmem);
540 node_data->u.data.what = DATA1I_text;
541 node_data->u.data.len = strlen (str);
542 node_data->u.data.data = data1_insert_string (dh, node_data, nmem, str);
547 data1_node *data1_mk_tag_data_text_uni (data1_handle dh, data1_node *at,
548 const char *tag, const char *str,
551 data1_node *node = data1_search_tag (dh, at->child, tag);
553 return data1_mk_tag_data_text (dh, at, tag, str, nmem);
556 data1_node *node_data = node->child;
557 node_data->u.data.what = DATA1I_text;
558 node_data->u.data.len = strlen (str);
559 node_data->u.data.data = data1_insert_string (dh, node_data,
561 node_data->child = node_data->last_child = 0;
566 static int ampr (int (*get_byte)(void *fh), void *fh, int *amp)
569 int c = (*get_byte)(fh);
573 int c = (*get_byte)(fh);
588 if (!strcmp (ent, "quot"))
590 if (!strcmp (ent, "apos"))
592 if (!strcmp (ent, "gt"))
594 if (!strcmp (ent, "lt"))
596 if (!strcmp (ent, "amp"))
601 else if (c == 0 || d1_isspace(c))
611 data1_xattr *data1_read_xattr (data1_handle dh, NMEM m,
612 int (*get_byte)(void *fh), void *fh,
613 WRBUF wrbuf, int *ch, int *amp)
615 data1_xattr *p_first = 0;
616 data1_xattr **pp = &p_first;
622 while (*amp || (c && d1_isspace(c)))
623 c = ampr (get_byte, fh, amp);
624 if (*amp == 0 && (c == 0 || c == '>' || c == '/'))
626 *pp = p = (data1_xattr *) nmem_malloc (m, sizeof(*p));
630 p->what = DATA1I_xmltext;
633 while (c && c != '=' && c != '>' && c != '/' && !d1_isspace(c))
635 wrbuf_putc (wrbuf, c);
636 c = ampr (get_byte, fh, amp);
638 wrbuf_putc (wrbuf, '\0');
639 len = wrbuf_len(wrbuf);
640 p->name = (char*) nmem_malloc (m, len);
641 strcpy (p->name, wrbuf_buf(wrbuf));
644 c = ampr (get_byte, fh, amp);
645 if (*amp == 0 && c == '"')
647 c = ampr (get_byte, fh, amp);
649 while (*amp || (c && c != '"'))
651 wrbuf_putc (wrbuf, c);
652 c = ampr (get_byte, fh, amp);
655 c = ampr (get_byte, fh, amp);
657 else if (*amp == 0 && c == '\'')
659 c = ampr (get_byte, fh, amp);
661 while (*amp || (c && c != '\''))
663 wrbuf_putc (wrbuf, c);
664 c = ampr (get_byte, fh, amp);
667 c = ampr (get_byte, fh, amp);
672 while (*amp || (c && c != '>' && c != '/'))
674 wrbuf_putc (wrbuf, c);
675 c = ampr (get_byte, fh, amp);
678 wrbuf_putc (wrbuf, '\0');
679 len = wrbuf_len(wrbuf);
680 p->value = (char*) nmem_malloc (m, len);
681 strcpy (p->value, wrbuf_buf(wrbuf));
689 * Ugh. Sometimes functions just grow and grow on you. This one reads a
690 * 'node' and its children.
692 data1_node *data1_read_nodex (data1_handle dh, NMEM m,
693 int (*get_byte)(void *fh), void *fh, WRBUF wrbuf)
695 data1_node *d1_stack[256];
702 c = ampr (get_byte, fh, &);
705 data1_node *parent = level ? d1_stack[level-1] : 0;
707 if (amp == 0 && c == '<') /* beginning of tag */
717 c = ampr (get_byte, fh, &);
718 if (amp == 0 && c == '/')
721 c = ampr (get_byte, fh, &);
723 else if (amp == 0 && c == '!')
729 c0 = ampr (get_byte, fh, &0);
730 if (amp0 == 0 && c0 == '\0')
732 c = ampr (get_byte, fh, &);
734 if (amp0 == 0 && c0 == '-' && amp == 0 && c == '-')
736 /* COMMENT: <!-- ... --> */
739 c = ampr (get_byte, fh, &);
742 if (amp == 0 && c == '-')
744 else if (amp == 0 && c == '>' && no_dash >= 2)
750 wrbuf_buf(wrbuf), wrbuf_len(wrbuf)-2,
752 c = ampr (get_byte, fh, &); /* skip > */
757 wrbuf_putc (wrbuf, c);
758 c = ampr (get_byte, fh, &);
763 { /* DIRECTIVE: <! .. > */
768 if (amp == 0 && c == '>' && blevel == 0)
770 c = ampr (get_byte, fh, &);
773 if (amp == 0 && c == '[')
775 if (amp == 0 && c == ']' && blevel > 0)
777 c = ampr (get_byte, fh, &);
782 while (amp || (c && c != '>' && c != '/' && !d1_isspace(c)))
784 if (i < (sizeof(tag)-1))
786 c = ampr (get_byte, fh, &);
789 xattr = data1_read_xattr (dh, m, get_byte, fh, wrbuf, &c, &);
791 if (amp == 0 && c == '/')
792 { /* <tag attrs/> or <tag/> */
794 c = ampr (get_byte, fh, &);
798 yaz_log(YLOG_WARN, "d1: %d: Malformed tag", line);
802 c = ampr (get_byte, fh, &);
814 parent = d1_stack[--i];
815 if ((parent->which == DATA1N_root &&
816 !strcmp(tag, parent->u.root.type)) ||
817 (parent->which == DATA1N_tag &&
818 !strcmp(tag, parent->u.tag.tag)))
826 yaz_log (YLOG_WARN, "%d: no begin tag for %s",
831 if (data1_is_xmlmode(dh))
843 else if (!strcmp(tag, "var"))
845 char tclass[DATA1_MAX_SYMBOL], type[DATA1_MAX_SYMBOL];
849 if (sscanf(args, "%s %s %n", tclass, type, &val_offset) != 2)
851 yaz_log(YLOG_WARN, "Malformed variant triple at '%s'", tag);
855 data1_getvartypebyct(dh,
856 parent->root->u.root.absyn->varset,
860 * If we're the first variant in this group, create a parent
861 * variant, and insert it before the current variant.
863 if (parent->which != DATA1N_variant)
865 res = data1_mk_node2 (dh, m, DATA1N_variant, parent);
870 * now determine if one of our ancestor triples is of
871 * same type. If so, we break here.
874 for (i = level-1; d1_stack[i]->which==DATA1N_variant; --i)
875 if (d1_stack[i]->u.variant.type == tp)
880 res = data1_mk_node2 (dh, m, DATA1N_variant, parent);
881 res->u.variant.type = tp;
882 res->u.variant.value =
883 data1_insert_string (dh, res, m, args + val_offset);
889 /* tag .. acquire our element in the abstract syntax */
892 parent = data1_mk_root (dh, m, tag);
893 res = d1_stack[level] = parent;
895 if (data1_is_xmlmode(dh))
898 res = data1_mk_tag (dh, m, tag, 0 /* attr */, parent);
899 res->u.tag.attributes = xattr;
904 res = data1_mk_tag (dh, m, tag, 0 /* attr */, parent);
905 res->u.tag.attributes = xattr;
908 d1_stack[level] = res;
909 d1_stack[level+1] = 0;
910 if (level < 250 && !null_tag)
913 else /* != '<'... this is a body of text */
919 c = ampr (get_byte, fh, &);
922 res = data1_mk_node2 (dh, m, DATA1N_data, parent);
923 res->u.data.what = DATA1I_xmltext;
924 res->u.data.formatted_text = 0;
925 d1_stack[level] = res;
929 while (amp || (c && c != '<'))
931 wrbuf_putc (wrbuf, c);
932 c = ampr (get_byte, fh, &);
934 len = wrbuf_len(wrbuf);
936 /* use local buffer of nmem if too large */
937 if (len >= DATA1_LOCALDATA)
938 res->u.data.data = (char*) nmem_malloc (m, len);
940 res->u.data.data = res->lbuf;
943 memcpy (res->u.data.data, wrbuf_buf(wrbuf), len);
945 res->u.data.data = 0;
946 res->u.data.len = len;
/*
 * Byte reader over an in-memory, NUL-terminated string.
 *
 * fh points to a `const char *` cursor.  Each call returns the character
 * under the cursor and advances it; once the terminating NUL is reached
 * the cursor stays put and 0 is returned.
 */
int getc_mem (void *fh)
{
    const char **cursor = (const char **) fh;
    const char *here = *cursor;

    if (*here == '\0')
        return 0;
    *cursor = here + 1;
    return *here;
}
960 data1_node *data1_read_node (data1_handle dh, const char **buf, NMEM m)
962 WRBUF wrbuf = wrbuf_alloc();
965 node = data1_read_nodex(dh, m, getc_mem, (void *) (buf), wrbuf);
966 wrbuf_free (wrbuf, 1);
971 * Read a record in the native syntax.
973 data1_node *data1_read_record(data1_handle dh,
974 int (*rf)(void *, char *, size_t), void *fh,
978 char **buf = data1_get_read_buf (dh, &size);
983 *buf = (char *)xmalloc(*size = 4096);
987 if (rd + 2048 >= *size && !(*buf =(char *)xrealloc(*buf, *size *= 2)))
989 if ((res = (*rf)(fh, *buf + rd, 2048)) <= 0)
995 return data1_read_node(dh, &bp, m);
1004 data1_node *data1_read_sgml (data1_handle dh, NMEM m, const char *buf)
1006 const char *bp = buf;
1007 return data1_read_node (dh, &bp, m);
1011 static int conv_item (NMEM m, yaz_iconv_t t,
1012 WRBUF wrbuf, char *inbuf, size_t inlen)
1014 wrbuf_rewind (wrbuf);
1015 if (wrbuf->size < 10)
1016 wrbuf_grow (wrbuf, 10);
1019 char *outbuf = wrbuf->buf + wrbuf->pos;
1020 size_t outlen = wrbuf->size - wrbuf->pos;
1021 if (yaz_iconv (t, &inbuf, &inlen, &outbuf, &outlen) ==
1022 (size_t)(-1) && yaz_iconv_error(t) != YAZ_ICONV_E2BIG)
1024 /* bad data. stop and skip conversion entirely */
1027 else if (inlen == 0)
1028 { /* finished converting */
1029 wrbuf->pos = wrbuf->size - outlen;
1034 /* buffer too small: make sure we expand buffer */
1035 wrbuf->pos = wrbuf->size - outlen;
1036 wrbuf_grow(wrbuf, 20);
1042 static void data1_iconv_s (data1_handle dh, NMEM m, data1_node *n,
1043 yaz_iconv_t t, WRBUF wrbuf, const char *tocode)
1045 for (; n; n = n->next)
1050 case DATA1N_comment:
1051 if (conv_item (m, t, wrbuf, n->u.data.data, n->u.data.len) == 0)
1054 data1_insert_string_n (dh, n, m, wrbuf->buf,
1056 n->u.data.len = wrbuf->pos;
1060 if (conv_item (m, t, wrbuf, n->u.tag.tag, strlen(n->u.tag.tag))
1064 data1_insert_string_n (dh, n, m,
1065 wrbuf->buf, wrbuf->pos);
1067 if (n->u.tag.attributes)
1070 for (p = n->u.tag.attributes; p; p = p->next)
1073 conv_item(m, t, wrbuf, p->value, strlen(p->value))
1076 wrbuf_puts (wrbuf, "");
1077 p->value = nmem_strdup (m, wrbuf->buf);
1082 case DATA1N_preprocess:
1083 if (strcmp(n->u.preprocess.target, "xml") == 0)
1085 data1_xattr *p = n->u.preprocess.attributes;
1086 for (; p; p = p->next)
1087 if (strcmp (p->name, "encoding") == 0)
1088 p->value = nmem_strdup (m, tocode);
1092 data1_iconv_s (dh, m, n->child, t, wrbuf, tocode);
1096 const char *data1_get_encoding (data1_handle dh, data1_node *n)
1098 /* see if we have an xml header that specifies encoding */
1099 if (n && n->child && n->child->which == DATA1N_preprocess &&
1100 strcmp (n->child->u.preprocess.target, "xml") == 0)
1102 data1_xattr *xp = n->child->u.preprocess.attributes;
1103 for (; xp; xp = xp->next)
1104 if (!strcmp (xp->name, "encoding") == 0)
1107 /* no encoding in header, so see if "encoding" was specified for abs */
1108 if (n && n->which == DATA1N_root &&
1109 n->u.root.absyn && n->u.root.absyn->encoding)
1110 return n->u.root.absyn->encoding;
1111 /* none of above, return a hard coded default */
1112 return "ISO-8859-1";
1115 int data1_iconv (data1_handle dh, NMEM m, data1_node *n,
1117 const char *fromcode)
1119 if (yaz_matchstr (tocode, fromcode))
1121 WRBUF wrbuf = wrbuf_alloc();
1122 yaz_iconv_t t = yaz_iconv_open (tocode, fromcode);
1125 data1_iconv_s (dh, m, n, t, wrbuf, tocode);
1126 yaz_iconv_close (t);
1127 wrbuf_free (wrbuf, 1);
1132 void data1_concat_text(data1_handle dh, NMEM m, data1_node *n)
1134 for (; n; n = n->next)
1136 if (n->which == DATA1N_data && n->next &&
1137 n->next->which == DATA1N_data)
1143 for (np = n; np && np->which == DATA1N_data; np=np->next)
1144 sz += np->u.data.len;
1145 ndata = nmem_malloc(m, sz);
1146 for (np = n; np && np->which == DATA1N_data; np=np->next)
1148 memcpy(ndata+off, np->u.data.data, np->u.data.len);
1149 off += np->u.data.len;
1151 n->u.data.data = ndata;
1154 if (!np && n->parent)
1155 n->parent->last_child = n;
1158 data1_concat_text(dh, m, n->child);