Character set negotiation updates
[yaz-moved-to-github.git] / retrieval / d1_write.c
index 544fba3..69d8db6 100644 (file)
 /*
- * Copyright (c) 1995, Index Data.
+ * Copyright (c) 1995-2002, Index Data.
  * See the file LICENSE for details.
  * Sebastian Hammer, Adam Dickmeiss
  *
- * $Log: d1_write.c,v $
- * Revision 1.1  1995-12-13 15:38:43  quinn
- * Added SGML-output filter.
- *
- *
+ * $Id: d1_write.c,v 1.15 2002-07-25 12:52:53 adam Exp $
  */
 
 #include <string.h>
-#include <ctype.h>
 
-#include <data1.h>
-#include <wrbuf.h>
+#include <yaz/data1.h>
+#include <yaz/wrbuf.h>
 
 #define IDSGML_MARGIN 75
 
-static int wordlen(char *b)
+#define PRETTY_FORMAT 0
+
+/*
+ * Return the number of characters at the start of b, at most max, that
+ * precede the first character for which d1_isspace() is true.
+ * The scan is bounded only by max: unlike the removed loop condition
+ * below, the new one does not test for a terminating NUL, so max must
+ * not exceed the number of readable characters at b.
+ */
+static int wordlen(char *b, int max)
 {
     int l = 0;
 
-    while (*b && !isspace(*b))
+    while (l < max && !d1_isspace(*b))
        l++, b++;
     return l;
 }
 
-static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col)
+/* Append col space characters to b; a col of zero or less appends nothing. */
+static void indent (WRBUF b, int col)
+{
+    int remaining;
+
+    for (remaining = col; remaining > 0; remaining--)
+    {
+        wrbuf_putc (b, ' ');
+    }
+}
+
+/*
+ * Append the first len bytes of msg to b, writing the characters
+ * '"', '>' and '<' as the entities &quot;, &gt; and &lt;.
+ * Every other byte is copied unchanged; this includes '&', whose
+ * escape is compiled out by the #if 0 below.
+ */
+static void wrbuf_write_cdata(WRBUF b, const char *msg, int len)
+{
+    int left;
+
+    for (left = len; left > 0; left--, msg++)
+    {
+        const char *entity = 0;
+
+        if (*msg == '"')
+            entity = "&quot;";
+        else if (*msg == '>')
+            entity = "&gt;";
+        else if (*msg == '<')
+            entity = "&lt;";
+#if 0
+        else if (*msg == '&')
+            entity = "&amp;";
+#endif
+
+        if (entity)
+            wrbuf_puts (b, entity);
+        else
+            wrbuf_putc(b, *msg);
+    }
+}
+
+/*
+ * Append the whole NUL-terminated string msg to b, applying the same
+ * character escaping as wrbuf_write_cdata().
+ */
+static void wrbuf_put_cdata(WRBUF b, const char *msg)
+{
+    int msg_len = strlen(msg);
+
+    wrbuf_write_cdata (b, msg, msg_len);
+}
+
+/*
+ * Write the children of n (n->child and its ->next chain) to b as
+ * SGML/XML-style markup, recursing into each child's own children.
+ *
+ *   n             parent node; n itself is not written, only its children
+ *   select        when nonzero, tag nodes whose u.tag.node_selected flag
+ *                 is nonzero are skipped together with their subtree
+ *   b             output buffer that all text is appended to
+ *   col           indentation, in spaces, used when pretty_format is set
+ *   pretty_format nonzero adds indentation, newlines and word wrapping
+ *                 at IDSGML_MARGIN; zero writes the markup compactly
+ *
+ * Returns 0 on success.  The only failure path visible here is a
+ * recursive call returning a negative value, which is passed up as -1.
+ */
+static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col,
+                        int pretty_format)
 {
     data1_node *c;
-    char line[1024];
 
     for (c = n->child; c; c = c->next)
     {
        char *tag;
 
-       if (c->which == DATA1N_tag)
+        /* processing instruction: <?target name="value" ... children?>
+         * followed by a newline, which is written even when
+         * pretty_format is zero */
+        if (c->which == DATA1N_preprocess)
+        {
+            data1_xattr *p;
+
+            if (pretty_format)
+                indent (b, col);
+           wrbuf_puts (b, "<?");
+            wrbuf_put_cdata (b, c->u.preprocess.target);
+            for (p = c->u.preprocess.attributes; p; p = p->next)
+            {
+                wrbuf_putc (b, ' ');
+                wrbuf_put_cdata (b, p->name);
+                wrbuf_putc (b, '=');
+                wrbuf_putc (b, '"');
+                wrbuf_put_cdata (b, p->value);
+                wrbuf_putc (b, '"');
+            }
+            if (c->child)
+                wrbuf_puts(b, " ");
+            /* children are indented two further columns; once col is
+             * above 40 the child indentation is set to 40 instead */
+            if (nodetoidsgml(c, select, b, (col > 40) ? 40 : col+2,
+                             pretty_format) < 0)
+                return -1;
+            wrbuf_puts (b, "?>\n");
+        }
+        else if (c->which == DATA1N_tag)
        {
            if (select && c->u.tag.node_selected)
                continue;
-           if (c->u.tag.element && c->u.tag.element->tag)
-               tag = c->u.tag.element->tag->names->name; /* first name */
+            tag = c->u.tag.tag;
+           /* NOTE(review): presumably data1_matchstr() returns 0 on a
+            * match, so for a tag named "wellknown" no start/end tag is
+            * written and its children are emitted at the same col --
+            * confirm against data1_matchstr */
+           if (!data1_matchstr(tag, "wellknown")) /* skip wellknown */
+           {
+               if (nodetoidsgml(c, select, b, col, pretty_format) < 0)
+                   return -1;
+           }
            else
-               tag = c->u.tag.tag; /* local string tag */
-           if (data1_matchstr(tag, "wellknown")) /* skip wellknown */
            {
-               sprintf(line, "<%s>\n", tag);
-               wrbuf_write(b, line, strlen(line));
-               col = 0;
+               data1_xattr *p;
+
+                if (pretty_format)
+                    indent (b, col);
+               /* start tag: <tag name="value" ...> */
+               wrbuf_puts (b, "<");    
+               wrbuf_put_cdata (b, tag);
+               for (p = c->u.tag.attributes; p; p = p->next)
+               {
+                   wrbuf_putc (b, ' ');
+                   wrbuf_put_cdata (b, p->name);
+                   wrbuf_putc (b, '=');
+                   wrbuf_putc (b, '"');
+                   wrbuf_put_cdata (b, p->value);
+                   wrbuf_putc (b, '"');
+               }
+               wrbuf_puts(b, ">");
+                if (pretty_format)
+                    wrbuf_puts(b, "\n");
+               /* same child indentation rule as for processing
+                * instructions: col+2, or 40 once col is above 40 */
+               if (nodetoidsgml(c, select, b, (col > 40) ? 40 : col+2,
+                                 pretty_format) < 0)
+                   return -1;
+                if (pretty_format)
+                    indent (b, col);
+               /* end tag repeats the tag name: </tag> */
+               wrbuf_puts(b, "</");
+               wrbuf_put_cdata(b, tag);
+               wrbuf_puts(b, ">");
+                if (pretty_format)
+                    wrbuf_puts (b, "\n");
            }
-           if (nodetoidsgml(c, select, b, col) < 0)
-               return -1;
-           wrbuf_write(b, "</>\n", 5);
-           col = 0;
        }
-       else if (c->which == DATA1N_data)
+       /* data and comment nodes share this branch; both are read
+        * through c->u.data, and a comment is wrapped in <!-- ... --> */
+       else if (c->which == DATA1N_data || c->which == DATA1N_comment)
        {
            char *p = c->u.data.data;
            int l = c->u.data.len;
            int first = 1;
+           /* lcol tracks the current output column while wrapping;
+            * col itself stays fixed as the indentation of new lines */
+           int lcol = col;
 
-           if (c->u.data.what == DATA1I_text)
+            if (pretty_format && !c->u.data.formatted_text)
+                indent (b, col);
+            if (c->which == DATA1N_comment)
+                wrbuf_puts (b, "<!--");
+           /* there is no default case: any other value of
+            * c->u.data.what writes no content */
+           switch (c->u.data.what)
            {
-               while (l)
-               {
-                   int wlen;
-
-                   while (l && isspace(*p))
-                       p++, l--;
-                   if (!l)
-                       break;
-                   /* break if we'll cross margin and word is not too long */
-                   if (col + (wlen = wordlen(p)) > IDSGML_MARGIN && wlen <
-                       IDSGML_MARGIN)
-                   {
-                       sprintf(line, "\n");
-                       col = 0;
-                       wrbuf_write(b, line, strlen(line));
-                       first = 1;
-                   }
-                   if (!first)
-                   {
-                       wrbuf_putc(b, ' ');
-                       col++;
-                   }
-                   while (l && !isspace(*p))
-                   {
-#if 0
-                       if (col > NTOBUF_MARGIN)
-                       {
-                           wrbuf_putc(b, '=');
-                           wrbuf_putc(b, '\n');
-                           sprintf(line, "%*s", indent * NTOBUF_INDENT, "");
-                           wrbuf_write(b, line, strlen(line));
-                           col = indent * NTOBUF_INDENT;
-                       }
-#endif
-                       wrbuf_putc(b, *p);
-                       p++;
-                       l--;
-                       col++;
-                   }
-                   first = 0;
-               }
-               wrbuf_write(b, "\n", 1);
-               col = 0;
-           }
-           else if (c->u.data.what == DATA1I_num)
-           {
-               wrbuf_putc(b, ' ');
-               wrbuf_write(b, c->u.data.data, c->u.data.len);
+           case DATA1I_text:
+                /* compact or pre-formatted text is written as is,
+                 * with markup characters escaped */
+                if (!pretty_format || c->u.data.formatted_text)
+                {
+                    wrbuf_write_cdata (b, p, l);
+                }
+                else
+                {
+                    /* re-flow the text: runs of whitespace collapse
+                     * to one space and lines wrap at IDSGML_MARGIN.
+                     * NOTE(review): bytes in this path are written
+                     * with wrbuf_putc and so are not entity-escaped,
+                     * unlike the wrbuf_write_cdata path above */
+                    while (l)
+                    {
+                        int wlen;
+                        
+                        while (l && d1_isspace(*p))
+                            p++, l--;
+                        if (!l)
+                            break;
+                        /* break if we cross margin and word is not too long */
+                        if (lcol + (wlen = wordlen(p, l)) > IDSGML_MARGIN &&
+                            wlen < IDSGML_MARGIN)
+                        {
+                            wrbuf_puts (b, "\n");
+                            indent (b, col);
+                            lcol = col;
+                            first = 1;
+                        }
+                        if (!first)
+                        {
+                            wrbuf_putc(b, ' ');
+                            lcol++;
+                        }
+                        while (l && !d1_isspace(*p))
+                        {
+                            wrbuf_putc(b, *p);
+                            p++;
+                            l--;
+                            lcol++;
+                        }
+                        first = 0;
+                    }
+                    wrbuf_puts(b, "\n");
+                }
+               break;
+           case DATA1I_num:
+               wrbuf_write_cdata(b, c->u.data.data, c->u.data.len);
+                if (pretty_format)
+                    wrbuf_puts(b, "\n");
+               break;
+           case DATA1I_oid:
+               wrbuf_write_cdata(b, c->u.data.data, c->u.data.len);
+                if (pretty_format)
+                    wrbuf_puts(b, "\n");
            }
+            if (c->which == DATA1N_comment)
+            {
+                wrbuf_puts(b, "-->");
+                if (pretty_format)
+                    wrbuf_puts(b, "\n");
+            }
        }
     }
     return 0;
 }
 
-char *data1_nodetoidsgml(data1_node *n, int select, int *len)
+/*
+ * Write the children of n as markup into the WRBUF obtained from
+ * data1_get_wrbuf(dh), which is rewound first, and return the result.
+ *
+ *   dh      data1 handle that supplies the output buffer
+ *   n       node whose children are written; with the removed
+ *           sprintf lines gone, no <type> ... </type> wrapper taken
+ *           from n->u.root.type is emitted any more
+ *   select  passed through unchanged to nodetoidsgml()
+ *   len     receives wrbuf_len(b) on success; not written on failure
+ *
+ * Returns wrbuf_buf(b) on success, or 0 when nodetoidsgml() returns a
+ * nonzero value.  Pretty formatting is always disabled here.
+ * NOTE(review): the returned pointer presumably refers to storage
+ * owned by the handle's WRBUF and is overwritten by the next call --
+ * confirm against data1_get_wrbuf; callers should rely on *len rather
+ * than on a terminating NUL, which this code does not add.
+ */
+char *data1_nodetoidsgml (data1_handle dh, data1_node *n, int select, int *len)
 {
-    static WRBUF b = 0;
-    char line[1024];
-
-    if (!b)
-       b = wrbuf_alloc();
-    else
-       wrbuf_rewind(b);
+    WRBUF b = data1_get_wrbuf (dh);
+    
+    wrbuf_rewind(b);
     
-    sprintf(line, "<%s>\n", n->u.root.type);
-    wrbuf_write(b, line, strlen(line));
-    if (nodetoidsgml(n, select, b, 0))
+    if (nodetoidsgml(n, select, b, 0, 0 /* no pretty format */))
        return 0;
-    sprintf(line, "</%s>\n", n->u.root.type);
-    wrbuf_write(b, line, strlen(line));
     *len = wrbuf_len(b);
     return wrbuf_buf(b);
 }