Added grs.danbib filter - for Danish Bibliographic Centre.
author Adam Dickmeiss <adam@indexdata.dk>
Fri, 21 May 2004 11:58:56 +0000 (11:58 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Fri, 21 May 2004 11:58:56 +0000 (11:58 +0000)
NEWS
recctrl/Makefile.am
recctrl/danbibr.c [new file with mode: 0644]
recctrl/grsread.h
recctrl/recgrs.c

diff --git a/NEWS b/NEWS
index 0afb77d..ff4f8c4 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,6 @@
 
+Added grs.danbib filter - for Danish Bibliographic Centre.
+
 Rename CHANGELOG to NEWS.
 
 --- 1.3.16 2004/03/29
index 909ba3a..c96180a 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.8 2003-02-28 12:33:39 oleg Exp $
+## $Id: Makefile.am,v 1.9 2004-05-21 11:58:56 adam Exp $
 
 noinst_LIBRARIES = librecctrl.a
 
@@ -12,7 +12,7 @@ endif
 
 librecctrl_a_SOURCES = recctrl.c recgrs.c sgmlread.c regxread.c \
        marcread.c marcomp.c marcomp.h inline.c inline.h rectext.c \
-       grsread.h recgrs.h rectext.h xmlread.c \
+       grsread.h recgrs.h rectext.h xmlread.c danbibr.c \
        $(PERLREAD)
 
 INCLUDES = -I$(srcdir)/../include @YAZINC@ $(TCL_INCLUDE)
diff --git a/recctrl/danbibr.c b/recctrl/danbibr.c
new file mode 100644 (file)
index 0000000..1b160af
--- /dev/null
@@ -0,0 +1,198 @@
+/* $Id: danbibr.c,v 1.1 2004-05-21 11:58:56 adam Exp $
+   Copyright (C) 2004
+   Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <yaz/log.h>
+
+#include "grsread.h"
+
+#include <yaz/xmalloc.h>
+#include <yaz/log.h>
+#include <data1.h>
+
+#define READ_CHUNK 200
+
+/* Per-handler state of the danbib reader: allocated by grs_init_danbib,
+   used by read_rec / grs_read_danbib, released by grs_destroy_danbib. */
+struct danbibr_info {
+    WRBUF rec_buf;                /* text of the record currently being read */
+    char read_buf[READ_CHUNK+1];  /* one chunk of input; +1 is space for \0 */
+};
+
+/* Create the state block for the danbib handler and hand it back as opaque
+   client data. The record buffer is allocated here and primed with an
+   empty string. */
+static void *grs_init_danbib(void)
+{
+    struct danbibr_info *info = xmalloc (sizeof *info);
+    WRBUF buf = wrbuf_alloc();
+
+    wrbuf_puts(buf, "");
+    info->rec_buf = buf;
+    return info;
+}
+
+/*
+ * Read the next record into the handler's rec_buf.
+ *
+ * Input is pulled through p->readf in chunks of at most READ_CHUNK bytes and
+ * appended to rec_buf until the sequence "\n$" shows up. The buffer is then
+ * cut (NUL written) directly after that newline and, when p->endf is set, it
+ * is called with p->offset plus the position of the '$' inside the buffer.
+ *
+ * Returns 1 when rec_buf holds a record - also when input ends while data is
+ * buffered and no further "\n$" was seen - and 0 when input ends with
+ * nothing buffered.
+ */
+static int read_rec(struct grs_read_info *p)
+{
+    struct danbibr_info *info = p->clientData;
+
+    wrbuf_rewind(info->rec_buf);
+    while (1)
+    {
+        char *cp_split;
+        size_t scan_from = wrbuf_len(info->rec_buf);
+        int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
+
+        if (r <= 0)
+            return wrbuf_len(info->rec_buf) > 0 ? 1 : 0;
+
+        info->read_buf[r] = '\0';
+        wrbuf_puts(info->rec_buf, info->read_buf);
+
+        /* The data buffered before this chunk was already searched without
+           a hit. A new "\n$" must contain at least one new byte, so it can
+           start no earlier than the last old byte: resume there instead of
+           rescanning the whole record for every chunk. */
+        if (scan_from > 0)
+            scan_from--;
+        cp_split = strstr(wrbuf_buf(info->rec_buf) + scan_from, "\n$");
+        if (cp_split)
+        {
+            cp_split++; /* now at $ */
+            if (p->endf)
+                (*p->endf)(p->fh, p->offset +
+                           (cp_split - wrbuf_buf(info->rec_buf)));
+
+            /* drop everything from the '$' on; it belongs to what follows */
+            cp_split[0] = '\0';
+            return 1;
+        }
+    }
+}
+
+/*
+ * Build a data1 tree, rooted at "danbib", from the record text in rec_buf.
+ *
+ * The complete record is first stored as one "text" element. The record is
+ * then walked line by line:
+ *   - a line starting with ' ' is skipped (continuation);
+ *   - a line starting with '$' is a header: every non-empty piece that is
+ *     terminated by ':' becomes an element "headN", N being the 1-based
+ *     position of the piece (empty pieces still count); text after the last
+ *     ':' is not emitted;
+ *   - any other line that contains a space becomes an element named by the
+ *     text before the first space. In the rest of the line, '*' followed by
+ *     a code character opens a sub-element named by that single character,
+ *     holding the text up to the next '*' or the end of the line; remaining
+ *     text is attached to the field element itself. A line without a space
+ *     produces nothing.
+ *
+ * Returns the root node.
+ */
+static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
+{
+    data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
+    data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root);
+    const char *cp = rec_buf;
+
+    /* <text> all </text> */
+    data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
+
+    while (*cp)
+    {
+        const char *start_tag = cp;
+        const char *start_text;
+
+        if (*cp == '\n')
+        {
+            cp++;
+            continue;
+        }
+        if (*cp == ' ')  /* continuation */
+        {
+            while (*cp && *cp != '\n')
+                cp++;
+        }
+        else if (*cp == '$')  /* header */
+        {
+            int no = 1;
+
+            cp++;
+            for (start_text = cp; *cp && *cp != '\n'; cp++)
+            {
+                if (*cp != ':')
+                    continue;
+                if (start_text != cp)
+                {
+                    char elemstr[20];  /* "head" + any int + \0 fits */
+                    data1_node *hnode;
+
+                    sprintf(elemstr, "head%d", no);
+                    hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root);
+                    data1_mk_text_n(p->dh, p->mem, start_text,
+                                    cp - start_text, hnode);
+                }
+                /* Step past the ':' even when the piece was empty, so the
+                   separator never leaks into the next piece's text. */
+                start_text = cp + 1;
+                no++;
+            }
+        }
+        else /* other */
+        {
+            while (*cp != ' ' && *cp && *cp != '\n')
+                cp++;
+            if (*cp == ' ')
+            {
+                data1_node *tag_node =
+                    data1_mk_tag_n(p->dh, p->mem,
+                                   start_tag, cp - start_tag, 0, root);
+                cp++;
+                start_text = cp;
+                while (*cp != '\n' && *cp)
+                {
+                    /* A '*' only opens a subfield when a code character
+                       follows on the same line; a '*' right before the
+                       line end is ordinary text and must not consume the
+                       newline. */
+                    if (*cp == '*' && cp[1] && cp[1] != '\n') /* subfield */
+                    {
+                        data1_node *sub_tag_node;
+
+                        /* flush text collected before the '*' */
+                        if (start_text != cp)
+                            data1_mk_text_n(p->dh, p->mem, start_text,
+                                            cp - start_text, tag_node);
+                        cp++;
+                        sub_tag_node =
+                            data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
+                        cp++;
+                        start_text = cp;
+                        while (*cp && *cp != '\n' && *cp != '*')
+                            cp++;
+                        if (start_text != cp)
+                            data1_mk_text_n(p->dh, p->mem, start_text,
+                                            cp - start_text, sub_tag_node);
+                        start_text = cp;
+                    }
+                    else
+                        cp++;
+                }
+                /* trailing text that is not part of any subfield */
+                if (start_text != cp)
+                    data1_mk_text_n(p->dh, p->mem, start_text,
+                                    cp - start_text, tag_node);
+            }
+        }
+    }
+    return root;
+}
+
+/* Read callback of the danbib handler: fetch the next record with read_rec
+   and convert its text into a data1 tree with mk_tree. Returns 0 when
+   read_rec reports that there is no record. */
+static data1_node *grs_read_danbib (struct grs_read_info *p)
+{
+    struct danbibr_info *state = p->clientData;
+
+    if (!read_rec(p))
+        return 0;
+    return mk_tree(p, wrbuf_buf(state->rec_buf));
+}
+
+/* Destroy callback of the danbib handler: frees the record buffer and then
+   the state block that grs_init_danbib allocated. */
+static void grs_destroy_danbib(void *clientData)
+{
+    struct danbibr_info *info = clientData;
+
+    wrbuf_free(info->rec_buf, 1);
+    xfree (info);
+}
+
+/* Descriptor of the "danbib" GRS handler: its name followed by the init,
+   destroy and read functions of this file. */
+static struct recTypeGrs danbib_type = {
+    "danbib",
+    grs_init_danbib,
+    grs_destroy_danbib,
+    grs_read_danbib
+};
+
+/* Exported handle to the descriptor; declared extern in grsread.h. */
+RecTypeGrs recTypeGrs_danbib = &danbib_type;
+
index 0a3e23a..480e1dd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: grsread.h,v 1.13 2003-08-21 10:29:00 adam Exp $
+/* $Id: grsread.h,v 1.14 2004-05-21 11:58:56 adam Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -58,6 +58,7 @@ extern RecTypeGrs recTypeGrs_marc;
 extern RecTypeGrs recTypeGrs_marcxml;
 extern RecTypeGrs recTypeGrs_xml;
 extern RecTypeGrs recTypeGrs_perl;
+extern RecTypeGrs recTypeGrs_danbib;
 
 #ifdef __cplusplus
 }
index 49fdf91..4a0f045 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recgrs.c,v 1.85 2003-10-07 09:18:21 adam Exp $
+/* $Id: recgrs.c,v 1.86 2004-05-21 11:58:56 adam Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
    Index Data Aps
 
@@ -107,6 +107,7 @@ static void *grs_init(RecType recType)
 #if HAVE_PERL
     grs_add_handler (h, recTypeGrs_perl);
 #endif
+    grs_add_handler (h, recTypeGrs_danbib);
     return h;
 }