From f456ec59032877bdf30c3c7432a11a003020c01e Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 21 May 2004 11:58:56 +0000 Subject: [PATCH] Added grs.danbib filter - for Danish Bibliographic Centre. --- NEWS | 2 + recctrl/Makefile.am | 4 +- recctrl/danbibr.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++ recctrl/grsread.h | 3 +- recctrl/recgrs.c | 3 +- 5 files changed, 206 insertions(+), 4 deletions(-) create mode 100644 recctrl/danbibr.c diff --git a/NEWS b/NEWS index 0afb77d..ff4f8c4 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,6 @@ +Added grs.danbib filter - for Danish Bibliographic Centre. + Rename CHANGELOG to NEWS. --- 1.3.16 2004/03/29 diff --git a/recctrl/Makefile.am b/recctrl/Makefile.am index 909ba3a..c96180a 100644 --- a/recctrl/Makefile.am +++ b/recctrl/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.8 2003-02-28 12:33:39 oleg Exp $ +## $Id: Makefile.am,v 1.9 2004-05-21 11:58:56 adam Exp $ noinst_LIBRARIES = librecctrl.a @@ -12,7 +12,7 @@ endif librecctrl_a_SOURCES = recctrl.c recgrs.c sgmlread.c regxread.c \ marcread.c marcomp.c marcomp.h inline.c inline.h rectext.c \ - grsread.h recgrs.h rectext.h xmlread.c \ + grsread.h recgrs.h rectext.h xmlread.c danbibr.c \ $(PERLREAD) INCLUDES = -I$(srcdir)/../include @YAZINC@ $(TCL_INCLUDE) diff --git a/recctrl/danbibr.c b/recctrl/danbibr.c new file mode 100644 index 0000000..1b160af --- /dev/null +++ b/recctrl/danbibr.c @@ -0,0 +1,198 @@ +/* $Id: danbibr.c,v 1.1 2004-05-21 11:58:56 adam Exp $ + Copyright (C) 2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#include +#include +#include + +#include + +#include "grsread.h" + +#include +#include +#include + +#define READ_CHUNK 200 + +/* Per-handler state. rec_buf accumulates the text of the record being read; read_buf receives each chunk of at most READ_CHUNK bytes from readf. */ struct danbibr_info { + WRBUF rec_buf; + char read_buf[READ_CHUNK+1]; /* space for \0 */ +}; + +/* Allocates the handler state and its record buffer and returns it as an opaque pointer. */ static void *grs_init_danbib(void) +{ + struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p)); + + p->rec_buf = wrbuf_alloc(); + /* NOTE(review): presumably seeds the buffer so wrbuf_buf is usable before any data arrives - confirm against the WRBUF implementation */ wrbuf_puts(p->rec_buf, ""); + return p; +} + +/* Reads one record into the rec_buf of the handler state. The buffer is rewound, then chunks are appended until it contains a newline followed by a dollar sign or readf returns 0 or less. Returns 1 when rec_buf holds record text and 0 when input ended with nothing buffered. */ static int read_rec(struct grs_read_info *p) +{ + struct danbibr_info *info = p->clientData; + + wrbuf_rewind(info->rec_buf); + while(1) + { + char *cp_split = 0; + int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK); + if (r <= 0) + { + /* input exhausted or failed: whatever is buffered counts as the last record */ if (wrbuf_len(info->rec_buf) > 0) + return 1; + else + return 0; + } + info->read_buf[r] = '\0'; + wrbuf_puts(info->rec_buf, info->read_buf); + + /* NOTE(review): the search restarts from the beginning of rec_buf after every chunk */ cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$"); + if (cp_split) + { + cp_split++; /* now at $ */ + /* report where this record ends: p->offset plus the distance from the buffer start to the dollar sign */ if (p->endf) + (*p->endf)(p->fh, p->offset + + (cp_split - wrbuf_buf(info->rec_buf))); + + /* cut the record text off at the dollar sign */ cp_split[0] = '\0'; + return 1; + } + } +} + +/* Builds the data1 tree for one record. The root is named danbib; the complete record text is stored under a text tag, then the record is scanned line by line and each line is classified by its first character. */ static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf) +{ + data1_node *root = data1_mk_root(p->dh, p->mem, "danbib"); + const char *cp = rec_buf; + + if (1) /* all */ + { + data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root); + data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node); + } + while (*cp) + { + const char *start_tag = cp; + const char *start_text; + if (*cp == '\n') + { + cp++; + continue; + } + if (*cp == ' ') /* continuation */ + { + /* skip the rest of the line; no node is created for it here */ while (*cp && *cp != '\n') + cp++; + } + else if (*cp == '$') /* header */ + { + int no = 1; + cp++; + start_text = cp; + /* every non-empty piece that ends in a colon becomes a headN tag, where N is one more than the number of colons already passed on the line; text after the last colon is not emitted */ for(start_text = cp; *cp && *cp != '\n'; cp++) + if
(*cp == ':') + { + if (start_text != cp) + { + char elemstr[20]; + data1_node *hnode; + sprintf(elemstr, "head%d", no); + + hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root); + data1_mk_text_n(p->dh, p->mem, start_text, + cp - start_text, hnode); + start_text = cp+1; + } + /* NOTE(review): when the piece is empty start_text is not moved past this colon, so the next piece emitted starts with the colon - confirm this is intended */ no++; + } + } + else /* other */ + { + /* the tag name runs up to the first space; a line with no space produces no node */ while (*cp != ' ' && *cp && *cp != '\n') + cp++; + if (*cp == ' ') + { + data1_node *tag_node = + data1_mk_tag_n(p->dh, p->mem, + start_tag, cp - start_tag, 0, root); + cp++; + start_text = cp; + while (*cp != '\n' && *cp) + { + if (*cp == '*' && cp[1]) /* subfield */ + { + data1_node *sub_tag_node; + /* flush text collected before the asterisk into the field node */ if (start_text != cp) + data1_mk_text_n(p->dh, p->mem, start_text, + cp-start_text, tag_node); + cp++; + /* the single character after the asterisk is the subfield tag */ sub_tag_node = + data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node); + cp++; + start_text = cp; + /* subfield text runs to the next asterisk or end of line */ while (*cp && *cp != '\n'&& *cp != '*') + cp++; + if (start_text != cp) + data1_mk_text_n(p->dh, p->mem, start_text, + cp-start_text, sub_tag_node); + start_text = cp; + } + else + cp++; + } + /* any text not yet attached (the whole field text when there were no subfields) goes on the field node */ if (start_text != cp) + data1_mk_text_n(p->dh, p->mem, start_text, + cp-start_text, tag_node); + } + } + } + return root; +} + +/* Read callback: returns the tree for the next record, or 0 when read_rec finds no more data. */ static data1_node *grs_read_danbib (struct grs_read_info *p) +{ + struct danbibr_info *info = p->clientData; + + if (read_rec(p)) + return mk_tree(p, wrbuf_buf(info->rec_buf)); + return 0; +} + +/* Frees the record buffer and the state block passed in as clientData. */ static void grs_destroy_danbib(void *clientData) +{ + struct danbibr_info *p = (struct danbibr_info *) clientData; + + wrbuf_free(p->rec_buf, 1); + xfree (p); +} + +/* Descriptor listing the type name and the three callbacks defined in this file. */ static struct recTypeGrs danbib_type = { + "danbib", + grs_init_danbib, + grs_destroy_danbib, + grs_read_danbib +}; + +/* Exported pointer to the descriptor. */ RecTypeGrs recTypeGrs_danbib = &danbib_type; + diff --git a/recctrl/grsread.h b/recctrl/grsread.h index 0a3e23a..480e1dd 100644 --- a/recctrl/grsread.h +++ b/recctrl/grsread.h @@ -1,4 +1,4 @@ -/* $Id: grsread.h,v 1.13 2003-08-21 10:29:00 adam Exp $ +/* $Id: grsread.h,v 1.14 2004-05-21 11:58:56 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@
-58,6 +58,7 @@ extern RecTypeGrs recTypeGrs_marc; extern RecTypeGrs recTypeGrs_marcxml; extern RecTypeGrs recTypeGrs_xml; extern RecTypeGrs recTypeGrs_perl; +extern RecTypeGrs recTypeGrs_danbib; #ifdef __cplusplus } diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 49fdf91..4a0f045 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.85 2003-10-07 09:18:21 adam Exp $ +/* $Id: recgrs.c,v 1.86 2004-05-21 11:58:56 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -107,6 +107,7 @@ static void *grs_init(RecType recType) #if HAVE_PERL grs_add_handler (h, recTypeGrs_perl); #endif + grs_add_handler (h, recTypeGrs_danbib); return h; } -- 1.7.10.4