Added grs.danbib filter - for Danish Bibliographic Centre.
[idzebra-moved-to-github.git] / recctrl / danbibr.c
1 /* $Id: danbibr.c,v 1.1 2004-05-21 11:58:56 adam Exp $
2    Copyright (C) 2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaz/log.h>

#include "grsread.h"

#include <yaz/xmalloc.h>
#include <yaz/log.h>
#include <data1.h>
34
35 #define READ_CHUNK 200
36
37 struct danbibr_info {
38     WRBUF rec_buf;
39     char read_buf[READ_CHUNK+1];  /* space for \0 */
40 };
41
42 static void *grs_init_danbib(void)
43 {
44     struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p));
45
46     p->rec_buf = wrbuf_alloc();
47     wrbuf_puts(p->rec_buf, "");
48     return p;
49 }
50
51 static int read_rec(struct grs_read_info *p)
52 {
53     struct danbibr_info *info = p->clientData;
54     
55     wrbuf_rewind(info->rec_buf);
56     while(1)
57     {
58         char *cp_split = 0;
59         int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
60         if (r <= 0)
61         {
62             if (wrbuf_len(info->rec_buf) > 0)
63                 return 1;
64             else
65                 return 0;
66         }
67         info->read_buf[r] = '\0';
68         wrbuf_puts(info->rec_buf, info->read_buf);
69
70         cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$");
71         if (cp_split)
72         {
73             cp_split++; /* now at $ */
74             if (p->endf)
75                 (*p->endf)(p->fh, p->offset + 
76                            (cp_split - wrbuf_buf(info->rec_buf)));
77             
78             cp_split[0] = '\0';
79             return 1;
80         }
81     }
82 }
83
84 static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
85 {
86     data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
87     const char *cp = rec_buf;
88
89     if (1)  /* <text> all </text> */
90     {
91         data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root);
92         data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
93     }
94     while (*cp)
95     {
96         const char *start_tag = cp;
97         const char *start_text;
98         if (*cp == '\n')
99         {
100             cp++;
101             continue;
102         }
103         if (*cp == ' ')  /* continuation */
104         {
105             while (*cp && *cp != '\n')
106                 cp++;
107         }
108         else if (*cp == '$')  /* header */
109         {
110             int no = 1;
111             cp++;
112             start_text = cp;
113             for(start_text = cp; *cp && *cp != '\n'; cp++)
114                 if (*cp == ':')
115                 {
116                     if (start_text != cp)
117                     {
118                         char elemstr[20];
119                         data1_node *hnode;
120                         sprintf(elemstr, "head%d", no);
121
122                         hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root);
123                         data1_mk_text_n(p->dh, p->mem, start_text,
124                                         cp - start_text, hnode);
125                         start_text = cp+1;
126                     }
127                     no++;
128                 }
129         }
130         else /* other */
131         {
132             while (*cp != ' ' && *cp && *cp != '\n')
133                 cp++;
134             if (*cp == ' ')
135             {
136                 data1_node *tag_node =
137                     data1_mk_tag_n(p->dh, p->mem,
138                                    start_tag, cp - start_tag, 0, root);
139                 cp++;
140                 start_text = cp;
141                 while (*cp != '\n' && *cp)
142                 {
143                     if (*cp == '*' && cp[1]) /* subfield */
144                     {
145                         data1_node *sub_tag_node;
146                         if (start_text != cp)
147                             data1_mk_text_n(p->dh, p->mem, start_text,
148                                             cp-start_text, tag_node);
149                         cp++;
150                         sub_tag_node =
151                             data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
152                         cp++;
153                         start_text = cp;
154                         while (*cp && *cp != '\n'&& *cp != '*')
155                             cp++;
156                         if (start_text != cp)
157                             data1_mk_text_n(p->dh, p->mem, start_text,
158                                             cp-start_text, sub_tag_node);
159                         start_text = cp;
160                     }
161                     else
162                         cp++;
163                 }
164                 if (start_text != cp)
165                     data1_mk_text_n(p->dh, p->mem, start_text,
166                                     cp-start_text, tag_node);
167             }
168         }
169     }
170     return root;
171 }
172
173 static data1_node *grs_read_danbib (struct grs_read_info *p)
174 {
175     struct danbibr_info *info = p->clientData;
176
177     if (read_rec(p)) 
178         return mk_tree(p, wrbuf_buf(info->rec_buf));
179     return 0;
180 }
181
182 static void grs_destroy_danbib(void *clientData)
183 {
184     struct danbibr_info *p = (struct danbibr_info *) clientData;
185
186     wrbuf_free(p->rec_buf, 1);
187     xfree (p);
188 }
189
190 static struct recTypeGrs danbib_type = {
191     "danbib",
192     grs_init_danbib,
193     grs_destroy_danbib,
194     grs_read_danbib
195 };
196
197 RecTypeGrs recTypeGrs_danbib = &danbib_type;
198