Implemented loadable filters.
[idzebra-moved-to-github.git] / recctrl / danbibr.c
1 /* $Id: danbibr.c,v 1.5 2004-09-27 10:44:50 adam Exp $
2    Copyright (C) 2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaz/log.h>

#include "grsread.h"

#include <yaz/xmalloc.h>
#include <yaz/log.h>
#include <data1.h>
34
35 #define READ_CHUNK 200
36
37 struct danbibr_info {
38     WRBUF rec_buf;
39     char read_buf[READ_CHUNK+1];  /* space for \0 */
40 };
41
42 static void *init_danbib(Res res, RecType rt)
43 {
44     struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p));
45
46     p->rec_buf = wrbuf_alloc();
47     wrbuf_puts(p->rec_buf, "");
48     return p;
49 }
50
51 static int read_rec(struct grs_read_info *p)
52 {
53     struct danbibr_info *info = p->clientData;
54     
55     wrbuf_rewind(info->rec_buf);
56     while(1)
57     {
58         char *cp_split = 0;
59         int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
60         if (r <= 0)
61         {
62             if (wrbuf_len(info->rec_buf) > 0)
63                 return 1;
64             else
65                 return 0;
66         }
67         info->read_buf[r] = '\0';
68         wrbuf_puts(info->rec_buf, info->read_buf);
69
70         cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$");
71         if (cp_split)
72         {
73             cp_split++; /* now at $ */
74             if (p->endf)
75                 (*p->endf)(p->fh, p->offset + 
76                            (cp_split - wrbuf_buf(info->rec_buf)));
77             
78             cp_split[0] = '\0';
79             return 1;
80         }
81     }
82 }
83
84 static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
85 {
86     data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
87     data1_node *root_tag = data1_mk_tag(p->dh, p->mem, "danbib", 0, root);
88     const char *cp = rec_buf;
89
90     if (1)  /* <text> all </text> */
91     {
92         data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root_tag);
93         data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
94     }
95     while (*cp)
96     {
97         const char *start_tag = cp;
98         const char *start_text;
99         if (*cp == '\n')
100         {
101             cp++;
102             continue;
103         }
104         else if (*cp == ' ')  /* bad continuation */
105         {
106             while (*cp && *cp != '\n')
107                 cp++;
108         }
109         else if (*cp == '$')  /* header */
110         {
111             int no = 1;
112             cp++;
113             start_text = cp;
114             for(start_text = cp; *cp && *cp != '\n'; cp++)
115                 if (*cp == ':')
116                 {
117                     if (start_text != cp)
118                     {
119                         char elemstr[20];
120                         data1_node *hnode;
121                         sprintf(elemstr, "head%d", no);
122
123                         hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root_tag);
124                         data1_mk_text_n(p->dh, p->mem, start_text,
125                                         cp - start_text, hnode);
126                         start_text = cp+1;
127                     }
128                     no++;
129                 }
130         }
131         else /* other */
132         {
133             while (*cp != ' ' && *cp && *cp != '\n')
134                 cp++;
135             if (*cp == ' ')
136             {
137                 data1_node *tag_node =
138                     data1_mk_tag_n(p->dh, p->mem,
139                                    start_tag, cp - start_tag, 0, root_tag);
140                 cp++;
141                 start_text = cp;
142                 while (*cp != '\n' && *cp)
143                 {
144                     if (*cp == '*' && cp[1]) /* subfield */
145                     {
146                         data1_node *sub_tag_node;
147                         if (start_text != cp)
148                             data1_mk_text_n(p->dh, p->mem, start_text,
149                                             cp-start_text, tag_node);
150                         cp++;
151                         sub_tag_node =
152                             data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
153                         cp++;
154                         start_text = cp;
155                         while (*cp)
156                         {
157                             if (*cp == '\n' && cp[1] == ' ')
158                             {
159                                 cp++;
160                                 if (start_text != cp)
161                                     data1_mk_text_n(p->dh, p->mem, start_text,
162                                                     cp-start_text, sub_tag_node);
163                                 while (*cp == ' ')
164                                     cp++;
165                                 start_text = cp;
166                             }
167                             else if (*cp == '\n')
168                                 break;
169                             else if (*cp == '*')
170                                 break;
171                             else
172                                 cp++;
173                         }
174                         if (start_text != cp)
175                             data1_mk_text_n(p->dh, p->mem, start_text,
176                                             cp-start_text, sub_tag_node);
177                         start_text = cp;
178                     }
179                     else
180                         cp++;
181                 }
182                 if (start_text != cp)
183                     data1_mk_text_n(p->dh, p->mem, start_text,
184                                     cp-start_text, tag_node);
185             }
186         }
187     }
188     return root;
189 }
190
191 static data1_node *read_danbib (struct grs_read_info *p)
192 {
193     struct danbibr_info *info = p->clientData;
194
195     if (read_rec(p)) 
196         return mk_tree(p, wrbuf_buf(info->rec_buf));
197     return 0;
198 }
199
200 static void destroy_danbib(void *clientData)
201 {
202     struct danbibr_info *p = (struct danbibr_info *) clientData;
203
204     wrbuf_free(p->rec_buf, 1);
205     xfree (p);
206 }
207
208
209 static int extract_danbib(void *clientData, struct recExtractCtrl *ctrl)
210 {
211     return zebra_grs_extract(clientData, ctrl, read_danbib);
212 }
213
214 static int retrieve_danbib(void *clientData, struct recRetrieveCtrl *ctrl)
215 {
216     return zebra_grs_retrieve(clientData, ctrl, read_danbib);
217 }
218
219 static struct recType danbib_type = {
220     "grs.danbib",
221     init_danbib,
222     0,
223     destroy_danbib,
224     extract_danbib,
225     retrieve_danbib,
226 };
227
228 RecType
229 #ifdef IDZEBRA_STATIC_GRS_DANBIB
230 idzebra_filter_grs_danbib
231 #else
232 idzebra_filter
233 #endif
234
235 [] = {
236     &danbib_type,
237     0,
238 };
239     
240
241