f7478bae96820b99d31fbc94ff8a83208bc11ecf
[idzebra-moved-to-github.git] / recctrl / danbibr.c
1 /* $Id: danbibr.c,v 1.7 2004-11-19 10:27:12 heikki Exp $
2    Copyright (C) 2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaz/ylog.h>

#include <idzebra/recgrs.h>

#include <yaz/xmalloc.h>
32
33 #define READ_CHUNK 200
34
35 struct danbibr_info {
36     WRBUF rec_buf;
37     char read_buf[READ_CHUNK+1];  /* space for \0 */
38 };
39
40 static void *init_danbib(Res res, RecType rt)
41 {
42     struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p));
43
44     p->rec_buf = wrbuf_alloc();
45     wrbuf_puts(p->rec_buf, "");
46     return p;
47 }
48
49 static int read_rec(struct grs_read_info *p)
50 {
51     struct danbibr_info *info = p->clientData;
52     
53     wrbuf_rewind(info->rec_buf);
54     while(1)
55     {
56         char *cp_split = 0;
57         int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
58         if (r <= 0)
59         {
60             if (wrbuf_len(info->rec_buf) > 0)
61                 return 1;
62             else
63                 return 0;
64         }
65         info->read_buf[r] = '\0';
66         wrbuf_puts(info->rec_buf, info->read_buf);
67
68         cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$");
69         if (cp_split)
70         {
71             cp_split++; /* now at $ */
72             if (p->endf)
73                 (*p->endf)(p->fh, p->offset + 
74                            (cp_split - wrbuf_buf(info->rec_buf)));
75             
76             cp_split[0] = '\0';
77             return 1;
78         }
79     }
80 }
81
82 static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
83 {
84     data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
85     data1_node *root_tag = data1_mk_tag(p->dh, p->mem, "danbib", 0, root);
86     const char *cp = rec_buf;
87
88     if (1)  /* <text> all </text> */
89     {
90         data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root_tag);
91         data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
92     }
93     while (*cp)
94     {
95         const char *start_tag = cp;
96         const char *start_text;
97         if (*cp == '\n')
98         {
99             cp++;
100             continue;
101         }
102         else if (*cp == ' ')  /* bad continuation */
103         {
104             while (*cp && *cp != '\n')
105                 cp++;
106         }
107         else if (*cp == '$')  /* header */
108         {
109             int no = 1;
110             cp++;
111             start_text = cp;
112             for(start_text = cp; *cp && *cp != '\n'; cp++)
113                 if (*cp == ':')
114                 {
115                     if (start_text != cp)
116                     {
117                         char elemstr[20];
118                         data1_node *hnode;
119                         sprintf(elemstr, "head%d", no);
120
121                         hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root_tag);
122                         data1_mk_text_n(p->dh, p->mem, start_text,
123                                         cp - start_text, hnode);
124                         start_text = cp+1;
125                     }
126                     no++;
127                 }
128         }
129         else /* other */
130         {
131             while (*cp != ' ' && *cp && *cp != '\n')
132                 cp++;
133             if (*cp == ' ')
134             {
135                 data1_node *tag_node =
136                     data1_mk_tag_n(p->dh, p->mem,
137                                    start_tag, cp - start_tag, 0, root_tag);
138                 cp++;
139                 start_text = cp;
140                 while (*cp != '\n' && *cp)
141                 {
142                     if (*cp == '*' && cp[1]) /* subfield */
143                     {
144                         data1_node *sub_tag_node;
145                         if (start_text != cp)
146                             data1_mk_text_n(p->dh, p->mem, start_text,
147                                             cp-start_text, tag_node);
148                         cp++;
149                         sub_tag_node =
150                             data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
151                         cp++;
152                         start_text = cp;
153                         while (*cp)
154                         {
155                             if (*cp == '\n' && cp[1] == ' ')
156                             {
157                                 cp++;
158                                 if (start_text != cp)
159                                     data1_mk_text_n(p->dh, p->mem, start_text,
160                                                     cp-start_text, sub_tag_node);
161                                 while (*cp == ' ')
162                                     cp++;
163                                 start_text = cp;
164                             }
165                             else if (*cp == '\n')
166                                 break;
167                             else if (*cp == '*')
168                                 break;
169                             else
170                                 cp++;
171                         }
172                         if (start_text != cp)
173                             data1_mk_text_n(p->dh, p->mem, start_text,
174                                             cp-start_text, sub_tag_node);
175                         start_text = cp;
176                     }
177                     else
178                         cp++;
179                 }
180                 if (start_text != cp)
181                     data1_mk_text_n(p->dh, p->mem, start_text,
182                                     cp-start_text, tag_node);
183             }
184         }
185     }
186     return root;
187 }
188
189 static data1_node *read_danbib (struct grs_read_info *p)
190 {
191     struct danbibr_info *info = p->clientData;
192
193     if (read_rec(p)) 
194         return mk_tree(p, wrbuf_buf(info->rec_buf));
195     return 0;
196 }
197
198 static void destroy_danbib(void *clientData)
199 {
200     struct danbibr_info *p = (struct danbibr_info *) clientData;
201
202     wrbuf_free(p->rec_buf, 1);
203     xfree (p);
204 }
205
206
207 static int extract_danbib(void *clientData, struct recExtractCtrl *ctrl)
208 {
209     return zebra_grs_extract(clientData, ctrl, read_danbib);
210 }
211
212 static int retrieve_danbib(void *clientData, struct recRetrieveCtrl *ctrl)
213 {
214     return zebra_grs_retrieve(clientData, ctrl, read_danbib);
215 }
216
217 static struct recType danbib_type = {
218     "grs.danbib",
219     init_danbib,
220     0,
221     destroy_danbib,
222     extract_danbib,
223     retrieve_danbib,
224 };
225
226 RecType
227 #ifdef IDZEBRA_STATIC_GRS_DANBIB
228 idzebra_filter_grs_danbib
229 #else
230 idzebra_filter
231 #endif
232
233 [] = {
234     &danbib_type,
235     0,
236 };
237     
238
239