Bug fix: did not split on *
[idzebra-moved-to-github.git] / recctrl / danbibr.c
1 /* $Id: danbibr.c,v 1.3 2004-05-26 13:26:17 adam Exp $
2    Copyright (C) 2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <yaz/log.h>

#include "grsread.h"

#include <yaz/xmalloc.h>
#include <yaz/log.h>
#include <data1.h>
34
/* Maximum number of bytes requested from the read callback per call. */
#define READ_CHUNK 200
36
37 struct danbibr_info {
38     WRBUF rec_buf;
39     char read_buf[READ_CHUNK+1];  /* space for \0 */
40 };
41
42 static void *grs_init_danbib(void)
43 {
44     struct danbibr_info *p = (struct danbibr_info *) xmalloc (sizeof(*p));
45
46     p->rec_buf = wrbuf_alloc();
47     wrbuf_puts(p->rec_buf, "");
48     return p;
49 }
50
51 static int read_rec(struct grs_read_info *p)
52 {
53     struct danbibr_info *info = p->clientData;
54     
55     wrbuf_rewind(info->rec_buf);
56     while(1)
57     {
58         char *cp_split = 0;
59         int r = (*p->readf)(p->fh, info->read_buf, READ_CHUNK);
60         if (r <= 0)
61         {
62             if (wrbuf_len(info->rec_buf) > 0)
63                 return 1;
64             else
65                 return 0;
66         }
67         info->read_buf[r] = '\0';
68         wrbuf_puts(info->rec_buf, info->read_buf);
69
70         cp_split = strstr(wrbuf_buf(info->rec_buf), "\n$");
71         if (cp_split)
72         {
73             cp_split++; /* now at $ */
74             if (p->endf)
75                 (*p->endf)(p->fh, p->offset + 
76                            (cp_split - wrbuf_buf(info->rec_buf)));
77             
78             cp_split[0] = '\0';
79             return 1;
80         }
81     }
82 }
83
84 static data1_node *mk_tree(struct grs_read_info *p, const char *rec_buf)
85 {
86     data1_node *root = data1_mk_root(p->dh, p->mem, "danbib");
87     const char *cp = rec_buf;
88
89     root = data1_mk_tag(p->dh, p->mem, "danbib", 0, root);
90
91     if (1)  /* <text> all </text> */
92     {
93         data1_node *text_node = data1_mk_tag(p->dh, p->mem, "text", 0, root);
94         data1_mk_text_n(p->dh, p->mem, rec_buf, strlen(rec_buf), text_node);
95     }
96     while (*cp)
97     {
98         const char *start_tag = cp;
99         const char *start_text;
100         if (*cp == '\n')
101         {
102             cp++;
103             continue;
104         }
105         else if (*cp == ' ')  /* bad continuation */
106         {
107             while (*cp && *cp != '\n')
108                 cp++;
109         }
110         else if (*cp == '$')  /* header */
111         {
112             int no = 1;
113             cp++;
114             start_text = cp;
115             for(start_text = cp; *cp && *cp != '\n'; cp++)
116                 if (*cp == ':')
117                 {
118                     if (start_text != cp)
119                     {
120                         char elemstr[20];
121                         data1_node *hnode;
122                         sprintf(elemstr, "head%d", no);
123
124                         hnode = data1_mk_tag(p->dh, p->mem, elemstr, 0, root);
125                         data1_mk_text_n(p->dh, p->mem, start_text,
126                                         cp - start_text, hnode);
127                         start_text = cp+1;
128                     }
129                     no++;
130                 }
131         }
132         else /* other */
133         {
134             while (*cp != ' ' && *cp && *cp != '\n')
135                 cp++;
136             if (*cp == ' ')
137             {
138                 data1_node *tag_node =
139                     data1_mk_tag_n(p->dh, p->mem,
140                                    start_tag, cp - start_tag, 0, root);
141                 cp++;
142                 start_text = cp;
143                 while (*cp != '\n' && *cp)
144                 {
145                     if (*cp == '*' && cp[1]) /* subfield */
146                     {
147                         data1_node *sub_tag_node;
148                         if (start_text != cp)
149                             data1_mk_text_n(p->dh, p->mem, start_text,
150                                             cp-start_text, tag_node);
151                         cp++;
152                         sub_tag_node =
153                             data1_mk_tag_n(p->dh, p->mem, cp, 1, 0, tag_node);
154                         cp++;
155                         start_text = cp;
156                         while (*cp)
157                         {
158                             if (*cp == '\n' && cp[1] == ' ')
159                             {
160                                 cp++;
161                                 if (start_text != cp)
162                                     data1_mk_text_n(p->dh, p->mem, start_text,
163                                                     cp-start_text, sub_tag_node);
164                                 while (*cp == ' ')
165                                     cp++;
166                                 start_text = cp;
167                             }
168                             else if (*cp == '\n')
169                                 break;
170                             else if (*cp == '*')
171                                 break;
172                             else
173                                 cp++;
174                         }
175                         if (start_text != cp)
176                             data1_mk_text_n(p->dh, p->mem, start_text,
177                                             cp-start_text, sub_tag_node);
178                         start_text = cp;
179                     }
180                     else
181                         cp++;
182                 }
183                 if (start_text != cp)
184                     data1_mk_text_n(p->dh, p->mem, start_text,
185                                     cp-start_text, tag_node);
186             }
187         }
188     }
189     return root;
190 }
191
192 static data1_node *grs_read_danbib (struct grs_read_info *p)
193 {
194     struct danbibr_info *info = p->clientData;
195
196     if (read_rec(p)) 
197         return mk_tree(p, wrbuf_buf(info->rec_buf));
198     return 0;
199 }
200
201 static void grs_destroy_danbib(void *clientData)
202 {
203     struct danbibr_info *p = (struct danbibr_info *) clientData;
204
205     wrbuf_free(p->rec_buf, 1);
206     xfree (p);
207 }
208
209 static struct recTypeGrs danbib_type = {
210     "danbib",
211     grs_init_danbib,
212     grs_destroy_danbib,
213     grs_read_danbib
214 };
215
216 RecTypeGrs recTypeGrs_danbib = &danbib_type;
217