Use WRBUF for MARC line reading.
[yaz-moved-to-github.git] / src / marc_read_line.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marc_read_line.c,v 1.5 2007-03-17 09:13:29 adam Exp $
6  */
7
8 /**
9  * \file marc_read_line.c
10  * \brief Implements reading of MARC in line format
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef WIN32
18 #include <windows.h>
19 #endif
20
21 #include <assert.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25
26 #include <yaz/marcdisp.h>
27 #include <yaz/wrbuf.h>
28 #include <yaz/yaz-util.h>
29
30 int yaz_gets(int (*getbyte)(void *client_data),
31              void (*ungetbyte)(int b, void *client_data),
32              void *client_data,
33              WRBUF w)
34 {
35     size_t sz = 0;
36     int ch = getbyte(client_data);
37
38     wrbuf_rewind(w);
39     while (ch != '\0' && ch != '\r' && ch != '\n')
40     {
41         wrbuf_putc(w, ch);
42         sz++;
43         ch = getbyte(client_data);
44     }
45     if (ch == '\r')
46     {
47         ch = getbyte(client_data);
48         if (ch != '\n' && ch != '\0')
49             ungetbyte(ch, client_data);
50     }
51     else if (ch == '\n')
52     {
53         ch = getbyte(client_data);
54         if (ch != '\r' && ch != '\0')
55             ungetbyte(ch, client_data);
56     }
57     if (sz)
58     {
59         return 1;
60     }
61     return 0;
62 }
63     
64 int yaz_marc_read_line(yaz_marc_t mt,
65                        int (*getbyte)(void *client_data),
66                        void (*ungetbyte)(int b, void *client_data),
67                        void *client_data)
68 {
69     int indicator_length;
70     int identifier_length;
71     int base_address;
72     int length_data_entry;
73     int length_starting;
74     int length_implementation;
75     int marker_ch = 0;
76     int marker_skip = 0;
77     int header_created = 0;
78     WRBUF wrbuf_line = wrbuf_alloc();
79
80     yaz_marc_reset(mt);
81
82     while (yaz_gets(getbyte, ungetbyte, client_data, wrbuf_line))
83     {
84         const char *line = wrbuf_cstr(wrbuf_line);
85         int val;
86         size_t line_len = strlen(line);
87         if (line_len == 0)       /* empty line indicates end of record */
88         {
89             if (header_created)
90                 break;
91         }
92         else if (line[0] == '$') /* indicates beginning/end of record */
93         {
94             if (header_created)
95                 break;
96         }
97         else if (line[0] == '(') /* annotation, skip it */
98             ;
99         else if (line_len == 24 && atoi_n_check(line, 5, &val) && val >= 24)
100         {
101             /* deal with header lines:  00366nam  22001698a 4500
102             */
103
104             if (header_created)
105                 break;
106             yaz_marc_set_leader(mt, line,
107                                 &indicator_length,
108                                 &identifier_length,
109                                 &base_address,
110                                 &length_data_entry,
111                                 &length_starting,
112                                 &length_implementation);
113             header_created = 1;
114         }
115         else if (line_len > 5 && memcmp(line, "    ", 4) == 0)
116         {  /* continuation line */
117             ;
118         }
119         else if (line_len > 5 && line[3] == ' ')
120         {
121             /* deal with data/control lines: 245 12 ........ */
122             char tag[4];
123             const char *datafield_start = line+6;
124             marker_ch = 0;
125             marker_skip = 0;
126
127             memcpy(tag, line, 3);
128             tag[3] = '\0';
129             if (line_len >= 8) /* control - or datafield ? */
130             {
131                 if (*datafield_start == ' ')
132                     datafield_start++;  /* skip blank after indicator */
133
134                 if (strchr("$_*", *datafield_start))
135                 {
136                     marker_ch = *datafield_start;
137                     if (datafield_start[2] == ' ')
138                         marker_skip = 1; /* subfields has blank before data */
139                 }
140             }
141             if (!header_created)
142             {
143                 const char *leader = "01000cam  2200265 i 4500";
144
145                 yaz_marc_set_leader(mt, leader,
146                                     &indicator_length,
147                                     &identifier_length,
148                                     &base_address,
149                                     &length_data_entry,
150                                     &length_starting,
151                                     &length_implementation);
152                 header_created = 1;
153             }
154
155             if (marker_ch == 0)
156             {   /* control field */
157                 yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
158             }
159             else
160             {   /* data field */
161                 const char *indicator = line+4;
162                 int indicator_len = 2;
163                 const char *cp = datafield_start;
164
165                 yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
166                 for (;;)
167                 {
168                     const char *next;
169                     size_t len;
170                     
171                     assert(cp[0] == marker_ch);
172                     cp++;
173                     next = cp;
174                     while ((next = strchr(next, marker_ch)))
175                     {
176                         if ((next[1] >= 'A' && next[1] <= 'Z')
177                             ||(next[1] >= 'a' && next[1] <= 'z'))
178                         {
179                             if (!marker_skip)
180                                 break;
181                             else if (next[2] == ' ')
182                                 break;
183                         }
184                         next++;
185                     }
186                     len = strlen(cp);
187                     if (next)
188                         len = next - cp - marker_skip;
189
190                     if (marker_skip)
191                     {
192                         /* remove ' ' after subfield marker */
193                         char *cp_blank = strchr(cp, ' ');
194                         if (cp_blank)
195                         {
196                             len--;
197                             while (cp_blank != cp)
198                             {
199                                 cp_blank[0] = cp_blank[-1];
200                                 cp_blank--;
201                             }
202                             cp++;
203                         }
204                     }
205                     assert(len >= 0);
206                     yaz_marc_add_subfield(mt, cp, len);
207                     if (!next)
208                         break;
209                     cp = next;
210                 }
211             }
212         }
213     }
214     wrbuf_destroy(wrbuf_line);
215     if (!header_created)
216         return -1;
217     return 0;
218 }
219
220 /*
221  * Local variables:
222  * c-basic-offset: 4
223  * indent-tabs-mode: nil
224  * End:
225  * vim: shiftwidth=4 tabstop=8 expandtab
226  */
227