Happy new year
[yaz-moved-to-github.git] / src / marc_read_line.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_line.c
8  * \brief Implements reading of MARC in line format
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <assert.h>
20 #include <stdio.h>
21 #include <string.h>
22
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26
27 int yaz_gets(int (*getbyte)(void *client_data),
28              void (*ungetbyte)(int b, void *client_data),
29              void *client_data,
30              WRBUF w)
31 {
32     size_t sz = 0;
33     int ch = getbyte(client_data);
34
35     while (ch != '\0' && ch != '\r' && ch != '\n')
36     {
37         wrbuf_putc(w, ch);
38         sz++;
39         ch = getbyte(client_data);
40     }
41     if (ch == '\r')
42     {
43         ch = getbyte(client_data);
44         if (ch != '\n' && ch != '\0')
45             ungetbyte(ch, client_data);
46     }
47     else if (ch == '\n')
48     {
49         ch = getbyte(client_data);
50         if (ch != '\r' && ch != '\0')
51             ungetbyte(ch, client_data);
52     }
53     if (sz)
54     {
55         return 1;
56     }
57     return 0;
58 }
59
60 static int yaz_marc_line_gets(int (*getbyte)(void *client_data),
61                               void (*ungetbyte)(int b, void *client_data),
62                               void *client_data,
63                               WRBUF w)
64 {
65     int more;
66
67     wrbuf_rewind(w);
68     more = yaz_gets(getbyte, ungetbyte, client_data, w);
69     if (!more)
70         return 0;
71
72     while (more)
73     {
74         int i;
75         for (i = 0; i<4; i++)
76         {
77             int ch = getbyte(client_data);
78             if (ch != ' ')
79             {
80                 if (ch)
81                     ungetbyte(ch, client_data);
82                 return 1;
83             }
84         }
85         if (wrbuf_len(w) > 60 && wrbuf_buf(w)[wrbuf_len(w)-1] == '=')
86             wrbuf_cut_right(w, 1);
87         else
88             wrbuf_puts(w, " ");
89         more = yaz_gets(getbyte, ungetbyte, client_data, w);
90     }
91     return 1;
92 }
93
94     
95 int yaz_marc_read_line(yaz_marc_t mt,
96                        int (*getbyte)(void *client_data),
97                        void (*ungetbyte)(int b, void *client_data),
98                        void *client_data)
99 {
100     int indicator_length;
101     int identifier_length;
102     int base_address;
103     int length_data_entry;
104     int length_starting;
105     int length_implementation;
106     int marker_ch = 0;
107     int marker_skip = 0;
108     int header_created = 0;
109     WRBUF wrbuf_line = wrbuf_alloc();
110
111     yaz_marc_reset(mt);
112
113     while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line))
114     {
115         const char *line = wrbuf_cstr(wrbuf_line);
116         int val;
117         size_t line_len = strlen(line);
118         if (line_len == 0)       /* empty line indicates end of record */
119         {
120             if (header_created)
121                 break;
122         }
123         else if (line[0] == '$') /* indicates beginning/end of record */
124         {
125             if (header_created)
126                 break;
127         }
128         else if (line[0] == '(') /* annotation, skip it */
129             ;
130         else if (line_len == 24 && atoi_n_check(line, 5, &val))
131         {
132             /* deal with header lines:  00366nam  22001698a 4500
133             */
134
135             if (header_created)
136                 break;
137             yaz_marc_set_leader(mt, line,
138                                 &indicator_length,
139                                 &identifier_length,
140                                 &base_address,
141                                 &length_data_entry,
142                                 &length_starting,
143                                 &length_implementation);
144             header_created = 1;
145         }
146         else if (line_len > 4 && line[0] != ' ' && line[1] != ' '
147                  && line[2] != ' ' && line[3] == ' ' )
148         {
149             /* deal with data/control lines: 245 12 ........ */
150             char tag[4];
151             const char *datafield_start = line+6;
152             marker_ch = 0;
153             marker_skip = 0;
154
155             memcpy(tag, line, 3);
156             tag[3] = '\0';
157             if (line_len >= 8) /* control - or datafield ? */
158             {
159                 if (*datafield_start == ' ')
160                     datafield_start++;  /* skip blank after indicator */
161
162                 if (strchr("$_*", *datafield_start))
163                 {
164                     marker_ch = *datafield_start;
165                     if (datafield_start[2] == ' ')
166                         marker_skip = 1; /* subfields has blank before data */
167                 }
168             }
169             if (!header_created)
170             {
171                 const char *leader = "01000cam  2200265 i 4500";
172
173                 yaz_marc_set_leader(mt, leader,
174                                     &indicator_length,
175                                     &identifier_length,
176                                     &base_address,
177                                     &length_data_entry,
178                                     &length_starting,
179                                     &length_implementation);
180                 header_created = 1;
181             }
182
183             if (marker_ch == 0)
184             {   /* control field */
185                 yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
186             }
187             else
188             {   /* data field */
189                 const char *indicator = line+4;
190                 int indicator_len = 2;
191                 const char *cp = datafield_start;
192
193                 yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
194                 for (;;)
195                 {
196                     const char *next;
197                     size_t len;
198                     
199                     assert(cp[0] == marker_ch);
200                     cp++;
201                     next = cp;
202                     while ((next = strchr(next, marker_ch)))
203                     {
204                         if ((next[1] >= 'A' && next[1] <= 'Z')
205                             ||(next[1] >= 'a' && next[1] <= 'z'))
206                         {
207                             if (!marker_skip)
208                                 break;
209                             else if (next[2] == ' ')
210                                 break;
211                         }
212                         next++;
213                     }
214                     len = strlen(cp);
215                     if (next)
216                         len = next - cp - marker_skip;
217
218                     if (marker_skip)
219                     {
220                         /* remove ' ' after subfield marker */
221                         char *cp_blank = strchr(cp, ' ');
222                         if (cp_blank)
223                         {
224                             len--;
225                             while (cp_blank != cp)
226                             {
227                                 cp_blank[0] = cp_blank[-1];
228                                 cp_blank--;
229                             }
230                             cp++;
231                         }
232                     }
233                     yaz_marc_add_subfield(mt, cp, len);
234                     if (!next)
235                         break;
236                     cp = next;
237                 }
238             }
239         }
240         else
241         {
242             yaz_marc_cprintf(mt, "Ignoring line: %s", line);
243         }
244     }
245     wrbuf_destroy(wrbuf_line);
246     if (!header_created)
247         return -1;
248     return 0;
249 }
250
251 /*
252  * Local variables:
253  * c-basic-offset: 4
254  * c-file-style: "Stroustrup"
255  * indent-tabs-mode: nil
256  * End:
257  * vim: shiftwidth=4 tabstop=8 expandtab
258  */
259