eefeb84f0ce6bdc7ca8282d24346c7ea437acfea
[yaz-moved-to-github.git] / src / marc_read_line.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_line.c
8  * \brief Implements reading of MARC in line format
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <assert.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27
28 int yaz_gets(int (*getbyte)(void *client_data),
29              void (*ungetbyte)(int b, void *client_data),
30              void *client_data,
31              WRBUF w)
32 {
33     size_t sz = 0;
34     int ch = getbyte(client_data);
35
36     while (ch != '\0' && ch != '\r' && ch != '\n')
37     {
38         wrbuf_putc(w, ch);
39         sz++;
40         ch = getbyte(client_data);
41     }
42     if (ch == '\r')
43     {
44         ch = getbyte(client_data);
45         if (ch != '\n' && ch != '\0')
46             ungetbyte(ch, client_data);
47     }
48     else if (ch == '\n')
49     {
50         ch = getbyte(client_data);
51         if (ch != '\r' && ch != '\0')
52             ungetbyte(ch, client_data);
53     }
54     if (sz)
55     {
56         return 1;
57     }
58     return 0;
59 }
60
61 static int yaz_marc_line_gets(int (*getbyte)(void *client_data),
62                               void (*ungetbyte)(int b, void *client_data),
63                               void *client_data,
64                               WRBUF w)
65 {
66     int more;
67
68     wrbuf_rewind(w);
69     more = yaz_gets(getbyte, ungetbyte, client_data, w);
70     if (!more)
71         return 0;
72
73     while (more)
74     {
75         int i;
76         for (i = 0; i<4; i++)
77         {
78             int ch = getbyte(client_data);
79             if (ch != ' ')
80             {
81                 if (ch)
82                     ungetbyte(ch, client_data);
83                 return 1;
84             }
85         }
86         if (wrbuf_len(w) > 60 && wrbuf_buf(w)[wrbuf_len(w)-1] == '=')
87             wrbuf_cut_right(w, 1);
88         else
89             wrbuf_puts(w, " ");
90         more = yaz_gets(getbyte, ungetbyte, client_data, w);
91     }
92     return 1;
93 }
94
95     
96 int yaz_marc_read_line(yaz_marc_t mt,
97                        int (*getbyte)(void *client_data),
98                        void (*ungetbyte)(int b, void *client_data),
99                        void *client_data)
100 {
101     int indicator_length;
102     int identifier_length;
103     int base_address;
104     int length_data_entry;
105     int length_starting;
106     int length_implementation;
107     int marker_ch = 0;
108     int marker_skip = 0;
109     int header_created = 0;
110     WRBUF wrbuf_line = wrbuf_alloc();
111
112     yaz_marc_reset(mt);
113
114     while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line))
115     {
116         const char *line = wrbuf_cstr(wrbuf_line);
117         int val;
118         size_t line_len = strlen(line);
119         if (line_len == 0)       /* empty line indicates end of record */
120         {
121             if (header_created)
122                 break;
123         }
124         else if (line[0] == '$') /* indicates beginning/end of record */
125         {
126             if (header_created)
127                 break;
128         }
129         else if (line[0] == '(') /* annotation, skip it */
130             ;
131         else if (line_len == 24 && atoi_n_check(line, 5, &val) && val >= 24)
132         {
133             /* deal with header lines:  00366nam  22001698a 4500
134             */
135
136             if (header_created)
137                 break;
138             yaz_marc_set_leader(mt, line,
139                                 &indicator_length,
140                                 &identifier_length,
141                                 &base_address,
142                                 &length_data_entry,
143                                 &length_starting,
144                                 &length_implementation);
145             header_created = 1;
146         }
147         else if (line_len > 4 && line[0] != ' ' && line[1] != ' '
148                  && line[2] != ' ' && line[3] == ' ' )
149         {
150             /* deal with data/control lines: 245 12 ........ */
151             char tag[4];
152             const char *datafield_start = line+6;
153             marker_ch = 0;
154             marker_skip = 0;
155
156             memcpy(tag, line, 3);
157             tag[3] = '\0';
158             if (line_len >= 8) /* control - or datafield ? */
159             {
160                 if (*datafield_start == ' ')
161                     datafield_start++;  /* skip blank after indicator */
162
163                 if (strchr("$_*", *datafield_start))
164                 {
165                     marker_ch = *datafield_start;
166                     if (datafield_start[2] == ' ')
167                         marker_skip = 1; /* subfields has blank before data */
168                 }
169             }
170             if (!header_created)
171             {
172                 const char *leader = "01000cam  2200265 i 4500";
173
174                 yaz_marc_set_leader(mt, leader,
175                                     &indicator_length,
176                                     &identifier_length,
177                                     &base_address,
178                                     &length_data_entry,
179                                     &length_starting,
180                                     &length_implementation);
181                 header_created = 1;
182             }
183
184             if (marker_ch == 0)
185             {   /* control field */
186                 yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
187             }
188             else
189             {   /* data field */
190                 const char *indicator = line+4;
191                 int indicator_len = 2;
192                 const char *cp = datafield_start;
193
194                 yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
195                 for (;;)
196                 {
197                     const char *next;
198                     size_t len;
199                     
200                     assert(cp[0] == marker_ch);
201                     cp++;
202                     next = cp;
203                     while ((next = strchr(next, marker_ch)))
204                     {
205                         if ((next[1] >= 'A' && next[1] <= 'Z')
206                             ||(next[1] >= 'a' && next[1] <= 'z'))
207                         {
208                             if (!marker_skip)
209                                 break;
210                             else if (next[2] == ' ')
211                                 break;
212                         }
213                         next++;
214                     }
215                     len = strlen(cp);
216                     if (next)
217                         len = next - cp - marker_skip;
218
219                     if (marker_skip)
220                     {
221                         /* remove ' ' after subfield marker */
222                         char *cp_blank = strchr(cp, ' ');
223                         if (cp_blank)
224                         {
225                             len--;
226                             while (cp_blank != cp)
227                             {
228                                 cp_blank[0] = cp_blank[-1];
229                                 cp_blank--;
230                             }
231                             cp++;
232                         }
233                     }
234                     yaz_marc_add_subfield(mt, cp, len);
235                     if (!next)
236                         break;
237                     cp = next;
238                 }
239             }
240         }
241         else
242         {
243             yaz_marc_cprintf(mt, "Ignoring line: %s", line);
244         }
245     }
246     wrbuf_destroy(wrbuf_line);
247     if (!header_created)
248         return -1;
249     return 0;
250 }
251
252 /*
253  * Local variables:
254  * c-basic-offset: 4
255  * c-file-style: "Stroustrup"
256  * indent-tabs-mode: nil
257  * End:
258  * vim: shiftwidth=4 tabstop=8 expandtab
259  */
260