zoomtst3: print event name
[yaz-moved-to-github.git] / src / marc_read_iso2709.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_iso2709.c
8  * \brief Implements reading of MARC as ISO2709
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <yaz/marcdisp.h>
23 #include <yaz/wrbuf.h>
24 #include <yaz/yaz-util.h>
25
26 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
27 {
28     int entry_p;
29     int record_length;
30     int indicator_length;
31     int identifier_length;
32     int end_of_directory;
33     int base_address;
34     int length_data_entry;
35     int length_starting;
36     int length_implementation;
37
38     yaz_marc_reset(mt);
39
40     record_length = atoi_n (buf, 5);
41     if (record_length < 25)
42     {
43         yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
44         return -1;
45     }
46     /* ballout if bsize is known and record_length is less than that */
47     if (bsize != -1 && record_length > bsize)
48     {
49         yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
50                          record_length, bsize);
51         return -1;
52     }
53     if (yaz_marc_get_debug(mt))
54         yaz_marc_cprintf(mt, "Record length         %5d", record_length);
55
56     yaz_marc_set_leader(mt, buf,
57                         &indicator_length,
58                         &identifier_length,
59                         &base_address,
60                         &length_data_entry,
61                         &length_starting,
62                         &length_implementation);
63
64     /* First pass. determine length of directory & base of data */
65     for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
66     {
67         /* length of directory entry */
68         int l = 3 + length_data_entry + length_starting;
69         if (entry_p + l >= record_length)
70         {
71             yaz_marc_cprintf(mt, "Directory offset %d: end of record."
72                              " Missing FS char", entry_p);
73             return -1;
74         }
75         if (yaz_marc_get_debug(mt))
76         {
77             WRBUF hex = wrbuf_alloc();
78
79             wrbuf_puts(hex, "Tag ");
80             wrbuf_write_escaped(hex, buf + entry_p, 3);
81             wrbuf_puts(hex, ", length ");
82             wrbuf_write_escaped(hex, buf + entry_p + 3,
83                                 length_data_entry);
84             wrbuf_puts(hex, ", starting ");
85             wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
86                                 length_starting);
87             yaz_marc_cprintf(mt, "Directory offset %d: %s",
88                              entry_p, wrbuf_cstr(hex));
89             wrbuf_destroy(hex);
90         }
91         /* Check for digits in length+starting info */
92         while (--l >= 3)
93             if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
94                 break;
95         if (l >= 3)
96         {
97             WRBUF hex = wrbuf_alloc();
98             /* Not all digits, so stop directory scan */
99             wrbuf_write_escaped(hex, buf + entry_p, 
100                                 length_data_entry + length_starting + 3);
101             yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
102                              " length and/or length starting (%s)", entry_p,
103                              wrbuf_cstr(hex));
104             wrbuf_destroy(hex);
105             break;
106         }
107         entry_p += 3 + length_data_entry + length_starting;
108     }
109     end_of_directory = entry_p;
110     if (base_address != entry_p+1)
111     {
112         yaz_marc_cprintf(mt, "Base address not at end of directory,"
113                          " base %d, end %d", base_address, entry_p+1);
114     }
115
116     /* Second pass. parse control - and datafields */
117     for (entry_p = 24; entry_p != end_of_directory; )
118     {
119         int data_length;
120         int data_offset;
121         int end_offset;
122         int i;
123         char tag[4];
124         int identifier_flag = 0;
125         int entry_p0 = entry_p;
126
127         memcpy (tag, buf+entry_p, 3);
128         entry_p += 3;
129         tag[3] = '\0';
130         data_length = atoi_n(buf+entry_p, length_data_entry);
131         entry_p += length_data_entry;
132         data_offset = atoi_n(buf+entry_p, length_starting);
133         entry_p += length_starting;
134         i = data_offset + base_address;
135         end_offset = i+data_length-1;
136
137         if (data_length <= 0 || data_offset < 0)
138             break;
139         
140         if (yaz_marc_get_debug(mt))
141         {
142             yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
143                              " data-offset %d",
144                              tag, entry_p0, data_length, data_offset);
145         }
146         if (end_offset >= record_length)
147         {
148             yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
149                              entry_p0, end_offset, record_length);
150             break;
151         }
152         
153         if (memcmp (tag, "00", 2))
154             identifier_flag = 1;  /* if not 00X assume subfields */
155         else if (indicator_length < 4 && indicator_length > 0)
156         {
157             /* Danmarc 00X have subfields */
158             if (buf[i + indicator_length] == ISO2709_IDFS)
159                 identifier_flag = 1;
160             else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
161                 identifier_flag = 2;
162         }
163
164         if (identifier_flag)
165         {
166             /* datafield */
167             i += identifier_flag-1;
168             if (indicator_length)
169             {
170                 /* skip RS/FS bytes in indicator. They are not allowed there */
171                 int j;
172                 for (j = indicator_length; --j >= 0; )
173                     if (buf[j+i] < ' ')
174                     {
175                         j++;
176                         i += j;
177                         end_offset += j;
178                         yaz_marc_cprintf(mt, "Bad indicator data. "
179                                          "Skipping %d bytes", j);
180                         break;
181                     }
182                 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
183                 i += indicator_length;
184             }
185
186             while (i < end_offset &&
187                     buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
188             {
189                 int code_offset = i+1;
190
191                 i ++;
192                 while (i < end_offset &&
193                         buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
194                        buf[i] != ISO2709_FS)
195                     i++;
196                 if (i > code_offset)
197                     yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
198             }
199         }
200         else
201         {
202             /* controlfield */
203             int i0 = i;
204             while (i < end_offset && 
205                 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
206                 i++;
207             yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
208         }
209         if (i < end_offset)
210         {
211             yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
212                     data_length);
213         }
214         if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
215         {
216             yaz_marc_cprintf(mt, "No separator at end of field length=%d",
217                              data_length);
218         }
219     }
220     return record_length;
221 }
222
223 /*
224  * Local variables:
225  * c-basic-offset: 4
226  * c-file-style: "Stroustrup"
227  * indent-tabs-mode: nil
228  * End:
229  * vim: shiftwidth=4 tabstop=8 expandtab
230  */
231