Add new function nmem_strsplitx.
[yaz-moved-to-github.git] / src / marc_read_iso2709.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2011 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_iso2709.c
8  * \brief Implements reading of MARC as ISO2709
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <yaz/marcdisp.h>
22 #include <yaz/wrbuf.h>
23 #include <yaz/yaz-util.h>
24
25 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
26 {
27     int entry_p;
28     int record_length;
29     int indicator_length;
30     int identifier_length;
31     int end_of_directory;
32     int base_address;
33     int length_data_entry;
34     int length_starting;
35     int length_implementation;
36
37     yaz_marc_reset(mt);
38
39     record_length = atoi_n (buf, 5);
40     if (record_length < 25)
41     {
42         yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
43         return -1;
44     }
45     /* ballout if bsize is known and record_length is less than that */
46     if (bsize != -1 && record_length > bsize)
47     {
48         yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
49                          record_length, bsize);
50         return -1;
51     }
52     if (yaz_marc_get_debug(mt))
53         yaz_marc_cprintf(mt, "Record length         %5d", record_length);
54
55     yaz_marc_set_leader(mt, buf,
56                         &indicator_length,
57                         &identifier_length,
58                         &base_address,
59                         &length_data_entry,
60                         &length_starting,
61                         &length_implementation);
62
63     /* First pass. determine length of directory & base of data */
64     for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
65     {
66         /* length of directory entry */
67         int l = 3 + length_data_entry + length_starting;
68         if (entry_p + l >= record_length)
69         {
70             yaz_marc_cprintf(mt, "Directory offset %d: end of record."
71                              " Missing FS char", entry_p);
72             return -1;
73         }
74         if (yaz_marc_get_debug(mt))
75         {
76             WRBUF hex = wrbuf_alloc();
77
78             wrbuf_puts(hex, "Tag ");
79             wrbuf_write_escaped(hex, buf + entry_p, 3);
80             wrbuf_puts(hex, ", length ");
81             wrbuf_write_escaped(hex, buf + entry_p + 3,
82                                 length_data_entry);
83             wrbuf_puts(hex, ", starting ");
84             wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
85                                 length_starting);
86             yaz_marc_cprintf(mt, "Directory offset %d: %s",
87                              entry_p, wrbuf_cstr(hex));
88             wrbuf_destroy(hex);
89         }
90         /* Check for digits in length+starting info */
91         while (--l >= 3)
92             if (!yaz_isdigit(buf[entry_p + l]))
93                 break;
94         if (l >= 3)
95         {
96             WRBUF hex = wrbuf_alloc();
97             /* Not all digits, so stop directory scan */
98             wrbuf_write_escaped(hex, buf + entry_p, 
99                                 length_data_entry + length_starting + 3);
100             yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
101                              " length and/or length starting (%s)", entry_p,
102                              wrbuf_cstr(hex));
103             wrbuf_destroy(hex);
104             break;
105         }
106         entry_p += 3 + length_data_entry + length_starting;
107     }
108     end_of_directory = entry_p;
109     if (base_address != entry_p+1)
110     {
111         yaz_marc_cprintf(mt, "Base address not at end of directory,"
112                          " base %d, end %d", base_address, entry_p+1);
113     }
114
115     /* Second pass. parse control - and datafields */
116     for (entry_p = 24; entry_p != end_of_directory; )
117     {
118         int data_length;
119         int data_offset;
120         int end_offset;
121         int i;
122         char tag[4];
123         int identifier_flag = 0;
124         int entry_p0 = entry_p;
125
126         memcpy (tag, buf+entry_p, 3);
127         entry_p += 3;
128         tag[3] = '\0';
129         data_length = atoi_n(buf+entry_p, length_data_entry);
130         entry_p += length_data_entry;
131         data_offset = atoi_n(buf+entry_p, length_starting);
132         entry_p += length_starting;
133         i = data_offset + base_address;
134         end_offset = i+data_length-1;
135
136         if (data_length <= 0 || data_offset < 0)
137             break;
138         
139         if (yaz_marc_get_debug(mt))
140         {
141             yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
142                              " data-offset %d",
143                              tag, entry_p0, data_length, data_offset);
144         }
145         if (end_offset >= record_length)
146         {
147             yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
148                              entry_p0, end_offset, record_length);
149             break;
150         }
151         
152         if (memcmp (tag, "00", 2))
153             identifier_flag = 1;  /* if not 00X assume subfields */
154         else if (indicator_length < 4 && indicator_length > 0)
155         {
156             /* Danmarc 00X have subfields */
157             if (buf[i + indicator_length] == ISO2709_IDFS)
158                 identifier_flag = 1;
159             else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
160                 identifier_flag = 2;
161         }
162
163         if (identifier_flag)
164         {
165             /* datafield */
166             i += identifier_flag-1;
167             if (indicator_length)
168             {
169                 /* skip RS/FS bytes in indicator. They are not allowed there */
170                 int j;
171                 for (j = indicator_length; --j >= 0; )
172                     if (buf[j+i] < ' ')
173                     {
174                         j++;
175                         i += j;
176                         end_offset += j;
177                         yaz_marc_cprintf(mt, "Bad indicator data. "
178                                          "Skipping %d bytes", j);
179                         break;
180                     }
181                 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
182                 i += indicator_length;
183             }
184
185             while (i < end_offset &&
186                     buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
187             {
188                 int code_offset = i+1;
189
190                 i ++;
191                 while (i < end_offset &&
192                         buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
193                        buf[i] != ISO2709_FS)
194                     i++;
195                 if (i > code_offset)
196                     yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
197             }
198         }
199         else
200         {
201             /* controlfield */
202             int i0 = i;
203             while (i < end_offset && 
204                 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
205                 i++;
206             yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
207         }
208         if (i < end_offset)
209         {
210             yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
211                     data_length);
212         }
213         if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
214         {
215             yaz_marc_cprintf(mt, "No separator at end of field length=%d",
216                              data_length);
217         }
218     }
219     return record_length;
220 }
221
222 /*
223  * Local variables:
224  * c-basic-offset: 4
225  * c-file-style: "Stroustrup"
226  * indent-tabs-mode: nil
227  * End:
228  * vim: shiftwidth=4 tabstop=8 expandtab
229  */
230