yaz_url uses yaz_check_location and offers verbose mode
[yaz-moved-to-github.git] / src / marc_read_iso2709.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_iso2709.c
8  * \brief Implements reading of MARC as ISO2709
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <yaz/marcdisp.h>
22 #include <yaz/wrbuf.h>
23 #include <yaz/yaz-util.h>
24
25 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
26 {
27     int entry_p;
28     int record_length;
29     int indicator_length;
30     int identifier_length;
31     int end_of_directory;
32     int base_address;
33     int length_data_entry;
34     int length_starting;
35     int length_implementation;
36
37     yaz_marc_reset(mt);
38
39     if (!atoi_n_check(buf, 5, &record_length))
40     {
41         yaz_marc_cprintf(mt, "Bad leader");
42         return -1;
43     }
44     if (record_length < 25)
45     {
46         yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
47         return -1;
48     }
49     /* ballout if bsize is known and record_length is less than that */
50     if (bsize != -1 && record_length > bsize)
51     {
52         yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
53                          record_length, bsize);
54         return -1;
55     }
56     if (yaz_marc_get_debug(mt))
57         yaz_marc_cprintf(mt, "Record length         %5d", record_length);
58
59     yaz_marc_set_leader(mt, buf,
60                         &indicator_length,
61                         &identifier_length,
62                         &base_address,
63                         &length_data_entry,
64                         &length_starting,
65                         &length_implementation);
66
67     /* First pass. determine length of directory & base of data */
68     for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
69     {
70         /* length of directory entry */
71         int l = 3 + length_data_entry + length_starting;
72         if (entry_p + l >= record_length)
73         {
74             yaz_marc_cprintf(mt, "Directory offset %d: end of record."
75                              " Missing FS char", entry_p);
76             return -1;
77         }
78         if (yaz_marc_get_debug(mt))
79         {
80             WRBUF hex = wrbuf_alloc();
81
82             wrbuf_puts(hex, "Tag ");
83             wrbuf_write_escaped(hex, buf + entry_p, 3);
84             wrbuf_puts(hex, ", length ");
85             wrbuf_write_escaped(hex, buf + entry_p + 3,
86                                 length_data_entry);
87             wrbuf_puts(hex, ", starting ");
88             wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
89                                 length_starting);
90             yaz_marc_cprintf(mt, "Directory offset %d: %s",
91                              entry_p, wrbuf_cstr(hex));
92             wrbuf_destroy(hex);
93         }
94         /* Check for digits in length+starting info */
95         while (--l >= 3)
96             if (!yaz_isdigit(buf[entry_p + l]))
97                 break;
98         if (l >= 3)
99         {
100             WRBUF hex = wrbuf_alloc();
101             /* Not all digits, so stop directory scan */
102             wrbuf_write_escaped(hex, buf + entry_p,
103                                 length_data_entry + length_starting + 3);
104             yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
105                              " length and/or length starting (%s)", entry_p,
106                              wrbuf_cstr(hex));
107             wrbuf_destroy(hex);
108             break;
109         }
110         entry_p += 3 + length_data_entry + length_starting;
111     }
112     end_of_directory = entry_p;
113     if (base_address != entry_p+1)
114     {
115         yaz_marc_cprintf(mt, "Base address not at end of directory,"
116                          " base %d, end %d", base_address, entry_p+1);
117     }
118
119     /* Second pass. parse control - and datafields */
120     for (entry_p = 24; entry_p != end_of_directory; )
121     {
122         int data_length;
123         int data_offset;
124         int end_offset;
125         int i;
126         char tag[4];
127         int identifier_flag = 0;
128         int entry_p0 = entry_p;
129
130         memcpy (tag, buf+entry_p, 3);
131         entry_p += 3;
132         tag[3] = '\0';
133         data_length = atoi_n(buf+entry_p, length_data_entry);
134         entry_p += length_data_entry;
135         data_offset = atoi_n(buf+entry_p, length_starting);
136         entry_p += length_starting;
137         i = data_offset + base_address;
138         end_offset = i+data_length-1;
139
140         if (data_length <= 0 || data_offset < 0)
141             break;
142
143         if (yaz_marc_get_debug(mt))
144         {
145             yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
146                              " data-offset %d",
147                              tag, entry_p0, data_length, data_offset);
148         }
149         if (end_offset >= record_length)
150         {
151             yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
152                              entry_p0, end_offset, record_length);
153             break;
154         }
155
156         if (memcmp (tag, "00", 2))
157             identifier_flag = 1;  /* if not 00X assume subfields */
158         else if (indicator_length < 4 && indicator_length > 0)
159         {
160             /* Danmarc 00X have subfields */
161             if (buf[i + indicator_length] == ISO2709_IDFS)
162                 identifier_flag = 1;
163             else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
164                 identifier_flag = 2;
165         }
166
167         if (identifier_flag)
168         {
169             /* datafield */
170             i += identifier_flag-1;
171             if (indicator_length)
172             {
173                 /* skip RS/FS bytes in indicator. They are not allowed there */
174                 int j;
175                 for (j = indicator_length; --j >= 0; )
176                     if (buf[j+i] < ' ')
177                     {
178                         j++;
179                         i += j;
180                         end_offset += j;
181                         yaz_marc_cprintf(mt, "Bad indicator data. "
182                                          "Skipping %d bytes", j);
183                         break;
184                     }
185                 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
186                 i += indicator_length;
187             }
188
189             while (i < end_offset &&
190                     buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
191             {
192                 int code_offset = i+1;
193
194                 i ++;
195                 while (i < end_offset &&
196                         buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
197                        buf[i] != ISO2709_FS)
198                     i++;
199                 if (i > code_offset)
200                     yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
201             }
202         }
203         else
204         {
205             /* controlfield */
206             int i0 = i;
207             while (i < end_offset &&
208                 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
209                 i++;
210             yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
211         }
212         if (i < end_offset)
213         {
214             yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
215                     data_length);
216         }
217         if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
218         {
219             yaz_marc_cprintf(mt, "No separator at end of field length=%d",
220                              data_length);
221         }
222     }
223     return record_length;
224 }
225
226 /*
227  * Local variables:
228  * c-basic-offset: 4
229  * c-file-style: "Stroustrup"
230  * indent-tabs-mode: nil
231  * End:
232  * vim: shiftwidth=4 tabstop=8 expandtab
233  */
234