GFS: fix sizeof: saved by the bell
[yaz-moved-to-github.git] / src / marc_read_xml.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marc_read_xml.c
8  * \brief Implements reading of MARC as XML
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <yaz/marcdisp.h>
23 #include <yaz/wrbuf.h>
24 #include <yaz/yaz-util.h>
25 #include <yaz/nmem_xml.h>
26
27 #if YAZ_HAVE_XML2
28 #include <libxml/tree.h>
29 #endif
30
31 #if YAZ_HAVE_XML2
32 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
33 {
34     NMEM nmem = yaz_marc_get_nmem(mt);
35     for (; ptr; ptr = ptr->next)
36     {
37         if (ptr->type == XML_ELEMENT_NODE)
38         {
39             if (!strcmp((const char *) ptr->name, "subfield"))
40             {
41                 size_t ctrl_data_len = 0;
42                 char *ctrl_data_buf = 0;
43                 const xmlNode *p = 0, *ptr_code = 0;
44                 struct _xmlAttr *attr;
45                 for (attr = ptr->properties; attr; attr = attr->next)
46                     if (!strcmp((const char *)attr->name, "code"))
47                         ptr_code = attr->children;
48                     else
49                     {
50                         yaz_marc_cprintf(
51                             mt, "Bad attribute '%.80s' for 'subfield'",
52                             attr->name);
53                         return -1;
54                     }
55                 if (!ptr_code)
56                 {
57                     yaz_marc_cprintf(
58                         mt, "Missing attribute 'code' for 'subfield'" );
59                     return -1;
60                 }
61                 if (ptr_code->type == XML_TEXT_NODE)
62                 {
63                     ctrl_data_len = 
64                         strlen((const char *)ptr_code->content);
65                 }
66                 else
67                 {
68                     yaz_marc_cprintf(
69                         mt, "Missing value for 'code' in 'subfield'" );
70                     return -1;
71                 }
72                 for (p = ptr->children; p ; p = p->next)
73                     if (p->type == XML_TEXT_NODE)
74                         ctrl_data_len += strlen((const char *)p->content);
75                 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
76                 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
77                 for (p = ptr->children; p ; p = p->next)
78                     if (p->type == XML_TEXT_NODE)
79                         strcat(ctrl_data_buf, (const char *)p->content);
80                 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
81             }
82             else
83             {
84                 yaz_marc_cprintf(
85                     mt, "Expected element 'subfield', got '%.80s'", ptr->name);
86                 return -1;
87             }
88         }
89     }
90     return 0;
91 }
92
93 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
94 {
95     int indicator_length;
96     int identifier_length;
97     int base_address;
98     int length_data_entry;
99     int length_starting;
100     int length_implementation;
101     const char *leader = 0;
102     const xmlNode *ptr = *ptr_p;
103
104     for(; ptr; ptr = ptr->next)
105         if (ptr->type == XML_ELEMENT_NODE)
106         {
107             if (!strcmp((const char *) ptr->name, "leader"))
108             {
109                 xmlNode *p = ptr->children;
110                 for(; p; p = p->next)
111                     if (p->type == XML_TEXT_NODE)
112                         leader = (const char *) p->content;
113                 break;
114             }
115             else
116             {
117                 yaz_marc_cprintf(
118                     mt, "Expected element 'leader', got '%.80s'", ptr->name);
119             }
120         }
121     if (!leader)
122     {
123         yaz_marc_cprintf(mt, "Missing element 'leader'");
124         return -1;
125     }
126     if (strlen(leader) != 24)
127     {
128         yaz_marc_cprintf(mt, "Bad length %d of leader data."
129                          " Must have length of 24 characters", strlen(leader));
130         return -1;
131     }
132     yaz_marc_set_leader(mt, leader,
133                         &indicator_length,
134                         &identifier_length,
135                         &base_address,
136                         &length_data_entry,
137                         &length_starting,
138                         &length_implementation);
139     *ptr_p = ptr;
140     return 0;
141 }
142
143 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
144 {
145     for(; ptr; ptr = ptr->next)
146         if (ptr->type == XML_ELEMENT_NODE)
147         {
148             if (!strcmp((const char *) ptr->name, "controlfield"))
149             {
150                 const xmlNode *ptr_tag = 0;
151                 struct _xmlAttr *attr;
152                 for (attr = ptr->properties; attr; attr = attr->next)
153                     if (!strcmp((const char *)attr->name, "tag"))
154                         ptr_tag = attr->children;
155                     else
156                     {
157                         yaz_marc_cprintf(
158                             mt, "Bad attribute '%.80s' for 'controlfield'",
159                             attr->name);
160                         return -1;
161                     }
162                 if (!ptr_tag)
163                 {
164                     yaz_marc_cprintf(
165                         mt, "Missing attribute 'tag' for 'controlfield'" );
166                     return -1;
167                 }
168                 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
169             }
170             else if (!strcmp((const char *) ptr->name, "datafield"))
171             {
172                 char indstr[11]; /* 0(unused), 1,....9, + zero term */
173                 const xmlNode *ptr_tag = 0;
174                 struct _xmlAttr *attr;
175                 int i;
176                 for (i = 0; i<11; i++)
177                     indstr[i] = '\0';
178                 for (attr = ptr->properties; attr; attr = attr->next)
179                     if (!strcmp((const char *)attr->name, "tag"))
180                         ptr_tag = attr->children;
181                     else if (strlen((const char *)attr->name) == 4 &&
182                              !memcmp(attr->name, "ind", 3))
183                     {
184                         int no = atoi((const char *)attr->name+3);
185                         if (attr->children
186                             && attr->children->type == XML_TEXT_NODE)
187                             indstr[no] = attr->children->content[0];
188                     }
189                     else
190                     {
191                         yaz_marc_cprintf(
192                             mt, "Bad attribute '%.80s' for 'datafield'",
193                             attr->name);
194                     }
195                 if (!ptr_tag)
196                 {
197                     yaz_marc_cprintf(
198                         mt, "Missing attribute 'tag' for 'datafield'" );
199                     return -1;
200                 }
201                 /* note that indstr[0] is unused so we use indstr[1..] */
202                 yaz_marc_add_datafield_xml(mt, ptr_tag,
203                                            indstr+1, strlen(indstr+1));
204                 
205                 if (yaz_marc_read_xml_subfields(mt, ptr->children))
206                     return -1;
207             }
208             else
209             {
210                 yaz_marc_cprintf(mt,
211                                  "Expected element controlfield or datafield,"
212                                  " got %.80s", ptr->name);
213                 return -1;
214             }
215         }
216     return 0;
217 }
218 #endif
219
220 #if YAZ_HAVE_XML2
221 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
222 {
223     yaz_marc_reset(mt);
224
225     for(; ptr; ptr = ptr->next)
226         if (ptr->type == XML_ELEMENT_NODE)
227         {
228             if (!strcmp((const char *) ptr->name, "record"))
229                 break;
230             else
231             {
232                 yaz_marc_cprintf(
233                     mt, "Unknown element '%.80s' in MARC XML reader",
234                     ptr->name);
235                 return -1;
236             }
237         }
238     if (!ptr)
239     {
240         yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
241         return -1;
242     }
243     /* ptr points to record node now */
244     ptr = ptr->children;
245     if (yaz_marc_read_xml_leader(mt, &ptr))
246         return -1;
247     return yaz_marc_read_xml_fields(mt, ptr->next);
248 }
249 #endif
250
251
252 /*
253  * Local variables:
254  * c-basic-offset: 4
255  * c-file-style: "Stroustrup"
256  * indent-tabs-mode: nil
257  * End:
258  * vim: shiftwidth=4 tabstop=8 expandtab
259  */
260