/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2010 Index Data
+ * Copyright (C) Index Data
* See the file LICENSE for details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
#include <yaz/marcdisp.h>
#include <yaz/wrbuf.h>
#include <yaz/yaz-util.h>
collection_first,
collection_second
};
-
+
/** \brief node types for yaz_marc_node */
enum YAZ_MARC_NODE_TYPE
-{
+{
YAZ_MARC_DATAFIELD,
YAZ_MARC_CONTROLFIELD,
YAZ_MARC_COMMENT,
static int marc_exec_leader(const char *leader_spec, char *leader,
size_t size);
+#if YAZ_HAVE_XML2
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
- const char *ns,
+ const char *ns,
const char *format,
const char *type);
+#endif
static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
{
mt->subfield_pp = &n->u.datafield.subfields;
}
-// Magic function: adds a attribute value to the element name if it is plain characters.
-// if not, and if the attribute name is not null, it will append a attribute element with the value
-// if attribute name is null it will return a non-zero value meaning it couldnt handle the value.
+/** \brief adds an attribute value to the element name if it is plain chars
-int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len)
+ If not, and if the attribute name is not null, it will append an
+ attribute element with the value. If the attribute name is null, it
+ will return a non-zero value, meaning it couldn't handle the value.
+*/
+static int element_name_append_attribute_value(
+ yaz_marc_t mt, WRBUF buffer,
+ const char *attribute_name, char *code_data, size_t code_len)
{
- // TODO Map special codes to something possible for XML ELEMENT names
+ /* TODO Map special codes to something possible for XML ELEMENT names */
int encode = 0;
- int index = 0;
+ size_t index = 0;
int success = 0;
for (index = 0; index < code_len; index++)
{
(code_data[index] >= 'A' && code_data[index] <= 'Z')))
encode = 1;
}
- // Add as attribute
+ /* Add as attribute */
if (encode && attribute_name)
wrbuf_printf(buffer, " %s=\"", attribute_name);
success = -1;
if (encode && attribute_name)
- wrbuf_printf(buffer, "\""); // return error if we couldn't handle it.
+ wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
return success;
}
n->u.datafield.indicator = indicators;
n->u.datafield.subfields = 0;
- // make subfield_pp the current (last one)
+ /* make subfield_pp the current (last one) */
mt->subfield_pp = &n->u.datafield.subfields;
}
}
}
+/** \brief replaces a leader character that lies outside ' '..127
+    \param mt handle; the diagnostic is reported through yaz_marc_cprintf
+    \param leader leader buffer, modified in place
+    \param offset position within leader to inspect
+    \param ch_default character stored at offset when the check fails
+*/
+static void check_ascii(yaz_marc_t mt, char *leader, int offset,
+                        int ch_default)
+{
+    /* look at the byte as unsigned so the range test gives the same
+       answer whether plain char is signed or unsigned */
+    const unsigned char ch = (unsigned char) leader[offset];
+
+    if (ch >= ' ' && ch <= 127)
+        return; /* acceptable, leave it alone */
+
+    yaz_marc_cprintf(mt,
+                     "Leader character at offset %d is non-ASCII. "
+                     "Setting value to '%c'", offset, ch_default);
+    leader[offset] = ch_default;
+}
+
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
int *indicator_length,
int *identifier_length,
memcpy(leader, leader_c, 24);
- if (!atoi_n_check(leader+10, 1, indicator_length))
+ check_ascii(mt, leader, 5, 'a');
+ check_ascii(mt, leader, 6, 'a');
+ check_ascii(mt, leader, 7, 'a');
+ check_ascii(mt, leader, 8, '#');
+ check_ascii(mt, leader, 9, '#');
+ if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
{
- yaz_marc_cprintf(mt,
- "Indicator length at offset 10 should hold a digit."
- " Assuming 2");
+ yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
+ " hold a number 1-9. Assuming 2");
leader[10] = '2';
*indicator_length = 2;
}
- if (!atoi_n_check(leader+11, 1, identifier_length))
+ if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
{
- yaz_marc_cprintf(mt,
- "Identifier length at offset 11 should hold a digit."
- " Assuming 2");
+ yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
+ " hold a number 1-9. Assuming 2");
leader[11] = '2';
*identifier_length = 2;
}
if (!atoi_n_check(leader+12, 5, base_address))
{
- yaz_marc_cprintf(mt,
- "Base address at offsets 12..16 should hold a number."
- " Assuming 0");
+ yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
+ " hold a number. Assuming 0");
*base_address = 0;
}
- if (!atoi_n_check(leader+20, 1, length_data_entry))
+ check_ascii(mt, leader, 17, '#');
+ check_ascii(mt, leader, 18, '#');
+ check_ascii(mt, leader, 19, '#');
+ if (!atoi_n_check(leader+20, 1, length_data_entry) ||
+ *length_data_entry < 3)
{
- yaz_marc_cprintf(mt,
- "Length data entry at offset 20 should hold a digit."
- " Assuming 4");
+ yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
+ " hold a number 3-9. Assuming 4");
*length_data_entry = 4;
leader[20] = '4';
}
- if (!atoi_n_check(leader+21, 1, length_starting))
+ if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
{
- yaz_marc_cprintf(mt,
- "Length starting at offset 21 should hold a digit."
- " Assuming 5");
+ yaz_marc_cprintf(mt, "Length starting at offset 21 should"
+ " hold a number 4-9. Assuming 5");
*length_starting = 5;
leader[21] = '5';
}
if (!atoi_n_check(leader+22, 1, length_implementation))
{
- yaz_marc_cprintf(mt,
- "Length implementation at offset 22 should hold a digit."
- " Assuming 0");
+ yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
+ " hold a number. Assuming 0");
*length_implementation = 0;
leader[22] = '0';
}
+ check_ascii(mt, leader, 23, '0');
if (mt->debug)
{
size_t inbytesleft = i;
size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
&outp, &outbytesleft);
+ yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
if (r != (size_t) (-1))
return i; /* got a complete sequence */
}
return 1; /* giving up */
}
+ else
+ {
+ int error = 0;
+ size_t no_read = 0;
+ (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf),
+ &no_read, &error);
+ if (error == 0 && no_read > 0)
+ return no_read;
+ }
return 1; /* we don't know */
}
-
+
void yaz_marc_reset(yaz_marc_t mt)
{
nmem_reset(mt->nmem);
leader = n->u.leader;
break;
}
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+11, 1, &identifier_length))
switch(n->which)
{
case YAZ_MARC_COMMENT:
- wrbuf_iconv_write(wr, mt->iconv_cd,
+ wrbuf_iconv_write(wr, mt->iconv_cd,
n->u.comment, strlen(n->u.comment));
wrbuf_puts(wr, "\n");
break;
leader = n->u.leader;
break;
}
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+11, 1, &identifier_length))
{
size_t using_code_len = get_subfield_len(mt, s->code_data,
identifier_length);
-
- wrbuf_puts (wr, mt->subfield_str);
- wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
+
+ wrbuf_puts (wr, mt->subfield_str);
+ wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
using_code_len);
wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
- wrbuf_iconv_puts(wr, mt->iconv_cd,
+ wrbuf_iconv_puts(wr, mt->iconv_cd,
s->code_data + using_code_len);
marc_iconv_reset(mt, wr);
}
break;
case YAZ_MARC_COMMENT:
wrbuf_puts(wr, "(");
- wrbuf_iconv_write(wr, mt->iconv_cd,
+ wrbuf_iconv_write(wr, mt->iconv_cd,
n->u.comment, strlen(n->u.comment));
marc_iconv_reset(mt, wr);
wrbuf_puts(wr, ")\n");
switch(mt->output_format)
{
case YAZ_MARC_MARCXML:
- case YAZ_MARC_TMARCXML:
+ case YAZ_MARC_TURBOMARC:
wrbuf_printf(wr, "</collection>\n");
break;
case YAZ_MARC_XCHANGE:
return yaz_marc_write_line(mt, wr);
case YAZ_MARC_MARCXML:
return yaz_marc_write_marcxml(mt, wr);
- case YAZ_MARC_TMARCXML:
- return yaz_marc_write_turbo_xml(mt, wr);
+ case YAZ_MARC_TURBOMARC:
+ return yaz_marc_write_turbomarc(mt, wr);
case YAZ_MARC_XCHANGE:
return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
case YAZ_MARC_ISO2709:
return yaz_marc_write_iso2709(mt, wr);
case YAZ_MARC_CHECK:
return yaz_marc_write_check(mt, wr);
+ case YAZ_MARC_JSON:
+ return yaz_marc_write_json(mt, wr);
}
return -1;
}
\retval -1 failure
*/
static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
- const char *ns,
+ const char *ns,
const char *format,
const char *type,
int turbo)
leader = n->u.leader;
break;
}
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+11, 1, &identifier_length))
return -1;
-
+
if (mt->enable_collection != no_collection)
{
if (mt->enable_collection == collection_first)
wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
s->code_data, using_code_len);
wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
- }
+ }
else
{
element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
wrbuf_puts(wr, ">\n");
}
wrbuf_printf(wr, " </%s", datafield_name[turbo]);
- //TODO Not CDATA
+ /* TODO Not CDATA */
if (turbo)
wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
strlen(n->u.datafield.tag));
}
else
{
- //TODO convert special
+ /* TODO convert special */
wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
strlen(n->u.controlfield.tag));
wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
strlen(n->u.controlfield.data));
marc_iconv_reset(mt, wr);
wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
- //TODO convert special
+ /* TODO convert special */
if (turbo)
wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
strlen(n->u.controlfield.tag));
}
static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
- const char *ns,
+ const char *ns,
const char *format,
const char *type,
int turbo)
0, 0, 0);
}
-int yaz_marc_write_turbo_xml(yaz_marc_t mt, WRBUF wr)
+/** \brief writes the record in the Turbo MARC XML variant
+    \param mt handle
+    \param wr WRBUF that receives the output
+    \returns the result of yaz_marc_write_marcxml_ns
+
+    Unless mt->leader_spec is set, leader position 9 is changed to "a"
+    first. The record is then written by yaz_marc_write_marcxml_ns using
+    the turbomarc namespace, no format, no type and the turbo flag set.
+*/
+int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
{
/* set leader 09 to 'a' for UNICODE */
/* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
if (!mt->leader_spec)
yaz_marc_modify_leader(mt, 9, "a");
return yaz_marc_write_marcxml_ns(mt, wr,
- "http://www.indexdata.com/MARC21/turboxml", 0, 0, 1);
+ "http://www.indexdata.com/turbomarc", 0, 0, 1);
}
int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
struct yaz_marc_subfield *s;
WRBUF subfield_name = wrbuf_alloc();
- //TODO consider if safe
+ /* TODO consider if safe */
char field[10];
field[0] = 'd';
strncpy(field + 1, n->u.datafield.tag, 3);
{
char ind_str[6];
char ind_val[2];
-
+
ind_val[0] = n->u.datafield.indicator[i];
ind_val[1] = '\0';
sprintf(ind_str, "%s%d", indicator_name[1], i+1);
wrbuf_rewind(wr_cdata);
wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
marc_iconv_reset(mt, wr_cdata);
-
+
wrbuf_rewind(subfield_name);
wrbuf_puts(subfield_name, "s");
not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
BAD_CAST wrbuf_cstr(wr_cdata));
if (not_written)
{
- // Generate code attribute value and add
+ /* Generate code attribute value and add */
wrbuf_rewind(wr_cdata);
wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
}
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
- const char *ns,
+ const char *ns,
const char *format,
const char *type)
{
leader = n->u.leader;
break;
}
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+11, 1, &identifier_length))
char field[10];
field[0] = 'c';
field[4] = '\0';
-
+
switch(n->which)
{
case YAZ_MARC_DATAFIELD:
wrbuf_rewind(wr_cdata);
wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
marc_iconv_reset(mt, wr_cdata);
-
+
strncpy(field + 1, n->u.controlfield.tag, 3);
ptr = xmlNewTextChild(record_ptr, ns_record,
BAD_CAST field,
xmlAddChild(record_ptr, ptr);
break;
case YAZ_MARC_LEADER:
- {
- char *field = "leader";
- field = "l";
- xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
+ xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
BAD_CAST n->u.leader);
- }
- break;
+ break;
}
}
wrbuf_destroy(wr_cdata);
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
- const char *ns,
+ const char *ns,
const char *format,
const char *type)
{
leader = n->u.leader;
break;
}
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+11, 1, &identifier_length))
wrbuf_rewind(wr_cdata);
wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
marc_iconv_reset(mt, wr_cdata);
-
+
ptr = xmlNewTextChild(record_ptr, ns_record,
BAD_CAST "controlfield",
BAD_CAST wrbuf_cstr(wr_cdata));
-
+
xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
break;
case YAZ_MARC_COMMENT:
const char *leader = 0;
WRBUF wr_dir, wr_head, wr_data_tmp;
int base_address;
-
+
for (n = mt->nodes; n; n = n->next)
if (n->which == YAZ_MARC_LEADER)
leader = n->u.leader;
-
+
if (!leader)
return -1;
if (!atoi_n_check(leader+10, 1, &indicator_length))
wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
wrbuf_rewind(wr_data_tmp);
- wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
+ wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
n->u.controlfield.data);
marc_iconv_reset(mt, wr_data_tmp);
wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
wrbuf_printf(wr_head, "%05d", base_address);
/* from "original" leader */
wrbuf_write(wr_head, leader+17, 7);
-
+
wrbuf_write(wr, wrbuf_buf(wr_head), 24);
wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
wrbuf_destroy(wr_head);
switch(n->which)
{
case YAZ_MARC_DATAFIELD:
- wrbuf_printf(wr, "%.*s", indicator_length,
- n->u.datafield.indicator);
+ wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
for (s = n->u.datafield.subfields; s; s = s->next)
{
wrbuf_putc(wr, ISO2709_IDFS);
return 0;
}
+/** \brief writes the record as JSON
+    \param mt handle holding the record nodes
+    \param w WRBUF that receives the JSON text
+    \retval 0 success
+    \retval -1 no leader node was found, or leader offset 11 was rejected
+               by atoi_n_check; nothing is written to w in that case
+
+    The output is one object with a "leader" string and a "fields" array.
+    Each control field becomes an object { tag : data }. Each data field
+    becomes an object { tag : { "subfields" : [ { code : data }, ... ],
+    "ind1" : "x", "ind2" : "y", ... } }. Comment nodes are not written.
+*/
+int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
+{
+    int identifier_length;
+    struct yaz_marc_node *n;
+    const char *leader = 0;
+    int first = 1;
+
+    /* the last leader node wins, as in the other writers */
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+            leader = n->u.leader;
+
+    if (!leader)
+        return -1;
+
+    if (!atoi_n_check(leader+11, 1, &identifier_length))
+        return -1;
+
+    /* open the object only after validation so that a failing call
+       does not leave a dangling "{" in the caller's buffer */
+    wrbuf_puts(w, "{\n");
+    wrbuf_puts(w, "\t\"leader\":\"");
+    wrbuf_json_puts(w, leader);
+    wrbuf_puts(w, "\",\n");
+    wrbuf_puts(w, "\t\"fields\":\n\t[\n");
+
+    for (n = mt->nodes; n; n = n->next)
+    {
+        struct yaz_marc_subfield *s;
+        const char *sep = "";
+        switch (n->which)
+        {
+        case YAZ_MARC_LEADER:
+        case YAZ_MARC_COMMENT:
+            break;
+        case YAZ_MARC_CONTROLFIELD:
+            if (first)
+                first = 0;
+            else
+                wrbuf_puts(w, ",\n");
+            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
+            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
+            wrbuf_puts(w, "\":\"");
+            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
+            wrbuf_puts(w, "\"\n\t\t}");
+            break;
+        case YAZ_MARC_DATAFIELD:
+            if (first)
+                first = 0;
+            else
+                wrbuf_puts(w, ",\n");
+
+            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
+            wrbuf_json_puts(w, n->u.datafield.tag);
+            wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
+            for (s = n->u.datafield.subfields; s; s = s->next)
+            {
+                /* code_data holds the subfield code followed by its data */
+                size_t using_code_len = get_subfield_len(mt, s->code_data,
+                                                         identifier_length);
+                wrbuf_puts(w, sep);
+                sep = ",\n";
+                wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
+                wrbuf_iconv_json_write(w, mt->iconv_cd,
+                                       s->code_data, using_code_len);
+                wrbuf_puts(w, "\":\"");
+                wrbuf_iconv_json_puts(w, mt->iconv_cd,
+                                      s->code_data + using_code_len);
+                wrbuf_puts(w, "\"\n\t\t\t\t\t}");
+            }
+            wrbuf_puts(w, "\n\t\t\t\t]");
+            /* guard against a missing indicator string */
+            if (n->u.datafield.indicator)
+            {
+                int i;
+                for (i = 0; n->u.datafield.indicator[i]; i++)
+                {
+                    /* indicators come straight from the record and may
+                       be '"' or '\\'; run them through the JSON escaper
+                       instead of printing the raw byte */
+                    char ind[2];
+
+                    ind[0] = n->u.datafield.indicator[i];
+                    ind[1] = '\0';
+                    wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"", i + 1);
+                    wrbuf_json_puts(w, ind);
+                    wrbuf_puts(w, "\"");
+                }
+            }
+            /* close the inner and outer object without the stray blank
+               line, matching the layout of the control field branch */
+            wrbuf_puts(w, "\n\t\t\t}");
+            wrbuf_puts(w, "\n\t\t}");
+            break;
+        }
+    }
+    wrbuf_puts(w, "\n\t]\n");
+    wrbuf_puts(w, "}\n");
+    return 0;
+}
int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
{
{
const char *vp = strchr(val+1, '\'');
size_t len;
-
+
if (!vp)
return -1;
len = vp-val-1;
+/** \brief maps a format name to a YAZ_MARC_* output mode
+    \param arg format name; must not be NULL (it is passed to strcmp)
+    \returns the matching YAZ_MARC_* value, or -1 if arg is not recognized
+
+    Names are compared with strcmp, so matching is case-sensitive.
+    Recognized names are "marc", "marcxml", "turbomarc", "marcxchange",
+    "line" and "json".
+*/
int yaz_marc_decode_formatstr(const char *arg)
{
- int mode = -1;
+ int mode = -1;
if (!strcmp(arg, "marc"))
mode = YAZ_MARC_ISO2709;
if (!strcmp(arg, "marcxml"))
mode = YAZ_MARC_MARCXML;
- if (!strcmp(arg, "tmarcxml"))
- mode = YAZ_MARC_TMARCXML;
+ if (!strcmp(arg, "turbomarc"))
+ mode = YAZ_MARC_TURBOMARC;
if (!strcmp(arg, "marcxchange"))
mode = YAZ_MARC_XCHANGE;
if (!strcmp(arg, "line"))
mode = YAZ_MARC_LINE;
+ if (!strcmp(arg, "json"))
+ mode = YAZ_MARC_JSON;
return mode;
}
mt->write_using_libxml2 = enable;
}
+/** \brief tests whether a record labelled MARC-8 has 'a' at leader offset 9
+    \param charset character set name; may be NULL
+    \param marc_buf raw record buffer; may be NULL
+    \param sz size of marc_buf in bytes
+    \retval 1 yaz_matchstr gives zero for charset against "MARC8?" or
+              "MARC8", marc_buf is non-NULL, sz is greater than 25 and
+              marc_buf[9] is 'a'
+    \retval 0 otherwise
+*/
+int yaz_marc_check_marc21_coding(const char *charset,
+                                 const char *marc_buf, int sz)
+{
+    int charset_is_marc8;
+
+    if (!charset)
+        return 0;
+
+    /* a zero result from yaz_matchstr is treated as a match; the second
+       pattern is only tried when the first one did not match */
+    charset_is_marc8 = !yaz_matchstr(charset, "MARC8?")
+        || !yaz_matchstr(charset, "MARC8");
+    if (!charset_is_marc8)
+        return 0;
+
+    if (!marc_buf || sz <= 25)
+        return 0;
+
+    return marc_buf[9] == 'a' ? 1 : 0;
+}
+
/*
* Local variables:
* c-basic-offset: 4