Using the new ylog.h everywhere, and fixing what that breaks!
[idzebra-moved-to-github.git] / recctrl / marcread.c
index c112f9c..a0fdcac 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: marcread.c,v 1.19 2003-03-05 11:12:18 oleg Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+/* $Id: marcread.c,v 1.27 2004-11-19 10:27:12 heikki Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
    Index Data Aps
 
 This file is part of the Zebra server.
@@ -24,18 +24,23 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <ctype.h>
 #include <assert.h>
 
-#include <yaz/log.h>
+#include <yaz/ylog.h>
 #include <yaz/yaz-util.h>
 #include <yaz/marcdisp.h>
-#include "grsread.h"
+#include <idzebra/recgrs.h>
 #include "marcomp.h"
 #include "inline.h"
 
 #define MARC_DEBUG 0
 #define MARCOMP_DEBUG 0
 
-static data1_node *grs_read_iso2709 (struct grs_read_info *p)
+struct marc_info {
+    char type[256];
+};
+
+static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
 {
+    struct marc_info *mi = (struct marc_info*) p->clientData;
     char buf[100000];
     int entry_p;
     int record_length;
@@ -58,14 +63,14 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
     record_length = atoi_n (buf, 5);
     if (record_length < 25)
     {
-        logf (LOG_WARN, "MARC record length < 25, is %d", record_length);
+        yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length);
         return NULL;
     }
     /* read remaining part - attempt to read one byte furhter... */
     read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
     if (read_bytes < record_length-5)
     {
-        logf (LOG_WARN, "Couldn't read whole MARC record");
+        yaz_log (YLOG_WARN, "Couldn't read whole MARC record");
         return NULL;
     }
     if (read_bytes == record_length - 4)
@@ -76,16 +81,27 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
        if (p->endf)
            (*p->endf)(p->fh, cur_offset - 1);
     }
-    absynName = p->type;
+    absynName = mi->type;
     res_root = data1_mk_root (p->dh, p->mem, absynName);
     if (!res_root)
     {
-        yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax");
+        yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax");
         return 0;
     }
-    res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
+    if (marc_xml)
+    {
+       data1_node *lead;
+       const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0};
+                        
+       res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root);
+
+       lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top);
+       data1_mk_text_n(p->dh, p->mem, buf, 24, lead);
+    }
+    else
+       res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
 
-    if (marctab = res_root->u.root.absyn->marc)
+    if ((marctab = data1_absyn_getmarctab(p->dh, res_root->u.root.absyn)))
     {
        memcpy(marctab->leader, buf, 24);
         memcpy(marctab->implementation_codes, buf+6, 4);
@@ -102,7 +118,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
        identifier_length = marctab->force_identifier_length;
     else
        identifier_length = atoi_n (buf+11, 1);
-    base_address = atoi_n (buf+12, 4);
+    base_address = atoi_n (buf+12, 5);
 
     length_data_entry = atoi_n (buf+20, 1);
     length_starting = atoi_n (buf+21, 1);
@@ -125,10 +141,11 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
         entry_p += 3;
         tag[3] = '\0';
 
-
-        /* generate field node */
-        res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
-
+       if (marc_xml)
+           res = parent;
+       else
+           res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
+       
 #if MARC_DEBUG
         fprintf (outf, "%s ", tag);
 #endif
@@ -142,58 +159,86 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
         if (memcmp (tag, "00", 2) && indicator_length)
         {
             /* generate indicator node */
+           if (marc_xml)
+           {
+               const char *attr[10];
+               int j;
+
+               attr[0] = "tag";
+               attr[1] = tag;
+               attr[2] = 0;
+
+               res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res);
+
+               for (j = 0; j<indicator_length; j++)
+               {
+                   char str1[18], str2[2];
+                   sprintf (str1, "ind%d", j+1);
+                   str2[0] = buf[i+j];
+                   str2[1] = '\0';
+
+                   attr[0] = str1;
+                   attr[1] = str2;
+                   
+                   data1_tag_add_attr (p->dh, p->mem, res, attr);
+               }
+           }
+           else
+           {
 #if MARC_DEBUG
-            int j;
+               int j;
 #endif
-            res = data1_mk_tag_n (p->dh, p->mem, 
-                                  buf+i, indicator_length, 0 /* attr */, res);
+               res = data1_mk_tag_n (p->dh, p->mem, 
+                                     buf+i, indicator_length, 0 /* attr */, res);
 #if MARC_DEBUG
-            for (j = 0; j<indicator_length; j++)
-                fprintf (outf, "%c", buf[j+i]);
+               for (j = 0; j<indicator_length; j++)
+                   fprintf (outf, "%c", buf[j+i]);
 #endif
-            i += indicator_length;
-        }
+           }
+           i += indicator_length;
+        } 
+       else
+       {
+           if (marc_xml)
+           {
+               const char *attr[10];
+               
+               attr[0] = "tag";
+               attr[1] = tag;
+               attr[2] = 0;
+               
+               res = data1_mk_tag(p->dh, p->mem, "controlfield", attr, res);
+           }
+       }
         parent = res;
         /* traverse sub fields */
         i0 = i;
         while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
         {
-
-           if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")||
-               !yaz_matchstr(absynName, "RUSMARC")))
-           {
-               int go = 1;
-               data1_node *res =
-                   data1_mk_tag_n (p->dh, p->mem,
-                                    buf+i+1, identifier_length-1, 
-                                    0 /* attr */, parent);
-                i += identifier_length;
-                i0 = i;
-               do {
-                   while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
-                        buf[i] != ISO2709_FS && i < end_offset)
-                   {
-                       i++;
-                   }
-                   if (!memcmp(buf+i+1, "1", 1) && i<end_offset)
-                   {
-                       go = 0;
-                   }
-                   else
-                   {
-                       buf[i] = '$';
-                   }               
-               } while (go && i < end_offset);
-               
-               data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
-               i0 = i;
-           }
-            else if (memcmp (tag, "00", 2) && identifier_length)
+           if (memcmp (tag, "00", 2) && identifier_length)
             {
-               data1_node *res =
-                   data1_mk_tag_n (p->dh, p->mem,
-                                    buf+i+1, identifier_length-1, 
-                                    0 /* attr */, parent);
+               data1_node *res;
+               if (marc_xml)
+               {
+                   int j;
+                   const char *attr[3];
+                   char code[10];
+                   
+                   for (j = 1; j<identifier_length && j < 9; j++)
+                       code[j-1] = buf[i+j];
+                   code[j-1] = 0;
+                   attr[0] = "code";
+                   attr[1] = code;
+                   attr[2] = 0;
+                   res = data1_mk_tag(p->dh, p->mem, "subfield",
+                                      attr, parent);
+               }
+               else
+               {
+                   res = data1_mk_tag_n (p->dh, p->mem,
+                                          buf+i+1, identifier_length-1, 
+                                          0 /* attr */, parent);
+               }
 #if MARC_DEBUG
                 fprintf (outf, " $"); 
                 for (j = 1; j<identifier_length; j++)
@@ -235,6 +280,7 @@ static data1_node *grs_read_iso2709 (struct grs_read_info *p)
     }
     return res_root;
 }
+
 /*
  * Locate some data under this node. This routine should handle variants
  * prettily.
@@ -270,18 +316,7 @@ static char *get_data(data1_node *n, int *len)
     *len = strlen(r);
     return r;
 }
-static char *tr(char *s, int c1, int c2)
-{
-    char *p = s;
-    
-    while(*p)
-    {
-       if (*p == c1)
-           *p = c2;
-       p++;
-    }
-    return s;
-}
+
 static data1_node *lookup_subfield(data1_node *node, const char *name)
 {
     data1_node *p;
@@ -293,7 +328,9 @@ static data1_node *lookup_subfield(data1_node *node, const char *name)
     }
     return 0;
 }
-static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name)
+
+static inline_subfield *lookup_inline_subfield(inline_subfield *pisf,
+                                              const char *name)
 {
     inline_subfield *p;
     
@@ -304,7 +341,9 @@ static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char
     }
     return 0;
 }
-static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf)
+
+static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf,
+                                           inline_subfield *pisf)
 {
     mc_subfield *p;
     
@@ -318,23 +357,26 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_
            {
                if (strcmp(p->prefix, "_"))
                {
-                   strcat(strcat(buf, " "), p->prefix);
+                   wrbuf_puts(buf, " ");
+                   wrbuf_puts(buf, p->prefix);
                }
                if (p->interval.start == -1)
                {
-                   strcat(buf, found->data);
+                   wrbuf_puts(buf, found->data);
                }
                else
                {
-                   strncat(buf, found->data+p->interval.start,
-                       p->interval.end-p->interval.start+1);
+                   wrbuf_write(buf, found->data+p->interval.start,
+                               p->interval.end-p->interval.start);
+                   wrbuf_puts(buf, "");
                }
                if (strcmp(p->suffix, "_"))
                {
-                   strcat(strcat(buf, p->suffix), " ");
+                   wrbuf_puts(buf, p->suffix);
+                   wrbuf_puts(buf, " ");
                }
 #if MARCOMP_DEBUG
-               logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
+               yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
 #endif         
                pisf = found->next;
            }
@@ -365,30 +407,54 @@ static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_
            }
            if (found)
            {
-               strcat(buf, " (");
+               wrbuf_puts(buf, " (");
                pisf = cat_inline_subfield(p->u.child, buf, pisf);
-               strcat(buf, ") ");
+               wrbuf_puts(buf, ") ");
            }
        }
     }
     return pisf; 
 }
-static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
-{
-    
+
+static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield)
+{    
     if (!pf || !subfield)
        return;
 
-    for (;subfield; subfield = subfield->next)
+    for (;subfield;)
     {
        int len;
-       inline_field *pif = inline_parse(get_data(subfield,&len));
+       inline_field *pif=NULL;
+       data1_node *psubf;
+       
+       if (yaz_matchstr(subfield->u.tag.tag, "1"))
+       {
+           subfield = subfield->next;
+           continue;
+       }
+       
+       psubf = subfield;
+       pif = inline_mk_field();
+       do
+       {
+           int i;
+           if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0)
+           {
+               yaz_log(YLOG_WARN, "inline subfield ($%s): parse error",
+                   psubf->u.tag.tag);
+               inline_destroy_field(pif);
+               return; 
+           }
+           psubf = psubf->next;
+       } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1"));
+       
+       subfield = psubf;
        
        if (pif && !yaz_matchstr(pif->name, pf->name))
        {
            if (!pf->list && pif->list)
            {
-               strcat(buf, pif->list->data);
+               wrbuf_puts(buf, pif->list->data);
            }
            else
            {
@@ -409,24 +475,26 @@ static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
                    /*
                        add separator for inline fields
                    */
-                   if (strlen(buf))
+                   if (wrbuf_len(buf))
                    {
-                       strcat(buf, "\n");
+                       wrbuf_puts(buf, "\n");
                    }
                }
                else
                {
-                   logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name);
+                   yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name);
                }
            }
        }
        inline_destroy_field(pif);
     }
 #if MARCOMP_DEBUG    
-    logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
+    yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
 #endif
 }
-static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield)
+
+static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf,
+                               data1_node *subfield)
 {
     mc_subfield *p;
     
@@ -442,7 +510,8 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
                
                if (strcmp(p->prefix, "_"))
                {
-                   strcat(strcat(buf, " "), p->prefix);
+                   wrbuf_puts(buf, " ");
+                   wrbuf_puts(buf, p->prefix);
                }
                
                if (p->u.in_line)
@@ -451,19 +520,21 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
                }
                else if (p->interval.start == -1)
                {
-                   strcat(buf, get_data(found, &len));
+                   wrbuf_puts(buf, get_data(found, &len));
                }
                else
                {
-                   strncat(buf, get_data(found, &len)+p->interval.start,
-                       p->interval.end-p->interval.start+1);
+                   wrbuf_write(buf, get_data(found, &len)+p->interval.start,
+                       p->interval.end-p->interval.start);
+                   wrbuf_puts(buf, "");
                }
                if (strcmp(p->suffix, "_"))
                {
-                   strcat(strcat(buf, p->suffix), " ");
+                   wrbuf_puts(buf, p->suffix);
+                   wrbuf_puts(buf, " ");
                }
 #if MARCOMP_DEBUG              
-               logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
+               yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
 #endif         
                subfield = found->next;
            }
@@ -493,15 +564,17 @@ static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfiel
            }
            if (found)
            {
-               strcat(buf, " (");
+               wrbuf_puts(buf, " (");
                subfield = cat_subfield(p->u.child, buf, subfield);
-               strcat(buf, ") ");
+               wrbuf_puts(buf, ") ");
            }
        }
     }
     return subfield;
 }
-static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field)
+
+static data1_node *cat_field(struct grs_read_info *p, mc_field *pf,
+                            WRBUF buf, data1_node *field)
 {
     data1_node *subfield;
     int ind1, ind2;
@@ -528,15 +601,16 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d
        
        if (pf->interval.start == -1)
        {
-           strcat(buf, get_data(field, &len));
+           wrbuf_puts(buf, get_data(field, &len));
        }
        else
        {
-           strncat(buf, get_data(field, &len)+pf->interval.start,
-               pf->interval.end-pf->interval.start+1);
+           wrbuf_write(buf, get_data(field, &len)+pf->interval.start,
+                       pf->interval.end-pf->interval.start);
+           wrbuf_puts(buf, "");
        }
 #if MARCOMP_DEBUG
-        logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+        yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf);
 #endif
        return field->next;
     }
@@ -554,7 +628,7 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d
        ))
     {
 #if MARCOMP_DEBUG
-       logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
+       yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
 #endif
        return field->next;
     }
@@ -567,11 +641,12 @@ static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, d
     cat_subfield(pf->list, buf, subfield);
 
 #if MARCOMP_DEBUG    
-    logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+    yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf);
 #endif
     
     return field->next;    
 }
+
 static int is_empty(char *s)
 {
     char *p = s;
@@ -583,14 +658,16 @@ static int is_empty(char *s)
     }
     return 1;
 }
-static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root)
+
+static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt,
+                            data1_node *root)
 {
-    data1_marctab *marctab = root->u.root.absyn->marc;
+    data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root->u.root.absyn);
     data1_node *top = root->child;
     data1_node *field;
     mc_context *c;
     mc_field *pf;
-    char buf[1000000];
+    WRBUF buf;
     
     c = mc_mk_context(mc_stmnt+3);
     
@@ -604,14 +681,15 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
        mc_destroy_context(c);
        return;
     }
+    buf = wrbuf_alloc();
 #if MARCOMP_DEBUG    
-    logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
+    yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
 #endif
     if (!yaz_matchstr(pf->name, "ldr"))
     {
        data1_node *new;
 #if MARCOMP_DEBUG
-       logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
+       yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
            pf->interval.start, pf->interval.end);
 #endif 
        new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
@@ -627,13 +705,16 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
            if (!yaz_matchstr(field->u.tag.tag, pf->name))
            {
                data1_node *new;
-               char *pb = buf;
+               char *pb;
 #if MARCOMP_DEBUG              
-               logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
+               yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
 #endif         
-               *buf = '\0';        
+               wrbuf_rewind(buf);
+               wrbuf_puts(buf, "");
+
                field = cat_field(p, pf, buf, field);
                
+               pb = wrbuf_buf(buf);
                for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
                {
                        if (!is_empty(pb))
@@ -651,17 +732,37 @@ static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data
     }
     mc_destroy_field(pf);
     mc_destroy_context(c);
+    wrbuf_free(buf, 1);
+}
+
+data1_node *grs_read_marcxml(struct grs_read_info *p)
+{
+    data1_node *root = grs_read_iso2709(p, 1);
+    data1_element *e;
+
+    if (!root)
+       return 0;
+       
+    for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next)
+    {
+       data1_tag *tag = e->tag;
+       
+       if (tag && tag->which == DATA1T_string &&
+           !yaz_matchstr(tag->value.string, "mc?"))
+               parse_data1_tree(p, tag->value.string, root);
+    }
+    return root;
 }
 
 data1_node *grs_read_marc(struct grs_read_info *p)
 {
-    data1_node *root = grs_read_iso2709(p);
+    data1_node *root = grs_read_iso2709(p, 0);
     data1_element *e;
 
     if (!root)
        return 0;
        
-    for (e=root->u.root.absyn->main_elements; e; e=e->next)
+    for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next)
     {
        data1_tag *tag = e->tag;
        
@@ -671,20 +772,76 @@ data1_node *grs_read_marc(struct grs_read_info *p)
     }
     return root;
 }
-static void *grs_init_marc(void)
+
+static void *init_marc(Res res, RecType rt)
 {
-    return 0;
+    struct marc_info *p = xmalloc(sizeof(*p));
+    strcpy(p->type, "");
+    return p;
+}
+
+static void config_marc(void *clientData, Res res, const char *args)
+{
+    struct marc_info *p = (struct marc_info*) clientData;
+    if (strlen(args) < sizeof(p->type))
+       strcpy(p->type, args);
+}
+
+static void destroy_marc(void *clientData)
+{
+    struct marc_info *p = (struct marc_info*) clientData;
+    xfree (p);
+}
+
+
+static int extract_marc(void *clientData, struct recExtractCtrl *ctrl)
+{
+    return zebra_grs_extract(clientData, ctrl, grs_read_marc);
+}
+
+static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl)
+{
+    return zebra_grs_retrieve(clientData, ctrl, grs_read_marc);
+}
+
+static struct recType marc_type = {
+    "grs.marc",
+    init_marc,
+    config_marc,
+    destroy_marc,
+    extract_marc,
+    retrieve_marc,
+};
+
+static int extract_marcxml(void *clientData, struct recExtractCtrl *ctrl)
+{
+    return zebra_grs_extract(clientData, ctrl, grs_read_marcxml);
 }
 
-static void grs_destroy_marc(void *clientData)
+static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl)
 {
+    return zebra_grs_retrieve(clientData, ctrl, grs_read_marcxml);
 }
 
-static struct recTypeGrs marc_type = {
-    "marc",
-    grs_init_marc,
-    grs_destroy_marc,
-    grs_read_marc
+static struct recType marcxml_type = {
+    "grs.marcxml",
+    init_marc,
+    config_marc,
+    destroy_marc,
+    extract_marcxml,
+    retrieve_marcxml,
 };
 
-RecTypeGrs recTypeGrs_marc = &marc_type;
+RecType
+#ifdef IDZEBRA_STATIC_GRS_MARC
+idzebra_filter_grs_marc
+#else
+idzebra_filter
+#endif
+
+[] = {
+    &marc_type,
+    &marcxml_type,
+    0,
+};
+