C++ compilation.
[idzebra-moved-to-github.git] / recctrl / recgrs.c
index a7056a3..680dde2 100644 (file)
@@ -1,10 +1,69 @@
 /*
- * Copyright (C) 1994-1997, Index Data I/S 
+ * Copyright (C) 1994-1999, Index Data
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: recgrs.c,v $
- * Revision 1.10  1997-09-18 08:59:21  adam
+ * Revision 1.29  1999-05-26 07:49:14  adam
+ * C++ compilation.
+ *
+ * Revision 1.28  1999/05/21 12:00:17  adam
+ * Better diagnostics for extraction process.
+ *
+ * Revision 1.27  1999/05/20 12:57:18  adam
+ * Implemented TCL filter. Updated recctrl system.
+ *
+ * Revision 1.26  1999/03/02 16:15:44  quinn
+ * Added "tagsysno" and "tagrank" directives to zebra.cfg.
+ *
+ * Revision 1.25  1999/02/18 15:01:26  adam
+ * Minor changes.
+ *
+ * Revision 1.24  1999/02/02 14:51:28  adam
+ * Updated WIN32 code specific sections. Changed header.
+ *
+ * Revision 1.23  1998/10/18 07:51:10  adam
+ * Changed one logf call.
+ *
+ * Revision 1.22  1998/10/16 08:14:37  adam
+ * Updated record control system.
+ *
+ * Revision 1.21  1998/07/01 09:16:10  adam
+ * Element localno only added when it's greater than 0.
+ *
+ * Revision 1.20  1998/05/20 10:12:26  adam
+ * Implemented automatic EXPLAIN database maintenance.
+ * Modified Zebra to work with ASN.1 compiled version of YAZ.
+ *
+ * Revision 1.19  1998/03/11 11:19:05  adam
+ * Changed the way sequence numbers are generated.
+ *
+ * Revision 1.18  1998/03/05 08:41:31  adam
+ * Minor changes.
+ *
+ * Revision 1.17  1998/02/10 12:03:06  adam
+ * Implemented Sort.
+ *
+ * Revision 1.16  1998/01/29 13:38:17  adam
+ * Fixed problem with mapping to record with unknown schema.
+ *
+ * Revision 1.15  1998/01/26 10:37:57  adam
+ * Better diagnostics.
+ *
+ * Revision 1.14  1997/11/06 11:41:01  adam
+ * Implemented "begin variant" for the sgml.regx filter.
+ *
+ * Revision 1.13  1997/10/31 12:35:44  adam
+ * Added a few log statements.
+ *
+ * Revision 1.12  1997/10/29 12:02:22  adam
+ * Using oid_ent_to_oid instead of the non-thread-safe oid_getoidbyent.
+ *
+ * Revision 1.11  1997/10/27 14:34:00  adam
+ * Work on generic character mapping depending on "structure" field
+ * in abstract syntax file.
+ *
+ * Revision 1.10  1997/09/18 08:59:21  adam
  * Extra generic handle for the character mapping routines.
  *
  * Revision 1.9  1997/09/17 12:19:21  adam
 #include <stdio.h>
 #include <assert.h>
 #include <sys/types.h>
-#ifndef WINDOWS
+#ifndef WIN32
 #include <unistd.h>
 #endif
 
 #include <oid.h>
 
 #include <recctrl.h>
-#include <charmap.h>
 #include "grsread.h"
 
 #define GRS_MAX_WORD 512
 
-static int seqno = 0;
+struct grs_handler {
+    RecTypeGrs type;
+    void *clientData;
+    int initFlag;
+    struct grs_handler *next;
+};
+
+struct grs_handlers {
+    struct grs_handler *handlers;
+};
 
-static data1_node *read_grs_type (struct grs_read_info *p, const char *type)
+static int read_grs_type (struct grs_handlers *h,
+                         struct grs_read_info *p, const char *type,
+                         data1_node **root)
 {
-    static struct {
-        char *type;
-        data1_node *(*func)(struct grs_read_info *p);
-    } tab[] = {
-        { "sgml",  grs_read_sgml },
-        { "regx",  grs_read_regx },
-        { "marc",  grs_read_marc },
-        { NULL, NULL }
-    };
+    struct grs_handler *gh = h->handlers;
     const char *cp = strchr (type, '.');
-    int i;
 
     if (cp == NULL || cp == type)
     {
@@ -161,129 +221,67 @@ static data1_node *read_grs_type (struct grs_read_info *p, const char *type)
     }
     else
         strcpy (p->type, cp+1);
-    for (i=0; tab[i].type; i++)
+    for (gh = h->handlers; gh; gh = gh->next)
     {
-        if (!memcmp (type, tab[i].type, cp-type))
-            return (tab[i].func)(p);
+        if (!memcmp (type, gh->type->type, cp-type))
+       {
+           if (!gh->initFlag)
+           {
+               gh->initFlag = 1;
+               gh->clientData = (*gh->type->init)();
+           }
+           p->clientData = gh->clientData;
+            *root = (gh->type->read)(p);
+           gh->clientData = p->clientData;
+           return 0;
+       }
     }
-    return NULL;
+    return 1;
 }
 
-static void grs_init(void)
+static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
 {
+    struct grs_handler *gh = (struct grs_handler *) malloc (sizeof(*gh));
+    gh->next = h->handlers;
+    h->handlers = gh;
+    gh->initFlag = 0;
+    gh->clientData = 0;
+    gh->type = t;
 }
 
-static void dumpkeys_word(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
+static void *grs_init(RecType recType)
 {
-    const char *b = n->u.data.data;
-    int remain;
-    const char **map = 0;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(0, &b, remain);
-
-    while (map)
-    {
-       RecWord wrd;
-       char buf[GRS_MAX_WORD+1];
-       int i, remain;
-
-       /* Skip spaces */
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(0, &b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-       i = 0;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           const char *cp = *map;
+    struct grs_handlers *h = (struct grs_handlers *) malloc (sizeof(*h));
+    h->handlers = 0;
 
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(0, &b, remain);
-           else
-               map = 0;
-       }
-       if (!i)
-           return;
-       buf[i] = '\0';
-       (*p->init)(&wrd);      /* set defaults */
-       wrd.which = Word_String;
-       wrd.seqno = seqno++;
-       wrd.u.string = buf;
-       wrd.attrSet = att->parent->ordinal;
-       wrd.attrUse = att->locals->local;
-       (*p->add)(&wrd);
-    }
+    grs_add_handler (h, recTypeGrs_sgml);
+    grs_add_handler (h, recTypeGrs_regx);
+#if HAVE_TCL_H
+    grs_add_handler (h, recTypeGrs_tcl);
+#endif
+    grs_add_handler (h, recTypeGrs_marc);
+    return h;
 }
 
-static void dumpkeys_phrase(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
+static void grs_destroy(void *clientData)
 {
-    const char *b = n->u.data.data;
-    char buf[GRS_MAX_WORD+1];
-    const char **map = 0;
-    RecWord wrd;
-    int i = 0, remain;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(0, &b, remain);
-
-    while (remain > 0 && i < GRS_MAX_WORD)
+    struct grs_handlers *h = (struct grs_handlers *) clientData;
+    struct grs_handler *gh = h->handlers, *gh_next;
+    while (gh)
     {
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(0, &b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-
-       if (i && i < GRS_MAX_WORD)
-           buf[i++] = *CHR_SPACE;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           const char *cp = *map;
-
-           if (i >= GRS_MAX_WORD)
-               break;
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(0, &b, remain);
-           else
-               map = 0;
-       }
+       gh_next = gh->next;
+       if (gh->initFlag)
+           (*gh->type->destroy)(gh->clientData);
+       free (gh);
+       gh = gh_next;
     }
-    if (!i)
-       return;
-    buf[i] = '\0';
-    (*p->init)(&wrd);
-    wrd.which = Word_Phrase;
-    wrd.seqno = seqno++;
-    wrd.u.string = buf;
-    wrd.attrSet = att->parent->ordinal;
-    wrd.attrUse = att->locals->local;
-    (*p->add)(&wrd);
+    free (h);
 }
 
 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 {
+    RecWord wrd;
+    (*p->init)(p, &wrd);      /* set defaults */
     for (; n; n = n->next)
     {
        if (p->flagShowRecords) /* display element description to user */
@@ -331,13 +329,15 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
        {
            data1_node *par = get_parent_tag(p->dh, n);
            data1_termlist *tlist = 0;
+           data1_datatype dtype = DATA1K_string;
 
            if (p->flagShowRecords)
            {
                printf("%*s", level * 4, "");
                printf("Data: ");
-               if (n->u.data.len > 20)
-                   printf("'%.20s...'\n", n->u.data.data);
+               if (n->u.data.len > 32)
+                   printf("'%.24s ... %.6s'\n", n->u.data.data,
+                          n->u.data.data + n->u.data.len-6);
                else if (n->u.data.len > 0)
                    printf("'%.*s'\n", n->u.data.len, n->u.data.data);
                else
@@ -353,39 +353,30 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
             */
 
            while (!par->u.tag.element)
-               if (!par->parent || !(par = get_parent_tag(p->dh, par->parent)))
+               if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
                    break;
-           if (!par)
-               tlist = 0;
-           else if (par->u.tag.element->termlists)
-               tlist = par->u.tag.element->termlists;
-           else
+           if (!par || !(tlist = par->u.tag.element->termlists))
                continue;
-
+           if (par->u.tag.element->tag)
+               dtype = par->u.tag.element->tag->kind;
            for (; tlist; tlist = tlist->next)
            {
                if (p->flagShowRecords)
                {
-                   printf("%*sIdx: [", (level + 1) * 4, "");
-                   switch (tlist->structure)
-                   {
-                       case DATA1S_word: printf("w"); break;
-                       case DATA1S_phrase: printf("p"); break;
-                       default: printf("?"); break;
-                   }
-                   printf("] ");
-                   printf("%s:%s [%d]\n", tlist->att->parent->name,
-                       tlist->att->name, tlist->att->value);
+                   printf("%*sIdx: [%s]", (level + 1) * 4, "",
+                          tlist->structure);
+                   printf("%s:%s [%d]\n",
+                          tlist->att->parent->name,
+                          tlist->att->name, tlist->att->value);
                }
-               else switch (tlist->structure)
+               else
                {
-                   case DATA1S_word:
-                       dumpkeys_word(n, p, tlist->att); break;
-                   case DATA1S_phrase:
-                       dumpkeys_phrase(n, p, tlist->att); break;
-                   default:
-                       logf(LOG_FATAL, "Bad structure type in dumpkeys");
-                       abort();
+                   wrd.reg_type = *tlist->structure;
+                   wrd.string = n->u.data.data;
+                   wrd.length = n->u.data.len;
+                   wrd.attrSet = (int) (tlist->att->parent->reference);
+                   wrd.attrUse = tlist->att->locals->local;
+                   (*p->addWord)(&wrd);
                }
            }
        }
@@ -397,13 +388,31 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
     return 0;
 }
 
-static int grs_extract(struct recExtractCtrl *p)
+int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
+{
+    oident oe;
+    int oidtmp[OID_SIZE];
+
+    oe.proto = PROTO_Z3950;
+    oe.oclass = CLASS_SCHEMA;
+    oe.value = n->u.root.absyn->reference;
+
+    if ((oid_ent_to_oid (&oe, oidtmp)))
+       (*p->addSchema)(p, oidtmp);
+
+    return dumpkeys(n, p, 0);
+}
+
+static int grs_extract(void *clientData, struct recExtractCtrl *p)
 {
     data1_node *n;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
-    seqno = 0;
+    oident oe;
+    int oidtmp[OID_SIZE];
+    struct grs_handlers *h = (struct grs_handlers *) clientData;
 
+    mem = nmem_create (); 
     gri.readf = p->readf;
     gri.seekf = p->seekf;
     gri.tellf = p->tellf;
@@ -413,17 +422,25 @@ static int grs_extract(struct recExtractCtrl *p)
     gri.mem = mem;
     gri.dh = p->dh;
 
-    n = read_grs_type (&gri, p->subType);
+    if (read_grs_type (h, &gri, p->subType, &n))
+       return RECCTRL_EXTRACT_ERROR;
     if (!n)
-        return -1;
+        return RECCTRL_EXTRACT_EOF;
+
+    oe.proto = PROTO_Z3950;
+    oe.oclass = CLASS_SCHEMA;
+    oe.value = n->u.root.absyn->reference;
+    if ((oid_ent_to_oid (&oe, oidtmp)))
+       (*p->addSchema)(p, oidtmp);
+
     if (dumpkeys(n, p, 0) < 0)
     {
        data1_free_tree(p->dh, n);
-       return -2;
+       return RECCTRL_EXTRACT_ERROR;
     }
     data1_free_tree(p->dh, n);
     nmem_destroy(mem);
-    return 0;
+    return RECCTRL_EXTRACT_OK;
 }
 
 /*
@@ -489,20 +506,29 @@ static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
                return 26; /* fix */
     }
     if (espec)
+    {
+        logf (LOG_DEBUG, "Element: Espec-1 match");
        return data1_doespec1(dh, n, espec);
+    }
     else
+    {
+       logf (LOG_DEBUG, "Element: all match");
        return -1;
+    }
 }
 
-static int grs_retrieve(struct recRetrieveCtrl *p)
+static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
 {
     data1_node *node = 0, *onode = 0;
-    data1_node *new;
+    data1_node *dnew;
     data1_maptab *map;
     int res, selected = 0;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
+    char *tagname;
+    struct grs_handlers *h = (struct grs_handlers *) clientData;
     
+    mem = nmem_create();
     gri.readf = p->readf;
     gri.seekf = p->seekf;
     gri.tellf = p->tellf;
@@ -512,41 +538,66 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     gri.mem = mem;
     gri.dh = p->dh;
 
-    node = read_grs_type (&gri, p->subType);
+    logf (LOG_DEBUG, "grs_retrieve");
+    if (read_grs_type (h, &gri, p->subType, &node))
+    {
+       p->diagnostic = 14;
+        nmem_destroy (mem);
+       return 0;
+    }
     if (!node)
     {
-       p->diagnostic = 2;
+       p->diagnostic = 14;
+        nmem_destroy (mem);
        return 0;
     }
-    if (p->score >= 0 && (new =
+    logf (LOG_DEBUG, "grs_retrieve: size");
+    if ((dnew = data1_insert_taggeddata(p->dh, node, node,
+                                      "size", mem)))
+    {
+       dnew->u.data.what = DATA1I_text;
+       dnew->u.data.data = dnew->lbuf;
+       sprintf(dnew->u.data.data, "%d", p->recordSize);
+       dnew->u.data.len = strlen(dnew->u.data.data);
+    }
+
+    tagname = res_get_def(p->res, "tagrank", "rank");
+    if (strcmp(tagname, "0") && p->score >= 0 && (dnew =
                          data1_insert_taggeddata(p->dh, node,
-                                                 node, "rank",
+                                                 node, tagname,
                                                  mem)))
     {
-       new->u.data.what = DATA1I_num;
-       new->u.data.data = new->u.data.lbuf;
-       sprintf(new->u.data.data, "%d", p->score);
-       new->u.data.len = strlen(new->u.data.data);
+        logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
+       dnew->u.data.what = DATA1I_num;
+       dnew->u.data.data = dnew->lbuf;
+       sprintf(dnew->u.data.data, "%d", p->score);
+       dnew->u.data.len = strlen(dnew->u.data.data);
     }
-    if ((new = data1_insert_taggeddata(p->dh, node, node,
-                                      "localControlNumber", mem)))
+
+    tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
+    if (strcmp(tagname, "0") && p->localno > 0 &&
+        (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem)))
     {
-       new->u.data.what = DATA1I_text;
-       new->u.data.data = new->u.data.lbuf;
-       sprintf(new->u.data.data, "%d", p->localno);
-       new->u.data.len = strlen(new->u.data.data);
+        logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
+       dnew->u.data.what = DATA1I_text;
+       dnew->u.data.data = dnew->lbuf;
+       sprintf(dnew->u.data.data, "%d", p->localno);
+       dnew->u.data.len = strlen(dnew->u.data.data);
     }
+
+    logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
     if (p->input_format == VAL_GRS1 && node->u.root.absyn &&
        node->u.root.absyn->reference != VAL_NONE)
     {
        oident oe;
        Odr_oid *oid;
+       int oidtmp[OID_SIZE];
 
        oe.proto = PROTO_Z3950;
        oe.oclass = CLASS_SCHEMA;
        oe.value = node->u.root.absyn->reference;
 
-       if ((oid = oid_getoidbyent(&oe)))
+       if ((oid = oid_ent_to_oid (&oe, oidtmp)))
        {
            char tmp[128];
            data1_handle dh = p->dh;
@@ -562,17 +613,18 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            }
            *(p++) = '\0';
 
-           if ((new = data1_insert_taggeddata(dh, node, node,
+           if ((dnew = data1_insert_taggeddata(dh, node, node,
                                               "schemaIdentifier", mem)))
            {
-               new->u.data.what = DATA1I_oid;
-               new->u.data.data = nmem_malloc(mem, p - tmp);
-               memcpy(new->u.data.data, tmp, p - tmp);
-               new->u.data.len = p - tmp;
+               dnew->u.data.what = DATA1I_oid;
+               dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
+               memcpy(dnew->u.data.data, tmp, p - tmp);
+               dnew->u.data.len = p - tmp;
            }
        }
     }
 
+    logf (LOG_DEBUG, "grs_retrieve: schema mapping");
     /*
      * Does the requested format match a known schema-mapping? (this reflects
      * the overlap of schema and formatting which is inherent in the MARC
@@ -587,12 +639,13 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            if (!(node = data1_map_record(p->dh, onode, map, mem)))
            {
                p->diagnostic = 14;
+                nmem_destroy (mem);
                return 0;
            }
-
            break;
        }
 
+    logf (LOG_DEBUG, "grs_retrieve: element spec");
     if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
     {
        p->diagnostic = res;
@@ -605,6 +658,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     else if (p->comp && !res)
        selected = 1;
 
+    logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
     switch (p->output_format = (p->input_format != VAL_NONE ?
        p->input_format : VAL_SUTRS))
     {
@@ -615,56 +669,70 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            dummy = 0;
            if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
                                              p->odr, &dummy)))
-               p->diagnostic = 2; /* this should be better specified */
+               p->diagnostic = 238; /* not available in requested syntax */
            else
-               p->rec_len = -1;
+               p->rec_len = (size_t) (-1);
            break;
        case VAL_EXPLAIN:
            if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
                                                   p->odr)))
-               p->diagnostic = 2; /* this should be better specified */
+               p->diagnostic = 238;
            else
-               p->rec_len = -1;
+               p->rec_len = (size_t) (-1);
            break;
        case VAL_SUMMARY:
            if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
                                                   p->odr)))
-               p->diagnostic = 2;
+               p->diagnostic = 238;
            else
-               p->rec_len = -1;
+               p->rec_len = (size_t) (-1);
            break;
        case VAL_SUTRS:
            if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
                (int*)&p->rec_len)))
+               p->diagnostic = 238;
+           else
            {
-               p->diagnostic = 2;
-               break;
+               char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
+               memcpy (new_buf, p->rec_buf, p->rec_len);
+               p->rec_buf = new_buf;
            }
            break;
        case VAL_SOIF:
            if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
                                                (int*)&p->rec_len)))
+               p->diagnostic = 238;
+           else
            {
-               p->diagnostic = 2;
-               break;
+               char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
+               memcpy (new_buf, p->rec_buf, p->rec_len);
+               p->rec_buf = new_buf;
            }
            break;
        default:
+            if (!node->u.root.absyn)
+            {
+               p->diagnostic = 238;
+               break;
+           }
            for (marctab = node->u.root.absyn->marc; marctab;
                marctab = marctab->next)
                if (marctab->reference == p->input_format)
                    break;
            if (!marctab)
            {
-               p->diagnostic = 227;
+               p->diagnostic = 238;
                break;
            }
            if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
                                                selected,
                                                (int*)&p->rec_len)))
+               p->diagnostic = 238;
+           else
            {
-               p->diagnostic = 2;
-               break;
+               char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
+               memcpy (new_buf, p->rec_buf, p->rec_len);
+               p->rec_buf = new_buf;
            }
     }
     if (node)
@@ -679,6 +747,7 @@ static struct recType grs_type =
 {
     "grs",
     grs_init,
+    grs_destroy,
     grs_extract,
     grs_retrieve
 };