Implemented Sort.
[idzebra-moved-to-github.git] / recctrl / recgrs.c
index d8f3453..76e0918 100644 (file)
@@ -1,10 +1,53 @@
 /*
- * Copyright (C) 1994-1996, Index Data I/S 
+ * Copyright (C) 1994-1998, Index Data I/S 
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: recgrs.c,v $
- * Revision 1.4  1997-04-30 08:56:08  quinn
+ * Revision 1.17  1998-02-10 12:03:06  adam
+ * Implemented Sort.
+ *
+ * Revision 1.16  1998/01/29 13:38:17  adam
+ * Fixed problem with mapping to record with unknown schema.
+ *
+ * Revision 1.15  1998/01/26 10:37:57  adam
+ * Better diagnostics.
+ *
+ * Revision 1.14  1997/11/06 11:41:01  adam
+ * Implemented "begin variant" for the sgml.regx filter.
+ *
+ * Revision 1.13  1997/10/31 12:35:44  adam
+ * Added a few log statements.
+ *
+ * Revision 1.12  1997/10/29 12:02:22  adam
+ * Using oid_ent_to_oid instead of the non-thread-safe oid_getoidbyent.
+ *
+ * Revision 1.11  1997/10/27 14:34:00  adam
+ * Work on generic character mapping depending on "structure" field
+ * in abstract syntax file.
+ *
+ * Revision 1.10  1997/09/18 08:59:21  adam
+ * Extra generic handle for the character mapping routines.
+ *
+ * Revision 1.9  1997/09/17 12:19:21  adam
+ * Zebra version corresponds to YAZ version 1.4.
+ * Changed Zebra server so that it doesn't depend on global common_resource.
+ *
+ * Revision 1.8  1997/09/09 13:38:14  adam
+ * Partial port to WIN95/NT.
+ *
+ * Revision 1.7  1997/09/05 15:30:10  adam
+ * Changed prototype for chr_map_input - added const.
+ * Added support for C++; headers use extern "C" for public definitions.
+ *
+ * Revision 1.6  1997/09/04 13:54:40  adam
+ * Added MARC filter - type grs.marc.<syntax> where syntax refers
+ * to abstract syntax. New method tellf in retrieve/extract method.
+ *
+ * Revision 1.5  1997/07/15 16:29:03  adam
+ * Initialized dummy variable to keep checker gcc happy.
+ *
+ * Revision 1.4  1997/04/30 08:56:08  quinn
  * null
  *
  * Revision 1.2  1996/10/11  16:06:43  quinn
 #include <stdio.h>
 #include <assert.h>
 #include <sys/types.h>
+#ifndef WINDOWS
 #include <unistd.h>
+#endif
 
 #include <log.h>
 #include <oid.h>
 
 #include <recctrl.h>
-#include <charmap.h>
 #include "grsread.h"
 
 #define GRS_MAX_WORD 512
@@ -125,6 +169,7 @@ static data1_node *read_grs_type (struct grs_read_info *p, const char *type)
     } tab[] = {
         { "sgml",  grs_read_sgml },
         { "regx",  grs_read_regx },
+        { "marc",  grs_read_marc },
         { NULL, NULL }
     };
     const char *cp = strchr (type, '.');
@@ -149,116 +194,10 @@ static void grs_init(void)
 {
 }
 
-static void dumpkeys_word(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
-{
-    char *b = n->u.data.data;
-    int remain;
-    char **map = 0;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(&b, remain);
-
-    while (map)
-    {
-       RecWord wrd;
-       char buf[GRS_MAX_WORD+1];
-       int i, remain;
-
-       /* Skip spaces */
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-       i = 0;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           char *cp = *map;
-
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!i)
-           return;
-       buf[i] = '\0';
-       (*p->init)(&wrd);      /* set defaults */
-       wrd.which = Word_String;
-       wrd.seqno = seqno++;
-       wrd.u.string = buf;
-       wrd.attrSet = att->parent->ordinal;
-       wrd.attrUse = att->locals->local;
-       (*p->add)(&wrd);
-    }
-}
-
-static void dumpkeys_phrase(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
-{
-    char *b = n->u.data.data;
-    char buf[GRS_MAX_WORD+1], **map = 0;
-    RecWord wrd;
-    int i = 0, remain;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(&b, remain);
-
-    while (remain > 0 && i < GRS_MAX_WORD)
-    {
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-
-       if (i && i < GRS_MAX_WORD)
-           buf[i++] = *CHR_SPACE;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           char *cp = *map;
-
-           if (i >= GRS_MAX_WORD)
-               break;
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-    }
-    if (!i)
-       return;
-    buf[i] = '\0';
-    (*p->init)(&wrd);
-    wrd.which = Word_Phrase;
-    wrd.seqno = seqno++;
-    wrd.u.string = buf;
-    wrd.attrSet = att->parent->ordinal;
-    wrd.attrUse = att->locals->local;
-    (*p->add)(&wrd);
-}
-
 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 {
+    RecWord wrd;
+    (*p->init)(p, &wrd);      /* set defaults */
     for (; n; n = n->next)
     {
        if (p->flagShowRecords) /* display element description to user */
@@ -304,8 +243,9 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 
        if (n->which == DATA1N_data)
        {
-           data1_node *par = get_parent_tag(n);
+           data1_node *par = get_parent_tag(p->dh, n);
            data1_termlist *tlist = 0;
+           data1_datatype dtype = DATA1K_string;
 
            if (p->flagShowRecords)
            {
@@ -328,39 +268,32 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
             */
 
            while (!par->u.tag.element)
-               if (!par->parent || !(par = get_parent_tag(par->parent)))
+               if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
                    break;
-           if (!par)
-               tlist = 0;
-           else if (par->u.tag.element->termlists)
-               tlist = par->u.tag.element->termlists;
-           else
+           if (!par || !(tlist = par->u.tag.element->termlists))
                continue;
-
+           if (par->u.tag.element->tag)
+               dtype = par->u.tag.element->tag->kind;
            for (; tlist; tlist = tlist->next)
            {
                if (p->flagShowRecords)
                {
-                   printf("%*sIdx: [", (level + 1) * 4, "");
-                   switch (tlist->structure)
-                   {
-                       case DATA1S_word: printf("w"); break;
-                       case DATA1S_phrase: printf("p"); break;
-                       default: printf("?"); break;
-                   }
-                   printf("] ");
-                   printf("%s:%s [%d]\n", tlist->att->parent->name,
-                       tlist->att->name, tlist->att->value);
+                   printf("%*sIdx: [%s]", (level + 1) * 4, "",
+                          tlist->structure);
+                   printf("%s:%s [%d]\n",
+                          tlist->att->parent->name,
+                          tlist->att->name, tlist->att->value);
                }
-               else switch (tlist->structure)
+               else
                {
-                   case DATA1S_word:
-                       dumpkeys_word(n, p, tlist->att); break;
-                   case DATA1S_phrase:
-                       dumpkeys_phrase(n, p, tlist->att); break;
-                   default:
-                       logf(LOG_FATAL, "Bad structure type in dumpkeys");
-                       abort();
+                   wrd.reg_type = *tlist->structure;
+                   wrd.seqno = seqno;
+                   wrd.string = n->u.data.data;
+                   wrd.length = n->u.data.len;
+                   wrd.attrSet = tlist->att->parent->ordinal;
+                   wrd.attrUse = tlist->att->locals->local;
+                   (*p->add)(&wrd);
+                   seqno = wrd.seqno;
                }
            }
        }
@@ -375,26 +308,29 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 static int grs_extract(struct recExtractCtrl *p)
 {
     data1_node *n;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
     seqno = 0;
 
+    mem = nmem_create (); 
     gri.readf = p->readf;
     gri.seekf = p->seekf;
+    gri.tellf = p->tellf;
     gri.endf = p->endf;
     gri.fh = p->fh;
     gri.offset = p->offset;
     gri.mem = mem;
+    gri.dh = p->dh;
 
     n = read_grs_type (&gri, p->subType);
     if (!n)
         return -1;
     if (dumpkeys(n, p, 0) < 0)
     {
-       data1_free_tree(n);
+       data1_free_tree(p->dh, n);
        return -2;
     }
-    data1_free_tree(n);
+    data1_free_tree(p->dh, n);
     nmem_destroy(mem);
     return 0;
 }
@@ -402,7 +338,7 @@ static int grs_extract(struct recExtractCtrl *p)
 /*
  * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
  */
-static int process_comp(data1_node *n, Z_RecordComposition *c)
+static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
 {
     data1_esetname *eset;
     Z_Espec1 *espec = 0;
@@ -413,7 +349,7 @@ static int process_comp(data1_node *n, Z_RecordComposition *c)
        case Z_RecordComp_simple:
            if (c->u.simple->which != Z_ElementSetNames_generic)
                return 26; /* only generic form supported. Fix this later */
-           if (!(eset = data1_getesetbyname(n->u.root.absyn,
+           if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
                c->u.simple->u.generic)))
            {
                logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
@@ -431,8 +367,10 @@ static int process_comp(data1_node *n, Z_RecordComposition *c)
                    switch (p->which)
                    {
                        case Z_ElementSpec_elementSetName:
-                           if (!(eset = data1_getesetbyname(n->u.root.absyn,
-                               p->u.elementSetName)))
+                           if (!(eset =
+                                 data1_getesetbyname(dh,
+                                                     n->u.root.absyn,
+                                                     p->u.elementSetName)))
                            {
                                logf(LOG_LOG, "Unknown esetname '%s'",
                                    p->u.elementSetName);
@@ -460,9 +398,15 @@ static int process_comp(data1_node *n, Z_RecordComposition *c)
                return 26; /* fix */
     }
     if (espec)
-       return data1_doespec1(n, espec);
+    {
+        logf (LOG_LOG, "Element: Espec-1 match");
+       return data1_doespec1(dh, n, espec);
+    }
     else
+    {
+       logf (LOG_DEBUG, "Element: all match");
        return -1;
+    }
 }
 
 static int grs_retrieve(struct recRetrieveCtrl *p)
@@ -471,51 +415,63 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     data1_node *new;
     data1_maptab *map;
     int res, selected = 0;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
     
+    mem = nmem_create();
     gri.readf = p->readf;
     gri.seekf = p->seekf;
+    gri.tellf = p->tellf;
     gri.endf = NULL;
     gri.fh = p->fh;
     gri.offset = 0;
     gri.mem = mem;
+    gri.dh = p->dh;
 
+    logf (LOG_DEBUG, "grs_retrieve");
     node = read_grs_type (&gri, p->subType);
-/* node = data1_read_record(p->readf, p->fh, mem); */
     if (!node)
     {
-       p->diagnostic = 2;
+       p->diagnostic = 14;
+        nmem_destroy (mem);
        return 0;
     }
-    if (p->score >= 0 && (new = data1_insert_taggeddata(node, node, "rank",
-       mem)))
+    logf (LOG_DEBUG, "grs_retrieve: score");
+    if (p->score >= 0 && (new =
+                         data1_insert_taggeddata(p->dh, node,
+                                                 node, "rank",
+                                                 mem)))
     {
        new->u.data.what = DATA1I_num;
-       new->u.data.data = new->u.data.lbuf;
+       new->u.data.data = new->lbuf;
        sprintf(new->u.data.data, "%d", p->score);
        new->u.data.len = strlen(new->u.data.data);
     }
-    if ((new = data1_insert_taggeddata(node, node, "localControlNumber", mem)))
+    logf (LOG_DEBUG, "grs_retrieve: localControlNumber");
+    if ((new = data1_insert_taggeddata(p->dh, node, node,
+                                      "localControlNumber", mem)))
     {
        new->u.data.what = DATA1I_text;
-       new->u.data.data = new->u.data.lbuf;
+       new->u.data.data = new->lbuf;
        sprintf(new->u.data.data, "%d", p->localno);
        new->u.data.len = strlen(new->u.data.data);
     }
+    logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
     if (p->input_format == VAL_GRS1 && node->u.root.absyn &&
        node->u.root.absyn->reference != VAL_NONE)
     {
        oident oe;
        Odr_oid *oid;
+       int oidtmp[OID_SIZE];
 
        oe.proto = PROTO_Z3950;
        oe.oclass = CLASS_SCHEMA;
        oe.value = node->u.root.absyn->reference;
 
-       if ((oid = oid_getoidbyent(&oe)))
+       if ((oid = oid_ent_to_oid (&oe, oidtmp)))
        {
            char tmp[128];
+           data1_handle dh = p->dh;
            char *p = tmp;
            int *ii;
 
@@ -528,8 +484,8 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            }
            *(p++) = '\0';
 
-           if ((new = data1_insert_taggeddata(node, node, "schemaIdentifier",
-               mem)))
+           if ((new = data1_insert_taggeddata(dh, node, node,
+                                              "schemaIdentifier", mem)))
            {
                new->u.data.what = DATA1I_oid;
                new->u.data.data = nmem_malloc(mem, p - tmp);
@@ -539,6 +495,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
        }
     }
 
+    logf (LOG_DEBUG, "grs_retrieve: schema mapping");
     /*
      * Does the requested format match a known schema-mapping? (this reflects
      * the overlap of schema and formatting which is inherent in the MARC
@@ -550,88 +507,94 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
        if (map->target_absyn_ref == p->input_format)
        {
            onode = node;
-           if (!(node = data1_map_record(onode, map, mem)))
+           if (!(node = data1_map_record(p->dh, onode, map, mem)))
            {
                p->diagnostic = 14;
+                nmem_destroy (mem);
                return 0;
            }
-
            break;
        }
 
-    if (p->comp && (res = process_comp(node, p->comp)) > 0)
+    logf (LOG_DEBUG, "grs_retrieve: element spec");
+    if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
     {
        p->diagnostic = res;
        if (onode)
-           data1_free_tree(onode);
-       data1_free_tree(node);
+           data1_free_tree(p->dh, onode);
+       data1_free_tree(p->dh, node);
        nmem_destroy(mem);
        return 0;
     }
     else if (p->comp && !res)
        selected = 1;
 
+    logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
     switch (p->output_format = (p->input_format != VAL_NONE ?
        p->input_format : VAL_SUTRS))
     {
        data1_marctab *marctab;
-       int dummy;
+        int dummy;
 
        case VAL_GRS1:
-           if (!(p->rec_buf = data1_nodetogr(node, selected, p->odr, &dummy)))
-               p->diagnostic = 2; /* this should be better specified */
+           dummy = 0;
+           if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
+                                             p->odr, &dummy)))
+               p->diagnostic = 238; /* not available in requested syntax */
            else
                p->rec_len = -1;
            break;
        case VAL_EXPLAIN:
-           if (!(p->rec_buf = data1_nodetoexplain(node, selected, p->odr)))
-               p->diagnostic = 2; /* this should be better specified */
+           if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
+                                                  p->odr)))
+               p->diagnostic = 238;
            else
                p->rec_len = -1;
            break;
        case VAL_SUMMARY:
-           if (!(p->rec_buf = data1_nodetosummary(node, selected, p->odr)))
-               p->diagnostic = 2;
+           if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
+                                                  p->odr)))
+               p->diagnostic = 238;
            else
                p->rec_len = -1;
            break;
        case VAL_SUTRS:
-           if (!(p->rec_buf = data1_nodetobuf(node, selected,
+           if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
                (int*)&p->rec_len)))
-           {
-               p->diagnostic = 2;
-               break;
-           }
+               p->diagnostic = 238;
            break;
        case VAL_SOIF:
-           if (!(p->rec_buf = data1_nodetosoif(node, selected,
-               (int*)&p->rec_len)))
-           {
-               p->diagnostic = 2;
-               break;
-           }
+           if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
+                                               (int*)&p->rec_len)))
+               p->diagnostic = 238;
            break;
        default:
+            if (!node->u.root.absyn)
+            {
+               p->diagnostic = 238;
+               break;
+           }
            for (marctab = node->u.root.absyn->marc; marctab;
                marctab = marctab->next)
                if (marctab->reference == p->input_format)
                    break;
            if (!marctab)
            {
-               p->diagnostic = 227;
+               p->diagnostic = 238;
                break;
            }
-           if (!(p->rec_buf = data1_nodetomarc(marctab, node, selected,
-               (int*)&p->rec_len)))
+           if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
+                                               selected,
+                                               (int*)&p->rec_len)))
            {
-               p->diagnostic = 2;
+               p->diagnostic = 238;
                break;
            }
     }
     if (node)
-       data1_free_tree(node);
+       data1_free_tree(p->dh, node);
     if (onode)
-       data1_free_tree(onode);
+       data1_free_tree(p->dh, onode);
     nmem_destroy(mem);
     return 0;
 }