Implemented Sort.
[idzebra-moved-to-github.git] / recctrl / recgrs.c
index d4cf58c..76e0918 100644 (file)
@@ -1,10 +1,35 @@
 /*
- * Copyright (C) 1994-1997, Index Data I/S 
+ * Copyright (C) 1994-1998, Index Data I/S 
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: recgrs.c,v $
- * Revision 1.9  1997-09-17 12:19:21  adam
+ * Revision 1.17  1998-02-10 12:03:06  adam
+ * Implemented Sort.
+ *
+ * Revision 1.16  1998/01/29 13:38:17  adam
+ * Fixed problem with mapping to record with unknown schema.
+ *
+ * Revision 1.15  1998/01/26 10:37:57  adam
+ * Better diagnostics.
+ *
+ * Revision 1.14  1997/11/06 11:41:01  adam
+ * Implemented "begin variant" for the sgml.regx filter.
+ *
+ * Revision 1.13  1997/10/31 12:35:44  adam
+ * Added a few log statements.
+ *
+ * Revision 1.12  1997/10/29 12:02:22  adam
+ * Using oid_ent_to_oid instead of the non-thread-safe oid_getoidbyent.
+ *
+ * Revision 1.11  1997/10/27 14:34:00  adam
+ * Work on generic character mapping depending on "structure" field
+ * in abstract syntax file.
+ *
+ * Revision 1.10  1997/09/18 08:59:21  adam
+ * Extra generic handle for the character mapping routines.
+ *
+ * Revision 1.9  1997/09/17 12:19:21  adam
  * Zebra version corresponds to YAZ version 1.4.
  * Changed Zebra server so that it doesn't depend on global common_resource.
  *
 #include <oid.h>
 
 #include <recctrl.h>
-#include <charmap.h>
 #include "grsread.h"
 
 #define GRS_MAX_WORD 512
@@ -170,117 +194,10 @@ static void grs_init(void)
 {
 }
 
-static void dumpkeys_word(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
-{
-    const char *b = n->u.data.data;
-    int remain;
-    const char **map = 0;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(&b, remain);
-
-    while (map)
-    {
-       RecWord wrd;
-       char buf[GRS_MAX_WORD+1];
-       int i, remain;
-
-       /* Skip spaces */
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-       i = 0;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           const char *cp = *map;
-
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!i)
-           return;
-       buf[i] = '\0';
-       (*p->init)(&wrd);      /* set defaults */
-       wrd.which = Word_String;
-       wrd.seqno = seqno++;
-       wrd.u.string = buf;
-       wrd.attrSet = att->parent->ordinal;
-       wrd.attrUse = att->locals->local;
-       (*p->add)(&wrd);
-    }
-}
-
-static void dumpkeys_phrase(data1_node *n, struct recExtractCtrl *p,
-    data1_att *att)
-{
-    const char *b = n->u.data.data;
-    char buf[GRS_MAX_WORD+1];
-    const char **map = 0;
-    RecWord wrd;
-    int i = 0, remain;
-
-    remain = n->u.data.len - (b - n->u.data.data);
-    if (remain > 0)
-       map = (*p->map_chrs_input)(&b, remain);
-
-    while (remain > 0 && i < GRS_MAX_WORD)
-    {
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-
-       if (i && i < GRS_MAX_WORD)
-           buf[i++] = *CHR_SPACE;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           const char *cp = *map;
-
-           if (i >= GRS_MAX_WORD)
-               break;
-           while (i < GRS_MAX_WORD && *cp)
-               buf[i++] = *(cp++);
-           remain = n->u.data.len - (b - n->u.data.data);
-           if (remain > 0)
-               map = (*p->map_chrs_input)(&b, remain);
-           else
-               map = 0;
-       }
-    }
-    if (!i)
-       return;
-    buf[i] = '\0';
-    (*p->init)(&wrd);
-    wrd.which = Word_Phrase;
-    wrd.seqno = seqno++;
-    wrd.u.string = buf;
-    wrd.attrSet = att->parent->ordinal;
-    wrd.attrUse = att->locals->local;
-    (*p->add)(&wrd);
-}
-
 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 {
+    RecWord wrd;
+    (*p->init)(p, &wrd);      /* set defaults */
     for (; n; n = n->next)
     {
        if (p->flagShowRecords) /* display element description to user */
@@ -328,6 +245,7 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
        {
            data1_node *par = get_parent_tag(p->dh, n);
            data1_termlist *tlist = 0;
+           data1_datatype dtype = DATA1K_string;
 
            if (p->flagShowRecords)
            {
@@ -350,39 +268,32 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
             */
 
            while (!par->u.tag.element)
-               if (!par->parent || !(par = get_parent_tag(p->dh, par->parent)))
+               if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
                    break;
-           if (!par)
-               tlist = 0;
-           else if (par->u.tag.element->termlists)
-               tlist = par->u.tag.element->termlists;
-           else
+           if (!par || !(tlist = par->u.tag.element->termlists))
                continue;
-
+           if (par->u.tag.element->tag)
+               dtype = par->u.tag.element->tag->kind;
            for (; tlist; tlist = tlist->next)
            {
                if (p->flagShowRecords)
                {
-                   printf("%*sIdx: [", (level + 1) * 4, "");
-                   switch (tlist->structure)
-                   {
-                       case DATA1S_word: printf("w"); break;
-                       case DATA1S_phrase: printf("p"); break;
-                       default: printf("?"); break;
-                   }
-                   printf("] ");
-                   printf("%s:%s [%d]\n", tlist->att->parent->name,
-                       tlist->att->name, tlist->att->value);
+                   printf("%*sIdx: [%s]", (level + 1) * 4, "",
+                          tlist->structure);
+                   printf("%s:%s [%d]\n",
+                          tlist->att->parent->name,
+                          tlist->att->name, tlist->att->value);
                }
-               else switch (tlist->structure)
+               else
                {
-                   case DATA1S_word:
-                       dumpkeys_word(n, p, tlist->att); break;
-                   case DATA1S_phrase:
-                       dumpkeys_phrase(n, p, tlist->att); break;
-                   default:
-                       logf(LOG_FATAL, "Bad structure type in dumpkeys");
-                       abort();
+                   wrd.reg_type = *tlist->structure;
+                   wrd.seqno = seqno;
+                   wrd.string = n->u.data.data;
+                   wrd.length = n->u.data.len;
+                   wrd.attrSet = tlist->att->parent->ordinal;
+                   wrd.attrUse = tlist->att->locals->local;
+                   (*p->add)(&wrd);
+                   seqno = wrd.seqno;
                }
            }
        }
@@ -397,10 +308,11 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
 static int grs_extract(struct recExtractCtrl *p)
 {
     data1_node *n;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
     seqno = 0;
 
+    mem = nmem_create (); 
     gri.readf = p->readf;
     gri.seekf = p->seekf;
     gri.tellf = p->tellf;
@@ -486,9 +398,15 @@ static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
                return 26; /* fix */
     }
     if (espec)
+    {
+        logf (LOG_LOG, "Element: Espec-1 match");
        return data1_doespec1(dh, n, espec);
+    }
     else
+    {
+       logf (LOG_DEBUG, "Element: all match");
        return -1;
+    }
 }
 
 static int grs_retrieve(struct recRetrieveCtrl *p)
@@ -497,9 +415,10 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     data1_node *new;
     data1_maptab *map;
     int res, selected = 0;
-    NMEM mem = nmem_create();
+    NMEM mem;
     struct grs_read_info gri;
     
+    mem = nmem_create();
     gri.readf = p->readf;
     gri.seekf = p->seekf;
     gri.tellf = p->tellf;
@@ -509,41 +428,47 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     gri.mem = mem;
     gri.dh = p->dh;
 
+    logf (LOG_DEBUG, "grs_retrieve");
     node = read_grs_type (&gri, p->subType);
     if (!node)
     {
-       p->diagnostic = 2;
+       p->diagnostic = 14;
+        nmem_destroy (mem);
        return 0;
     }
+    logf (LOG_DEBUG, "grs_retrieve: score");
     if (p->score >= 0 && (new =
                          data1_insert_taggeddata(p->dh, node,
                                                  node, "rank",
                                                  mem)))
     {
        new->u.data.what = DATA1I_num;
-       new->u.data.data = new->u.data.lbuf;
+       new->u.data.data = new->lbuf;
        sprintf(new->u.data.data, "%d", p->score);
        new->u.data.len = strlen(new->u.data.data);
     }
+    logf (LOG_DEBUG, "grs_retrieve: localControlNumber");
     if ((new = data1_insert_taggeddata(p->dh, node, node,
                                       "localControlNumber", mem)))
     {
        new->u.data.what = DATA1I_text;
-       new->u.data.data = new->u.data.lbuf;
+       new->u.data.data = new->lbuf;
        sprintf(new->u.data.data, "%d", p->localno);
        new->u.data.len = strlen(new->u.data.data);
     }
+    logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
     if (p->input_format == VAL_GRS1 && node->u.root.absyn &&
        node->u.root.absyn->reference != VAL_NONE)
     {
        oident oe;
        Odr_oid *oid;
+       int oidtmp[OID_SIZE];
 
        oe.proto = PROTO_Z3950;
        oe.oclass = CLASS_SCHEMA;
        oe.value = node->u.root.absyn->reference;
 
-       if ((oid = oid_getoidbyent(&oe)))
+       if ((oid = oid_ent_to_oid (&oe, oidtmp)))
        {
            char tmp[128];
            data1_handle dh = p->dh;
@@ -570,6 +495,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
        }
     }
 
+    logf (LOG_DEBUG, "grs_retrieve: schema mapping");
     /*
      * Does the requested format match a known schema-mapping? (this reflects
      * the overlap of schema and formatting which is inherent in the MARC
@@ -584,12 +510,13 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            if (!(node = data1_map_record(p->dh, onode, map, mem)))
            {
                p->diagnostic = 14;
+                nmem_destroy (mem);
                return 0;
            }
-
            break;
        }
 
+    logf (LOG_DEBUG, "grs_retrieve: element spec");
     if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
     {
        p->diagnostic = res;
@@ -602,6 +529,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
     else if (p->comp && !res)
        selected = 1;
 
+    logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
     switch (p->output_format = (p->input_format != VAL_NONE ?
        p->input_format : VAL_SUTRS))
     {
@@ -612,55 +540,54 @@ static int grs_retrieve(struct recRetrieveCtrl *p)
            dummy = 0;
            if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
                                              p->odr, &dummy)))
-               p->diagnostic = 2; /* this should be better specified */
+               p->diagnostic = 238; /* not available in requested syntax */
            else
                p->rec_len = -1;
            break;
        case VAL_EXPLAIN:
            if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
                                                   p->odr)))
-               p->diagnostic = 2; /* this should be better specified */
+               p->diagnostic = 238;
            else
                p->rec_len = -1;
            break;
        case VAL_SUMMARY:
            if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
                                                   p->odr)))
-               p->diagnostic = 2;
+               p->diagnostic = 238;
            else
                p->rec_len = -1;
            break;
        case VAL_SUTRS:
            if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
                (int*)&p->rec_len)))
-           {
-               p->diagnostic = 2;
-               break;
-           }
+               p->diagnostic = 238;
            break;
        case VAL_SOIF:
            if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
                                                (int*)&p->rec_len)))
-           {
-               p->diagnostic = 2;
-               break;
-           }
+               p->diagnostic = 238;
            break;
        default:
+            if (!node->u.root.absyn)
+            {
+               p->diagnostic = 238;
+               break;
+           }
            for (marctab = node->u.root.absyn->marc; marctab;
                marctab = marctab->next)
                if (marctab->reference == p->input_format)
                    break;
            if (!marctab)
            {
-               p->diagnostic = 227;
+               p->diagnostic = 238;
                break;
            }
            if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
                                                selected,
                                                (int*)&p->rec_len)))
            {
-               p->diagnostic = 2;
+               p->diagnostic = 238;
                break;
            }
     }