Minor changes.
[yaz-moved-to-github.git] / retrieval / d1_absyn.c
index ab1b3ac..7813015 100644 (file)
@@ -1,10 +1,47 @@
 /*
- * Copyright (c) 1995-1997, Index Data.
+ * Copyright (c) 1995-1998, Index Data.
  * See the file LICENSE for details.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: d1_absyn.c,v $
- * Revision 1.12  1997-09-17 12:10:34  adam
+ * Revision 1.21  1998-06-09 13:55:07  adam
+ * Minor changes.
+ *
+ * Revision 1.20  1998/05/18 13:07:02  adam
+ * Changed the way attribute sets are handled by the retrieval module.
+ * Extended Explain conversion / schema.
+ * Modified server and client to work with ASN.1 compiled protocol handlers.
+ *
+ * Revision 1.19  1998/03/05 08:15:32  adam
+ * Implemented data1_add_insert_taggeddata utility which is more flexible
+ * than data1_insert_taggeddata.
+ *
+ * Revision 1.18  1998/02/27 14:08:04  adam
+ * Added const to some char pointer arguments.
+ * Reworked data1_read_node so that it doesn't create a tree with
+ * pointers to original "SGML"-buffer.
+ *
+ * Revision 1.17  1998/02/11 11:53:34  adam
+ * Changed code so that it compiles as C++.
+ *
+ * Revision 1.16  1997/12/18 10:51:30  adam
+ * Implemented sub-trees feature for schemas - including forward
+ * references.
+ *
+ * Revision 1.15  1997/12/09 16:18:16  adam
+ * Work on EXPLAIN schema. First implementation of sub-schema facility
+ * in the *.abs files.
+ *
+ * Revision 1.14  1997/10/31 12:20:09  adam
+ * Improved memory debugging for xmalloc/nmem.c. References to NMEM
+ * instead of ODR in ESPEC-1 handling in source d1_espec.c.
+ * Bug fix: missing fclose in data1_read_espec1.
+ *
+ * Revision 1.13  1997/10/27 13:54:18  adam
+ * Changed structure field in data1 node to be simple string which
+ * is "unknown" to the retrieval system itself.
+ *
+ * Revision 1.12  1997/09/17 12:10:34  adam
  * YAZ version 1.4.
  *
  * Revision 1.11  1997/09/05 09:50:55  adam
@@ -65,6 +102,13 @@ struct data1_absyn_cache_info
     data1_absyn_cache next;
 };
 
+struct data1_attset_cache_info /* one node in the list of loaded attribute sets */
+{
+    char *name;               /* key compared by data1_attset_search_name */
+    data1_attset *attset;     /* the attribute set held by this node */
+    data1_attset_cache next;  /* following node; the search loops stop on a null link */
+};
+
 data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
 {
     data1_absyn_cache p = *data1_absyn_cache_get (dh);
@@ -78,12 +122,24 @@ data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
     return NULL;
 }
 
+void data1_absyn_trav (data1_handle dh, void *handle,
+                      void (*fh)(data1_handle dh, void *h, data1_absyn *a))
+{
+    /* Call fh once for every abstract syntax in dh's cache, in list order;
+       handle is handed through to fh untouched on each call. */
+    data1_absyn_cache entry;
+    for (entry = *data1_absyn_cache_get (dh); entry; entry = entry->next)
+    {
+       (*fh)(dh, handle, entry->absyn);
+    }
+}
+
 data1_absyn *data1_absyn_add (data1_handle dh, const char *name)
 {
     char fname[512];
     NMEM mem = data1_nmem_get (dh);
 
-    data1_absyn_cache p = nmem_malloc (mem, sizeof(*p));
+    data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p));
     data1_absyn_cache *pp = data1_absyn_cache_get (dh);
 
     sprintf(fname, "%s.abs", name);
@@ -94,7 +150,7 @@ data1_absyn *data1_absyn_add (data1_handle dh, const char *name)
     return p->absyn;
 }
 
-data1_absyn *data1_get_absyn (data1_handle dh, char *name)
+data1_absyn *data1_get_absyn (data1_handle dh, const char *name)
 {
     data1_absyn *absyn;
 
@@ -103,8 +159,75 @@ data1_absyn *data1_get_absyn (data1_handle dh, char *name)
     return absyn;
 }
 
+data1_attset *data1_attset_search_name (data1_handle dh, const char *name)
+{
+    /* Return the cached attribute set stored under exactly this name,
+       or NULL when no cache entry matches. */
+    data1_attset_cache entry;
+    for (entry = *data1_attset_cache_get (dh); entry; entry = entry->next)
+    {
+       if (strcmp (name, entry->name) == 0)
+           return entry->attset;
+    }
+    return NULL;
+}
+
+data1_attset *data1_attset_search_id (data1_handle dh, int id)
+{
+    /* Return the cached attribute set whose reference field equals id,
+       or NULL when the cache holds no such set. */
+    data1_attset_cache entry;
+    for (entry = *data1_attset_cache_get (dh); entry; entry = entry->next)
+    {
+       if (entry->attset->reference == id)
+           return entry->attset;
+    }
+    return NULL;
+}
+
+data1_attset *data1_attset_add (data1_handle dh, const char *name)
+{
+    char fname[512], aname[512];
+    NMEM mem = data1_nmem_get (dh);
+    data1_attset *attset;
+    /* Load attribute set `name` and cache it; returns NULL if it can't be read. */
+    sprintf(aname, "%.511s", name);      /* precision keeps the copy in bounds */
+    sprintf(fname, "%.507s.att", name);  /* 507 + ".att" + NUL fits fname[512] */
+    attset = data1_read_attset (dh, fname);
+    if (!attset)
+    {
+       char *cp;
+       attset = data1_read_attset (dh, name); /* retry with name exactly as given */
+       if (attset && (cp = strrchr (aname, '.')))
+           *cp = '\0';                        /* cache key drops the last ".xxx" */
+    }
+    if (!attset)
+       logf (LOG_WARN|LOG_ERRNO, "couldn't load attribute set %s", name);
+    else
+    {
+       data1_attset_cache p = (data1_attset_cache)
+           nmem_malloc (mem, sizeof(*p));
+       data1_attset_cache *pp = data1_attset_cache_get (dh);
+       
+       attset->name = p->name = nmem_strdup (mem, aname);
+       p->attset = attset;
+       p->next = *pp;                  /* push onto the front of the cache list */
+       *pp = p;
+    }
+    return attset;
+}
+
+data1_attset *data1_get_attset (data1_handle dh, const char *name)
+{
+    /* Serve the set from the cache; on a miss, fall back to data1_attset_add. */
+    data1_attset *found = data1_attset_search_name (dh, name);
+    if (found)
+       return found;
+    return data1_attset_add (dh, name);
+}
+
 data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,
-                                   char *name)
+                                   const char *name)
 {
     data1_esetname *r;
 
@@ -116,14 +239,15 @@ data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,
 
 data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
                                          data1_element *parent,
-                                         char *tagname)
+                                         const char *tagname)
 {
     data1_element *r;
 
     if (!parent)
-       r = abs->elements;
+        r = abs->main_elements;
     else
        r = parent->children;
+    assert (abs->main_elements);
     for (; r; r = r->next)
     {
        data1_name *n;
@@ -136,22 +260,47 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
 }
 
 data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
-                                      char *name)
+                                      const char *name)
 {
     data1_element *r;
-
-    for (r = absyn->elements; r; r = r->next)
+    assert (absyn->main_elements);
+    for (r = absyn->main_elements; r; r = r->next)
        if (!data1_matchstr(r->name, name))
            return r;
     return 0;
 }
 
+
+void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e)
+{
+    /* Point each element that names a section at that section's elements. */
+    for (; e; e = e->next)
+    {
+       data1_sub_elements *sect;
+       if (!e->sub_name)
+       {
+           if (e->children)    /* plain element: descend into its subtree */
+               fix_element_ref (dh, absyn, e->children);
+           continue;
+       }
+       sect = absyn->sub_elements;
+       while (sect && strcmp (e->sub_name, sect->name) != 0)
+           sect = sect->next;
+       if (sect)
+           e->children = sect->elements;
+       else
+           logf (LOG_WARN, "Unresolved reference to sub-elements %s",
+                 e->sub_name);
+    }
+}
+
 data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 {
     char line[512], *r, cmd[512], args[512];
+    data1_sub_elements *cur_elements = NULL;
     data1_absyn *res = 0;
     FILE *f;
-    data1_element **ppl[D1_MAX_NESTING], *cur[D1_MAX_NESTING];
+    data1_element **ppl[D1_MAX_NESTING];
     data1_esetname **esetpp;
     data1_maptab **maptabp;
     data1_marctab **marcp;
@@ -160,25 +309,25 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
     if (!(f = yaz_path_fopen(data1_get_tabpath (dh), file, "r")))
     {
-       logf(LOG_WARN|LOG_ERRNO, "%s", file);
+       logf(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file);
        return 0;
     }
 
-    res = nmem_malloc(data1_nmem_get(dh), sizeof(*res));
+    res = (data1_absyn *)nmem_malloc(data1_nmem_get(dh), sizeof(*res));
     res->name = 0;
     res->reference = VAL_NONE;
     res->tagset = 0;
     res->attset = 0;
     res->varset = 0;
     res->esetnames = 0;
+    esetpp = &res->esetnames;
     res->maptabs = 0;
     maptabp = &res->maptabs;
     res->marc = 0;
     marcp = &res->marc;
-    res->elements = 0;
-    ppl[0] = &res->elements;
-    cur[0] = 0;
-    esetpp = &res->esetnames;
+
+    res->sub_elements = NULL;
+    res->main_elements = NULL;
 
     for (;;)
     {
@@ -190,20 +339,29 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                break;
        }
        if (!r)
-       {
-           fclose(f);
-           return res;
-       }
+            break;
        if (sscanf(r, "%s %[^\n]", cmd, args) < 2)
            *args = '\0';
        if (!strcmp(cmd, "elm"))
        {
            data1_element *new_element;
            int i;
-           char path[512], name[512], termlists[512], *p;
+           char path[512], name[512], termlists[512], *p, *sub_p;
            int type, value;
            data1_termlist **tp;
 
+           if (!cur_elements)
+           {
+                cur_elements = (data1_sub_elements *)nmem_malloc(data1_nmem_get(dh),
+                                          sizeof(*cur_elements));
+               cur_elements->next = res->sub_elements;
+               cur_elements->elements = NULL;
+               cur_elements->name = "main";
+               res->sub_elements = cur_elements;
+
+               level = 0;
+               ppl[level] = &cur_elements->elements;
+            }
            if (sscanf(args, "%511s %511s %511s", path, name, termlists) < 3)
            {
                logf(LOG_WARN, "Bad # of args to elm in %s: '%s'", 
@@ -228,16 +386,24 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                return 0;
            }
            level = i;
-           new_element = cur[level] = *ppl[level] =
-               nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
+           new_element = *ppl[level] =
+               (data1_element *)nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
            new_element->next = new_element->children = 0;
            new_element->tag = 0;
            new_element->termlists = 0;
-           new_element->parent = level ? cur[level - 1] : 0;
+           new_element->sub_name = 0;
+
            tp = &new_element->termlists;
            ppl[level] = &new_element->next;
            ppl[level+1] = &new_element->children;
-           
+
+           /* consider subtree (if any) ... */
+           if ((sub_p = strchr (p, ':')) && sub_p[1])
+           {
+               *sub_p++ = '\0';
+               new_element->sub_name =
+                   nmem_strdup (data1_nmem_get(dh), sub_p);            
+           }
            /* well-defined tag */
            if (sscanf(p, "(%d,%d)", &type, &value) == 2)
            {
@@ -260,11 +426,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
            else if (*p)
            {
                data1_tag *nt =
-                   new_element->tag = nmem_malloc(data1_nmem_get (dh),
+                   new_element->tag = (data1_tag *)nmem_malloc(data1_nmem_get (dh),
                                                   sizeof(*new_element->tag));
                nt->which = DATA1T_string;
-               nt->value.string = xstrdup(p);
-               nt->names = nmem_malloc(data1_nmem_get(dh), 
+               nt->value.string = nmem_strdup(data1_nmem_get (dh), p);
+               nt->names = (data1_name *)nmem_malloc(data1_nmem_get(dh), 
                                        sizeof(*new_element->tag->names));
                nt->names->name = nt->value.string;
                nt->names->next = 0;
@@ -278,7 +444,6 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                fclose(f);
                return 0;
            }
-
            /* parse termList definitions */
            p = termlists;
            if (*p == '-')
@@ -306,7 +471,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                    }
                    if (*attname == '!')
                        strcpy(attname, name);
-                   *tp = nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+                   *tp = (data1_termlist *)nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
                    (*tp)->next = 0;
                    if (!((*tp)->att = data1_getattbyname(dh, res->attset,
                                                          attname)))
@@ -317,19 +482,38 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                        return 0;
                    }
                    if (r < 2) /* is the structure qualified? */
-                       (*tp)->structure = DATA1S_word;
-                   else if (!data1_matchstr(structure, "w"))
-                       (*tp)->structure = DATA1S_word;
-                   else if (!data1_matchstr(structure, "p"))
-                       (*tp)->structure = DATA1S_phrase;
-
+                       (*tp)->structure = "w";
+                   else 
+                   {
+                       (*tp)->structure = (char *)nmem_malloc (data1_nmem_get (dh),
+                                                       strlen(structure)+1);
+                       strcpy ((*tp)->structure, structure);
+                   }
                    tp = &(*tp)->next;
                }
                while ((p = strchr(p, ',')) && *(++p));
                *tp = all; /* append any ALL entries to the list */
            }
-
-           new_element->name = xstrdup(name);
+           new_element->name = nmem_strdup(data1_nmem_get (dh), name);
+       }
+       else if (!strcmp(cmd, "section")) /* open a new named list of elements */
+       {
+           char name[512];
+           if (sscanf(args, "%511s", name) < 1)
+           {
+               logf(LOG_WARN, "Bad # of args to section in %s: '%s'",
+                                file, args);
+               continue;
+           }
+            cur_elements = (data1_sub_elements *)nmem_malloc(data1_nmem_get(dh),
+                                          sizeof(*cur_elements));
+           cur_elements->next = res->sub_elements;
+           cur_elements->elements = NULL;
+           cur_elements->name = nmem_strdup (data1_nmem_get(dh), name);
+           res->sub_elements = cur_elements;
+           /* following elm lines attach to this section from nesting level 0 */
+           level = 0;
+           ppl[level] = &cur_elements->elements;
        }
        else if (!strcmp(cmd, "all"))
        {
@@ -363,7 +547,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                    fclose(f);
                    return 0;
                }
-               *tp = nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+               *tp = (data1_termlist *)nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
                if (!((*tp)->att = data1_getattbyname (dh, res->attset,
                                                       attname)))
                {
@@ -373,12 +557,13 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
                    return 0;
                }
                if (r < 2) /* is the structure qualified? */
-                   (*tp)->structure = DATA1S_word;
-               else if (!data1_matchstr(structure, "w"))
-                   (*tp)->structure = DATA1S_word;
-               else if (!data1_matchstr(structure, "p"))
-                   (*tp)->structure = DATA1S_phrase;
-               
+                   (*tp)->structure = "w";
+               else 
+               {
+                   (*tp)->structure = (char *)nmem_malloc (data1_nmem_get (dh),
+                                                   strlen(structure)+1);
+                   strcpy ((*tp)->structure, structure);
+               }
                (*tp)->next = 0;
                tp = &(*tp)->next;
            }
@@ -388,13 +573,13 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
        {
            char name[512];
 
-           if (!sscanf(args, "%s", name))
+           if (!sscanf(args, "%511s", name))
            {
-               logf(LOG_WARN, "%s malformed name directive in %s", file);
+               logf(LOG_WARN, "Malformed name directive in %s", file);
                fclose(f);
                return 0;
            }
-           res->name = nmem_strdup(data1_nmem_get(dh), args);
+           res->name = nmem_strdup(data1_nmem_get(dh), name);
        }
        else if (!strcmp(cmd, "reference"))
        {
@@ -402,7 +587,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (!sscanf(args, "%s", name))
            {
-               logf(LOG_WARN, "%s malformed reference directive in %s", file);
+               logf(LOG_WARN, "Malformed reference in %s", file);
                fclose(f);
                return 0;
            }
@@ -419,11 +604,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (!sscanf(args, "%s", name))
            {
-               logf(LOG_WARN, "%s malformed attset directive in %s", file);
+               logf(LOG_WARN, "Malformed attset directive in %s", file);
                fclose(f);
                return 0;
            }
-           if (!(res->attset = data1_read_attset (dh, name)))
+           if (!(res->attset = data1_get_attset (dh, name)))
            {
                logf(LOG_WARN, "Attset failed in %s", file);
                fclose(f);
@@ -436,7 +621,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (!sscanf(args, "%s", name))
            {
-               logf(LOG_WARN, "%s malformed tagset directive in %s", file);
+               logf(LOG_WARN, "Malformed tagset directive in %s", file);
                fclose(f);
                return 0;
            }
@@ -453,7 +638,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (!sscanf(args, "%s", name))
            {
-               logf(LOG_WARN, "%s malformed varset directive in %s", file);
+               logf(LOG_WARN, "Malformed varset directive in %s", file);
                fclose(f);
                return 0;
            }
@@ -470,16 +655,17 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (sscanf(args, "%s %s", name, fname) != 2)
            {
-               logf(LOG_WARN, "%s: Two arg's required for esetname directive");
+               logf(LOG_WARN, "Two arg's required for esetname in %s",
+                     file);
                fclose(f);
                return 0;
            }
-           *esetpp = nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
+           *esetpp = (data1_esetname *)nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
            (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
            (*esetpp)->next = 0;
            if (*fname == '@')
                (*esetpp)->spec = 0;
-           else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname, 0)))
+           else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname)))
            {
                logf(LOG_WARN, "%s: Espec-1 read failed", file);
                fclose(f);
@@ -493,13 +679,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (sscanf(args, "%s", name) != 1)
            {
-               logf(LOG_WARN, "%s: One argument required for maptab directive",
-                   file);
+               logf(LOG_WARN, "One argument for maptab directive in %s",
+                     file);
                continue;
            }
            if (!(*maptabp = data1_read_maptab (dh, name)))
            {
-               logf(LOG_WARN, "%s: Failed to read maptab.");
+               logf(LOG_WARN, "Failed to read maptab %s in %s",
+                     name, file);
                continue;
            }
            maptabp = &(*maptabp)->next;
@@ -510,13 +697,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 
            if (sscanf(args, "%s", name) != 1)
            {
-               logf(LOG_WARN, "%s: One argument required for marc directive",
+               logf(LOG_WARN, "One argument for marc directive in %s",
                    file);
                continue;
            }
            if (!(*marcp = data1_read_marctab (dh, name)))
            {
-               logf(LOG_WARN, "%s: Failed to read marctab.");
+               logf(LOG_WARN, "%Failed to read marctab %s in %s",
+                     name, file);
                continue;
            }
            marcp = &(*marcp)->next;
@@ -528,4 +716,15 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
            return 0;
        }
     }
+    fclose(f);
+    
+    for (cur_elements = res->sub_elements; cur_elements;
+        cur_elements = cur_elements->next)
+    {
+       if (!strcmp (cur_elements->name, "main"))
+           res->main_elements = cur_elements->elements;
+       fix_element_ref (dh, res, cur_elements->elements);
+    }
+    logf (LOG_DEBUG, "end data1_read_absyn file=%s", file);
+    return res;
 }