YAZ version 1.4.
[yaz-moved-to-github.git] / retrieval / d1_absyn.c
index f93d39b..ab1b3ac 100644 (file)
@@ -1,10 +1,44 @@
 /*
- * Copyright (c) 1995, Index Data.
+ * Copyright (c) 1995-1997, Index Data.
  * See the file LICENSE for details.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: d1_absyn.c,v $
- * Revision 1.1  1995-11-01 11:56:06  quinn
+ * Revision 1.12  1997-09-17 12:10:34  adam
+ * YAZ version 1.4.
+ *
+ * Revision 1.11  1997/09/05 09:50:55  adam
+ * Removed global data1_tabpath - uses data1_get_tabpath() instead.
+ *
+ * Revision 1.10  1997/05/14 06:54:01  adam
+ * C++ support.
+ *
+ * Revision 1.9  1997/02/19 14:46:15  adam
+ * The "all" specifier only affects elements that are indexed (and not
+ * all elements).
+ *
+ * Revision 1.8  1997/01/02 10:47:59  quinn
+ * Added optional, physical ANY
+ *
+ * Revision 1.7  1996/06/10 08:56:01  quinn
+ * Work on Summary.
+ *
+ * Revision 1.6  1996/05/31  13:52:21  quinn
+ * Fixed uninitialized variable for local tags in abstract syntax.
+ *
+ * Revision 1.5  1996/05/09  07:27:43  quinn
+ * Multiple local attributes values supported.
+ *
+ * Revision 1.4  1996/05/01  12:45:28  quinn
+ * Support use of local tag names in abs file.
+ *
+ * Revision 1.3  1995/11/01  16:34:55  quinn
+ * Making data1 look for tables in data1_tabpath
+ *
+ * Revision 1.2  1995/11/01  13:54:44  quinn
+ * Minor adjustments
+ *
+ * Revision 1.1  1995/11/01  11:56:06  quinn
  * Added Retrieval (data management) functions en masse.
  *
  *
 #include <stdlib.h>
 #include <string.h>
 
-#include <xmalloc.h>
 #include <oid.h>
 #include <log.h>
+#include <tpath.h>
 
-#include "data1.h"
+#include <data1.h>
 
 #define D1_MAX_NESTING  128
-#define DATA1_MAX_SYNTAXES 30 /* max no of syntaxes to handle in one session */
 
-static struct /* cache of abstract syntaxes */
+struct data1_absyn_cache_info /* one cache entry: a name paired with the abstract syntax read for it */
 {
     char *name;
     data1_absyn *absyn;
-} syntaxes[DATA1_MAX_SYNTAXES] = {{0,0}};
+    data1_absyn_cache next; /* following entry; the search loop stops when this is null (presumably a pointer typedef for this struct - confirm in data1.h) */
+};
 
-data1_absyn *data1_get_absyn(char *name)
+data1_absyn *data1_absyn_search (data1_handle dh, const char *name) /* look NAME up in the cache list held by DH; returns the entry's absyn pointer, or NULL when no entry matches */
 {
-    char fname[512];
-    int i;
-
-    for (i = 0; syntaxes[i].name; i++)
-       if (!strcmp(name, syntaxes[i].name))
-           return syntaxes[i].absyn;
+    data1_absyn_cache p = *data1_absyn_cache_get (dh); /* start at the current head of the list */
 
-    if (i >= DATA1_MAX_SYNTAXES - 1)
+    while (p)
     {
-       logf(LOG_WARN, "Too many abstract syntaxes loaded");
-       return 0;
+       if (!strcmp (name, p->name)) /* exact strcmp match, so the comparison is case-sensitive */
+           return p->absyn; /* NOTE(review): can itself be 0, because data1_absyn_add stores the read result unchecked */
+       p = p->next;
     }
+    return NULL;
+}
+
+data1_absyn *data1_absyn_add (data1_handle dh, const char *name) /* read "<name>.abs", push a cache entry for NAME onto the head of DH's list, and return the read result */
+{
+    char fname[512]; /* NOTE(review): filled by an unbounded sprintf below; a NAME longer than 507 characters overflows it */
+    NMEM mem = data1_nmem_get (dh); /* the entry and the copy of NAME are both allocated from this NMEM */
+
+    data1_absyn_cache p = nmem_malloc (mem, sizeof(*p));
+    data1_absyn_cache *pp = data1_absyn_cache_get (dh); /* address of the list head, so the head can be replaced below */
+
     sprintf(fname, "%s.abs", name);
-    if (!(syntaxes[i].absyn = data1_read_absyn(fname)))
-       return 0;
-    if (!(syntaxes[i].name = xmalloc(strlen(name)+1)))
-       abort();
-    strcpy(syntaxes[i].name, name);
-    syntaxes[i+1].name = 0;
-    return syntaxes[i].absyn;
+    p->absyn = data1_read_absyn (dh, fname); /* NOTE(review): not checked - a failed read (0) is cached as well, and each retry adds another entry */
+    p->name = nmem_strdup (mem, name);
+    p->next = *pp; /* link the new entry in front of the existing list */
+    *pp = p;
+    return p->absyn;
+}
+
+data1_absyn *data1_get_absyn (data1_handle dh, char *name) /* return the abstract syntax for NAME: the cached one if the search finds it, otherwise the result of loading and caching it */
+{
+    data1_absyn *absyn;
+
+    if (!(absyn = data1_absyn_search (dh, name))) /* null means no matching entry, or an entry whose absyn is 0 */
+       absyn = data1_absyn_add (dh, name); /* may still be 0 when the .abs file cannot be read */
+    return absyn;
 }
 
-data1_esetname *data1_getesetbyname(data1_absyn *a, char *name)
+data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,
+                                   char *name)
 {
     data1_esetname *r;
 
@@ -65,8 +114,9 @@ data1_esetname *data1_getesetbyname(data1_absyn *a, char *name)
     return 0;
 }
 
-data1_element *data1_getelementbytagname(data1_absyn *abs,
-    data1_element *parent, char *tagname)
+data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
+                                         data1_element *parent,
+                                         char *tagname)
 {
     data1_element *r;
 
@@ -85,7 +135,8 @@ data1_element *data1_getelementbytagname(data1_absyn *abs,
     return 0;
 }
 
-data1_element *data1_getelementbyname(data1_absyn *absyn, char *name)
+data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
+                                      char *name)
 {
     data1_element *r;
 
@@ -95,25 +146,25 @@ data1_element *data1_getelementbyname(data1_absyn *absyn, char *name)
     return 0;
 }
 
-data1_absyn *data1_read_absyn(char *file)
+data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
 {
     char line[512], *r, cmd[512], args[512];
     data1_absyn *res = 0;
     FILE *f;
-    data1_element **ppl[D1_MAX_NESTING];
+    data1_element **ppl[D1_MAX_NESTING], *cur[D1_MAX_NESTING];
     data1_esetname **esetpp;
     data1_maptab **maptabp;
     data1_marctab **marcp;
+    data1_termlist *all = 0;
     int level = 0;
 
-    if (!(f = fopen(file, "r")))
+    if (!(f = yaz_path_fopen(data1_get_tabpath (dh), file, "r")))
     {
        logf(LOG_WARN|LOG_ERRNO, "%s", file);
        return 0;
     }
 
-    if (!(res = xmalloc(sizeof(*res))))
-       abort();
+    res = nmem_malloc(data1_nmem_get(dh), sizeof(*res));
     res->name = 0;
     res->reference = VAL_NONE;
     res->tagset = 0;
@@ -126,6 +177,7 @@ data1_absyn *data1_read_absyn(char *file)
     marcp = &res->marc;
     res->elements = 0;
     ppl[0] = &res->elements;
+    cur[0] = 0;
     esetpp = &res->esetnames;
 
     for (;;)
@@ -146,12 +198,13 @@ data1_absyn *data1_read_absyn(char *file)
            *args = '\0';
        if (!strcmp(cmd, "elm"))
        {
-           data1_element *new;
+           data1_element *new_element;
            int i;
-           char path[512], name[512], att[512], *p;
+           char path[512], name[512], termlists[512], *p;
            int type, value;
+           data1_termlist **tp;
 
-           if (sscanf(args, "%s %s %s", path, name, att) < 3)
+           if (sscanf(args, "%511s %511s %511s", path, name, termlists) < 3)
            {
                logf(LOG_WARN, "Bad # of args to elm in %s: '%s'", 
                    file, args);
@@ -170,40 +223,66 @@ data1_absyn *data1_read_absyn(char *file)
            }
            if (i > level + 1)
            {
-               logf(LOG_WARN, "Bad level inc in %s in '%'", file, args);
+               logf(LOG_WARN, "Bad level inc in %s in '%s'", file, args);
                fclose(f);
                return 0;
            }
            level = i;
-           if (!(new = *ppl[level] = xmalloc(sizeof(*new))))
-               abort;
-           new ->next = new->children = 0;
-           ppl[level] = &new->next;
-           ppl[level+1] = &new->children;
-
-           if (sscanf(p, "(%d,%d)", &type, &value) < 2)
+           new_element = cur[level] = *ppl[level] =
+               nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
+           new_element->next = new_element->children = 0;
+           new_element->tag = 0;
+           new_element->termlists = 0;
+           new_element->parent = level ? cur[level - 1] : 0;
+           tp = &new_element->termlists;
+           ppl[level] = &new_element->next;
+           ppl[level+1] = &new_element->children;
+           
+           /* well-defined tag */
+           if (sscanf(p, "(%d,%d)", &type, &value) == 2)
            {
-               logf(LOG_WARN, "Malformed element '%s' in %s", p, file);
-               fclose(f);
-               return 0;
+               if (!res->tagset)
+               {
+                   logf(LOG_WARN, "No tagset loaded in %s", file);
+                   fclose(f);
+                   return 0;
+               }
+               if (!(new_element->tag = data1_gettagbynum (dh, res->tagset,
+                                                           type, value)))
+               {
+                   logf(LOG_WARN, "Couldn't find tag %s in tagset in %s",
+                       p, file);
+                   fclose(f);
+                   return 0;
+               }
            }
-           if (!res->tagset)
+           /* private tag */
+           else if (*p)
            {
-               logf(LOG_WARN, "No tagset loaded in %s", file);
-               fclose(f);
-               return 0;
+               data1_tag *nt =
+                   new_element->tag = nmem_malloc(data1_nmem_get (dh),
+                                                  sizeof(*new_element->tag));
+               nt->which = DATA1T_string;
+               nt->value.string = xstrdup(p);
+               nt->names = nmem_malloc(data1_nmem_get(dh), 
+                                       sizeof(*new_element->tag->names));
+               nt->names->name = nt->value.string;
+               nt->names->next = 0;
+               nt->kind = DATA1K_string;
+               nt->next = 0;
+               nt->tagset = 0;
            }
-           if (!(new->tag = data1_gettagbynum(res->tagset, type, value)))
+           else
            {
-               logf(LOG_WARN, "Couldn't find tag %s in tagset in %s",
-                   p, file);
+               logf(LOG_WARN, "Bad element is %s", file);
                fclose(f);
                return 0;
            }
-           if (*att == '!')
-               strcpy(att, name);
-           if (*att == '-')
-               new->att = 0;
+
+           /* parse termList definitions */
+           p = termlists;
+           if (*p == '-')
+               new_element->termlists = 0;
            else
            {
                if (!res->attset)
@@ -212,16 +291,98 @@ data1_absyn *data1_read_absyn(char *file)
                    fclose(f);
                    return 0;
                }
-               if (!(new->att = data1_getattbyname(res->attset, att)))
+               do
                {
-                   logf(LOG_WARN, "Couldn't find att '%s' in attset", att);
+                   char attname[512], structure[512];
+                   int r;
+
+                   if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
+                       structure)))
+                   {
+                       logf(LOG_WARN, "Syntax error in termlistspec in %s",
+                           file);
+                       fclose(f);
+                       return 0;
+                   }
+                   if (*attname == '!')
+                       strcpy(attname, name);
+                   *tp = nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+                   (*tp)->next = 0;
+                   if (!((*tp)->att = data1_getattbyname(dh, res->attset,
+                                                         attname)))
+                   {
+                       logf(LOG_WARN, "Couldn't find att '%s' in attset",
+                            attname);
+                       fclose(f);
+                       return 0;
+                   }
+                   if (r < 2) /* is the structure qualified? */
+                       (*tp)->structure = DATA1S_word;
+                   else if (!data1_matchstr(structure, "w"))
+                       (*tp)->structure = DATA1S_word;
+                   else if (!data1_matchstr(structure, "p"))
+                       (*tp)->structure = DATA1S_phrase;
+
+                   tp = &(*tp)->next;
+               }
+               while ((p = strchr(p, ',')) && *(++p));
+               *tp = all; /* append any ALL entries to the list */
+           }
+
+           new_element->name = xstrdup(name);
+       }
+       else if (!strcmp(cmd, "all"))
+       {
+           char *p;
+           data1_termlist **tp = &all;
+
+           if (all)
+           {
+               logf(LOG_WARN, "Too many ALL declarations in %s - ignored",
+                   file);
+               continue;
+           }
+
+           p = args;
+           if (!res->attset)
+           {
+               logf(LOG_WARN, "No attset loaded in %s", file);
+               fclose(f);
+               return 0;
+           }
+           do
+           {
+               char attname[512], structure[512];
+               int r;
+
+               if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
+                   structure)))
+               {
+                   logf(LOG_WARN, "Syntax error in termlistspec in %s",
+                       file);
+                   fclose(f);
+                   return 0;
+               }
+               *tp = nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+               if (!((*tp)->att = data1_getattbyname (dh, res->attset,
+                                                      attname)))
+               {
+                   logf(LOG_WARN, "Couldn't find att '%s' in attset",
+                        attname);
                    fclose(f);
                    return 0;
                }
+               if (r < 2) /* is the structure qualified? */
+                   (*tp)->structure = DATA1S_word;
+               else if (!data1_matchstr(structure, "w"))
+                   (*tp)->structure = DATA1S_word;
+               else if (!data1_matchstr(structure, "p"))
+                   (*tp)->structure = DATA1S_phrase;
+               
+               (*tp)->next = 0;
+               tp = &(*tp)->next;
            }
-           if (!(new->name = xmalloc(strlen(name)+1)))
-               abort();
-           strcpy(new->name, name);
+           while ((p = strchr(p, ',')) && *(++p));
        }
        else if (!strcmp(cmd, "name"))
        {
@@ -233,9 +394,7 @@ data1_absyn *data1_read_absyn(char *file)
                fclose(f);
                return 0;
            }
-           if (!(res->name = xmalloc(strlen(args)+1)))
-               abort();
-           strcpy(res->name, name);
+           res->name = nmem_strdup(data1_nmem_get(dh), args);
        }
        else if (!strcmp(cmd, "reference"))
        {
@@ -264,7 +423,7 @@ data1_absyn *data1_read_absyn(char *file)
                fclose(f);
                return 0;
            }
-           if (!(res->attset = data1_read_attset(name)))
+           if (!(res->attset = data1_read_attset (dh, name)))
            {
                logf(LOG_WARN, "Attset failed in %s", file);
                fclose(f);
@@ -281,7 +440,7 @@ data1_absyn *data1_read_absyn(char *file)
                fclose(f);
                return 0;
            }
-           if (!(res->tagset = data1_read_tagset(name)))
+           if (!(res->tagset = data1_read_tagset (dh, name)))
            {
                logf(LOG_WARN, "Tagset failed in %s", file);
                fclose(f);
@@ -298,7 +457,7 @@ data1_absyn *data1_read_absyn(char *file)
                fclose(f);
                return 0;
            }
-           if (!(res->varset = data1_read_varset(name)))
+           if (!(res->varset = data1_read_varset (dh, name)))
            {
                logf(LOG_WARN, "Varset failed in %s", file);
                fclose(f);
@@ -315,18 +474,17 @@ data1_absyn *data1_read_absyn(char *file)
                fclose(f);
                return 0;
            }
-           *esetpp = xmalloc(sizeof(**esetpp));
-           (*esetpp)->name = xmalloc(strlen(name)+1);
-           strcpy((*esetpp)->name, name);
+           *esetpp = nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
+           (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
+           (*esetpp)->next = 0;
            if (*fname == '@')
                (*esetpp)->spec = 0;
-           else if (!((*esetpp)->spec = data1_read_espec1(fname, 0)))
+           else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname, 0)))
            {
                logf(LOG_WARN, "%s: Espec-1 read failed", file);
                fclose(f);
                return 0;
            }
-           (*esetpp)->next = 0;
            esetpp = &(*esetpp)->next;
        }
        else if (!strcmp(cmd, "maptab"))
@@ -339,7 +497,7 @@ data1_absyn *data1_read_absyn(char *file)
                    file);
                continue;
            }
-           if (!(*maptabp = data1_read_maptab(name)))
+           if (!(*maptabp = data1_read_maptab (dh, name)))
            {
                logf(LOG_WARN, "%s: Failed to read maptab.");
                continue;
@@ -356,7 +514,7 @@ data1_absyn *data1_read_absyn(char *file)
                    file);
                continue;
            }
-           if (!(*marcp = data1_read_marctab(name)))
+           if (!(*marcp = data1_read_marctab (dh, name)))
            {
                logf(LOG_WARN, "%s: Failed to read marctab.");
                continue;