-/* $Id: d1_absyn.c,v 1.9 2003-06-12 18:20:24 adam Exp $
- Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
- Index Data Aps
+/* $Id: d1_absyn.c,v 1.19 2005-01-15 19:38:18 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
This file is part of the Zebra server.
#include <stdlib.h>
#include <string.h>
-#include <yaz/oid.h>
#include <yaz/log.h>
-#include <data1.h>
+#include <yaz/oid.h>
+#include <idzebra/data1.h>
#include <zebra_xpath.h>
+#include <d1_absyn.h>
#define D1_MAX_NESTING 128
+struct data1_hash_table { /* string-keyed hash table with chained buckets; created by data1_hash_open */
+ NMEM nmem; /* allocator used for the bucket array, the entries and copied keys */
+ int size; /* number of buckets in ar (always > 0 after data1_hash_open) */
+ struct data1_hash_entry **ar; /* bucket heads; each is 0 or the first entry of a chain */
+};
+
+struct data1_hash_entry { /* one key/value node in a bucket chain of data1_hash_table */
+ void *clientData; /* value handed back by data1_hash_lookup when the key matches */
+ char *str; /* key: either a copy made with nmem_strdup or the caller's own pointer */
+ struct data1_hash_entry *next; /* next entry in the same bucket, 0 at the end of the chain */
+};
+
+/* Map str to a bucket index in the range [0, ht->size).
+ *
+ * Only ASCII letters and digits take part in the hash: a letter counts
+ * as 10..35 regardless of case and a digit as 0..9; every other
+ * character is skipped. Two keys that differ only in letter case or in
+ * non-alphanumeric characters therefore land in the same bucket.
+ * str must not be null (asserted).
+ */
+unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
+{
+    unsigned hash = 0;
+    const char *p;
+
+    assert(str);
+    for (p = str; *p; p++)
+    {
+        int digit;
+
+        if (*p >= 'a' && *p <= 'z')
+            digit = *p - 'a' + 10;
+        else if (*p >= 'A' && *p <= 'Z')
+            digit = *p - 'A' + 10;
+        else if (*p >= '0' && *p <= '9')
+            digit = *p - '0';
+        else
+            continue;           /* character does not contribute */
+        hash = hash * 65509 + digit;
+    }
+    return hash % ht->size;
+}
+
+/* Create an empty hash table whose storage comes from nmem.
+ *
+ * size is the requested number of buckets; a value <= 0 selects the
+ * default of 29. All buckets start out empty. The results of
+ * nmem_malloc are used without being checked.
+ */
+struct data1_hash_table *data1_hash_open(int size, NMEM nmem)
+{
+    struct data1_hash_table *table = nmem_malloc(nmem, sizeof(*table));
+    struct data1_hash_entry **bucket;
+
+    table->nmem = nmem;
+    table->size = (size > 0) ? size : 29;
+    table->ar = nmem_malloc(nmem, sizeof(*table->ar) * table->size);
+    for (bucket = table->ar; bucket < table->ar + table->size; bucket++)
+        *bucket = 0;
+    return table;
+}
+
+/* Set the value for key in the chain whose head pointer is *chain.
+ * An existing entry (exact strcmp match) only has its clientData
+ * replaced; otherwise a new entry holding stored_key is appended.
+ */
+static void data1_hash_chain_set(struct data1_hash_table *ht,
+                                 struct data1_hash_entry **chain,
+                                 const char *key, char *stored_key,
+                                 void *clientData)
+{
+    struct data1_hash_entry **link = chain;
+
+    while (*link && strcmp(key, (*link)->str) != 0)
+        link = &(*link)->next;
+    if (!*link)
+    {
+        *link = nmem_malloc(ht->nmem, sizeof(**link));
+        (*link)->str = stored_key;
+        (*link)->next = 0;
+    }
+    (*link)->clientData = clientData;
+}
+
+/* Associate clientData with the key str.
+ *
+ * copy != 0: the key is duplicated with nmem_strdup in the table's NMEM.
+ * copy == 0: the caller's pointer is stored as is, so the string has to
+ *            stay valid for as long as the table is used.
+ *
+ * A key containing '?' or '.' is entered into every bucket instead of
+ * only the one its hash selects.
+ * NOTE(review): presumably these are wildcard characters for
+ * yaz_matchstr, which data1_hash_lookup uses to compare keys, so such a
+ * key must be reachable from any bucket - confirm against YAZ.
+ */
+void data1_hash_insert(struct data1_hash_table *ht, const char *str,
+                       void *clientData, int copy)
+{
+    char *stored_key = copy ? nmem_strdup(ht->nmem, str) : (char *) str;
+
+    if (strchr(str, '?') || strchr(str, '.'))
+    {
+        int i;
+
+        for (i = 0; i < ht->size; i++)
+            data1_hash_chain_set(ht, &ht->ar[i], str, stored_key,
+                                 clientData);
+    }
+    else
+        data1_hash_chain_set(ht, &ht->ar[data1_hash_calc(ht, str)], str,
+                             stored_key, clientData);
+}
+
+/* Look up str and return the clientData stored for it, or 0 when no
+ * entry in the selected bucket matches.
+ *
+ * The bucket is chosen with data1_hash_calc and entries are compared
+ * with yaz_matchstr (a return of 0 counts as a match), not strcmp.
+ * NOTE(review): yaz_matchstr's exact matching rules are defined in YAZ
+ * and are not visible here.
+ */
+void *data1_hash_lookup(struct data1_hash_table *ht, const char *str)
+{
+    struct data1_hash_entry *entry;
+
+    for (entry = ht->ar[data1_hash_calc(ht, str)]; entry; entry = entry->next)
+        if (!yaz_matchstr(str, entry->str))
+            return entry->clientData;
+    return 0;
+}
+
struct data1_systag {
char *name;
char *value;
data1_attset_cache next;
};
+/* Allocate a data1_element from the handle's NMEM and clear the fields
+ * this file uses: name, tag, termlists, sub_name, hash and the
+ * next/children links are all set to 0.
+ */
+data1_element *data1_mk_element(data1_handle dh)
+{
+    data1_element *elem = nmem_malloc(data1_nmem_get(dh), sizeof(*elem));
+
+    elem->name = 0;
+    elem->sub_name = 0;
+    elem->tag = 0;
+    elem->termlists = 0;
+    elem->children = 0;
+    elem->next = 0;
+    elem->hash = 0;
+    return elem;
+}
+
data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
{
data1_absyn_cache p = *data1_absyn_cache_get (dh);
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->absyn;
p = p->next;
}
{
data1_xpelement *xpe = abs->xp_elements;
while (xpe) {
- logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
+ yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
if (xpe->dfa) { dfa_delete (&xpe->dfa); }
xpe = xpe->next;
}
while (p)
{
- if (!strcmp (name, p->name))
+ if (!yaz_matchstr (name, p->name))
return p->attset;
p = p->next;
}
*cp = '\0';
}
if (!attset)
- yaz_log (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name);
+ yaz_log (YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name);
else
{
data1_attset_cache p = (data1_attset_cache)
return 0;
}
+/* we have multiple versions of data1_getelementbytagname */
+#define DATA1_GETELEMENTBYTAGNAME_VERSION 1
+
+#if DATA1_GETELEMENTBYTAGNAME_VERSION==0
+/* straight linear search */
data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
data1_element *parent,
const char *tagname)
}
return 0;
}
+#endif
+
+#if DATA1_GETELEMENTBYTAGNAME_VERSION==1
+/* using hash search
+ *
+ * Finds the element called tagname among the children of parent, or
+ * among the top-level elements of abs when parent is null. Returns 0
+ * when abs is null, when there are no candidate elements, or when no
+ * name matches.
+ *
+ * The first lookup on a sibling list builds a hash table from every tag
+ * name of every sibling and caches it in the hash field of the list's
+ * first element; later lookups reuse it. Names are stored by reference
+ * (no copy). Each sibling's tag is dereferenced without a null check.
+ */
+data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
+                                          data1_element *parent,
+                                          const char *tagname)
+{
+    data1_element *first;
+    data1_element *elem;
+    struct data1_hash_table *table;
+
+    /* It's now possible to have a data1 tree with no abstract syntax */
+    if (!abs)
+        return 0;
+
+    first = parent ? parent->children : abs->main_elements;
+    if (!first)
+        return 0;
+
+    table = first->hash;
+    if (!table)
+    {
+        table = first->hash = data1_hash_open(29, data1_nmem_get(dh));
+        for (elem = first; elem; elem = elem->next)
+        {
+            data1_name *nm;
+
+            for (nm = elem->tag->names; nm; nm = nm->next)
+                data1_hash_insert(table, nm->name, elem, 0);
+        }
+    }
+    return data1_hash_lookup(table, tagname);
+}
+#endif
data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
const char *name)
if (sub_e)
e->children = sub_e->elements;
else
- yaz_log (LOG_WARN, "Unresolved reference to sub-elements %s",
+ yaz_log (YLOG_WARN, "Unresolved reference to sub-elements %s",
e->sub_name);
}
}
if (!abs) { sprintf (p, ".*"); p+=2; }
sprintf (p, "$"); p++;
r = nmem_strdup (data1_nmem_get (dh), res);
- yaz_log(LOG_DEBUG,"Got regexp: %s",r);
+ yaz_log(YLOG_DEBUG,"Got regexp: %s",r);
return (r);
}
pop, 2002-12-13
*/
static int parse_termlists (data1_handle dh, data1_termlist ***tpp,
- char *p, const char *file, int lineno,
+ char *cp, const char *file, int lineno,
const char *element_name, data1_absyn *res,
int xpelement)
{
data1_termlist **tp = *tpp;
- do
+ while(1)
{
char attname[512], structure[512];
char *source;
- int r;
-
- if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
- structure)))
+ int r, i;
+ int level = 0;
+ structure[0] = '\0';
+ for (i = 0; cp[i] && i<sizeof(attname)-1; i++)
+ if (strchr(":,", cp[i]))
+ break;
+ else
+ attname[i] = cp[i];
+ if (i == 0)
{
- yaz_log(LOG_WARN,
- "%s:%d: Syntax error in termlistspec '%s'",
- file, lineno, p);
- return -1;
+ if (*cp)
+ yaz_log(YLOG_WARN,
+ "%s:%d: Syntax error in termlistspec '%s'",
+ file, lineno, cp);
+ break;
}
+ attname[i] = '\0';
+ r = 1;
+ cp += i;
+ if (*cp == ':')
+ cp++;
+
+ for (i = 0; cp[i] && i<sizeof(structure)-1; i++)
+ if (level == 0 && strchr(",", cp[i]))
+ break;
+ else
+ {
+ structure[i] = cp[i];
+ if (cp[i] == '(')
+ level++;
+ else if (cp[i] == ')')
+ level--;
+ }
+ structure[i] = '\0';
+ if (i)
+ r = 2;
+ cp += i;
+ if (*cp)
+ cp++; /* skip , */
*tp = (data1_termlist *)
- nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
+ nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
(*tp)->next = 0;
if (!xpelement) {
if (!((*tp)->att = data1_getattbyname(dh, res->attset,
attname))) {
if ((!xpelement) || (*attname != '!')) {
- yaz_log(LOG_WARN,
+ yaz_log(YLOG_WARN,
"%s:%d: Couldn't find att '%s' in attset",
file, lineno, attname);
return -1;
nmem_strdup (data1_nmem_get (dh), structure);
tp = &(*tp)->next;
}
- while ((p = strchr(p, ',')) && *(++p));
+
*tpp = tp;
return 0;
}
+/* quinn
+ * Converts a 'melm' field[$subfield] pattern to a simple xpath
+ */
+static int melm2xpath(char *melm, char *buf)
+{
+ char *dollar;
+ char *field = melm;
+ char *subfield;
+ char *fieldtype;
+ if ((dollar = strchr(melm, '$'))) {
+ *dollar = '\0';
+ subfield = ++dollar;
+ } else
+ subfield = "";
+ if (field[0] == '0' && field[1] == '0')
+ fieldtype = "controlfield";
+ else
+ fieldtype = "datafield";
+ sprintf(buf, "/*/%s[@tag=\"%s\"]", fieldtype, field);
+ if (*subfield)
+ sprintf(buf + strlen(buf), "/subfield[@code=\"%s\"]", subfield);
+ else if (field[0] != '0' || field[1] != '0')
+ strcat(buf, "/subfield");
+ yaz_log(YLOG_DEBUG, "Created xpath: '%s'", buf);
+ return 0;
+}
+
const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,
const char *default_value)
{
return argc;
}
+data1_marctab *data1_absyn_getmarctab(data1_handle dh, data1_absyn *absyn)
+{ /* Accessor for the marc member of absyn. dh is not used. absyn is
+     dereferenced without a null check, so it must not be null. */
+ return absyn->marc;
+}
+
+YAZ_EXPORT data1_element *data1_absyn_getelements(data1_handle dh,
+ data1_absyn *absyn)
+{ /* Accessor for the main_elements member of absyn (the head of its
+     top-level element list). dh is not used. absyn is dereferenced
+     without a null check, so it must not be null. */
+ return absyn->main_elements;
+}
data1_absyn *data1_read_absyn (data1_handle dh, const char *file,
int file_must_exist)
if (!(f = data1_path_fopen(dh, file, "r")))
{
- yaz_log(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file);
+ yaz_log(YLOG_WARN|YLOG_ERRNO, "Couldn't open %s", file);
if (file_must_exist)
return 0;
}
if (argc < 4)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
continue;
}
path = argv[1];
}
if (i > level+1)
{
- yaz_log(LOG_WARN, "%s:%d: Bad level increase", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: Bad level increase", file, lineno);
fclose(f);
return 0;
}
level = i;
- new_element = *ppl[level-1] = (data1_element *)
- nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
- new_element->next = new_element->children = 0;
- new_element->tag = 0;
- new_element->termlists = 0;
- new_element->sub_name = 0;
+ new_element = *ppl[level-1] = data1_mk_element(dh);
tp = &new_element->termlists;
ppl[level-1] = &new_element->next;
{
if (!res->tagset)
{
- yaz_log(LOG_WARN, "%s:%d: No tagset loaded", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: No tagset loaded", file, lineno);
fclose(f);
return 0;
}
if (!(new_element->tag = data1_gettagbynum (dh, res->tagset,
type, value)))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
file, lineno, p);
fclose(f);
return 0;
}
else
{
- yaz_log(LOG_WARN, "%s:%d: Bad element", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: Bad element", file, lineno);
fclose(f);
return 0;
}
pop, 2003-01-17
*/
- else if (!strcmp(cmd, "xelm")) {
+ else if (!strcmp(cmd, "xelm") || !strcmp(cmd, "melm")) {
int i;
char *p, *xpath_expr, *termlists;
const char *regexp;
struct DFA *dfa = dfa = dfa_init();
data1_termlist **tp;
+ char melm_xpath[128];
if (argc < 3)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno);
continue;
}
- xpath_expr = argv[1];
+
+ if (!strcmp(cmd, "melm")) {
+ if (melm2xpath(argv[1], melm_xpath) < 0)
+ continue;
+ xpath_expr = melm_xpath;
+ } else {
+ xpath_expr = argv[1];
+ }
termlists = argv[2];
regexp = mk_xpath_regexp(dh, xpath_expr);
i = dfa_parse (dfa, ®exp);
if (i || *regexp) {
- yaz_log(LOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
+ yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
dfa_delete (&dfa);
continue;
}
if (argc < 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to section",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to section",
file, lineno);
continue;
}
{
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'xpath' directive",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'xpath' directive",
file, lineno);
continue;
}
res->enable_xpath_indexing = 0;
else
{
- yaz_log(LOG_WARN, "%s:%d: Expecting disable/enable "
+ yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable "
"after 'xpath' directive", file, lineno);
}
}
data1_termlist **tp = &all;
if (all)
{
- yaz_log(LOG_WARN, "%s:%d: Too many 'all' directives - ignored",
+ yaz_log(YLOG_WARN, "%s:%d: Too many 'all' directives - ignored",
file, lineno);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'all' directive",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'all' directive",
file, lineno);
continue;
}
{
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to name directive",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to name directive",
file, lineno);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to reference",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to reference",
file, lineno);
continue;
}
name = argv[1];
if ((res->reference = oid_getvalbyname(name)) == VAL_NONE)
{
- yaz_log(LOG_WARN, "%s:%d: Unknown tagset ref '%s'",
+ yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'",
file, lineno, name);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to attset",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset",
file, lineno);
continue;
}
name = argv[1];
if (!(attset = data1_get_attset (dh, name)))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't find attset %s",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s",
file, lineno, name);
continue;
}
int type = 0;
if (argc < 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args to tagset",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args to tagset",
file, lineno);
continue;
}
*tagset_childp = data1_read_tagset (dh, name, type);
if (!(*tagset_childp))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't load tagset %s",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't load tagset %s",
file, lineno, name);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args in varset",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args in varset",
file, lineno);
continue;
}
name = argv[1];
if (!(res->varset = data1_read_varset (dh, name)))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't load Varset %s",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't load Varset %s",
file, lineno, name);
continue;
}
if (argc != 3)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args in esetname",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args in esetname",
file, lineno);
continue;
}
(*esetpp)->spec = 0;
else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname)))
{
- yaz_log(LOG_WARN, "%s:%d: Espec-1 read failed for %s",
+ yaz_log(YLOG_WARN, "%s:%d: Espec-1 read failed for %s",
file, lineno, fname);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # of args for maptab",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # of args for maptab",
file, lineno);
continue;
}
name = argv[1];
if (!(*maptabp = data1_read_maptab (dh, name)))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't load maptab %s",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't load maptab %s",
file, lineno, name);
continue;
}
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # or args for marc",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # or args for marc",
file, lineno);
continue;
}
name = argv[1];
if (!(*marcp = data1_read_marctab (dh, name)))
{
- yaz_log(LOG_WARN, "%s:%d: Couldn't read marctab %s",
+ yaz_log(YLOG_WARN, "%s:%d: Couldn't read marctab %s",
file, lineno, name);
continue;
}
{
if (argc != 2)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # or args for encoding",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # or args for encoding",
file, lineno);
continue;
}
{
if (argc != 3)
{
- yaz_log(LOG_WARN, "%s:%d: Bad # or args for systag",
+ yaz_log(YLOG_WARN, "%s:%d: Bad # or args for systag",
file, lineno);
continue;
}
}
else
{
- yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file,
+ yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file,
lineno, cmd);
continue;
}
fix_element_ref (dh, res, cur_elements->elements);
}
*systagsp = 0;
- yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file);
+ yaz_log(YLOG_DEBUG, "%s: data1_read_absyn end", file);
return res;
}