X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=recctrl%2Fregxread.c;h=5658f653ad259ed9b2244ace6b016692e2238e5e;hp=9b8473b060b977fe92f653d6ebb49927827e83ac;hb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;hpb=b385fad64ec1afb51fd0ec9ad76567282ab20bfe diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 9b8473b..5658f65 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,6 +1,6 @@ -/* $Id: regxread.c,v 1.47 2003-04-24 19:34:20 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 - Index Data Aps +/* $Id: regxread.c,v 1.61 2006-05-10 08:13:30 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -20,16 +20,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - #include +#include #include #include #include #include -#include +#include #include -#include "grsread.h" +#include #if HAVE_TCL_H #include @@ -139,6 +139,7 @@ struct lexSpec { struct lexSpecs { struct lexSpec *spec; + char type[256]; }; static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, @@ -411,7 +412,7 @@ static int readParseToken (const char **cpp, int *len) cmd[i] = '\0'; if (i == 0) { - logf (LOG_WARN, "bad character %d %c", *cp, *cp); + yaz_log (YLOG_WARN, "bad character %d %c", *cp, *cp); cp++; while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n' && *cp != '\r') @@ -432,7 +433,7 @@ static int readParseToken (const char **cpp, int *len) return REGX_INIT; else { - logf (LOG_WARN, "bad command %s", cmd); + yaz_log (YLOG_WARN, "bad command %s", cmd); return 0; } } @@ -470,17 +471,19 @@ static int actionListMk (struct lexSpec *spec, const char *s, { xfree (*ap); *ap = NULL; - logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0); + yaz_log (YLOG_WARN, "regular expression error '%.*s'", s-s0, s0); return -1; } + if (debug_dfa_tran) + printf ("pattern: %.*s\n", s-s0, s0); dfa_mkstate ((*ap)->u.pattern.dfa); s++; break; case REGX_BEGIN: - logf (LOG_WARN, "cannot use BEGIN here"); + yaz_log (YLOG_WARN, "cannot use BEGIN here"); continue; case REGX_INIT: - logf (LOG_WARN, "cannot use INIT here"); + yaz_log (YLOG_WARN, "cannot use INIT here"); continue; case REGX_END: *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); @@ -506,7 +509,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) tok = readParseToken (&s, &len); if (tok != REGX_CODE) { - logf (LOG_WARN, "missing name after CONTEXT keyword"); + yaz_log (YLOG_WARN, "missing name after CONTEXT keyword"); return 0; } if (len > 31) @@ -537,17 +540,17 @@ int readOneSpec (struct lexSpec *spec, const char *s) break; case REGX_PATTERN: #if REGX_DEBUG - logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s); + yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s); #endif r = dfa_parse (spec->context->dfa, &s); if (r) { - logf (LOG_WARN, "regular expression error. r=%d", r); + yaz_log (YLOG_WARN, "regular expression error. r=%d", r); return -1; } if (*s != '/') { - logf (LOG_WARN, "expects / at end of pattern. got %c", *s); + yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s); return -1; } s++; @@ -582,14 +585,22 @@ int readFileSpec (struct lexSpec *spec) } if (!spec_inf) { - logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name); + yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name); return -1; } - logf (LOG_LOG, "reading regx filter %s", fname); + yaz_log (YLOG_LOG, "reading regx filter %s", fname); #if HAVE_TCL_H if (spec->tcl_interp) - logf (LOG_LOG, "Tcl enabled"); + yaz_log (YLOG_LOG, "Tcl enabled"); +#endif + +#if 0 + debug_dfa_trav = 0; + debug_dfa_tran = 1; + debug_dfa_followpos = 0; + dfa_verbose = 1; #endif + lineBuf = wrbuf_alloc(); spec->lineNo = 0; c = getc (spec_inf); @@ -632,12 +643,6 @@ int readFileSpec (struct lexSpec *spec) fclose (spec_inf); wrbuf_free(lineBuf, 1); -#if 0 - debug_dfa_trav = 1; - debug_dfa_tran = 1; - debug_dfa_followpos = 1; - dfa_verbose = 1; -#endif for (lc = spec->context; lc; lc = lc->next) { struct lexRule *rp; @@ -660,7 +665,8 @@ static struct lexSpec *curLexSpec = NULL; #endif static void execData (struct lexSpec *spec, - const char *ebuf, int elen, int formatted_text) + const char *ebuf, int elen, int formatted_text, + const char *attribute_str, int attribute_len) { struct data1_node *res, *parent; int org_len; @@ -668,18 +674,17 @@ static void execData (struct lexSpec *spec, if (elen == 0) /* shouldn't happen, but it does! */ return ; #if REGX_DEBUG - if (elen > 40) - logf (LOG_LOG, "data(%d bytes) %.15s ... %.*s", elen, - ebuf, 15, ebuf + elen-15); + if (elen > 80) + yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen, + ebuf, 40, ebuf + elen-40); else if (elen == 1 && ebuf[0] == '\n') { - logf (LOG_LOG, "data(new line)"); - assert(0); + yaz_log (YLOG_LOG, "data(new line)"); } else if (elen > 0) - logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf); + yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_LOG, "data(%d bytes)", elen); + yaz_log (YLOG_LOG, "data(%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -688,43 +693,82 @@ static void execData (struct lexSpec *spec, parent = spec->d1_stack[spec->d1_level -1]; assert (parent); - if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data) - org_len = res->u.data.len; - else + if (attribute_str) { - org_len = 0; + data1_xattr **ap; + res = parent; + if (res->which != DATA1N_tag) + return; + /* sweep through exising attributes.. */ + for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next) + if (strlen((*ap)->name) == attribute_len && + !memcmp((*ap)->name, attribute_str, attribute_len)) + break; + if (!*ap) + { + /* new attribute. Create it with name + value */ + *ap = nmem_malloc(spec->m, sizeof(**ap)); - res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); - res->u.data.what = DATA1I_text; - res->u.data.len = 0; - res->u.data.formatted_text = formatted_text; - res->u.data.data = 0; - - if (spec->d1_stack[spec->d1_level]) - spec->d1_stack[spec->d1_level]->next = res; - spec->d1_stack[spec->d1_level] = res; - } - if (org_len + elen >= spec->concatBuf[spec->d1_level].max) - { - char *old_buf, *new_buf; + (*ap)->name = nmem_malloc(spec->m, attribute_len+1); + memcpy((*ap)->name, attribute_str, attribute_len); + (*ap)->name[attribute_len] = '\0'; - spec->concatBuf[spec->d1_level].max = org_len + elen + 256; - new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); - if ((old_buf = spec->concatBuf[spec->d1_level].buf)) + (*ap)->value = nmem_malloc(spec->m, elen+1); + memcpy((*ap)->value, ebuf, elen); + (*ap)->value[elen] = '\0'; + (*ap)->next = 0; + } + else { - memcpy (new_buf, old_buf, org_len); - xfree (old_buf); + /* append to value if attribute already exists */ + char *nv = nmem_malloc(spec->m, elen + 1 + strlen((*ap)->value)); + strcpy(nv, (*ap)->value); + memcpy (nv + strlen(nv), ebuf, elen); + nv[strlen(nv)+elen] = '\0'; + (*ap)->value = nv; } - spec->concatBuf[spec->d1_level].buf = new_buf; + } + else + { + if ((res = spec->d1_stack[spec->d1_level]) && + res->which == DATA1N_data) + org_len = res->u.data.len; + else + { + org_len = 0; + + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); + res->u.data.what = DATA1I_text; + res->u.data.len = 0; + res->u.data.formatted_text = formatted_text; + res->u.data.data = 0; + + if (spec->d1_stack[spec->d1_level]) + spec->d1_stack[spec->d1_level]->next = res; + spec->d1_stack[spec->d1_level] = res; + } + if (org_len + elen >= spec->concatBuf[spec->d1_level].max) + { + char *old_buf, *new_buf; + + spec->concatBuf[spec->d1_level].max = org_len + elen + 256; + new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); + if ((old_buf = spec->concatBuf[spec->d1_level].buf)) + { + memcpy (new_buf, old_buf, org_len); + xfree (old_buf); + } + spec->concatBuf[spec->d1_level].buf = new_buf; + } + memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); + res->u.data.len += elen; } - memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); - res->u.data.len += elen; } static void execDataP (struct lexSpec *spec, const char *ebuf, int elen, int formatted_text) { - execData (spec, ebuf, elen, formatted_text); + execData (spec, ebuf, elen, formatted_text, 0, 0); } static void tagDataRelease (struct lexSpec *spec) @@ -759,7 +803,7 @@ static void variantBegin (struct lexSpec *spec, if (spec->d1_level == 0) { - logf (LOG_WARN, "in variant begin. No record type defined"); + yaz_log (YLOG_WARN, "in variant begin. No record type defined"); return ; } if (class_len >= DATA1_MAX_SYMBOL) @@ -773,13 +817,13 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype, + yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype, spec->d1_level); #endif if (!(tp = - data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset, - tclass, ttype))) + data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn, + tclass, ttype))) return; if (parent->which != DATA1N_variant) @@ -798,7 +842,7 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_LOG, "variant node(%d)", spec->d1_level); + yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); @@ -835,7 +879,7 @@ static void tagBegin (struct lexSpec *spec, { if (spec->d1_level == 0) { - logf (LOG_WARN, "in element begin. No record type defined"); + yaz_log (YLOG_WARN, "in element begin. No record type defined"); return ; } tagStrip (&tag, &len); @@ -843,7 +887,7 @@ static void tagBegin (struct lexSpec *spec, tagDataRelease (spec); #if REGX_DEBUG - logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level); + yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level); #endif spec->d1_stack[spec->d1_level] = data1_mk_tag_n ( @@ -869,27 +913,39 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_LOG, "end tag(%d)", spec->d1_level); + yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level); #endif } static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, - struct DFA *dfa) + struct DFA *dfa, int greedy) { struct DFA_state *state = dfa->states[0]; struct DFA_tran *t; - unsigned char c; + unsigned char c = 0; unsigned char c_prev = 0; int ptr = *pptr; /* current pointer */ int start_ptr = *pptr; /* first char of match */ int last_ptr = 0; /* last char of match */ int last_rule = 0; /* rule number of current match */ + int restore_ptr = 0; int i; + if (ptr) + { + --ptr; + c = f_win_advance (spec, &ptr); + } while (1) { + if (dfa->states[0] == state) + { + c_prev = c; + restore_ptr = ptr; + } c = f_win_advance (spec, &ptr); + if (ptr == F_WIN_EOF) { if (last_rule) @@ -900,10 +956,11 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, } break; } + t = state->trans; i = state->tran_no; while (1) - if (--i < 0) + if (--i < 0) /* no transition for character c */ { if (last_rule) { @@ -912,27 +969,28 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, return 1; } state = dfa->states[0]; + + ptr = restore_ptr; + c = f_win_advance (spec, &ptr); + start_ptr = ptr; - c_prev = c; + break; } else if (c >= t->ch[0] && c <= t->ch[1]) { state = dfa->states[t->to]; - if (state->rule_no) - { - if (c_prev == '\n') - { - last_rule = state->rule_no; - last_ptr = ptr; - } - else - { - last_rule = state->rule_nno; - last_ptr = ptr; - } - } - break; + if (state->rule_no && c_prev == '\n') + { + last_rule = state->rule_no; + last_ptr = ptr; + } + else if (state->rule_nno) + { + last_rule = state->rule_nno; + last_ptr = ptr; + } + break; } else t++; @@ -1016,21 +1074,23 @@ static char *regxStrz (const char *src, int len, char *str) #if HAVE_TCL_H static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) return TCL_ERROR; if (!strcmp(argv[1], "record") && argc == 3) { - char *absynName = argv[2]; + const char *absynName = argv[2]; data1_node *res; #if REGX_DEBUG - logf (LOG_LOG, "begin record %s", absynName); + yaz_log (YLOG_LOG, "begin record %s", absynName); #endif res = data1_mk_root (spec->dh, spec->m, absynName); + spec->d1_level = 0; + spec->d1_stack[spec->d1_level++] = res; res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); @@ -1053,7 +1113,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, { struct lexContext *lc = spec->context; #if REGX_DEBUG - logf (LOG_LOG, "begin context %s",argv[2]); + yaz_log (YLOG_LOG, "begin context %s",argv[2]); #endif while (lc && strcmp (argv[2], lc->name)) lc = lc->next; @@ -1062,7 +1122,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, spec->context_stack[++(spec->context_stack_top)] = lc; } else - logf (LOG_WARN, "unknown context %s", argv[2]); + yaz_log (YLOG_WARN, "unknown context %s", argv[2]); } else return TCL_ERROR; @@ -1070,7 +1130,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, } static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) @@ -1084,14 +1144,14 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, (spec->d1_level)--; } #if REGX_DEBUG - logf (LOG_LOG, "end record"); + yaz_log (YLOG_LOG, "end record"); #endif spec->stop_flag = 1; } else if (!strcmp (argv[1], "element")) { - int min_level = 1; - char *element = 0; + int min_level = 2; + const char *element = 0; if (argc >= 3 && !strcmp(argv[2], "-record")) { min_level = 0; @@ -1102,10 +1162,10 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (argc == 3) element = argv[2]; tagEnd (spec, min_level, element, (element ? strlen(element) : 0)); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG - logf (LOG_LOG, "end element end records"); + yaz_log (YLOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1113,7 +1173,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, else if (!strcmp (argv[1], "context")) { #if REGX_DEBUG - logf (LOG_LOG, "end context"); + yaz_log (YLOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1124,11 +1184,12 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, } static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { int argi = 1; int textFlag = 0; const char *element = 0; + const char *attribute = 0; struct lexSpec *spec = (struct lexSpec *) clientData; while (argi < argc) @@ -1144,6 +1205,12 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, if (argi < argc) element = argv[argi++]; } + else if (!strcmp("-attribute", argv[argi])) + { + argi++; + if (argi < argc) + attribute = argv[argi++]; + } else break; } @@ -1155,20 +1222,22 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0) Tcl_DString ds; char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds); - execData (spec, native, strlen(native), textFlag); + execData (spec, native, strlen(native), textFlag, attribute, + attribute ? strlen(attribute) : 0); Tcl_DStringFree (&ds); #else - execData (spec, argv[argi], strlen(argv[argi]), textFlag); + execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute, + attribute ? strlen(attribute) : 0); #endif argi++; } if (element) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); return TCL_OK; } static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; int argi = 1; @@ -1226,7 +1295,7 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) if (ret != TCL_OK) { const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); - logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", + yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", spec->tcl_interp->errorLine, spec->tcl_interp->result, err ? err : "[NO ERRORINFO]"); @@ -1257,7 +1326,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) { - logf (LOG_WARN, "missing keyword after 'begin'"); + yaz_log (YLOG_WARN, "missing keyword after 'begin'"); continue; } p = regxStrz (cmd_str, cmd_len, ptmp); @@ -1266,7 +1335,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) continue; - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { static char absynName[64]; data1_node *res; @@ -1276,10 +1345,12 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) memcpy (absynName, cmd_str, cmd_len); absynName[cmd_len] = '\0'; #if REGX_DEBUG - logf (LOG_LOG, "begin record %s", absynName); + yaz_log (YLOG_LOG, "begin record %s", absynName); #endif res = data1_mk_root (spec->dh, spec->m, absynName); + spec->d1_level = 0; + spec->d1_stack[spec->d1_level++] = res; res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); @@ -1337,21 +1408,21 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); #if REGX_DEBUG - logf (LOG_LOG, "begin context %s", p); + yaz_log (YLOG_LOG, "begin context %s", p); #endif while (lc && strcmp (p, lc->name)) lc = lc->next; if (lc) spec->context_stack[++(spec->context_stack_top)] = lc; else - logf (LOG_WARN, "unknown context %s", p); + yaz_log (YLOG_WARN, "unknown context %s", p); } r = execTok (spec, &s, &cmd_str, &cmd_len); } else { - logf (LOG_WARN, "bad keyword '%s' after begin", p); + yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p); } } else if (!strcmp (p, "end")) @@ -1359,7 +1430,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) { - logf (LOG_WARN, "missing keyword after 'end'"); + yaz_log (YLOG_WARN, "missing keyword after 'end'"); continue; } p = regxStrz (cmd_str, cmd_len, ptmp); @@ -1372,13 +1443,13 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_LOG, "end record"); + yaz_log (YLOG_LOG, "end record"); #endif spec->stop_flag = 1; } else if (!strcmp (p, "element")) { - int min_level = 1; + int min_level = 2; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) @@ -1391,10 +1462,10 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } else tagEnd (spec, min_level, NULL, 0); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG - logf (LOG_LOG, "end element end records"); + yaz_log (YLOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1403,20 +1474,22 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) else if (!strcmp (p, "context")) { #if REGX_DEBUG - logf (LOG_LOG, "end context"); + yaz_log (YLOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; r = execTok (spec, &s, &cmd_str, &cmd_len); } else - logf (LOG_WARN, "bad keyword '%s' after end", p); + yaz_log (YLOG_WARN, "bad keyword '%s' after end", p); } else if (!strcmp (p, "data")) { int textFlag = 0; int element_len; const char *element_str = NULL; + int attribute_len; + const char *attribute_str = NULL; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { @@ -1428,24 +1501,32 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (r < 2) break; } + else if (cmd_len==10 && !memcmp ("-attribute", cmd_str, + cmd_len)) + { + r = execTok (spec, &s, &attribute_str, &attribute_len); + if (r < 2) + break; + } else - logf (LOG_WARN, "bad data option: %.*s", + yaz_log (YLOG_WARN, "bad data option: %.*s", cmd_len, cmd_str); } if (r != 2) { - logf (LOG_WARN, "missing data item after data"); + yaz_log (YLOG_WARN, "missing data item after data"); continue; } if (element_str) tagBegin (spec, element_str, element_len); do { - execData (spec, cmd_str, cmd_len,textFlag); + execData (spec, cmd_str, cmd_len, textFlag, + attribute_str, attribute_len); r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); if (element_str) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); } else if (!strcmp (p, "unread")) { @@ -1456,7 +1537,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) { - logf (LOG_WARN, "missing number after -offset"); + yaz_log (YLOG_WARN, "missing number after -offset"); continue; } p = regxStrz (cmd_str, cmd_len, ptmp); @@ -1467,12 +1548,12 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) offset = 0; if (r < 2) { - logf (LOG_WARN, "missing index after unread command"); + yaz_log (YLOG_WARN, "missing index after unread command"); continue; } if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9') { - logf (LOG_WARN, "bad index after unread command"); + yaz_log (YLOG_WARN, "bad index after unread command"); continue; } else @@ -1497,20 +1578,20 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (lc) spec->context_stack[spec->context_stack_top] = lc; else - logf (LOG_WARN, "unknown context %s", p); + yaz_log (YLOG_WARN, "unknown context %s", p); } r = execTok (spec, &s, &cmd_str, &cmd_len); } else { - logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str); + yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str); r = execTok (spec, &s, &cmd_str, &cmd_len); continue; } if (r > 1) { - logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str); + yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str); do { r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); @@ -1542,13 +1623,14 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, if (ap->u.pattern.body) { arg_start[arg_no] = *pptr; - if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa)) + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0)) { arg_end[arg_no] = F_WIN_EOF; arg_no++; arg_start[arg_no] = F_WIN_EOF; arg_end[arg_no] = F_WIN_EOF; -/* return 1*/ + yaz_log(YLOG_DEBUG, "Pattern match rest of record"); + *pptr = F_WIN_EOF; } else { @@ -1561,7 +1643,7 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, else { arg_start[arg_no] = *pptr; - if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa)) + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1)) return 1; if (sptr != arg_start[arg_no]) return 1; @@ -1599,7 +1681,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_LOG, "exec rule %d", ruleNo); + yaz_log (YLOG_LOG, "exec rule %d", ruleNo); #endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr); @@ -1677,7 +1759,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) if (spec->f_win_ef && *ptr != F_WIN_EOF) { #if REGX_DEBUG - logf (LOG_LOG, "regx: endf ptr=%d", *ptr); + yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr); #endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } @@ -1742,7 +1824,7 @@ static data1_node *lexRoot (struct lexSpec *spec, off_t offset, } if (!lt) { - logf (LOG_WARN, "cannot find context %s", context_name); + yaz_log (YLOG_WARN, "cannot find context %s", context_name); return NULL; } spec->context_stack[spec->context_stack_top] = lt; @@ -1775,13 +1857,23 @@ void grs_destroy(void *clientData) xfree (specs); } -void *grs_init(void) +void *grs_init(Res res, RecType recType) { struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs)); specs->spec = 0; + strcpy(specs->type, ""); return specs; } + +ZEBRA_RES grs_config(void *clientData, Res res, const char *args) +{ + struct lexSpecs *specs = (struct lexSpecs *) clientData; + if (strlen(args) < sizeof(specs->type)) + strcpy(specs->type, args); + return ZEBRA_OK; +} + data1_node *grs_read_regx (struct grs_read_info *p) { int res; @@ -1789,13 +1881,13 @@ data1_node *grs_read_regx (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_LOG, "grs_read_regx"); + yaz_log (YLOG_LOG, "grs_read_regx"); #endif - if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) + if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type)) { if (*curLexSpec) lexSpecDestroy (curLexSpec); - *curLexSpec = lexSpecCreate (p->type, p->dh); + *curLexSpec = lexSpecCreate (specs->type, p->dh); res = readFileSpec (*curLexSpec); if (res) { @@ -1818,14 +1910,26 @@ data1_node *grs_read_regx (struct grs_read_info *p) return lexRoot (*curLexSpec, p->offset, "main"); } -static struct recTypeGrs regx_type = { - "regx", +static int extract_regx(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_regx); +} + +static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_regx); +} + +static struct recType regx_type = { + 0, + "grs.regx", grs_init, + grs_config, grs_destroy, - grs_read_regx + extract_regx, + retrieve_regx, }; -RecTypeGrs recTypeGrs_regx = ®x_type; #if HAVE_TCL_H data1_node *grs_read_tcl (struct grs_read_info *p) @@ -1835,14 +1939,14 @@ data1_node *grs_read_tcl (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_LOG, "grs_read_tcl"); + yaz_log (YLOG_LOG, "grs_read_tcl"); #endif - if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) + if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type)) { Tcl_Interp *tcl_interp; if (*curLexSpec) lexSpecDestroy (curLexSpec); - *curLexSpec = lexSpecCreate (p->type, p->dh); + *curLexSpec = lexSpecCreate (specs->type, p->dh); Tcl_FindExecutable(""); tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); Tcl_Init(tcl_interp); @@ -1873,12 +1977,47 @@ data1_node *grs_read_tcl (struct grs_read_info *p) return lexRoot (*curLexSpec, p->offset, "main"); } -static struct recTypeGrs tcl_type = { - "tcl", +static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl) +{ + return zebra_grs_extract(clientData, ctrl, grs_read_tcl); +} + +static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl) +{ + return zebra_grs_retrieve(clientData, ctrl, grs_read_tcl); +} + +static struct recType tcl_type = { + 0, + "grs.tcl", grs_init, + grs_config, grs_destroy, - grs_read_tcl + extract_tcl, + retrieve_tcl, }; -RecTypeGrs recTypeGrs_tcl = &tcl_type; #endif + +RecType +#ifdef IDZEBRA_STATIC_GRS_REGX +idzebra_filter_grs_regx +#else +idzebra_filter +#endif + +[] = { + ®x_type, +#if HAVE_TCL_H + &tcl_type, +#endif + 0, +}; +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +