X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=4b26b8a4b3a622cde7579ea8f170f52d5aa6ce83;hb=d42b3234a40e6d65397ad444b38fff3941afd1f6;hp=9f612e58fcb731cbb30826da8201b2153b47e280;hpb=3ecabdd7340cf895d4d446a217b8b69c17a2d781;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 9f612e5..4b26b8a 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,9 +1,26 @@ -/* - * Copyright (C) 1994-2002, Index Data - * All rights reserved. - * - * $Id: regxread.c,v 1.40 2002-05-03 13:50:25 adam Exp $ - */ +/* $Id: regxread.c,v 1.50 2004-05-25 12:13:15 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + #include #include #include @@ -456,6 +473,8 @@ static int actionListMk (struct lexSpec *spec, const char *s, logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0); return -1; } + if (debug_dfa_tran) + printf ("pattern: %.*s\n", s-s0, s0); dfa_mkstate ((*ap)->u.pattern.dfa); s++; break; @@ -573,6 +592,14 @@ int readFileSpec (struct lexSpec *spec) if (spec->tcl_interp) logf (LOG_LOG, "Tcl enabled"); #endif + +#if 0 + debug_dfa_trav = 0; + debug_dfa_tran = 1; + debug_dfa_followpos = 0; + dfa_verbose = 1; +#endif + lineBuf = wrbuf_alloc(); spec->lineNo = 0; c = getc (spec_inf); @@ -615,12 +642,6 @@ int readFileSpec (struct lexSpec *spec) fclose (spec_inf); wrbuf_free(lineBuf, 1); -#if 0 - debug_dfa_trav = 1; - debug_dfa_tran = 1; - debug_dfa_followpos = 1; - dfa_verbose = 1; -#endif for (lc = spec->context; lc; lc = lc->next) { struct lexRule *rp; @@ -651,13 +672,17 @@ static void execData (struct lexSpec *spec, if (elen == 0) /* shouldn't happen, but it does! */ return ; #if REGX_DEBUG - if (elen > 40) - logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen, - ebuf, 15, ebuf + elen-15); + if (elen > 80) + logf (LOG_LOG, "data(%d bytes) %.40s ... %.*s", elen, + ebuf, 40, ebuf + elen-40); + else if (elen == 1 && ebuf[0] == '\n') + { + logf (LOG_LOG, "data(new line)"); + } else if (elen > 0) - logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf); + logf (LOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_LOG, "data (%d bytes)", elen); + logf (LOG_LOG, "data(%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -672,7 +697,7 @@ static void execData (struct lexSpec *spec, { org_len = 0; - res = data1_mk_node (spec->dh, spec->m, DATA1N_data, parent); + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); res->u.data.what = DATA1I_text; res->u.data.len = 0; res->u.data.formatted_text = formatted_text; @@ -751,7 +776,7 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype, + logf (LOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype, spec->d1_level); #endif @@ -762,7 +787,7 @@ static void variantBegin (struct lexSpec *spec, if (parent->which != DATA1N_variant) { - res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent); + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); if (spec->d1_stack[spec->d1_level]) tagDataRelease (spec); spec->d1_stack[spec->d1_level] = res; @@ -776,10 +801,10 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_LOG, "variant node (%d)", spec->d1_level); + logf (LOG_LOG, "variant node(%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; - res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent); + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); res->u.variant.type = tp; if (value_len >= DATA1_LOCALDATA) @@ -811,50 +836,21 @@ static void tagStrip (const char **tag, int *len) static void tagBegin (struct lexSpec *spec, const char *tag, int len) { - struct data1_node *parent; - data1_element *elem = NULL; - data1_node *partag; - data1_node *res; - data1_element *e = NULL; - int localtag = 0; - if (spec->d1_level == 0) { logf (LOG_WARN, "in element begin. No record type defined"); return ; } tagStrip (&tag, &len); + if (spec->d1_stack[spec->d1_level]) + tagDataRelease (spec); - parent = spec->d1_stack[spec->d1_level -1]; - partag = get_parent_tag(spec->dh, parent); - - res = data1_mk_node (spec->dh, spec->m, DATA1N_tag, parent); - - if (len >= DATA1_LOCALDATA) - res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); - else - res->u.tag.tag = res->lbuf; - - memcpy (res->u.tag.tag, tag, len); - res->u.tag.tag[len] = '\0'; - #if REGX_DEBUG - logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); + logf (LOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level); #endif - if (parent->which == DATA1N_variant) - return ; - if (partag) - if (!(e = partag->u.tag.element)) - localtag = 1; - - elem = data1_getelementbytagname (spec->dh, - spec->d1_stack[0]->u.root.absyn, - e, res->u.tag.tag); - res->u.tag.element = elem; - if (spec->d1_stack[spec->d1_level]) - tagDataRelease (spec); - spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[spec->d1_level] = data1_mk_tag_n ( + spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]); spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -876,27 +872,39 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_LOG, "end tag (%d)", spec->d1_level); + logf (LOG_LOG, "end tag(%d)", spec->d1_level); #endif } static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, - struct DFA *dfa) + struct DFA *dfa, int greedy) { struct DFA_state *state = dfa->states[0]; struct DFA_tran *t; - unsigned char c; + unsigned char c = 0; unsigned char c_prev = 0; int ptr = *pptr; /* current pointer */ int start_ptr = *pptr; /* first char of match */ int last_ptr = 0; /* last char of match */ int last_rule = 0; /* rule number of current match */ + int restore_ptr = 0; int i; + if (ptr) + { + --ptr; + c = f_win_advance (spec, &ptr); + } while (1) { + if (dfa->states[0] == state) + { + c_prev = c; + restore_ptr = ptr; + } c = f_win_advance (spec, &ptr); + if (ptr == F_WIN_EOF) { if (last_rule) @@ -907,10 +915,11 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, } break; } + t = state->trans; i = state->tran_no; while (1) - if (--i < 0) + if (--i < 0) /* no transition for character c */ { if (last_rule) { @@ -919,27 +928,28 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, return 1; } state = dfa->states[0]; + + ptr = restore_ptr; + c = f_win_advance (spec, &ptr); + start_ptr = ptr; - c_prev = c; + break; } else if (c >= t->ch[0] && c <= t->ch[1]) { state = dfa->states[t->to]; - if (state->rule_no) - { - if (c_prev == '\n') - { - last_rule = state->rule_no; - last_ptr = ptr; - } - else - { - last_rule = state->rule_nno; - last_ptr = ptr; - } - } - break; + if (state->rule_no && c_prev == '\n') + { + last_rule = state->rule_no; + last_ptr = ptr; + } + else if (state->rule_nno) + { + last_rule = state->rule_nno; + last_ptr = ptr; + } + break; } else t++; @@ -1031,23 +1041,22 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, if (!strcmp(argv[1], "record") && argc == 3) { char *absynName = argv[2]; - data1_absyn *absyn; data1_node *res; #if REGX_DEBUG logf (LOG_LOG, "begin record %s", absynName); #endif - absyn = data1_get_absyn (spec->dh, absynName); - - res = data1_mk_node (spec->dh, spec->m); - res->which = DATA1N_root; - res->u.root.type = - data1_insert_string(spec->dh, res, spec->m, absynName); - res->u.root.absyn = absyn; - res->root = res; + res = data1_mk_root (spec->dh, spec->m, absynName); - spec->d1_stack[spec->d1_level] = res; - spec->d1_stack[++(spec->d1_level)] = NULL; + spec->d1_level = 0; + + spec->d1_stack[spec->d1_level++] = res; + + res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); + + spec->d1_stack[spec->d1_level++] = res; + + spec->d1_stack[spec->d1_level] = NULL; } else if (!strcmp(argv[1], "element") && argc == 3) { @@ -1100,7 +1109,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, } else if (!strcmp (argv[1], "element")) { - int min_level = 1; + int min_level = 2; char *element = 0; if (argc >= 3 && !strcmp(argv[2], "-record")) { @@ -1112,7 +1121,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (argc == 3) element = argv[2]; tagEnd (spec, min_level, element, (element ? strlen(element) : 0)); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG logf (LOG_LOG, "end element end records"); @@ -1173,7 +1182,7 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, argi++; } if (element) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); return TCL_OK; } @@ -1276,28 +1285,29 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) continue; - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { static char absynName[64]; - data1_absyn *absyn; data1_node *res; if (cmd_len > 63) cmd_len = 63; memcpy (absynName, cmd_str, cmd_len); absynName[cmd_len] = '\0'; - #if REGX_DEBUG logf (LOG_LOG, "begin record %s", absynName); #endif - absyn = data1_get_absyn (spec->dh, absynName); - - res = data1_mk_node (spec->dh, spec->m, DATA1N_root, 0); - res->u.root.type = absynName; - res->u.root.absyn = absyn; + res = data1_mk_root (spec->dh, spec->m, absynName); - spec->d1_stack[spec->d1_level] = res; - spec->d1_stack[++(spec->d1_level)] = NULL; + spec->d1_level = 0; + + spec->d1_stack[spec->d1_level++] = res; + + res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); + + spec->d1_stack[spec->d1_level++] = res; + + spec->d1_stack[spec->d1_level] = NULL; } r = execTok (spec, &s, &cmd_str, &cmd_len); } @@ -1389,7 +1399,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } else if (!strcmp (p, "element")) { - int min_level = 1; + int min_level = 2; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) @@ -1402,7 +1412,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } else tagEnd (spec, min_level, NULL, 0); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG logf (LOG_LOG, "end element end records"); @@ -1456,7 +1466,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); if (element_str) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); } else if (!strcmp (p, "unread")) { @@ -1553,13 +1563,14 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, if (ap->u.pattern.body) { arg_start[arg_no] = *pptr; - if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa)) + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0)) { arg_end[arg_no] = F_WIN_EOF; arg_no++; arg_start[arg_no] = F_WIN_EOF; arg_end[arg_no] = F_WIN_EOF; -/* return 1*/ + yaz_log(LOG_DEBUG, "Pattern match rest of record"); + *pptr = F_WIN_EOF; } else { @@ -1572,7 +1583,7 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, else { arg_start[arg_no] = *pptr; - if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa)) + if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1)) return 1; if (sptr != arg_start[arg_no]) return 1;