X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=d809993f04676233e44caae4f015cc627a21a108;hb=3e8db66d56c466a5a269362eb70b4931d9579128;hp=b2290a5b13da5ae3ffa12ca27db4302f5fd12d8a;hpb=c4318fe5480927a441042829d2953fc1484fb46b;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index b2290a5..d809993 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,5 +1,5 @@ -/* $Id: regxread.c,v 1.48 2003-06-17 22:22:57 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 +/* $Id: regxread.c,v 1.52 2004-08-15 17:22:45 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -664,7 +664,8 @@ static struct lexSpec *curLexSpec = NULL; #endif static void execData (struct lexSpec *spec, - const char *ebuf, int elen, int formatted_text) + const char *ebuf, int elen, int formatted_text, + const char *attribute_str, int attribute_len) { struct data1_node *res, *parent; int org_len; @@ -691,43 +692,82 @@ static void execData (struct lexSpec *spec, parent = spec->d1_stack[spec->d1_level -1]; assert (parent); - if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data) - org_len = res->u.data.len; - else + if (attribute_str) { - org_len = 0; + data1_xattr **ap; + res = parent; + if (res->which != DATA1N_tag) + return; + /* sweep through exising attributes.. */ + for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next) + if (strlen((*ap)->name) == attribute_len && + !memcmp((*ap)->name, attribute_str, attribute_len)) + break; + if (!*ap) + { + /* new attribute. Create it with name + value */ + *ap = nmem_malloc(spec->m, sizeof(**ap)); - res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); - res->u.data.what = DATA1I_text; - res->u.data.len = 0; - res->u.data.formatted_text = formatted_text; - res->u.data.data = 0; - - if (spec->d1_stack[spec->d1_level]) - spec->d1_stack[spec->d1_level]->next = res; - spec->d1_stack[spec->d1_level] = res; - } - if (org_len + elen >= spec->concatBuf[spec->d1_level].max) - { - char *old_buf, *new_buf; + (*ap)->name = nmem_malloc(spec->m, attribute_len+1); + memcpy((*ap)->name, attribute_str, attribute_len); + (*ap)->name[attribute_len] = '\0'; - spec->concatBuf[spec->d1_level].max = org_len + elen + 256; - new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); - if ((old_buf = spec->concatBuf[spec->d1_level].buf)) + (*ap)->value = nmem_malloc(spec->m, elen+1); + memcpy((*ap)->value, ebuf, elen); + (*ap)->value[elen] = '\0'; + (*ap)->next = 0; + } + else + { + /* append to value if attribute already exists */ + char *nv = nmem_malloc(spec->m, elen + 1 + strlen((*ap)->value)); + strcpy(nv, (*ap)->value); + memcpy (nv + strlen(nv), ebuf, elen); + nv[strlen(nv)+elen] = '\0'; + (*ap)->value = nv; + } + } + else + { + if ((res = spec->d1_stack[spec->d1_level]) && + res->which == DATA1N_data) + org_len = res->u.data.len; + else + { + org_len = 0; + + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); + res->u.data.what = DATA1I_text; + res->u.data.len = 0; + res->u.data.formatted_text = formatted_text; + res->u.data.data = 0; + + if (spec->d1_stack[spec->d1_level]) + spec->d1_stack[spec->d1_level]->next = res; + spec->d1_stack[spec->d1_level] = res; + } + if (org_len + elen >= spec->concatBuf[spec->d1_level].max) { - memcpy (new_buf, old_buf, org_len); - xfree (old_buf); + char *old_buf, *new_buf; + + spec->concatBuf[spec->d1_level].max = org_len + elen + 256; + new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); + if ((old_buf = spec->concatBuf[spec->d1_level].buf)) + { + memcpy (new_buf, old_buf, org_len); + xfree (old_buf); + } + spec->concatBuf[spec->d1_level].buf = new_buf; } - spec->concatBuf[spec->d1_level].buf = new_buf; + memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); + res->u.data.len += elen; } - memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); - res->u.data.len += elen; } static void execDataP (struct lexSpec *spec, const char *ebuf, int elen, int formatted_text) { - execData (spec, ebuf, elen, formatted_text); + execData (spec, ebuf, elen, formatted_text, 0, 0); } static void tagDataRelease (struct lexSpec *spec) @@ -882,7 +922,7 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, { struct DFA_state *state = dfa->states[0]; struct DFA_tran *t; - unsigned char c; + unsigned char c = 0; unsigned char c_prev = 0; int ptr = *pptr; /* current pointer */ int start_ptr = *pptr; /* first char of match */ @@ -903,7 +943,6 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, c_prev = c; restore_ptr = ptr; } - c = f_win_advance (spec, &ptr); if (ptr == F_WIN_EOF) @@ -920,7 +959,7 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, t = state->trans; i = state->tran_no; while (1) - if (--i < 0) + if (--i < 0) /* no transition for character c */ { if (last_rule) { @@ -950,8 +989,7 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, last_rule = state->rule_nno; last_ptr = ptr; } - else - break; + break; } else t++; @@ -1035,14 +1073,14 @@ static char *regxStrz (const char *src, int len, char *str) #if HAVE_TCL_H static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) return TCL_ERROR; if (!strcmp(argv[1], "record") && argc == 3) { - char *absynName = argv[2]; + const char *absynName = argv[2]; data1_node *res; #if REGX_DEBUG @@ -1050,6 +1088,8 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, #endif res = data1_mk_root (spec->dh, spec->m, absynName); + spec->d1_level = 0; + spec->d1_stack[spec->d1_level++] = res; res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); @@ -1089,7 +1129,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, } static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) @@ -1109,8 +1149,8 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, } else if (!strcmp (argv[1], "element")) { - int min_level = 1; - char *element = 0; + int min_level = 2; + const char *element = 0; if (argc >= 3 && !strcmp(argv[2], "-record")) { min_level = 0; @@ -1121,7 +1161,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (argc == 3) element = argv[2]; tagEnd (spec, min_level, element, (element ? strlen(element) : 0)); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG logf (LOG_LOG, "end element end records"); @@ -1143,11 +1183,12 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, } static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { int argi = 1; int textFlag = 0; const char *element = 0; + const char *attribute = 0; struct lexSpec *spec = (struct lexSpec *) clientData; while (argi < argc) @@ -1163,6 +1204,12 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, if (argi < argc) element = argv[argi++]; } + else if (!strcmp("-attribute", argv[argi])) + { + argi++; + if (argi < argc) + attribute = argv[argi++]; + } else break; } @@ -1174,20 +1221,22 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0) Tcl_DString ds; char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds); - execData (spec, native, strlen(native), textFlag); + execData (spec, native, strlen(native), textFlag, attribute, + attribute ? strlen(attribute) : 0); Tcl_DStringFree (&ds); #else - execData (spec, argv[argi], strlen(argv[argi]), textFlag); + execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute, + attribute ? strlen(attribute) : 0); #endif argi++; } if (element) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); return TCL_OK; } static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, - int argc, char **argv) + int argc, const char **argv) { struct lexSpec *spec = (struct lexSpec *) clientData; int argi = 1; @@ -1285,7 +1334,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) continue; - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { static char absynName[64]; data1_node *res; @@ -1299,6 +1348,8 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) #endif res = data1_mk_root (spec->dh, spec->m, absynName); + spec->d1_level = 0; + spec->d1_stack[spec->d1_level++] = res; res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res); @@ -1397,7 +1448,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } else if (!strcmp (p, "element")) { - int min_level = 1; + int min_level = 2; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) @@ -1410,7 +1461,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } else tagEnd (spec, min_level, NULL, 0); - if (spec->d1_level == 0) + if (spec->d1_level <= 1) { #if REGX_DEBUG logf (LOG_LOG, "end element end records"); @@ -1436,6 +1487,8 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) int textFlag = 0; int element_len; const char *element_str = NULL; + int attribute_len; + const char *attribute_str = NULL; while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { @@ -1447,6 +1500,13 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (r < 2) break; } + else if (cmd_len==10 && !memcmp ("-attribute", cmd_str, + cmd_len)) + { + r = execTok (spec, &s, &attribute_str, &attribute_len); + if (r < 2) + break; + } else logf (LOG_WARN, "bad data option: %.*s", cmd_len, cmd_str); @@ -1460,11 +1520,12 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) tagBegin (spec, element_str, element_len); do { - execData (spec, cmd_str, cmd_len,textFlag); + execData (spec, cmd_str, cmd_len, textFlag, + attribute_str, attribute_len); r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); if (element_str) - tagEnd (spec, 1, NULL, 0); + tagEnd (spec, 2, NULL, 0); } else if (!strcmp (p, "unread")) {