X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=e34f8963203d6ebeb31bd90b03670ea521f70749;hb=5b4dcfcb99600327a11b58de4fec33003dc4d816;hp=2e51f6719c25dc5b2640f7e1957e47ad391d053a;hpb=b306d131fe1c06ca1d313f3b04a159b4a3a376f9;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 2e51f67..e34f896 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,10 +1,36 @@ /* - * Copyright (C) 1994-1998, Index Data I/S + * Copyright (C) 1994-1998, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.14 1998-03-05 08:41:00 adam + * Revision 1.22 1998-11-03 16:07:13 adam + * Yet another fix. + * + * Revision 1.21 1998/11/03 15:43:39 adam + * Fixed bug introduced by previous commit. + * + * Revision 1.20 1998/11/03 14:51:28 adam + * Changed code so that it creates as few data1 nodes as possible. + * + * Revision 1.19 1998/11/03 10:22:39 adam + * Fixed memory leak that could occur for when large data1 node were + * concatenated. Data-type data1_nodes may have multiple nodes. + * + * Revision 1.18 1998/10/15 13:11:47 adam + * Added support for option -record for "end element". When specified + * end element will mark end-of-record when at outer-level. + * + * Revision 1.17 1998/07/01 10:13:51 adam + * Minor fix. + * + * Revision 1.16 1998/06/30 15:15:09 adam + * Tags are trimmed: white space removed before- and after the tag. + * + * Revision 1.15 1998/06/30 12:55:45 adam + * Bug fix. + * + * Revision 1.14 1998/03/05 08:41:00 adam * Implemented rule contexts. * * Revision 1.13 1997/12/12 06:33:58 adam @@ -132,6 +158,7 @@ #include #include #include +#include #include #include @@ -189,7 +216,14 @@ struct lexContext { struct lexContext *next; }; +struct lexConcatBuf { + int len; + int max; + char *buf; +}; + struct lexSpec { + char *name; struct lexContext *context; @@ -210,6 +244,8 @@ struct lexSpec { int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); + struct lexConcatBuf **concatBuf; + int maxLevel; }; @@ -359,6 +395,7 @@ static void lexContextDestroy (struct lexContext *p) static struct lexSpec *lexSpecCreate (const char *name) { struct lexSpec *p; + int i; p = xmalloc (sizeof(*p)); p->name = xmalloc (strlen(name)+1); @@ -369,6 +406,15 @@ static struct lexSpec *lexSpecCreate (const char *name) p->context_stack = xmalloc (sizeof(*p->context_stack) * p->context_stack_size); p->f_win_buf = NULL; + + p->maxLevel = 128; + p->concatBuf = xmalloc (sizeof(*p->concatBuf) * p->maxLevel); + for (i = 0; i < p->maxLevel; i++) + { + p->concatBuf[i] = xmalloc (sizeof(**p->concatBuf)); + p->concatBuf[i]->len = p->concatBuf[i]->max = 0; + p->concatBuf[i]->buf = 0; + } return p; } @@ -376,11 +422,17 @@ static void lexSpecDestroy (struct lexSpec **pp) { struct lexSpec *p; struct lexContext *lt; + int i; assert (pp); p = *pp; if (!p) return ; + + for (i = 0; i < p->maxLevel; i++) + xfree (p->concatBuf[i]); + xfree (p->concatBuf); + lt = p->context; while (lt) { @@ -665,17 +717,12 @@ int readFileSpec (struct lexSpec *spec) static struct lexSpec *curLexSpec = NULL; -static void destroy_data (struct data1_node *n) -{ - assert (n->which == DATA1N_data); - xfree (n->u.data.data); -} - static void execData (struct lexSpec *spec, data1_node **d1_stack, int *d1_level, const char *ebuf, int elen, int formatted_text) { struct data1_node *res, *parent; + int org_len; if (elen == 0) /* shouldn't happen, but it does! */ return ; @@ -694,44 +741,53 @@ static void execData (struct lexSpec *spec, parent = d1_stack[*d1_level -1]; assert (parent); - if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data) + + if ((res = d1_stack[*d1_level]) && res->which == DATA1N_data) + org_len = res->u.data.len; + else { - if (elen + res->u.data.len <= DATA1_LOCALDATA) - memcpy (res->u.data.data + res->u.data.len, ebuf, elen); - else - { - char *nb = xmalloc (elen + res->u.data.len); - memcpy (nb, res->u.data.data, res->u.data.len); - memcpy (nb + res->u.data.len, ebuf, elen); - if (res->u.data.len > DATA1_LOCALDATA) - xfree (res->u.data.data); - res->u.data.data = nb; - res->destroy = destroy_data; - } - res->u.data.len += elen; + org_len = 0; + + res = data1_mk_node (spec->dh, spec->m); + res->parent = parent; + res->which = DATA1N_data; + res->u.data.what = DATA1I_text; + res->u.data.len = 0; + res->u.data.formatted_text = formatted_text; +#if 0 + if (elen > DATA1_LOCALDATA) + res->u.data.data = nmem_malloc (spec->m, elen); + else + res->u.data.data = res->lbuf; + memcpy (res->u.data.data, ebuf, elen); +#else + res->u.data.data = 0; +#endif + res->root = parent->root; + + parent->last_child = res; + if (d1_stack[*d1_level]) + d1_stack[*d1_level]->next = res; + else + parent->child = res; + d1_stack[*d1_level] = res; } - else + if (org_len + elen >= spec->concatBuf[*d1_level]->max) { - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_data; - res->u.data.what = DATA1I_text; - res->u.data.len = elen; - res->u.data.formatted_text = formatted_text; - if (elen > DATA1_LOCALDATA) - res->u.data.data = nmem_malloc (spec->m, elen); - else - res->u.data.data = res->lbuf; - memcpy (res->u.data.data, ebuf, elen); - res->root = parent->root; - - parent->last_child = res; - if (d1_stack[*d1_level]) - d1_stack[*d1_level]->next = res; - else - parent->child = res; - d1_stack[*d1_level] = res; + char *old_buf, *new_buf; + + spec->concatBuf[*d1_level]->max = org_len + elen + 256; + new_buf = xmalloc (spec->concatBuf[*d1_level]->max); + if ((old_buf = spec->concatBuf[*d1_level]->buf)) + { + memcpy (new_buf, old_buf, org_len); + xfree (old_buf); + } + spec->concatBuf[*d1_level]->buf = new_buf; } + assert (spec->concatBuf[*d1_level]); + memcpy (spec->concatBuf[*d1_level]->buf + org_len, ebuf, elen); + res->u.data.len += elen; } static void execDataP (struct lexSpec *spec, @@ -741,6 +797,26 @@ static void execDataP (struct lexSpec *spec, execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text); } +static void tagDataRelease (struct lexSpec *spec, + data1_node **d1_stack, int d1_level) +{ + data1_node *res; + + if ((res = d1_stack[d1_level]) && + res->which == DATA1N_data && + res->u.data.what == DATA1I_text) + { + assert (!res->u.data.data); + assert (res->u.data.len > 0); + if (res->u.data.len > DATA1_LOCALDATA) + res->u.data.data = nmem_malloc (spec->m, res->u.data.len); + else + res->u.data.data = res->lbuf; + memcpy (res->u.data.data, spec->concatBuf[d1_level]->buf, + res->u.data.len); + } +} + static void variantBegin (struct lexSpec *spec, data1_node **d1_stack, int *d1_level, const char *class_str, int class_len, @@ -788,7 +864,10 @@ static void variantBegin (struct lexSpec *spec, parent->last_child = res; if (d1_stack[*d1_level]) + { + tagDataRelease (spec, d1_stack, *d1_level); d1_stack[*d1_level]->next = res; + } else parent->child = res; d1_stack[*d1_level] = res; @@ -820,13 +899,29 @@ static void variantBegin (struct lexSpec *spec, parent->last_child = res; if (d1_stack[*d1_level]) + { + tagDataRelease (spec, d1_stack, *d1_level); d1_stack[*d1_level]->next = res; + } else parent->child = res; d1_stack[*d1_level] = res; d1_stack[++(*d1_level)] = NULL; } +static void tagStrip (const char **tag, int *len) +{ + int i; + + for (i = *len; i > 0 && isspace((*tag)[i-1]); --i) + ; + *len = i; + for (i = 0; i < *len && isspace((*tag)[i]); i++) + ; + *tag += i; + *len -= i; +} + static void tagBegin (struct lexSpec *spec, data1_node **d1_stack, int *d1_level, const char *tag, int len) @@ -843,17 +938,20 @@ static void tagBegin (struct lexSpec *spec, logf (LOG_WARN, "in element begin. No record type defined"); return ; } - + tagStrip (&tag, &len); + res = data1_mk_node (spec->dh, spec->m); res->parent = parent; res->which = DATA1N_tag; res->u.tag.get_bytes = -1; if (len >= DATA1_LOCALDATA) - len = DATA1_LOCALDATA-1; - memcpy (res->lbuf, tag, len); - res->lbuf[len] = '\0'; - res->u.tag.tag = res->lbuf; + res->u.tag.tag = nmem_malloc (spec->m, len+1); + else + res->u.tag.tag = res->lbuf; + + memcpy (res->u.tag.tag, tag, len); + res->u.tag.tag[len] = '\0'; #if REGX_DEBUG logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, *d1_level); @@ -874,7 +972,10 @@ static void tagBegin (struct lexSpec *spec, parent->last_child = res; if (d1_stack[*d1_level]) + { + tagDataRelease (spec, d1_stack, *d1_level); d1_stack[*d1_level]->next = res; + } else parent->child = res; d1_stack[*d1_level] = res; @@ -882,12 +983,16 @@ static void tagBegin (struct lexSpec *spec, } static void tagEnd (struct lexSpec *spec, - data1_node **d1_stack, int *d1_level, + data1_node **d1_stack, int *d1_level, int min_level, const char *tag, int len) { - while (*d1_level > 1) + tagStrip (&tag, &len); + while (*d1_level > min_level) { + tagDataRelease (spec, d1_stack, *d1_level); (*d1_level)--; + if (*d1_level == 0) + break; if ((d1_stack[*d1_level]->which == DATA1N_tag) && (!tag || (strlen(d1_stack[*d1_level]->u.tag.tag) == (size_t) len && @@ -1192,6 +1297,9 @@ static int execCode (struct lexSpec *spec, p = regxStrz (cmd_str, cmd_len, ptmp); if (!strcmp (p, "record")) { + int i; + for (i = *d1_level; i; --i) + tagDataRelease (spec, d1_stack, i); *d1_level = 0; r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len); @@ -1202,23 +1310,30 @@ static int execCode (struct lexSpec *spec, } else if (!strcmp (p, "element")) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); -#if 0 - if (*d1_level == 1) - { - *d1_level = 0; - returnCode = 0; - } -#endif + int min_level = 1; + while ((r = execTok (spec, &s, arg_no, arg_start, arg_end, + &cmd_str, &cmd_len)) == 3) + { + if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) + min_level = 0; + } if (r > 2) { - tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len); + tagEnd (spec, d1_stack, d1_level, min_level, + cmd_str, cmd_len); r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len); } else - tagEnd (spec, d1_stack, d1_level, NULL, 0); + tagEnd (spec, d1_stack, d1_level, min_level, NULL, 0); + if (*d1_level == 0) + { +#if REGX_DEBUG + logf (LOG_DEBUG, "end element end records"); +#endif + returnCode = 0; + } + } else if (!strcmp (p, "context")) { @@ -1270,7 +1385,7 @@ static int execCode (struct lexSpec *spec, &cmd_str, &cmd_len); } while (r > 1); if (element_str) - tagEnd (spec, d1_stack, d1_level, NULL, 0); + tagEnd (spec, d1_stack, d1_level, 1, NULL, 0); } else if (!strcmp (p, "unread")) { @@ -1559,9 +1674,9 @@ static data1_node *lexRoot (struct lexSpec *spec, off_t offset, const char *context_name) { struct lexContext *lt = spec->context; - data1_node *d1_stack[512]; + data1_node *d1_stack[128]; int d1_level = 0; - int ptr = offset; + int i, ptr = offset; spec->context_stack_top = 0; while (lt) @@ -1580,6 +1695,8 @@ static data1_node *lexRoot (struct lexSpec *spec, off_t offset, if (lt->beginActionList) execAction (spec, lt->beginActionList, d1_stack, &d1_level, 0, &ptr); lexNode (spec, d1_stack, &d1_level, &ptr); + for (i = d1_level; i; --i) + tagDataRelease (spec, d1_stack, i); if (lt->endActionList) execAction (spec, lt->endActionList, d1_stack, &d1_level, ptr, &ptr); return *d1_stack; @@ -1605,6 +1722,7 @@ data1_node *grs_read_regx (struct grs_read_info *p) return NULL; } } + curLexSpec->dh = p->dh; if (!p->offset) { curLexSpec->f_win_start = 0;