X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frecgrs.c;h=e521d96884ea5aaaaf1c099a5e664c9bcf1ac72e;hb=b673999f9e517726d6e3789f3cefd194ce89ef15;hp=49fdf91d7af50a0c18225d1e0297e00e908119aa;hpb=856d020355a71f37c0b55564f97d52072646b7c8;p=idzebra-moved-to-github.git diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 49fdf91..e521d96 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,5 +1,5 @@ -/* $Id: recgrs.c,v 1.85 2003-10-07 09:18:21 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 +/* $Id: recgrs.c,v 1.91 2004-09-28 10:15:03 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -25,104 +25,147 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #ifndef WIN32 #include +#include #endif #include #include -#include -#include "grsread.h" +#include +#include #define GRS_MAX_WORD 512 -struct grs_handler { - RecTypeGrs type; - void *clientData; - int initFlag; - struct grs_handler *next; +struct source_parser { + int len; + const char *tok; + const char *src; + int lookahead; }; -struct grs_handlers { - struct grs_handler *handlers; -}; - -static int read_grs_type (struct grs_handlers *h, - struct grs_read_info *p, const char *type, - data1_node **root) +static int sp_lex(struct source_parser *sp) { - struct grs_handler *gh = h->handlers; - const char *cp = strchr (type, '.'); - - if (cp == NULL || cp == type) + while (*sp->src == ' ') + (sp->src)++; + sp->tok = sp->src; + sp->len = 0; + while (*sp->src && !strchr("<>();,-: ", *sp->src)) { - cp = strlen(type) + type; - *p->type = 0; + sp->src++; + sp->len++; } + if (sp->len) + sp->lookahead = 't'; else - strcpy (p->type, cp+1); - for (gh = h->handlers; gh; gh = gh->next) { - if (!memcmp (type, gh->type->type, cp-type) && - gh->type->type[cp-type] == '\0') + sp->lookahead = *sp->src; + if (*sp->src) + sp->src++; + } + return sp->lookahead; +} + + +static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd) +{ + if (sp->lookahead != 't') + return 0; + if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len)) + { + if (n->which == DATA1N_data) + { + wrd->string = n->u.data.data; + wrd->length = n->u.data.len; + } + sp_lex(sp); + } + else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len)) + { + if (n->which == DATA1N_tag) + { + wrd->string = n->u.tag.tag; + wrd->length = strlen(n->u.tag.tag); + } + sp_lex(sp); + } + else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len)) + { + sp_lex(sp); + if (sp->lookahead != '(') + return 0; + sp_lex(sp); + if (sp->lookahead != 't') + return 0; + + if (n->which == DATA1N_tag) { - if (!gh->initFlag) + data1_xattr *p = n->u.tag.attributes; + while (p && strlen(p->name) != sp->len && + memcmp (p->name, sp->tok, sp->len)) + p = p->next; + if (p) { - gh->initFlag = 1; - gh->clientData = (*gh->type->init)(); + wrd->string = p->value; + wrd->length = strlen(p->value); } - p->clientData = gh->clientData; - *root = (gh->type->read)(p); - gh->clientData = p->clientData; + } + sp_lex(sp); + if (sp->lookahead != ')') + return 0; + sp_lex(sp); + } + else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len)) + { + int start, len; + sp_lex(sp); + if (sp->lookahead != '(') + return 0; + + sp_lex(sp); + sp_expr(sp, n, wrd); + if (sp->lookahead != ',') + return 0; + + sp_lex(sp); + if (sp->lookahead != 't') + return 0; + start = atoi_n(sp->tok, sp->len); + + sp_lex(sp); + if (sp->lookahead != ',') + return 0; + + sp_lex(sp); + if (sp->lookahead != 't') + return 0; + len = atoi_n(sp->tok, sp->len); + + sp_lex(sp); + if (sp->lookahead != ')') return 0; + + sp_lex(sp); + if (wrd->string && wrd->length) + { + wrd->string += start; + wrd->length -= start; + if (wrd->length > len) + wrd->length = len; } } return 1; } -static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t) -{ - struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh)); - gh->next = h->handlers; - h->handlers = gh; - gh->initFlag = 0; - gh->clientData = 0; - gh->type = t; -} - -static void *grs_init(RecType recType) +static int sp_parse(data1_node *n, RecWord *wrd, const char *src) { - struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h)); - h->handlers = 0; - - grs_add_handler (h, recTypeGrs_sgml); - grs_add_handler (h, recTypeGrs_regx); -#if HAVE_TCL_H - grs_add_handler (h, recTypeGrs_tcl); -#endif - grs_add_handler (h, recTypeGrs_marc); - grs_add_handler (h, recTypeGrs_marcxml); -#if HAVE_EXPAT_H - grs_add_handler (h, recTypeGrs_xml); -#endif -#if HAVE_PERL - grs_add_handler (h, recTypeGrs_perl); -#endif - return h; -} - -static void grs_destroy(void *clientData) -{ - struct grs_handlers *h = (struct grs_handlers *) clientData; - struct grs_handler *gh = h->handlers, *gh_next; - while (gh) - { - gh_next = gh->next; - if (gh->initFlag) - (*gh->type->destroy)(gh->clientData); - xfree (gh); - gh = gh_next; - } - xfree (h); + struct source_parser sp; + sp.len = 0; + sp.tok = 0; + sp.src = src; + sp.lookahead = 0; + sp_lex(&sp); + + return sp_expr(&sp, n, wrd); } int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p) @@ -354,6 +397,8 @@ static void index_xpath (data1_node *n, struct recExtractCtrl *p, size_t flen = 0; data1_node *nn; int termlist_only = 1; + data1_termlist *tl; + int xpdone = 0; yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use); if ((!n->root->u.root.absyn) || @@ -366,58 +411,88 @@ static void index_xpath (data1_node *n, struct recExtractCtrl *p, case DATA1N_data: wrd->string = n->u.data.data; wrd->length = n->u.data.len; - if (p->flagShowRecords) - { - printf("%*s XData:\"", (level + 1) * 4, ""); - for (i = 0; ilength && i < 8; i++) - fputc (wrd->string[i], stdout); - printf("\"\n"); - } - else { - data1_termlist *tl; - int xpdone = 0; - flen = 0; - - /* we have to fetch the whole path to the data tag */ - for (nn = n; nn; nn = nn->parent) { - if (nn->which == DATA1N_tag) { - size_t tlen = strlen(nn->u.tag.tag); - if (tlen + flen > (sizeof(tag_path_full)-2)) return; - memcpy (tag_path_full + flen, nn->u.tag.tag, tlen); - flen += tlen; - tag_path_full[flen++] = '/'; - } - else if (nn->which == DATA1N_root) break; - } - - tag_path_full[flen] = 0; + xpdone = 0; + flen = 0; - /* If we have a matching termlist... */ - if (n->root->u.root.absyn && (tl = xpath_termlist_by_tagpath(tag_path_full, n))) { - for (; tl; tl = tl->next) { - wrd->reg_type = *tl->structure; - /* this is the ! case, so structure is for the xpath index */ - if (!tl->att) { - wrd->attrSet = VAL_IDXPATH; - wrd->attrUse = use; - (*p->tokenAdd)(wrd); - xpdone = 1; - } else { - /* this is just the old fashioned attribute based index */ - wrd->attrSet = (int) (tl->att->parent->reference); - wrd->attrUse = tl->att->locals->local; - (*p->tokenAdd)(wrd); - } - } - } - /* xpath indexing is done, if there was no termlist given, - or no ! in the termlist, and default indexing is enabled... */ - if ((!xpdone) && (!termlist_only)) { - wrd->attrSet = VAL_IDXPATH; - wrd->attrUse = use; - wrd->reg_type = 'w'; - (*p->tokenAdd)(wrd); - } + /* we have to fetch the whole path to the data tag */ + for (nn = n; nn; nn = nn->parent) { + if (nn->which == DATA1N_tag) { + size_t tlen = strlen(nn->u.tag.tag); + if (tlen + flen > (sizeof(tag_path_full)-2)) return; + memcpy (tag_path_full + flen, nn->u.tag.tag, tlen); + flen += tlen; + tag_path_full[flen++] = '/'; + } + else if (nn->which == DATA1N_root) break; + } + + tag_path_full[flen] = 0; + + /* If we have a matching termlist... */ + if (n->root->u.root.absyn && + (tl = xpath_termlist_by_tagpath(tag_path_full, n))) + { + for (; tl; tl = tl->next) + { + /* need to copy recword because it may be changed */ + RecWord wrd_tl; + wrd->reg_type = *tl->structure; + /* this is the ! case, so structure is for the xpath index */ + memcpy (&wrd_tl, wrd, sizeof(*wrd)); + if (tl->source) + sp_parse(n, &wrd_tl, tl->source); + if (!tl->att) { + wrd_tl.attrSet = VAL_IDXPATH; + wrd_tl.attrUse = use; + if (p->flagShowRecords) + { + int i; + printf("%*sXPath index", (level + 1) * 4, ""); + printf (" XData:\""); + for (i = 0; i 40) + printf (" ..."); + fputc ('\n', stdout); + } + else + (*p->tokenAdd)(&wrd_tl); + xpdone = 1; + } else { + /* this is just the old fashioned attribute based index */ + wrd_tl.attrSet = (int) (tl->att->parent->reference); + wrd_tl.attrUse = tl->att->locals->local; + if (p->flagShowRecords) + { + int i; + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tl->structure); + printf("%s:%s [%d] %s", + tl->att->parent->name, + tl->att->name, tl->att->value, + tl->source); + printf (" XData:\""); + for (i = 0; i 40) + printf (" ..."); + fputc ('\n', stdout); + } + else + (*p->tokenAdd)(&wrd_tl); + } + } + } + /* xpath indexing is done, if there was no termlist given, + or no ! in the termlist, and default indexing is enabled... */ + if (!p->flagShowRecords && !xpdone && !termlist_only) + { + wrd->attrSet = VAL_IDXPATH; + wrd->attrUse = use; + wrd->reg_type = 'w'; + (*p->tokenAdd)(wrd); } break; case DATA1N_tag: @@ -611,33 +686,11 @@ static void index_termlist (data1_node *par, data1_node *n, for (; tlist; tlist = tlist->next) { - - char xattr[512]; /* consider source */ wrd->string = 0; + assert(tlist->source); + sp_parse(n, wrd, tlist->source); - if (!strcmp (tlist->source, "data") && n->which == DATA1N_data) - { - wrd->string = n->u.data.data; - wrd->length = n->u.data.len; - } - else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag) - { - wrd->string = n->u.tag.tag; - wrd->length = strlen(n->u.tag.tag); - } - else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 && - n->which == DATA1N_tag) - { - data1_xattr *p = n->u.tag.attributes; - while (p && strcmp (p->name, xattr)) - p = p->next; - if (p) - { - wrd->string = p->value; - wrd->length = strlen(p->value); - } - } if (wrd->string) { if (p->flagShowRecords) @@ -780,8 +833,9 @@ int grs_extract_tree(struct recExtractCtrl *p, data1_node *n) return dumpkeys(n, p, 0, &wrd); } -static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, - NMEM mem) +static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, + NMEM mem, + data1_node *(*grs_read)(struct grs_read_info *)) { data1_node *n; struct grs_read_info gri; @@ -797,9 +851,9 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, gri.offset = p->offset; gri.mem = mem; gri.dh = p->dh; + gri.clientData = clientData; - if (read_grs_type (h, &gri, p->subType, &n)) - return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER; + n = (*grs_read)(&gri); if (!n) return RECCTRL_EXTRACT_EOF; oe.proto = PROTO_Z3950; @@ -833,13 +887,12 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, return RECCTRL_EXTRACT_OK; } -static int grs_extract(void *clientData, struct recExtractCtrl *p) +int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, + data1_node *(*grs_read)(struct grs_read_info *)) { int ret; NMEM mem = nmem_create (); - struct grs_handlers *h = (struct grs_handlers *) clientData; - - ret = grs_extract_sub(h, p, mem); + ret = grs_extract_sub(clientData, p, mem, grs_read); nmem_destroy(mem); return ret; } @@ -958,7 +1011,7 @@ static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top, data1_mk_tag_data_int (p->dh, n, "score", p->score, mem); } data1_mk_text (p->dh, mem, i4, n); - data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem); + data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem); if (p->fname) { data1_mk_text (p->dh, mem, i4, n); @@ -967,7 +1020,8 @@ static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top, data1_mk_text (p->dh, mem, i2, n); } -static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) +int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, + data1_node *(*grs_read)(struct grs_read_info *)) { data1_node *node = 0, *onode = 0, *top; data1_node *dnew; @@ -976,7 +1030,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) NMEM mem; struct grs_read_info gri; const char *tagname; - struct grs_handlers *h = (struct grs_handlers *) clientData; + int requested_schema = VAL_NONE; data1_marctab *marctab; int dummy; @@ -990,14 +1044,10 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) gri.offset = 0; gri.mem = mem; gri.dh = p->dh; + gri.clientData = clientData; yaz_log(LOG_DEBUG, "grs_retrieve"); - if (read_grs_type (h, &gri, p->subType, &node)) - { - p->diagnostic = 14; - nmem_destroy (mem); - return 0; - } + node = (*grs_read)(&gri); if (!node) { p->diagnostic = 14; @@ -1045,7 +1095,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) dnew->u.data.what = DATA1I_text; dnew->u.data.data = dnew->lbuf; - sprintf(dnew->u.data.data, "%d", p->localno); + sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno); dnew->u.data.len = strlen(dnew->u.data.data); } #if 0 @@ -1279,13 +1329,3 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) return 0; } -static struct recType grs_type = -{ - "grs", - grs_init, - grs_destroy, - grs_extract, - grs_retrieve -}; - -RecType recTypeGrs = &grs_type;