X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frecgrs.c;h=a7794626eabd49f72041f85cf9b33e945b4a453e;hb=d57bb162b65a585e0ed3ad319d9673d66bc6d90c;hp=aaa7cbae9004e2f500daa41b559c4513c41b55dc;hpb=ee469875edc1db23aae63746ec9fff6a7be8d4ab;p=idzebra-moved-to-github.git diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index aaa7cba..a779462 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,10 +1,43 @@ /* - * Copyright (C) 1994-2000, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.37 2000-12-05 12:22:53 adam + * Revision 1.48 2002-05-07 11:05:20 adam + * data1 updates. Run number fix + * + * Revision 1.47 2002/05/03 13:50:25 adam + * data1 cleanup + * + * Revision 1.46 2002/04/13 18:16:43 adam + * More XPATH work; common sequence numbers for extract keys + * + * Revision 1.45 2002/04/12 14:40:42 adam + * Work on XPATH + * + * Revision 1.44 2002/04/11 20:09:47 adam + * work on string tag indexing + * + * Revision 1.43 2002/03/21 23:06:36 adam + * Source 'tag' in abs-file + * + * Revision 1.42 2002/02/20 17:30:01 adam + * Work on new API. Locking system re-implemented + * + * Revision 1.41 2001/05/22 21:01:47 adam + * Removed print of data1 tree on stdout so that inetd works again. + * + * Revision 1.40 2001/03/29 21:31:31 adam + * Fixed "record begin" for Tcl filter. + * + * Revision 1.39 2000/12/05 19:09:15 adam + * Fixed problem where indexer could crash if abstract syntax was undefined. + * + * Revision 1.38 2000/12/05 14:44:58 adam + * Fixed minor bug that could cause zmbol to break it data were emitted + * with not parent tags. + * + * Revision 1.37 2000/12/05 12:22:53 adam * Termlist source implemented (so that we can index values of XML/SGML * attributes). 
* @@ -303,8 +336,81 @@ static void grs_destroy(void *clientData) free (h); } -static void index_tag (data1_node *par, data1_node *n, - struct recExtractCtrl *p, int level, RecWord *wrd) +static void index_xpath (data1_node *n, struct recExtractCtrl *p, + int level, RecWord *wrd, int use) +{ + int i; + char tag_path_full[1024]; + size_t flen = 0; + data1_node *nn; + + switch (n->which) + { + case DATA1N_data: + wrd->reg_type = 'w'; + wrd->string = n->u.data.data; + wrd->length = n->u.data.len; + wrd->attrSet = VAL_IDXPATH, + wrd->attrUse = use; + if (p->flagShowRecords) + { + printf("%*s data=", (level + 1) * 4, ""); + for (i = 0; i<wrd->length && i < 8; i++) + fputc (wrd->string[i], stdout); + printf("\n"); + } + else + { + (*p->tokenAdd)(wrd); + } + break; + case DATA1N_tag: + for (nn = n; nn; nn = nn->parent) + { + if (n->which == DATA1N_tag) + { + size_t tlen = strlen(nn->u.tag.tag); + if (tlen + flen > (sizeof(tag_path_full)-2)) + return; + memcpy (tag_path_full + flen, nn->u.tag.tag, tlen); + flen += tlen; + tag_path_full[flen++] = '/'; + } + else if (n->which == DATA1N_root) + { + size_t tlen = strlen(nn->u.root.type); + if (tlen + flen > (sizeof(tag_path_full)-2)) + return; + memcpy (tag_path_full + flen, nn->u.root.type, tlen); + flen += tlen; + tag_path_full[flen++] = '/'; + break; + } + } + wrd->reg_type = '0'; + wrd->string = tag_path_full; + wrd->length = flen; + wrd->attrSet = VAL_IDXPATH, + wrd->attrUse = use; + if (p->flagShowRecords) + { + printf("%*s tag=", (level + 1) * 4, ""); + for (i = 0; i<wrd->length && i < 40; i++) + fputc (wrd->string[i], stdout); + if (i == 40) + printf (" .."); + printf("\n"); + } + else + { + (*p->tokenAdd)(wrd); + } + break; + } +} + +static void index_termlist (data1_node *par, data1_node *n, + struct recExtractCtrl *p, int level, RecWord *wrd) { data1_termlist *tlist = 0; data1_datatype dtype = DATA1K_string; @@ -333,6 +439,11 @@ static void index_tag (data1_node *par, data1_node *n, wrd->string = n->u.data.data; wrd->length = 
n->u.data.len; } + else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag) + { + wrd->string = n->u.tag.tag; + wrd->length = strlen(n->u.tag.tag); + } else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 && n->which == DATA1N_tag) { @@ -375,10 +486,9 @@ static void index_tag (data1_node *par, data1_node *n, } } -static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) +static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level, + RecWord *wrd) { - RecWord wrd; - (*p->init)(p, &wrd); /* set defaults */ for (; n; n = n->next) { if (p->flagShowRecords) /* display element description to user */ @@ -386,7 +496,7 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) if (n->which == DATA1N_root) { printf("%*s", level * 4, ""); - printf("Record type: '%s'\n", n->u.root.absyn->name); + printf("Record type: '%s'\n", n->u.root.type); } else if (n->which == DATA1N_tag) { @@ -418,15 +528,19 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) } } - if (n->child) - if (dumpkeys(n->child, p, level + 1) < 0) - return -1; - if (n->which == DATA1N_tag) { - index_tag (n, n, p, level, &wrd); + index_termlist (n, n, p, level, wrd); + /* index start tag */ + if (!n->root->u.root.absyn) + index_xpath (n, p, level, wrd, 1); } + if (n->child) + if (dumpkeys(n->child, p, level + 1, wrd) < 0) + return -1; + + if (n->which == DATA1N_data) { data1_node *par = get_parent_tag(p->dh, n); @@ -444,10 +558,21 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) printf("NULL\n"); } - assert(par); + if (par) + index_termlist (par, n, p, level, wrd); + if (!n->root->u.root.absyn) + index_xpath (n, p, level, wrd, 1016); - index_tag (par, n, p, level, &wrd); + } + + if (n->which == DATA1N_tag) + { + /* index end tag */ + if (!n->root->u.root.absyn) + index_xpath (n, p, level, wrd, 2); } + + if (p->flagShowRecords && n->which == DATA1N_root) { printf("%*s-------------\n\n", level * 4, ""); @@ 
-460,15 +585,19 @@ int grs_extract_tree(struct recExtractCtrl *p, data1_node *n) { oident oe; int oidtmp[OID_SIZE]; + RecWord wrd; oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; - oe.value = n->u.root.absyn->reference; - - if ((oid_ent_to_oid (&oe, oidtmp))) - (*p->schemaAdd)(p, oidtmp); - - return dumpkeys(n, p, 0); + if (n->u.root.absyn) + { + oe.value = n->u.root.absyn->reference; + + if ((oid_ent_to_oid (&oe, oidtmp))) + (*p->schemaAdd)(p, oidtmp); + } + (*p->init)(p, &wrd); + return dumpkeys(n, p, 0, &wrd); } static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, @@ -478,6 +607,7 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, struct grs_read_info gri; oident oe; int oidtmp[OID_SIZE]; + RecWord wrd; gri.readf = p->readf; gri.seekf = p->seekf; @@ -494,11 +624,21 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, return RECCTRL_EXTRACT_EOF; oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; - oe.value = n->u.root.absyn->reference; - if ((oid_ent_to_oid (&oe, oidtmp))) - (*p->schemaAdd)(p, oidtmp); - - if (dumpkeys(n, p, 0) < 0) +#if 0 + if (!n->u.root.absyn) + return RECCTRL_EXTRACT_ERROR; +#endif + if (n->u.root.absyn) + { + oe.value = n->u.root.absyn->reference; + if ((oid_ent_to_oid (&oe, oidtmp))) + (*p->schemaAdd)(p, oidtmp); + } +#if 0 + data1_pr_tree (p->dh, n, stdout); +#endif + (*p->init)(p, &wrd); + if (dumpkeys(n, p, 0, &wrd) < 0) { data1_free_tree(p->dh, n); return RECCTRL_EXTRACT_ERROR; @@ -632,8 +772,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) data1_pr_tree (p->dh, node, stdout); #endif logf (LOG_DEBUG, "grs_retrieve: size"); - if ((dnew = data1_insert_taggeddata(p->dh, node, node, - "size", mem))) + if ((dnew = data1_mk_tag_data_wd(p->dh, node, "size", mem))) { dnew->u.data.what = DATA1I_text; dnew->u.data.data = dnew->lbuf; @@ -643,7 +782,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) tagname = 
res_get_def(p->res, "tagrank", "rank"); if (strcmp(tagname, "0") && p->score >= 0 && - (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem))) + (dnew = data1_mk_tag_data_wd(p->dh, node, tagname, mem))) { logf (LOG_DEBUG, "grs_retrieve: %s", tagname); dnew->u.data.what = DATA1I_num; @@ -654,7 +793,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) tagname = res_get_def(p->res, "tagsysno", "localControlNumber"); if (strcmp(tagname, "0") && p->localno > 0 && - (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem))) + (dnew = data1_mk_tag_data_wd(p->dh, node, tagname, mem))) { logf (LOG_DEBUG, "grs_retrieve: %s", tagname); dnew->u.data.what = DATA1I_text; @@ -662,7 +801,9 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) sprintf(dnew->u.data.data, "%d", p->localno); dnew->u.data.len = strlen(dnew->u.data.data); } - +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif if (p->comp && p->comp->which == Z_RecordComp_complex && p->comp->u.complex->generic && p->comp->u.complex->generic->schema) @@ -706,20 +847,21 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) * family) */ logf (LOG_DEBUG, "grs_retrieve: syntax mapping"); - for (map = node->u.root.absyn->maptabs; map; map = map->next) - { - if (map->target_absyn_ref == p->input_format) - { - onode = node; - if (!(node = data1_map_record(p->dh, onode, map, mem))) - { - p->diagnostic = 14; - nmem_destroy (mem); - return 0; - } - break; - } - } + if (node->u.root.absyn) + for (map = node->u.root.absyn->maptabs; map; map = map->next) + { + if (map->target_absyn_ref == p->input_format) + { + onode = node; + if (!(node = data1_map_record(p->dh, onode, map, mem))) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + break; + } + } logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier"); if (node->u.root.absyn && node->u.root.absyn->reference != VAL_NONE && @@ -749,8 +891,8 @@ static int grs_retrieve(void *clientData, struct 
recRetrieveCtrl *p) } *(p++) = '\0'; - if ((dnew = data1_insert_taggeddata(dh, node, node, - "schemaIdentifier", mem))) + if ((dnew = data1_mk_tag_data_wd(dh, node, + "schemaIdentifier", mem))) { dnew->u.data.what = DATA1I_oid; dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp); @@ -773,6 +915,9 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) else if (p->comp && !res) selected = 1; +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping"); switch (p->output_format = (p->input_format != VAL_NONE ? p->input_format : VAL_SUTRS))