X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frecgrs.c;h=aaa7cbae9004e2f500daa41b559c4513c41b55dc;hb=ee469875edc1db23aae63746ec9fff6a7be8d4ab;hp=d937600b4170457e5d7779d33916b7642afb97b5;hpb=7ac37393aeb242f6eddf1a604923bd009baace41;p=idzebra-moved-to-github.git diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index d937600..aaa7cba 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,10 +1,32 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.30 1999-07-06 12:26:41 adam + * Revision 1.37 2000-12-05 12:22:53 adam + * Termlist source implemented (so that we can index values of XML/SGML + * attributes). + * + * Revision 1.36 2000/12/05 10:01:44 adam + * Fixed bug regarding user-defined attribute sets. + * + * Revision 1.35 2000/11/29 15:21:31 adam + * Fixed problem with passwd db. + * + * Revision 1.34 2000/02/25 13:24:49 adam + * Fixed bug regarding pointer conversion that showed up on OSF V5. + * + * Revision 1.33 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.32 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.31 1999/07/14 10:56:43 adam + * Fixed potential memory leak. + * + * Revision 1.30 1999/07/06 12:26:41 adam * Retrieval handler obeys schema and handles XML transfer syntax. * * Revision 1.29 1999/05/26 07:49:14 adam @@ -191,8 +213,8 @@ #include #endif -#include -#include +#include +#include #include #include "grsread.h" @@ -281,6 +303,78 @@ static void grs_destroy(void *clientData) free (h); } +static void index_tag (data1_node *par, data1_node *n, + struct recExtractCtrl *p, int level, RecWord *wrd) +{ + data1_termlist *tlist = 0; + data1_datatype dtype = DATA1K_string; + /* + * cycle up towards the root until we find a tag with an att.. 
+ * this has the effect of indexing locally defined tags with + * the attribute of their ancestor in the record. + */ + + while (!par->u.tag.element) + if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) + break; + if (!par || !(tlist = par->u.tag.element->termlists)) + return; + if (par->u.tag.element->tag) + dtype = par->u.tag.element->tag->kind; + + for (; tlist; tlist = tlist->next) + { + char xattr[512]; + /* consider source */ + wrd->string = 0; + + if (!strcmp (tlist->source, "data") && n->which == DATA1N_data) + { + wrd->string = n->u.data.data; + wrd->length = n->u.data.len; + } + else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 && + n->which == DATA1N_tag) + { + data1_xattr *p = n->u.tag.attributes; + while (p && strcmp (p->name, xattr)) + p = p->next; + if (p) + { + wrd->string = p->value; + wrd->length = strlen(p->value); + } + } + if (wrd->string) + { + if (p->flagShowRecords) + { + int i; + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tlist->structure); + printf("%s:%s [%d] %s", + tlist->att->parent->name, + tlist->att->name, tlist->att->value, + tlist->source); + printf (" data=\""); + for (i = 0; i<wrd->length && i < 8; i++) + fputc (wrd->string[i], stdout); + fputc ('"', stdout); + if (wrd->length > 8) + printf (" ..."); + fputc ('\n', stdout); + } + else + { + wrd->reg_type = *tlist->structure; + wrd->attrSet = (int) (tlist->att->parent->reference); + wrd->attrUse = tlist->att->locals->local; + (*p->tokenAdd)(wrd); + } + } + } +} + static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) { RecWord wrd; @@ -328,11 +422,14 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) if (dumpkeys(n->child, p, level + 1) < 0) return -1; + if (n->which == DATA1N_tag) + { + index_tag (n, n, p, level, &wrd); + } + if (n->which == DATA1N_data) { data1_node *par = get_parent_tag(p->dh, n); - data1_termlist *tlist = 0; - data1_datatype dtype = DATA1K_string; if (p->flagShowRecords) { @@ -349,39 +446,7 @@ 
static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) assert(par); - /* - * cycle up towards the root until we find a tag with an att.. - * this has the effect of indexing locally defined tags with - * the attribute of their ancestor in the record. - */ - - while (!par->u.tag.element) - if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) - break; - if (!par || !(tlist = par->u.tag.element->termlists)) - continue; - if (par->u.tag.element->tag) - dtype = par->u.tag.element->tag->kind; - for (; tlist; tlist = tlist->next) - { - if (p->flagShowRecords) - { - printf("%*sIdx: [%s]", (level + 1) * 4, "", - tlist->structure); - printf("%s:%s [%d]\n", - tlist->att->parent->name, - tlist->att->name, tlist->att->value); - } - else - { - wrd.reg_type = *tlist->structure; - wrd.string = n->u.data.data; - wrd.length = n->u.data.len; - wrd.attrSet = (int) (tlist->att->parent->reference); - wrd.attrUse = tlist->att->locals->local; - (*p->addWord)(&wrd); - } - } + index_tag (par, n, p, level, &wrd); } if (p->flagShowRecords && n->which == DATA1N_root) { @@ -401,21 +466,19 @@ int grs_extract_tree(struct recExtractCtrl *p, data1_node *n) oe.value = n->u.root.absyn->reference; if ((oid_ent_to_oid (&oe, oidtmp))) - (*p->addSchema)(p, oidtmp); + (*p->schemaAdd)(p, oidtmp); return dumpkeys(n, p, 0); } -static int grs_extract(void *clientData, struct recExtractCtrl *p) +static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, + NMEM mem) { data1_node *n; - NMEM mem; struct grs_read_info gri; oident oe; int oidtmp[OID_SIZE]; - struct grs_handlers *h = (struct grs_handlers *) clientData; - mem = nmem_create (); gri.readf = p->readf; gri.seekf = p->seekf; gri.tellf = p->tellf; @@ -429,12 +492,11 @@ static int grs_extract(void *clientData, struct recExtractCtrl *p) return RECCTRL_EXTRACT_ERROR; if (!n) return RECCTRL_EXTRACT_EOF; - oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; oe.value = n->u.root.absyn->reference; if ((oid_ent_to_oid 
(&oe, oidtmp))) - (*p->addSchema)(p, oidtmp); + (*p->schemaAdd)(p, oidtmp); if (dumpkeys(n, p, 0) < 0) { @@ -442,10 +504,20 @@ static int grs_extract(void *clientData, struct recExtractCtrl *p) return RECCTRL_EXTRACT_ERROR; } data1_free_tree(p->dh, n); - nmem_destroy(mem); return RECCTRL_EXTRACT_OK; } +static int grs_extract(void *clientData, struct recExtractCtrl *p) +{ + int ret; + NMEM mem = nmem_create (); + struct grs_handlers *h = (struct grs_handlers *) clientData; + + ret = grs_extract_sub(h, p, mem); + nmem_destroy(mem); + return ret; +} + /* * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic. */ @@ -464,7 +536,7 @@ static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c) c->u.simple->u.generic))) { logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic); - return 25; /* invalid esetname */ + return 25; /* invalid esetname */ } logf(LOG_DEBUG, "Esetname '%s' in simple compspec", c->u.simple->u.generic); @@ -556,6 +628,9 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) nmem_destroy (mem); return 0; } +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif logf (LOG_DEBUG, "grs_retrieve: size"); if ((dnew = data1_insert_taggeddata(p->dh, node, node, "size", mem))) @@ -707,7 +782,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) case VAL_TEXT_XML: if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected, - (int*)&p->rec_len))) + &p->rec_len))) p->diagnostic = 238; else { @@ -740,7 +815,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) break; case VAL_SUTRS: if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, - (int*)&p->rec_len))) + &p->rec_len))) p->diagnostic = 238; else { @@ -751,7 +826,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) break; case VAL_SOIF: if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, - (int*)&p->rec_len))) + &p->rec_len))) p->diagnostic = 238; else { @@ -776,8 +851,7 @@ static int 
grs_retrieve(void *clientData, struct recRetrieveCtrl *p) break; } if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, - selected, - (int*)&p->rec_len))) + selected, &p->rec_len))) p->diagnostic = 238; else {