X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frecgrs.c;h=d23a3d70bbd8283566f5cb2a7e673d3227b1d6e6;hb=7a49c3db444b475f63722c3da03e15c6db93f1a9;hp=e4c910a21f9f2525cdeb5c22db8ec60048e198ef;hpb=10e178572346e8c5c3caaa43b803dd10c005cb5f;p=idzebra-moved-to-github.git diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index e4c910a..d23a3d7 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,10 +1,50 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.28 1999-05-21 12:00:17 adam + * Revision 1.41 2001-05-22 21:01:47 adam + * Removed print of data1 tree on stdout so that inetd works again. + * + * Revision 1.40 2001/03/29 21:31:31 adam + * Fixed "record begin" for Tcl filter. + * + * Revision 1.39 2000/12/05 19:09:15 adam + * Fixed problem where indexer could crash if abstract syntax was undefined. + * + * Revision 1.38 2000/12/05 14:44:58 adam + * Fixed minor bug that could cause zmbol to break it data were emitted + * with not parent tags. + * + * Revision 1.37 2000/12/05 12:22:53 adam + * Termlist source implemented (so that we can index values of XML/SGML + * attributes). + * + * Revision 1.36 2000/12/05 10:01:44 adam + * Fixed bug regarding user-defined attribute sets. + * + * Revision 1.35 2000/11/29 15:21:31 adam + * Fixed problem with passwd db. + * + * Revision 1.34 2000/02/25 13:24:49 adam + * Fixed bug regarding pointer conversion that showed up on OSF V5. + * + * Revision 1.33 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.32 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.31 1999/07/14 10:56:43 adam + * Fixed potential memory leak. + * + * Revision 1.30 1999/07/06 12:26:41 adam + * Retrieval handler obeys schema and handles XML transfer syntax. + * + * Revision 1.29 1999/05/26 07:49:14 adam + * C++ compilation. + * + * Revision 1.28 1999/05/21 12:00:17 adam * Better diagnostics for extraction process. * * Revision 1.27 1999/05/20 12:57:18 adam @@ -185,8 +225,8 @@ #include #endif -#include -#include +#include +#include #include #include "grsread.h" @@ -238,7 +278,7 @@ static int read_grs_type (struct grs_handlers *h, static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t) { - struct grs_handler *gh = malloc (sizeof(*gh)); + struct grs_handler *gh = (struct grs_handler *) malloc (sizeof(*gh)); gh->next = h->handlers; h->handlers = gh; gh->initFlag = 0; @@ -248,7 +288,7 @@ static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t) static void *grs_init(RecType recType) { - struct grs_handlers *h = malloc (sizeof(*h)); + struct grs_handlers *h = (struct grs_handlers *) malloc (sizeof(*h)); h->handlers = 0; grs_add_handler (h, recTypeGrs_sgml); @@ -262,7 +302,7 @@ static void *grs_init(RecType recType) static void grs_destroy(void *clientData) { - struct grs_handlers *h = clientData; + struct grs_handlers *h = (struct grs_handlers *) clientData; struct grs_handler *gh = h->handlers, *gh_next; while (gh) { @@ -275,6 +315,78 @@ static void grs_destroy(void *clientData) free (h); } +static void index_tag (data1_node *par, data1_node *n, + struct recExtractCtrl *p, int level, RecWord *wrd) +{ + data1_termlist *tlist = 0; + data1_datatype dtype = DATA1K_string; + /* + * cycle up towards the root until we find a tag with an att.. + * this has the effect of indexing locally defined tags with + * the attribute of their ancestor in the record. + */ + + while (!par->u.tag.element) + if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) + break; + if (!par || !(tlist = par->u.tag.element->termlists)) + return; + if (par->u.tag.element->tag) + dtype = par->u.tag.element->tag->kind; + + for (; tlist; tlist = tlist->next) + { + char xattr[512]; + /* consider source */ + wrd->string = 0; + + if (!strcmp (tlist->source, "data") && n->which == DATA1N_data) + { + wrd->string = n->u.data.data; + wrd->length = n->u.data.len; + } + else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 && + n->which == DATA1N_tag) + { + data1_xattr *p = n->u.tag.attributes; + while (p && strcmp (p->name, xattr)) + p = p->next; + if (p) + { + wrd->string = p->value; + wrd->length = strlen(p->value); + } + } + if (wrd->string) + { + if (p->flagShowRecords) + { + int i; + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tlist->structure); + printf("%s:%s [%d] %s", + tlist->att->parent->name, + tlist->att->name, tlist->att->value, + tlist->source); + printf (" data=\""); + for (i = 0; ilength && i < 8; i++) + fputc (wrd->string[i], stdout); + fputc ('"', stdout); + if (wrd->length > 8) + printf (" ..."); + fputc ('\n', stdout); + } + else + { + wrd->reg_type = *tlist->structure; + wrd->attrSet = (int) (tlist->att->parent->reference); + wrd->attrUse = tlist->att->locals->local; + (*p->tokenAdd)(wrd); + } + } + } +} + static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) { RecWord wrd; @@ -322,11 +434,14 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) if (dumpkeys(n->child, p, level + 1) < 0) return -1; + if (n->which == DATA1N_tag) + { + index_tag (n, n, p, level, &wrd); + } + if (n->which == DATA1N_data) { data1_node *par = get_parent_tag(p->dh, n); - data1_termlist *tlist = 0; - data1_datatype dtype = DATA1K_string; if (p->flagShowRecords) { @@ -341,42 +456,9 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) printf("NULL\n"); } - assert(par); - - /* - * cycle up towards the root until we find a tag with an att.. - * this has the effect of indexing locally defined tags with - * the attribute of their ancestor in the record. - */ - - while (!par->u.tag.element) - if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) - break; - if (!par || !(tlist = par->u.tag.element->termlists)) - continue; - if (par->u.tag.element->tag) - dtype = par->u.tag.element->tag->kind; - for (; tlist; tlist = tlist->next) - { - if (p->flagShowRecords) - { - printf("%*sIdx: [%s]", (level + 1) * 4, "", - tlist->structure); - printf("%s:%s [%d]\n", - tlist->att->parent->name, - tlist->att->name, tlist->att->value); - } - else - { - wrd.reg_type = *tlist->structure; - wrd.string = n->u.data.data; - wrd.length = n->u.data.len; - wrd.attrSet = (int) (tlist->att->parent->reference); - wrd.attrUse = tlist->att->locals->local; - (*p->addWord)(&wrd); - } - } - } + if (par) + index_tag (par, n, p, level, &wrd); + } if (p->flagShowRecords && n->which == DATA1N_root) { printf("%*s-------------\n\n", level * 4, ""); @@ -395,21 +477,19 @@ int grs_extract_tree(struct recExtractCtrl *p, data1_node *n) oe.value = n->u.root.absyn->reference; if ((oid_ent_to_oid (&oe, oidtmp))) - (*p->addSchema)(p, oidtmp); + (*p->schemaAdd)(p, oidtmp); return dumpkeys(n, p, 0); } -static int grs_extract(void *clientData, struct recExtractCtrl *p) +static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p, + NMEM mem) { data1_node *n; - NMEM mem; struct grs_read_info gri; oident oe; int oidtmp[OID_SIZE]; - struct grs_handlers *h = clientData; - mem = nmem_create (); gri.readf = p->readf; gri.seekf = p->seekf; gri.tellf = p->tellf; @@ -423,12 +503,13 @@ static int grs_extract(void *clientData, struct recExtractCtrl *p) return RECCTRL_EXTRACT_ERROR; if (!n) return RECCTRL_EXTRACT_EOF; - oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; + if (!n->u.root.absyn) + return RECCTRL_EXTRACT_ERROR; oe.value = n->u.root.absyn->reference; if ((oid_ent_to_oid (&oe, oidtmp))) - (*p->addSchema)(p, oidtmp); + (*p->schemaAdd)(p, oidtmp); if (dumpkeys(n, p, 0) < 0) { @@ -436,10 +517,20 @@ static int grs_extract(void *clientData, struct recExtractCtrl *p) return RECCTRL_EXTRACT_ERROR; } data1_free_tree(p->dh, n); - nmem_destroy(mem); return RECCTRL_EXTRACT_OK; } +static int grs_extract(void *clientData, struct recExtractCtrl *p) +{ + int ret; + NMEM mem = nmem_create (); + struct grs_handlers *h = (struct grs_handlers *) clientData; + + ret = grs_extract_sub(h, p, mem); + nmem_destroy(mem); + return ret; +} + /* * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic. */ @@ -451,56 +542,57 @@ static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c) switch (c->which) { - case Z_RecordComp_simple: - if (c->u.simple->which != Z_ElementSetNames_generic) - return 26; /* only generic form supported. Fix this later */ - if (!(eset = data1_getesetbyname(dh, n->u.root.absyn, - c->u.simple->u.generic))) - { - logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic); - return 25; /* invalid esetname */ - } - logf(LOG_DEBUG, "Esetname '%s' in simple compspec", - c->u.simple->u.generic); - espec = eset->spec; - break; - case Z_RecordComp_complex: - if (c->u.complex->generic) + case Z_RecordComp_simple: + if (c->u.simple->which != Z_ElementSetNames_generic) + return 26; /* only generic form supported. Fix this later */ + if (!(eset = data1_getesetbyname(dh, n->u.root.absyn, + c->u.simple->u.generic))) + { + logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic); + return 25; /* invalid esetname */ + } + logf(LOG_DEBUG, "Esetname '%s' in simple compspec", + c->u.simple->u.generic); + espec = eset->spec; + break; + case Z_RecordComp_complex: + if (c->u.complex->generic) + { + /* insert check for schema */ + if ((p = c->u.complex->generic->elementSpec)) { - /* insert check for schema */ - if ((p = c->u.complex->generic->elementSpec)) - switch (p->which) + switch (p->which) + { + case Z_ElementSpec_elementSetName: + if (!(eset = + data1_getesetbyname(dh, n->u.root.absyn, + p->u.elementSetName))) + { + logf(LOG_LOG, "Unknown esetname '%s'", + p->u.elementSetName); + return 25; /* invalid esetname */ + } + logf(LOG_DEBUG, "Esetname '%s' in complex compspec", + p->u.elementSetName); + espec = eset->spec; + break; + case Z_ElementSpec_externalSpec: + if (p->u.externalSpec->which == Z_External_espec1) { - case Z_ElementSpec_elementSetName: - if (!(eset = - data1_getesetbyname(dh, - n->u.root.absyn, - p->u.elementSetName))) - { - logf(LOG_LOG, "Unknown esetname '%s'", - p->u.elementSetName); - return 25; /* invalid esetname */ - } - logf(LOG_DEBUG, "Esetname '%s' in complex compspec", - p->u.elementSetName); - espec = eset->spec; - break; - case Z_ElementSpec_externalSpec: - if (p->u.externalSpec->which == Z_External_espec1) - { - logf(LOG_DEBUG, "Got Espec-1"); - espec = p->u.externalSpec-> u.espec1; - } - else - { - logf(LOG_LOG, "Unknown external espec."); - return 25; /* bad. what is proper diagnostic? */ - } - break; + logf(LOG_DEBUG, "Got Espec-1"); + espec = p->u.externalSpec-> u.espec1; } + else + { + logf(LOG_LOG, "Unknown external espec."); + return 25; /* bad. what is proper diagnostic? */ + } + break; + } } - else - return 26; /* fix */ + } + else + return 26; /* fix */ } if (espec) { @@ -523,7 +615,8 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) NMEM mem; struct grs_read_info gri; char *tagname; - struct grs_handlers *h = clientData; + struct grs_handlers *h = (struct grs_handlers *) clientData; + int requested_schema = VAL_NONE; mem = nmem_create(); gri.readf = p->readf; @@ -548,6 +641,9 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) nmem_destroy (mem); return 0; } +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif logf (LOG_DEBUG, "grs_retrieve: size"); if ((dnew = data1_insert_taggeddata(p->dh, node, node, "size", mem))) @@ -559,10 +655,8 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) } tagname = res_get_def(p->res, "tagrank", "rank"); - if (strcmp(tagname, "0") && p->score >= 0 && (dnew = - data1_insert_taggeddata(p->dh, node, - node, tagname, - mem))) + if (strcmp(tagname, "0") && p->score >= 0 && + (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem))) { logf (LOG_DEBUG, "grs_retrieve: %s", tagname); dnew->u.data.what = DATA1I_num; @@ -582,66 +676,103 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) dnew->u.data.len = strlen(dnew->u.data.data); } + if (p->comp && p->comp->which == Z_RecordComp_complex && + p->comp->u.complex->generic && + p->comp->u.complex->generic->schema) + { + oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema); + if (oe) + requested_schema = oe->value; + } + + /* If schema has been specified, map if possible, then check that + * we got the right one + */ + if (requested_schema != VAL_NONE) + { + logf (LOG_DEBUG, "grs_retrieve: schema mapping"); + for (map = node->u.root.absyn->maptabs; map; map = map->next) + { + if (map->target_absyn_ref == requested_schema) + { + onode = node; + if (!(node = data1_map_record(p->dh, onode, map, mem))) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + break; + } + } + if (node->u.root.absyn && + requested_schema != node->u.root.absyn->reference) + { + p->diagnostic = 238; + nmem_destroy (mem); + return 0; + } + } + /* + * Does the requested format match a known syntax-mapping? (this reflects + * the overlap of schema and formatting which is inherent in the MARC + * family) + */ + logf (LOG_DEBUG, "grs_retrieve: syntax mapping"); + for (map = node->u.root.absyn->maptabs; map; map = map->next) + { + if (map->target_absyn_ref == p->input_format) + { + onode = node; + if (!(node = data1_map_record(p->dh, onode, map, mem))) + { + p->diagnostic = 14; + nmem_destroy (mem); + return 0; + } + break; + } + } logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier"); - if (p->input_format == VAL_GRS1 && node->u.root.absyn && - node->u.root.absyn->reference != VAL_NONE) + if (node->u.root.absyn && + node->u.root.absyn->reference != VAL_NONE && + p->input_format == VAL_GRS1) { oident oe; Odr_oid *oid; int oidtmp[OID_SIZE]; - + oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; oe.value = node->u.root.absyn->reference; - + if ((oid = oid_ent_to_oid (&oe, oidtmp))) { char tmp[128]; data1_handle dh = p->dh; char *p = tmp; int *ii; - + for (ii = oid; *ii >= 0; ii++) { if (p != tmp) - *(p++) = '.'; + *(p++) = '.'; sprintf(p, "%d", *ii); p += strlen(p); } *(p++) = '\0'; - + if ((dnew = data1_insert_taggeddata(dh, node, node, - "schemaIdentifier", mem))) + "schemaIdentifier", mem))) { dnew->u.data.what = DATA1I_oid; - dnew->u.data.data = nmem_malloc(mem, p - tmp); + dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp); memcpy(dnew->u.data.data, tmp, p - tmp); dnew->u.data.len = p - tmp; } } } - logf (LOG_DEBUG, "grs_retrieve: schema mapping"); - /* - * Does the requested format match a known schema-mapping? (this reflects - * the overlap of schema and formatting which is inherent in the MARC - * family) - * NOTE: This should look at the schema-specification in the compspec - * as well. - */ - for (map = node->u.root.absyn->maptabs; map; map = map->next) - if (map->target_absyn_ref == p->input_format) - { - onode = node; - if (!(node = data1_map_record(p->dh, onode, map, mem))) - { - p->diagnostic = 14; - nmem_destroy (mem); - return 0; - } - break; - } - logf (LOG_DEBUG, "grs_retrieve: element spec"); if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0) { @@ -655,82 +786,95 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) else if (p->comp && !res) selected = 1; +#if 0 + data1_pr_tree (p->dh, node, stdout); +#endif logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping"); switch (p->output_format = (p->input_format != VAL_NONE ? - p->input_format : VAL_SUTRS)) + p->input_format : VAL_SUTRS)) { data1_marctab *marctab; int dummy; - - case VAL_GRS1: - dummy = 0; - if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected, - p->odr, &dummy))) - p->diagnostic = 238; /* not available in requested syntax */ - else - p->rec_len = -1; - break; - case VAL_EXPLAIN: - if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected, - p->odr))) - p->diagnostic = 238; - else - p->rec_len = -1; - break; - case VAL_SUMMARY: - if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected, - p->odr))) - p->diagnostic = 238; - else - p->rec_len = -1; - break; - case VAL_SUTRS: - if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, - (int*)&p->rec_len))) - p->diagnostic = 238; - else - { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); - p->rec_buf = new_buf; - } - break; - case VAL_SOIF: - if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, - (int*)&p->rec_len))) - p->diagnostic = 238; - else - { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); - p->rec_buf = new_buf; - } + + case VAL_TEXT_XML: + if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + case VAL_GRS1: + dummy = 0; + if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected, + p->odr, &dummy))) + p->diagnostic = 238; /* not available in requested syntax */ + else + p->rec_len = (size_t) (-1); + break; + case VAL_EXPLAIN: + if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; + else + p->rec_len = (size_t) (-1); + break; + case VAL_SUMMARY: + if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; + else + p->rec_len = (size_t) (-1); + break; + case VAL_SUTRS: + if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + case VAL_SOIF: + if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, + &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); + p->rec_buf = new_buf; + } + break; + default: + if (!node->u.root.absyn) + { + p->diagnostic = 238; break; - default: - if (!node->u.root.absyn) - { - p->diagnostic = 238; - break; - } - for (marctab = node->u.root.absyn->marc; marctab; - marctab = marctab->next) - if (marctab->reference == p->input_format) - break; - if (!marctab) - { - p->diagnostic = 238; + } + for (marctab = node->u.root.absyn->marc; marctab; + marctab = marctab->next) + if (marctab->reference == p->input_format) break; - } - if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, - selected, - (int*)&p->rec_len))) - p->diagnostic = 238; - else - { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); + if (!marctab) + { + p->diagnostic = 238; + break; + } + if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, + selected, &p->rec_len))) + p->diagnostic = 238; + else + { + char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); + memcpy (new_buf, p->rec_buf, p->rec_len); p->rec_buf = new_buf; - } + } } if (node) data1_free_tree(p->dh, node);