X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frecgrs.c;h=76e0918e3762ad080ab730633ec61cad8a849af8;hb=e89f9552c15bc7f95c0495457a36ac45914fcb40;hp=d8f34533a3d5565d0603d4884ce8931c9c90a046;hpb=9b426aeb9256cef844693c13f38aba4b10f7056d;p=idzebra-moved-to-github.git diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index d8f3453..76e0918 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,10 +1,53 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.4 1997-04-30 08:56:08 quinn + * Revision 1.17 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.16 1998/01/29 13:38:17 adam + * Fixed problem with mapping to record with unknown schema. + * + * Revision 1.15 1998/01/26 10:37:57 adam + * Better diagnostics. + * + * Revision 1.14 1997/11/06 11:41:01 adam + * Implemented "begin variant" for the sgml.regx filter. + * + * Revision 1.13 1997/10/31 12:35:44 adam + * Added a few log statements. + * + * Revision 1.12 1997/10/29 12:02:22 adam + * Using oid_ent_to_oid used instead of the non thread-safe oid_getoidbyent. + * + * Revision 1.11 1997/10/27 14:34:00 adam + * Work on generic character mapping depending on "structure" field + * in abstract syntax file. + * + * Revision 1.10 1997/09/18 08:59:21 adam + * Extra generic handle for the character mapping routines. + * + * Revision 1.9 1997/09/17 12:19:21 adam + * Zebra version corresponds to YAZ version 1.4. + * Changed Zebra server so that it doesn't depend on global common_resource. + * + * Revision 1.8 1997/09/09 13:38:14 adam + * Partial port to WIN95/NT. + * + * Revision 1.7 1997/09/05 15:30:10 adam + * Changed prototype for chr_map_input - added const. + * Added support for C++, headers uses extern "C" for public definitions. + * + * Revision 1.6 1997/09/04 13:54:40 adam + * Added MARC filter - type grs.marc. where syntax refers + * to abstract syntax. New method tellf in retrieve/extract method. + * + * Revision 1.5 1997/07/15 16:29:03 adam + * Initialized dummy variable to keep checker gcc happy. + * + * Revision 1.4 1997/04/30 08:56:08 quinn * null * * Revision 1.2 1996/10/11 16:06:43 quinn @@ -104,13 +147,14 @@ #include #include #include +#ifndef WINDOWS #include +#endif #include #include #include -#include #include "grsread.h" #define GRS_MAX_WORD 512 @@ -125,6 +169,7 @@ static data1_node *read_grs_type (struct grs_read_info *p, const char *type) } tab[] = { { "sgml", grs_read_sgml }, { "regx", grs_read_regx }, + { "marc", grs_read_marc }, { NULL, NULL } }; const char *cp = strchr (type, '.'); @@ -149,116 +194,10 @@ static void grs_init(void) { } -static void dumpkeys_word(data1_node *n, struct recExtractCtrl *p, - data1_att *att) -{ - char *b = n->u.data.data; - int remain; - char **map = 0; - - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - - while (map) - { - RecWord wrd; - char buf[GRS_MAX_WORD+1]; - int i, remain; - - /* Skip spaces */ - while (map && *map && **map == *CHR_SPACE) - { - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - else - map = 0; - } - if (!map) - break; - i = 0; - while (map && *map && **map != *CHR_SPACE) - { - char *cp = *map; - - while (i < GRS_MAX_WORD && *cp) - buf[i++] = *(cp++); - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - else - map = 0; - } - if (!i) - return; - buf[i] = '\0'; - (*p->init)(&wrd); /* set defaults */ - wrd.which = Word_String; - wrd.seqno = seqno++; - wrd.u.string = buf; - wrd.attrSet = att->parent->ordinal; - wrd.attrUse = att->locals->local; - (*p->add)(&wrd); - } -} - -static void dumpkeys_phrase(data1_node *n, struct recExtractCtrl *p, - data1_att *att) -{ - char *b = n->u.data.data; - char buf[GRS_MAX_WORD+1], **map = 0; - RecWord wrd; - int i = 0, remain; - - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - - while (remain > 0 && i < GRS_MAX_WORD) - { - while (map && *map && **map == *CHR_SPACE) - { - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - else - map = 0; - } - if (!map) - break; - - if (i && i < GRS_MAX_WORD) - buf[i++] = *CHR_SPACE; - while (map && *map && **map != *CHR_SPACE) - { - char *cp = *map; - - if (i >= GRS_MAX_WORD) - break; - while (i < GRS_MAX_WORD && *cp) - buf[i++] = *(cp++); - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = (*p->map_chrs_input)(&b, remain); - else - map = 0; - } - } - if (!i) - return; - buf[i] = '\0'; - (*p->init)(&wrd); - wrd.which = Word_Phrase; - wrd.seqno = seqno++; - wrd.u.string = buf; - wrd.attrSet = att->parent->ordinal; - wrd.attrUse = att->locals->local; - (*p->add)(&wrd); -} - static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) { + RecWord wrd; + (*p->init)(p, &wrd); /* set defaults */ for (; n; n = n->next) { if (p->flagShowRecords) /* display element description to user */ @@ -304,8 +243,9 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) if (n->which == DATA1N_data) { - data1_node *par = get_parent_tag(n); + data1_node *par = get_parent_tag(p->dh, n); data1_termlist *tlist = 0; + data1_datatype dtype = DATA1K_string; if (p->flagShowRecords) { @@ -328,39 +268,32 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) */ while (!par->u.tag.element) - if (!par->parent || !(par = get_parent_tag(par->parent))) + if (!par->parent || !(par=get_parent_tag(p->dh, par->parent))) break; - if (!par) - tlist = 0; - else if (par->u.tag.element->termlists) - tlist = par->u.tag.element->termlists; - else + if (!par || !(tlist = par->u.tag.element->termlists)) continue; - + if (par->u.tag.element->tag) + dtype = par->u.tag.element->tag->kind; for (; tlist; tlist = tlist->next) { if (p->flagShowRecords) { - printf("%*sIdx: [", (level + 1) * 4, ""); - switch (tlist->structure) - { - case DATA1S_word: printf("w"); break; - case DATA1S_phrase: printf("p"); break; - default: printf("?"); break; - } - printf("] "); - printf("%s:%s [%d]\n", tlist->att->parent->name, - tlist->att->name, tlist->att->value); + printf("%*sIdx: [%s]", (level + 1) * 4, "", + tlist->structure); + printf("%s:%s [%d]\n", + tlist->att->parent->name, + tlist->att->name, tlist->att->value); } - else switch (tlist->structure) + else { - case DATA1S_word: - dumpkeys_word(n, p, tlist->att); break; - case DATA1S_phrase: - dumpkeys_phrase(n, p, tlist->att); break; - default: - logf(LOG_FATAL, "Bad structure type in dumpkeys"); - abort(); + wrd.reg_type = *tlist->structure; + wrd.seqno = seqno; + wrd.string = n->u.data.data; + wrd.length = n->u.data.len; + wrd.attrSet = tlist->att->parent->ordinal; + wrd.attrUse = tlist->att->locals->local; + (*p->add)(&wrd); + seqno = wrd.seqno; } } } @@ -375,26 +308,29 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) static int grs_extract(struct recExtractCtrl *p) { data1_node *n; - NMEM mem = nmem_create(); + NMEM mem; struct grs_read_info gri; seqno = 0; + mem = nmem_create (); gri.readf = p->readf; gri.seekf = p->seekf; + gri.tellf = p->tellf; gri.endf = p->endf; gri.fh = p->fh; gri.offset = p->offset; gri.mem = mem; + gri.dh = p->dh; n = read_grs_type (&gri, p->subType); if (!n) return -1; if (dumpkeys(n, p, 0) < 0) { - data1_free_tree(n); + data1_free_tree(p->dh, n); return -2; } - data1_free_tree(n); + data1_free_tree(p->dh, n); nmem_destroy(mem); return 0; } @@ -402,7 +338,7 @@ static int grs_extract(struct recExtractCtrl *p) /* * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic. */ -static int process_comp(data1_node *n, Z_RecordComposition *c) +static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c) { data1_esetname *eset; Z_Espec1 *espec = 0; @@ -413,7 +349,7 @@ static int process_comp(data1_node *n, Z_RecordComposition *c) case Z_RecordComp_simple: if (c->u.simple->which != Z_ElementSetNames_generic) return 26; /* only generic form supported. Fix this later */ - if (!(eset = data1_getesetbyname(n->u.root.absyn, + if (!(eset = data1_getesetbyname(dh, n->u.root.absyn, c->u.simple->u.generic))) { logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic); @@ -431,8 +367,10 @@ static int process_comp(data1_node *n, Z_RecordComposition *c) switch (p->which) { case Z_ElementSpec_elementSetName: - if (!(eset = data1_getesetbyname(n->u.root.absyn, - p->u.elementSetName))) + if (!(eset = + data1_getesetbyname(dh, + n->u.root.absyn, + p->u.elementSetName))) { logf(LOG_LOG, "Unknown esetname '%s'", p->u.elementSetName); @@ -460,9 +398,15 @@ static int process_comp(data1_node *n, Z_RecordComposition *c) return 26; /* fix */ } if (espec) - return data1_doespec1(n, espec); + { + logf (LOG_LOG, "Element: Espec-1 match"); + return data1_doespec1(dh, n, espec); + } else + { + logf (LOG_DEBUG, "Element: all match"); return -1; + } } static int grs_retrieve(struct recRetrieveCtrl *p) @@ -471,51 +415,63 @@ static int grs_retrieve(struct recRetrieveCtrl *p) data1_node *new; data1_maptab *map; int res, selected = 0; - NMEM mem = nmem_create(); + NMEM mem; struct grs_read_info gri; + mem = nmem_create(); gri.readf = p->readf; gri.seekf = p->seekf; + gri.tellf = p->tellf; gri.endf = NULL; gri.fh = p->fh; gri.offset = 0; gri.mem = mem; + gri.dh = p->dh; + logf (LOG_DEBUG, "grs_retrieve"); node = read_grs_type (&gri, p->subType); -/* node = data1_read_record(p->readf, p->fh, mem); */ if (!node) { - p->diagnostic = 2; + p->diagnostic = 14; + nmem_destroy (mem); return 0; } - if (p->score >= 0 && (new = data1_insert_taggeddata(node, node, "rank", - mem))) + logf (LOG_DEBUG, "grs_retrieve: score"); + if (p->score >= 0 && (new = + data1_insert_taggeddata(p->dh, node, + node, "rank", + mem))) { new->u.data.what = DATA1I_num; - new->u.data.data = new->u.data.lbuf; + new->u.data.data = new->lbuf; sprintf(new->u.data.data, "%d", p->score); new->u.data.len = strlen(new->u.data.data); } - if ((new = data1_insert_taggeddata(node, node, "localControlNumber", mem))) + logf (LOG_DEBUG, "grs_retrieve: localControlNumber"); + if ((new = data1_insert_taggeddata(p->dh, node, node, + "localControlNumber", mem))) { new->u.data.what = DATA1I_text; - new->u.data.data = new->u.data.lbuf; + new->u.data.data = new->lbuf; sprintf(new->u.data.data, "%d", p->localno); new->u.data.len = strlen(new->u.data.data); } + logf (LOG_DEBUG, "grs_retrieve: schemaIdentifier"); if (p->input_format == VAL_GRS1 && node->u.root.absyn && node->u.root.absyn->reference != VAL_NONE) { oident oe; Odr_oid *oid; + int oidtmp[OID_SIZE]; oe.proto = PROTO_Z3950; oe.oclass = CLASS_SCHEMA; oe.value = node->u.root.absyn->reference; - if ((oid = oid_getoidbyent(&oe))) + if ((oid = oid_ent_to_oid (&oe, oidtmp))) { char tmp[128]; + data1_handle dh = p->dh; char *p = tmp; int *ii; @@ -528,8 +484,8 @@ static int grs_retrieve(struct recRetrieveCtrl *p) } *(p++) = '\0'; - if ((new = data1_insert_taggeddata(node, node, "schemaIdentifier", - mem))) + if ((new = data1_insert_taggeddata(dh, node, node, + "schemaIdentifier", mem))) { new->u.data.what = DATA1I_oid; new->u.data.data = nmem_malloc(mem, p - tmp); @@ -539,6 +495,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p) } } + logf (LOG_DEBUG, "grs_retrieve: schema mapping"); /* * Does the requested format match a known schema-mapping? (this reflects * the overlap of schema and formatting which is inherent in the MARC @@ -550,88 +507,94 @@ static int grs_retrieve(struct recRetrieveCtrl *p) if (map->target_absyn_ref == p->input_format) { onode = node; - if (!(node = data1_map_record(onode, map, mem))) + if (!(node = data1_map_record(p->dh, onode, map, mem))) { p->diagnostic = 14; + nmem_destroy (mem); return 0; } - break; } - if (p->comp && (res = process_comp(node, p->comp)) > 0) + logf (LOG_DEBUG, "grs_retrieve: element spec"); + if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0) { p->diagnostic = res; if (onode) - data1_free_tree(onode); - data1_free_tree(node); + data1_free_tree(p->dh, onode); + data1_free_tree(p->dh, node); nmem_destroy(mem); return 0; } else if (p->comp && !res) selected = 1; + logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping"); switch (p->output_format = (p->input_format != VAL_NONE ? p->input_format : VAL_SUTRS)) { data1_marctab *marctab; - int dummy; + int dummy; case VAL_GRS1: - if (!(p->rec_buf = data1_nodetogr(node, selected, p->odr, &dummy))) - p->diagnostic = 2; /* this should be better specified */ + dummy = 0; + if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected, + p->odr, &dummy))) + p->diagnostic = 238; /* not available in requested syntax */ else p->rec_len = -1; break; case VAL_EXPLAIN: - if (!(p->rec_buf = data1_nodetoexplain(node, selected, p->odr))) - p->diagnostic = 2; /* this should be better specified */ + if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; else p->rec_len = -1; break; case VAL_SUMMARY: - if (!(p->rec_buf = data1_nodetosummary(node, selected, p->odr))) - p->diagnostic = 2; + if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected, + p->odr))) + p->diagnostic = 238; else p->rec_len = -1; break; case VAL_SUTRS: - if (!(p->rec_buf = data1_nodetobuf(node, selected, + if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, (int*)&p->rec_len))) - { - p->diagnostic = 2; - break; - } + p->diagnostic = 238; break; case VAL_SOIF: - if (!(p->rec_buf = data1_nodetosoif(node, selected, - (int*)&p->rec_len))) - { - p->diagnostic = 2; - break; - } + if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, + (int*)&p->rec_len))) + p->diagnostic = 238; break; default: + if (!node->u.root.absyn) + { + p->diagnostic = 238; + break; + } for (marctab = node->u.root.absyn->marc; marctab; marctab = marctab->next) if (marctab->reference == p->input_format) break; if (!marctab) { - p->diagnostic = 227; + p->diagnostic = 238; break; } - if (!(p->rec_buf = data1_nodetomarc(marctab, node, selected, - (int*)&p->rec_len))) + if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, + selected, + (int*)&p->rec_len))) { - p->diagnostic = 2; + p->diagnostic = 238; break; } } if (node) - data1_free_tree(node); + data1_free_tree(p->dh, node); if (onode) - data1_free_tree(onode); + data1_free_tree(p->dh, onode); nmem_destroy(mem); return 0; }