1 /* $Id: recgrs.c,v 1.86.2.6 2005-11-23 14:26:05 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
40 struct RecWord_entry **entries;
/* Hash-chain node of a RecWord_list.  Entries that fall in the same
   bucket are singly linked through `next`.  The lookup code in this file
   also reads a RecWord member named `w`; its declaration is not visible
   in this view. */
45 struct RecWord_entry {
47 struct RecWord_entry *next;
/* Create an empty RecWord_list labelled `name`.
   A fresh NMEM is created, and the list header, the bucket array and a
   private copy of `name` are all allocated from it.
   NOTE(review): the loop over hash_size presumably clears every bucket;
   its body, the hash_size assignment and the return are not visible
   here -- confirm against the full file. */
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
59 for (i = 0; i<p->hash_size; i++)
61 p->name = nmem_strdup(m, name);
/* Look `wrd` up in list `l` and add it when it is not there yet.
   Two words are considered equal when attrSet, attrUse, reg_type, length
   and the `length` bytes of string all match.  On a match a "DUP key"
   diagnostic is written to stderr.  Otherwise a new entry is allocated
   from l->nmem, linked at the head of its bucket, and the word (including
   a private copy of the string bytes) is stored in it.
   The return statements are not visible in this view; callers in this
   file test the result as a boolean. */
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
/* NOTE(review): attrSet contributes twice to the hash and attrUse not at
   all; possibly one term was meant to be attrUse.  This only affects the
   bucket distribution, not correctness -- confirm intent. */
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
/* scan the bucket for an identical word */
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
/* not found: create a new entry and push it on the front of the bucket */
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
/* shallow copy first, then replace the string pointer with an owned copy
   (the copy is exactly `length` bytes; no terminator is added) */
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
/* Dispose of a RecWord_list by destroying l->nmem.
   NOTE(review): RecWord_list_create allocates the header, the buckets and
   the name from one NMEM; presumably that is the handle stored in
   l->nmem, so this single call releases everything -- confirm. */
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
106 struct grs_handler *next;
/* Registry of GRS record readers: `handlers` is the head of a singly
   linked list of struct grs_handler nodes (linked through their `next`
   member). */
109 struct grs_handlers {
110 struct grs_handler *handlers;
/* Find the reader registered for `type` and let it read one record.
   `type` is split at its first '.': the part before the dot selects the
   handler by name, the part after it is copied to p->type.  When there is
   no dot, or the dot is the first character, the whole string is used as
   the handler name.
   The selected handler's init hook is invoked (presumably only on first
   use -- the guarding condition is not visible), its clientData is lent
   to `p` for the duration of the read call and stored back afterwards,
   and the tree produced by the reader is returned through *root.
   The return statements are not visible; callers treat a non-zero result
   as "no such filter". */
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
120 if (cp == NULL || cp == type)
/* no usable dot: let cp point at the terminating NUL */
122 cp = strlen(type) + type;
/* NOTE(review): unbounded copy; the capacity of p->type is not visible
   here -- confirm that the sub-type text always fits. */
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
/* exact match on the name part: same prefix and same length */
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
135 gh->clientData = (*gh->type->init)();
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
/* the reader may have replaced its client data; keep the new value */
139 gh->clientData = p->clientData;
/* Register reader type `t` in `h`.
   A handler node is allocated with xmalloc and linked in front of the
   current list head.  The remaining field assignments (and the update of
   h->handlers) are not visible in this view. */
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
/* Allocate the handler registry and register the record readers known to
   this build (sgml, regx, tcl, marc, marcxml, xml, perl, danbib).
   NOTE(review): the gaps between the registration lines suggest some of
   them sit inside conditional-compilation guards that are not visible
   here; the return of `h` is not visible either -- confirm. */
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
/* Tear down the registry created by grs_init.
   `clientData` is the struct grs_handlers pointer.  The handler list is
   walked (gh_next holds the successor) and each reader's destroy hook is
   called with that handler's clientData.
   NOTE(review): the loop header, any guard around the destroy call and
   the release of the nodes are not visible here. */
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
/* State of the small parser for termlist "source" expressions.  The code
   in this file uses the members src (read position), tok and len
   (current token) and lookahead (current token class); their
   declarations are not visible in this view. */
193 struct source_parser {
/* Advance the source-expression lexer by one token.
   Leading spaces are skipped.  A run of characters not contained in
   "<>();,-: " is scanned as a word; otherwise the current character
   itself becomes the lookahead.  Returns the new sp->lookahead.
   NOTE(review): the branch structure and the token bookkeeping (tok/len,
   and the lookahead value 't' that sp_expr tests for) are in lines not
   visible here. */
200 static int sp_lex(struct source_parser *sp)
202 while (*sp->src == ' ')
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
/* Evaluate one source expression for node `n`, leaving the selected bytes
   in wrd->string / wrd->length.
   Forms recognised in the visible code:
     data               text of a data node
     tag                name of a tag node
     attr(NAME)         value of attribute NAME of a tag node
     range(..,START,LEN) sub-range of the value built so far
   A lookahead of 't' denotes a word token.  The return statements and the
   sp_lex calls between the checks are not visible in this view. */
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): with '&&' the search stops as soon as EITHER the length
   matches OR the first sp->len bytes match, so an attribute whose name
   merely has the same length (or the same prefix) is accepted.  An exact
   name match would need '||' here -- confirm and fix in the full file. */
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
/* presumably guarded by a check that p is non-NULL (not visible) */
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
/* second argument: start offset */
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
/* third argument: maximum length */
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
302 if (wrd->string && wrd->length)
/* NOTE(review): no check is visible that start <= wrd->length; if none
   exists in the hidden lines, a large start moves the pointer past the
   data and makes the length negative -- confirm. */
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
/* Evaluate the source expression `src` for node `n`.
   A local parser state is set up (initialisation lines not visible here)
   and handed to sp_expr, whose result is returned unchanged; the selected
   value ends up in `wrd`. */
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
/* Test xpath predicate `p` against node `n`.
   RELATION predicates: only attribute names (leading '@') are supported;
   anything else is logged as a warning and the predicate is ignored.  The
   attributes of `n` are searched for the name; when an operator is given
   only '=' is supported, and the attribute value is compared with
   strcmp.  When no operator is given, existence of the attribute is what
   is tested.
   BOOLEAN predicates: "and" / "or" are evaluated recursively on the left
   and right operands with C short-circuit semantics; any other operator
   is logged as a warning and ignored.
   The exact values returned on the ignore/no-match paths are in lines not
   visible here. */
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' */
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Run the DFA whose state table is `dfaar` over `text`, starting in
   dfaar[0].  For each input character the transitions of the current
   state are scanned for one whose inclusive range [ch[0], ch[1]] contains
   the character.
   NOTE(review): the state switch, the accept test and the return values
   are in lines not visible here; the caller in this file stores the
   result in a variable named `ok`. */
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
/* consume one character and look for a transition that covers it */
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
410 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
411 if (c >= t->ch[0] && c <= t->ch[1])
421 New function, looking for xpath "element" definitions in abs, by
422 tagpath, using a kind of ugly regexp search. The DFA was built while
423 parsing abs, so here we just go through them and try to match
424 against the given tagpath. The first matching entry is returned.
428 Added support for enhanced xelm. Now [] predicates are considered
429 as well, when selecting indexing rules... (why the hell it's called
/* Return the termlists of an xpath element definition that applies to
   `tagpath` for node `n`.
   The candidates are the xp_elements of the abstract syntax attached to
   n's root.  `tagpath` is wrapped as "/<tagpath>\n" and run through a
   candidate's DFA; after a DFA match, the predicates of the xpath
   location steps are checked against the chain of enclosing tag nodes,
   innermost first.  On success that element's termlists are returned.
   NOTE(review): the loop over the candidates and the no-match return are
   in lines not visible here. */
436 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
438 data1_absyn *abs = n->root->u.root.absyn;
439 data1_xpelement *xpe = abs->xp_elements;
442 struct xpath_location_step *xp;
/* room for '/', tagpath, '\n' and the terminator (3 extra bytes needed,
   5 reserved) */
/* NOTE(review): the release of pexpr is not visible in this view; make
   sure every return path, including the early "Got it" return, frees
   it -- confirm. */
445 char *pexpr = xmalloc(strlen(tagpath)+5);
448 sprintf (pexpr, "/%s\n", tagpath);
449 yaz_log(LOG_DEBUG,"Checking tagpath %s",tagpath);
453 ok = dfa_match_first(xpe->dfa->states, pexpr);
455 yaz_log(LOG_DEBUG, " xpath got match %s",xpe->xpath_expr);
457 yaz_log(LOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
461 /* we have to check the predicates up to the root node */
464 /* find the first tag up in the node structure */
465 nn = n; while (nn && nn->which != DATA1N_tag) {
469 /* go from inside out in the node structure, while going
470 backwards through xpath location steps ... */
471 for (i=xpe->xpath_len - 1; i>0; i--) {
473 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
474 i,xp[i].part,nn->u.tag.tag);
476 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
477 yaz_log(LOG_DEBUG," Predicates didn't match");
482 if (nn->which == DATA1N_tag) {
497 yaz_log(LOG_DEBUG,"Got it");
498 return xpe->termlists;
505 1 start element (tag)
507 3 start attr (and attr-exact)
515 Now, if there is a matching xelm described in abs, for the
516 indexed element or the attribute, then the data is handled according
517 to those definitions...
519 modified by pop, 2002-12-13
522 /* add xpath index for an attribute */
/* Visible behaviour: the word is placed in the VAL_IDXPATH attribute set
   and wrd->string / wrd->length are pointed, in turn, at the tag path, at
   the attribute value, and at the tag path again.
   NOTE(review): the attrUse / reg_type assignments and the tokenAdd calls
   that presumably separate these steps are in lines not visible here;
   `name` and `structure` are not referenced in the visible lines. */
523 static void index_xpath_attr (char *tag_path, char *name, char *value,
524 char *structure, struct recExtractCtrl *p,
527 wrd->attrSet = VAL_IDXPATH;
530 wrd->string = tag_path;
531 wrd->length = strlen(tag_path);
538 wrd->length = strlen(value);
544 wrd->string = tag_path;
545 wrd->length = strlen(tag_path);
/* Produce the XPath-related index keys for node `n`.
   `use` tells what is being indexed; callers in this file pass 1 for a
   tag before its children are processed, 2 for a tag after its children,
   and 1016 for a data node.  `wl` is the duplicate filter consulted
   through RecWord_list_lookadd before some keys are emitted, and `level`
   only affects the indentation of the flagShowRecords diagnostics.
   Nothing happens unless the record has no abstract syntax or its
   abstract syntax enables xpath indexing.
   For data nodes: the tag path from the node towards the root is built
   in tag_path_full ("tag/parent/.../"); if an xelm termlist matches, each
   termlist entry is indexed either into the xpath index or into the
   classic attribute-based index; otherwise default xpath indexing may
   apply.
   For tags: the tag path is built the same way and indexed, and for
   use == 1 each XML attribute is indexed as well (name, name=value, and
   per-attribute termlists).
   NOTE(review): many statements, including the switch on the node type
   and all break/return lines, are not visible in this view; the summary
   above is limited to what the visible lines show. */
551 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
552 int level, RecWord *wrd, int use,
553 struct RecWord_list *wl)
556 char tag_path_full[1024];
559 int termlist_only = 1;
563 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
564 if ((!n->root->u.root.absyn) ||
565 (n->root->u.root.absyn->enable_xpath_indexing)) {
572 wrd->string = n->u.data.data;
573 wrd->length = n->u.data.len;
577 /* we have to fetch the whole path to the data tag */
578 for (nn = n; nn; nn = nn->parent) {
579 if (nn->which == DATA1N_tag) {
580 size_t tlen = strlen(nn->u.tag.tag);
/* path would not fit (tag, '/' and terminator): give up silently */
581 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
582 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
584 tag_path_full[flen++] = '/';
586 else if (nn->which == DATA1N_root) break;
589 tag_path_full[flen] = 0;
591 /* If we have a matching termlist... */
592 if (n->root->u.root.absyn &&
593 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
595 for (; tl; tl = tl->next)
597 /* need to copy recword because it may be changed */
599 wrd->reg_type = *tl->structure;
600 /* this is the ! case, so structure is for the xpath index */
601 memcpy (&wrd_tl, wrd, sizeof(*wrd));
603 sp_parse(n, &wrd_tl, tl->source);
605 wrd_tl.attrSet = VAL_IDXPATH;
606 wrd_tl.attrUse = use;
607 if (p->flagShowRecords)
610 printf("%*sXPath index", (level + 1) * 4, "");
611 printf (" XData:\"");
/* show at most the first 40 bytes of the value */
612 for (i = 0; i<wrd_tl.length && i < 40; i++)
613 fputc (wrd_tl.string[i], stdout);
615 if (wrd_tl.length > 40)
617 fputc ('\n', stdout);
620 (*p->tokenAdd)(&wrd_tl);
624 /* this is just the old fashioned attribute based index */
625 wrd_tl.attrSet = (int) (tl->att->parent->reference);
626 wrd_tl.attrUse = tl->att->locals->local;
627 if (p->flagShowRecords)
630 printf("%*sIdx: [%s]", (level + 1) * 4, "",
632 printf("%s:%s [%d] %s",
633 tl->att->parent->name,
634 tl->att->name, tl->att->value,
636 printf (" XData:\"");
637 for (i = 0; i<wrd_tl.length && i < 40; i++)
638 fputc (wrd_tl.string[i], stdout);
640 if (wrd_tl.length > 40)
642 fputc ('\n', stdout);
645 (*p->tokenAdd)(&wrd_tl);
649 /* xpath indexing is done, if there was no termlist given,
650 or no ! in the termlist, and default indexing is enabled... */
651 if (!p->flagShowRecords && !xpdone && !termlist_only)
653 wrd->attrSet = VAL_IDXPATH;
/* tag node: build the same leaf-to-root path for the element itself */
663 for (nn = n; nn; nn = nn->parent)
665 if (nn->which == DATA1N_tag)
667 size_t tlen = strlen(nn->u.tag.tag);
668 if (tlen + flen > (sizeof(tag_path_full)-2))
670 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
672 tag_path_full[flen++] = '/';
674 else if (nn->which == DATA1N_root)
680 wrd->string = tag_path_full;
682 wrd->attrSet = VAL_IDXPATH;
684 if (p->flagShowRecords)
686 printf("%*s tag=", (level + 1) * 4, "");
687 for (i = 0; i<wrd->length && i < 40; i++)
688 fputc (wrd->string[i], stdout);
699 tag_path_full[flen] = 0;
701 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
702 directive, or default xpath indexing is enabled */
/* assignment inside the condition is intentional: do_xpindex becomes 0
   when termlist_only is 1, and only then are the termlists consulted */
703 if (!(do_xpindex = 1 - termlist_only)) {
704 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
705 for (; tl; tl = tl->next)
713 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
716 if (use == 1) /* only for the starting tag... */
718 #define MAX_ATTR_COUNT 50
/* NOTE(review): tll[] is filled per attribute with index i below; the
   guard that keeps i below MAX_ATTR_COUNT is not visible here --
   confirm that one exists. */
719 data1_termlist *tll[MAX_ATTR_COUNT];
723 /* get termlists for attributes, and find out, if we have to do xpath indexing */
724 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
729 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
731 int do_xpindex = 1 - termlist_only;
733 char attr_tag_path_full[1024];
736 /* this could be cached as well */
/* NOTE(review): unbounded sprintf into a 1024-byte buffer; the tag path
   part is limited by int_len but xp->name is not -- a very long
   attribute name could overflow.  Consider snprintf. */
737 sprintf (attr_tag_path_full, "@%s/%.*s",
738 xp->name, int_len, tag_path_full);
740 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
742 /* if there is a ! in the xelm termlist, or default indexing is on,
743 proceed with xpath idx */
746 for (; tl; tl = tl->next)
755 /* attribute (no value) */
758 wrd->string = xp->name;
759 wrd->length = strlen(xp->name);
/* name=value is only indexed when the combination fits into comb */
765 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
767 /* attribute value exact */
768 strcpy (comb, xp->name);
770 strcat (comb, xp->value);
775 wrd->length = strlen(comb);
778 if (RecWord_list_lookadd(wl, wrd))
786 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
788 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the loop above */
792 sprintf (attr_tag_path_full, "@%s/%.*s",
793 xp->name, int_len, tag_path_full);
797 /* If there is a termlist given (=xelm directive) */
798 for (; tl; tl = tl->next)
801 /* add xpath index for the attribute */
802 index_xpath_attr (attr_tag_path_full, xp->name,
803 xp->value, tl->structure,
809 /* add attribute based index for the attribute */
812 (tl->att->parent->reference);
813 wrd->attrUse = tl->att->locals->local;
814 wrd->reg_type = *tl->structure;
815 wrd->string = xp->value;
816 wrd->length = strlen(xp->value);
817 if (RecWord_list_lookadd(wl, wrd))
823 /* if there was no termlist for the given path,
824 or the termlist didn't have a ! element, index
825 the attribute as "w" */
826 if ((!xpdone) && (!termlist_only))
828 index_xpath_attr (attr_tag_path_full, xp->name,
829 xp->value, "w", p, wrd);
/* Index node `n` according to the termlists of its enclosing element.
   Starting at `par`, the tree is climbed (via get_parent_tag) until a tag
   with an element definition is found; nothing is indexed when there is
   none or when that element has no termlists.  The element's tag kind,
   when present, is recorded in dtype.  For every termlist entry the value
   is selected by its source expression (sp_parse), optionally printed
   when flagShowRecords is set, and the word's reg_type, attrSet and
   attrUse are taken from the entry.
   NOTE(review): the early returns and the tokenAdd call are in lines not
   visible here. */
838 static void index_termlist (data1_node *par, data1_node *n,
839 struct recExtractCtrl *p, int level, RecWord *wrd)
841 data1_termlist *tlist = 0;
842 data1_datatype dtype = DATA1K_string;
845 * cycle up towards the root until we find a tag with an att..
846 * this has the effect of indexing locally defined tags with
847 * the attribute of their ancestor in the record.
850 while (!par->u.tag.element)
851 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
853 if (!par || !(tlist = par->u.tag.element->termlists))
855 if (par->u.tag.element->tag)
856 dtype = par->u.tag.element->tag->kind;
858 for (; tlist; tlist = tlist->next)
860 /* consider source */
862 assert(tlist->source);
863 sp_parse(n, wrd, tlist->source);
867 if (p->flagShowRecords)
870 printf("%*sIdx: [%s]", (level + 1) * 4, "",
872 printf("%s:%s [%d] %s",
873 tlist->att->parent->name,
874 tlist->att->name, tlist->att->value,
876 printf (" XData:\"");
/* show at most the first 40 bytes of the value */
877 for (i = 0; i<wrd->length && i < 40; i++)
878 fputc (wrd->string[i], stdout);
880 if (wrd->length > 40)
882 fputc ('\n', stdout);
886 wrd->reg_type = *tlist->structure;
887 wrd->attrSet = (int) (tlist->att->parent->reference);
888 wrd->attrUse = tlist->att->locals->local;
/* Walk the sibling list starting at `n`, recursing into children, and
   emit index keys for every node.
   Per node: with flagShowRecords set, a description of the node is
   printed (record type, element/tag details, data preview).  Tag nodes
   are indexed through index_termlist and, when the record has an abstract
   syntax, through index_xpath with use 1; then the children are
   processed; data nodes are indexed through index_termlist (relative to
   their parent tag) and index_xpath with use 1016; finally tag nodes get
   index_xpath with use 2.
   A negative result from a recursive call is detected; the statement that
   follows that check, and the function's own return values, are not
   visible here. */
895 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
896 RecWord *wrd, struct RecWord_list *wl)
898 for (; n; n = n->next)
900 if (p->flagShowRecords) /* display element description to user */
902 if (n->which == DATA1N_root)
904 printf("%*s", level * 4, "");
905 printf("Record type: '%s'\n", n->u.root.type);
907 else if (n->which == DATA1N_tag)
911 printf("%*s", level * 4, "");
912 if (!(e = n->u.tag.element))
913 printf("Local tag: '%s'\n", n->u.tag.tag);
916 printf("Elm: '%s' ", e->name);
919 data1_tag *t = e->tag;
921 printf("TagNam: '%s' ", t->names->name);
924 printf("%s[%d],", t->tagset->name, t->tagset->type);
927 if (t->which == DATA1T_numeric)
928 printf("%d)", t->value.numeric);
930 printf("'%s')", t->value.string);
937 if (n->which == DATA1N_tag)
939 index_termlist (n, n, p, level, wrd);
940 /* index start tag */
941 if (n->root->u.root.absyn)
942 index_xpath (n, p, level, wrd, 1, wl);
946 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
950 if (n->which == DATA1N_data)
952 data1_node *par = get_parent_tag(p->dh, n);
954 if (p->flagShowRecords)
956 printf("%*s", level * 4, "");
/* long data: print the first 170 and the last 70 bytes only */
958 if (n->u.data.len > 256)
959 printf("'%.170s ... %.70s'\n", n->u.data.data,
960 n->u.data.data + n->u.data.len-70);
961 else if (n->u.data.len > 0)
962 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
968 index_termlist (par, n, p, level, wrd);
970 index_xpath (n, p, level, wrd, 1016, wl);
973 if (n->which == DATA1N_tag)
976 index_xpath (n, p, level, wrd, 2, wl);
979 if (p->flagShowRecords && n->which == DATA1N_root)
981 printf("%*s-------------\n\n", level * 4, "");
/* Extract index keys from an already built data1 tree `n`.
   The schema OID of the tree's abstract syntax is reported through
   p->schemaAdd when it can be resolved, the word template is initialised
   through p->init, and dumpkeys is run with a temporary duplicate filter
   that is destroyed afterwards.
   NOTE(review): the guard around the absyn dereference and the return of
   `r` are in lines not visible here. */
987 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
990 int oidtmp[OID_SIZE];
993 struct RecWord_list *wl = 0;
995 oe.proto = PROTO_Z3950;
996 oe.oclass = CLASS_SCHEMA;
999 oe.value = n->u.root.absyn->reference;
1001 if ((oid_ent_to_oid (&oe, oidtmp)))
1002 (*p->schemaAdd)(p, oidtmp);
1004 (*p->init)(p, &wrd);
1006 wl = RecWord_list_create("grs_extract_tree");
1007 r = dumpkeys(n, p, 0, &wrd, wl);
1008 RecWord_list_destroy(wl);
/* Read one record with the reader selected by p->subType and extract its
   index keys.
   Result codes visible here: RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER when no
   reader matches, RECCTRL_EXTRACT_EOF (presumably when the reader yields
   no tree -- the condition is not visible), RECCTRL_EXTRACT_ERROR when
   the tree has no abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC when
   dumpkeys fails, and RECCTRL_EXTRACT_OK otherwise.
   The tree is normalised first (adjacent text concatenated, converted to
   UTF-8), and both the tree and the duplicate filter are released before
   returning. */
1012 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1016 struct grs_read_info gri;
1018 int oidtmp[OID_SIZE];
1020 struct RecWord_list *wl = 0;
/* hand the caller's I/O callbacks and position to the reader */
1023 gri.readf = p->readf;
1024 gri.seekf = p->seekf;
1025 gri.tellf = p->tellf;
1028 gri.offset = p->offset;
1032 if (read_grs_type (h, &gri, p->subType, &n))
1033 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1035 return RECCTRL_EXTRACT_EOF;
1036 oe.proto = PROTO_Z3950;
1037 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): the two absyn checks below look contradictory when read
   back to back; presumably they sit in different conditional-compilation
   branches that are not visible here -- confirm. */
1039 if (!n->u.root.absyn)
1040 return RECCTRL_EXTRACT_ERROR;
1042 if (n->u.root.absyn)
1044 oe.value = n->u.root.absyn->reference;
1045 if ((oid_ent_to_oid (&oe, oidtmp)))
1046 (*p->schemaAdd)(p, oidtmp);
1048 data1_concat_text(p->dh, mem, n);
1050 /* ensure our data1 tree is UTF-8 */
1051 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1054 data1_pr_tree (p->dh, n, stdout);
1057 wl = RecWord_list_create("grs.sgml");
1059 (*p->init)(p, &wrd);
1060 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1061 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1063 ret_val = RECCTRL_EXTRACT_OK;
1064 data1_free_tree(p->dh, n);
1065 RecWord_list_destroy(wl);
/* Extract entry point: `clientData` is the handler registry built by
   grs_init.  A scratch NMEM is created for the duration of the call and
   the work is delegated to grs_extract_sub.
   NOTE(review): the release of `mem` and the return of `ret` are in lines
   not visible here -- confirm the NMEM is destroyed. */
1070 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1073 NMEM mem = nmem_create ();
1074 struct grs_handlers *h = (struct grs_handlers *) clientData;
1076 ret = grs_extract_sub(h, p, mem);
1082 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Apply the record composition `c` to tree `n`.
   Simple composition: only the generic element-set-name form is accepted
   (otherwise 26); the name is resolved against the tree's abstract syntax
   (25 when unknown).
   Complex composition: the generic element spec is examined; an element
   set name is resolved as above, an external spec must be Espec-1 (25
   otherwise); other forms yield 26.
   When an Espec-1 has been determined, the result of data1_doespec1 is
   returned; the "all match" path's return value is not visible here. */
1084 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1086 data1_esetname *eset;
1087 Z_Espec1 *espec = 0;
1092 case Z_RecordComp_simple:
1093 if (c->u.simple->which != Z_ElementSetNames_generic)
1094 return 26; /* only generic form supported. Fix this later */
1095 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1096 c->u.simple->u.generic)))
1098 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1099 return 25; /* invalid esetname */
1101 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1102 c->u.simple->u.generic);
1105 case Z_RecordComp_complex:
1106 if (c->u.complex->generic)
1108 /* insert check for schema */
1109 if ((p = c->u.complex->generic->elementSpec))
1113 case Z_ElementSpec_elementSetName:
1115 data1_getesetbyname(dh, n->u.root.absyn,
1116 p->u.elementSetName)))
1118 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1119 p->u.elementSetName);
1120 return 25; /* invalid esetname */
1122 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1123 p->u.elementSetName);
1126 case Z_ElementSpec_externalSpec:
1127 if (p->u.externalSpec->which == Z_External_espec1)
1129 yaz_log(LOG_DEBUG, "Got Espec-1");
1130 espec = p->u.externalSpec-> u.espec1;
1134 yaz_log(LOG_LOG, "Unknown external espec.");
1135 return 25; /* bad. what is proper diagnostic? */
1142 return 26; /* fix */
1146 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1147 return data1_doespec1(dh, n, espec);
1151 yaz_log(LOG_DEBUG, "Element: all match");
1156 /* Add Zebra info in separate namespace ...
1159 <metadata xmlns="http://www.indexdata.dk/zebra/">
1161 <localnumber>447</localnumber>
1162 <filename>records/genera.xml</filename>
/* Append Zebra bookkeeping data to `top` as an <idzebra> element in the
   namespace http://www.indexdata.dk/zebra/.  The element receives the
   children size, score, localnumber and filename, taken from
   p->recordSize, p->score, p->localno and p->fname; i2 / i4 are the
   whitespace text nodes inserted between them for layout.
   NOTE(review): the gaps between the lines suggest some children are
   added conditionally; the conditions are not visible here. */
1167 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1170 const char *idzebra_ns[3];
1171 const char *i2 = "\n ";
1172 const char *i4 = "\n ";
/* attribute list for the new tag: name, value (terminator not visible) */
1175 idzebra_ns[0] = "xmlns";
1176 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1179 data1_mk_text (p->dh, mem, i2, top);
1181 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1183 data1_mk_text (p->dh, mem, "\n", top);
1185 data1_mk_text (p->dh, mem, i4, n);
1187 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1191 data1_mk_text (p->dh, mem, i4, n);
1192 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1194 data1_mk_text (p->dh, mem, i4, n);
1195 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1198 data1_mk_text (p->dh, mem, i4, n);
1199 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1201 data1_mk_text (p->dh, mem, i2, n);
/* Retrieve entry point: read the stored record, decorate and map it, and
   render it in the requested syntax.
   Visible steps, in order:
     1. read the record with the reader selected by p->subType;
     2. normalise the tree (concatenate text, convert to UTF-8);
     3. add system tags (size, rank, sysno) under the root tag when the
        abstract syntax maps them, plus the idzebra metadata element for
        XML output;
     4. determine the requested schema from the complex record
        composition and map the record to it when a maptab exists
        (diagnostic 238 when the schema cannot be delivered);
     5. apply a syntax mapping whose target equals p->input_format;
     6. for GRS-1 output, add a schemaIdentifier tag holding the dotted
        OID of the schema;
     7. apply the element specification via process_comp;
     8. convert the tree to the output syntax; p->output_format is
        p->input_format, or VAL_SUTRS when that is VAL_NONE.  Buffers that
        come back from the data1 converters with a length are copied into
        ODR memory; diagnostic 238 is set when a conversion fails.
   `onode` is used for the pre-mapping tree when a mapping is applied.
   NOTE(review): the case labels, several guards and all return statements
   are in lines not visible here. */
1204 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1206 data1_node *node = 0, *onode = 0, *top;
1209 int res, selected = 0;
1211 struct grs_read_info gri;
1212 const char *tagname;
1213 struct grs_handlers *h = (struct grs_handlers *) clientData;
1214 int requested_schema = VAL_NONE;
1215 data1_marctab *marctab;
1218 mem = nmem_create();
1219 gri.readf = p->readf;
1220 gri.seekf = p->seekf;
1221 gri.tellf = p->tellf;
1228 yaz_log(LOG_DEBUG, "grs_retrieve");
1229 if (read_grs_type (h, &gri, p->subType, &node))
1241 data1_concat_text(p->dh, mem, node);
1243 /* ensure our data1 tree is UTF-8 */
1244 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1247 data1_pr_tree (p->dh, node, stdout);
1249 top = data1_get_root_tag (p->dh, node);
1251 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1252 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1254 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1256 dnew->u.data.what = DATA1I_text;
/* the number is formatted into the node's own lbuf buffer */
1257 dnew->u.data.data = dnew->lbuf;
1258 sprintf(dnew->u.data.data, "%d", p->recordSize);
1259 dnew->u.data.len = strlen(dnew->u.data.data);
1262 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1264 if (tagname && p->score >= 0 &&
1265 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1267 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1268 dnew->u.data.what = DATA1I_num;
1269 dnew->u.data.data = dnew->lbuf;
1270 sprintf(dnew->u.data.data, "%d", p->score);
1271 dnew->u.data.len = strlen(dnew->u.data.data);
1274 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1275 "localControlNumber");
1276 if (tagname && p->localno > 0 &&
1277 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1279 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1280 dnew->u.data.what = DATA1I_text;
1281 dnew->u.data.data = dnew->lbuf;
1283 sprintf(dnew->u.data.data, "%d", p->localno);
1284 dnew->u.data.len = strlen(dnew->u.data.data);
1287 if (p->input_format == VAL_TEXT_XML)
1288 zebra_xml_metadata (p, top, mem);
1291 data1_pr_tree (p->dh, node, stdout);
/* newer YAZ: the schema is a tagged union and must be of the OID kind;
   the second variant below, presumably the matching #else branch, reads
   the schema OID directly */
1293 #if YAZ_VERSIONL >= 0x010903L
1294 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1295 p->comp->u.complex->generic &&
1296 p->comp->u.complex->generic->which == Z_Schema_oid &&
1297 p->comp->u.complex->generic->schema.oid)
1299 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1301 requested_schema = oe->value;
1304 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1305 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1307 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1309 requested_schema = oe->value;
1313 /* If schema has been specified, map if possible, then check that
1314 * we got the right one
1316 if (requested_schema != VAL_NONE)
1318 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1319 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1321 if (map->target_absyn_ref == requested_schema)
1324 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1333 if (node->u.root.absyn &&
1334 requested_schema != node->u.root.absyn->reference)
1336 p->diagnostic = 238;
1342 * Does the requested format match a known syntax-mapping? (this reflects
1343 * the overlap of schema and formatting which is inherent in the MARC
1346 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1347 if (node->u.root.absyn)
1348 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1350 if (map->target_absyn_ref == p->input_format)
1353 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1362 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1363 if (node->u.root.absyn &&
1364 node->u.root.absyn->reference != VAL_NONE &&
1365 p->input_format == VAL_GRS1)
1369 int oidtmp[OID_SIZE];
1371 oe.proto = PROTO_Z3950;
1372 oe.oclass = CLASS_SCHEMA;
1373 oe.value = node->u.root.absyn->reference;
1375 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
/* inside this scope `p` is re-declared as a char cursor into tmp (see the
   sprintf below), so the control block's handle is saved in dh first */
1378 data1_handle dh = p->dh;
/* the OID array is terminated by a negative element */
1382 for (ii = oid; *ii >= 0; ii++)
1386 sprintf(p, "%d", *ii);
1389 if ((dnew = data1_mk_tag_data_wd(dh, top,
1390 "schemaIdentifier", mem)))
1392 dnew->u.data.what = DATA1I_oid;
1393 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1394 memcpy(dnew->u.data.data, tmp, p - tmp);
1395 dnew->u.data.len = p - tmp;
1400 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1401 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1403 p->diagnostic = res;
1405 data1_free_tree(p->dh, onode);
1406 data1_free_tree(p->dh, node);
1410 else if (p->comp && !res)
1414 data1_pr_tree (p->dh, node, stdout);
1416 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1417 switch (p->output_format = (p->input_format != VAL_NONE ?
1418 p->input_format : VAL_SUTRS))
1423 data1_pr_tree (p->dh, node, stdout);
1427 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1429 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1431 p->diagnostic = 238;
/* copy the converter's buffer into ODR memory */
1434 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1435 memcpy (new_buf, p->rec_buf, p->rec_len);
1436 p->rec_buf = new_buf;
1441 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1443 p->diagnostic = 238; /* not available in requested syntax */
/* rec_len is set to (size_t)-1 here and in the next two conversions,
   whose results are not copied as byte buffers */
1445 p->rec_len = (size_t) (-1);
1448 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1450 p->diagnostic = 238;
1452 p->rec_len = (size_t) (-1);
1455 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1457 p->diagnostic = 238;
1459 p->rec_len = (size_t) (-1);
1463 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1464 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1466 p->diagnostic = 238;
1469 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1470 memcpy (new_buf, p->rec_buf, p->rec_len);
1471 p->rec_buf = new_buf;
1475 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1477 p->diagnostic = 238;
1480 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1481 memcpy (new_buf, p->rec_buf, p->rec_len);
1482 p->rec_buf = new_buf;
1486 if (!node->u.root.absyn)
1488 p->diagnostic = 238;
/* look for a MARC table whose reference equals the requested syntax */
1491 for (marctab = node->u.root.absyn->marc; marctab;
1492 marctab = marctab->next)
1493 if (marctab->reference == p->input_format)
1497 p->diagnostic = 238;
1501 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1502 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1503 selected, &p->rec_len)))
1504 p->diagnostic = 238;
1507 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1508 memcpy (new_buf, p->rec_buf, p->rec_len);
1509 p->rec_buf = new_buf;
1513 data1_free_tree(p->dh, node);
1515 data1_free_tree(p->dh, onode);
/* File-local record-type descriptor for this filter.  Its initializer is
   not visible in this view.
   NOTE(review): presumably it wires in grs_init, grs_destroy, grs_extract
   and grs_retrieve -- confirm against the full file. */
1520 static struct recType grs_type =
/* Exported handle for this record type: points at the file-local
   grs_type descriptor. */
1529 RecType recTypeGrs = &grs_type;