1 /* $Id: recgrs.c,v 1.86.2.14 2006-11-03 10:11:37 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
40 struct RecWord_entry **entries;
// Chain node of the duplicate-detection table. RecWord_list_lookadd() links
// nodes through next and keeps a copy of each word in the w member.
45 struct RecWord_entry {
47 struct RecWord_entry *next;
// Create an empty word list labelled with name.
// The list header, the bucket array and the copied name are all allocated
// from one freshly created NMEM.
// NOTE(review): the assignment of hash_size is not visible in this excerpt.
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
// one chain head per hash bucket
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
59 for (i = 0; i<p->hash_size; i++)
// the name is copied; RecWord_list_lookadd() prints it in its DUP diagnostic
61 p->name = nmem_strdup(m, name);
// Look up wrd in list l and add a copy of it when it is not yet present.
// Two words are equal when attrSet, attrUse, reg_type, length and the string
// bytes all match; the lines shown report such a match on stderr.
// NOTE(review): the return statements are not visible in this excerpt;
// callers use the result as a boolean -- confirm which case is non-zero.
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
// NOTE(review): attrSet contributes twice to the bucket index and attrUse not
// at all, so words differing only in attrUse always share a bucket. Lookup
// stays correct (attrUse is compared below) but chains get longer; possibly
// attrUse was intended for one of the two terms.
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
// not found: take a node from the NMEM of the list and link it to the chain
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
// shallow copy of the word, then a private copy of the string bytes
// (exactly wrd->length bytes, no terminator is added)
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
// Release list l by destroying the NMEM stored in l->nmem.
// RecWord_list_create() allocates the list itself from its NMEM, so l should
// not be used after this call.
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
106 struct grs_handler *next;
// Registry of record filters: head of the singly linked grs_handler list.
109 struct grs_handlers {
110 struct grs_handler *handlers;
// Pick the handler named by type and let it read one record.
// type is split at its first dot: the part before the dot is compared with
// the type name of each registered handler. When there is no dot, or the dot
// is the first character, the whole string is used as the handler name.
// The read callback of the matching handler stores its result through root.
// NOTE(review): return statements are not visible in this excerpt; callers
// treat a non-zero result as an unknown filter.
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
120 if (cp == NULL || cp == type)
// no usable separator: cp points at the terminating NUL of type
122 cp = strlen(type) + type;
// text after the separator becomes the sub-type passed to the reader
// NOTE(review): unbounded strcpy; the size of p->type is not visible here
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
// exact match: same leading bytes and the handler name ends at that length
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
// the init callback of the handler supplies its private clientData
135 gh->clientData = (*gh->type->init)();
// clientData travels through p so the reader may replace it
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
139 gh->clientData = p->clientData;
// Register filter type t in registry h: a new handler node is allocated with
// xmalloc and linked in ahead of the current head of h->handlers.
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
// Allocate the handler registry and register the record filters listed below.
// NOTE(review): gaps in the original line numbering between some of the
// registrations suggest conditional compilation -- not visible here.
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
// Tear down the registry passed as clientData. The destroy callback of a
// handler is invoked with the clientData of that handler.
// NOTE(review): the loop and the freeing of the nodes are not visible here.
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
// State of the parser for termlist source expressions. The functions below
// use its src, tok, len and lookahead members (declarations not visible here).
193 struct source_parser {
// Scan the next token from sp->src and return the new lookahead.
// Leading blanks are skipped. A run of characters outside the delimiter set
// below forms a word token; sp_expr() recognises words by the lookahead
// code t. In the line shown at 215 the current character itself becomes the
// lookahead.
200 static int sp_lex(struct source_parser *sp)
202 while (*sp->src == ' ')
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
// Evaluate one source expression against node n and store the selected text
// in wrd->string and wrd->length. Words recognised in the lines shown:
//   data         text of a data node
//   tag          tag name of a tag node
//   attr(NAME)   value of the attribute NAME of a tag node
//   range(..., START, LEN)   earlier result advanced by START bytes and
//                            capped at LEN bytes (parsing of the first
//                            argument is not visible in this excerpt)
// NOTE(review): return statements are not visible in this excerpt.
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
// NOTE(review): the two tests are joined with &&, so the search stops as soon
// as EITHER the lengths are equal OR the first sp->len bytes are equal. An
// attribute with the same length but another name, or one that merely starts
// with the token, is therefore accepted. Joining the tests with || (skip
// while the length differs or the bytes differ) looks intended.
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
302 if (wrd->string && wrd->length)
// NOTE(review): no visible check that start <= wrd->length; a larger start
// moves string past the data and makes length wrap or go negative.
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
// Evaluate the source expression src for node n into wrd, using a parser
// state local to this call. Returns whatever sp_expr() returns.
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
// Evaluate XPath predicate p against node n.
// Relation predicates are limited to attribute tests: the name must start
// with @ and the only supported operator is =. Other forms are logged with
// LOG_WARN. Boolean predicates combine the results of their left and right
// sub-predicates with and / or, evaluated recursively on the same node.
// NOTE(review): most return statements are not visible in this excerpt.
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
// skip the leading @ to get the bare attribute name
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
// Run text through the DFA whose start state is dfaar[0]. For each input
// character the transitions of the current state are searched for one whose
// inclusive range ch[0]..ch[1] contains the character.
// NOTE(review): return statements are not visible in this excerpt; the
// caller asserts that the result is 0 or 1.
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
412 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
413 if (c >= t->ch[0] && c <= t->ch[1])
423 New function, looking for xpath "element" definitions in abs, by
424 tagpath, using a kind of ugly regexp search. The DFA was built while
425 parsing abs, so here we just go through them and try to match
426 against the given tagpath. The first matching entry is returned.
430 Added support for enhanced xelm. Now [] predicates are considered
431 as well, when selecting indexing rules... (why the hell it's called
// Find the termlists of the first xpath element of the abstract syntax of
// n that matches tagpath.
// The tagpath is wrapped as a slash, the path and a newline before it is fed
// to the DFA of each element. DFA results are cached in match_state and
// copied along the match_next chain so that elements sharing a regexp are
// matched only once. For a DFA match the predicates of the location steps
// are then checked against the tag nodes, walking from n outwards.
// NOTE(review): the release of pexpr and the not-found return are not
// visible in this excerpt -- confirm pexpr is freed on every path.
438 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
440 data1_absyn *abs = n->root->u.root.absyn;
441 data1_xpelement *xpe = 0;
444 struct xpath_location_step *xp;
// room for the leading slash, the trailing newline and the terminator
446 char *pexpr = xmalloc(strlen(tagpath)+5);
448 sprintf (pexpr, "/%s\n", tagpath);
450 yaz_log(LOG_DEBUG, "Checking tagpath %s", tagpath);
453 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
454 xpe->match_state = -1; /* don't know if it matches yet */
456 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
459 int ok = xpe->match_state;
462 { /* don't know whether there is a match yet */
463 data1_xpelement *xpe1;
466 ok = dfa_match_first(xpe->dfa->states, pexpr);
468 /* mark this and following ones with same regexp */
469 for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
470 xpe1->match_state = ok;
473 assert (ok == 0 || ok == 1);
476 /* we have to check the predicates up to the root node */
479 /* find the first tag up in the node structure */
480 nn = n; while (nn && nn->which != DATA1N_tag) {
484 /* go from inside out in the node structure, while going
485 backwards through xpath location steps ... */
486 for (i=xpe->xpath_len - 1; i>0; i--) {
488 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
489 i,xp[i].part,nn->u.tag.tag);
491 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
492 yaz_log(LOG_DEBUG," Predicates didn't match");
497 if (nn->which == DATA1N_tag) {
511 yaz_log(LOG_DEBUG,"Got it");
512 return xpe->termlists;
519 1 start element (tag)
521 3 start attr (and attr-exact)
529 Now, if there is a matching xelm described in abs, for the
530 indexed element or the attribute, then the data is handled according
531 to those definitions...
533 modified by pop, 2002-12-13
536 /* add xpath index for an attribute */
// Prepare the XPath index words for one attribute. In the lines shown the
// word is placed in the VAL_IDXPATH attribute set and pointed first at
// tag_path, then given the length of value, then pointed at tag_path again.
// NOTE(review): the attrUse assignments, the use of name and structure, and
// the calls that emit the word are not visible in this excerpt.
537 static void index_xpath_attr (char *tag_path, char *name, char *value,
538 char *structure, struct recExtractCtrl *p,
541 wrd->attrSet = VAL_IDXPATH;
544 wrd->string = tag_path;
545 wrd->length = strlen(tag_path);
552 wrd->length = strlen(value);
558 wrd->string = tag_path;
559 wrd->length = strlen(tag_path);
// Add the XPath related index entries for node n.
// dumpkeys() calls this with use 1 for a start tag, 1016 for a data node and
// 2 after the children of a tag have been processed. The function builds the
// tag path of n by walking the parent chain, asks
// xpath_termlist_by_tagpath() for matching xelm termlists and emits words
// through p->tokenAdd. With p->flagShowRecords set the entries are printed
// to stdout. For start tags the attributes are handled as well, and wl is
// used via RecWord_list_lookadd() to detect repeated attribute words.
// NOTE(review): many control-flow lines (case labels, braces, else branches)
// are not visible in this excerpt.
565 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
566 int level, RecWord *wrd, int use,
567 struct RecWord_list *wl)
570 char tag_path_full[1024];
573 int termlist_only = 1;
577 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
578 if ((!n->root->u.root.absyn) ||
579 (n->root->u.root.absyn->enable_xpath_indexing)) {
586 wrd->string = n->u.data.data;
587 wrd->length = n->u.data.len;
591 /* we have to fetch the whole path to the data tag */
592 for (nn = n; nn; nn = nn->parent) {
593 if (nn->which == DATA1N_tag) {
594 size_t tlen = strlen(nn->u.tag.tag);
// a path that does not fit the buffer makes the function return silently
595 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
596 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
598 tag_path_full[flen++] = '/';
600 else if (nn->which == DATA1N_root) break;
603 tag_path_full[flen] = 0;
605 /* If we have a matching termlist... */
606 if (n->root->u.root.absyn &&
607 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
611 for (; tl; tl = tl->next)
613 /* need to copy recword because it may be changed */
615 wrd->reg_type = *tl->structure;
616 /* this is the ! case, so structure is for the xpath index */
617 memcpy (&wrd_tl, wrd, sizeof(*wrd));
619 sp_parse(n, &wrd_tl, tl->source);
621 wrd_tl.attrSet = VAL_IDXPATH;
622 wrd_tl.attrUse = use;
623 if (p->flagShowRecords)
626 printf("%*sXPath index", (level + 1) * 4, "");
627 printf (" XData:\"");
628 for (i = 0; i<wrd_tl.length && i < 40; i++)
629 fputc (wrd_tl.string[i], stdout);
631 if (wrd_tl.length > 40)
633 fputc ('\n', stdout);
636 (*p->tokenAdd)(&wrd_tl);
640 /* this is just the old fashioned attribute based index */
641 wrd_tl.attrSet = (int) (tl->att->parent->reference);
642 wrd_tl.attrUse = tl->att->locals->local;
643 if (p->flagShowRecords)
646 printf("%*sIdx: [%s]", (level + 1) * 4, "",
648 printf("%s:%s [%d] %s",
649 tl->att->parent->name,
650 tl->att->name, tl->att->value,
652 printf (" XData:\"");
653 for (i = 0; i<wrd_tl.length && i < 40; i++)
654 fputc (wrd_tl.string[i], stdout);
656 if (wrd_tl.length > 40)
658 fputc ('\n', stdout);
661 (*p->tokenAdd)(&wrd_tl);
// remember the highest sequence number handed back in the copies
663 if (wrd_tl.seqno > max_seqno)
664 max_seqno = wrd_tl.seqno;
667 wrd->seqno = max_seqno;
669 /* xpath indexing is done, if there was no termlist given,
670 or no ! in the termlist, and default indexing is enabled... */
671 if (!p->flagShowRecords && !xpdone && !termlist_only)
673 wrd->attrSet = VAL_IDXPATH;
681 for (nn = n; nn; nn = nn->parent)
683 if (nn->which == DATA1N_tag)
685 size_t tlen = strlen(nn->u.tag.tag);
686 if (tlen + flen > (sizeof(tag_path_full)-2))
688 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
690 tag_path_full[flen++] = '/';
692 else if (nn->which == DATA1N_root)
698 wrd->string = tag_path_full;
700 wrd->attrSet = VAL_IDXPATH;
702 if (p->flagShowRecords)
704 printf("%*s tag=", (level + 1) * 4, "");
705 for (i = 0; i<wrd->length && i < 40; i++)
706 fputc (wrd->string[i], stdout);
717 tag_path_full[flen] = 0;
719 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
720 directive, or default xpath indexing is enabled */
721 if (!(do_xpindex = 1 - termlist_only)) {
722 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
723 for (; tl; tl = tl->next)
731 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
734 if (use == 1) /* only for the starting tag... */
736 #define MAX_ATTR_COUNT 50
// NOTE(review): the guard keeping the index into tll below MAX_ATTR_COUNT is
// not visible in this excerpt -- confirm one exists.
737 data1_termlist *tll[MAX_ATTR_COUNT];
741 /* get termlists for attributes, and find out, if we have to do xpath indexing */
742 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
747 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
749 int do_xpindex = 1 - termlist_only;
751 char attr_tag_path_full[1024];
754 /* this could be cached as well */
// NOTE(review): only the tag path part is bounded (by int_len); xp->name is
// written with a plain %s, so a long attribute name can overflow the
// 1024 byte buffer. A bounded formatter such as snprintf would avoid that.
755 sprintf (attr_tag_path_full, "@%s/%.*s",
756 xp->name, int_len, tag_path_full);
758 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
760 /* if there is a ! in the xelm termlist, or default indexing is on,
761 proceed with xpath idx */
764 for (; tl; tl = tl->next)
773 /* attribute (no value) */
776 wrd->string = xp->name;
777 wrd->length = strlen(xp->name);
// the combined name and value string is only built when it fits in comb
783 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
785 /* attribute value exact */
786 strcpy (comb, xp->name);
788 strcat (comb, xp->value);
793 wrd->length = strlen(comb);
796 if (RecWord_list_lookadd(wl, wrd))
804 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
806 char attr_tag_path_full[1024];
// NOTE(review): same unbounded %s for xp->name as in the loop before
810 sprintf (attr_tag_path_full, "@%s/%.*s",
811 xp->name, int_len, tag_path_full);
815 /* If there is a termlist given (=xelm directive) */
816 for (; tl; tl = tl->next)
819 /* add xpath index for the attribute */
820 index_xpath_attr (attr_tag_path_full, xp->name,
821 xp->value, tl->structure,
827 /* index attribute value (only path/@attr) */
831 (tl->att->parent->reference);
832 wrd->attrUse = tl->att->locals->local;
833 wrd->reg_type = *tl->structure;
834 wrd->string = xp->value;
835 wrd->length = strlen(xp->value);
836 if (RecWord_list_lookadd(wl, wrd))
842 /* if there was no termlist for the given path,
843 or the termlist didn't have a ! element, index
844 the attribute as "w" */
845 if ((!xpdone) && (!termlist_only))
847 index_xpath_attr (attr_tag_path_full, xp->name,
848 xp->value, "w", p, wrd);
// Index node n according to the termlists of the element definition that
// applies to par. Starting at par the code climbs with get_parent_tag()
// until it reaches a tag that has an element definition; the termlists of
// that element drive the indexing. For every termlist entry the source
// expression is evaluated into wrd and reg_type, attrSet and attrUse are
// taken from the entry. With p->flagShowRecords set a description of the
// entry and up to 40 bytes of the data are printed to stdout.
// NOTE(review): the call that emits the word is not visible in this excerpt.
857 static void index_termlist (data1_node *par, data1_node *n,
858 struct recExtractCtrl *p, int level, RecWord *wrd)
860 data1_termlist *tlist = 0;
861 data1_datatype dtype = DATA1K_string;
864 * cycle up towards the root until we find a tag with an att..
865 * this has the effect of indexing locally defined tags with
866 * the attribute of their ancestor in the record.
869 while (!par->u.tag.element)
870 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
872 if (!par || !(tlist = par->u.tag.element->termlists))
874 if (par->u.tag.element->tag)
875 dtype = par->u.tag.element->tag->kind;
877 for (; tlist; tlist = tlist->next)
879 /* consider source */
881 assert(tlist->source);
882 sp_parse(n, wrd, tlist->source);
886 if (p->flagShowRecords)
889 printf("%*sIdx: [%s]", (level + 1) * 4, "",
891 printf("%s:%s [%d] %s",
892 tlist->att->parent->name,
893 tlist->att->name, tlist->att->value,
895 printf (" XData:\"");
896 for (i = 0; i<wrd->length && i < 40; i++)
897 fputc (wrd->string[i], stdout);
899 if (wrd->length > 40)
901 fputc ('\n', stdout);
905 wrd->reg_type = *tlist->structure;
906 wrd->attrSet = (int) (tlist->att->parent->reference);
907 wrd->attrUse = tlist->att->locals->local;
// Walk the sibling list starting at n, recursing into children with
// level + 1, and generate index words for every node.
//   tag node:  index_termlist(), then index_xpath() with use 1 when the root
//              has an abstract syntax; index_xpath() with use 2 follows in
//              the later tag branch
//   data node: index_termlist() with the parent tag, index_xpath() with
//              use 1016
// With p->flagShowRecords set a description of each node is printed to
// stdout, indented by four columns per level.
// A negative result of the recursive call is tested at 965.
// NOTE(review): the return statements are not visible in this excerpt.
914 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
915 RecWord *wrd, struct RecWord_list *wl)
917 for (; n; n = n->next)
919 if (p->flagShowRecords) /* display element description to user */
921 if (n->which == DATA1N_root)
923 printf("%*s", level * 4, "");
924 printf("Record type: '%s'\n", n->u.root.type);
926 else if (n->which == DATA1N_tag)
930 printf("%*s", level * 4, "");
931 if (!(e = n->u.tag.element))
932 printf("Local tag: '%s'\n", n->u.tag.tag);
935 printf("Elm: '%s' ", e->name);
938 data1_tag *t = e->tag;
940 printf("TagNam: '%s' ", t->names->name);
943 printf("%s[%d],", t->tagset->name, t->tagset->type);
946 if (t->which == DATA1T_numeric)
947 printf("%d)", t->value.numeric);
949 printf("'%s')", t->value.string);
956 if (n->which == DATA1N_tag)
958 index_termlist (n, n, p, level, wrd);
959 /* index start tag */
960 if (n->root->u.root.absyn)
961 index_xpath (n, p, level, wrd, 1, wl);
965 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
969 if (n->which == DATA1N_data)
971 data1_node *par = get_parent_tag(p->dh, n);
973 if (p->flagShowRecords)
975 printf("%*s", level * 4, "");
// long data is shown as its first 170 and last 70 bytes
977 if (n->u.data.len > 256)
978 printf("'%.170s ... %.70s'\n", n->u.data.data,
979 n->u.data.data + n->u.data.len-70);
980 else if (n->u.data.len > 0)
981 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
987 index_termlist (par, n, p, level, wrd);
989 index_xpath (n, p, level, wrd, 1016, wl);
992 if (n->which == DATA1N_tag)
995 index_xpath (n, p, level, wrd, 2, wl);
998 if (p->flagShowRecords && n->which == DATA1N_root)
1000 printf("%*s-------------\n\n", level * 4, "");
// Generate index keys for an already built data1 tree n.
// When the root has an abstract syntax its reference is converted to a
// schema OID and, if that conversion succeeds, passed to p->schemaAdd. The
// word is initialised through p->init, then dumpkeys() runs with a word list
// that exists only for the duration of this call.
// NOTE(review): the return statement is not visible; r holds the dumpkeys()
// result.
1006 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1009 int oidtmp[OID_SIZE];
1012 struct RecWord_list *wl = 0;
1014 oe.proto = PROTO_Z3950;
1015 oe.oclass = CLASS_SCHEMA;
1016 if (n->u.root.absyn)
1018 oe.value = n->u.root.absyn->reference;
1020 if ((oid_ent_to_oid (&oe, oidtmp)))
1021 (*p->schemaAdd)(p, oidtmp);
1023 (*p->init)(p, &wrd);
1025 wl = RecWord_list_create("grs_extract_tree");
1026 r = dumpkeys(n, p, 0, &wrd, wl);
1027 RecWord_list_destroy(wl);
// Read one record through the filter named by p->subType and index it.
// The read callbacks and the offset of p are copied into a grs_read_info,
// read_grs_type() builds the data1 tree, text nodes are concatenated and the
// tree is converted to UTF-8 before dumpkeys() generates the keys.
// Result codes visible here: RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER when no
// filter matches, RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR,
// RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys() reports a negative result,
// and RECCTRL_EXTRACT_OK otherwise.
// NOTE(review): the conditions guarding several of the returns, and any
// preprocessor conditionals around the absyn checks, are not visible here.
1031 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1035 struct grs_read_info gri;
1037 int oidtmp[OID_SIZE];
1039 struct RecWord_list *wl = 0;
1042 gri.readf = p->readf;
1043 gri.seekf = p->seekf;
1044 gri.tellf = p->tellf;
1047 gri.offset = p->offset;
1051 if (read_grs_type (h, &gri, p->subType, &n))
1052 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1054 return RECCTRL_EXTRACT_EOF;
1055 oe.proto = PROTO_Z3950;
1056 oe.oclass = CLASS_SCHEMA;
1058 if (!n->u.root.absyn)
1059 return RECCTRL_EXTRACT_ERROR;
1061 if (n->u.root.absyn)
1063 oe.value = n->u.root.absyn->reference;
1064 if ((oid_ent_to_oid (&oe, oidtmp)))
1065 (*p->schemaAdd)(p, oidtmp);
1067 data1_concat_text(p->dh, mem, n);
1069 /* ensure our data1 tree is UTF-8 */
1070 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1073 data1_pr_tree (p->dh, n, stdout);
1076 wl = RecWord_list_create("grs.sgml");
1078 (*p->init)(p, &wrd);
1079 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1080 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1082 ret_val = RECCTRL_EXTRACT_OK;
// the tree and the word list are released on both outcomes of dumpkeys()
1083 data1_free_tree(p->dh, n);
1084 RecWord_list_destroy(wl);
// Extract entry point of the record type: creates an NMEM for the call and
// delegates to grs_extract_sub() with the registry passed as clientData.
// NOTE(review): the destruction of mem and the return are not visible here.
1089 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1092 NMEM mem = nmem_create ();
1093 struct grs_handlers *h = (struct grs_handlers *) clientData;
1095 ret = grs_extract_sub(h, p, mem);
1101 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
// Apply record composition c to tree n.
// A simple composition must use the generic element set name form; a complex
// one may carry an element set name or an external Espec-1. Element set
// names are resolved with data1_getesetbyname() against the abstract syntax
// of the root. When an Espec-1 is found, data1_doespec1() is applied to n
// and its result returned.
// Diagnostics returned in the lines shown: 25 for an unknown element set
// name or an unknown external espec, 26 for unsupported forms.
1103 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1105 data1_esetname *eset;
1106 Z_Espec1 *espec = 0;
1111 case Z_RecordComp_simple:
1112 if (c->u.simple->which != Z_ElementSetNames_generic)
1113 return 26; /* only generic form supported. Fix this later */
1114 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1115 c->u.simple->u.generic)))
1117 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1118 return 25; /* invalid esetname */
1120 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1121 c->u.simple->u.generic);
1124 case Z_RecordComp_complex:
1125 if (c->u.complex->generic)
1127 /* insert check for schema */
1128 if ((p = c->u.complex->generic->elementSpec))
1132 case Z_ElementSpec_elementSetName:
1134 data1_getesetbyname(dh, n->u.root.absyn,
1135 p->u.elementSetName)))
1137 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1138 p->u.elementSetName);
1139 return 25; /* invalid esetname */
1141 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1142 p->u.elementSetName);
1145 case Z_ElementSpec_externalSpec:
1146 if (p->u.externalSpec->which == Z_External_espec1)
1148 yaz_log(LOG_DEBUG, "Got Espec-1");
1149 espec = p->u.externalSpec-> u.espec1;
1153 yaz_log(LOG_LOG, "Unknown external espec.");
1154 return 25; /* bad. what is proper diagnostic? */
1161 return 26; /* fix */
1165 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1166 return data1_doespec1(dh, n, espec);
1170 yaz_log(LOG_DEBUG, "Element: all match");
1175 /* Add Zebra info in separate namespace ...
1178 <metadata xmlns="http://www.indexdata.dk/zebra/">
1180 <localnumber>447</localnumber>
1181 <filename>records/genera.xml</filename>
// Append Zebra metadata to the XML record below top: a tag named idzebra
// carrying an xmlns attribute, with size, score, localnumber and filename
// children taken from p. The text nodes made from i2 and i4 only supply
// line breaks and indentation.
// NOTE(review): the example in the comment preceding this function shows a
// metadata element, while the tag created here is named idzebra.
// NOTE(review): conditions guarding the individual children are not visible
// in this excerpt.
1186 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1189 const char *idzebra_ns[3];
1190 const char *i2 = "\n ";
1191 const char *i4 = "\n ";
// attribute list for data1_mk_tag: name followed by value
1194 idzebra_ns[0] = "xmlns";
1195 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1198 data1_mk_text (p->dh, mem, i2, top);
1200 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1202 data1_mk_text (p->dh, mem, "\n", top);
1204 data1_mk_text (p->dh, mem, i4, n);
1206 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1210 data1_mk_text (p->dh, mem, i4, n);
1211 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1213 data1_mk_text (p->dh, mem, i4, n);
1214 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1217 data1_mk_text (p->dh, mem, i4, n);
1218 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1220 data1_mk_text (p->dh, mem, i2, n);
// Retrieve entry point of the record type. Steps visible in this excerpt:
//   1. read the record with the filter named by p->subType, concatenate
//      text nodes and convert the tree to UTF-8
//   2. add system tags (size, rank, sysno) under the root tag, with tag
//      names resolved through data1_systag_lookup(); for VAL_TEXT_XML input
//      format also add the Zebra metadata element
//   3. if the composition names a schema, map the record with the maptab
//      whose target matches it and set diagnostic 238 when the resulting
//      schema differs
//   4. map again when a maptab targets p->input_format
//   5. for VAL_GRS1, add a schemaIdentifier tag holding the schema OID
//   6. apply the element specification via process_comp()
//   7. convert the tree to the output format and store the result in
//      p->rec_buf and p->rec_len; failures set p->diagnostic to 238
// node is the working tree; onode keeps the tree that was mapped from, and
// both are released with data1_free_tree().
// NOTE(review): case labels, returns and several guards are not visible in
// this excerpt.
1223 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1225 data1_node *node = 0, *onode = 0, *top;
1228 int res, selected = 0;
1230 struct grs_read_info gri;
1231 const char *tagname;
1232 struct grs_handlers *h = (struct grs_handlers *) clientData;
1233 int requested_schema = VAL_NONE;
1234 data1_marctab *marctab;
1237 mem = nmem_create();
1238 gri.readf = p->readf;
1239 gri.seekf = p->seekf;
1240 gri.tellf = p->tellf;
1247 yaz_log(LOG_DEBUG, "grs_retrieve");
1248 if (read_grs_type (h, &gri, p->subType, &node))
1260 data1_concat_text(p->dh, mem, node);
1262 /* ensure our data1 tree is UTF-8 */
1263 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1266 data1_pr_tree (p->dh, node, stdout);
1268 top = data1_get_root_tag (p->dh, node);
1270 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1271 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1273 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
// the decimal text is written into the lbuf member of the new node
1275 dnew->u.data.what = DATA1I_text;
1276 dnew->u.data.data = dnew->lbuf;
1277 sprintf(dnew->u.data.data, "%d", p->recordSize);
1278 dnew->u.data.len = strlen(dnew->u.data.data);
1281 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1283 if (tagname && p->score >= 0 &&
1284 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1286 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1287 dnew->u.data.what = DATA1I_num;
1288 dnew->u.data.data = dnew->lbuf;
1289 sprintf(dnew->u.data.data, "%d", p->score);
1290 dnew->u.data.len = strlen(dnew->u.data.data);
1293 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1294 "localControlNumber");
1295 if (tagname && p->localno > 0 &&
1296 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1298 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1299 dnew->u.data.what = DATA1I_text;
1300 dnew->u.data.data = dnew->lbuf;
1302 sprintf(dnew->u.data.data, "%d", p->localno);
1303 dnew->u.data.len = strlen(dnew->u.data.data);
1306 if (p->input_format == VAL_TEXT_XML)
1307 zebra_xml_metadata (p, top, mem);
1310 data1_pr_tree (p->dh, node, stdout);
// two variants of the schema lookup follow, selected by the YAZ version
1312 #if YAZ_VERSIONL >= 0x010903L
1313 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1314 p->comp->u.complex->generic &&
1315 p->comp->u.complex->generic->which == Z_Schema_oid &&
1316 p->comp->u.complex->generic->schema.oid)
1318 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1320 requested_schema = oe->value;
1323 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1324 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1326 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1328 requested_schema = oe->value;
1332 /* If schema has been specified, map if possible, then check that
1333 * we got the right one
1335 if (requested_schema != VAL_NONE)
1337 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1338 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1340 if (map->target_absyn_ref == requested_schema)
1343 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1352 if (node->u.root.absyn &&
1353 requested_schema != node->u.root.absyn->reference)
1355 p->diagnostic = 238;
1361 * Does the requested format match a known syntax-mapping? (this reflects
1362 * the overlap of schema and formatting which is inherent in the MARC
1365 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1366 if (node->u.root.absyn)
1367 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1369 if (map->target_absyn_ref == p->input_format)
1372 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1381 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1382 if (node->u.root.absyn &&
1383 node->u.root.absyn->reference != VAL_NONE &&
1384 p->input_format == VAL_GRS1)
1388 int oidtmp[OID_SIZE];
1390 oe.proto = PROTO_Z3950;
1391 oe.oclass = CLASS_SCHEMA;
1392 oe.value = node->u.root.absyn->reference;
1394 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
// NOTE(review): inside this scope p is used as a char pointer into tmp (see
// the sprintf and the p - tmp arithmetic below), so it shadows the function
// parameter; dh is captured from the parameter first. The declaration and
// size of tmp are not visible here -- confirm it can hold the whole OID.
1397 data1_handle dh = p->dh;
1401 for (ii = oid; *ii >= 0; ii++)
1405 sprintf(p, "%d", *ii);
1408 if ((dnew = data1_mk_tag_data_wd(dh, top,
1409 "schemaIdentifier", mem)))
// exactly p - tmp bytes are copied; no terminator is stored
1411 dnew->u.data.what = DATA1I_oid;
1412 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1413 memcpy(dnew->u.data.data, tmp, p - tmp);
1414 dnew->u.data.len = p - tmp;
1419 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1420 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
// a positive result is a diagnostic: report it and release both trees
1422 p->diagnostic = res;
1424 data1_free_tree(p->dh, onode);
1425 data1_free_tree(p->dh, node);
1429 else if (p->comp && !res)
1433 data1_pr_tree (p->dh, node, stdout);
1435 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
// without a requested input format the output falls back to VAL_SUTRS
1436 switch (p->output_format = (p->input_format != VAL_NONE ?
1437 p->input_format : VAL_SUTRS))
1442 data1_pr_tree (p->dh, node, stdout);
1446 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1448 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1450 p->diagnostic = 238;
// the converted record is copied into memory obtained from p->odr
1453 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1454 memcpy (new_buf, p->rec_buf, p->rec_len);
1455 p->rec_buf = new_buf;
1460 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1462 p->diagnostic = 238; /* not available in requested syntax */
1467 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1469 p->diagnostic = 238;
1474 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1476 p->diagnostic = 238;
1482 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1483 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1485 p->diagnostic = 238;
1488 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1489 memcpy (new_buf, p->rec_buf, p->rec_len);
1490 p->rec_buf = new_buf;
1494 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1496 p->diagnostic = 238;
1499 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1500 memcpy (new_buf, p->rec_buf, p->rec_len);
1501 p->rec_buf = new_buf;
1505 if (!node->u.root.absyn)
1507 p->diagnostic = 238;
// look for a MARC table whose reference equals the requested format
1510 for (marctab = node->u.root.absyn->marc; marctab;
1511 marctab = marctab->next)
1512 if (marctab->reference == p->input_format)
1516 p->diagnostic = 238;
1520 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1521 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1522 selected, &p->rec_len)))
1523 p->diagnostic = 238;
1526 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1527 memcpy (new_buf, p->rec_buf, p->rec_len);
1528 p->rec_buf = new_buf;
1532 data1_free_tree(p->dh, node);
1534 data1_free_tree(p->dh, onode);
// Record type descriptor for GRS (its initialiser is not visible in this
// excerpt) and the non-static handle recTypeGrs that points to it.
1539 static struct recType grs_type =
1548 RecType recTypeGrs = &grs_type;