1 /* $Id: recgrs.c,v 1.86.2.9 2006-02-07 00:22:25 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
40 struct RecWord_entry **entries;
45 struct RecWord_entry {
47 struct RecWord_entry *next;
/* Create a RecWord_list labelled `name`.
   A fresh NMEM `m` is created and the list header, the bucket array
   (hash_size pointers) and a private copy of `name` are all allocated
   from it.
   NOTE(review): this listing elides lines (hash_size setup, loop body,
   storing `m` in the list, the return) -- confirm against the full file. */
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
/* visits every bucket; presumably clears it (loop body not visible) */
59 for (i = 0; i<p->hash_size; i++)
61 p->name = nmem_strdup(m, name);
/* Look `wrd` up in list `l` and add it if it is not there yet.
   An existing entry matches when attrSet, attrUse, reg_type, length and
   the `length` bytes of string are all equal; a match is reported on
   stderr as "DUP key found in <list name>".  Otherwise a new entry is
   allocated from l->nmem, linked in front of the bucket chain, and the
   word plus a private copy of its string bytes are stored in it.
   NOTE(review): the return statements are elided in this listing, so the
   exact return values for "found" / "added" must be confirmed. */
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
/* NOTE(review): attrSet is mixed in twice and attrUse not at all; lookups
   stay correct because equality is checked field by field below, but words
   differing only in attrUse share a bucket -- confirm this is intended. */
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
/* not found: prepend a new entry to the bucket chain */
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
/* shallow copy of the word, then replace the string pointer with an owned
   copy of exactly wrd->length bytes (no terminator is added) */
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
/* Release list `l` by destroying its NMEM (l->nmem). */
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
106 struct grs_handler *next;
109 struct grs_handlers {
110 struct grs_handler *handlers;
/* Find the GRS handler named by `type` and let it read a record.
   `type` is split at its first '.': the part before the dot selects the
   handler, the part after it is copied into p->type.  The matching
   handler's read function is called with `p` and its result is stored in
   *root.
   NOTE(review): the branches around the strcpy, the condition guarding the
   init call and the return values are elided in this listing. */
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
/* no dot, or a leading dot: treat the whole string as the handler name */
120 if (cp == NULL || cp == type)
122 cp = strlen(type) + type;
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
/* exact match: same first cp-type bytes and the handler name ends there */
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
135 gh->clientData = (*gh->type->init)();
/* hand the handler's state to the reader and take it back afterwards */
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
139 gh->clientData = p->clientData;
/* Allocate a handler record for type `t` and chain it in front of the
   current h->handlers list.
   NOTE(review): the lines that fill in the record and update h->handlers
   are elided in this listing. */
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
/* Allocate the handler table and register the GRS record readers
   (sgml, regx, tcl, marc, marcxml, xml, perl, danbib).
   NOTE(review): the gaps in the original line numbers suggest some of
   these registrations sit inside preprocessor conditionals that are not
   visible here; the return statement is elided as well. */
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
/* Tear down the handler table passed as `clientData`: each handler's
   destroy callback is invoked with that handler's clientData.
   NOTE(review): the loop header, any guard on the destroy call and the
   freeing of the records themselves are elided in this listing. */
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
193 struct source_parser {
/* Lexer for termlist "source" expressions: skip blanks, then either
   consume a run of characters up to the next delimiter in "<>();,-: "
   or take the current character itself as the lookahead.
   Returns the new sp->lookahead.
   NOTE(review): the lines recording the token (sp->tok / sp->len) and
   setting the token lookahead are elided; callers compare against 't'. */
200 static int sp_lex(struct source_parser *sp)
202 while (*sp->src == ' ')
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
/* Evaluate one source expression against node `n`, storing the selected
   text in wrd->string / wrd->length.  Recognised forms in view:
     data        - text of a data node
     tag         - name of a tag node
     attr(NAME)  - value of attribute NAME of a tag node
     range(EXPR,START,LEN) - substring of another expression's result
   NOTE(review): error returns and most braces are elided in this listing. */
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): assuming the elided loop body advances p, this stops at
   the first attribute whose name has the same length as the token OR
   starts with the token, because the two tests are joined with &&.
   Skipping every non-matching name would need ||.  Confirm. */
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
/* apply the range to whatever the inner expression produced */
302 if (wrd->string && wrd->length)
/* NOTE(review): no visible check that start <= wrd->length before the
   pointer is advanced and the length reduced -- confirm. */
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
/* Entry point of the source-expression parser: evaluates `src` against
   node `n` via sp_expr and returns its result; `wrd` receives the text.
   NOTE(review): the initialisation of `sp` is elided in this listing. */
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
/* Test node `n` against xpath predicate `p`.
   Relation predicates are only supported on attributes ("@name") and
   only with the '=' operator; anything else is logged as a warning and
   the predicate is ignored.  Boolean predicates "and" / "or" combine the
   results for the left and right sub-predicates recursively; any other
   boolean operator is logged and ignored.
   NOTE(review): the return statements of the relation branch are elided
   in this listing, so what "ignored" returns must be confirmed. */
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' */
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Run the DFA whose state table is `dfaar` over `text`, starting in
   dfaar[0].  For each input character the transitions of the current
   state are scanned for one whose [ch[0], ch[1]] range contains it.
   NOTE(review): the outer loop, the state change and the return values
   are elided in this listing. */
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
412 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
413 if (c >= t->ch[0] && c <= t->ch[1])
423 New function, looking for xpath "element" definitions in abs, by
424 tagpath, using a kind of ugly regexp search. The DFA was built while
425 parsing abs, so here we just go through them and try to match
426 against the given tagpath. The first matching entry is returned.
430 Added support for enhanced xelm. Now [] predicates are considered
431 as well, when selecting indexing rules... (why the hell it's called
/* Return the termlists of the first xpath element definition of n's
   abstract syntax that matches `tagpath`.
   The tag path is wrapped as "/<tagpath>\n" and run through each
   definition's DFA; for a DFA match the predicates of the xpath location
   steps are then checked against the enclosing tag nodes.
   NOTE(review): loop headers, the failure return and any release of
   `pexpr` are elided in this listing -- confirm against the full file. */
438 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
440 data1_absyn *abs = n->root->u.root.absyn;
441 data1_xpelement *xpe = abs->xp_elements;
444 struct xpath_location_step *xp;
/* room for '/', the tag path, '\n' and the terminator */
447 char *pexpr = xmalloc(strlen(tagpath)+5);
450 sprintf (pexpr, "/%s\n", tagpath);
452 yaz_log(LOG_DEBUG, "Checking tagpath %s", tagpath);
457 ok = dfa_match_first(xpe->dfa->states, pexpr);
459 yaz_log(LOG_DEBUG, " xpath got match %s",xpe->xpath_expr);
461 yaz_log(LOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
465 /* we have to check the predicates up to the root node */
468 /* find the first tag up in the node structure */
469 nn = n; while (nn && nn->which != DATA1N_tag) {
473 /* go from inside out in the node structure, while going
474 backwards through xpath location steps ... */
475 for (i=xpe->xpath_len - 1; i>0; i--) {
477 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
478 i,xp[i].part,nn->u.tag.tag);
480 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
481 yaz_log(LOG_DEBUG," Predicates didn't match");
486 if (nn->which == DATA1N_tag) {
501 yaz_log(LOG_DEBUG,"Got it");
502 return xpe->termlists;
509 1 start element (tag)
511 3 start attr (and attr-exact)
519 Now, if there is a matching xelm described in abs, for the
520 indexed element or the attribute, then the data is handled according
521 to those definitions...
523 modified by pop, 2002-12-13
526 /* add xpath index for an attribute */
/* Emit the xpath index entries for one attribute under attribute set
   VAL_IDXPATH.  In view, the word text is set to `tag_path` twice and
   its length to strlen(value) once in between.
   NOTE(review): the attrUse / reg_type assignments and the tokenAdd
   calls that separate these steps are elided in this listing. */
527 static void index_xpath_attr (char *tag_path, char *name, char *value,
528 char *structure, struct recExtractCtrl *p,
531 wrd->attrSet = VAL_IDXPATH;
534 wrd->string = tag_path;
535 wrd->length = strlen(tag_path);
542 wrd->length = strlen(value);
548 wrd->string = tag_path;
549 wrd->length = strlen(tag_path);
/* Generate xpath index entries for node `n`.
   `use` selects the kind of entry; dumpkeys calls this with 1 for a
   start tag, 2 after a tag's children, and 1016 for data nodes.
   The tag path is built by walking from `n` towards the root and joining
   the tag names with '/'.  Termlists found for that path through
   xpath_termlist_by_tagpath decide what is indexed; entries go out
   through p->tokenAdd, and with p->flagShowRecords set they are also
   printed to stdout.
   NOTE(review): the switch on the node type, many braces and several
   assignments are elided in this listing; the comments below only
   describe what is visible. */
555 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
556 int level, RecWord *wrd, int use,
557 struct RecWord_list *wl)
560 char tag_path_full[1024];
563 int termlist_only = 1;
567 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
568 if ((!n->root->u.root.absyn) ||
569 (n->root->u.root.absyn->enable_xpath_indexing)) {
576 wrd->string = n->u.data.data;
577 wrd->length = n->u.data.len;
581 /* we have to fetch the whole path to the data tag */
582 for (nn = n; nn; nn = nn->parent) {
583 if (nn->which == DATA1N_tag) {
584 size_t tlen = strlen(nn->u.tag.tag);
/* give up silently when the path would not fit in tag_path_full */
585 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
586 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
588 tag_path_full[flen++] = '/';
590 else if (nn->which == DATA1N_root) break;
593 tag_path_full[flen] = 0;
595 /* If we have a matching termlist... */
596 if (n->root->u.root.absyn &&
597 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
599 for (; tl; tl = tl->next)
601 /* need to copy recword because it may be changed */
603 wrd->reg_type = *tl->structure;
604 /* this is the ! case, so structure is for the xpath index */
605 memcpy (&wrd_tl, wrd, sizeof(*wrd));
607 sp_parse(n, &wrd_tl, tl->source);
609 wrd_tl.attrSet = VAL_IDXPATH;
610 wrd_tl.attrUse = use;
611 if (p->flagShowRecords)
614 printf("%*sXPath index", (level + 1) * 4, "");
615 printf (" XData:\"");
/* show at most the first 40 bytes of the word */
616 for (i = 0; i<wrd_tl.length && i < 40; i++)
617 fputc (wrd_tl.string[i], stdout);
619 if (wrd_tl.length > 40)
621 fputc ('\n', stdout);
624 (*p->tokenAdd)(&wrd_tl);
628 /* this is just the old fashioned attribute based index */
629 wrd_tl.attrSet = (int) (tl->att->parent->reference);
630 wrd_tl.attrUse = tl->att->locals->local;
631 if (p->flagShowRecords)
634 printf("%*sIdx: [%s]", (level + 1) * 4, "",
636 printf("%s:%s [%d] %s",
637 tl->att->parent->name,
638 tl->att->name, tl->att->value,
640 printf (" XData:\"");
641 for (i = 0; i<wrd_tl.length && i < 40; i++)
642 fputc (wrd_tl.string[i], stdout);
644 if (wrd_tl.length > 40)
646 fputc ('\n', stdout);
649 (*p->tokenAdd)(&wrd_tl);
653 /* xpath indexing is done, if there was no termlist given,
654 or no ! in the termlist, and default indexing is enabled... */
655 if (!p->flagShowRecords && !xpdone && !termlist_only)
657 wrd->attrSet = VAL_IDXPATH;
/* same path construction as above, this time for a tag node */
667 for (nn = n; nn; nn = nn->parent)
669 if (nn->which == DATA1N_tag)
671 size_t tlen = strlen(nn->u.tag.tag);
672 if (tlen + flen > (sizeof(tag_path_full)-2))
674 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
676 tag_path_full[flen++] = '/';
678 else if (nn->which == DATA1N_root)
684 wrd->string = tag_path_full;
686 wrd->attrSet = VAL_IDXPATH;
688 if (p->flagShowRecords)
690 printf("%*s tag=", (level + 1) * 4, "");
691 for (i = 0; i<wrd->length && i < 40; i++)
692 fputc (wrd->string[i], stdout);
703 tag_path_full[flen] = 0;
705 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
706 directive, or default xpath indexing is enabled */
/* the assignment inside the condition is deliberate: do_xpindex starts
   as "default indexing enabled" and the termlist is consulted only when
   that is 0 */
707 if (!(do_xpindex = 1 - termlist_only)) {
708 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
709 for (; tl; tl = tl->next)
717 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
720 if (use == 1) /* only for the starting tag... */
722 #define MAX_ATTR_COUNT 50
723 data1_termlist *tll[MAX_ATTR_COUNT];
727 /* get termlists for attributes, and find out, if we have to do xpath indexing */
728 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
733 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
735 int do_xpindex = 1 - termlist_only;
737 char attr_tag_path_full[1024];
740 /* this could be cached as well */
/* NOTE(review): xp->name is written with an unbounded %s into a
   1024-byte buffer; whether int_len (set in an elided line) leaves
   enough room for the name is not visible here -- confirm. */
741 sprintf (attr_tag_path_full, "@%s/%.*s",
742 xp->name, int_len, tag_path_full);
744 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
746 /* if there is a ! in the xelm termlist, or default indexing is on,
747 proceed with xpath idx */
750 for (; tl; tl = tl->next)
759 /* attribute (no value) */
762 wrd->string = xp->name;
763 wrd->length = strlen(xp->name);
769 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
771 /* attribute value exact */
772 strcpy (comb, xp->name);
774 strcat (comb, xp->value);
779 wrd->length = strlen(comb);
782 if (RecWord_list_lookadd(wl, wrd))
790 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
792 char attr_tag_path_full[1024];
796 sprintf (attr_tag_path_full, "@%s/%.*s",
797 xp->name, int_len, tag_path_full);
801 /* If there is a termlist given (=xelm directive) */
802 for (; tl; tl = tl->next)
805 /* add xpath index for the attribute */
806 index_xpath_attr (attr_tag_path_full, xp->name,
807 xp->value, tl->structure,
813 /* index attribute value (only path/@attr) */
817 (tl->att->parent->reference);
818 wrd->attrUse = tl->att->locals->local;
819 wrd->reg_type = *tl->structure;
820 wrd->string = xp->value;
821 wrd->length = strlen(xp->value);
822 if (RecWord_list_lookadd(wl, wrd))
828 /* if there was no termlist for the given path,
829 or the termlist didn't have a ! element, index
830 the attribute as "w" */
831 if ((!xpdone) && (!termlist_only))
833 index_xpath_attr (attr_tag_path_full, xp->name,
834 xp->value, "w", p, wrd);
/* Index node `n` according to the termlists of its nearest ancestor tag
   that has an element definition, starting the search at `par`.
   For every termlist entry the word text is selected with sp_parse from
   the entry's source expression, and reg_type / attrSet / attrUse are
   taken from the entry.  With p->flagShowRecords set the entry is
   printed to stdout.
   NOTE(review): the early returns, the tokenAdd call and the branch
   structure around the printing are elided in this listing. */
843 static void index_termlist (data1_node *par, data1_node *n,
844 struct recExtractCtrl *p, int level, RecWord *wrd)
846 data1_termlist *tlist = 0;
847 data1_datatype dtype = DATA1K_string;
850 * cycle up towards the root until we find a tag with an att..
851 * this has the effect of indexing locally defined tags with
852 * the attribute of their ancestor in the record.
855 while (!par->u.tag.element)
856 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
858 if (!par || !(tlist = par->u.tag.element->termlists))
860 if (par->u.tag.element->tag)
861 dtype = par->u.tag.element->tag->kind;
863 for (; tlist; tlist = tlist->next)
865 /* consider source */
867 assert(tlist->source);
868 sp_parse(n, wrd, tlist->source);
872 if (p->flagShowRecords)
875 printf("%*sIdx: [%s]", (level + 1) * 4, "",
877 printf("%s:%s [%d] %s",
878 tlist->att->parent->name,
879 tlist->att->name, tlist->att->value,
881 printf (" XData:\"");
/* show at most the first 40 bytes of the word */
882 for (i = 0; i<wrd->length && i < 40; i++)
883 fputc (wrd->string[i], stdout);
885 if (wrd->length > 40)
887 fputc ('\n', stdout);
891 wrd->reg_type = *tlist->structure;
892 wrd->attrSet = (int) (tlist->att->parent->reference);
893 wrd->attrUse = tlist->att->locals->local;
/* Walk the sibling list starting at `n`, recursing into children, and
   produce index keys for every node.
   Tag nodes are indexed with index_termlist and, when the root has an
   abstract syntax, with index_xpath (use 1 before and use 2 after the
   children).  Data nodes are indexed with index_termlist using their
   parent tag and with index_xpath (use 1016).  With p->flagShowRecords
   set, a description of each node is printed to stdout.
   A negative result from a recursive call is tested for.
   NOTE(review): the return statements and several guards are elided in
   this listing. */
900 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
901 RecWord *wrd, struct RecWord_list *wl)
903 for (; n; n = n->next)
905 if (p->flagShowRecords) /* display element description to user */
907 if (n->which == DATA1N_root)
909 printf("%*s", level * 4, "");
910 printf("Record type: '%s'\n", n->u.root.type);
912 else if (n->which == DATA1N_tag)
916 printf("%*s", level * 4, "");
917 if (!(e = n->u.tag.element))
918 printf("Local tag: '%s'\n", n->u.tag.tag);
921 printf("Elm: '%s' ", e->name);
924 data1_tag *t = e->tag;
926 printf("TagNam: '%s' ", t->names->name);
929 printf("%s[%d],", t->tagset->name, t->tagset->type);
932 if (t->which == DATA1T_numeric)
933 printf("%d)", t->value.numeric);
935 printf("'%s')", t->value.string);
942 if (n->which == DATA1N_tag)
944 index_termlist (n, n, p, level, wrd);
945 /* index start tag */
946 if (n->root->u.root.absyn)
947 index_xpath (n, p, level, wrd, 1, wl);
951 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
955 if (n->which == DATA1N_data)
957 data1_node *par = get_parent_tag(p->dh, n);
959 if (p->flagShowRecords)
961 printf("%*s", level * 4, "");
/* long data: print the first 170 and the last 70 bytes only */
963 if (n->u.data.len > 256)
964 printf("'%.170s ... %.70s'\n", n->u.data.data,
965 n->u.data.data + n->u.data.len-70);
966 else if (n->u.data.len > 0)
967 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
973 index_termlist (par, n, p, level, wrd);
975 index_xpath (n, p, level, wrd, 1016, wl);
978 if (n->which == DATA1N_tag)
981 index_xpath (n, p, level, wrd, 2, wl);
984 if (p->flagShowRecords && n->which == DATA1N_root)
986 printf("%*s-------------\n\n", level * 4, "");
/* Extract index keys from an already built data1 tree `n`.
   When the root has an abstract syntax, its reference is turned into a
   Z39.50 schema OID and reported through p->schemaAdd.  The word is
   initialised with p->init and the tree is walked by dumpkeys with a
   temporary RecWord_list that is destroyed afterwards.
   NOTE(review): the return statement is elided in this listing;
   presumably the dumpkeys result `r` is returned -- confirm. */
992 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
995 int oidtmp[OID_SIZE];
998 struct RecWord_list *wl = 0;
1000 oe.proto = PROTO_Z3950;
1001 oe.oclass = CLASS_SCHEMA;
1002 if (n->u.root.absyn)
1004 oe.value = n->u.root.absyn->reference;
1006 if ((oid_ent_to_oid (&oe, oidtmp)))
1007 (*p->schemaAdd)(p, oidtmp);
1009 (*p->init)(p, &wrd);
1011 wl = RecWord_list_create("grs_extract_tree");
1012 r = dumpkeys(n, p, 0, &wrd, wl);
1013 RecWord_list_destroy(wl);
/* Read one record with the handler selected by p->subType and extract
   its index keys.
   Visible result codes: RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER when
   read_grs_type fails, RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR,
   RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys returns a negative value,
   and RECCTRL_EXTRACT_OK otherwise.  The tree is converted to UTF-8
   before the keys are generated, and both the tree and the temporary
   RecWord_list are released before returning.
   NOTE(review): the conditions guarding the EOF return and the two
   opposite absyn tests are elided (possibly preprocessor guards). */
1017 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1021 struct grs_read_info gri;
1023 int oidtmp[OID_SIZE];
1025 struct RecWord_list *wl = 0;
/* pass the caller's I/O callbacks and offset on to the reader */
1028 gri.readf = p->readf;
1029 gri.seekf = p->seekf;
1030 gri.tellf = p->tellf;
1033 gri.offset = p->offset;
1037 if (read_grs_type (h, &gri, p->subType, &n))
1038 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1040 return RECCTRL_EXTRACT_EOF;
1041 oe.proto = PROTO_Z3950;
1042 oe.oclass = CLASS_SCHEMA;
1044 if (!n->u.root.absyn)
1045 return RECCTRL_EXTRACT_ERROR;
1047 if (n->u.root.absyn)
1049 oe.value = n->u.root.absyn->reference;
1050 if ((oid_ent_to_oid (&oe, oidtmp)))
1051 (*p->schemaAdd)(p, oidtmp);
1053 data1_concat_text(p->dh, mem, n);
1055 /* ensure our data1 tree is UTF-8 */
1056 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1059 data1_pr_tree (p->dh, n, stdout);
1062 wl = RecWord_list_create("grs.sgml");
1064 (*p->init)(p, &wrd);
1065 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1066 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1068 ret_val = RECCTRL_EXTRACT_OK;
1069 data1_free_tree(p->dh, n);
1070 RecWord_list_destroy(wl);
/* Extract callback: creates a scratch NMEM and delegates to
   grs_extract_sub with the handler table passed as `clientData`.
   NOTE(review): the release of `mem` and the return of `ret` are elided
   in this listing. */
1075 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1078 NMEM mem = nmem_create ();
1079 struct grs_handlers *h = (struct grs_handlers *) clientData;
1081 ret = grs_extract_sub(h, p, mem);
1087 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Apply record composition `c` to tree `n`.
   Simple composition: only the generic element set name form is
   accepted (otherwise 26); an unknown set name gives 25.
   Complex composition: the generic element spec may be an element set
   name (25 when unknown) or an external Espec-1; other external specs
   give 25.  A resolved Espec-1 is applied with data1_doespec1 and its
   result is returned.
   NOTE(review): the switch headers, the assignments of `espec` from a
   named element set and the remaining returns are elided here. */
1089 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1091 data1_esetname *eset;
1092 Z_Espec1 *espec = 0;
1097 case Z_RecordComp_simple:
1098 if (c->u.simple->which != Z_ElementSetNames_generic)
1099 return 26; /* only generic form supported. Fix this later */
1100 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1101 c->u.simple->u.generic)))
1103 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1104 return 25; /* invalid esetname */
1106 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1107 c->u.simple->u.generic);
1110 case Z_RecordComp_complex:
1111 if (c->u.complex->generic)
1113 /* insert check for schema */
1114 if ((p = c->u.complex->generic->elementSpec))
1118 case Z_ElementSpec_elementSetName:
1120 data1_getesetbyname(dh, n->u.root.absyn,
1121 p->u.elementSetName)))
1123 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1124 p->u.elementSetName);
1125 return 25; /* invalid esetname */
1127 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1128 p->u.elementSetName);
1131 case Z_ElementSpec_externalSpec:
1132 if (p->u.externalSpec->which == Z_External_espec1)
1134 yaz_log(LOG_DEBUG, "Got Espec-1");
1135 espec = p->u.externalSpec-> u.espec1;
1139 yaz_log(LOG_LOG, "Unknown external espec.");
1140 return 25; /* bad. what is proper diagnostic? */
1147 return 26; /* fix */
1151 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1152 return data1_doespec1(dh, n, espec);
1156 yaz_log(LOG_DEBUG, "Element: all match");
1161 /* Add Zebra info in separate namespace ...
1164 <metadata xmlns="http://www.indexdata.dk/zebra/">
1166 <localnumber>447</localnumber>
1167 <filename>records/genera.xml</filename>
/* Append an <idzebra> element in the Zebra namespace
   (xmlns="http://www.indexdata.dk/zebra/") to `top`, with child
   elements size, score, localnumber and filename taken from `p`.
   The i2 / i4 text nodes are whitespace inserted between the elements.
   NOTE(review): idzebra_ns[2] and any conditions around the individual
   children are elided in this listing. */
1172 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1175 const char *idzebra_ns[3];
1176 const char *i2 = "\n ";
1177 const char *i4 = "\n ";
1180 idzebra_ns[0] = "xmlns";
1181 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1184 data1_mk_text (p->dh, mem, i2, top);
1186 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1188 data1_mk_text (p->dh, mem, "\n", top);
1190 data1_mk_text (p->dh, mem, i4, n);
1192 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1196 data1_mk_text (p->dh, mem, i4, n);
1197 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1199 data1_mk_text (p->dh, mem, i4, n);
1200 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1203 data1_mk_text (p->dh, mem, i4, n);
1204 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1206 data1_mk_text (p->dh, mem, i2, n);
/* Retrieve callback: read the record with the handler selected by
   p->subType, decorate and map the resulting data1 tree, and render it
   in the requested syntax.
   Visible stages, in order:
     - read the tree, concatenate text nodes, convert to UTF-8
     - add system tags (size, rank, sysno) and, for XML, Zebra metadata
     - map to the requested schema, then to the requested syntax
     - add a schemaIdentifier tag for GRS-1 output
     - apply the element specification via process_comp
     - convert to the output format, setting p->rec_buf / p->rec_len
   Diagnostic 238 is set when a conversion or mapping cannot be done.
   NOTE(review): case labels, braces, early returns and the final NMEM
   cleanup are elided in this listing. */
1209 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1211 data1_node *node = 0, *onode = 0, *top;
1214 int res, selected = 0;
1216 struct grs_read_info gri;
1217 const char *tagname;
1218 struct grs_handlers *h = (struct grs_handlers *) clientData;
1219 int requested_schema = VAL_NONE;
1220 data1_marctab *marctab;
1223 mem = nmem_create();
1224 gri.readf = p->readf;
1225 gri.seekf = p->seekf;
1226 gri.tellf = p->tellf;
1233 yaz_log(LOG_DEBUG, "grs_retrieve");
1234 if (read_grs_type (h, &gri, p->subType, &node))
1246 data1_concat_text(p->dh, mem, node);
1248 /* ensure our data1 tree is UTF-8 */
1249 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1252 data1_pr_tree (p->dh, node, stdout);
1254 top = data1_get_root_tag (p->dh, node);
1256 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1257 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1259 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1261 dnew->u.data.what = DATA1I_text;
/* the number is formatted into the node's own local buffer */
1262 dnew->u.data.data = dnew->lbuf;
1263 sprintf(dnew->u.data.data, "%d", p->recordSize);
1264 dnew->u.data.len = strlen(dnew->u.data.data);
1267 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1269 if (tagname && p->score >= 0 &&
1270 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1272 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1273 dnew->u.data.what = DATA1I_num;
1274 dnew->u.data.data = dnew->lbuf;
1275 sprintf(dnew->u.data.data, "%d", p->score);
1276 dnew->u.data.len = strlen(dnew->u.data.data);
1279 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1280 "localControlNumber");
1281 if (tagname && p->localno > 0 &&
1282 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1284 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1285 dnew->u.data.what = DATA1I_text;
1286 dnew->u.data.data = dnew->lbuf;
1288 sprintf(dnew->u.data.data, "%d", p->localno);
1289 dnew->u.data.len = strlen(dnew->u.data.data);
1292 if (p->input_format == VAL_TEXT_XML)
1293 zebra_xml_metadata (p, top, mem);
1296 data1_pr_tree (p->dh, node, stdout);
/* the schema field of the composition is accessed differently depending
   on the YAZ version */
1298 #if YAZ_VERSIONL >= 0x010903L
1299 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1300 p->comp->u.complex->generic &&
1301 p->comp->u.complex->generic->which == Z_Schema_oid &&
1302 p->comp->u.complex->generic->schema.oid)
1304 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1306 requested_schema = oe->value;
1309 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1310 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1312 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1314 requested_schema = oe->value;
1318 /* If schema has been specified, map if possible, then check that
1319 * we got the right one
1321 if (requested_schema != VAL_NONE)
1323 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1324 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1326 if (map->target_absyn_ref == requested_schema)
1329 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1338 if (node->u.root.absyn &&
1339 requested_schema != node->u.root.absyn->reference)
1341 p->diagnostic = 238;
1347 * Does the requested format match a known syntax-mapping? (this reflects
1348 * the overlap of schema and formatting which is inherent in the MARC
1351 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1352 if (node->u.root.absyn)
1353 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1355 if (map->target_absyn_ref == p->input_format)
1358 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1367 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1368 if (node->u.root.absyn &&
1369 node->u.root.absyn->reference != VAL_NONE &&
1370 p->input_format == VAL_GRS1)
1374 int oidtmp[OID_SIZE];
1376 oe.proto = PROTO_Z3950;
1377 oe.oclass = CLASS_SCHEMA;
1378 oe.value = node->u.root.absyn->reference;
1380 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
/* within this inner scope `p` is a char cursor into `tmp` (see the
   sprintf below), so the control block's data1 handle is saved in `dh` */
1383 data1_handle dh = p->dh;
1387 for (ii = oid; *ii >= 0; ii++)
1391 sprintf(p, "%d", *ii);
1394 if ((dnew = data1_mk_tag_data_wd(dh, top,
1395 "schemaIdentifier", mem)))
1397 dnew->u.data.what = DATA1I_oid;
1398 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1399 memcpy(dnew->u.data.data, tmp, p - tmp);
1400 dnew->u.data.len = p - tmp;
1405 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1406 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1408 p->diagnostic = res;
1410 data1_free_tree(p->dh, onode);
1411 data1_free_tree(p->dh, node);
1415 else if (p->comp && !res)
1419 data1_pr_tree (p->dh, node, stdout);
1421 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
/* with no requested syntax the output defaults to SUTRS */
1422 switch (p->output_format = (p->input_format != VAL_NONE ?
1423 p->input_format : VAL_SUTRS))
1428 data1_pr_tree (p->dh, node, stdout);
1432 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1434 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1436 p->diagnostic = 238;
/* copy the rendered record into ODR memory owned by the caller */
1439 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1440 memcpy (new_buf, p->rec_buf, p->rec_len);
1441 p->rec_buf = new_buf;
1446 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1448 p->diagnostic = 238; /* not available in requested syntax */
1450 p->rec_len = (size_t) (-1);
1453 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1455 p->diagnostic = 238;
1457 p->rec_len = (size_t) (-1);
1460 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1462 p->diagnostic = 238;
1464 p->rec_len = (size_t) (-1);
1468 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1469 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1471 p->diagnostic = 238;
1474 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1475 memcpy (new_buf, p->rec_buf, p->rec_len);
1476 p->rec_buf = new_buf;
1480 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1482 p->diagnostic = 238;
1485 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1486 memcpy (new_buf, p->rec_buf, p->rec_len);
1487 p->rec_buf = new_buf;
1491 if (!node->u.root.absyn)
1493 p->diagnostic = 238;
/* look for a MARC table whose reference equals the requested syntax */
1496 for (marctab = node->u.root.absyn->marc; marctab;
1497 marctab = marctab->next)
1498 if (marctab->reference == p->input_format)
1502 p->diagnostic = 238;
1506 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1507 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1508 selected, &p->rec_len)))
1509 p->diagnostic = 238;
1512 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1513 memcpy (new_buf, p->rec_buf, p->rec_len);
1514 p->rec_buf = new_buf;
1518 data1_free_tree(p->dh, node);
1520 data1_free_tree(p->dh, onode);
1525 static struct recType grs_type =
1534 RecType recTypeGrs = &grs_type;