1 /* $Id: recgrs.c,v 1.86.2.11 2006-09-28 18:38:42 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
/* Bucket array of the word hash table; each slot heads a chain of entries. */
40 struct RecWord_entry **entries;
/* One chain node of the word hash table; nodes in a bucket are linked via `next`. */
45 struct RecWord_entry {
47 struct RecWord_entry *next;
/*
 * Create a word list labelled `name`.
 * A fresh NMEM is created and the list header, its bucket array
 * (hash_size slots) and a private copy of `name` are all allocated from it.
 */
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
/* walks every bucket slot; presumably clears each one -- TODO confirm */
59 for (i = 0; i<p->hash_size; i++)
61 p->name = nmem_strdup(m, name);
/*
 * Look up `wrd` in list `l` and store it when it is new.
 * An entry matches when attrSet, attrUse, reg_type, length and the string
 * bytes are all equal; a match is reported on stderr as a "DUP key".
 * New entries are allocated from l->nmem, linked at the head of their bucket
 * and given their own copy of the string bytes.
 * Callers in this file test the result as a boolean; the exact return values
 * should be confirmed against the return statements.
 */
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
/* NOTE(review): attrSet appears twice and attrUse not at all, although attrUse
   takes part in the equality test below; this only affects bucket
   distribution -- confirm whether attrUse was intended. */
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
/* scan the bucket chain for an identical word */
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
/* not found: create a new entry at the head of the bucket */
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
/* shallow copy of the word, then replace the string pointer by a private copy */
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
/*
 * Dispose of list `l` by destroying the NMEM referenced by l->nmem.
 * Presumably this is the pool the list itself was allocated from, so `l`
 * must not be used afterwards -- TODO confirm.
 */
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
/* link to the next registered handler (singly linked list) */
106 struct grs_handler *next;
/* Container for the registered GRS handlers; `handlers` is the list head. */
109 struct grs_handlers {
110 struct grs_handler *handlers;
/*
 * Select a GRS handler by name and let it read a record.
 * `type` is split at its first '.': the part before it is compared with each
 * handler's type name, the part after it is copied into p->type.
 * The matching handler's clientData is passed in through p->clientData, the
 * handler's read function stores its result in *root, and p->clientData is
 * written back to the handler afterwards.
 * Callers in this file treat a non-zero result as "no such filter".
 */
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
/* no '.' at all, or a leading '.': the whole string names the handler */
120 if (cp == NULL || cp == type)
122 cp = strlen(type) + type;
/* NOTE(review): unbounded copy; the capacity of p->type is not visible here
   -- confirm it can hold any sub-type string. */
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
/* exact match of the handler name against the first cp-type bytes of `type` */
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
/* handler-specific state is created by the handler's init function */
135 gh->clientData = (*gh->type->init)();
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
139 gh->clientData = p->clientData;
/*
 * Register handler type `t` in `h`: a new list node is allocated with
 * xmalloc and linked in front of the current head of h->handlers.
 */
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
/*
 * Allocate the handler container and register the GRS handler types
 * (sgml, regx, tcl, marc, marcxml, xml, perl, danbib).
 * NOTE(review): some registrations may be conditionally compiled -- confirm
 * against the surrounding preprocessor directives.
 */
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
/*
 * Tear down the handler container passed as `clientData`: the handler list
 * is walked and each handler type's destroy function is called with that
 * handler's clientData.
 */
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
/* State of the termlist "source" expression parser; this file uses its
   src, tok, len and lookahead members. */
193 struct source_parser {
/*
 * Lexer for source expressions: advance `sp` to the next token and return
 * the new sp->lookahead.
 * Spaces are skipped; a run of characters not in "<>();,-: " forms a word
 * token, otherwise the current character itself becomes the lookahead.
 * (sp_expr tests for the lookahead value 't' where it expects a word.)
 */
200 static int sp_lex(struct source_parser *sp)
/* skip leading blanks */
202 while (*sp->src == ' ')
/* consume characters up to the next delimiter or end of string */
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
/*
 * Evaluate one source expression against node `n`, setting wrd->string and
 * wrd->length to the selected text.
 * Forms recognised by the visible code:
 *   data          - the data of a DATA1N_data node
 *   tag           - the tag name of a DATA1N_tag node
 *   attr(name)    - the value of an attribute of a DATA1N_tag node
 *   range(e,s,l)  - sub-range (start s, length l) of the result of e
 */
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
/* an expression must start with a word token */
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): as written the search stops at the first attribute whose
   name length equals sp->len OR whose first sp->len bytes equal the token;
   an exact-name match would need `||` between the two tests -- confirm. */
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
/* second argument: start offset */
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
/* third argument: maximum length */
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
/* NOTE(review): no visible check that start does not exceed wrd->length
   before the pointer is advanced -- confirm. */
302 if (wrd->string && wrd->length)
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
/*
 * Parse the source expression `src` for node `n`; the result of sp_expr
 * (which fills in `wrd`) is returned.
 */
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
/*
 * Check XPath predicate `p` against node `n`.
 * Relation predicates are supported only on attributes (name starting with
 * '@') and only with the '=' operator; anything else is logged as a warning.
 * Boolean predicates "and" / "or" are evaluated recursively on both operands;
 * other boolean operators are logged as a warning.
 * The attribute list is read through n->u.tag, so `n` is presumably a tag
 * node -- TODO confirm.
 */
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' */
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/*
 * Run `text` through the DFA whose state table is `dfaar`, starting from
 * state dfaar[0].  A transition applies when the current character lies in
 * the inclusive range t->ch[0] .. t->ch[1].
 * The caller in this file asserts that the result is 0 or 1.
 */
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
/* fetch the next character and scan the transitions of the current state */
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
412 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
413 if (c >= t->ch[0] && c <= t->ch[1])
423 New function, looking for xpath "element" definitions in abs, by
424 tagpath, using a kind of ugly regexp search. The DFA was built while
425 parsing abs, so here we just go through them and try to match
426 against the given tagpath. The first matching entry is returned.
430 Added support for enhanced xelm. Now [] predicates are considered
431 as well, when selecting indexing rules... (why the hell it's called
/*
 * Find the termlist of the first xpath element definition in the abstract
 * syntax of `n` that matches `tagpath`.
 * The path is matched as "/<tagpath>\n" against each definition's DFA; the
 * outcome is cached in match_state and shared by later definitions with the
 * same regexp.  For a regexp match the predicates of the xpath location steps
 * are then checked while walking from `n` towards the root.
 * n->root->u.root.absyn is dereferenced without a check here, so the caller
 * must ensure it is non-null.
 */
438 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
440 data1_absyn *abs = n->root->u.root.absyn;
441 data1_xpelement *xpe = 0;
444 struct xpath_location_step *xp;
/* room for '/', the tagpath, '\n' and the terminating NUL.
   NOTE(review): confirm pexpr is released before every return. */
446 char *pexpr = xmalloc(strlen(tagpath)+5);
448 sprintf (pexpr, "/%s\n", tagpath);
450 yaz_log(LOG_DEBUG, "Checking tagpath %s", tagpath);
453 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
454 xpe->match_state = -1; /* don't know if it matches yet */
456 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
459 int ok = xpe->match_state;
462 { /* don't know whether there is a match yet */
463 data1_xpelement *xpe1;
466 ok = dfa_match_first(xpe->dfa->states, pexpr);
468 /* mark this and following ones with same regexp */
469 for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
471 if (!strcmp(xpe1->regexp, xpe->regexp))
472 xpe1->match_state = ok;
476 assert (ok == 0 || ok == 1);
479 /* we have to check the predicates up to the root node */
482 /* find the first tag up in the node structure */
483 nn = n; while (nn && nn->which != DATA1N_tag) {
487 /* go from inside out in the node structure, while going
488 backwards through xpath location steps ... */
489 for (i=xpe->xpath_len - 1; i>0; i--) {
491 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
492 i,xp[i].part,nn->u.tag.tag);
494 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
495 yaz_log(LOG_DEBUG," Predicates didn't match");
500 if (nn->which == DATA1N_tag) {
514 yaz_log(LOG_DEBUG,"Got it");
515 return xpe->termlists;
522 1 start element (tag)
524 3 start attr (and attr-exact)
532 Now, if there is a matching xelm described in abs, for the
533 indexed element or the attribute, then the data is handled according
534 to those definitions...
536 modified by pop, 2002-12-13
539 /* add xpath index for an attribute */
/* Uses the VAL_IDXPATH attribute set; the visible code points `wrd` first at
   `tag_path`, then sets the length from `value`, then points it at
   `tag_path` again. */
540 static void index_xpath_attr (char *tag_path, char *name, char *value,
541 char *structure, struct recExtractCtrl *p,
544 wrd->attrSet = VAL_IDXPATH;
547 wrd->string = tag_path;
548 wrd->length = strlen(tag_path);
555 wrd->length = strlen(value);
561 wrd->string = tag_path;
562 wrd->length = strlen(tag_path);
/*
 * Produce XPath-related index entries for node `n`.
 * `use` selects what is being indexed; the calls in this file pass 1 for a
 * start tag, 2 after a tag's children have been processed and 1016 for data.
 * The tag path is assembled inside-out (from `n` up to the root) with '/'
 * after each component.  When the abstract syntax has a matching xelm
 * termlist the data is indexed according to it; otherwise default xpath
 * indexing applies when enabled.  For start tags the attributes are indexed
 * as well.  `wl` is used to detect words that were already added.
 */
568 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
569 int level, RecWord *wrd, int use,
570 struct RecWord_list *wl)
573 char tag_path_full[1024];
576 int termlist_only = 1;
580 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
/* no abstract syntax, or xpath indexing explicitly enabled in it */
581 if ((!n->root->u.root.absyn) ||
582 (n->root->u.root.absyn->enable_xpath_indexing)) {
589 wrd->string = n->u.data.data;
590 wrd->length = n->u.data.len;
594 /* we have to fetch the whole path to the data tag */
595 for (nn = n; nn; nn = nn->parent) {
596 if (nn->which == DATA1N_tag) {
597 size_t tlen = strlen(nn->u.tag.tag);
/* give up silently when the path would not fit into tag_path_full */
598 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
599 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
601 tag_path_full[flen++] = '/';
603 else if (nn->which == DATA1N_root) break;
606 tag_path_full[flen] = 0;
608 /* If we have a matching termlist... */
609 if (n->root->u.root.absyn &&
610 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
612 for (; tl; tl = tl->next)
614 /* need to copy recword because it may be changed */
616 wrd->reg_type = *tl->structure;
617 /* this is the ! case, so structure is for the xpath index */
618 memcpy (&wrd_tl, wrd, sizeof(*wrd));
620 sp_parse(n, &wrd_tl, tl->source);
622 wrd_tl.attrSet = VAL_IDXPATH;
623 wrd_tl.attrUse = use;
624 if (p->flagShowRecords)
627 printf("%*sXPath index", (level + 1) * 4, "");
628 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
629 for (i = 0; i<wrd_tl.length && i < 40; i++)
630 fputc (wrd_tl.string[i], stdout);
632 if (wrd_tl.length > 40)
634 fputc ('\n', stdout);
637 (*p->tokenAdd)(&wrd_tl);
641 /* this is just the old fashioned attribute based index */
642 wrd_tl.attrSet = (int) (tl->att->parent->reference);
643 wrd_tl.attrUse = tl->att->locals->local;
644 if (p->flagShowRecords)
647 printf("%*sIdx: [%s]", (level + 1) * 4, "",
649 printf("%s:%s [%d] %s",
650 tl->att->parent->name,
651 tl->att->name, tl->att->value,
653 printf (" XData:\"");
654 for (i = 0; i<wrd_tl.length && i < 40; i++)
655 fputc (wrd_tl.string[i], stdout);
657 if (wrd_tl.length > 40)
659 fputc ('\n', stdout);
662 (*p->tokenAdd)(&wrd_tl);
666 /* xpath indexing is done, if there was no termlist given,
667 or no ! in the termlist, and default indexing is enabled... */
668 if (!p->flagShowRecords && !xpdone && !termlist_only)
670 wrd->attrSet = VAL_IDXPATH;
/* build the inside-out tag path for a tag node */
680 for (nn = n; nn; nn = nn->parent)
682 if (nn->which == DATA1N_tag)
684 size_t tlen = strlen(nn->u.tag.tag);
685 if (tlen + flen > (sizeof(tag_path_full)-2))
687 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
689 tag_path_full[flen++] = '/';
691 else if (nn->which == DATA1N_root)
697 wrd->string = tag_path_full;
699 wrd->attrSet = VAL_IDXPATH;
701 if (p->flagShowRecords)
703 printf("%*s tag=", (level + 1) * 4, "");
704 for (i = 0; i<wrd->length && i < 40; i++)
705 fputc (wrd->string[i], stdout);
716 tag_path_full[flen] = 0;
718 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
719 directive, or default xpath indexing is enabled */
720 if (!(do_xpindex = 1 - termlist_only)) {
721 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
722 for (; tl; tl = tl->next)
730 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
733 if (use == 1) /* only for the starting tag... */
735 #define MAX_ATTR_COUNT 50
/* NOTE(review): tll[] is indexed per attribute below; confirm the index is
   bounded by MAX_ATTR_COUNT. */
736 data1_termlist *tll[MAX_ATTR_COUNT];
740 /* get termlists for attributes, and find out, if we have to do xpath indexing */
741 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
746 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
748 int do_xpindex = 1 - termlist_only;
750 char attr_tag_path_full[1024];
753 /* this could be cached as well */
/* NOTE(review): unbounded sprintf into a 1024-byte buffer; the attribute name
   plus the tag path may exceed it -- confirm the limit applied via int_len. */
754 sprintf (attr_tag_path_full, "@%s/%.*s",
755 xp->name, int_len, tag_path_full);
757 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
759 /* if there is a ! in the xelm termlist, or default indexing is on,
760 proceed with xpath idx */
763 for (; tl; tl = tl->next)
772 /* attribute (no value) */
775 wrd->string = xp->name;
776 wrd->length = strlen(xp->name);
/* only combine name and value when both fit into comb */
782 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
784 /* attribute value exact */
785 strcpy (comb, xp->name);
787 strcat (comb, xp->value);
792 wrd->length = strlen(comb);
795 if (RecWord_list_lookadd(wl, wrd))
803 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
805 char attr_tag_path_full[1024];
809 sprintf (attr_tag_path_full, "@%s/%.*s",
810 xp->name, int_len, tag_path_full);
814 /* If there is a termlist given (=xelm directive) */
815 for (; tl; tl = tl->next)
818 /* add xpath index for the attribute */
819 index_xpath_attr (attr_tag_path_full, xp->name,
820 xp->value, tl->structure,
826 /* index attribute value (only path/@attr) */
830 (tl->att->parent->reference);
831 wrd->attrUse = tl->att->locals->local;
832 wrd->reg_type = *tl->structure;
833 wrd->string = xp->value;
834 wrd->length = strlen(xp->value);
835 if (RecWord_list_lookadd(wl, wrd))
841 /* if there was no termlist for the given path,
842 or the termlist didn't have a ! element, index
843 the attribute as "w" */
844 if ((!xpdone) && (!termlist_only))
846 index_xpath_attr (attr_tag_path_full, xp->name,
847 xp->value, "w", p, wrd);
/*
 * Index node `n` according to the termlists of the nearest tag, starting at
 * `par` and moving up through parent tags, that has an element definition.
 * For every termlist entry the source expression selects the text to index
 * (sp_parse) and the register type, attribute set and attribute use are taken
 * from the termlist entry.  With flagShowRecords set the term is also printed
 * to stdout (first 40 bytes).
 */
856 static void index_termlist (data1_node *par, data1_node *n,
857 struct recExtractCtrl *p, int level, RecWord *wrd)
859 data1_termlist *tlist = 0;
860 data1_datatype dtype = DATA1K_string;
863 * cycle up towards the root until we find a tag with an att..
864 * this has the effect of indexing locally defined tags with
865 * the attribute of their ancestor in the record.
868 while (!par->u.tag.element)
869 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
871 if (!par || !(tlist = par->u.tag.element->termlists))
873 if (par->u.tag.element->tag)
874 dtype = par->u.tag.element->tag->kind;
876 for (; tlist; tlist = tlist->next)
878 /* consider source */
880 assert(tlist->source);
881 sp_parse(n, wrd, tlist->source);
885 if (p->flagShowRecords)
888 printf("%*sIdx: [%s]", (level + 1) * 4, "",
890 printf("%s:%s [%d] %s",
891 tlist->att->parent->name,
892 tlist->att->name, tlist->att->value,
894 printf (" XData:\"");
895 for (i = 0; i<wrd->length && i < 40; i++)
896 fputc (wrd->string[i], stdout);
898 if (wrd->length > 40)
900 fputc ('\n', stdout);
904 wrd->reg_type = *tlist->structure;
905 wrd->attrSet = (int) (tlist->att->parent->reference);
906 wrd->attrUse = tlist->att->locals->local;
/*
 * Walk the sibling chain starting at `n` and index every node.
 * Tag nodes are indexed through their termlists and, when the root has an
 * abstract syntax, through index_xpath (use 1 before the children are
 * processed, use 2 afterwards).  Data nodes are indexed against their parent
 * tag and through index_xpath with use 1016.  Children are handled by
 * recursion; the recursive result is tested for being negative.
 * With flagShowRecords set a description of each node is printed to stdout.
 */
913 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
914 RecWord *wrd, struct RecWord_list *wl)
916 for (; n; n = n->next)
918 if (p->flagShowRecords) /* display element description to user */
920 if (n->which == DATA1N_root)
922 printf("%*s", level * 4, "");
923 printf("Record type: '%s'\n", n->u.root.type);
925 else if (n->which == DATA1N_tag)
929 printf("%*s", level * 4, "");
/* tags without an element definition are reported as local tags */
930 if (!(e = n->u.tag.element))
931 printf("Local tag: '%s'\n", n->u.tag.tag);
934 printf("Elm: '%s' ", e->name);
937 data1_tag *t = e->tag;
939 printf("TagNam: '%s' ", t->names->name);
942 printf("%s[%d],", t->tagset->name, t->tagset->type);
945 if (t->which == DATA1T_numeric)
946 printf("%d)", t->value.numeric);
948 printf("'%s')", t->value.string);
955 if (n->which == DATA1N_tag)
957 index_termlist (n, n, p, level, wrd);
958 /* index start tag */
959 if (n->root->u.root.absyn)
960 index_xpath (n, p, level, wrd, 1, wl);
964 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
968 if (n->which == DATA1N_data)
970 data1_node *par = get_parent_tag(p->dh, n);
972 if (p->flagShowRecords)
974 printf("%*s", level * 4, "");
/* long data: print only the first 170 and the last 70 bytes */
976 if (n->u.data.len > 256)
977 printf("'%.170s ... %.70s'\n", n->u.data.data,
978 n->u.data.data + n->u.data.len-70);
979 else if (n->u.data.len > 0)
980 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
986 index_termlist (par, n, p, level, wrd);
988 index_xpath (n, p, level, wrd, 1016, wl);
991 if (n->which == DATA1N_tag)
994 index_xpath (n, p, level, wrd, 2, wl);
997 if (p->flagShowRecords && n->which == DATA1N_root)
999 printf("%*s-------------\n\n", level * 4, "");
/*
 * Extract index keys from an already built data1 tree `n`.
 * When the root has an abstract syntax its schema reference is converted to
 * an OID and reported through p->schemaAdd.  The word is initialised via
 * p->init, the tree is indexed with dumpkeys using a temporary word list,
 * and that list is destroyed afterwards.
 */
1005 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1008 int oidtmp[OID_SIZE];
1011 struct RecWord_list *wl = 0;
1013 oe.proto = PROTO_Z3950;
1014 oe.oclass = CLASS_SCHEMA;
1015 if (n->u.root.absyn)
1017 oe.value = n->u.root.absyn->reference;
1019 if ((oid_ent_to_oid (&oe, oidtmp)))
1020 (*p->schemaAdd)(p, oidtmp);
1022 (*p->init)(p, &wrd);
1024 wl = RecWord_list_create("grs_extract_tree");
1025 r = dumpkeys(n, p, 0, &wrd, wl);
1026 RecWord_list_destroy(wl);
/*
 * Read one record with the handler selected by p->subType and extract its
 * index keys.
 * Result codes visible here: RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER when no
 * handler matches, RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR,
 * RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys reports a negative result, and
 * RECCTRL_EXTRACT_OK otherwise.
 * Before indexing, adjacent text is concatenated and the tree is converted
 * to UTF-8.  The tree and the temporary word list are released at the end.
 */
1030 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1034 struct grs_read_info gri;
1036 int oidtmp[OID_SIZE];
1038 struct RecWord_list *wl = 0;
/* hand the caller's I/O callbacks and start offset to the reader */
1041 gri.readf = p->readf;
1042 gri.seekf = p->seekf;
1043 gri.tellf = p->tellf;
1046 gri.offset = p->offset;
1050 if (read_grs_type (h, &gri, p->subType, &n))
1051 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1053 return RECCTRL_EXTRACT_EOF;
1054 oe.proto = PROTO_Z3950;
1055 oe.oclass = CLASS_SCHEMA;
1057 if (!n->u.root.absyn)
1058 return RECCTRL_EXTRACT_ERROR;
1060 if (n->u.root.absyn)
1062 oe.value = n->u.root.absyn->reference;
1063 if ((oid_ent_to_oid (&oe, oidtmp)))
1064 (*p->schemaAdd)(p, oidtmp);
1066 data1_concat_text(p->dh, mem, n);
1068 /* ensure our data1 tree is UTF-8 */
1069 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1072 data1_pr_tree (p->dh, n, stdout);
1075 wl = RecWord_list_create("grs.sgml");
1077 (*p->init)(p, &wrd);
1078 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1079 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1081 ret_val = RECCTRL_EXTRACT_OK;
1082 data1_free_tree(p->dh, n);
1083 RecWord_list_destroy(wl);
/*
 * Extract entry point: creates a temporary NMEM and delegates to
 * grs_extract_sub with the handler container passed as `clientData`.
 * NOTE(review): confirm the NMEM is destroyed before returning.
 */
1088 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1091 NMEM mem = nmem_create ();
1092 struct grs_handlers *h = (struct grs_handlers *) clientData;
1094 ret = grs_extract_sub(h, p, mem);
1100 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * Apply record composition `c` to tree `n`.
 * Simple composition accepts only the generic element set name form
 * (diagnostic 26 otherwise); an unknown element set name yields 25.
 * Complex composition accepts an element set name or an external Espec-1
 * specification; an unknown external spec yields 25.
 * When an Espec-1 is selected the result of data1_doespec1 is returned.
 */
1102 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1104 data1_esetname *eset;
1105 Z_Espec1 *espec = 0;
1110 case Z_RecordComp_simple:
1111 if (c->u.simple->which != Z_ElementSetNames_generic)
1112 return 26; /* only generic form supported. Fix this later */
1113 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1114 c->u.simple->u.generic)))
1116 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1117 return 25; /* invalid esetname */
1119 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1120 c->u.simple->u.generic);
1123 case Z_RecordComp_complex:
1124 if (c->u.complex->generic)
1126 /* insert check for schema */
1127 if ((p = c->u.complex->generic->elementSpec))
1131 case Z_ElementSpec_elementSetName:
1133 data1_getesetbyname(dh, n->u.root.absyn,
1134 p->u.elementSetName)))
1136 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1137 p->u.elementSetName);
1138 return 25; /* invalid esetname */
1140 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1141 p->u.elementSetName);
1144 case Z_ElementSpec_externalSpec:
1145 if (p->u.externalSpec->which == Z_External_espec1)
1147 yaz_log(LOG_DEBUG, "Got Espec-1");
1148 espec = p->u.externalSpec-> u.espec1;
1152 yaz_log(LOG_LOG, "Unknown external espec.");
1153 return 25; /* bad. what is proper diagnostic? */
1160 return 26; /* fix */
1164 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1165 return data1_doespec1(dh, n, espec);
1169 yaz_log(LOG_DEBUG, "Element: all match");
1174 /* Add Zebra info in separate namespace ...
1177 <metadata xmlns="http://www.indexdata.dk/zebra/">
1179 <localnumber>447</localnumber>
1180 <filename>records/genera.xml</filename>
/*
 * Append an <idzebra> element (namespace http://www.indexdata.dk/zebra/)
 * under `top`, holding record metadata taken from `p`: size, score,
 * localnumber and filename.  Text nodes built from i2 / i4 are inserted
 * between the elements for layout.
 * NOTE(review): some of the children may be added conditionally -- confirm.
 */
1185 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1188 const char *idzebra_ns[3];
1189 const char *i2 = "\n ";
1190 const char *i4 = "\n ";
/* attribute name/value pair for the namespace declaration */
1193 idzebra_ns[0] = "xmlns";
1194 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1197 data1_mk_text (p->dh, mem, i2, top);
1199 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1201 data1_mk_text (p->dh, mem, "\n", top);
1203 data1_mk_text (p->dh, mem, i4, n);
1205 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1209 data1_mk_text (p->dh, mem, i4, n);
1210 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1212 data1_mk_text (p->dh, mem, i4, n);
1213 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1216 data1_mk_text (p->dh, mem, i4, n);
1217 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1219 data1_mk_text (p->dh, mem, i2, n);
/*
 * Retrieve entry point: read a record with the handler selected by
 * p->subType and convert it to the requested output format.
 * Steps visible in this function, in order:
 *   - read the record, concatenate text and convert the tree to UTF-8;
 *   - add system tags (size, rank, sysno) when data1_systag_lookup yields a
 *     tag name, and Zebra XML metadata when the input format is VAL_TEXT_XML;
 *   - determine a requested schema from a complex composition and map the
 *     record through a matching maptab; diagnostic 238 is set when the
 *     resulting schema differs from the requested one;
 *   - map through a maptab whose target equals the input format;
 *   - add a schemaIdentifier tag for VAL_GRS1 output;
 *   - apply the element specification via process_comp (a positive result
 *     becomes p->diagnostic);
 *   - convert the tree to the output format (VAL_SUTRS when no input format
 *     was given); a failed conversion sets diagnostic 238.
 * Buffers returned with a length are copied into memory from p->odr.
 */
1222 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1224 data1_node *node = 0, *onode = 0, *top;
1227 int res, selected = 0;
1229 struct grs_read_info gri;
1230 const char *tagname;
1231 struct grs_handlers *h = (struct grs_handlers *) clientData;
1232 int requested_schema = VAL_NONE;
1233 data1_marctab *marctab;
1236 mem = nmem_create();
/* hand the caller's I/O callbacks to the reader */
1237 gri.readf = p->readf;
1238 gri.seekf = p->seekf;
1239 gri.tellf = p->tellf;
1246 yaz_log(LOG_DEBUG, "grs_retrieve");
1247 if (read_grs_type (h, &gri, p->subType, &node))
1259 data1_concat_text(p->dh, mem, node);
1261 /* ensure our data1 tree is UTF-8 */
1262 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1265 data1_pr_tree (p->dh, node, stdout);
1267 top = data1_get_root_tag (p->dh, node);
1269 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1270 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1272 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1274 dnew->u.data.what = DATA1I_text;
/* the decimal value is formatted into the node's own lbuf */
1275 dnew->u.data.data = dnew->lbuf;
1276 sprintf(dnew->u.data.data, "%d", p->recordSize);
1277 dnew->u.data.len = strlen(dnew->u.data.data);
1280 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
/* the rank tag is added only for a non-negative score */
1282 if (tagname && p->score >= 0 &&
1283 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1285 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1286 dnew->u.data.what = DATA1I_num;
1287 dnew->u.data.data = dnew->lbuf;
1288 sprintf(dnew->u.data.data, "%d", p->score);
1289 dnew->u.data.len = strlen(dnew->u.data.data);
1292 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1293 "localControlNumber");
/* the sysno tag is added only for a positive local number */
1294 if (tagname && p->localno > 0 &&
1295 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1297 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1298 dnew->u.data.what = DATA1I_text;
1299 dnew->u.data.data = dnew->lbuf;
1301 sprintf(dnew->u.data.data, "%d", p->localno);
1302 dnew->u.data.len = strlen(dnew->u.data.data);
1305 if (p->input_format == VAL_TEXT_XML)
1306 zebra_xml_metadata (p, top, mem);
1309 data1_pr_tree (p->dh, node, stdout);
1311 #if YAZ_VERSIONL >= 0x010903L
1312 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1313 p->comp->u.complex->generic &&
1314 p->comp->u.complex->generic->which == Z_Schema_oid &&
1315 p->comp->u.complex->generic->schema.oid)
1317 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1319 requested_schema = oe->value;
1322 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1323 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1325 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1327 requested_schema = oe->value;
1331 /* If schema has been specified, map if possible, then check that
1332 * we got the right one
1334 if (requested_schema != VAL_NONE)
1336 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1337 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1339 if (map->target_absyn_ref == requested_schema)
1342 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1351 if (node->u.root.absyn &&
1352 requested_schema != node->u.root.absyn->reference)
1354 p->diagnostic = 238;
1360 * Does the requested format match a known syntax-mapping? (this reflects
1361 * the overlap of schema and formatting which is inherent in the MARC
1364 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1365 if (node->u.root.absyn)
1366 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1368 if (map->target_absyn_ref == p->input_format)
1371 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1380 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1381 if (node->u.root.absyn &&
1382 node->u.root.absyn->reference != VAL_NONE &&
1383 p->input_format == VAL_GRS1)
1387 int oidtmp[OID_SIZE];
1389 oe.proto = PROTO_Z3950;
1390 oe.oclass = CLASS_SCHEMA;
1391 oe.value = node->u.root.absyn->reference;
1393 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1396 data1_handle dh = p->dh;
1400 for (ii = oid; *ii >= 0; ii++)
1404 sprintf(p, "%d", *ii);
1407 if ((dnew = data1_mk_tag_data_wd(dh, top,
1408 "schemaIdentifier", mem)))
1410 dnew->u.data.what = DATA1I_oid;
1411 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1412 memcpy(dnew->u.data.data, tmp, p - tmp);
1413 dnew->u.data.len = p - tmp;
1418 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1419 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1421 p->diagnostic = res;
1423 data1_free_tree(p->dh, onode);
1424 data1_free_tree(p->dh, node);
1428 else if (p->comp && !res)
1432 data1_pr_tree (p->dh, node, stdout);
1434 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1435 switch (p->output_format = (p->input_format != VAL_NONE ?
1436 p->input_format : VAL_SUTRS))
1441 data1_pr_tree (p->dh, node, stdout);
1445 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1447 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1449 p->diagnostic = 238;
1452 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1453 memcpy (new_buf, p->rec_buf, p->rec_len);
1454 p->rec_buf = new_buf;
1459 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1461 p->diagnostic = 238; /* not available in requested syntax */
1463 p->rec_len = (size_t) (-1);
1466 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1468 p->diagnostic = 238;
1470 p->rec_len = (size_t) (-1);
1473 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1475 p->diagnostic = 238;
1477 p->rec_len = (size_t) (-1);
1481 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1482 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1484 p->diagnostic = 238;
1487 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1488 memcpy (new_buf, p->rec_buf, p->rec_len);
1489 p->rec_buf = new_buf;
1493 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1495 p->diagnostic = 238;
1498 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1499 memcpy (new_buf, p->rec_buf, p->rec_len);
1500 p->rec_buf = new_buf;
1504 if (!node->u.root.absyn)
1506 p->diagnostic = 238;
1509 for (marctab = node->u.root.absyn->marc; marctab;
1510 marctab = marctab->next)
1511 if (marctab->reference == p->input_format)
1515 p->diagnostic = 238;
1519 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1520 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1521 selected, &p->rec_len)))
1522 p->diagnostic = 238;
1525 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1526 memcpy (new_buf, p->rec_buf, p->rec_len);
1527 p->rec_buf = new_buf;
1531 data1_free_tree(p->dh, node);
1533 data1_free_tree(p->dh, onode);
/* File-local record type descriptor for the GRS filter. */
1538 static struct recType grs_type =
/* Exported handle: points at the file-local descriptor above. */
1547 RecType recTypeGrs = &grs_type;