1 /* $Id: recgrs.c,v 1.105 2005-08-22 08:19:51 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
36 struct source_parser {
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
353 if (c >= t->ch[0] && c <= t->ch[1])
357 /* move to next state and return if we get a match */
363 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
364 if (c >= t->ch[0] && c <= t->ch[1])
373 New function, looking for xpath "element" definitions in abs, by
374 tagpath, using a kind of ugly regexp search. The DFA was built while
375 parsing abs, so here we just go through them and try to match
376 against the given tagpath. The first matching entry is returned.
380 Added support for enhanced xelm. Now [] predicates are considered
381 as well, when selecting indexing rules... (why the hell it's called
388 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
390 data1_absyn *abs = n->root->u.root.absyn;
391 data1_xpelement *xpe = abs->xp_elements;
394 struct xpath_location_step *xp;
396 char *pexpr = xmalloc(strlen(tagpath)+5);
399 sprintf (pexpr, "/%s\n", tagpath);
400 for (; xpe; xpe = xpe->next)
403 ok = dfa_match_first(xpe->dfa->states, pexpr);
407 /* we have to check the predicates up to the root node */
410 /* find the first tag up in the node structure */
411 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
414 /* go from inside out in the node structure, while going
415 backwards through xpath location steps ... */
416 for (i = xpe->xpath_len - 1; i>0; i--)
418 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
419 i, xp[i].part, nn->u.tag.tag);
421 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
423 yaz_log(YLOG_DEBUG, " Predicates didn't match");
428 if (nn->which == DATA1N_tag)
440 yaz_log(YLOG_DEBUG, "Got it");
441 return xpe->termlists;
448 1 start element (tag)
450 3 start attr (and attr-exact)
458 Now, if there is a matching xelm described in abs, for the
459 indexed element or the attribute, then the data is handled according
460 to those definitions...
462 modified by pop, 2002-12-13
465 /* add xpath index for an attribute */
466 static void index_xpath_attr (char *tag_path, char *name, char *value,
467 char *structure, struct recExtractCtrl *p,
471 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
473 wrd->attrSet = VAL_IDXPATH;
476 wrd->index_type = '0';
477 wrd->term_buf = tag_path;
478 wrd->term_len = strlen(tag_path);
483 wrd->index_name = ZEBRA_XPATH_ATTR;
487 wrd->index_type = 'w';
488 wrd->term_buf = value;
489 wrd->term_len = strlen(value);
493 wrd->index_name = ZEBRA_XPATH_ELM_END;
497 wrd->index_type = '0';
498 wrd->term_buf = tag_path;
499 wrd->term_len = strlen(tag_path);
504 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
509 /* we have to fetch the whole path to the data tag */
510 for (nn = n; nn; nn = nn->parent)
512 if (nn->which == DATA1N_tag)
514 size_t tlen = strlen(nn->u.tag.tag);
515 if (tlen + flen > (max - 2))
517 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
519 tag_path_full[flen++] = '/';
522 if (nn->which == DATA1N_root)
525 tag_path_full[flen] = 0;
529 static void index_xpath(struct source_parser *sp, data1_node *n,
530 struct recExtractCtrl *p,
531 int level, RecWord *wrd,
541 char tag_path_full[1024];
542 int termlist_only = 1;
547 int xpath_is_start = 0;
553 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
556 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
558 if ((!n->root->u.root.absyn) ||
559 (n->root->u.root.absyn->enable_xpath_indexing)) {
566 wrd->term_buf = n->u.data.data;
567 wrd->term_len = n->u.data.len;
570 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
572 /* If we have a matching termlist... */
573 if (n->root->u.root.absyn &&
574 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
576 for (; tl; tl = tl->next)
578 /* need to copy recword because it may be changed */
580 wrd->index_type = *tl->structure;
581 memcpy (&wrd_tl, wrd, sizeof(*wrd));
583 sp_parse(sp, n, &wrd_tl, tl->source);
590 /* this is the ! case, so structure is for the xpath index */
592 wrd_tl.index_name = xpath_index;
594 wrd_tl.attrSet = VAL_IDXPATH;
595 wrd_tl.attrUse = use;
597 if (p->flagShowRecords)
600 printf("%*sXPath index", (level + 1) * 4, "");
601 printf (" XData:\"");
602 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
603 fputc (wrd_tl.term_buf[i], stdout);
605 if (wrd_tl.term_len > 40)
607 fputc ('\n', stdout);
610 (*p->tokenAdd)(&wrd_tl);
613 /* this is just the old fashioned attribute based index */
615 wrd_tl.index_name = tl->index_name;
617 wrd_tl.attrSet = (int) (tl->att->parent->reference);
618 wrd_tl.attrUse = tl->att->locals->local;
620 if (p->flagShowRecords)
623 printf("%*sIdx: [%s]", (level + 1) * 4, "",
626 printf("%s %s", tl->index_name, tl->source);
628 printf("%s:%s [%d] %s",
629 tl->att->parent->name,
630 tl->att->name, tl->att->value,
633 printf (" XData:\"");
634 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
635 fputc (wrd_tl.term_buf[i], stdout);
637 if (wrd_tl.term_len > 40)
639 fputc ('\n', stdout);
642 (*p->tokenAdd)(&wrd_tl);
646 /* xpath indexing is done, if there was no termlist given,
647 or no ! in the termlist, and default indexing is enabled... */
648 if (!p->flagShowRecords && !xpdone && !termlist_only)
651 wrd->index_name = xpath_index;
653 wrd->attrSet = VAL_IDXPATH;
656 wrd->index_type = 'w';
661 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
663 wrd->index_type = '0';
664 wrd->term_buf = tag_path_full;
665 wrd->term_len = strlen(tag_path_full);
667 wrd->index_name = xpath_index;
669 wrd->attrSet = VAL_IDXPATH;
672 if (p->flagShowRecords)
674 printf("%*s tag=", (level + 1) * 4, "");
675 for (i = 0; i<wrd->term_len && i < 40; i++)
676 fputc (wrd->term_buf[i], stdout);
687 /* Add tag start/end xpath index, only when there is a ! in
688 the appropriate xelm directive, or default xpath indexing
691 if (!(do_xpindex = 1 - termlist_only))
693 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
695 for (; tl; tl = tl->next)
707 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
710 if (xpath_is_start == 1) /* only for the starting tag... */
712 #define MAX_ATTR_COUNT 50
713 data1_termlist *tll[MAX_ATTR_COUNT];
717 /* get termlists for attributes, and find out, if we have to do xpath indexing */
718 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
723 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
725 int do_xpindex = 1 - termlist_only;
727 char attr_tag_path_full[1024];
729 /* this could be cached as well */
730 sprintf (attr_tag_path_full, "@%s/%s",
731 xp->name, tag_path_full);
733 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
735 /* if there is a ! in the xelm termlist, or default indexing is on,
736 proceed with xpath idx */
739 for (; tl; tl = tl->next)
753 /* attribute (no value) */
754 wrd->index_type = '0';
756 wrd->index_name = ZEBRA_XPATH_ATTR;
760 wrd->term_buf = xp->name;
761 wrd->term_len = strlen(xp->name);
767 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
769 /* attribute value exact */
770 strcpy (comb, xp->name);
772 strcat (comb, xp->value);
775 wrd->index_name = ZEBRA_XPATH_ATTR;
779 wrd->index_type = '0';
780 wrd->term_buf = comb;
781 wrd->term_len = strlen(comb);
791 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
793 char attr_tag_path_full[1024];
796 sprintf (attr_tag_path_full, "@%s/%s",
797 xp->name, tag_path_full);
801 /* If there is a termlist given (=xelm directive) */
802 for (; tl; tl = tl->next)
810 /* add xpath index for the attribute */
811 index_xpath_attr (attr_tag_path_full, xp->name,
812 xp->value, tl->structure,
816 /* add attribute based index for the attribute */
820 wrd->index_name = tl->index_name;
823 (tl->att->parent->reference);
824 wrd->attrUse = tl->att->locals->local;
826 wrd->index_type = *tl->structure;
827 wrd->term_buf = xp->value;
828 wrd->term_len = strlen(xp->value);
834 /* if there was no termlist for the given path,
835 or the termlist didn't have a ! element, index
836 the attribute as "w" */
837 if ((!xpdone) && (!termlist_only))
839 index_xpath_attr (attr_tag_path_full, xp->name,
840 xp->value, "w", p, wrd);
849 static void index_termlist (struct source_parser *sp, data1_node *par,
851 struct recExtractCtrl *p, int level, RecWord *wrd)
853 data1_termlist *tlist = 0;
854 data1_datatype dtype = DATA1K_string;
857 * cycle up towards the root until we find a tag with an att..
858 * this has the effect of indexing locally defined tags with
859 * the attribute of their ancestor in the record.
862 while (!par->u.tag.element)
863 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
865 if (!par || !(tlist = par->u.tag.element->termlists))
867 if (par->u.tag.element->tag)
868 dtype = par->u.tag.element->tag->kind;
870 for (; tlist; tlist = tlist->next)
872 /* consider source */
874 assert(tlist->source);
875 sp_parse(sp, n, wrd, tlist->source);
877 if (wrd->term_buf && wrd->term_len)
879 if (p->flagShowRecords)
882 printf("%*sIdx: [%s]", (level + 1) * 4, "",
885 printf("%s %s", tlist->index_name, tlist->source);
887 printf("%s:%s [%d] %s",
888 tlist->att->parent->name,
889 tlist->att->name, tlist->att->value,
892 printf (" XData:\"");
893 for (i = 0; i<wrd->term_len && i < 40; i++)
894 fputc (wrd->term_buf[i], stdout);
896 if (wrd->term_len > 40)
898 fputc ('\n', stdout);
902 wrd->index_type = *tlist->structure;
904 wrd->index_name = tlist->index_name;
906 wrd->attrSet = (int) (tlist->att->parent->reference);
907 wrd->attrUse = tlist->att->locals->local;
915 static int dumpkeys_r(struct source_parser *sp,
916 data1_node *n, struct recExtractCtrl *p, int level,
919 for (; n; n = n->next)
921 if (p->flagShowRecords) /* display element description to user */
923 if (n->which == DATA1N_root)
925 printf("%*s", level * 4, "");
926 printf("Record type: '%s'\n", n->u.root.type);
928 else if (n->which == DATA1N_tag)
932 printf("%*s", level * 4, "");
933 if (!(e = n->u.tag.element))
934 printf("Local tag: '%s'\n", n->u.tag.tag);
937 printf("Elm: '%s' ", e->name);
940 data1_tag *t = e->tag;
942 printf("TagNam: '%s' ", t->names->name);
945 printf("%s[%d],", t->tagset->name, t->tagset->type);
948 if (t->which == DATA1T_numeric)
949 printf("%d)", t->value.numeric);
951 printf("'%s')", t->value.string);
958 if (n->which == DATA1N_tag)
960 index_termlist(sp, n, n, p, level, wrd);
961 /* index start tag */
963 if (n->root->u.root.absyn)
964 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
967 if (n->root->u.root.absyn)
968 index_xpath(sp, n, p, level, wrd, 1);
973 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
977 if (n->which == DATA1N_data)
979 data1_node *par = get_parent_tag(p->dh, n);
981 if (p->flagShowRecords)
983 printf("%*s", level * 4, "");
985 if (n->u.data.len > 256)
986 printf("'%.170s ... %.70s'\n", n->u.data.data,
987 n->u.data.data + n->u.data.len-70);
988 else if (n->u.data.len > 0)
989 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
995 index_termlist(sp, par, n, p, level, wrd);
998 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
1001 index_xpath(sp, n, p, level, wrd, 1016);
1005 if (n->which == DATA1N_tag)
1009 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1012 index_xpath(sp, n, p, level, wrd, 2);
1016 if (p->flagShowRecords && n->which == DATA1N_root)
1018 printf("%*s-------------\n\n", level * 4, "");
1024 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1026 struct source_parser *sp = source_parser_create();
1027 int r = dumpkeys_r(sp, n, p, 0, wrd);
1028 source_parser_destroy(sp);
1032 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1035 int oidtmp[OID_SIZE];
1038 oe.proto = PROTO_Z3950;
1039 oe.oclass = CLASS_SCHEMA;
1040 if (n->u.root.absyn)
1042 oe.value = n->u.root.absyn->reference;
1044 if ((oid_ent_to_oid (&oe, oidtmp)))
1045 (*p->schemaAdd)(p, oidtmp);
1047 (*p->init)(p, &wrd);
1049 return dumpkeys(n, p, &wrd);
1052 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1054 data1_node *(*grs_read)(struct grs_read_info *))
1057 struct grs_read_info gri;
1059 int oidtmp[OID_SIZE];
1062 gri.readf = p->readf;
1063 gri.seekf = p->seekf;
1064 gri.tellf = p->tellf;
1067 gri.offset = p->offset;
1070 gri.clientData = clientData;
1072 n = (*grs_read)(&gri);
1074 return RECCTRL_EXTRACT_EOF;
1075 oe.proto = PROTO_Z3950;
1076 oe.oclass = CLASS_SCHEMA;
1078 if (!n->u.root.absyn)
1079 return RECCTRL_EXTRACT_ERROR;
1081 if (n->u.root.absyn)
1083 oe.value = n->u.root.absyn->reference;
1084 if ((oid_ent_to_oid (&oe, oidtmp)))
1085 (*p->schemaAdd)(p, oidtmp);
1087 data1_concat_text(p->dh, mem, n);
1089 /* ensure our data1 tree is UTF-8 */
1090 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1093 data1_pr_tree (p->dh, n, stdout);
1096 (*p->init)(p, &wrd);
1097 if (dumpkeys(n, p, &wrd) < 0)
1099 data1_free_tree(p->dh, n);
1100 return RECCTRL_EXTRACT_ERROR_GENERIC;
1102 data1_free_tree(p->dh, n);
1103 return RECCTRL_EXTRACT_OK;
1106 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1107 data1_node *(*grs_read)(struct grs_read_info *))
1110 NMEM mem = nmem_create ();
1111 ret = grs_extract_sub(clientData, p, mem, grs_read);
1117 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
1119 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1120 char **addinfo, ODR o)
1122 data1_esetname *eset;
1123 Z_Espec1 *espec = 0;
1128 case Z_RecordComp_simple:
1129 if (c->u.simple->which != Z_ElementSetNames_generic)
1130 return 26; /* only generic form supported. Fix this later */
1131 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1132 c->u.simple->u.generic)))
1134 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1135 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1136 return 25; /* invalid esetname */
1138 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1139 c->u.simple->u.generic);
1142 case Z_RecordComp_complex:
1143 if (c->u.complex->generic)
1145 /* insert check for schema */
1146 if ((p = c->u.complex->generic->elementSpec))
1150 case Z_ElementSpec_elementSetName:
1152 data1_getesetbyname(dh, n->u.root.absyn,
1153 p->u.elementSetName)))
1155 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1156 p->u.elementSetName);
1157 *addinfo = odr_strdup(o, p->u.elementSetName);
1158 return 25; /* invalid esetname */
1160 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1161 p->u.elementSetName);
1164 case Z_ElementSpec_externalSpec:
1165 if (p->u.externalSpec->which == Z_External_espec1)
1167 yaz_log(YLOG_DEBUG, "Got Espec-1");
1168 espec = p->u.externalSpec-> u.espec1;
1172 yaz_log(YLOG_LOG, "Unknown external espec.");
1173 return 25; /* bad. what is proper diagnostic? */
1180 return 26; /* fix */
1184 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1185 return data1_doespec1(dh, n, espec);
1189 yaz_log(YLOG_DEBUG, "Element: all match");
1194 /* Add Zebra info in separate namespace ...
1197 <metadata xmlns="http://www.indexdata.dk/zebra/">
1199 <localnumber>447</localnumber>
1200 <filename>records/genera.xml</filename>
1205 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1208 const char *idzebra_ns[3];
1209 const char *i2 = "\n ";
1210 const char *i4 = "\n ";
1213 idzebra_ns[0] = "xmlns";
1214 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1217 data1_mk_text (p->dh, mem, i2, top);
1219 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1221 data1_mk_text (p->dh, mem, "\n", top);
1223 data1_mk_text (p->dh, mem, i4, n);
1225 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1229 data1_mk_text (p->dh, mem, i4, n);
1230 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1232 data1_mk_text (p->dh, mem, i4, n);
1233 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1236 data1_mk_text (p->dh, mem, i4, n);
1237 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1239 data1_mk_text (p->dh, mem, i2, n);
1242 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1243 data1_node *(*grs_read)(struct grs_read_info *))
1245 data1_node *node = 0, *onode = 0, *top;
1248 int res, selected = 0;
1250 struct grs_read_info gri;
1251 const char *tagname;
1253 int requested_schema = VAL_NONE;
1254 data1_marctab *marctab;
1257 mem = nmem_create();
1258 gri.readf = p->readf;
1259 gri.seekf = p->seekf;
1260 gri.tellf = p->tellf;
1266 gri.clientData = clientData;
1268 yaz_log(YLOG_DEBUG, "grs_retrieve");
1269 node = (*grs_read)(&gri);
1276 data1_concat_text(p->dh, mem, node);
1279 data1_pr_tree (p->dh, node, stdout);
1281 top = data1_get_root_tag (p->dh, node);
1283 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1284 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1286 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1288 dnew->u.data.what = DATA1I_text;
1289 dnew->u.data.data = dnew->lbuf;
1290 sprintf(dnew->u.data.data, "%d", p->recordSize);
1291 dnew->u.data.len = strlen(dnew->u.data.data);
1294 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1295 if (tagname && p->score >= 0 &&
1296 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1298 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1299 dnew->u.data.what = DATA1I_num;
1300 dnew->u.data.data = dnew->lbuf;
1301 sprintf(dnew->u.data.data, "%d", p->score);
1302 dnew->u.data.len = strlen(dnew->u.data.data);
1305 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1306 "localControlNumber");
1307 if (tagname && p->localno > 0 &&
1308 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1310 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1311 dnew->u.data.what = DATA1I_text;
1312 dnew->u.data.data = dnew->lbuf;
1314 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1315 dnew->u.data.len = strlen(dnew->u.data.data);
1318 if (p->input_format == VAL_TEXT_XML)
1319 zebra_xml_metadata (p, top, mem);
1322 data1_pr_tree (p->dh, node, stdout);
1324 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1325 p->comp->u.complex->generic &&
1326 p->comp->u.complex->generic->which == Z_Schema_oid &&
1327 p->comp->u.complex->generic->schema.oid)
1329 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1331 requested_schema = oe->value;
1333 /* If schema has been specified, map if possible, then check that
1334 * we got the right one
1336 if (requested_schema != VAL_NONE)
1338 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1339 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1341 if (map->target_absyn_ref == requested_schema)
1344 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1353 if (node->u.root.absyn &&
1354 requested_schema != node->u.root.absyn->reference)
1356 p->diagnostic = 238;
1362 * Does the requested format match a known syntax-mapping? (this reflects
1363 * the overlap of schema and formatting which is inherent in the MARC
1366 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1367 if (node->u.root.absyn)
1368 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1370 if (map->target_absyn_ref == p->input_format)
1373 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1382 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1383 if (node->u.root.absyn &&
1384 node->u.root.absyn->reference != VAL_NONE &&
1385 p->input_format == VAL_GRS1)
1389 int oidtmp[OID_SIZE];
1391 oe.proto = PROTO_Z3950;
1392 oe.oclass = CLASS_SCHEMA;
1393 oe.value = node->u.root.absyn->reference;
1395 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1398 data1_handle dh = p->dh;
1402 for (ii = oid; *ii >= 0; ii++)
1406 sprintf(p, "%d", *ii);
1409 if ((dnew = data1_mk_tag_data_wd(dh, top,
1410 "schemaIdentifier", mem)))
1412 dnew->u.data.what = DATA1I_oid;
1413 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1414 memcpy(dnew->u.data.data, tmp, p - tmp);
1415 dnew->u.data.len = p - tmp;
1420 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1421 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1424 p->diagnostic = res;
1426 data1_free_tree(p->dh, onode);
1427 data1_free_tree(p->dh, node);
1431 else if (p->comp && !res)
1435 data1_pr_tree (p->dh, node, stdout);
1437 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1438 switch (p->output_format = (p->input_format != VAL_NONE ?
1439 p->input_format : VAL_SUTRS))
1443 data1_pr_tree (p->dh, node, stdout);
1445 /* default output encoding for XML is UTF-8 */
1446 data1_iconv (p->dh, mem, node,
1447 p->encoding ? p->encoding : "UTF-8",
1448 data1_get_encoding(p->dh, node));
1450 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1452 p->diagnostic = 238;
1455 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1456 memcpy (new_buf, p->rec_buf, p->rec_len);
1457 p->rec_buf = new_buf;
1461 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1463 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1465 p->diagnostic = 238; /* not available in requested syntax */
1467 p->rec_len = (size_t) (-1);
1470 /* ensure our data1 tree is UTF-8 */
1471 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1473 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1475 p->diagnostic = 238;
1477 p->rec_len = (size_t) (-1);
1480 /* ensure our data1 tree is UTF-8 */
1481 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1482 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1484 p->diagnostic = 238;
1486 p->rec_len = (size_t) (-1);
1490 data1_iconv (p->dh, mem, node, p->encoding,
1491 data1_get_encoding(p->dh, node));
1492 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1494 p->diagnostic = 238;
1497 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1498 memcpy (new_buf, p->rec_buf, p->rec_len);
1499 p->rec_buf = new_buf;
1504 data1_iconv (p->dh, mem, node, p->encoding,
1505 data1_get_encoding(p->dh, node));
1506 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1508 p->diagnostic = 238;
1511 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1512 memcpy (new_buf, p->rec_buf, p->rec_len);
1513 p->rec_buf = new_buf;
1517 if (!node->u.root.absyn)
1519 p->diagnostic = 238;
1522 for (marctab = node->u.root.absyn->marc; marctab;
1523 marctab = marctab->next)
1524 if (marctab->reference == p->input_format)
1528 p->diagnostic = 238;
1532 data1_iconv (p->dh, mem, node, p->encoding,
1533 data1_get_encoding(p->dh, node));
1534 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1535 selected, &p->rec_len)))
1536 p->diagnostic = 238;
1539 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1540 memcpy (new_buf, p->rec_buf, p->rec_len);
1541 p->rec_buf = new_buf;
1545 data1_free_tree(p->dh, node);
1547 data1_free_tree(p->dh, onode);