1 /* $Id: recgrs.c,v 1.86.2.7 2006-02-06 13:34:00 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
40 struct RecWord_entry **entries;
45 struct RecWord_entry {
47 struct RecWord_entry *next;
50 struct RecWord_list *RecWord_list_create(const char *name)
52 NMEM m = nmem_create();
53 struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
58 p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
59 for (i = 0; i<p->hash_size; i++)
61 p->name = nmem_strdup(m, name);
65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
67 struct RecWord_entry *e;
70 (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
72 for (e = l->entries[hash]; e; e = e->next)
73 if (e->w.attrSet == wrd->attrSet &&
74 e->w.attrUse == wrd->attrUse &&
75 e->w.reg_type == wrd->reg_type &&
76 e->w.length == wrd->length &&
77 !memcmp(e->w.string, wrd->string, wrd->length))
80 fprintf(stderr, "DUP key found in %s\n", l->name);
81 fprintf(stderr, "set=%d use=%d regtype=%c\n",
82 wrd->attrSet, wrd->attrUse, wrd->reg_type);
86 e = nmem_malloc(l->nmem, sizeof(*e));
87 e->next = l->entries[hash];
89 memcpy(&e->w, wrd, sizeof(*wrd));
90 e->w.string = nmem_malloc(l->nmem, wrd->length);
91 memcpy(e->w.string, wrd->string, wrd->length);
95 void RecWord_list_destroy(struct RecWord_list *l)
98 nmem_destroy(l->nmem);
106 struct grs_handler *next;
109 struct grs_handlers {
110 struct grs_handler *handlers;
113 static int read_grs_type (struct grs_handlers *h,
114 struct grs_read_info *p, const char *type,
117 struct grs_handler *gh = h->handlers;
118 const char *cp = strchr (type, '.');
120 if (cp == NULL || cp == type)
122 cp = strlen(type) + type;
126 strcpy (p->type, cp+1);
127 for (gh = h->handlers; gh; gh = gh->next)
129 if (!memcmp (type, gh->type->type, cp-type) &&
130 gh->type->type[cp-type] == '\0')
135 gh->clientData = (*gh->type->init)();
137 p->clientData = gh->clientData;
138 *root = (gh->type->read)(p);
139 gh->clientData = p->clientData;
146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
148 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
149 gh->next = h->handlers;
156 static void *grs_init(RecType recType)
158 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
161 grs_add_handler (h, recTypeGrs_sgml);
162 grs_add_handler (h, recTypeGrs_regx);
164 grs_add_handler (h, recTypeGrs_tcl);
166 grs_add_handler (h, recTypeGrs_marc);
167 grs_add_handler (h, recTypeGrs_marcxml);
169 grs_add_handler (h, recTypeGrs_xml);
172 grs_add_handler (h, recTypeGrs_perl);
174 grs_add_handler (h, recTypeGrs_danbib);
178 static void grs_destroy(void *clientData)
180 struct grs_handlers *h = (struct grs_handlers *) clientData;
181 struct grs_handler *gh = h->handlers, *gh_next;
186 (*gh->type->destroy)(gh->clientData);
193 struct source_parser {
200 static int sp_lex(struct source_parser *sp)
202 while (*sp->src == ' ')
206 while (*sp->src && !strchr("<>();,-: ", *sp->src))
215 sp->lookahead = *sp->src;
219 return sp->lookahead;
223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
225 if (sp->lookahead != 't')
227 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
229 if (n->which == DATA1N_data)
231 wrd->string = n->u.data.data;
232 wrd->length = n->u.data.len;
236 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
238 if (n->which == DATA1N_tag)
240 wrd->string = n->u.tag.tag;
241 wrd->length = strlen(n->u.tag.tag);
245 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
248 if (sp->lookahead != '(')
251 if (sp->lookahead != 't')
254 if (n->which == DATA1N_tag)
256 data1_xattr *p = n->u.tag.attributes;
257 while (p && strlen(p->name) != sp->len &&
258 memcmp (p->name, sp->tok, sp->len))
262 wrd->string = p->value;
263 wrd->length = strlen(p->value);
267 if (sp->lookahead != ')')
271 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
275 if (sp->lookahead != '(')
280 if (sp->lookahead != ',')
284 if (sp->lookahead != 't')
286 start = atoi_n(sp->tok, sp->len);
289 if (sp->lookahead != ',')
293 if (sp->lookahead != 't')
295 len = atoi_n(sp->tok, sp->len);
298 if (sp->lookahead != ')')
302 if (wrd->string && wrd->length)
304 wrd->string += start;
305 wrd->length -= start;
306 if (wrd->length > len)
313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
315 struct source_parser sp;
322 return sp_expr(&sp, n, wrd);
325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
334 if (p->which == XPATH_PREDICATE_RELATION) {
335 if (p->u.relation.name[0]) {
336 if (*p->u.relation.name != '@') {
338 " Only attributes (@) are supported in xelm xpath predicates");
339 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
342 attname = p->u.relation.name + 1;
344 /* looking for the attribute with a specified name */
345 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
346 yaz_log(LOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
348 if (!strcmp(attr->name, attname)) {
349 if (p->u.relation.op[0]) {
350 if (*p->u.relation.op != '=') {
352 "Only '=' relation is supported (%s)",p->u.relation.op);
353 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
356 yaz_log(LOG_DEBUG," - value %s <-> %s",
357 p->u.relation.value, attr->value );
358 if (!strcmp(attr->value, p->u.relation.value)) {
363 /* attribute exists, no value specified */
368 yaz_log(LOG_DEBUG, "return %d", res);
374 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
375 if (!strcmp(p->u.boolean.op,"and")) {
376 return d1_check_xpath_predicate(n, p->u.boolean.left)
377 && d1_check_xpath_predicate(n, p->u.boolean.right);
379 else if (!strcmp(p->u.boolean.op,"or")) {
380 return (d1_check_xpath_predicate(n, p->u.boolean.left)
381 || d1_check_xpath_predicate(n, p->u.boolean.right));
383 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
393 struct DFA_state *s = dfaar[0]; /* start state */
396 const char *p = text;
399 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
400 if (c >= t->ch[0] && c <= t->ch[1])
404 /* move to next state and return if we get a match */
410 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
411 if (c >= t->ch[0] && c <= t->ch[1])
421 New function, looking for xpath "element" definitions in abs, by
422 tagpath, using a kind of ugly regexp search. The DFA was built while
423 parsing abs, so here we just go through them and try to match
424 against the given tagpath. The first matching entry is returned.
428 Added support for enhanced xelm. Now [] predicates are considered
429 as well, when selecting indexing rules... (why the hell it's called
436 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
438 data1_absyn *abs = n->root->u.root.absyn;
439 data1_xpelement *xpe = abs->xp_elements;
442 struct xpath_location_step *xp;
445 char *pexpr = xmalloc(strlen(tagpath)+5);
448 sprintf (pexpr, "/%s\n", tagpath);
449 yaz_log(LOG_DEBUG,"Checking tagpath %s",tagpath);
453 ok = dfa_match_first(xpe->dfa->states, pexpr);
455 yaz_log(LOG_DEBUG, " xpath got match %s",xpe->xpath_expr);
457 yaz_log(LOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
461 /* we have to check the predicates up to the root node */
464 /* find the first tag up in the node structure */
465 nn = n; while (nn && nn->which != DATA1N_tag) {
469 /* go from inside out in the node structure, while going
470 backwards through xpath location steps ... */
471 for (i=xpe->xpath_len - 1; i>0; i--) {
473 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
474 i,xp[i].part,nn->u.tag.tag);
476 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
477 yaz_log(LOG_DEBUG," Predicates didn't match");
482 if (nn->which == DATA1N_tag) {
497 yaz_log(LOG_DEBUG,"Got it");
498 return xpe->termlists;
505 1 start element (tag)
507 3 start attr (and attr-exact)
515 Now, if there is a matching xelm described in abs, for the
516 indexed element or the attribute, then the data is handled according
517 to those definitions...
519 modified by pop, 2002-12-13
522 /* add xpath index for an attribute */
523 static void index_xpath_attr (char *tag_path, char *name, char *value,
524 char *structure, struct recExtractCtrl *p,
527 wrd->attrSet = VAL_IDXPATH;
530 wrd->string = tag_path;
531 wrd->length = strlen(tag_path);
538 wrd->length = strlen(value);
544 wrd->string = tag_path;
545 wrd->length = strlen(tag_path);
551 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
552 int level, RecWord *wrd, int use,
553 struct RecWord_list *wl)
556 char tag_path_full[1024];
559 int termlist_only = 1;
563 yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
564 if ((!n->root->u.root.absyn) ||
565 (n->root->u.root.absyn->enable_xpath_indexing)) {
572 wrd->string = n->u.data.data;
573 wrd->length = n->u.data.len;
577 /* we have to fetch the whole path to the data tag */
578 for (nn = n; nn; nn = nn->parent) {
579 if (nn->which == DATA1N_tag) {
580 size_t tlen = strlen(nn->u.tag.tag);
581 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
582 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
584 tag_path_full[flen++] = '/';
586 else if (nn->which == DATA1N_root) break;
589 tag_path_full[flen] = 0;
591 /* If we have a matching termlist... */
592 if (n->root->u.root.absyn &&
593 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
595 for (; tl; tl = tl->next)
597 /* need to copy recword because it may be changed */
599 wrd->reg_type = *tl->structure;
600 /* this is the ! case, so structure is for the xpath index */
601 memcpy (&wrd_tl, wrd, sizeof(*wrd));
603 sp_parse(n, &wrd_tl, tl->source);
605 wrd_tl.attrSet = VAL_IDXPATH;
606 wrd_tl.attrUse = use;
607 if (p->flagShowRecords)
610 printf("%*sXPath index", (level + 1) * 4, "");
611 printf (" XData:\"");
612 for (i = 0; i<wrd_tl.length && i < 40; i++)
613 fputc (wrd_tl.string[i], stdout);
615 if (wrd_tl.length > 40)
617 fputc ('\n', stdout);
620 (*p->tokenAdd)(&wrd_tl);
624 /* this is just the old fashioned attribute based index */
625 wrd_tl.attrSet = (int) (tl->att->parent->reference);
626 wrd_tl.attrUse = tl->att->locals->local;
627 if (p->flagShowRecords)
630 printf("%*sIdx: [%s]", (level + 1) * 4, "",
632 printf("%s:%s [%d] %s",
633 tl->att->parent->name,
634 tl->att->name, tl->att->value,
636 printf (" XData:\"");
637 for (i = 0; i<wrd_tl.length && i < 40; i++)
638 fputc (wrd_tl.string[i], stdout);
640 if (wrd_tl.length > 40)
642 fputc ('\n', stdout);
645 (*p->tokenAdd)(&wrd_tl);
649 /* xpath indexing is done, if there was no termlist given,
650 or no ! in the termlist, and default indexing is enabled... */
651 if (!p->flagShowRecords && !xpdone && !termlist_only)
653 wrd->attrSet = VAL_IDXPATH;
663 for (nn = n; nn; nn = nn->parent)
665 if (nn->which == DATA1N_tag)
667 size_t tlen = strlen(nn->u.tag.tag);
668 if (tlen + flen > (sizeof(tag_path_full)-2))
670 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
672 tag_path_full[flen++] = '/';
674 else if (nn->which == DATA1N_root)
680 wrd->string = tag_path_full;
682 wrd->attrSet = VAL_IDXPATH;
684 if (p->flagShowRecords)
686 printf("%*s tag=", (level + 1) * 4, "");
687 for (i = 0; i<wrd->length && i < 40; i++)
688 fputc (wrd->string[i], stdout);
699 tag_path_full[flen] = 0;
701 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
702 directive, or default xpath indexing is enabled */
703 if (!(do_xpindex = 1 - termlist_only)) {
704 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
705 for (; tl; tl = tl->next)
713 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
716 if (use == 1) /* only for the starting tag... */
718 #define MAX_ATTR_COUNT 50
719 data1_termlist *tll[MAX_ATTR_COUNT];
723 /* get termlists for attributes, and find out, if we have to do xpath indexing */
724 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
729 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
731 int do_xpindex = 1 - termlist_only;
733 char attr_tag_path_full[1024];
736 /* this could be cached as well */
737 sprintf (attr_tag_path_full, "@%s/%.*s",
738 xp->name, int_len, tag_path_full);
740 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
742 /* if there is a ! in the xelm termlist, or default indexing is on,
743 proceed with xpath idx */
746 for (; tl; tl = tl->next)
755 /* attribute (no value) */
758 wrd->string = xp->name;
759 wrd->length = strlen(xp->name);
765 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
767 /* attribute value exact */
768 strcpy (comb, xp->name);
770 strcat (comb, xp->value);
775 wrd->length = strlen(comb);
778 if (RecWord_list_lookadd(wl, wrd))
786 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
788 char attr_tag_path_full[1024];
792 sprintf (attr_tag_path_full, "@%s/%.*s",
793 xp->name, int_len, tag_path_full);
797 /* If there is a termlist given (=xelm directive) */
798 for (; tl; tl = tl->next)
801 /* add xpath index for the attribute */
802 index_xpath_attr (attr_tag_path_full, xp->name,
803 xp->value, tl->structure,
809 /* if this fragment is enabled, we index
810 attribute values as well. See bug #460 */
811 if (0 && xp->value) {
813 (tl->att->parent->reference);
814 wrd->attrUse = tl->att->locals->local;
815 wrd->reg_type = *tl->structure;
816 wrd->string = xp->value;
817 wrd->length = strlen(xp->value);
818 if (RecWord_list_lookadd(wl, wrd))
824 /* if there was no termlist for the given path,
825 or the termlist didn't have a ! element, index
826 the attribute as "w" */
827 if ((!xpdone) && (!termlist_only))
829 index_xpath_attr (attr_tag_path_full, xp->name,
830 xp->value, "w", p, wrd);
839 static void index_termlist (data1_node *par, data1_node *n,
840 struct recExtractCtrl *p, int level, RecWord *wrd)
842 data1_termlist *tlist = 0;
843 data1_datatype dtype = DATA1K_string;
846 * cycle up towards the root until we find a tag with an att..
847 * this has the effect of indexing locally defined tags with
848 * the attribute of their ancestor in the record.
851 while (!par->u.tag.element)
852 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
854 if (!par || !(tlist = par->u.tag.element->termlists))
856 if (par->u.tag.element->tag)
857 dtype = par->u.tag.element->tag->kind;
859 for (; tlist; tlist = tlist->next)
861 /* consider source */
863 assert(tlist->source);
864 sp_parse(n, wrd, tlist->source);
868 if (p->flagShowRecords)
871 printf("%*sIdx: [%s]", (level + 1) * 4, "",
873 printf("%s:%s [%d] %s",
874 tlist->att->parent->name,
875 tlist->att->name, tlist->att->value,
877 printf (" XData:\"");
878 for (i = 0; i<wrd->length && i < 40; i++)
879 fputc (wrd->string[i], stdout);
881 if (wrd->length > 40)
883 fputc ('\n', stdout);
887 wrd->reg_type = *tlist->structure;
888 wrd->attrSet = (int) (tlist->att->parent->reference);
889 wrd->attrUse = tlist->att->locals->local;
896 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
897 RecWord *wrd, struct RecWord_list *wl)
899 for (; n; n = n->next)
901 if (p->flagShowRecords) /* display element description to user */
903 if (n->which == DATA1N_root)
905 printf("%*s", level * 4, "");
906 printf("Record type: '%s'\n", n->u.root.type);
908 else if (n->which == DATA1N_tag)
912 printf("%*s", level * 4, "");
913 if (!(e = n->u.tag.element))
914 printf("Local tag: '%s'\n", n->u.tag.tag);
917 printf("Elm: '%s' ", e->name);
920 data1_tag *t = e->tag;
922 printf("TagNam: '%s' ", t->names->name);
925 printf("%s[%d],", t->tagset->name, t->tagset->type);
928 if (t->which == DATA1T_numeric)
929 printf("%d)", t->value.numeric);
931 printf("'%s')", t->value.string);
938 if (n->which == DATA1N_tag)
940 index_termlist (n, n, p, level, wrd);
941 /* index start tag */
942 if (n->root->u.root.absyn)
943 index_xpath (n, p, level, wrd, 1, wl);
947 if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
951 if (n->which == DATA1N_data)
953 data1_node *par = get_parent_tag(p->dh, n);
955 if (p->flagShowRecords)
957 printf("%*s", level * 4, "");
959 if (n->u.data.len > 256)
960 printf("'%.170s ... %.70s'\n", n->u.data.data,
961 n->u.data.data + n->u.data.len-70);
962 else if (n->u.data.len > 0)
963 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
969 index_termlist (par, n, p, level, wrd);
971 index_xpath (n, p, level, wrd, 1016, wl);
974 if (n->which == DATA1N_tag)
977 index_xpath (n, p, level, wrd, 2, wl);
980 if (p->flagShowRecords && n->which == DATA1N_root)
982 printf("%*s-------------\n\n", level * 4, "");
988 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
991 int oidtmp[OID_SIZE];
994 struct RecWord_list *wl = 0;
996 oe.proto = PROTO_Z3950;
997 oe.oclass = CLASS_SCHEMA;
1000 oe.value = n->u.root.absyn->reference;
1002 if ((oid_ent_to_oid (&oe, oidtmp)))
1003 (*p->schemaAdd)(p, oidtmp);
1005 (*p->init)(p, &wrd);
1007 wl = RecWord_list_create("grs_extract_tree");
1008 r = dumpkeys(n, p, 0, &wrd, wl);
1009 RecWord_list_destroy(wl);
1013 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1017 struct grs_read_info gri;
1019 int oidtmp[OID_SIZE];
1021 struct RecWord_list *wl = 0;
1024 gri.readf = p->readf;
1025 gri.seekf = p->seekf;
1026 gri.tellf = p->tellf;
1029 gri.offset = p->offset;
1033 if (read_grs_type (h, &gri, p->subType, &n))
1034 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1036 return RECCTRL_EXTRACT_EOF;
1037 oe.proto = PROTO_Z3950;
1038 oe.oclass = CLASS_SCHEMA;
1040 if (!n->u.root.absyn)
1041 return RECCTRL_EXTRACT_ERROR;
1043 if (n->u.root.absyn)
1045 oe.value = n->u.root.absyn->reference;
1046 if ((oid_ent_to_oid (&oe, oidtmp)))
1047 (*p->schemaAdd)(p, oidtmp);
1049 data1_concat_text(p->dh, mem, n);
1051 /* ensure our data1 tree is UTF-8 */
1052 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1055 data1_pr_tree (p->dh, n, stdout);
1058 wl = RecWord_list_create("grs.sgml");
1060 (*p->init)(p, &wrd);
1061 if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1062 ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1064 ret_val = RECCTRL_EXTRACT_OK;
1065 data1_free_tree(p->dh, n);
1066 RecWord_list_destroy(wl);
1071 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1074 NMEM mem = nmem_create ();
1075 struct grs_handlers *h = (struct grs_handlers *) clientData;
1077 ret = grs_extract_sub(h, p, mem);
1083 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
1085 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1087 data1_esetname *eset;
1088 Z_Espec1 *espec = 0;
1093 case Z_RecordComp_simple:
1094 if (c->u.simple->which != Z_ElementSetNames_generic)
1095 return 26; /* only generic form supported. Fix this later */
1096 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1097 c->u.simple->u.generic)))
1099 yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1100 return 25; /* invalid esetname */
1102 yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1103 c->u.simple->u.generic);
1106 case Z_RecordComp_complex:
1107 if (c->u.complex->generic)
1109 /* insert check for schema */
1110 if ((p = c->u.complex->generic->elementSpec))
1114 case Z_ElementSpec_elementSetName:
1116 data1_getesetbyname(dh, n->u.root.absyn,
1117 p->u.elementSetName)))
1119 yaz_log(LOG_LOG, "Unknown esetname '%s'",
1120 p->u.elementSetName);
1121 return 25; /* invalid esetname */
1123 yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1124 p->u.elementSetName);
1127 case Z_ElementSpec_externalSpec:
1128 if (p->u.externalSpec->which == Z_External_espec1)
1130 yaz_log(LOG_DEBUG, "Got Espec-1");
1131 espec = p->u.externalSpec-> u.espec1;
1135 yaz_log(LOG_LOG, "Unknown external espec.");
1136 return 25; /* bad. what is proper diagnostic? */
1143 return 26; /* fix */
1147 yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1148 return data1_doespec1(dh, n, espec);
1152 yaz_log(LOG_DEBUG, "Element: all match");
1157 /* Add Zebra info in separate namespace ...
1160 <metadata xmlns="http://www.indexdata.dk/zebra/">
1162 <localnumber>447</localnumber>
1163 <filename>records/genera.xml</filename>
1168 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1171 const char *idzebra_ns[3];
1172 const char *i2 = "\n ";
1173 const char *i4 = "\n ";
1176 idzebra_ns[0] = "xmlns";
1177 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1180 data1_mk_text (p->dh, mem, i2, top);
1182 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1184 data1_mk_text (p->dh, mem, "\n", top);
1186 data1_mk_text (p->dh, mem, i4, n);
1188 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1192 data1_mk_text (p->dh, mem, i4, n);
1193 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1195 data1_mk_text (p->dh, mem, i4, n);
1196 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1199 data1_mk_text (p->dh, mem, i4, n);
1200 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1202 data1_mk_text (p->dh, mem, i2, n);
1205 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1207 data1_node *node = 0, *onode = 0, *top;
1210 int res, selected = 0;
1212 struct grs_read_info gri;
1213 const char *tagname;
1214 struct grs_handlers *h = (struct grs_handlers *) clientData;
1215 int requested_schema = VAL_NONE;
1216 data1_marctab *marctab;
1219 mem = nmem_create();
1220 gri.readf = p->readf;
1221 gri.seekf = p->seekf;
1222 gri.tellf = p->tellf;
1229 yaz_log(LOG_DEBUG, "grs_retrieve");
1230 if (read_grs_type (h, &gri, p->subType, &node))
1242 data1_concat_text(p->dh, mem, node);
1244 /* ensure our data1 tree is UTF-8 */
1245 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1248 data1_pr_tree (p->dh, node, stdout);
1250 top = data1_get_root_tag (p->dh, node);
1252 yaz_log(LOG_DEBUG, "grs_retrieve: size");
1253 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1255 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1257 dnew->u.data.what = DATA1I_text;
1258 dnew->u.data.data = dnew->lbuf;
1259 sprintf(dnew->u.data.data, "%d", p->recordSize);
1260 dnew->u.data.len = strlen(dnew->u.data.data);
1263 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1265 if (tagname && p->score >= 0 &&
1266 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1268 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1269 dnew->u.data.what = DATA1I_num;
1270 dnew->u.data.data = dnew->lbuf;
1271 sprintf(dnew->u.data.data, "%d", p->score);
1272 dnew->u.data.len = strlen(dnew->u.data.data);
1275 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1276 "localControlNumber");
1277 if (tagname && p->localno > 0 &&
1278 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1280 yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1281 dnew->u.data.what = DATA1I_text;
1282 dnew->u.data.data = dnew->lbuf;
1284 sprintf(dnew->u.data.data, "%d", p->localno);
1285 dnew->u.data.len = strlen(dnew->u.data.data);
1288 if (p->input_format == VAL_TEXT_XML)
1289 zebra_xml_metadata (p, top, mem);
1292 data1_pr_tree (p->dh, node, stdout);
1294 #if YAZ_VERSIONL >= 0x010903L
1295 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1296 p->comp->u.complex->generic &&
1297 p->comp->u.complex->generic->which == Z_Schema_oid &&
1298 p->comp->u.complex->generic->schema.oid)
1300 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1302 requested_schema = oe->value;
1305 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1306 p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1308 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1310 requested_schema = oe->value;
1314 /* If schema has been specified, map if possible, then check that
1315 * we got the right one
1317 if (requested_schema != VAL_NONE)
1319 yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1320 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1322 if (map->target_absyn_ref == requested_schema)
1325 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1334 if (node->u.root.absyn &&
1335 requested_schema != node->u.root.absyn->reference)
1337 p->diagnostic = 238;
1343 * Does the requested format match a known syntax-mapping? (this reflects
1344 * the overlap of schema and formatting which is inherent in the MARC
1347 yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1348 if (node->u.root.absyn)
1349 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1351 if (map->target_absyn_ref == p->input_format)
1354 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1363 yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1364 if (node->u.root.absyn &&
1365 node->u.root.absyn->reference != VAL_NONE &&
1366 p->input_format == VAL_GRS1)
1370 int oidtmp[OID_SIZE];
1372 oe.proto = PROTO_Z3950;
1373 oe.oclass = CLASS_SCHEMA;
1374 oe.value = node->u.root.absyn->reference;
1376 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1379 data1_handle dh = p->dh;
1383 for (ii = oid; *ii >= 0; ii++)
1387 sprintf(p, "%d", *ii);
1390 if ((dnew = data1_mk_tag_data_wd(dh, top,
1391 "schemaIdentifier", mem)))
1393 dnew->u.data.what = DATA1I_oid;
1394 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1395 memcpy(dnew->u.data.data, tmp, p - tmp);
1396 dnew->u.data.len = p - tmp;
1401 yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1402 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1404 p->diagnostic = res;
1406 data1_free_tree(p->dh, onode);
1407 data1_free_tree(p->dh, node);
1411 else if (p->comp && !res)
1415 data1_pr_tree (p->dh, node, stdout);
1417 yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1418 switch (p->output_format = (p->input_format != VAL_NONE ?
1419 p->input_format : VAL_SUTRS))
1424 data1_pr_tree (p->dh, node, stdout);
1428 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1430 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1432 p->diagnostic = 238;
1435 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1436 memcpy (new_buf, p->rec_buf, p->rec_len);
1437 p->rec_buf = new_buf;
1442 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1444 p->diagnostic = 238; /* not available in requested syntax */
1446 p->rec_len = (size_t) (-1);
1449 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1451 p->diagnostic = 238;
1453 p->rec_len = (size_t) (-1);
1456 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1458 p->diagnostic = 238;
1460 p->rec_len = (size_t) (-1);
1464 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1465 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1467 p->diagnostic = 238;
1470 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1471 memcpy (new_buf, p->rec_buf, p->rec_len);
1472 p->rec_buf = new_buf;
1476 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1478 p->diagnostic = 238;
1481 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1482 memcpy (new_buf, p->rec_buf, p->rec_len);
1483 p->rec_buf = new_buf;
1487 if (!node->u.root.absyn)
1489 p->diagnostic = 238;
1492 for (marctab = node->u.root.absyn->marc; marctab;
1493 marctab = marctab->next)
1494 if (marctab->reference == p->input_format)
1498 p->diagnostic = 238;
1502 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1503 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1504 selected, &p->rec_len)))
1505 p->diagnostic = 238;
1508 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1509 memcpy (new_buf, p->rec_buf, p->rec_len);
1510 p->rec_buf = new_buf;
1514 data1_free_tree(p->dh, node);
1516 data1_free_tree(p->dh, onode);
1521 static struct recType grs_type =
1530 RecType recTypeGrs = &grs_type;