recctrl/recgrs.c

   1 /* $Id: recgrs.c,v 1.86.2.6 2005-11-23 14:26:05 adam Exp $
   2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
   3    Index Data Aps
   4
   5 This file is part of the Zebra server.
   6
   7 Zebra is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
  19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.
  21 */
  22
  23 #include <stdio.h>
  24 #include <assert.h>
  25 #include <sys/types.h>
  26 #ifndef WIN32
  27 #include <unistd.h>
  28 #endif
  29
  30 #include <yaz/log.h>
  31 #include <yaz/oid.h>
  32
  33 #include <recctrl.h>
  34 #include "grsread.h"
  35
  36 #define GRS_MAX_WORD 512
  37
  38 struct RecWord_list {
  39     NMEM nmem;
  40     struct RecWord_entry **entries;
  41     unsigned hash_size;
  42     char *name;
  43 };
  44
  45 struct RecWord_entry {
  46     RecWord w;
  47     struct RecWord_entry *next;
  48 };
  49
  50 struct RecWord_list *RecWord_list_create(const char *name)
  51 {
  52     NMEM m = nmem_create();
  53     struct RecWord_list *p = nmem_malloc(m, sizeof(*p));
  54     size_t i;
  55
  56     p->hash_size = 127;
  57     p->nmem = m;
  58     p->entries = nmem_malloc(m, p->hash_size * sizeof(*p->entries));
  59     for (i = 0; i<p->hash_size; i++)
  60         p->entries[i] = 0;
  61     p->name = nmem_strdup(m, name);
  62     return p;
  63 }
  64
  65 int RecWord_list_lookadd(struct RecWord_list *l, RecWord *wrd)
  66 {
  67     struct RecWord_entry *e;
  68
  69     unsigned hash =
  70         (wrd->attrSet*15 + wrd->attrSet + wrd->reg_type) % l->hash_size;
  71
  72     for (e = l->entries[hash]; e; e = e->next)
  73         if (e->w.attrSet == wrd->attrSet &&
  74             e->w.attrUse == wrd->attrUse &&
  75             e->w.reg_type == wrd->reg_type &&
  76             e->w.length == wrd->length &&
  77             !memcmp(e->w.string, wrd->string, wrd->length))
  78         {
  79 #if 0
  80             fprintf(stderr, "DUP key found in %s\n", l->name);
  81             fprintf(stderr, "set=%d use=%d regtype=%c\n",
  82                     wrd->attrSet, wrd->attrUse, wrd->reg_type);
  83 #endif
  84             return 0;
  85         }
  86     e = nmem_malloc(l->nmem, sizeof(*e));
  87     e->next = l->entries[hash];
  88     l->entries[hash] = e;
  89     memcpy(&e->w, wrd, sizeof(*wrd));
  90     e->w.string = nmem_malloc(l->nmem, wrd->length);
  91     memcpy(e->w.string, wrd->string, wrd->length);
  92     return 1;
  93 }
  94
  95 void RecWord_list_destroy(struct RecWord_list *l)
  96 {
  97     if (l)
  98         nmem_destroy(l->nmem);
  99 }
 100
 101
 102 struct grs_handler {
 103     RecTypeGrs type;
 104     void *clientData;
 105     int initFlag;
 106     struct grs_handler *next;
 107 };
 108
 109 struct grs_handlers {
 110     struct grs_handler *handlers;
 111 };
 112
 113 static int read_grs_type (struct grs_handlers *h,
 114                           struct grs_read_info *p, const char *type,
 115                           data1_node **root)
 116 {
 117     struct grs_handler *gh = h->handlers;
 118     const char *cp = strchr (type, '.');
 119
 120     if (cp == NULL || cp == type)
 121     {
 122         cp = strlen(type) + type;
 123         *p->type = 0;
 124     }
 125     else
 126         strcpy (p->type, cp+1);
 127     for (gh = h->handlers; gh; gh = gh->next)
 128     {
 129         if (!memcmp (type, gh->type->type, cp-type) &&
 130             gh->type->type[cp-type] == '\0')
 131         {
 132             if (!gh->initFlag)
 133             {
 134                 gh->initFlag = 1;
 135                 gh->clientData = (*gh->type->init)();
 136             }
 137             p->clientData = gh->clientData;
 138             *root = (gh->type->read)(p);
 139             gh->clientData = p->clientData;
 140             return 0;
 141         }
 142     }
 143     return 1;
 144 }
 145
 146 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
 147 {
 148     struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
 149     gh->next = h->handlers;
 150     h->handlers = gh;
 151     gh->initFlag = 0;
 152     gh->clientData = 0;
 153     gh->type = t;
 154 }
 155
 156 static void *grs_init(RecType recType)
 157 {
 158     struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
 159     h->handlers = 0;
 160
 161     grs_add_handler (h, recTypeGrs_sgml);
 162     grs_add_handler (h, recTypeGrs_regx);
 163 #if HAVE_TCL_H
 164     grs_add_handler (h, recTypeGrs_tcl);
 165 #endif
 166     grs_add_handler (h, recTypeGrs_marc);
 167     grs_add_handler (h, recTypeGrs_marcxml);
 168 #if HAVE_EXPAT_H
 169     grs_add_handler (h, recTypeGrs_xml);
 170 #endif
 171 #if HAVE_PERL
 172     grs_add_handler (h, recTypeGrs_perl);
 173 #endif
 174     grs_add_handler (h, recTypeGrs_danbib);
 175     return h;
 176 }
 177
 178 static void grs_destroy(void *clientData)
 179 {
 180     struct grs_handlers *h = (struct grs_handlers *) clientData;
 181     struct grs_handler *gh = h->handlers, *gh_next;
 182     while (gh)
 183     {
 184         gh_next = gh->next;
 185         if (gh->initFlag)
 186             (*gh->type->destroy)(gh->clientData);
 187         xfree (gh);
 188         gh = gh_next;
 189     }
 190     xfree (h);
 191 }
 192
 193 struct source_parser {
 194     int len;
 195     const char *tok;
 196     const char *src;
 197     int lookahead;
 198 };
 199
 200 static int sp_lex(struct source_parser *sp)
 201 {
 202     while (*sp->src == ' ')
 203         (sp->src)++;
 204     sp->tok = sp->src;
 205     sp->len = 0;
 206     while (*sp->src && !strchr("<>();,-: ", *sp->src))
 207     {
 208         sp->src++;
 209         sp->len++;
 210     }
 211     if (sp->len)
 212         sp->lookahead = 't';
 213     else
 214     {
 215         sp->lookahead = *sp->src;
 216         if (*sp->src)
 217             sp->src++;
 218     }
 219     return sp->lookahead;
 220 }
 221
 222
 223 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
 224 {
 225     if (sp->lookahead != 't')
 226         return 0;
 227     if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
 228     {
 229         if (n->which == DATA1N_data)
 230         {
 231             wrd->string = n->u.data.data;
 232             wrd->length = n->u.data.len;
 233         }
 234         sp_lex(sp);
 235     }
 236     else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
 237     {
 238         if (n->which == DATA1N_tag)
 239         {
 240             wrd->string = n->u.tag.tag;
 241             wrd->length = strlen(n->u.tag.tag);
 242         }
 243         sp_lex(sp);
 244     }
 245     else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
 246     {
 247         sp_lex(sp);
 248         if (sp->lookahead != '(')
 249             return 0;
 250         sp_lex(sp);
 251         if (sp->lookahead != 't')
 252             return 0;
 253
 254         if (n->which == DATA1N_tag)
 255         {
 256             data1_xattr *p = n->u.tag.attributes;
 257             while (p && strlen(p->name) != sp->len &&
 258                    memcmp (p->name, sp->tok, sp->len))
 259                 p = p->next;
 260             if (p)
 261             {
 262                 wrd->string = p->value;
 263                 wrd->length = strlen(p->value);
 264             }
 265         }
 266         sp_lex(sp);
 267         if (sp->lookahead != ')')
 268             return 0;
 269         sp_lex(sp);
 270     }
 271     else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
 272     {
 273         int start, len;
 274         sp_lex(sp);
 275         if (sp->lookahead != '(')
 276             return 0;
 277
 278         sp_lex(sp);
 279         sp_expr(sp, n, wrd);
 280         if (sp->lookahead != ',')
 281             return 0;
 282
 283         sp_lex(sp);
 284         if (sp->lookahead != 't')
 285             return 0;
 286         start = atoi_n(sp->tok, sp->len);
 287
 288         sp_lex(sp);
 289         if (sp->lookahead != ',')
 290             return 0;
 291
 292         sp_lex(sp);
 293         if (sp->lookahead != 't')
 294             return 0;
 295         len = atoi_n(sp->tok, sp->len);
 296
 297         sp_lex(sp);
 298         if (sp->lookahead != ')')
 299             return 0;
 300
 301         sp_lex(sp);
 302         if (wrd->string && wrd->length)
 303         {
 304             wrd->string += start;
 305             wrd->length -= start;
 306             if (wrd->length > len)
 307                 wrd->length = len;
 308         }
 309     }
 310     return 1;
 311 }
 312
 313 static int sp_parse(data1_node *n, RecWord *wrd, const char *src)
 314 {
 315     struct source_parser sp;
 316     sp.len = 0;
 317     sp.tok = 0;
 318     sp.src = src;
 319     sp.lookahead = 0;
 320     sp_lex(&sp);
 321
 322     return sp_expr(&sp, n, wrd);
 323 }
 324
 325 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
 326 {
 327     int res = 1;
 328     char *attname;
 329     data1_xattr *attr;
 330
 331     if (!p) {
 332         return 1;
 333     } else {
 334         if (p->which == XPATH_PREDICATE_RELATION) {
 335             if (p->u.relation.name[0]) {
 336                 if (*p->u.relation.name != '@') {
 337                     yaz_log(LOG_WARN,
 338                          "  Only attributes (@) are supported in xelm xpath predicates");
 339                     yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
 340                     return (1);
 341                 }
 342                 attname = p->u.relation.name + 1;
 343                 res = 0;
 344                 /* looking for the attribute with a specified name */
 345                 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
 346                     yaz_log(LOG_DEBUG,"  - attribute %s <-> %s", attname, attr->name );
 347
 348                     if (!strcmp(attr->name, attname)) {
 349                         if (p->u.relation.op[0]) {
 350                             if (*p->u.relation.op != '=') {
 351                                 yaz_log(LOG_WARN,
 352                                      "Only '=' relation is supported (%s)",p->u.relation.op);
 353                                 yaz_log(LOG_WARN, "predicate %s ignored", p->u.relation.name);
 354                                 res = 1; break;
 355                             } else {
 356                                 yaz_log(LOG_DEBUG,"    - value %s <-> %s",
 357                                      p->u.relation.value, attr->value );
 358                                 if (!strcmp(attr->value, p->u.relation.value)) {
 359                                     res = 1; break;
 360                                 }
 361                             }
 362                         } else {
 363                             /* attribute exists, no value specified */
 364                             res = 1; break;
 365                         }
 366                     }
 367                 }
 368                 yaz_log(LOG_DEBUG, "return %d", res);
 369                 return res;
 370             } else {
 371                 return 1;
 372             }
 373         }
 374         else if (p->which == XPATH_PREDICATE_BOOLEAN) {
 375             if (!strcmp(p->u.boolean.op,"and")) {
 376                 return d1_check_xpath_predicate(n, p->u.boolean.left)
 377                     && d1_check_xpath_predicate(n, p->u.boolean.right);
 378             }
 379             else if (!strcmp(p->u.boolean.op,"or")) {
 380                 return (d1_check_xpath_predicate(n, p->u.boolean.left)
 381                         || d1_check_xpath_predicate(n, p->u.boolean.right));
 382             } else {
 383                 yaz_log(LOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
 384                 return 1;
 385             }
 386         }
 387     }
 388     return 0;
 389 }
 390
 391 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
 392 {
 393     struct DFA_state *s = dfaar[0]; /* start state */
 394     struct DFA_tran *t;
 395     int i;
 396     const char *p = text;
 397     unsigned char c;
 398
 399     for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
 400         if (c >= t->ch[0] && c <= t->ch[1])
 401         {
 402             while (i >= 0)
 403             {
 404                 /* move to next state and return if we get a match */
 405                 s = dfaar[t->to];
 406                 if (s->rule_no)
 407                     return 1;
 408                 /* next char */
 409                 c = *p++;
 410                 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
 411                     if (c >= t->ch[0] && c <= t->ch[1])
 412                         break;
 413             }
 414         }
 415     return 0;
 416 }
 417
 418
/* *ostrich*

New function that looks for xpath "element" definitions in the abs, by
tagpath, using a kind of ugly regexp search. The DFAs were built while
parsing the abs, so here we just go through them and try to match each
against the given tagpath. The first matching entry is returned.

pop, 2002-12-13

Added support for enhanced xelm. Now [] predicates are considered
as well, when selecting indexing rules... (why the hell it's called
termlist???)

pop, 2003-01-17

*/
 435
 436 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
 437 {
 438     data1_absyn *abs = n->root->u.root.absyn;
 439     data1_xpelement *xpe = abs->xp_elements;
 440     data1_node *nn;
 441 #ifdef ENHANCED_XELM
 442     struct xpath_location_step *xp;
 443
 444 #endif
 445     char *pexpr = xmalloc(strlen(tagpath)+5);
 446     int ok = 0;
 447
 448     sprintf (pexpr, "/%s\n", tagpath);
 449     yaz_log(LOG_DEBUG,"Checking tagpath %s",tagpath);
 450     while (xpe)
 451     {
 452         int i;
 453         ok = dfa_match_first(xpe->dfa->states, pexpr);
 454         if (ok)
 455             yaz_log(LOG_DEBUG, " xpath got match %s",xpe->xpath_expr);
 456         else
 457             yaz_log(LOG_DEBUG, " xpath no match %s",xpe->xpath_expr);
 458
 459         if (ok) {
 460 #ifdef ENHANCED_XELM
 461             /* we have to check the perdicates up to the root node */
 462             xp = xpe->xpath;
 463
 464             /* find the first tag up in the node structure */
 465             nn = n; while (nn && nn->which != DATA1N_tag) {
 466                 nn = nn->parent;
 467             }
 468
 469             /* go from inside out in the node structure, while going
 470                backwards trough xpath location steps ... */
 471             for (i=xpe->xpath_len - 1; i>0; i--) {
 472
 473                 yaz_log(LOG_DEBUG,"Checking step %d: %s on tag %s",
 474                      i,xp[i].part,nn->u.tag.tag);
 475
 476                 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
 477                     yaz_log(LOG_DEBUG,"  Predicates didn't match");
 478                     ok = 0;
 479                     break;
 480                 }
 481
 482                 if (nn->which == DATA1N_tag) {
 483                     nn = nn->parent;
 484                 }
 485             }
 486 #endif
 487             if (ok) {
 488                 break;
 489             }
 490         }
 491         xpe = xpe->next;
 492     }
 493
 494     xfree(pexpr);
 495
 496     if (ok) {
 497       yaz_log(LOG_DEBUG,"Got it");
 498         return xpe->termlists;
 499     } else {
 500         return NULL;
 501     }
 502 }
 503
 504 /* use
 505      1   start element (tag)
 506      2   end element
 507      3   start attr (and attr-exact)
 508      4   end attr
 509
 510   1016   cdata
 511   1015   attr data
 512
 513   *ostrich*
 514
 515   Now, if there is a matching xelm described in abs, for the
 516   indexed element or the attribute,  then the data is handled according
 517   to those definitions...
 518
 519   modified by pop, 2002-12-13
 520 */
 521
 522 /* add xpath index for an attribute */
 523 static void index_xpath_attr (char *tag_path, char *name, char *value,
 524                               char *structure, struct recExtractCtrl *p,
 525                               RecWord *wrd)
 526 {
 527     wrd->attrSet = VAL_IDXPATH;
 528     wrd->attrUse = 1;
 529     wrd->reg_type = '0';
 530     wrd->string = tag_path;
 531     wrd->length = strlen(tag_path);
 532     (*p->tokenAdd)(wrd);
 533
 534     if (value) {
 535         wrd->attrUse = 1015;
 536         wrd->reg_type = 'w';
 537         wrd->string = value;
 538         wrd->length = strlen(value);
 539         (*p->tokenAdd)(wrd);
 540     }
 541
 542     wrd->attrUse = 2;
 543     wrd->reg_type = '0';
 544     wrd->string = tag_path;
 545     wrd->length = strlen(tag_path);
 546     (*p->tokenAdd)(wrd);
 547 }
 548
 549
 550
 551 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
 552                          int level, RecWord *wrd, int use,
 553                          struct RecWord_list *wl)
 554 {
 555     int i;
 556     char tag_path_full[1024];
 557     size_t flen = 0;
 558     data1_node *nn;
 559     int termlist_only = 1;
 560     data1_termlist *tl;
 561     int xpdone = 0;
 562
 563     yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
 564     if ((!n->root->u.root.absyn) ||
 565         (n->root->u.root.absyn->enable_xpath_indexing)) {
 566       termlist_only = 0;
 567     }
 568
 569     switch (n->which)
 570     {
 571     case DATA1N_data:
 572         wrd->string = n->u.data.data;
 573         wrd->length = n->u.data.len;
 574         xpdone = 0;
 575         flen = 0;
 576
 577         /* we have to fetch the whole path to the data tag */
 578         for (nn = n; nn; nn = nn->parent) {
 579             if (nn->which == DATA1N_tag) {
 580                 size_t tlen = strlen(nn->u.tag.tag);
 581                 if (tlen + flen > (sizeof(tag_path_full)-2)) return;
 582                 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
 583                 flen += tlen;
 584                 tag_path_full[flen++] = '/';
 585             }
 586             else if (nn->which == DATA1N_root)  break;
 587         }
 588
 589         tag_path_full[flen] = 0;
 590
 591         /* If we have a matching termlist... */
 592         if (n->root->u.root.absyn &&
 593             (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
 594         {
 595             for (; tl; tl = tl->next)
 596             {
 597                 /* need to copy recword because it may be changed */
 598                 RecWord wrd_tl;
 599                 wrd->reg_type = *tl->structure;
 600                 /* this is the ! case, so structure is for the xpath index */
 601                 memcpy (&wrd_tl, wrd, sizeof(*wrd));
 602                 if (tl->source)
 603                     sp_parse(n, &wrd_tl, tl->source);
 604                 if (!tl->att) {
 605                     wrd_tl.attrSet = VAL_IDXPATH;
 606                     wrd_tl.attrUse = use;
 607                     if (p->flagShowRecords)
 608                     {
 609                         int i;
 610                         printf("%*sXPath index", (level + 1) * 4, "");
 611                         printf (" XData:\"");
 612                         for (i = 0; i<wrd_tl.length && i < 40; i++)
 613                             fputc (wrd_tl.string[i], stdout);
 614                         fputc ('"', stdout);
 615                         if (wrd_tl.length > 40)
 616                             printf (" ...");
 617                         fputc ('\n', stdout);
 618                     }
 619                     else
 620                         (*p->tokenAdd)(&wrd_tl);
 621
 622                     xpdone = 1;
 623                 } else {
 624                     /* this is just the old fashioned attribute based index */
 625                     wrd_tl.attrSet = (int) (tl->att->parent->reference);
 626                     wrd_tl.attrUse = tl->att->locals->local;
 627                     if (p->flagShowRecords)
 628                     {
 629                         int i;
 630                         printf("%*sIdx: [%s]", (level + 1) * 4, "",
 631                                tl->structure);
 632                         printf("%s:%s [%d] %s",
 633                                tl->att->parent->name,
 634                                tl->att->name, tl->att->value,
 635                                tl->source);
 636                         printf (" XData:\"");
 637                         for (i = 0; i<wrd_tl.length && i < 40; i++)
 638                             fputc (wrd_tl.string[i], stdout);
 639                         fputc ('"', stdout);
 640                         if (wrd_tl.length > 40)
 641                             printf (" ...");
 642                         fputc ('\n', stdout);
 643                     }
 644                     else
 645                         (*p->tokenAdd)(&wrd_tl);
 646                 }
 647             }
 648         }
 649         /* xpath indexing is done, if there was no termlist given,
 650            or no ! in the termlist, and default indexing is enabled... */
 651         if (!p->flagShowRecords && !xpdone && !termlist_only)
 652         {
 653             wrd->attrSet = VAL_IDXPATH;
 654             wrd->attrUse = use;
 655             wrd->reg_type = 'w';
 656             (*p->tokenAdd)(wrd);
 657         }
 658         else
 659             wrd->seqno++;
 660         break;
 661     case DATA1N_tag:
 662         flen = 0;
 663         for (nn = n; nn; nn = nn->parent)
 664         {
 665             if (nn->which == DATA1N_tag)
 666             {
 667                 size_t tlen = strlen(nn->u.tag.tag);
 668                 if (tlen + flen > (sizeof(tag_path_full)-2))
 669                     return;
 670                 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
 671                 flen += tlen;
 672                 tag_path_full[flen++] = '/';
 673             }
 674             else if (nn->which == DATA1N_root)
 675                 break;
 676         }
 677
 678
 679         wrd->reg_type = '0';
 680         wrd->string = tag_path_full;
 681         wrd->length = flen;
 682         wrd->attrSet = VAL_IDXPATH;
 683         wrd->attrUse = use;
 684         if (p->flagShowRecords)
 685         {
 686             printf("%*s tag=", (level + 1) * 4, "");
 687             for (i = 0; i<wrd->length && i < 40; i++)
 688                 fputc (wrd->string[i], stdout);
 689             if (i == 40)
 690                 printf (" ..");
 691             printf("\n");
 692         }
 693         else
 694         {
 695             data1_xattr *xp;
 696             data1_termlist *tl;
 697             int do_xpindex;
 698
 699             tag_path_full[flen] = 0;
 700
 701             /* Add tag start/end xpath index, only when there is a ! in the apropriate xelm
 702                directive, or default xpath indexing is enabled */
 703             if (!(do_xpindex = 1 - termlist_only)) {
 704                 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
 705                     for (; tl; tl = tl->next)
 706                     {
 707                         if (!tl->att)
 708                             do_xpindex = 1;
 709                     }
 710                 }
 711             }
 712             if (do_xpindex) {
 713                 (*p->tokenAdd)(wrd);   /* index element pag (AKA tag path) */
 714             }
 715
 716             if (use == 1) /* only for the starting tag... */
 717             {
 718 #define MAX_ATTR_COUNT 50
 719                 data1_termlist *tll[MAX_ATTR_COUNT];
 720
 721                 int i = 0;
 722
 723                 /* get termlists for attributes, and find out, if we have to do xpath indexing */
 724                 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
 725                     i++;
 726                 }
 727
 728                 i = 0;
 729                 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
 730                     char comb[512];
 731                     int do_xpindex = 1 - termlist_only;
 732                     data1_termlist *tl;
 733                     char attr_tag_path_full[1024];
 734                     int int_len = flen;
 735
 736                     /* this could be cached as well */
 737                     sprintf (attr_tag_path_full, "@%s/%.*s",
 738                              xp->name, int_len, tag_path_full);
 739
 740                     tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
 741
 742                     /* if there is a ! in the xelm termlist, or default indexing is on,
 743                        proceed with xpath idx */
 744                     if ((tl = tll[i]))
 745                     {
 746                         for (; tl; tl = tl->next)
 747                         {
 748                             if (!tl->att)
 749                                 do_xpindex = 1;
 750                         }
 751                     }
 752
 753                     if (do_xpindex) {
 754
 755                         /* attribute  (no value) */
 756                         wrd->reg_type = '0';
 757                         wrd->attrUse = 3;
 758                         wrd->string = xp->name;
 759                         wrd->length = strlen(xp->name);
 760
 761                         wrd->seqno--;
 762                         (*p->tokenAdd)(wrd);
 763
 764                         if (xp->value &&
 765                             strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
 766
 767                             /* attribute value exact */
 768                             strcpy (comb, xp->name);
 769                             strcat (comb, "=");
 770                             strcat (comb, xp->value);
 771
 772                             wrd->attrUse = 3;
 773                             wrd->reg_type = '0';
 774                             wrd->string = comb;
 775                             wrd->length = strlen(comb);
 776                             wrd->seqno--;
 777
 778                             if (RecWord_list_lookadd(wl, wrd))
 779                                 (*p->tokenAdd)(wrd);
 780                         }
 781                     }
 782                     i++;
 783                 }
 784
 785                 i = 0;
 786                 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
 787                     data1_termlist *tl;
 788                     char attr_tag_path_full[1024];
 789                     int int_len = flen;
 790                     int xpdone = 0;
 791
 792                     sprintf (attr_tag_path_full, "@%s/%.*s",
 793                              xp->name, int_len, tag_path_full);
 794
 795                     if ((tl = tll[i]))
 796                     {
 797                         /* If there is a termlist given (=xelm directive) */
 798                         for (; tl; tl = tl->next)
 799                         {
 800                             if (!tl->att) {
 801                                 /* add xpath index for the attribute */
 802                                 index_xpath_attr (attr_tag_path_full, xp->name,
 803                                                   xp->value, tl->structure,
 804                                                   p, wrd);
 805                                 xpdone = 1;
 806                             }
 807                             else
 808                             {
 809                                 /* add attribute based index for the attribute */
 810                                 if (xp->value) {
 811                                     wrd->attrSet = (int)
 812                                         (tl->att->parent->reference);
 813                                     wrd->attrUse = tl->att->locals->local;
 814                                     wrd->reg_type = *tl->structure;
 815                                     wrd->string = xp->value;
 816                                     wrd->length = strlen(xp->value);
 817                                     if (RecWord_list_lookadd(wl, wrd))
 818                                         (*p->tokenAdd)(wrd);
 819                                 }
 820                             }
 821                         }
 822                     }
 823                     /* if there was no termlist for the given path,
 824                        or the termlist didn't have a ! element, index
 825                        the attribute as "w" */
 826                     if ((!xpdone) && (!termlist_only))
 827                     {
 828                         index_xpath_attr (attr_tag_path_full, xp->name,
 829                                           xp->value,  "w", p, wrd);
 830                     }
 831                     i++;
 832                 }
 833             }
 834         }
 835     }
 836 }
 837
 838 static void index_termlist (data1_node *par, data1_node *n,
 839                             struct recExtractCtrl *p, int level, RecWord *wrd)
 840 {
 841     data1_termlist *tlist = 0;
 842     data1_datatype dtype = DATA1K_string;
 843
 844     /*
 845      * cycle up towards the root until we find a tag with an att..
 846      * this has the effect of indexing locally defined tags with
 847      * the attribute of their ancestor in the record.
 848      */
 849
 850     while (!par->u.tag.element)
 851         if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
 852             break;
 853     if (!par || !(tlist = par->u.tag.element->termlists))
 854         return;
 855     if (par->u.tag.element->tag)
 856         dtype = par->u.tag.element->tag->kind;
 857
 858     for (; tlist; tlist = tlist->next)
 859     {
 860         /* consider source */
 861         wrd->string = 0;
 862         assert(tlist->source);
 863         sp_parse(n, wrd, tlist->source);
 864
 865         if (wrd->string)
 866         {
 867             if (p->flagShowRecords)
 868             {
 869                 int i;
 870                 printf("%*sIdx: [%s]", (level + 1) * 4, "",
 871                        tlist->structure);
 872                 printf("%s:%s [%d] %s",
 873                        tlist->att->parent->name,
 874                        tlist->att->name, tlist->att->value,
 875                        tlist->source);
 876                 printf (" XData:\"");
 877                 for (i = 0; i<wrd->length && i < 40; i++)
 878                     fputc (wrd->string[i], stdout);
 879                 fputc ('"', stdout);
 880                 if (wrd->length > 40)
 881                     printf (" ...");
 882                 fputc ('\n', stdout);
 883             }
 884             else
 885             {
 886                 wrd->reg_type = *tlist->structure;
 887                 wrd->attrSet = (int) (tlist->att->parent->reference);
 888                 wrd->attrUse = tlist->att->locals->local;
 889                 (*p->tokenAdd)(wrd);
 890             }
 891         }
 892     }
 893 }
 894
 895 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
 896                     RecWord *wrd, struct RecWord_list *wl)
 897 {
 898     for (; n; n = n->next)
 899     {
 900         if (p->flagShowRecords) /* display element description to user */
 901         {
 902             if (n->which == DATA1N_root)
 903             {
 904                 printf("%*s", level * 4, "");
 905                 printf("Record type: '%s'\n", n->u.root.type);
 906             }
 907             else if (n->which == DATA1N_tag)
 908             {
 909                 data1_element *e;
 910
 911                 printf("%*s", level * 4, "");
 912                 if (!(e = n->u.tag.element))
 913                     printf("Local tag: '%s'\n", n->u.tag.tag);
 914                 else
 915                 {
 916                     printf("Elm: '%s' ", e->name);
 917                     if (e->tag)
 918                     {
 919                         data1_tag *t = e->tag;
 920
 921                         printf("TagNam: '%s' ", t->names->name);
 922                         printf("(");
 923                         if (t->tagset)
 924                             printf("%s[%d],", t->tagset->name, t->tagset->type);
 925                         else
 926                             printf("?,");
 927                         if (t->which == DATA1T_numeric)
 928                             printf("%d)", t->value.numeric);
 929                         else
 930                             printf("'%s')", t->value.string);
 931                     }
 932                     printf("\n");
 933                 }
 934             }
 935         }
 936
 937         if (n->which == DATA1N_tag)
 938         {
 939             index_termlist (n, n, p, level, wrd);
 940             /* index start tag */
 941             if (n->root->u.root.absyn)
 942                 index_xpath (n, p, level, wrd, 1, wl);
 943         }
 944
 945         if (n->child)
 946             if (dumpkeys(n->child, p, level + 1, wrd, wl) < 0)
 947                 return -1;
 948
 949
 950         if (n->which == DATA1N_data)
 951         {
 952             data1_node *par = get_parent_tag(p->dh, n);
 953
 954             if (p->flagShowRecords)
 955             {
 956                 printf("%*s", level * 4, "");
 957                 printf("Data: ");
 958                 if (n->u.data.len > 256)
 959                     printf("'%.170s ... %.70s'\n", n->u.data.data,
 960                            n->u.data.data + n->u.data.len-70);
 961                 else if (n->u.data.len > 0)
 962                     printf("'%.*s'\n", n->u.data.len, n->u.data.data);
 963                 else
 964                     printf("NULL\n");
 965             }
 966
 967             if (par)
 968                 index_termlist (par, n, p, level, wrd);
 969
 970             index_xpath (n, p, level, wrd, 1016, wl);
 971         }
 972
 973         if (n->which == DATA1N_tag)
 974         {
 975             /* index end tag */
 976             index_xpath (n, p, level, wrd, 2, wl);
 977         }
 978
 979         if (p->flagShowRecords && n->which == DATA1N_root)
 980         {
 981             printf("%*s-------------\n\n", level * 4, "");
 982         }
 983     }
 984     return 0;
 985 }
 986
 987 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
 988 {
 989     oident oe;
 990     int oidtmp[OID_SIZE];
 991     RecWord wrd;
 992     int r;
 993     struct RecWord_list *wl = 0;
 994
 995     oe.proto = PROTO_Z3950;
 996     oe.oclass = CLASS_SCHEMA;
 997     if (n->u.root.absyn)
 998     {
 999         oe.value = n->u.root.absyn->reference;
1000
1001         if ((oid_ent_to_oid (&oe, oidtmp)))
1002             (*p->schemaAdd)(p, oidtmp);
1003     }
1004     (*p->init)(p, &wrd);
1005
1006     wl = RecWord_list_create("grs_extract_tree");
1007     r = dumpkeys(n, p, 0, &wrd, wl);
1008     RecWord_list_destroy(wl);
1009     return r;
1010 }
1011
1012 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
1013                            NMEM mem)
1014 {
1015     data1_node *n;
1016     struct grs_read_info gri;
1017     oident oe;
1018     int oidtmp[OID_SIZE];
1019     RecWord wrd;
1020     struct RecWord_list *wl = 0;
1021     int ret_val;
1022
1023     gri.readf = p->readf;
1024     gri.seekf = p->seekf;
1025     gri.tellf = p->tellf;
1026     gri.endf = p->endf;
1027     gri.fh = p->fh;
1028     gri.offset = p->offset;
1029     gri.mem = mem;
1030     gri.dh = p->dh;
1031
1032     if (read_grs_type (h, &gri, p->subType, &n))
1033         return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
1034     if (!n)
1035         return RECCTRL_EXTRACT_EOF;
1036     oe.proto = PROTO_Z3950;
1037     oe.oclass = CLASS_SCHEMA;
1038 #if 0
1039     if (!n->u.root.absyn)
1040         return RECCTRL_EXTRACT_ERROR;
1041 #endif
1042     if (n->u.root.absyn)
1043     {
1044         oe.value = n->u.root.absyn->reference;
1045         if ((oid_ent_to_oid (&oe, oidtmp)))
1046             (*p->schemaAdd)(p, oidtmp);
1047     }
1048     data1_concat_text(p->dh, mem, n);
1049
1050     /* ensure our data1 tree is UTF-8 */
1051     data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1052
1053 #if 0
1054     data1_pr_tree (p->dh, n, stdout);
1055 #endif
1056
1057     wl = RecWord_list_create("grs.sgml");
1058
1059     (*p->init)(p, &wrd);
1060     if (dumpkeys(n, p, 0, &wrd, wl) < 0)
1061         ret_val = RECCTRL_EXTRACT_ERROR_GENERIC;
1062     else
1063         ret_val = RECCTRL_EXTRACT_OK;
1064     data1_free_tree(p->dh, n);
1065     RecWord_list_destroy(wl);
1066
1067     return ret_val;
1068 }
1069
1070 static int grs_extract(void *clientData, struct recExtractCtrl *p)
1071 {
1072     int ret;
1073     NMEM mem = nmem_create ();
1074     struct grs_handlers *h = (struct grs_handlers *) clientData;
1075
1076     ret = grs_extract_sub(h, p, mem);
1077     nmem_destroy(mem);
1078     return ret;
1079 }
1080
1081 /*
1082  * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
1083  */
1084 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
1085 {
1086     data1_esetname *eset;
1087     Z_Espec1 *espec = 0;
1088     Z_ElementSpec *p;
1089
1090     switch (c->which)
1091     {
1092     case Z_RecordComp_simple:
1093         if (c->u.simple->which != Z_ElementSetNames_generic)
1094             return 26; /* only generic form supported. Fix this later */
1095         if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1096                                          c->u.simple->u.generic)))
1097         {
1098             yaz_log(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1099             return 25; /* invalid esetname */
1100         }
1101         yaz_log(LOG_DEBUG, "Esetname '%s' in simple compspec",
1102              c->u.simple->u.generic);
1103         espec = eset->spec;
1104         break;
1105     case Z_RecordComp_complex:
1106         if (c->u.complex->generic)
1107         {
1108             /* insert check for schema */
1109             if ((p = c->u.complex->generic->elementSpec))
1110             {
1111                 switch (p->which)
1112                 {
1113                 case Z_ElementSpec_elementSetName:
1114                     if (!(eset =
1115                           data1_getesetbyname(dh, n->u.root.absyn,
1116                                               p->u.elementSetName)))
1117                     {
1118                         yaz_log(LOG_LOG, "Unknown esetname '%s'",
1119                              p->u.elementSetName);
1120                         return 25; /* invalid esetname */
1121                     }
1122                     yaz_log(LOG_DEBUG, "Esetname '%s' in complex compspec",
1123                          p->u.elementSetName);
1124                     espec = eset->spec;
1125                     break;
1126                 case Z_ElementSpec_externalSpec:
1127                     if (p->u.externalSpec->which == Z_External_espec1)
1128                     {
1129                         yaz_log(LOG_DEBUG, "Got Espec-1");
1130                         espec = p->u.externalSpec-> u.espec1;
1131                     }
1132                     else
1133                     {
1134                         yaz_log(LOG_LOG, "Unknown external espec.");
1135                         return 25; /* bad. what is proper diagnostic? */
1136                     }
1137                     break;
1138                 }
1139             }
1140         }
1141         else
1142             return 26; /* fix */
1143     }
1144     if (espec)
1145     {
1146         yaz_log(LOG_DEBUG, "Element: Espec-1 match");
1147         return data1_doespec1(dh, n, espec);
1148     }
1149     else
1150     {
1151         yaz_log(LOG_DEBUG, "Element: all match");
1152         return -1;
1153     }
1154 }
1155
1156 /* Add Zebra info in separate namespace ...
1157         <root
1158          ...
1159          <metadata xmlns="http://www.indexdata.dk/zebra/">
1160           <size>359</size>
1161           <localnumber>447</localnumber>
1162           <filename>records/genera.xml</filename>
1163          </metadata>
1164         </root>
1165 */
1166
1167 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1168                                 NMEM mem)
1169 {
1170     const char *idzebra_ns[3];
1171     const char *i2 = "\n  ";
1172     const char *i4 = "\n    ";
1173     data1_node *n;
1174
1175     idzebra_ns[0] = "xmlns";
1176     idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1177     idzebra_ns[2] = 0;
1178
1179     data1_mk_text (p->dh, mem, i2, top);
1180
1181     n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1182
1183     data1_mk_text (p->dh, mem, "\n", top);
1184
1185     data1_mk_text (p->dh, mem, i4, n);
1186
1187     data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1188
1189     if (p->score != -1)
1190     {
1191         data1_mk_text (p->dh, mem, i4, n);
1192         data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1193     }
1194     data1_mk_text (p->dh, mem, i4, n);
1195     data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
1196     if (p->fname)
1197     {
1198         data1_mk_text (p->dh, mem, i4, n);
1199         data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1200     }
1201     data1_mk_text (p->dh, mem, i2, n);
1202 }
1203
1204 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
1205 {
1206     data1_node *node = 0, *onode = 0, *top;
1207     data1_node *dnew;
1208     data1_maptab *map;
1209     int res, selected = 0;
1210     NMEM mem;
1211     struct grs_read_info gri;
1212     const char *tagname;
1213     struct grs_handlers *h = (struct grs_handlers *) clientData;
1214     int requested_schema = VAL_NONE;
1215     data1_marctab *marctab;
1216     int dummy;
1217
1218     mem = nmem_create();
1219     gri.readf = p->readf;
1220     gri.seekf = p->seekf;
1221     gri.tellf = p->tellf;
1222     gri.endf = NULL;
1223     gri.fh = p->fh;
1224     gri.offset = 0;
1225     gri.mem = mem;
1226     gri.dh = p->dh;
1227
1228     yaz_log(LOG_DEBUG, "grs_retrieve");
1229     if (read_grs_type (h, &gri, p->subType, &node))
1230     {
1231         p->diagnostic = 14;
1232         nmem_destroy (mem);
1233         return 0;
1234     }
1235     if (!node)
1236     {
1237         p->diagnostic = 14;
1238         nmem_destroy (mem);
1239         return 0;
1240     }
1241     data1_concat_text(p->dh, mem, node);
1242
1243     /* ensure our data1 tree is UTF-8 */
1244     data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1245
1246 #if 0
1247     data1_pr_tree (p->dh, node, stdout);
1248 #endif
1249     top = data1_get_root_tag (p->dh, node);
1250
1251     yaz_log(LOG_DEBUG, "grs_retrieve: size");
1252     tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1253     if (tagname &&
1254         (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1255     {
1256         dnew->u.data.what = DATA1I_text;
1257         dnew->u.data.data = dnew->lbuf;
1258         sprintf(dnew->u.data.data, "%d", p->recordSize);
1259         dnew->u.data.len = strlen(dnew->u.data.data);
1260     }
1261
1262     tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1263
1264     if (tagname && p->score >= 0 &&
1265         (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1266     {
1267         yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1268         dnew->u.data.what = DATA1I_num;
1269         dnew->u.data.data = dnew->lbuf;
1270         sprintf(dnew->u.data.data, "%d", p->score);
1271         dnew->u.data.len = strlen(dnew->u.data.data);
1272     }
1273
1274     tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1275                                   "localControlNumber");
1276     if (tagname && p->localno > 0 &&
1277         (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1278     {
1279         yaz_log(LOG_DEBUG, "grs_retrieve: %s", tagname);
1280         dnew->u.data.what = DATA1I_text;
1281         dnew->u.data.data = dnew->lbuf;
1282
1283         sprintf(dnew->u.data.data, "%d", p->localno);
1284         dnew->u.data.len = strlen(dnew->u.data.data);
1285     }
1286
1287     if (p->input_format == VAL_TEXT_XML)
1288         zebra_xml_metadata (p, top, mem);
1289
1290 #if 0
1291     data1_pr_tree (p->dh, node, stdout);
1292 #endif
1293 #if YAZ_VERSIONL >= 0x010903L
1294     if (p->comp && p->comp->which == Z_RecordComp_complex &&
1295         p->comp->u.complex->generic &&
1296         p->comp->u.complex->generic->which == Z_Schema_oid &&
1297         p->comp->u.complex->generic->schema.oid)
1298     {
1299         oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1300         if (oe)
1301             requested_schema = oe->value;
1302     }
1303 #else
1304     if (p->comp && p->comp->which == Z_RecordComp_complex &&
1305         p->comp->u.complex->generic && p->comp->u.complex->generic->schema)
1306     {
1307         oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
1308         if (oe)
1309             requested_schema = oe->value;
1310     }
1311 #endif
1312
1313     /* If schema has been specified, map if possible, then check that
1314      * we got the right one
1315      */
1316     if (requested_schema != VAL_NONE)
1317     {
1318         yaz_log(LOG_DEBUG, "grs_retrieve: schema mapping");
1319         for (map = node->u.root.absyn->maptabs; map; map = map->next)
1320         {
1321             if (map->target_absyn_ref == requested_schema)
1322             {
1323                 onode = node;
1324                 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1325                 {
1326                     p->diagnostic = 14;
1327                     nmem_destroy (mem);
1328                     return 0;
1329                 }
1330                 break;
1331             }
1332         }
1333         if (node->u.root.absyn &&
1334             requested_schema != node->u.root.absyn->reference)
1335         {
1336             p->diagnostic = 238;
1337             nmem_destroy (mem);
1338             return 0;
1339         }
1340     }
1341     /*
1342      * Does the requested format match a known syntax-mapping? (this reflects
1343      * the overlap of schema and formatting which is inherent in the MARC
1344      * family)
1345      */
1346     yaz_log(LOG_DEBUG, "grs_retrieve: syntax mapping");
1347     if (node->u.root.absyn)
1348         for (map = node->u.root.absyn->maptabs; map; map = map->next)
1349         {
1350             if (map->target_absyn_ref == p->input_format)
1351             {
1352                 onode = node;
1353                 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1354                 {
1355                     p->diagnostic = 14;
1356                     nmem_destroy (mem);
1357                     return 0;
1358                 }
1359                 break;
1360             }
1361         }
1362     yaz_log(LOG_DEBUG, "grs_retrieve: schemaIdentifier");
1363     if (node->u.root.absyn &&
1364         node->u.root.absyn->reference != VAL_NONE &&
1365         p->input_format == VAL_GRS1)
1366     {
1367         oident oe;
1368         Odr_oid *oid;
1369         int oidtmp[OID_SIZE];
1370
1371         oe.proto = PROTO_Z3950;
1372         oe.oclass = CLASS_SCHEMA;
1373         oe.value = node->u.root.absyn->reference;
1374
1375         if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1376         {
1377             char tmp[128];
1378             data1_handle dh = p->dh;
1379             char *p = tmp;
1380             int *ii;
1381
1382             for (ii = oid; *ii >= 0; ii++)
1383             {
1384                 if (p != tmp)
1385                         *(p++) = '.';
1386                 sprintf(p, "%d", *ii);
1387                 p += strlen(p);
1388             }
1389             if ((dnew = data1_mk_tag_data_wd(dh, top,
1390                                              "schemaIdentifier", mem)))
1391             {
1392                 dnew->u.data.what = DATA1I_oid;
1393                 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1394                 memcpy(dnew->u.data.data, tmp, p - tmp);
1395                 dnew->u.data.len = p - tmp;
1396             }
1397         }
1398     }
1399
1400     yaz_log(LOG_DEBUG, "grs_retrieve: element spec");
1401     if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
1402     {
1403         p->diagnostic = res;
1404         if (onode)
1405             data1_free_tree(p->dh, onode);
1406         data1_free_tree(p->dh, node);
1407         nmem_destroy(mem);
1408         return 0;
1409     }
1410     else if (p->comp && !res)
1411         selected = 1;
1412
1413 #if 0
1414     data1_pr_tree (p->dh, node, stdout);
1415 #endif
1416     yaz_log(LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1417     switch (p->output_format = (p->input_format != VAL_NONE ?
1418                                 p->input_format : VAL_SUTRS))
1419     {
1420     case VAL_TEXT_XML:
1421
1422 #if 0
1423         data1_pr_tree (p->dh, node, stdout);
1424 #endif
1425
1426         if (p->encoding)
1427             data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1428
1429         if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1430                                               &p->rec_len)))
1431             p->diagnostic = 238;
1432         else
1433         {
1434             char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1435             memcpy (new_buf, p->rec_buf, p->rec_len);
1436             p->rec_buf = new_buf;
1437         }
1438         break;
1439     case VAL_GRS1:
1440         dummy = 0;
1441         if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1442                                           p->odr, &dummy)))
1443             p->diagnostic = 238; /* not available in requested syntax */
1444         else
1445             p->rec_len = (size_t) (-1);
1446         break;
1447     case VAL_EXPLAIN:
1448         if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1449                                                p->odr)))
1450             p->diagnostic = 238;
1451         else
1452             p->rec_len = (size_t) (-1);
1453         break;
1454     case VAL_SUMMARY:
1455         if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1456                                                p->odr)))
1457             p->diagnostic = 238;
1458         else
1459             p->rec_len = (size_t) (-1);
1460         break;
1461     case VAL_SUTRS:
1462         if (p->encoding)
1463             data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1464         if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1465                                            &p->rec_len)))
1466             p->diagnostic = 238;
1467         else
1468         {
1469             char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1470             memcpy (new_buf, p->rec_buf, p->rec_len);
1471             p->rec_buf = new_buf;
1472         }
1473         break;
1474     case VAL_SOIF:
1475         if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1476                                             &p->rec_len)))
1477             p->diagnostic = 238;
1478         else
1479         {
1480             char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1481             memcpy (new_buf, p->rec_buf, p->rec_len);
1482             p->rec_buf = new_buf;
1483         }
1484         break;
1485     default:
1486         if (!node->u.root.absyn)
1487         {
1488             p->diagnostic = 238;
1489             break;
1490         }
1491         for (marctab = node->u.root.absyn->marc; marctab;
1492              marctab = marctab->next)
1493             if (marctab->reference == p->input_format)
1494                 break;
1495         if (!marctab)
1496         {
1497             p->diagnostic = 238;
1498             break;
1499         }
1500         if (p->encoding)
1501             data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
1502         if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1503                                         selected, &p->rec_len)))
1504             p->diagnostic = 238;
1505         else
1506         {
1507             char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1508             memcpy (new_buf, p->rec_buf, p->rec_len);
1509                 p->rec_buf = new_buf;
1510         }
1511     }
1512     if (node)
1513         data1_free_tree(p->dh, node);
1514     if (onode)
1515         data1_free_tree(p->dh, onode);
1516     nmem_destroy(mem);
1517     return 0;
1518 }
1519
1520 static struct recType grs_type =
1521 {
1522     "grs",
1523     grs_init,
1524     grs_destroy,
1525     grs_extract,
1526     grs_retrieve
1527 };
1528
1529 RecType recTypeGrs = &grs_type;