updated test since unused attributes are no longer printed out
[idzebra-moved-to-github.git] / index / retrieve.c
1 /* $Id: retrieve.c,v 1.59 2006-11-24 12:21:31 marc Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25
26 #include <fcntl.h>
27 #ifdef WIN32
28 #include <io.h>
29 #include <process.h>
30 #endif
31 #if HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34
35 #include "index.h"
36 #include <yaz/diagbib1.h>
37 #include <direntz.h>
38
39
40 #define ZEBRA_XML_HEADER_STR "<record xmlns=\"http://www.indexdata.com/zebra/\""
41
42 static int zebra_create_record_stream(ZebraHandle zh, 
43                                Record *rec,
44                                struct ZebraRecStream *stream){
45
46     RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
47
48     if ((*rec)->size[recInfo_storeData] > 0)
49         zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50                                 (*rec)->size[recInfo_storeData]);
51     else
52     {
53         char full_rep[1024];
54         int fd;
55             
56         if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
57             strcpy(full_rep, zh->path_reg);
58             strcat(full_rep, "/");
59             strcat(full_rep, (*rec)->info[recInfo_filename]);
60         }
61         else
62             strcpy(full_rep, (*rec)->info[recInfo_filename]);
63             
64         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65             yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
66                      full_rep);
67             rec_free(rec);
68             return 14;
69         }
70         zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
71     }
72     return 0;
73 }
74     
75
76
/*
 * Split the tail of a "zebra::index" element set name into index name
 * and index type.
 *
 * `elem` is expected to be either NULL/empty (no restriction) or of the
 * form "::INDEX" or "::INDEX:TYPE".
 *
 * On return *index / *type point into `elem` (they are NOT NUL-terminated
 * copies; use *index_len / *type_len), or are 0 with length 0 when the
 * corresponding part is absent.
 *
 * Returns 1 when `elem` is NULL, empty, or well-formed; returns 0 when it
 * does not start with "::", when nothing follows "::", or when a colon
 * after the index is not followed by a type.
 */
static int parse_zebra_elem(const char *elem,
                            const char **index, size_t *index_len,
                            const char **type, size_t *type_len)
{
    const char *spec;
    const char *colon;

    *index = 0;
    *index_len = 0;

    *type = 0;
    *type_len = 0;

    /* nothing given: valid, no index and no type selected */
    if (!elem || !*elem)
        return 1;

    /* must start with '::' and have something following it; the
       comparisons short-circuit, so no byte past the NUL is read */
    if (elem[0] != ':' || elem[1] != ':' || elem[2] == '\0')
        return 0;

    spec = elem + 2;
    colon = strchr(spec, ':');

    if (!colon)
    {
        /* index only, no type */
        *index = spec;
        *index_len = strlen(spec);
        return 1;
    }

    if (colon[1] == '\0')
        return 0;               /* colon, but no following type */

    /* index, colon and type */
    *index = spec;
    *index_len = (size_t) (colon - spec);
    *type = colon + 1;
    *type_len = strlen(colon + 1);
    return 1;
}
120
121
122 int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr,
123                               Record rec,
124                               const char *elemsetname,
125                               oid_value input_format,
126                               oid_value *output_format,
127                               char **rec_bufp, int *rec_lenp)
128 {
129     const char *retrieval_index;
130     size_t retrieval_index_len; 
131     const char *retrieval_type;
132     size_t retrieval_type_len;
133     WRBUF wrbuf = 0;
134     zebra_rec_keys_t keys;
135     
136     /* set output variables before processing possible error states */
137     /* *rec_lenp = 0; */
138
139     /* only accept XML and SUTRS requests */
140     if (input_format != VAL_TEXT_XML
141         && input_format != VAL_SUTRS){
142         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
143                 elemsetname);
144         *output_format = VAL_NONE;
145         return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
146     }
147
148     if (!parse_zebra_elem(elemsetname,
149                      &retrieval_index, &retrieval_index_len,
150                      &retrieval_type,  &retrieval_type_len))
151         return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
152
153     if (retrieval_type_len != 0 && retrieval_type_len != 1)
154     {
155         return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
156     }
157
158     if (retrieval_index_len)
159     {
160         char retrieval_index_cstr[256];
161
162         if (retrieval_index_len  < sizeof(retrieval_index_cstr) -1)
163         {
164             memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
165             retrieval_index_cstr[retrieval_index_len] = '\0';
166             
167             if (zebraExplain_lookup_attr_str(zh->reg->zei,
168                                              zinfo_index_category_index,
169                                              (retrieval_type_len == 0 ? -1 : 
170                                               retrieval_type[0]),
171                                              retrieval_index_cstr) == -1)
172                 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
173         }
174     }
175
176     keys = zebra_rec_keys_open();
177     zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
178                            rec->size[recInfo_delKeys], 0);
179
180     wrbuf = wrbuf_alloc();
181     if (zebra_rec_keys_rewind(keys)){
182         size_t slen;
183         const char *str;
184         struct it_key key_in;
185
186         if (input_format == VAL_TEXT_XML)
187         {
188             *output_format = VAL_TEXT_XML;
189             wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
190                          " sysno=\"" ZINT_FORMAT "\""
191                          " set=\"zebra::index%s/\">\n",
192                          sysno, elemsetname);
193         }
194         else if (input_format == VAL_SUTRS)
195             *output_format = VAL_SUTRS;
196
197         while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){
198             int i;
199             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
200             int index_type;
201             const char *db = 0;
202             const char *string_index = 0;
203             size_t string_index_len;
204             char dst_buf[IT_MAX_WORD];
205             
206             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
207                                     &string_index);
208             string_index_len = strlen(string_index);
209
210             /* process only if index is not defined, 
211                or if defined and matching */
212             if (retrieval_index == 0 
213                 || (string_index_len == retrieval_index_len 
214                     && !memcmp(string_index, retrieval_index,
215                                string_index_len))){
216                
217                 /* process only if type is not defined, or is matching */
218                 if (retrieval_type == 0 
219                     || (retrieval_type_len == 1 
220                         && retrieval_type[0] == index_type)){
221                     
222
223                     zebra_term_untrans(zh, index_type, dst_buf, str);
224                     if (strlen(dst_buf)){
225
226                         if (input_format == VAL_TEXT_XML){
227                             wrbuf_printf(wrbuf, "  <index name=\"%s\"", 
228                                          string_index);
229                             
230                             wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
231                             
232                             wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">", 
233                                          key_in.mem[key_in.len -1]);
234                         
235                             wrbuf_xmlputs(wrbuf, dst_buf);
236                             wrbuf_printf(wrbuf, "</index>\n");
237                         }
238                         else if (input_format == VAL_SUTRS){
239                             wrbuf_printf(wrbuf, "%s ", string_index);
240                             
241                             wrbuf_printf(wrbuf, "%c", index_type);
242                             
243                             for (i = 1; i < key_in.len; i++)
244                                 wrbuf_printf(wrbuf, " " ZINT_FORMAT, 
245                                              key_in.mem[i]);
246
247                         /* zebra_term_untrans(zh, index_type, dst_buf, str); */
248                             wrbuf_printf(wrbuf, " %s", dst_buf);
249                         
250                             wrbuf_printf(wrbuf, "\n");
251                         }
252                     }
253                     
254                 }
255             }
256         }
257         if (input_format == VAL_TEXT_XML)
258             wrbuf_printf(wrbuf, "</record>\n");
259      }
260     *rec_lenp = wrbuf_len(wrbuf);
261     *rec_bufp = odr_malloc(odr, *rec_lenp);
262     memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
263     wrbuf_free(wrbuf, 1);
264     zebra_rec_keys_close(keys);
265     return 0;
266 }
267
268
269 static void retrieve_puts_attr(WRBUF wrbuf, const char *name,
270                                const char *value)
271 {
272     if (value)
273     {
274         wrbuf_printf(wrbuf, " %s=\"", name);
275         wrbuf_xmlputs(wrbuf, value);
276         wrbuf_printf(wrbuf, "\"");
277     }
278 }
279
280 static void retrieve_puts_attr_int(WRBUF wrbuf, const char *name,
281                                const int value)
282 {
283     wrbuf_printf(wrbuf, " %s=\"%i\"", name, value);
284 }
285
286 static void retrieve_puts_str(WRBUF wrbuf, const char *name,
287                                const char *value)
288 {
289     if (value)
290         wrbuf_printf(wrbuf, "%s %s\n", name, value);
291 }
292
293 static void retrieve_puts_int(WRBUF wrbuf, const char *name,
294                                const int value)
295 {
296     wrbuf_printf(wrbuf, "%s %i\n", name, value);
297 }
298
299 int zebra_special_fetch(ZebraHandle zh, zint sysno, int score, ODR odr,
300                            const char *elemsetname,
301                            oid_value input_format,
302                            oid_value *output_format,
303                            char **rec_bufp, int *rec_lenp)
304 {
305     Record rec;
306     
307     /* set output variables before processing possible error states */
308     /* *rec_lenp = 0; */
309
310
311
312     /* processing zebra::meta::sysno elemset without fetching binary data */
313     if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno"))
314     {
315         int ret = 0;
316         WRBUF wrbuf = wrbuf_alloc();
317         if (input_format == VAL_SUTRS)
318         {
319             wrbuf_printf(wrbuf, ZINT_FORMAT, sysno);
320             *output_format = VAL_SUTRS;
321         } 
322         else if (input_format == VAL_TEXT_XML)
323         {
324             wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
325                          " sysno=\"" ZINT_FORMAT "\""
326                          " set=\"zebra::%s\"/>\n",
327                          sysno, elemsetname);
328             *output_format = VAL_TEXT_XML;
329         }
330         *rec_lenp = wrbuf_len(wrbuf);
331         if (*rec_lenp)
332             *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
333         else
334             ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
335         wrbuf_free(wrbuf, 1);
336         return ret;
337     }
338
339     /* fetching binary record up for all other display elementsets */
340     rec = rec_get(zh->reg->records, sysno);
341     if (!rec)
342     {
343         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
344         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
345     }
346
347     /* processing special elementsetnames zebra::data */    
348     if (elemsetname && 0 == strcmp(elemsetname, "data"))
349     {
350         struct ZebraRecStream stream;
351         RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
352         zebra_create_record_stream(zh, &rec, &stream);
353         *output_format = input_format;
354         *rec_lenp = recordAttr->recordSize;
355         *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
356         stream.readf(&stream, *rec_bufp, *rec_lenp);
357         stream.destroy(&stream);
358         rec_free(&rec);
359         return 0;
360     }
361
362     /* only accept XML and SUTRS requests from now */
363     if (input_format != VAL_TEXT_XML && input_format != VAL_SUTRS)
364     {
365         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
366                 elemsetname);
367         return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
368     }
369     
370
371     /* processing special elementsetnames zebra::meta:: */
372     if (elemsetname && 0 == strcmp(elemsetname, "meta"))
373     {
374         int ret = 0;
375         WRBUF wrbuf = wrbuf_alloc();
376         RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
377
378         if (input_format == VAL_TEXT_XML)
379         {
380             *output_format = VAL_TEXT_XML;
381             
382             wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR
383                          " sysno=\"" ZINT_FORMAT "\"", sysno);
384             retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]);
385             retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]);
386             retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]);
387             if (score >= 0)
388                 retrieve_puts_attr_int(wrbuf, "score", score);
389            
390             wrbuf_printf(wrbuf,
391                          " rank=\"" ZINT_FORMAT "\""
392                          " size=\"%i\""
393                          " set=\"zebra::%s\"/>\n",
394                          recordAttr->staticrank,
395                          recordAttr->recordSize,
396                          elemsetname);
397         }
398         else if (input_format == VAL_SUTRS)
399         {
400             *output_format = VAL_SUTRS;
401             wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno);
402             retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]);
403             retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]);
404             retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]);
405             if (score >= 0)
406                 retrieve_puts_int(wrbuf, "score", score);
407
408             wrbuf_printf(wrbuf,
409                          "rank " ZINT_FORMAT "\n"
410                          "size %i\n"
411                          "set zebra::%s\n",
412                          recordAttr->staticrank,
413                          recordAttr->recordSize,
414                          elemsetname);
415         }
416         *rec_lenp = wrbuf_len(wrbuf);
417         if (*rec_lenp)
418             *rec_bufp = odr_strdup(odr, wrbuf_buf(wrbuf));
419         else
420             ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
421
422         wrbuf_free(wrbuf, 1);
423         rec_free(&rec);
424         return ret;
425     }
426
427     /* processing special elementsetnames zebra::index:: */
428     if (elemsetname && 0 == strncmp(elemsetname, "index", 5)){
429         
430         int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
431                                             elemsetname + 5,
432                                             input_format, output_format,
433                                             rec_bufp, rec_lenp);
434         
435         rec_free(&rec);
436         return ret;
437     }
438
439     if (rec)
440         rec_free(&rec);
441     return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
442 }
443
444                           
445 int zebra_record_fetch(ZebraHandle zh, zint sysno, int score,
446                        zebra_snippets *hit_snippet, ODR odr,
447                        oid_value input_format, Z_RecordComposition *comp,
448                        oid_value *output_format,
449                        char **rec_bufp, int *rec_lenp, char **basenamep,
450                        char **addinfo)
451 {
452     Record rec;
453     char *fname, *file_type, *basename;
454     const char *elemsetname;
455     struct ZebraRecStream stream;
456     RecordAttr *recordAttr;
457     void *clientData;
458     int return_code = 0;
459
460     *basenamep = 0;
461     *addinfo = 0;
462     elemsetname = yaz_get_esn(comp);
463
464     /* processing zebra special elementset names of form 'zebra:: */
465     if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
466         return  zebra_special_fetch(zh, sysno, score, odr,
467                                     elemsetname + 7,
468                                     input_format, output_format,
469                                     rec_bufp, rec_lenp);
470
471
472     /* processing all other element set names */
473     rec = rec_get(zh->reg->records, sysno);
474     if (!rec)
475     {
476         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
477         *basenamep = 0;
478         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
479     }
480
481
482     recordAttr = rec_init_attr(zh->reg->zei, rec);
483
484     file_type = rec->info[recInfo_fileType];
485     fname = rec->info[recInfo_filename];
486     basename = rec->info[recInfo_databaseName];
487     *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
488     strcpy (*basenamep, basename);
489
490     yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
491             sysno, score);
492
493     zebra_create_record_stream(zh, &rec, &stream);
494     
495     {
496         /* snippets code */
497         zebra_snippets *snippet;
498         zebra_rec_keys_t reckeys = zebra_rec_keys_open();
499         RecType rt;
500         struct recRetrieveCtrl retrieveCtrl;
501
502         retrieveCtrl.stream = &stream;
503         retrieveCtrl.fname = fname;
504         retrieveCtrl.localno = sysno;
505         retrieveCtrl.staticrank = recordAttr->staticrank;
506         retrieveCtrl.score = score;
507         retrieveCtrl.recordSize = recordAttr->recordSize;
508         retrieveCtrl.odr = odr;
509         retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
510         retrieveCtrl.comp = comp;
511         retrieveCtrl.encoding = zh->record_encoding;
512         retrieveCtrl.diagnostic = 0;
513         retrieveCtrl.addinfo = 0;
514         retrieveCtrl.dh = zh->reg->dh;
515         retrieveCtrl.res = zh->res;
516         retrieveCtrl.rec_buf = 0;
517         retrieveCtrl.rec_len = -1;
518         retrieveCtrl.hit_snippet = hit_snippet;
519         retrieveCtrl.doc_snippet = zebra_snippets_create();
520
521         zebra_rec_keys_set_buf(reckeys,
522                                rec->info[recInfo_delKeys],
523                                rec->size[recInfo_delKeys], 
524                                0);
525         zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
526         zebra_rec_keys_close(reckeys);
527
528 #if 0
529         /* for debugging purposes */
530         yaz_log(YLOG_LOG, "DOC SNIPPET:");
531         zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
532         yaz_log(YLOG_LOG, "HIT SNIPPET:");
533         zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
534 #endif
535         snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
536                                         retrieveCtrl.hit_snippet,
537                                         10);
538 #if 0
539         /* for debugging purposes */
540         yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
541         zebra_snippets_log(snippet, YLOG_LOG);
542 #endif
543
544         if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
545                                   file_type, &clientData)))
546         {
547             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
548         }
549         else
550         {
551             (*rt->retrieve)(clientData, &retrieveCtrl);
552             return_code = retrieveCtrl.diagnostic;
553
554             *output_format = retrieveCtrl.output_format;
555             *rec_bufp = (char *) retrieveCtrl.rec_buf;
556             *rec_lenp = retrieveCtrl.rec_len;
557             *addinfo = retrieveCtrl.addinfo;
558         }
559
560         zebra_snippets_destroy(snippet);
561         zebra_snippets_destroy(retrieveCtrl.doc_snippet);
562      }
563
564     stream.destroy(&stream);
565     rec_free(&rec);
566
567     return return_code;
568 }
569
570 /*
571  * Local variables:
572  * c-basic-offset: 4
573  * indent-tabs-mode: nil
574  * End:
575  * vim: shiftwidth=4 tabstop=8 expandtab
576  */
577