protected wrong buffer-length write by initializing to 0, but there is more debugging...
[idzebra-moved-to-github.git] / index / retrieve.c
1 /* $Id: retrieve.c,v 1.53 2006-11-16 21:58:54 marc Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25
26 #include <fcntl.h>
27 #ifdef WIN32
28 #include <io.h>
29 #include <process.h>
30 #endif
31 #if HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34
35 #include "index.h"
36 #include <yaz/diagbib1.h>
37 #include <direntz.h>
38
39
40
41
42 static int zebra_create_record_stream(ZebraHandle zh, 
43                                Record *rec,
44                                struct ZebraRecStream *stream){
45
46     RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec);
47
48     if ((*rec)->size[recInfo_storeData] > 0){ 
49         zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData],
50                                 (*rec)->size[recInfo_storeData]);
51     }
52     else {
53         char full_rep[1024];
54         int fd;
55             
56         if (zh->path_reg && !yaz_is_abspath((*rec)->info[recInfo_filename])){
57             strcpy(full_rep, zh->path_reg);
58             strcat(full_rep, "/");
59             strcat(full_rep, (*rec)->info[recInfo_filename]);
60         }
61         else
62             strcpy(full_rep, (*rec)->info[recInfo_filename]);
63             
64         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1){
65             yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
66                      full_rep);
67             rec_free(rec);
68             return 14;
69         }
70         zebra_create_stream_fd(stream, fd, recordAttr->recordOffset);
71     }
72     return 0;
73 }
74     
75
76
/**
 * Split an element set name of the form "index", "index:" or "index:type".
 *
 * No copy is made: *index and *type point into elem, and the lengths tell
 * how many characters belong to each part.
 *
 * \param elem      element set name; may be NULL or empty
 * \param index     set to the start of elem, or 0 when elem is NULL/empty
 * \param index_len number of characters before the first ':' (whole string
 *                  when there is no ':')
 * \param type      set to the character after the first ':', or 0 when
 *                  there is no ':' or nothing follows it
 * \param type_len  length of the text after the first ':', else 0
 */
static void parse_zebra_elem(const char *elem,
                             const char **index, size_t *index_len,
                             const char **type, size_t *type_len)
{
    const char *colon;

    *index = 0;
    *index_len = 0;
    *type = 0;
    *type_len = 0;

    /* nothing to split */
    if (!elem || !*elem)
        return;

    *index = elem;
    colon = strchr(elem, ':');
    if (!colon)
    {
        /* no colon: the whole name is the index */
        *index_len = strlen(elem);
        return;
    }

    *index_len = colon - elem;
    if (colon[1] != '\0')
    {
        /* 'index:type' - a trailing 'index:' leaves type unset */
        *type = colon + 1;
        *type_len = strlen(colon + 1);
    }
}
110
111
112 int zebra_special_index_fetch(ZebraHandle zh, SYSNO sysno, ODR odr,
113                           Record rec,
114                           const char *elemsetname,
115                           oid_value input_format,
116                           oid_value *output_format,
117                           char **rec_bufp, int *rec_lenp)
118 {
119     const char *retrieval_index;
120     size_t retrieval_index_len; 
121     const char *retrieval_type;
122     size_t retrieval_type_len;
123     WRBUF wrbuf = 0;
124     zebra_rec_keys_t keys;
125     
126     /* set output variables before processing possible error states */
127     *rec_lenp = 0;
128
129     /* only accept XML and SUTRS requests */
130     if (input_format != VAL_TEXT_XML
131         && input_format != VAL_SUTRS){
132         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
133                 elemsetname);
134         *output_format = VAL_NONE;
135         return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
136     }
137
138     parse_zebra_elem(elemsetname,
139                      &retrieval_index, &retrieval_index_len,
140                      &retrieval_type,  &retrieval_type_len);
141
142     if (retrieval_type_len != 0 && retrieval_type_len != 1)
143     {
144         return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
145     }
146
147     if (retrieval_index_len)
148     {
149         char retrieval_index_cstr[256];
150
151         if (retrieval_index_len  < sizeof(retrieval_index_cstr) -1)
152         {
153             memcpy(retrieval_index_cstr, retrieval_index, retrieval_index_len);
154             retrieval_index_cstr[retrieval_index_len] = '\0';
155             
156             if (zebraExplain_lookup_attr_str(zh->reg->zei,
157                                              zinfo_index_category_index,
158                                              (retrieval_type_len == 0 ? -1 : 
159                                               retrieval_type[0]),
160                                              retrieval_index_cstr) == -1)
161             {
162                 return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
163             }
164         }
165     }
166
167     keys = zebra_rec_keys_open();
168     zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
169                            rec->size[recInfo_delKeys], 0);
170
171     wrbuf = wrbuf_alloc();
172     if (zebra_rec_keys_rewind(keys)){
173         size_t slen;
174         const char *str;
175         struct it_key key_in;
176
177         if (input_format == VAL_TEXT_XML){
178             *output_format = VAL_TEXT_XML;
179             /*wrbuf_printf(wrbuf, 
180               "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");*/
181             
182             wrbuf_printf(wrbuf, 
183                          "<record xmlns="
184                          "\"http://www.indexdata.com/zebra/\""
185                          " sysno=\"" ZINT_FORMAT "\""
186                          " set=\"zebra::index::%s/\">\n",
187                          sysno, elemsetname);
188         }
189         else if (input_format == VAL_SUTRS)
190             *output_format = VAL_SUTRS;
191
192
193         while(zebra_rec_keys_read(keys, &str, &slen, &key_in)){
194             int i;
195             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
196             int index_type;
197             const char *db = 0;
198             const char *string_index = 0;
199             size_t string_index_len;
200             char dst_buf[IT_MAX_WORD];
201             
202             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
203                                     &string_index);
204             string_index_len = strlen(string_index);
205             if (retrieval_index == 0 
206                 || (string_index_len == retrieval_index_len 
207                     && !memcmp(string_index, retrieval_index,
208                                string_index_len))){
209                 
210                 if (retrieval_type == 0 
211                     || (retrieval_type_len == 1 
212                         && retrieval_type[0] == index_type)){
213                     
214                     if (input_format == VAL_TEXT_XML){
215                         wrbuf_printf(wrbuf, "  <index name=\"%s\"", 
216                                      string_index);
217                         
218                         wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
219                         
220                         wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">", 
221                                      key_in.mem[key_in.len -1]);
222                         
223                         zebra_term_untrans(zh, index_type, dst_buf, str);
224                         wrbuf_xmlputs(wrbuf, dst_buf);
225                         wrbuf_printf(wrbuf, "</index>\n");
226                     }
227                     else if (input_format == VAL_SUTRS){
228                         wrbuf_printf(wrbuf, "%s ", string_index);
229                     
230                         wrbuf_printf(wrbuf, "%c", index_type);
231                     
232                         for (i = 1; i < key_in.len; i++)
233                             wrbuf_printf(wrbuf, " " ZINT_FORMAT, 
234                                          key_in.mem[i]);
235
236                         zebra_term_untrans(zh, index_type, dst_buf, str);
237                         wrbuf_printf(wrbuf, " %s", dst_buf);
238                         
239                         wrbuf_printf(wrbuf, "\n");
240                     }
241                 }
242             }
243         }
244         if (input_format == VAL_TEXT_XML)
245             wrbuf_printf(wrbuf, "</record>\n");
246      }
247     *rec_lenp = wrbuf_len(wrbuf);
248     *rec_bufp = odr_malloc(odr, *rec_lenp);
249     memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
250     wrbuf_free(wrbuf, 1);
251     zebra_rec_keys_close(keys);
252     return 0;
253 }
254
255
256 int zebra_special_fetch(ZebraHandle zh, SYSNO sysno, ODR odr,
257                            const char *elemsetname,
258                            oid_value input_format,
259                            oid_value *output_format,
260                            char **rec_bufp, int *rec_lenp)
261 {
262     Record rec;
263     
264     /* set output variables before processing possible error states */
265     *rec_lenp = 0;
266
267     /* only accept XML and SUTRS requests */
268     if (input_format != VAL_TEXT_XML
269         && input_format != VAL_SUTRS){
270         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
271                 elemsetname);
272         return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST;
273     }
274     
275     /* processing zebra::meta::sysno elemset without fetching binary data */
276     if (elemsetname  && 0 == strcmp(elemsetname, "meta::sysno"))
277     {
278         char rec_str[128];
279         if (input_format == VAL_SUTRS){
280             sprintf(rec_str, ZINT_FORMAT, sysno);
281             *output_format = VAL_SUTRS;
282         } 
283         else if (input_format == VAL_TEXT_XML){
284             sprintf(rec_str, "<record xmlns="
285                     "\"http://www.indexdata.com/zebra/\""
286                              " sysno=\"" ZINT_FORMAT "\""
287                              " set=\"zebra::%s\"/>\n",
288                     sysno, elemsetname);
289             *output_format = VAL_TEXT_XML;
290         }
291         *rec_lenp = strlen(rec_str);
292         if (*rec_lenp){
293             *rec_bufp = odr_strdup(odr, rec_str);
294             return 0;
295         } else {
296             return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
297         }
298     }
299
300     /* fetching binary record up for all other display elementsets */
301     rec = rec_get(zh->reg->records, sysno);
302     if (!rec){
303         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
304         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
305     }
306
307     /* processing special elementsetnames zebra::index:: */
308     if (elemsetname && 0 == strncmp(elemsetname, "index::", 7)){
309         
310         int ret = zebra_special_index_fetch(zh, sysno, odr, rec,
311                                             elemsetname + 7,
312                                             input_format, output_format,
313                                             rec_bufp, rec_lenp);
314         
315         rec_free(&rec);
316         return ret;
317     }
318
319     /* processing special elementsetnames zebra::data */    
320     if (elemsetname && 0 == strcmp(elemsetname, "data")){
321         struct ZebraRecStream stream;
322         RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); 
323         zebra_create_record_stream(zh, &rec, &stream);
324         *output_format = input_format;
325         *rec_lenp = recordAttr->recordSize;
326         *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
327         stream.readf(&stream, *rec_bufp, *rec_lenp);
328         stream.destroy(&stream);
329         rec_free(&rec);
330         return 0;
331     }
332
333     if (rec)
334         rec_free(&rec);
335     return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
336 }
337
338                           
339 int zebra_record_fetch(ZebraHandle zh, SYSNO sysno, int score,
340                        zebra_snippets *hit_snippet, ODR odr,
341                        oid_value input_format, Z_RecordComposition *comp,
342                        oid_value *output_format,
343                        char **rec_bufp, int *rec_lenp, char **basenamep,
344                        char **addinfo)
345 {
346     Record rec;
347     char *fname, *file_type, *basename;
348     const char *elemsetname;
349     struct ZebraRecStream stream;
350     RecordAttr *recordAttr;
351     void *clientData;
352     int return_code = 0;
353
354     *basenamep = 0;
355     *addinfo = 0;
356     elemsetname = yaz_get_esn(comp);
357
358     /* processing zebra special elementset names of form 'zebra:: */
359     /* SUGGESTION: do not check elemset nema here, buuuut ... */  
360     if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7))
361         return  zebra_special_fetch(zh, sysno, odr,
362                                     elemsetname + 7,
363                                     input_format, output_format,
364                                     rec_bufp, rec_lenp);
365
366
367     /* processing all other element set names */
368     rec = rec_get(zh->reg->records, sysno);
369     if (!rec)
370     {
371         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
372         *basenamep = 0;
373         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
374     }
375
376
377     recordAttr = rec_init_attr(zh->reg->zei, rec);
378
379     file_type = rec->info[recInfo_fileType];
380     fname = rec->info[recInfo_filename];
381     basename = rec->info[recInfo_databaseName];
382     *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
383     strcpy (*basenamep, basename);
384
385     yaz_log(YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
386             sysno, score);
387
388     zebra_create_record_stream(zh, &rec, &stream);
389     
390     {
391         /* snippets code */
392         zebra_snippets *snippet;
393         zebra_rec_keys_t reckeys = zebra_rec_keys_open();
394         RecType rt;
395         struct recRetrieveCtrl retrieveCtrl;
396
397         retrieveCtrl.stream = &stream;
398         retrieveCtrl.fname = fname;
399         retrieveCtrl.localno = sysno;
400         retrieveCtrl.staticrank = recordAttr->staticrank;
401         retrieveCtrl.score = score;
402         retrieveCtrl.recordSize = recordAttr->recordSize;
403         retrieveCtrl.odr = odr;
404         retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
405         retrieveCtrl.comp = comp;
406         retrieveCtrl.encoding = zh->record_encoding;
407         retrieveCtrl.diagnostic = 0;
408         retrieveCtrl.addinfo = 0;
409         retrieveCtrl.dh = zh->reg->dh;
410         retrieveCtrl.res = zh->res;
411         retrieveCtrl.rec_buf = 0;
412         retrieveCtrl.rec_len = -1;
413         retrieveCtrl.hit_snippet = hit_snippet;
414         retrieveCtrl.doc_snippet = zebra_snippets_create();
415
416         zebra_rec_keys_set_buf(reckeys,
417                                rec->info[recInfo_delKeys],
418                                rec->size[recInfo_delKeys], 
419                                0);
420         zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
421         zebra_rec_keys_close(reckeys);
422
423 #if 0
424         /* for debugging purposes */
425         yaz_log(YLOG_LOG, "DOC SNIPPET:");
426         zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
427         yaz_log(YLOG_LOG, "HIT SNIPPET:");
428         zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
429 #endif
430         snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
431                                         retrieveCtrl.hit_snippet,
432                                         10);
433 #if 0
434         /* for debugging purposes */
435         yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
436         zebra_snippets_log(snippet, YLOG_LOG);
437 #endif
438
439         /* SUGGESTION: do not check elemset name here, buuuut ... 
440            add another recType Struct with zebra internal stuff here,
441            which overrides the Alvis/GRS-1/Safari filters .... 
442         */
443         if (!(rt = recType_byName(zh->reg->recTypes, zh->res,
444                                   file_type, &clientData)))
445         {
446             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
447         }
448         else
449         {
450         /* SUGGESTION: do not check elemset name here, buuuut ... 
451            add another recType Struct with zebra internal stuff here,
452            which overrides the Alvis/GRS-1/Safari filters .... 
453            (*rt->retrieve) method to make the correct encoded, etc, retrieval,
454            where all needed info already is found in the   &retrieveCtr
455            parameter. This way, we do not need to re-code/dublicate a lot of 
456            logic. 
457         */
458             (*rt->retrieve)(clientData, &retrieveCtrl);
459             return_code = retrieveCtrl.diagnostic;
460
461             *output_format = retrieveCtrl.output_format;
462             *rec_bufp = (char *) retrieveCtrl.rec_buf;
463             *rec_lenp = retrieveCtrl.rec_len;
464             *addinfo = retrieveCtrl.addinfo;
465         }
466         /* another SUGGESTION: throw out all this snippet stuff in this 
467            file, and do it correctly - either inside filters, or in another 
468            new   (*rt->retrieve)(clientData, &retrieveCtrl); type 'snippet'
469         */
470
471         zebra_snippets_destroy(snippet);
472         zebra_snippets_destroy(retrieveCtrl.doc_snippet);
473      }
474
475     stream.destroy(&stream);
476     rec_free(&rec);
477
478     return return_code;
479 }
480
481 /*
482  * Local variables:
483  * c-basic-offset: 4
484  * indent-tabs-mode: nil
485  * End:
486  * vim: shiftwidth=4 tabstop=8 expandtab
487  */
488