written XML formatting for new zebra-specific elem set 'zebra::' which lets you dump...
[idzebra-moved-to-github.git] / index / retrieve.c
1 /* $Id: retrieve.c,v 1.48 2006-11-13 13:53:49 marc Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25
26 #include <fcntl.h>
27 #ifdef WIN32
28 #include <io.h>
29 #include <process.h>
30 #endif
31 #if HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34
35 #include "index.h"
36 #include <yaz/diagbib1.h>
37 #include <direntz.h>
38
/*
 * Split an element-set specifier of the form "index[:type]" into its parts.
 *
 * elem       specifier to split; may be NULL or empty
 * index      receives a pointer to the start of the index part (points into
 *            elem, NOT NUL-terminated at index_len), or NULL if elem is
 *            NULL/empty
 * index_len  receives the number of characters in the index part
 * type       receives a pointer to the text following the first ':', or NULL
 *            if there is no colon or nothing follows it
 * type_len   receives the length of the type part (0 when type is NULL)
 *
 * Only the first ':' acts as a separator; anything after it, including
 * further colons, belongs to the type part.
 */
static void parse_zebra_elem(const char *elem,
                             const char **index, size_t *index_len,
                             const char **type, size_t *type_len)
{
    const char *colon;

    *index = 0;
    *index_len = 0;
    *type = 0;
    *type_len = 0;

    if (!elem || !*elem)
        return;

    /* the index part always starts at the beginning of the specifier */
    *index = elem;

    colon = strchr(elem, ':');
    if (!colon)
    {
        /* no colon: the whole string is the index name */
        *index_len = strlen(elem);
        return;
    }

    *index_len = colon - elem;

    /* "index:" with nothing after the colon leaves the type unset */
    if (colon[1] != '\0')
    {
        *type = colon + 1;
        *type_len = strlen(colon + 1);
    }
}
72
73 int zebra_storekeys_fetch(ZebraHandle zh, SYSNO sysno, ODR odr,
74                           Record rec,
75                           const char *element_set,
76                           oid_value input_format,
77                           oid_value *output_format,
78                           char **rec_bufp, int *rec_lenp)
79 {
80     const char *retrieval_index;
81     size_t retrieval_index_len; 
82     const char *retrieval_type;
83     size_t retrieval_type_len;
84    
85     int return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
86
87     WRBUF wrbuf = wrbuf_alloc();
88     zebra_rec_keys_t keys;
89     
90
91     /* only accept XML and SUTRS requests */
92     if (input_format != VAL_TEXT_XML
93         && input_format != VAL_SUTRS)
94     {
95         yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", 
96                 element_set);
97         *output_format = VAL_NONE;
98         return return_code;
99     }
100
101
102     keys = zebra_rec_keys_open();
103     zebra_rec_keys_set_buf(keys,
104                            rec->info[recInfo_delKeys],
105                            rec->size[recInfo_delKeys],
106                            0);
107
108     parse_zebra_elem(element_set,
109                      &retrieval_index, &retrieval_index_len,
110                      &retrieval_type,  &retrieval_type_len);
111
112
113
114
115
116     if (zebra_rec_keys_rewind(keys))
117     {
118         size_t slen;
119         const char *str;
120         struct it_key key_in;
121
122
123         if (input_format == VAL_TEXT_XML)
124             {
125                 *output_format = VAL_TEXT_XML;
126                 /*wrbuf_printf(wrbuf, 
127                   "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");*/
128
129                 wrbuf_printf(wrbuf, 
130                              "<record xmlns="
131                              "\"http://www.indexdata.com/zebra/\""
132                              " sysno=\"" ZINT_FORMAT "\""
133                              " set=\"zebra::%s\">\n",
134                              sysno, element_set);
135             }
136         else if (input_format == VAL_SUTRS)
137                 *output_format = VAL_SUTRS;
138
139
140         while(zebra_rec_keys_read(keys, &str, &slen, &key_in))
141         {
142             int i;
143             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
144             int index_type;
145             const char *db = 0;
146             const char *string_index = 0;
147             size_t string_index_len;
148             char dst_buf[IT_MAX_WORD];
149             
150             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
151                                     &string_index);
152             string_index_len = strlen(string_index);
153             if (retrieval_index == 0 
154                 || (string_index_len == retrieval_index_len 
155                     && !memcmp(string_index, retrieval_index,
156                                string_index_len)))
157             {
158                 
159                 if (retrieval_type == 0 
160                     || (retrieval_type_len == 1 
161                         && retrieval_type[0] == index_type))
162                 {
163                     
164                     if (input_format == VAL_TEXT_XML)
165                         {
166                             wrbuf_printf(wrbuf, "  <index name=\"%s\"", 
167                                          string_index);
168                     
169                             wrbuf_printf(wrbuf, " type=\"%c\"", index_type);
170
171                             wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">", 
172                                              key_in.mem[key_in.len -1]);
173
174                             zebra_term_untrans(zh, index_type, dst_buf, str);
175                             wrbuf_xmlputs(wrbuf, dst_buf);
176                             wrbuf_printf(wrbuf, "</index>\n");
177                         }
178                     else if (input_format == VAL_SUTRS)
179                         {
180                             wrbuf_printf(wrbuf, "%s ", string_index);
181                     
182                             wrbuf_printf(wrbuf, "%c", index_type);
183                     
184                             for (i = 1; i < key_in.len; i++)
185                                 wrbuf_printf(wrbuf, " " ZINT_FORMAT, 
186                                              key_in.mem[i]);
187
188                             zebra_term_untrans(zh, index_type, dst_buf, str);
189                             wrbuf_printf(wrbuf, " %s", dst_buf);
190
191                             wrbuf_printf(wrbuf, "\n");
192                         }
193                 }
194             }
195         }
196         if (input_format == VAL_TEXT_XML)
197             {
198                 wrbuf_printf(wrbuf, "</record>\n");
199             }
200     }
201
202     *rec_lenp = wrbuf_len(wrbuf);
203     *rec_bufp = odr_malloc(odr, *rec_lenp);
204     memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
205     wrbuf_free(wrbuf, 1);
206     zebra_rec_keys_close(keys);
207     return 0;
208 }
209                           
210 int zebra_record_fetch(ZebraHandle zh, SYSNO sysno, int score,
211                        zebra_snippets *hit_snippet, ODR odr,
212                        oid_value input_format, Z_RecordComposition *comp,
213                        oid_value *output_format,
214                        char **rec_bufp, int *rec_lenp, char **basenamep,
215                        char **addinfo)
216 {
217     Record rec;
218     char *fname, *file_type, *basename;
219     struct ZebraRecStream stream;
220     RecordAttr *recordAttr;
221     void *clientData;
222     int raw_mode = 0;
223     int return_code = 0;
224
225     *basenamep = 0;
226     *addinfo = 0;
227     if (comp && comp->which == Z_RecordComp_simple &&
228         comp->u.simple->which == Z_ElementSetNames_generic && 
229         !strcmp (comp->u.simple->u.generic, "_sysno_"))
230     {
231         char rec_str[60];
232         sprintf(rec_str, ZINT_FORMAT, sysno);
233         *output_format = VAL_SUTRS;
234         *rec_lenp = strlen(rec_str);
235         *rec_bufp = odr_strdup(odr, rec_str);
236         return 0;
237     }
238     rec = rec_get (zh->reg->records, sysno);
239     if (!rec)
240     {
241         yaz_log (YLOG_DEBUG, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
242         *basenamep = 0;
243         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
244     }
245     recordAttr = rec_init_attr (zh->reg->zei, rec);
246
247     file_type = rec->info[recInfo_fileType];
248     fname = rec->info[recInfo_filename];
249     basename = rec->info[recInfo_databaseName];
250     *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
251     strcpy (*basenamep, basename);
252
253     if (comp && comp->which == Z_RecordComp_simple 
254         && comp->u.simple->which == Z_ElementSetNames_generic 
255         && strncmp(comp->u.simple->u.generic, "zebra::", 7) == 0)
256     {
257         int r = zebra_storekeys_fetch(zh, sysno, odr, rec,
258                                       comp->u.simple->u.generic + 7,
259                                       input_format, output_format,
260                                       rec_bufp, rec_lenp);
261
262         rec_free(&rec);
263         return r;
264     }
265
266     if (comp && comp->which == Z_RecordComp_simple &&
267         comp->u.simple->which == Z_ElementSetNames_generic && 
268         !strcmp (comp->u.simple->u.generic, "R"))
269     {
270         raw_mode = 1;
271     }
272     yaz_log (YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
273              sysno, score);
274     if (rec->size[recInfo_storeData] > 0)
275     {
276         zebra_create_stream_mem(&stream, rec->info[recInfo_storeData],
277                                 rec->size[recInfo_storeData]);
278     }
279     else
280     {
281         char full_rep[1024];
282         int fd;
283
284         if (zh->path_reg && !yaz_is_abspath (fname))
285         {
286             strcpy (full_rep, zh->path_reg);
287             strcat (full_rep, "/");
288             strcat (full_rep, fname);
289         }
290         else
291             strcpy (full_rep, fname);
292
293         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
294         {
295             yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
296                   full_rep);
297             rec_free(&rec);
298             return 14;
299         }
300         zebra_create_stream_fd(&stream, fd, recordAttr->recordOffset);
301     }
302
303     if (raw_mode)
304     {
305         *output_format = VAL_SUTRS;
306         *rec_lenp = recordAttr->recordSize;
307         *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
308         stream.readf(&stream, *rec_bufp, *rec_lenp);
309     }
310     else
311     {
312         /* snippets code */
313         zebra_snippets *snippet;
314         zebra_rec_keys_t reckeys = zebra_rec_keys_open();
315         RecType rt;
316         struct recRetrieveCtrl retrieveCtrl;
317
318         retrieveCtrl.stream = &stream;
319         retrieveCtrl.fname = fname;
320         retrieveCtrl.localno = sysno;
321         retrieveCtrl.staticrank = recordAttr->staticrank;
322         retrieveCtrl.score = score;
323         retrieveCtrl.recordSize = recordAttr->recordSize;
324         retrieveCtrl.odr = odr;
325         retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
326         retrieveCtrl.comp = comp;
327         retrieveCtrl.encoding = zh->record_encoding;
328         retrieveCtrl.diagnostic = 0;
329         retrieveCtrl.addinfo = 0;
330         retrieveCtrl.dh = zh->reg->dh;
331         retrieveCtrl.res = zh->res;
332         retrieveCtrl.rec_buf = 0;
333         retrieveCtrl.rec_len = -1;
334         retrieveCtrl.hit_snippet = hit_snippet;
335         retrieveCtrl.doc_snippet = zebra_snippets_create();
336
337         zebra_rec_keys_set_buf(reckeys,
338                                rec->info[recInfo_delKeys],
339                                rec->size[recInfo_delKeys], 
340                                0);
341         zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
342         zebra_rec_keys_close(reckeys);
343
344 #if 0
345         /* for debugging purposes */
346         yaz_log(YLOG_LOG, "DOC SNIPPET:");
347         zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
348         yaz_log(YLOG_LOG, "HIT SNIPPET:");
349         zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
350 #endif
351         snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
352                                         retrieveCtrl.hit_snippet,
353                                         10);
354 #if 0
355         /* for debugging purposes */
356         yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
357         zebra_snippets_log(snippet, YLOG_LOG);
358 #endif
359
360         if (!(rt = recType_byName (zh->reg->recTypes, zh->res,
361                                    file_type, &clientData)))
362         {
363             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
364         }
365         else
366         {
367             (*rt->retrieve)(clientData, &retrieveCtrl);
368             return_code = retrieveCtrl.diagnostic;
369
370             *output_format = retrieveCtrl.output_format;
371             *rec_bufp = (char *) retrieveCtrl.rec_buf;
372             *rec_lenp = retrieveCtrl.rec_len;
373             *addinfo = retrieveCtrl.addinfo;
374         }
375         zebra_snippets_destroy(snippet);
376         zebra_snippets_destroy(retrieveCtrl.doc_snippet);
377     }
378     stream.destroy(&stream);
379     rec_free(&rec);
380
381     return return_code;
382 }
383
384 /*
385  * Local variables:
386  * c-basic-offset: 4
387  * indent-tabs-mode: nil
388  * End:
389  * vim: shiftwidth=4 tabstop=8 expandtab
390  */
391