Parse element spec zebra::field,type to make Zebra return storekeys for
[idzebra-moved-to-github.git] / index / retrieve.c
1 /* $Id: retrieve.c,v 1.47 2006-11-13 09:07:05 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25
26 #include <fcntl.h>
27 #ifdef WIN32
28 #include <io.h>
29 #include <process.h>
30 #endif
31 #if HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34
35 #include "index.h"
36 #include <yaz/diagbib1.h>
37 #include <direntz.h>
38
/*
 * Split an element specification of the form "index[:type]" into its
 * two components.
 *
 * No memory is allocated and nothing is copied: on return *index and
 * *type point into elem, and the index part is NOT NUL-terminated, so
 * callers must always honour the returned lengths.
 *
 *   elem       specification to split; may be NULL or empty
 *   index      receives start of the index name, or 0 if none was given
 *   index_len  receives the length of the index name (0 if none)
 *   type       receives start of the type, or 0 if none was given
 *   type_len   receives the length of the type (0 if none)
 *
 * A component that is present but empty ("index:" or ":type") is
 * reported as absent (pointer 0, length 0), so callers can treat
 * "pointer == 0" uniformly as "do not filter on this component".
 * Everything after the first ':' belongs to the type.
 */
static void parse_zebra_elem(const char *elem,
                             const char **index, size_t *index_len,
                             const char **type, size_t *type_len)
{
    const char *cp;

    *index = 0;
    *index_len = 0;

    *type = 0;
    *type_len = 0;

    if (!elem || !*elem)
        return;

    cp = strchr(elem, ':');
    if (!cp) /* no colon: whole string is the index */
    {
        *index = elem;
        *index_len = strlen(elem);
        return;
    }
    if (cp != elem) /* non-empty index before the colon */
    {
        *index = elem;
        *index_len = cp - elem;
    }
    if (cp[1] != '\0') /* non-empty type after the colon */
    {
        *type = cp + 1;
        *type_len = strlen(cp + 1);
    }
}
72
73 int zebra_storekeys_fetch(ZebraHandle zh, SYSNO sysno, ODR odr,
74                           Record rec,
75                           const char *element_set,
76                           oid_value input_format,
77                           oid_value *output_format,
78                           char **rec_bufp, int *rec_lenp)
79 {
80     const char *retrieval_index;
81     size_t retrieval_index_len; 
82     const char *retrieval_type;
83     size_t retrieval_type_len;
84    
85     WRBUF wrbuf = wrbuf_alloc();
86     zebra_rec_keys_t keys = zebra_rec_keys_open();
87     zebra_rec_keys_set_buf(keys,
88                            rec->info[recInfo_delKeys],
89                            rec->size[recInfo_delKeys],
90                            0);
91
92     yaz_log(YLOG_LOG, "element_set=%s", element_set);
93     
94     parse_zebra_elem(element_set,
95                      &retrieval_index, &retrieval_index_len,
96                      &retrieval_type,  &retrieval_type_len);
97
98
99     if (input_format == VAL_TEXT_XML)
100     {
101         yaz_log(YLOG_LOG, "want XML output");
102     }
103     else if (input_format == VAL_SUTRS)
104     {
105         yaz_log(YLOG_LOG, "want SUTRS output");
106     }
107     else
108     {
109         yaz_log(YLOG_LOG, "unsupported.. We must produce an error");
110     }
111     if (zebra_rec_keys_rewind(keys))
112     {
113         size_t slen;
114         const char *str;
115         struct it_key key_in;
116         while(zebra_rec_keys_read(keys, &str, &slen, &key_in))
117         {
118             int i;
119             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
120             int index_type;
121             const char *db = 0;
122             const char *string_index = 0;
123             size_t string_index_len;
124             char dst_buf[IT_MAX_WORD];
125             
126             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db,
127                                     &string_index);
128             string_index_len = strlen(string_index);
129             if (retrieval_index == 0 
130                 || (string_index_len == retrieval_index_len 
131                     && !memcmp(string_index, retrieval_index,
132                                string_index_len)))
133             {
134                 
135                 if (retrieval_type == 0 
136                     || (retrieval_type_len == 1 
137                         && retrieval_type[0] == index_type))
138                 {
139                     
140                     wrbuf_printf(wrbuf, "%s ", string_index);
141                     
142                     wrbuf_printf(wrbuf, "%c", index_type);
143                     
144                     zebra_term_untrans(zh, index_type, dst_buf, str);
145                     wrbuf_printf(wrbuf, " %s", dst_buf);
146                     
147                     for (i = 1; i < key_in.len; i++)
148                         wrbuf_printf(wrbuf, " " ZINT_FORMAT, key_in.mem[i]);
149                     wrbuf_printf(wrbuf, "\n");
150                 }
151             }
152         }
153     }
154     *output_format = VAL_SUTRS;
155     *rec_lenp = wrbuf_len(wrbuf);
156     *rec_bufp = odr_malloc(odr, *rec_lenp);
157     memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp);
158     wrbuf_free(wrbuf, 1);
159     zebra_rec_keys_close(keys);
160     return 0;
161 }
162                           
163 int zebra_record_fetch(ZebraHandle zh, SYSNO sysno, int score,
164                        zebra_snippets *hit_snippet, ODR odr,
165                        oid_value input_format, Z_RecordComposition *comp,
166                        oid_value *output_format,
167                        char **rec_bufp, int *rec_lenp, char **basenamep,
168                        char **addinfo)
169 {
170     Record rec;
171     char *fname, *file_type, *basename;
172     struct ZebraRecStream stream;
173     RecordAttr *recordAttr;
174     void *clientData;
175     int raw_mode = 0;
176     int return_code = 0;
177
178     *basenamep = 0;
179     *addinfo = 0;
180     if (comp && comp->which == Z_RecordComp_simple &&
181         comp->u.simple->which == Z_ElementSetNames_generic && 
182         !strcmp (comp->u.simple->u.generic, "_sysno_"))
183     {
184         char rec_str[60];
185         sprintf(rec_str, ZINT_FORMAT, sysno);
186         *output_format = VAL_SUTRS;
187         *rec_lenp = strlen(rec_str);
188         *rec_bufp = odr_strdup(odr, rec_str);
189         return 0;
190     }
191     rec = rec_get (zh->reg->records, sysno);
192     if (!rec)
193     {
194         yaz_log (YLOG_DEBUG, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
195         *basenamep = 0;
196         return YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
197     }
198     recordAttr = rec_init_attr (zh->reg->zei, rec);
199
200     file_type = rec->info[recInfo_fileType];
201     fname = rec->info[recInfo_filename];
202     basename = rec->info[recInfo_databaseName];
203     *basenamep = (char *) odr_malloc (odr, strlen(basename)+1);
204     strcpy (*basenamep, basename);
205
206     if (comp && comp->which == Z_RecordComp_simple 
207         && comp->u.simple->which == Z_ElementSetNames_generic 
208         && strncmp(comp->u.simple->u.generic, "zebra::", 7) == 0)
209     {
210         int r = zebra_storekeys_fetch(zh, sysno, odr, rec,
211                                       comp->u.simple->u.generic + 7,
212                                       input_format, output_format,
213                                       rec_bufp, rec_lenp);
214
215         rec_free(&rec);
216         return r;
217     }
218
219     if (comp && comp->which == Z_RecordComp_simple &&
220         comp->u.simple->which == Z_ElementSetNames_generic && 
221         !strcmp (comp->u.simple->u.generic, "R"))
222     {
223         raw_mode = 1;
224     }
225     yaz_log (YLOG_DEBUG, "retrieve localno=" ZINT_FORMAT " score=%d",
226              sysno, score);
227     if (rec->size[recInfo_storeData] > 0)
228     {
229         zebra_create_stream_mem(&stream, rec->info[recInfo_storeData],
230                                 rec->size[recInfo_storeData]);
231     }
232     else
233     {
234         char full_rep[1024];
235         int fd;
236
237         if (zh->path_reg && !yaz_is_abspath (fname))
238         {
239             strcpy (full_rep, zh->path_reg);
240             strcat (full_rep, "/");
241             strcat (full_rep, fname);
242         }
243         else
244             strcpy (full_rep, fname);
245
246         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
247         {
248             yaz_log (YLOG_WARN|YLOG_ERRNO, "Retrieve fail; missing file: %s",
249                   full_rep);
250             rec_free(&rec);
251             return 14;
252         }
253         zebra_create_stream_fd(&stream, fd, recordAttr->recordOffset);
254     }
255
256     if (raw_mode)
257     {
258         *output_format = VAL_SUTRS;
259         *rec_lenp = recordAttr->recordSize;
260         *rec_bufp = (char *) odr_malloc(odr, *rec_lenp);
261         stream.readf(&stream, *rec_bufp, *rec_lenp);
262     }
263     else
264     {
265         /* snippets code */
266         zebra_snippets *snippet;
267         zebra_rec_keys_t reckeys = zebra_rec_keys_open();
268         RecType rt;
269         struct recRetrieveCtrl retrieveCtrl;
270
271         retrieveCtrl.stream = &stream;
272         retrieveCtrl.fname = fname;
273         retrieveCtrl.localno = sysno;
274         retrieveCtrl.staticrank = recordAttr->staticrank;
275         retrieveCtrl.score = score;
276         retrieveCtrl.recordSize = recordAttr->recordSize;
277         retrieveCtrl.odr = odr;
278         retrieveCtrl.input_format = retrieveCtrl.output_format = input_format;
279         retrieveCtrl.comp = comp;
280         retrieveCtrl.encoding = zh->record_encoding;
281         retrieveCtrl.diagnostic = 0;
282         retrieveCtrl.addinfo = 0;
283         retrieveCtrl.dh = zh->reg->dh;
284         retrieveCtrl.res = zh->res;
285         retrieveCtrl.rec_buf = 0;
286         retrieveCtrl.rec_len = -1;
287         retrieveCtrl.hit_snippet = hit_snippet;
288         retrieveCtrl.doc_snippet = zebra_snippets_create();
289
290         zebra_rec_keys_set_buf(reckeys,
291                                rec->info[recInfo_delKeys],
292                                rec->size[recInfo_delKeys], 
293                                0);
294         zebra_rec_keys_to_snippets(zh, reckeys, retrieveCtrl.doc_snippet);
295         zebra_rec_keys_close(reckeys);
296
297 #if 0
298         /* for debugging purposes */
299         yaz_log(YLOG_LOG, "DOC SNIPPET:");
300         zebra_snippets_log(retrieveCtrl.doc_snippet, YLOG_LOG);
301         yaz_log(YLOG_LOG, "HIT SNIPPET:");
302         zebra_snippets_log(retrieveCtrl.hit_snippet, YLOG_LOG);
303 #endif
304         snippet = zebra_snippets_window(retrieveCtrl.doc_snippet,
305                                         retrieveCtrl.hit_snippet,
306                                         10);
307 #if 0
308         /* for debugging purposes */
309         yaz_log(YLOG_LOG, "WINDOW SNIPPET:");
310         zebra_snippets_log(snippet, YLOG_LOG);
311 #endif
312
313         if (!(rt = recType_byName (zh->reg->recTypes, zh->res,
314                                    file_type, &clientData)))
315         {
316             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
317         }
318         else
319         {
320             (*rt->retrieve)(clientData, &retrieveCtrl);
321             return_code = retrieveCtrl.diagnostic;
322
323             *output_format = retrieveCtrl.output_format;
324             *rec_bufp = (char *) retrieveCtrl.rec_buf;
325             *rec_lenp = retrieveCtrl.rec_len;
326             *addinfo = retrieveCtrl.addinfo;
327         }
328         zebra_snippets_destroy(snippet);
329         zebra_snippets_destroy(retrieveCtrl.doc_snippet);
330     }
331     stream.destroy(&stream);
332     rec_free(&rec);
333
334     return return_code;
335 }
336
337 /*
338  * Local variables:
339  * c-basic-offset: 4
340  * indent-tabs-mode: nil
341  * End:
342  * vim: shiftwidth=4 tabstop=8 expandtab
343  */
344