Added support for scan of SOLR indexes. To enable support, add the following request...
[yaz-moved-to-github.git] / src / zoom-sru.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file zoom-sru.c
7  * \brief Implements ZOOM SRU
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16 #include "zoom-p.h"
17
18 #include <yaz/log.h>
19 #include <yaz/pquery.h>
20
21 void handle_facet_list(ZOOM_resultset r, Z_FacetList *fl);
22
23 #if YAZ_HAVE_XML2
24 static void set_SRU_error(ZOOM_connection c, Z_SRW_diagnostic *d)
25 {
26     const char *uri = d->uri;
27     if (uri)
28         ZOOM_set_dset_error(c, ZOOM_uri_to_code(uri), uri, d->details, 0);
29 }
30 #endif
31
32
33 #if YAZ_HAVE_XML2
34 static zoom_ret send_srw(ZOOM_connection c, Z_SRW_PDU *sr)
35 {
36     Z_GDU *gdu;
37     const char *database =  ZOOM_options_get(c->options, "databaseName");
38
39     gdu = z_get_HTTP_Request_uri(c->odr_out, c->host_port,
40                                  database,
41                                  c->proxy ? 1 : 0);
42
43     if (c->sru_mode == zoom_sru_get)
44     {
45         yaz_sru_get_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
46     }
47     else if (c->sru_mode == zoom_sru_post)
48     {
49         yaz_sru_post_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
50     }
51     else if (c->sru_mode == zoom_sru_soap)
52     {
53         yaz_sru_soap_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
54     }
55     else if (c->sru_mode == zoom_sru_solr)
56     {
57         yaz_solr_encode_request(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
58     }
59     return ZOOM_send_GDU(c, gdu);
60 }
61 #endif
62
63 #if YAZ_HAVE_XML2
64 static Z_SRW_PDU *ZOOM_srw_get_pdu(ZOOM_connection c, int type)
65 {
66     Z_SRW_PDU *sr = yaz_srw_get_pdu(c->odr_out, type, c->sru_version);
67     sr->username = c->user;
68     sr->password = c->password;
69     return sr;
70 }
71 #endif
72
73 #if YAZ_HAVE_XML2
74 zoom_ret ZOOM_connection_srw_send_scan(ZOOM_connection c)
75 {
76     ZOOM_scanset scan;
77     Z_SRW_PDU *sr = 0;
78     const char *option_val = 0;
79     Z_Query *z_query;
80
81     if (!c->tasks)
82         return zoom_complete;
83     assert (c->tasks->which == ZOOM_TASK_SCAN);
84     scan = c->tasks->u.scan.scan;
85
86     sr = ZOOM_srw_get_pdu(c, Z_SRW_scan_request);
87
88     z_query = ZOOM_query_get_Z_Query(scan->query);
89     /* SRU scan can only carry CQL and PQF */
90     if (z_query->which == Z_Query_type_104)
91     {
92         sr->u.scan_request->query_type = Z_SRW_query_type_cql;
93         sr->u.scan_request->scanClause.cql =
94             odr_strdup(c->odr_out, ZOOM_query_get_query_string(scan->query));
95     }
96     else if (z_query->which == Z_Query_type_1
97              || z_query->which == Z_Query_type_101)
98     {
99         sr->u.scan_request->query_type = Z_SRW_query_type_pqf;
100         sr->u.scan_request->scanClause.pqf =
101             odr_strdup(c->odr_out, ZOOM_query_get_query_string(scan->query));
102     }
103     else
104     {
105         ZOOM_set_error(c, ZOOM_ERROR_UNSUPPORTED_QUERY, 0);
106         return zoom_complete;
107     }
108
109     sr->u.scan_request->maximumTerms = odr_intdup(
110         c->odr_out, ZOOM_options_get_int(scan->options, "number", 10));
111
112     sr->u.scan_request->responsePosition = odr_intdup(
113         c->odr_out, ZOOM_options_get_int(scan->options, "position", 1));
114
115     option_val = ZOOM_options_get(scan->options, "extraArgs");
116     yaz_encode_sru_extra(sr, c->odr_out, option_val);
117     return send_srw(c, sr);
118 }
119 #else
120 zoom_ret ZOOM_connection_srw_send_scan(ZOOM_connection c)
121 {
122     return zoom_complete;
123 }
124 #endif
125
126 #if YAZ_HAVE_XML2
127 zoom_ret ZOOM_connection_srw_send_search(ZOOM_connection c)
128 {
129     const char *facets = 0;
130     int i;
131     int *start, *count;
132     ZOOM_resultset resultset = 0;
133     Z_SRW_PDU *sr = 0;
134     const char *option_val = 0;
135     Z_Query *z_query;
136     Z_FacetList *facet_list = 0;
137     if (c->error)                  /* don't continue on error */
138         return zoom_complete;
139     assert(c->tasks);
140     switch(c->tasks->which)
141     {
142     case ZOOM_TASK_SEARCH:
143         resultset = c->tasks->u.search.resultset;
144         if (!resultset->setname)
145             resultset->setname = xstrdup("default");
146         ZOOM_options_set(resultset->options, "setname", resultset->setname);
147         start = &c->tasks->u.search.start;
148         count = &c->tasks->u.search.count;
149         facets = ZOOM_options_get(resultset->options, "facets");
150         if (facets)
151             facet_list = yaz_pqf_parse_facet_list(c->odr_out, facets);
152         break;
153     case ZOOM_TASK_RETRIEVE:
154         resultset = c->tasks->u.retrieve.resultset;
155
156         start = &c->tasks->u.retrieve.start;
157         count = &c->tasks->u.retrieve.count;
158
159         if (*start >= resultset->size)
160             return zoom_complete;
161         if (*start + *count > resultset->size)
162             *count = resultset->size - *start;
163
164         for (i = 0; i < *count; i++)
165         {
166             ZOOM_record rec =
167                 ZOOM_record_cache_lookup(resultset, i + *start,
168                                          c->tasks->u.retrieve.syntax,
169                                          c->tasks->u.retrieve.elementSetName);
170             if (!rec)
171                 break;
172             else
173             {
174                 ZOOM_Event event = ZOOM_Event_create(ZOOM_EVENT_RECV_RECORD);
175                 ZOOM_connection_put_event(c, event);
176             }
177         }
178         *start += i;
179         *count -= i;
180
181         if (*count == 0)
182             return zoom_complete;
183         break;
184     default:
185         return zoom_complete;
186     }
187     assert(resultset->query);
188
189     sr = ZOOM_srw_get_pdu(c, Z_SRW_searchRetrieve_request);
190     z_query = ZOOM_query_get_Z_Query(resultset->query);
191
192     if (z_query->which == Z_Query_type_104
193         && z_query->u.type_104->which == Z_External_CQL)
194     {
195         sr->u.request->query_type = Z_SRW_query_type_cql;
196         sr->u.request->query.cql = z_query->u.type_104->u.cql;
197     }
198     else if (z_query->which == Z_Query_type_1 && z_query->u.type_1)
199     {
200         sr->u.request->query_type = Z_SRW_query_type_pqf;
201         sr->u.request->query.pqf =
202             odr_strdup(c->odr_out,
203                        ZOOM_query_get_query_string(resultset->query));
204     }
205     else
206     {
207         ZOOM_set_error(c, ZOOM_ERROR_UNSUPPORTED_QUERY, 0);
208         return zoom_complete;
209     }
210
211     option_val = ZOOM_query_get_sru11(resultset->query);
212     if (option_val)
213     {
214         sr->u.request->sort_type = Z_SRW_sort_type_sort;
215         sr->u.request->sort.sortKeys = odr_strdup(c->odr_out, option_val);
216     }
217     sr->u.request->startRecord = odr_intdup(c->odr_out, *start + 1);
218     sr->u.request->maximumRecords = odr_intdup(
219         c->odr_out, (resultset->step > 0 && resultset->step < *count) ?
220         resultset->step : *count);
221     sr->u.request->recordSchema = resultset->schema;
222     sr->u.request->facetList = facet_list;
223
224     option_val = ZOOM_resultset_option_get(resultset, "recordPacking");
225     if (option_val)
226         sr->u.request->recordPacking = odr_strdup(c->odr_out, option_val);
227
228     option_val = ZOOM_resultset_option_get(resultset, "extraArgs");
229     yaz_encode_sru_extra(sr, c->odr_out, option_val);
230     return send_srw(c, sr);
231 }
232 #else
233 zoom_ret ZOOM_connection_srw_send_search(ZOOM_connection c)
234 {
235     return zoom_complete;
236 }
237 #endif
238
239 #if YAZ_HAVE_XML2
240 static zoom_ret handle_srw_response(ZOOM_connection c,
241                                     Z_SRW_searchRetrieveResponse *res)
242 {
243     ZOOM_resultset resultset = 0;
244     int i;
245     NMEM nmem;
246     ZOOM_Event event;
247     int *start, *count;
248     const char *syntax, *elementSetName;
249
250     if (!c->tasks)
251         return zoom_complete;
252
253     switch(c->tasks->which)
254     {
255     case ZOOM_TASK_SEARCH:
256         resultset = c->tasks->u.search.resultset;
257         start = &c->tasks->u.search.start;
258         count = &c->tasks->u.search.count;
259         syntax = c->tasks->u.search.syntax;
260         elementSetName = c->tasks->u.search.elementSetName;
261
262         /* Required not for reporting client hit count multiple times into session */
263         if (!c->tasks->u.search.recv_search_fired) {
264             yaz_log(YLOG_DEBUG, "posting ZOOM_EVENT_RECV_SEARCH");
265             event = ZOOM_Event_create(ZOOM_EVENT_RECV_SEARCH);
266             ZOOM_connection_put_event(c, event);
267             c->tasks->u.search.recv_search_fired = 1;
268         }
269         if (res->facetList)
270             handle_facet_list(resultset, res->facetList);
271         break;
272     case ZOOM_TASK_RETRIEVE:
273         resultset = c->tasks->u.retrieve.resultset;
274         start = &c->tasks->u.retrieve.start;
275         count = &c->tasks->u.retrieve.count;
276         syntax = c->tasks->u.retrieve.syntax;
277         elementSetName = c->tasks->u.retrieve.elementSetName;
278         break;
279     default:
280         return zoom_complete;
281     }
282
283     resultset->size = 0;
284
285     if (res->resultSetId)
286         ZOOM_resultset_option_set(resultset, "resultSetId", res->resultSetId);
287
288     yaz_log(c->log_details, "%p handle_srw_response got SRW response OK", c);
289
290     if (res->num_diagnostics > 0)
291     {
292         set_SRU_error(c, &res->diagnostics[0]);
293     }
294     else
295     {
296         if (res->numberOfRecords) {
297             resultset->size = *res->numberOfRecords;
298         }
299         if (res->suggestions) {
300             ZOOM_resultset_option_set(resultset, "suggestions", res->suggestions);
301         }
302         for (i = 0; i<res->num_records; i++)
303         {
304             int pos = *start + i;
305             Z_SRW_record *sru_rec;
306             Z_SRW_diagnostic *diag = 0;
307             int num_diag;
308
309             Z_NamePlusRecord *npr = (Z_NamePlusRecord *)
310                 odr_malloc(c->odr_in, sizeof(Z_NamePlusRecord));
311
312             /* only trust recordPosition if >= calculated position */
313             if (res->records[i].recordPosition &&
314                 *res->records[i].recordPosition >= pos + 1)
315                 pos = *res->records[i].recordPosition - 1;
316
317             sru_rec = &res->records[i];
318
319             npr->databaseName = 0;
320             npr->which = Z_NamePlusRecord_databaseRecord;
321             npr->u.databaseRecord = (Z_External *)
322                 odr_malloc(c->odr_in, sizeof(Z_External));
323             npr->u.databaseRecord->descriptor = 0;
324             npr->u.databaseRecord->direct_reference =
325                 odr_oiddup(c->odr_in, yaz_oid_recsyn_xml);
326             npr->u.databaseRecord->indirect_reference = 0;
327             npr->u.databaseRecord->which = Z_External_octet;
328
329             npr->u.databaseRecord->u.octet_aligned = (Odr_oct *)
330                 odr_malloc(c->odr_in, sizeof(Odr_oct));
331             npr->u.databaseRecord->u.octet_aligned->buf = (unsigned char*)
332                 sru_rec->recordData_buf;
333             npr->u.databaseRecord->u.octet_aligned->len =
334                 npr->u.databaseRecord->u.octet_aligned->size =
335                 sru_rec->recordData_len;
336
337             if (sru_rec->recordSchema
338                 && !strcmp(sru_rec->recordSchema,
339                            "info:srw/schema/1/diagnostics-v1.1"))
340             {
341                 sru_decode_surrogate_diagnostics(sru_rec->recordData_buf,
342                                                  sru_rec->recordData_len,
343                                                  &diag, &num_diag,
344                                                  resultset->odr);
345             }
346             ZOOM_record_cache_add(resultset, npr, pos, syntax, elementSetName,
347                                   sru_rec->recordSchema, diag);
348         }
349         *count -= i;
350         *start += i;
351         if (*count + *start > resultset->size)
352             *count = resultset->size - *start;
353         yaz_log(YLOG_DEBUG, "SRU result set size " ODR_INT_PRINTF " start %d count %d", resultset->size, *start, *count);
354         if (*count < 0)
355             *count = 0;
356         nmem = odr_extract_mem(c->odr_in);
357         nmem_transfer(odr_getmem(resultset->odr), nmem);
358         nmem_destroy(nmem);
359
360         if (*count > 0)
361             return ZOOM_connection_srw_send_search(c);
362     }
363     return zoom_complete;
364 }
365 #endif
366
367 #if YAZ_HAVE_XML2
368 static void handle_srw_scan_response(ZOOM_connection c,
369                                      Z_SRW_scanResponse *res)
370 {
371     NMEM nmem = odr_extract_mem(c->odr_in);
372     ZOOM_scanset scan;
373
374     if (!c->tasks || c->tasks->which != ZOOM_TASK_SCAN)
375         return;
376     scan = c->tasks->u.scan.scan;
377
378     if (res->num_diagnostics > 0)
379         set_SRU_error(c, &res->diagnostics[0]);
380
381     scan->scan_response = 0;
382     scan->srw_scan_response = res;
383     nmem_transfer(odr_getmem(scan->odr), nmem);
384
385     ZOOM_options_set_int(scan->options, "number", res->num_terms);
386     nmem_destroy(nmem);
387 }
388 #endif
389
390 int ZOOM_handle_sru(ZOOM_connection c, Z_HTTP_Response *hres,
391                     zoom_ret *cret, char **addinfo)
392 {
393 #if YAZ_HAVE_XML2
394     int ret = 0;
395
396     /* not redirect (normal response) */
397     if (!yaz_srw_check_content_type(hres))
398     {
399         *addinfo = "content-type";
400         ret = -1;
401     }
402     else if (c->sru_mode == zoom_sru_solr)
403     {
404         Z_SRW_PDU *sr;
405         ret = yaz_solr_decode_response(c->odr_in, hres, &sr);
406         if (ret == 0)
407             if (sr->which == Z_SRW_searchRetrieve_response)
408                 *cret = handle_srw_response(c, sr->u.response);
409             else if (sr->which == Z_SRW_scan_response)
410                 handle_srw_scan_response(c, sr->u.scan_response);
411     }
412     else
413     {
414         Z_SOAP *soap_package = 0;
415         ODR o = c->odr_in;
416         Z_SOAP_Handler soap_handlers[3] = {
417             {YAZ_XMLNS_SRU_v1_response, 0, (Z_SOAP_fun) yaz_srw_codec},
418             {YAZ_XMLNS_SRU_v2_response, 0, (Z_SOAP_fun) yaz_srw_codec},
419             {0, 0, 0}
420         };
421         ret = z_soap_codec(o, &soap_package,
422                            &hres->content_buf, &hres->content_len,
423                            soap_handlers);
424         if (!ret && soap_package->which == Z_SOAP_generic)
425         {
426             Z_SRW_PDU *sr = (Z_SRW_PDU*) soap_package->u.generic->p;
427
428             ZOOM_options_set(c->options, "sru_version", sr->srw_version);
429             ZOOM_options_setl(c->options, "sru_extra_response_data",
430                               sr->extraResponseData_buf, sr->extraResponseData_len);
431             if (sr->which == Z_SRW_searchRetrieve_response)
432                 *cret = handle_srw_response(c, sr->u.response);
433             else if (sr->which == Z_SRW_scan_response)
434                 handle_srw_scan_response(c, sr->u.scan_response);
435             else
436                 ret = -1;
437         }
438         else if (!ret && (soap_package->which == Z_SOAP_fault
439                           || soap_package->which == Z_SOAP_error))
440         {
441             ZOOM_set_HTTP_error(c, hres->code,
442                                 soap_package->u.fault->fault_code,
443                                 soap_package->u.fault->fault_string);
444         }
445         else
446         {
447             size_t max_chars = 1000;
448             size_t sz = hres->content_len;
449             if (sz > max_chars - 1)
450                 sz = max_chars;
451             *addinfo = odr_malloc(c->odr_in, sz + 4);
452             memcpy(*addinfo, hres->content_buf, sz);
453             if (sz == max_chars)
454                 strcpy(*addinfo + sz, "...");
455             else
456                 strcpy(*addinfo + sz, "");
457             ret = -1;
458         }
459     }
460     return ret;
461 #else
462     return -1;
463 #endif
464 }
465
466 /*
467  * Local variables:
468  * c-basic-offset: 4
469  * c-file-style: "Stroustrup"
470  * indent-tabs-mode: nil
471  * End:
472  * vim: shiftwidth=4 tabstop=8 expandtab
473  */
474