Fix hit count caching problem in async mode
[yaz-moved-to-github.git] / src / zoom-sru.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file zoom-sru.c
7  * \brief Implements ZOOM SRU
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <string.h>
15 #include <errno.h>
16 #include "zoom-p.h"
17
18 #include <yaz/log.h>
19 #include <yaz/pquery.h>
20
21 #if YAZ_HAVE_XML2
22 static void set_SRU_error(ZOOM_connection c, Z_SRW_diagnostic *d)
23 {
24     const char *uri = d->uri;
25     if (uri)
26         ZOOM_set_dset_error(c, ZOOM_uri_to_code(uri), uri, d->details, 0);
27 }
28 #endif
29
30
31 #if YAZ_HAVE_XML2
32 static zoom_ret send_srw(ZOOM_connection c, Z_SRW_PDU *sr)
33 {
34     Z_GDU *gdu;
35     const char *database =  ZOOM_options_get(c->options, "databaseName");
36
37     gdu = z_get_HTTP_Request_uri(c->odr_out, c->host_port,
38                                  database,
39                                  c->proxy ? 1 : 0);
40
41     if (c->sru_mode == zoom_sru_get)
42     {
43         yaz_sru_get_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
44     }
45     else if (c->sru_mode == zoom_sru_post)
46     {
47         yaz_sru_post_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
48     }
49     else if (c->sru_mode == zoom_sru_soap)
50     {
51         yaz_sru_soap_encode(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
52     }
53     else if (c->sru_mode == zoom_sru_solr)
54     {
55         yaz_solr_encode_request(gdu->u.HTTP_Request, sr, c->odr_out, c->charset);
56     }
57     return ZOOM_send_GDU(c, gdu);
58 }
59 #endif
60
61 #if YAZ_HAVE_XML2
62 static Z_SRW_PDU *ZOOM_srw_get_pdu(ZOOM_connection c, int type)
63 {
64     Z_SRW_PDU *sr = yaz_srw_get_pdu(c->odr_out, type, c->sru_version);
65     if (c->url_authentication && c->user)
66     {
67         Z_SRW_extra_arg **ea = &sr->extra_args;
68         while (*ea)
69             ea = &(*ea)->next;
70         *ea = (Z_SRW_extra_arg *) odr_malloc(c->odr_out, sizeof(**ea));
71         (*ea)->name = "x-username";
72         (*ea)->value = c->user;
73         ea = &(*ea)->next;
74         if (c->password)
75         {
76             *ea = (Z_SRW_extra_arg *) odr_malloc(c->odr_out, sizeof(**ea));
77             (*ea)->name = "x-password";
78             (*ea)->value = c->password;
79             ea = &(*ea)->next;
80         }
81         *ea = 0;
82     }
83     else
84     {
85         sr->username = c->user;
86         sr->password = c->password;
87     }
88     return sr;
89 }
90 #endif
91
92 #if YAZ_HAVE_XML2
93 zoom_ret ZOOM_connection_srw_send_scan(ZOOM_connection c)
94 {
95     ZOOM_scanset scan;
96     Z_SRW_PDU *sr = 0;
97     const char *option_val = 0;
98     Z_Query *z_query;
99
100     if (!c->tasks)
101         return zoom_complete;
102     assert (c->tasks->which == ZOOM_TASK_SCAN);
103     scan = c->tasks->u.scan.scan;
104
105     sr = ZOOM_srw_get_pdu(c, Z_SRW_scan_request);
106
107     z_query = ZOOM_query_get_Z_Query(scan->query);
108     /* SRU scan can only carry CQL and PQF */
109     if (z_query->which == Z_Query_type_104)
110     {
111         sr->u.scan_request->queryType = "cql";
112         sr->u.scan_request->scanClause =
113             odr_strdup(c->odr_out, ZOOM_query_get_query_string(scan->query));
114     }
115     else if (z_query->which == Z_Query_type_1
116              || z_query->which == Z_Query_type_101)
117     {
118         sr->u.scan_request->queryType = "pqf";
119         sr->u.scan_request->scanClause =
120             odr_strdup(c->odr_out, ZOOM_query_get_query_string(scan->query));
121     }
122     else
123     {
124         ZOOM_set_error(c, ZOOM_ERROR_UNSUPPORTED_QUERY, 0);
125         return zoom_complete;
126     }
127
128     sr->u.scan_request->maximumTerms = odr_intdup(
129         c->odr_out, ZOOM_options_get_int(scan->options, "number", 10));
130
131     sr->u.scan_request->responsePosition = odr_intdup(
132         c->odr_out, ZOOM_options_get_int(scan->options, "position", 1));
133
134     option_val = ZOOM_options_get(scan->options, "extraArgs");
135     yaz_encode_sru_extra(sr, c->odr_out, option_val);
136     return send_srw(c, sr);
137 }
138 #else
139 zoom_ret ZOOM_connection_srw_send_scan(ZOOM_connection c)
140 {
141     return zoom_complete;
142 }
143 #endif
144
145 #if YAZ_HAVE_XML2
146 zoom_ret ZOOM_connection_srw_send_search(ZOOM_connection c)
147 {
148     const char *facets = 0;
149     int i;
150     int *start, *count;
151     ZOOM_resultset resultset = 0;
152     Z_SRW_PDU *sr = 0;
153     const char *option_val = 0;
154     const char *schema = 0;
155     Z_Query *z_query;
156     Z_FacetList *facet_list = 0;
157
158     if (c->error)                  /* don't continue on error */
159         return zoom_complete;
160     assert(c->tasks);
161     if (c->tasks->which != ZOOM_TASK_SEARCH)
162         return zoom_complete;
163
164     resultset = c->tasks->u.search.resultset;
165
166 #if HAVE_LIBMEMCACHED_MEMCACHED_H
167     /* TODO: add sorting */
168     if (c->mc_st && resultset->live_set == 0)
169     {
170         size_t v_len;
171         uint32_t flags;
172         memcached_return_t rc;
173         char *v = memcached_get(c->mc_st, wrbuf_buf(resultset->mc_key),
174                                 wrbuf_len(resultset->mc_key),
175                                 &v_len, &flags, &rc);
176         if (v)
177         {
178             ZOOM_Event event;
179             WRBUF w = wrbuf_alloc();
180
181             wrbuf_write(w, v, v_len);
182             free(v);
183             resultset->size = odr_atoi(wrbuf_cstr(w));
184
185             yaz_log(YLOG_LOG, "For key %s got value %s",
186                     wrbuf_cstr(resultset->mc_key), wrbuf_cstr(w));
187
188             wrbuf_destroy(w);
189             event = ZOOM_Event_create(ZOOM_EVENT_RECV_SEARCH);
190             ZOOM_connection_put_event(c, event);
191             resultset->live_set = 1;
192         }
193     }
194 #endif
195     if (!resultset->setname)
196         resultset->setname = xstrdup("default");
197     ZOOM_options_set(resultset->options, "setname", resultset->setname);
198     start = &c->tasks->u.search.start;
199     count = &c->tasks->u.search.count;
200     facets = ZOOM_options_get(resultset->options, "facets");
201     if (facets)
202         facet_list = yaz_pqf_parse_facet_list(c->odr_out, facets);
203     schema = c->tasks->u.search.schema;
204
205     if (resultset->live_set)
206     {
207         if (*start >= resultset->size)
208             return zoom_complete;
209         if (*start + *count > resultset->size)
210             *count = resultset->size - *start;
211     }
212     for (i = 0; i < *count; i++)
213     {
214         ZOOM_record rec =
215             ZOOM_record_cache_lookup(resultset, i + *start,
216                                      c->tasks->u.search.syntax,
217                                      c->tasks->u.search.elementSetName,
218                                      schema);
219         if (!rec)
220             break;
221     }
222     *start += i;
223     *count -= i;
224
225     if (*count == 0 && resultset->live_set)
226         return zoom_complete;
227
228     assert(resultset->query);
229
230     sr = ZOOM_srw_get_pdu(c, Z_SRW_searchRetrieve_request);
231     z_query = ZOOM_query_get_Z_Query(resultset->query);
232
233     if (z_query->which == Z_Query_type_104
234         && z_query->u.type_104->which == Z_External_CQL)
235     {
236         sr->u.request->queryType = "cql";
237         sr->u.request->query = z_query->u.type_104->u.cql;
238     }
239     else if (z_query->which == Z_Query_type_1 && z_query->u.type_1)
240     {
241         sr->u.request->queryType = "pqf";
242         sr->u.request->query =
243             odr_strdup(c->odr_out,
244                        ZOOM_query_get_query_string(resultset->query));
245     }
246     else
247     {
248         ZOOM_set_error(c, ZOOM_ERROR_UNSUPPORTED_QUERY, 0);
249         return zoom_complete;
250     }
251
252     option_val = ZOOM_query_get_sru11(resultset->query);
253     if (option_val)
254     {
255         sr->u.request->sort_type = Z_SRW_sort_type_sort;
256         sr->u.request->sort.sortKeys = odr_strdup(c->odr_out, option_val);
257     }
258     sr->u.request->startRecord = odr_intdup(c->odr_out, *start + 1);
259     sr->u.request->maximumRecords = odr_intdup(
260         c->odr_out, (resultset->step > 0 && resultset->step < *count) ?
261         resultset->step : *count);
262     sr->u.request->recordSchema = odr_strdup_null(c->odr_out, schema);
263     sr->u.request->facetList = facet_list;
264
265     option_val = ZOOM_resultset_option_get(resultset, "recordPacking");
266     if (option_val)
267         sr->u.request->recordPacking = odr_strdup(c->odr_out, option_val);
268
269     option_val = ZOOM_resultset_option_get(resultset, "extraArgs");
270     yaz_encode_sru_extra(sr, c->odr_out, option_val);
271     return send_srw(c, sr);
272 }
273 #else
274 zoom_ret ZOOM_connection_srw_send_search(ZOOM_connection c)
275 {
276     return zoom_complete;
277 }
278 #endif
279
280 #if YAZ_HAVE_XML2
281 static zoom_ret handle_srw_response(ZOOM_connection c,
282                                     Z_SRW_searchRetrieveResponse *res)
283 {
284     ZOOM_resultset resultset = 0;
285     int i;
286     NMEM nmem;
287     ZOOM_Event event;
288     const char *syntax, *elementSetName, *schema;
289
290     if (!c->tasks)
291         return zoom_complete;
292
293     if (c->tasks->which != ZOOM_TASK_SEARCH)
294         return zoom_complete;
295
296     resultset = c->tasks->u.search.resultset;
297     syntax = c->tasks->u.search.syntax;
298     elementSetName = c->tasks->u.search.elementSetName;
299     schema = c->tasks->u.search.schema;
300
301     if (resultset->live_set == 0)
302     {
303         event = ZOOM_Event_create(ZOOM_EVENT_RECV_SEARCH);
304         ZOOM_connection_put_event(c, event);
305     }
306     if (res->facetList)
307         ZOOM_handle_facet_list(resultset, res->facetList);
308
309     resultset->size = 0;
310
311     if (res->resultSetId)
312         ZOOM_resultset_option_set(resultset, "resultSetId", res->resultSetId);
313
314     yaz_log(c->log_details, "%p handle_srw_response got SRW response OK", c);
315
316     if (res->num_diagnostics > 0)
317     {
318         resultset->live_set = 2;
319         set_SRU_error(c, &res->diagnostics[0]);
320     }
321     else
322     {
323         if (res->numberOfRecords)
324         {
325             resultset->size = *res->numberOfRecords;
326 #if HAVE_LIBMEMCACHED_MEMCACHED_H
327             if (c->mc_st && resultset->live_set == 0)
328             {
329                 uint32_t flags = 0;
330                 memcached_return_t rc;
331                 time_t expiration = 36000;
332                 char str[40];
333
334                 sprintf(str, ODR_INT_PRINTF, resultset->size);
335                 rc = memcached_set(c->mc_st,
336                                    wrbuf_buf(resultset->mc_key),wrbuf_len(resultset->mc_key),
337                                    str, strlen(str), expiration, flags);
338                 yaz_log(YLOG_LOG, "Store SRU hit count key=%s value=%s rc=%u %s",
339                         wrbuf_cstr(resultset->mc_key), str, (unsigned) rc,
340                         memcached_last_error_message(c->mc_st));
341             }
342 #endif
343         }
344         resultset->live_set = 2;
345         if (res->suggestions)
346             ZOOM_resultset_option_set(resultset, "suggestions",
347                                       res->suggestions);
348         for (i = 0; i < res->num_records; i++)
349         {
350             int pos = c->tasks->u.search.start + i;
351             Z_SRW_record *sru_rec;
352             Z_SRW_diagnostic *diag = 0;
353             int num_diag;
354
355             /* only trust recordPosition if >= calculated position */
356             if (res->records[i].recordPosition &&
357                 *res->records[i].recordPosition >= pos + 1)
358                 pos = *res->records[i].recordPosition - 1;
359
360             if (!ZOOM_record_cache_lookup(resultset,
361                                           pos,
362                                           syntax, elementSetName, schema))
363             {
364                 Z_NamePlusRecord *npr = (Z_NamePlusRecord *)
365                     odr_malloc(c->odr_in, sizeof(Z_NamePlusRecord));
366                 sru_rec = &res->records[i];
367
368                 npr->databaseName = 0;
369                 npr->which = Z_NamePlusRecord_databaseRecord;
370                 npr->u.databaseRecord = (Z_External *)
371                     odr_malloc(c->odr_in, sizeof(Z_External));
372                 npr->u.databaseRecord->descriptor = 0;
373                 npr->u.databaseRecord->direct_reference =
374                     odr_oiddup(c->odr_in, yaz_oid_recsyn_xml);
375                 npr->u.databaseRecord->indirect_reference = 0;
376                 npr->u.databaseRecord->which = Z_External_octet;
377
378                 npr->u.databaseRecord->u.octet_aligned =
379                     odr_create_Odr_oct(c->odr_in,
380                                        sru_rec->recordData_buf,
381                                    sru_rec->recordData_len);
382                 if (sru_rec->recordSchema
383                     && !strcmp(sru_rec->recordSchema,
384                                "info:srw/schema/1/diagnostics-v1.1"))
385                 {
386                     sru_decode_surrogate_diagnostics(sru_rec->recordData_buf,
387                                                      sru_rec->recordData_len,
388                                                      &diag, &num_diag,
389                                                      resultset->odr);
390                 }
391                 ZOOM_record_cache_add(resultset, npr,
392                                       pos, syntax, elementSetName,
393                                       schema, diag);
394             }
395         }
396         nmem = odr_extract_mem(c->odr_in);
397         nmem_transfer(odr_getmem(resultset->odr), nmem);
398         nmem_destroy(nmem);
399
400         return ZOOM_connection_srw_send_search(c);
401     }
402     return zoom_complete;
403 }
404 #endif
405
406 #if YAZ_HAVE_XML2
407 static void handle_srw_scan_response(ZOOM_connection c,
408                                      Z_SRW_scanResponse *res)
409 {
410     NMEM nmem = odr_extract_mem(c->odr_in);
411     ZOOM_scanset scan;
412
413     if (!c->tasks || c->tasks->which != ZOOM_TASK_SCAN)
414         return;
415     scan = c->tasks->u.scan.scan;
416
417     if (res->num_diagnostics > 0)
418         set_SRU_error(c, &res->diagnostics[0]);
419
420     scan->scan_response = 0;
421     scan->srw_scan_response = res;
422     nmem_transfer(odr_getmem(scan->odr), nmem);
423
424     ZOOM_options_set_int(scan->options, "number", res->num_terms);
425     nmem_destroy(nmem);
426 }
427 #endif
428
429 int ZOOM_handle_sru(ZOOM_connection c, Z_HTTP_Response *hres,
430                     zoom_ret *cret, char **addinfo)
431 {
432 #if YAZ_HAVE_XML2
433     int ret = 0;
434
435     /* not redirect (normal response) */
436     if (!yaz_srw_check_content_type(hres))
437     {
438         *addinfo = "content-type";
439         ret = -1;
440     }
441     else if (c->sru_mode == zoom_sru_solr)
442     {
443         Z_SRW_PDU *sr;
444         ret = yaz_solr_decode_response(c->odr_in, hres, &sr);
445         if (ret == 0)
446         {
447             if (sr->which == Z_SRW_searchRetrieve_response)
448                 *cret = handle_srw_response(c, sr->u.response);
449             else if (sr->which == Z_SRW_scan_response)
450                 handle_srw_scan_response(c, sr->u.scan_response);
451         }
452     }
453     else
454     {
455         Z_SOAP *soap_package = 0;
456         ODR o = c->odr_in;
457         Z_SOAP_Handler soap_handlers[4] = {
458             {YAZ_XMLNS_SRU_v1_response, 0, (Z_SOAP_fun) yaz_srw_codec},
459             {YAZ_XMLNS_SRU_v2_mask, 0, (Z_SOAP_fun) yaz_srw_codec},
460             {"searchRetrieveResponse", 0, (Z_SOAP_fun) yaz_srw_codec},
461             {0, 0, 0}
462         };
463         ret = z_soap_codec(o, &soap_package,
464                            &hres->content_buf, &hres->content_len,
465                            soap_handlers);
466         if (!ret && soap_package->which == Z_SOAP_generic)
467         {
468             Z_SRW_PDU *sr = (Z_SRW_PDU*) soap_package->u.generic->p;
469
470             ZOOM_options_set(c->options, "sru_version", sr->srw_version);
471             ZOOM_options_setl(c->options, "sru_extra_response_data",
472                               sr->extraResponseData_buf, sr->extraResponseData_len);
473             if (sr->which == Z_SRW_searchRetrieve_response)
474                 *cret = handle_srw_response(c, sr->u.response);
475             else if (sr->which == Z_SRW_scan_response)
476                 handle_srw_scan_response(c, sr->u.scan_response);
477             else
478                 ret = -1;
479         }
480         else if (!ret && (soap_package->which == Z_SOAP_fault
481                           || soap_package->which == Z_SOAP_error))
482         {
483             ZOOM_set_HTTP_error(c, hres->code,
484                                 soap_package->u.fault->fault_code,
485                                 soap_package->u.fault->fault_string);
486         }
487         else
488         {
489             size_t max_chars = 1000;
490             size_t sz = hres->content_len;
491             if (sz > max_chars - 1)
492                 sz = max_chars;
493             *addinfo = odr_malloc(c->odr_in, sz + 4);
494             memcpy(*addinfo, hres->content_buf, sz);
495             if (sz == max_chars)
496                 strcpy(*addinfo + sz, "...");
497             else
498                 strcpy(*addinfo + sz, "");
499             ret = -1;
500         }
501     }
502     return ret;
503 #else
504     return -1;
505 #endif
506 }
507
508 /*
509  * Local variables:
510  * c-basic-offset: 4
511  * c-file-style: "Stroustrup"
512  * indent-tabs-mode: nil
513  * End:
514  * vim: shiftwidth=4 tabstop=8 expandtab
515  */
516