Filter zoom does retrieval
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2011 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include "filter_zoom.hpp"
21 #include <yaz/zoom.h>
22 #include <metaproxy/package.hpp>
23 #include <metaproxy/util.hpp>
24 #include "torus.hpp"
25
26 #include <boost/thread/mutex.hpp>
27 #include <boost/thread/condition.hpp>
28 #include <yaz/oid_db.h>
29 #include <yaz/diagbib1.h>
30 #include <yaz/log.h>
31 #include <yaz/zgdu.h>
32 #include <yaz/querytowrbuf.h>
33
34 namespace mp = metaproxy_1;
35 namespace yf = mp::filter;
36
37 namespace metaproxy_1 {
38     namespace filter {
39         struct Zoom::Searchable {
40             std::string database;
41             std::string target;
42             std::string query_encoding;
43             std::string sru;
44             bool piggyback;
45             Searchable();
46             ~Searchable();
47         };
48         class Zoom::Backend {
49             friend class Impl;
50             friend class Frontend;
51             std::string zurl;
52             ZOOM_connection m_connection;
53             ZOOM_resultset m_resultset;
54             std::string m_frontend_database;
55         public:
56             Backend();
57             ~Backend();
58             void connect(std::string zurl, int *error, const char **addinfo);
59             void search_pqf(const char *pqf, Odr_int *hits,
60                             int *error, const char **addinfo);
61             void present(Odr_int start, Odr_int number, ZOOM_record *recs,
62                          int *error, const char **addinfo);
63             void set_option(const char *name, const char *value);
64             int get_error(const char **addinfo);
65         };
66         class Zoom::Frontend {
67             friend class Impl;
68             Impl *m_p;
69             bool m_is_virtual;
70             bool m_in_use;
71             yazpp_1::GDU m_init_gdu;
72             BackendPtr m_backend;
73             void handle_package(mp::Package &package);
74             void handle_search(mp::Package &package);
75             void handle_present(mp::Package &package);
76             BackendPtr get_backend_from_databases(std::string &database,
77                                                   int *error,
78                                                   const char **addinfo);
79             Z_Records *get_records(Odr_int start,
80                                    Odr_int number_to_present,
81                                    int *error,
82                                    const char **addinfo,
83                                    Odr_int *number_of_records_returned,
84                                    ODR odr, BackendPtr b,
85                                    Odr_oid *preferredRecordSyntax,
86                                    const char *element_set_name);
87         public:
88             Frontend(Impl *impl);
89             ~Frontend();
90         };
91         class Zoom::Impl {
92             friend class Frontend;
93         public:
94             Impl();
95             ~Impl();
96             void process(metaproxy_1::Package & package);
97             void configure(const xmlNode * ptr, bool test_only);
98         private:
99             FrontendPtr get_frontend(mp::Package &package);
100             void release_frontend(mp::Package &package);
101             void parse_torus(const xmlNode *ptr);
102
103             std::list<Zoom::Searchable>m_searchables;
104
105             std::map<mp::Session, FrontendPtr> m_clients;            
106             boost::mutex m_mutex;
107             boost::condition m_cond_session_ready;
108             mp::Torus torus;
109         };
110     }
111 }
112
113 // define Pimpl wrapper forwarding to Impl
114  
115 yf::Zoom::Zoom() : m_p(new Impl)
116 {
117 }
118
119 yf::Zoom::~Zoom()
120 {  // must have a destructor because of boost::scoped_ptr
121 }
122
123 void yf::Zoom::configure(const xmlNode *xmlnode, bool test_only)
124 {
125     m_p->configure(xmlnode, test_only);
126 }
127
128 void yf::Zoom::process(mp::Package &package) const
129 {
130     m_p->process(package);
131 }
132
133
134 // define Implementation stuff
135
136 yf::Zoom::Backend::Backend()
137 {
138     m_connection = ZOOM_connection_create(0);
139     m_resultset = 0;
140 }
141
142 yf::Zoom::Backend::~Backend()
143 {
144     ZOOM_connection_destroy(m_connection);
145     ZOOM_resultset_destroy(m_resultset);
146 }
147
148 void yf::Zoom::Backend::connect(std::string zurl,
149                                 int *error, const char **addinfo)
150 {
151     ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
152     *error = ZOOM_connection_error(m_connection, 0, addinfo);
153 }
154
155 void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits,
156                                    int *error, const char **addinfo)
157 {
158     m_resultset = ZOOM_connection_search_pqf(m_connection, pqf);
159     *error = ZOOM_connection_error(m_connection, 0, addinfo);
160     if (*error == 0)
161         *hits = ZOOM_resultset_size(m_resultset);
162     else
163         *hits = 0;
164 }
165
166 void yf::Zoom::Backend::present(Odr_int start, Odr_int number,
167                                 ZOOM_record *recs,
168                                 int *error, const char **addinfo)
169 {
170     ZOOM_resultset_records(m_resultset, recs, start, number);
171     *error = ZOOM_connection_error(m_connection, 0, addinfo);
172 }
173
174 void yf::Zoom::Backend::set_option(const char *name, const char *value)
175 {
176     ZOOM_connection_option_set(m_connection, name, value);
177     if (m_resultset)
178         ZOOM_resultset_option_set(m_resultset, name, value);
179 }
180
181 int yf::Zoom::Backend::get_error(const char **addinfo)
182 {
183     return ZOOM_connection_error(m_connection, 0, addinfo);
184 }
185
186 yf::Zoom::Searchable::Searchable()
187 {
188     piggyback = true;
189 }
190
191 yf::Zoom::Searchable::~Searchable()
192 {
193 }
194
195 yf::Zoom::Frontend::Frontend(Impl *impl) : 
196     m_p(impl), m_is_virtual(false), m_in_use(true)
197 {
198 }
199
200 yf::Zoom::Frontend::~Frontend()
201 {
202 }
203
204 yf::Zoom::FrontendPtr yf::Zoom::Impl::get_frontend(mp::Package &package)
205 {
206     boost::mutex::scoped_lock lock(m_mutex);
207
208     std::map<mp::Session,yf::Zoom::FrontendPtr>::iterator it;
209     
210     while(true)
211     {
212         it = m_clients.find(package.session());
213         if (it == m_clients.end())
214             break;
215         
216         if (!it->second->m_in_use)
217         {
218             it->second->m_in_use = true;
219             return it->second;
220         }
221         m_cond_session_ready.wait(lock);
222     }
223     FrontendPtr f(new Frontend(this));
224     m_clients[package.session()] = f;
225     f->m_in_use = true;
226     return f;
227 }
228
229 void yf::Zoom::Impl::release_frontend(mp::Package &package)
230 {
231     boost::mutex::scoped_lock lock(m_mutex);
232     std::map<mp::Session,yf::Zoom::FrontendPtr>::iterator it;
233     
234     it = m_clients.find(package.session());
235     if (it != m_clients.end())
236     {
237         if (package.session().is_closed())
238         {
239             m_clients.erase(it);
240         }
241         else
242         {
243             it->second->m_in_use = false;
244         }
245         m_cond_session_ready.notify_all();
246     }
247 }
248
249 yf::Zoom::Impl::Impl()
250 {
251 }
252
253 yf::Zoom::Impl::~Impl()
254
255 }
256
257 void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
258 {
259     if (!ptr1)
260         return ;
261     for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next)
262     {
263         if (ptr1->type != XML_ELEMENT_NODE)
264             continue;
265         if (!strcmp((const char *) ptr1->name, "record"))
266         {
267             const xmlNode *ptr2 = ptr1;
268             for (ptr2 = ptr2->children; ptr2; ptr2 = ptr2->next)
269             {
270                 if (ptr2->type != XML_ELEMENT_NODE)
271                     continue;
272                 if (!strcmp((const char *) ptr2->name, "layer"))
273                 {
274                     Zoom::Searchable s;
275
276                     const xmlNode *ptr3 = ptr2;
277                     for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
278                     {
279                         if (ptr3->type != XML_ELEMENT_NODE)
280                             continue;
281                         if (!strcmp((const char *) ptr3->name, "id"))
282                         {
283                             s.database = mp::xml::get_text(ptr3);
284                         }
285                         else if (!strcmp((const char *) ptr3->name, "zurl"))
286                         {
287                             s.target = mp::xml::get_text(ptr3);
288                         }
289                         else if (!strcmp((const char *) ptr3->name, "sru"))
290                         {
291                             s.sru = mp::xml::get_text(ptr3);
292                         }
293                         else if (!strcmp((const char *) ptr3->name,
294                                          "queryEncoding"))
295                         {
296                             s.query_encoding = mp::xml::get_text(ptr3);
297                         }
298                         else if (!strcmp((const char *) ptr3->name,
299                                          "piggyback"))
300                         {
301                             s.piggyback = mp::xml::get_bool(ptr3, true);
302                         }
303                     }
304                     if (s.database.length() && s.target.length())
305                     {
306                         yaz_log(YLOG_LOG, "add db=%s target=%s", 
307                                 s.database.c_str(), s.target.c_str());
308                         m_searchables.push_back(s);
309                     }
310                 }
311             }
312         }
313     }
314 }
315
316
317 void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
318 {
319     for (ptr = ptr->children; ptr; ptr = ptr->next)
320     {
321         if (ptr->type != XML_ELEMENT_NODE)
322             continue;
323         if (!strcmp((const char *) ptr->name, "records"))
324         {
325             parse_torus(ptr);
326         }
327         else if (!strcmp((const char *) ptr->name, "torus"))
328         {
329             std::string url;
330             const struct _xmlAttr *attr;
331             for (attr = ptr->properties; attr; attr = attr->next)
332             {
333                 if (!strcmp((const char *) attr->name, "url"))
334                     url = mp::xml::get_text(attr->children);
335                 else
336                     throw mp::filter::FilterException(
337                         "Bad attribute " + std::string((const char *)
338                                                        attr->name));
339             }
340             torus.read_searchables(url);
341             xmlDoc *doc = torus.get_doc();
342             if (doc)
343             {
344                 xmlNode *ptr = xmlDocGetRootElement(doc);
345                 parse_torus(ptr);
346             }
347         }
348         else
349         {
350             throw mp::filter::FilterException
351                 ("Bad element " 
352                  + std::string((const char *) ptr->name)
353                  + " in zoom filter");
354         }
355     }
356 }
357
358 yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
359     std::string &database, int *error, const char **addinfo)
360 {
361     std::list<BackendPtr>::const_iterator map_it;
362     if (m_backend && m_backend->m_frontend_database == database)
363         return m_backend;
364
365     std::list<Zoom::Searchable>::iterator map_s =
366         m_p->m_searchables.begin();
367
368     std::string c_db = mp::util::database_name_normalize(database);
369
370     while (map_s != m_p->m_searchables.end())
371     {
372         if (c_db.compare(map_s->database) == 0)
373             break;
374         map_s++;
375     }
376     if (map_s == m_p->m_searchables.end())
377     {
378         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
379         *addinfo = database.c_str();
380         BackendPtr b;
381         return b;
382     }
383
384     m_backend.reset();
385
386     BackendPtr b(new Backend);
387
388     b->m_frontend_database = database;
389
390     if (map_s->query_encoding.length())
391         b->set_option("rpnCharset", map_s->query_encoding.c_str());
392
393     std::string url;
394     if (map_s->sru.length())
395     {
396         url = "http://" + map_s->target;
397         b->set_option("sru", map_s->sru.c_str());
398     }
399     else
400         url = map_s->target;
401
402     b->connect(url, error, addinfo);
403     if (*error == 0)
404     {
405         m_backend = b;
406     }
407     return b;
408 }
409
410 Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
411                                            Odr_int number_to_present,
412                                            int *error,
413                                            const char **addinfo,
414                                            Odr_int *number_of_records_returned,
415                                            ODR odr,
416                                            BackendPtr b,
417                                            Odr_oid *preferredRecordSyntax,
418                                            const char *element_set_name)
419 {
420     *number_of_records_returned = 0;
421     Z_Records *records = 0;
422
423     if (start < 0 || number_to_present <= 0)
424         return records;
425     
426     if (number_to_present > 10000)
427         number_to_present = 10000;
428     
429     ZOOM_record *recs = (ZOOM_record *)
430         odr_malloc(odr, number_to_present * sizeof(*recs));
431
432     char oid_name_str[OID_STR_MAX];
433     const char *syntax_name = 0;
434
435     if (preferredRecordSyntax)
436         syntax_name =
437             yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
438     b->set_option("preferredRecordSyntax", syntax_name);
439         
440     b->set_option("elementSetName", element_set_name);
441
442     b->present(start, number_to_present, recs, error, addinfo);
443
444     Odr_int i = 0;
445     if (!*error)
446     {
447         for (i = 0; i < number_to_present; i++)
448             if (!recs[i])
449                 break;
450     }
451     if (i > 0)
452     {  // only return records if no error and at least one record
453         char *odr_database = odr_strdup(odr,
454                                         b->m_frontend_database.c_str());
455         Z_NamePlusRecordList *npl = (Z_NamePlusRecordList *)
456             odr_malloc(odr, sizeof(*npl));
457         *number_of_records_returned = i;
458         npl->num_records = i;
459         npl->records = (Z_NamePlusRecord **)
460             odr_malloc(odr, i * sizeof(*npl->records));
461         for (i = 0; i < number_to_present; i++)
462         {
463             Z_NamePlusRecord *npr = 0;
464             const char *addinfo;
465             int sur_error = ZOOM_record_error(recs[i], 0 /* msg */,
466                                               &addinfo, 0 /* diagset */);
467                 
468             if (sur_error)
469             {
470                 npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
471                                             addinfo);
472             }
473             else
474             {
475                 npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
476                 Z_External *ext =
477                     (Z_External *) ZOOM_record_get(recs[i], "ext", 0);
478                 npr->databaseName = odr_database;
479                 if (ext)
480                 {
481                     npr->which = Z_NamePlusRecord_databaseRecord;
482                     npr->u.databaseRecord = ext;
483                 }
484             }
485             npl->records[i] = npr;
486         }
487         records = (Z_Records*) odr_malloc(odr, sizeof(*records));
488         records->which = Z_Records_DBOSD;
489         records->u.databaseOrSurDiagnostics = npl;
490     }
491     return records;
492 }
493     
494
495 void yf::Zoom::Frontend::handle_search(mp::Package &package)
496 {
497     Z_GDU *gdu = package.request().get();
498     Z_APDU *apdu_req = gdu->u.z3950;
499     Z_APDU *apdu_res = 0;
500     mp::odr odr;
501     Z_SearchRequest *sr = apdu_req->u.searchRequest;
502     if (sr->num_databaseNames != 1)
503     {
504         apdu_res = odr.create_searchResponse(
505             apdu_req, YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED, 0);
506         package.response() = apdu_res;
507         return;
508     }
509
510     int error = 0;
511     const char *addinfo = 0;
512     std::string db(sr->databaseNames[0]);
513     BackendPtr b = get_backend_from_databases(db, &error, &addinfo);
514     if (error)
515     {
516         apdu_res = 
517             odr.create_searchResponse(
518                 apdu_req, error, addinfo);
519         package.response() = apdu_res;
520         return;
521     }
522
523     b->set_option("setname", "default");
524
525     Odr_int hits = 0;
526     Z_Query *query = sr->query;
527     if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
528     {
529         WRBUF w = wrbuf_alloc();
530         yaz_rpnquery_to_wrbuf(w, query->u.type_1);
531
532         b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
533         wrbuf_destroy(w);
534     }
535     else
536     {
537         apdu_res = 
538             odr.create_searchResponse(apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
539         package.response() = apdu_res;
540         return;
541     }
542     
543     const char *element_set_name = 0;
544     Odr_int number_to_present = 0;
545     if (!error)
546         mp::util::piggyback_sr(sr, hits, number_to_present, &element_set_name);
547     
548     Odr_int number_of_records_returned = 0;
549     Z_Records *records = get_records(
550         0, number_to_present, &error, &addinfo,
551         &number_of_records_returned, odr, b, sr->preferredRecordSyntax,
552         element_set_name);
553     apdu_res = odr.create_searchResponse(apdu_req, error, addinfo);
554     if (records)
555     {
556         apdu_res->u.searchResponse->records = records;
557         apdu_res->u.searchResponse->numberOfRecordsReturned =
558             odr_intdup(odr, number_of_records_returned);
559     }
560     apdu_res->u.searchResponse->resultCount = odr_intdup(odr, hits);
561     package.response() = apdu_res;
562 }
563
564 void yf::Zoom::Frontend::handle_present(mp::Package &package)
565 {
566     Z_GDU *gdu = package.request().get();
567     Z_APDU *apdu_req = gdu->u.z3950;
568     Z_APDU *apdu_res = 0;
569     Z_PresentRequest *pr = apdu_req->u.presentRequest;
570
571     mp::odr odr;
572     if (!m_backend)
573     {
574         package.response() = odr.create_presentResponse(
575             apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, 0);
576         return;
577     }
578     const char *element_set_name = 0;
579     Z_RecordComposition *comp = pr->recordComposition;
580     if (comp && comp->which != Z_RecordComp_simple)
581     {
582         package.response() = odr.create_presentResponse(
583             apdu_req, 
584             YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP, 0);
585         return;
586     }
587     if (comp && comp->u.simple->which == Z_ElementSetNames_generic)
588         element_set_name = comp->u.simple->u.generic;
589     Odr_int number_of_records_returned = 0;
590     int error = 0;
591     const char *addinfo = 0;
592     Z_Records *records = get_records(
593         *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested,
594         &error, &addinfo, &number_of_records_returned, odr, m_backend,
595         pr->preferredRecordSyntax, element_set_name);
596
597     apdu_res = odr.create_presentResponse(apdu_req, error, addinfo);
598     if (records)
599     {
600         apdu_res->u.presentResponse->records = records;
601         apdu_res->u.presentResponse->numberOfRecordsReturned =
602             odr_intdup(odr, number_of_records_returned);
603     }
604     package.response() = apdu_res;
605 }
606
607 void yf::Zoom::Frontend::handle_package(mp::Package &package)
608 {
609     Z_GDU *gdu = package.request().get();
610     if (!gdu)
611         ;
612     else if (gdu->which == Z_GDU_Z3950)
613     {
614         Z_APDU *apdu_req = gdu->u.z3950;
615         if (apdu_req->which == Z_APDU_initRequest)
616         {
617             mp::odr odr;
618             package.response() = odr.create_close(
619                 apdu_req,
620                 Z_Close_protocolError,
621                 "double init");
622         }
623         else if (apdu_req->which == Z_APDU_searchRequest)
624         {
625             handle_search(package);
626         }
627         else if (apdu_req->which == Z_APDU_presentRequest)
628         {
629             handle_present(package);
630         }
631         else
632         {
633             mp::odr odr;
634             package.response() = odr.create_close(
635                 apdu_req,
636                 Z_Close_protocolError,
637                 "zoom filter cannot handle this APDU");
638             package.session().close();
639         }
640     }
641     else
642     {
643         package.session().close();
644     }
645 }
646
647 void yf::Zoom::Impl::process(mp::Package &package)
648 {
649     FrontendPtr f = get_frontend(package);
650     Z_GDU *gdu = package.request().get();
651
652     if (f->m_is_virtual)
653     {
654         f->handle_package(package);
655     }
656     else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which ==
657              Z_APDU_initRequest)
658     {
659         Z_InitRequest *req = gdu->u.z3950->u.initRequest;
660         f->m_init_gdu = gdu;
661         
662         mp::odr odr;
663         Z_APDU *apdu = odr.create_initResponse(gdu->u.z3950, 0, 0);
664         Z_InitResponse *resp = apdu->u.initResponse;
665         
666         int i;
667         static const int masks[] = {
668             Z_Options_search,
669             Z_Options_present,
670             -1 
671         };
672         for (i = 0; masks[i] != -1; i++)
673             if (ODR_MASK_GET(req->options, masks[i]))
674                 ODR_MASK_SET(resp->options, masks[i]);
675         
676         static const int versions[] = {
677             Z_ProtocolVersion_1,
678             Z_ProtocolVersion_2,
679             Z_ProtocolVersion_3,
680             -1
681         };
682         for (i = 0; versions[i] != -1; i++)
683             if (ODR_MASK_GET(req->protocolVersion, versions[i]))
684                 ODR_MASK_SET(resp->protocolVersion, versions[i]);
685             else
686                 break;
687         
688         *resp->preferredMessageSize = *req->preferredMessageSize;
689         *resp->maximumRecordSize = *req->maximumRecordSize;
690         
691         package.response() = apdu;
692         f->m_is_virtual = true;
693     }
694     else
695         package.move();
696
697     release_frontend(package);
698 }
699
700
701 static mp::filter::Base* filter_creator()
702 {
703     return new mp::filter::Zoom;
704 }
705
706 extern "C" {
707     struct metaproxy_1_filter_struct metaproxy_1_filter_zoom = {
708         0,
709         "zoom",
710         filter_creator
711     };
712 }
713
714
715 /*
716  * Local variables:
717  * c-basic-offset: 4
718  * c-file-style: "Stroustrup"
719  * indent-tabs-mode: nil
720  * End:
721  * vim: shiftwidth=4 tabstop=8 expandtab
722  */
723