record_transform: new step usemarcon
[metaproxy-moved-to-github.git] / src / filter_record_transform.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2012 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include "filter_record_transform.hpp"
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "gduutil.hpp"
24
25 #include <yaz/diagbib1.h>
26 #include <yaz/zgdu.h>
27 #include <yaz/retrieval.h>
28
29 #if HAVE_USEMARCON
30 #include <usemarconlib.h>
31 #include <defines.h>
32 #endif
33
34 #include <iostream>
35
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
38 namespace mp_util = metaproxy_1::util;
39
40 namespace metaproxy_1 {
41     namespace filter {
42         class RecordTransform::Impl {
43         public:
44             Impl();
45             ~Impl();
46             void process(metaproxy_1::Package & package) const;
47             void configure(const xmlNode * xml_node, const char *path);
48         private:
49             yaz_retrieval_t m_retrieval;
50         };
51     }
52 }
53
54 #if HAVE_USEMARCON
55 struct info_usemarcon {
56     char *stage1;
57     char *stage2;
58
59     Usemarcon *usemarcon1;
60     Usemarcon *usemarcon2;
61 };
62
63 static int convert_usemarcon(void *info, WRBUF record, WRBUF wr_error)
64 {
65     struct info_usemarcon *p = (struct info_usemarcon *) info;
66
67     if (p->usemarcon1)
68     {
69         char *converted;
70         size_t convlen;
71         int res;
72
73         p->usemarcon1->SetMarcRecord(wrbuf_buf(record), wrbuf_len(record));
74         res = p->usemarcon1->Convert();
75         if (res != 0)
76         {
77             wrbuf_printf(wr_error, "usemarcon stage1 failed res=%d", res);
78             return -1;
79         }
80         p->usemarcon1->GetMarcRecord(converted, convlen);
81         
82         if (p->usemarcon2)
83         {
84             p->usemarcon2->SetMarcRecord(converted, convlen);
85             
86             res = p->usemarcon2->Convert();
87             free(converted);
88             if (res != 0)
89             {
90                 wrbuf_printf(wr_error, "usemarcon stage2 failed res=%d",
91                              res);
92                 return -1;
93             }
94             p->usemarcon2->GetMarcRecord(converted, convlen);
95         }
96         wrbuf_rewind(record);
97         wrbuf_write(record, converted, convlen);
98         free(converted);
99     }
100     return 0;
101 }
102
103 static void destroy_usemarcon(void *info)
104 {
105     struct info_usemarcon *p = (struct info_usemarcon *) info;
106
107     delete p->usemarcon1;
108     delete p->usemarcon2;
109     xfree(p->stage1);
110     xfree(p->stage2);
111     xfree(p);
112 }
113
114 static void *construct_usemarcon(const xmlNode *ptr, const char *path,
115                                  WRBUF wr_error)
116 {
117     struct _xmlAttr *attr;
118     if (strcmp((const char *) ptr->name, "usemarcon"))
119         return 0;
120
121     struct info_usemarcon *p = (struct info_usemarcon *) xmalloc(sizeof(*p));
122     p->stage1 = 0;
123     p->stage2 = 0;
124     p->usemarcon1 = 0;
125     p->usemarcon2 = 0;
126
127     for (attr = ptr->properties; attr; attr = attr->next)
128     {
129         if (!xmlStrcmp(attr->name, BAD_CAST "stage1") &&
130             attr->children && attr->children->type == XML_TEXT_NODE)
131             p->stage1 = xstrdup((const char *) attr->children->content);
132         else if (!xmlStrcmp(attr->name, BAD_CAST "stage2") &&
133             attr->children && attr->children->type == XML_TEXT_NODE)
134             p->stage2 = xstrdup((const char *) attr->children->content);
135         else
136         {
137             wrbuf_printf(wr_error, "Bad attribute '%s'"
138                          "Expected stage1 or stage2.", attr->name);
139             destroy_usemarcon(p);
140             return 0;
141         }
142     }
143
144     if (p->stage1)
145     {
146         p->usemarcon1 = new Usemarcon();
147         p->usemarcon1->SetIniFileName(p->stage1);
148     }
149     if (p->stage2)
150     {
151         p->usemarcon2 = new Usemarcon();
152         p->usemarcon2->SetIniFileName(p->stage2);
153     }
154     return p;
155 }
156
157 static void type_usemarcon(struct yaz_record_conv_type *t)
158 {
159     t->next = 0;
160     t->construct = construct_usemarcon;
161     t->convert = convert_usemarcon;
162     t->destroy = destroy_usemarcon;
163 }
164 #endif
165
166 // define Pimpl wrapper forwarding to Impl
167  
168 yf::RecordTransform::RecordTransform() : m_p(new Impl)
169 {
170 }
171
172 yf::RecordTransform::~RecordTransform()
173 {  // must have a destructor because of boost::scoped_ptr
174 }
175
176 void yf::RecordTransform::configure(const xmlNode *xmlnode, bool test_only,
177                                     const char *path)
178 {
179     m_p->configure(xmlnode, path);
180 }
181
182 void yf::RecordTransform::process(mp::Package &package) const
183 {
184     m_p->process(package);
185 }
186
187
188 yf::RecordTransform::Impl::Impl() 
189 {
190     m_retrieval = yaz_retrieval_create();
191     assert(m_retrieval);
192 }
193
194 yf::RecordTransform::Impl::~Impl()
195
196     if (m_retrieval)
197         yaz_retrieval_destroy(m_retrieval);
198 }
199
200 void yf::RecordTransform::Impl::configure(const xmlNode *xml_node,
201                                           const char *path)
202 {
203     yaz_retrieval_set_path(m_retrieval, path);
204
205     if (!xml_node)
206         throw mp::XMLError("RecordTransform filter config: empty XML DOM");
207
208     // parsing down to retrieval node, which can be any of the children nodes
209     xmlNode *retrieval_node;
210     for (retrieval_node = xml_node->children; 
211          retrieval_node; 
212          retrieval_node = retrieval_node->next)
213     {
214         if (retrieval_node->type != XML_ELEMENT_NODE)
215             continue;
216         if (0 == strcmp((const char *) retrieval_node->name, "retrievalinfo"))
217             break;
218     }
219
220 #if HAVE_USEMARCON
221     struct yaz_record_conv_type mt;
222     type_usemarcon(&mt);
223     struct yaz_record_conv_type *t = &mt;
224 #else
225     struct yaz_record_conv_type *t = 0;
226 #endif
227
228     // read configuration
229     if (0 != yaz_retrieval_configure_t(m_retrieval, retrieval_node, t))
230     {
231         std::string msg("RecordTransform filter config: ");
232         msg += yaz_retrieval_get_error(m_retrieval);
233         throw mp::XMLError(msg);
234     }
235 }
236
237 void yf::RecordTransform::Impl::process(mp::Package &package) const
238 {
239
240     Z_GDU *gdu_req = package.request().get();
241     Z_PresentRequest *pr_req = 0;
242     Z_SearchRequest *sr_req = 0;
243
244     const char *input_schema = 0;
245     Odr_oid *input_syntax = 0;
246
247     if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
248         gdu_req->u.z3950->which == Z_APDU_presentRequest)
249     {
250         pr_req = gdu_req->u.z3950->u.presentRequest;
251
252         input_schema =
253             mp_util::record_composition_to_esn(pr_req->recordComposition);
254         input_syntax = pr_req->preferredRecordSyntax;
255     }
256     else if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
257              gdu_req->u.z3950->which == Z_APDU_searchRequest)
258     {
259         sr_req = gdu_req->u.z3950->u.searchRequest;
260
261         input_syntax = sr_req->preferredRecordSyntax;
262
263         // we don't know how many hits we're going to get and therefore
264         // the effective element set name.. Therefore we can only allow
265         // two cases.. Both equal or absent.. If not, we'll just have to
266         // disable the piggyback!
267         if (sr_req->smallSetElementSetNames 
268             &&
269             sr_req->mediumSetElementSetNames
270             &&
271             sr_req->smallSetElementSetNames->which == Z_ElementSetNames_generic
272             && 
273             sr_req->mediumSetElementSetNames->which == Z_ElementSetNames_generic
274             && 
275             !strcmp(sr_req->smallSetElementSetNames->u.generic,
276                     sr_req->mediumSetElementSetNames->u.generic))
277         {
278             input_schema = sr_req->smallSetElementSetNames->u.generic;
279         }
280         else if (!sr_req->smallSetElementSetNames && 
281                  !sr_req->mediumSetElementSetNames)
282             ; // input_schema is 0 already
283         else
284         {
285             // disable piggyback (perhaps it was disabled already)
286             *sr_req->smallSetUpperBound = 0;
287             *sr_req->largeSetLowerBound = 0;
288             *sr_req->mediumSetPresentNumber = 0;
289             package.move();
290             return;
291         }
292         // we can handle it in record_transform.
293     }
294     else
295     {
296         package.move();
297         return;
298     }
299     
300     mp::odr odr_en(ODR_ENCODE);
301
302     // setting up variables for conversion state
303     yaz_record_conv_t rc = 0;
304
305     const char *match_schema = 0;
306     Odr_oid *match_syntax = 0;
307
308     const char *backend_schema = 0;
309     Odr_oid *backend_syntax = 0;
310
311     int ret_code 
312         = yaz_retrieval_request(m_retrieval,
313                                 input_schema, input_syntax,
314                                 &match_schema, &match_syntax,
315                                 &rc,
316                                 &backend_schema, &backend_syntax);
317     // error handling
318     if (ret_code != 0)
319     {
320         int error_code;
321         const char *details = 0;
322
323         if (ret_code == -1) /* error ? */
324         {
325             details = yaz_retrieval_get_error(m_retrieval);
326             error_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
327         }
328         else if (ret_code == 1 || ret_code == 3)
329         {
330             details = input_schema;
331             error_code = YAZ_BIB1_ELEMENT_SET_NAMES_UNSUPP;
332         }
333         else if (ret_code == 2)
334         {
335             char oidbuf[OID_STR_MAX];
336             oid_oid_to_dotstring(input_syntax, oidbuf);
337             details = odr_strdup(odr_en, oidbuf);
338             error_code = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
339         }
340         else
341         {
342             char *tmp = (char*) odr_malloc(odr_en, 80);
343             sprintf(tmp,
344                     "record_transform: yaz_retrieval_get_error returned %d",
345                     ret_code);
346             details = tmp;
347             error_code = YAZ_BIB1_UNSPECIFIED_ERROR;
348         }
349         Z_APDU *apdu;
350         if (sr_req)
351         {
352             apdu = odr_en.create_searchResponse(
353                 gdu_req->u.z3950, error_code, details);
354         }
355         else
356         {
357             apdu = odr_en.create_presentResponse(
358                 gdu_req->u.z3950, error_code, details);
359         }
360         package.response() = apdu;
361         return;
362     }
363
364     if (sr_req)
365     {
366         if (backend_syntax) 
367             sr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
368         else
369             sr_req->preferredRecordSyntax = 0;
370
371         if (backend_schema)
372         {
373             sr_req->smallSetElementSetNames
374                 = (Z_ElementSetNames *)
375                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
376             sr_req->smallSetElementSetNames->which = Z_ElementSetNames_generic;
377             sr_req->smallSetElementSetNames->u.generic 
378                 = odr_strdup(odr_en, backend_schema);
379             sr_req->mediumSetElementSetNames = sr_req->smallSetElementSetNames;
380         }
381         else
382         {
383             sr_req->smallSetElementSetNames = 0;
384             sr_req->mediumSetElementSetNames = 0;
385         }
386     }
387     else if (pr_req)
388     {
389         if (backend_syntax) 
390             pr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
391         else
392             pr_req->preferredRecordSyntax = 0;
393         
394         if (backend_schema)
395         {
396             pr_req->recordComposition 
397                 = (Z_RecordComposition *) 
398                 odr_malloc(odr_en, sizeof(Z_RecordComposition));
399             pr_req->recordComposition->which 
400                 = Z_RecordComp_simple;
401             pr_req->recordComposition->u.simple 
402                 = (Z_ElementSetNames *)
403                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
404             pr_req->recordComposition->u.simple->which = Z_ElementSetNames_generic;
405             pr_req->recordComposition->u.simple->u.generic 
406                 = odr_strdup(odr_en, backend_schema);
407         }
408         else
409             pr_req->recordComposition = 0;
410     }
411
412     // attaching Z3950 package to filter chain
413     package.request() = gdu_req;
414
415     package.move();
416     
417     Z_GDU *gdu_res = package.response().get();
418
419     // see if we have a records list to patch!
420     Z_NamePlusRecordList *records = 0;
421     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
422         gdu_res->u.z3950->which == Z_APDU_presentResponse)
423     {
424         Z_PresentResponse * pr_res = gdu_res->u.z3950->u.presentResponse;
425         
426         if (rc && pr_res 
427             && pr_res->numberOfRecordsReturned 
428             && *(pr_res->numberOfRecordsReturned) > 0
429             && pr_res->records
430             && pr_res->records->which == Z_Records_DBOSD)
431         {
432             records = pr_res->records->u.databaseOrSurDiagnostics;
433         }
434     }
435     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
436         gdu_res->u.z3950->which == Z_APDU_searchResponse)
437     {
438         Z_SearchResponse *sr_res = gdu_res->u.z3950->u.searchResponse;
439         
440         if (rc && sr_res 
441             && sr_res->numberOfRecordsReturned 
442             && *(sr_res->numberOfRecordsReturned) > 0
443             && sr_res->records
444             && sr_res->records->which == Z_Records_DBOSD)
445         {
446             records = sr_res->records->u.databaseOrSurDiagnostics;
447         }
448     }
449     
450     if (records)
451     {
452         int i;
453         for (i = 0; i < records->num_records; i++)
454         {
455             Z_NamePlusRecord *npr = records->records[i];
456             if (npr->which == Z_NamePlusRecord_databaseRecord)
457             {
458                 WRBUF output_record = wrbuf_alloc();
459                 Z_External *r = npr->u.databaseRecord;
460                 int ret_trans = 0;
461                 if (r->which == Z_External_OPAC)
462                 {
463                     ret_trans =
464                         yaz_record_conv_opac_record(rc, r->u.opac,
465                                                     output_record);
466                 }
467                 else if (r->which == Z_External_octet) 
468                 {
469                     ret_trans =
470                         yaz_record_conv_record(rc, (const char *)
471                                                r->u.octet_aligned->buf, 
472                                                r->u.octet_aligned->len,
473                                                output_record);
474                 }
475                 if (ret_trans == 0)
476                 {
477                     npr->u.databaseRecord =
478                         z_ext_record_oid(odr_en, match_syntax,
479                                          wrbuf_buf(output_record),
480                                          wrbuf_len(output_record));
481                 }
482                 else
483                 {
484                     records->records[i] =
485                         zget_surrogateDiagRec(
486                             odr_en, npr->databaseName,
487                             YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
488                             yaz_record_conv_get_error(rc));
489                 }
490                 wrbuf_destroy(output_record);
491             }
492         }
493         package.response() = gdu_res;
494     }
495     return;
496 }
497
498 static mp::filter::Base* filter_creator()
499 {
500     return new mp::filter::RecordTransform;
501 }
502
503 extern "C" {
504     struct metaproxy_1_filter_struct metaproxy_1_filter_record_transform = {
505         0,
506         "record_transform",
507         filter_creator
508     };
509 }
510
511
512 /*
513  * Local variables:
514  * c-basic-offset: 4
515  * c-file-style: "Stroustrup"
516  * indent-tabs-mode: nil
517  * End:
518  * vim: shiftwidth=4 tabstop=8 expandtab
519  */
520