HTML parser stops eating tag when <-char is met
[metaproxy-moved-to-github.git] / src / sru_util.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2013 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "sru_util.hpp"
20 #include <metaproxy/util.hpp>
21
22 #include <iostream>
23 #include <string>
24
25 namespace mp = metaproxy_1;
26
27 // Doxygen doesn't like mp::gdu, so we use this instead
28 namespace mp_util = metaproxy_1::util;
29
30 const std::string xmlns_explain("http://explain.z3950.org/dtd/2.0/");
31
32 bool mp_util::build_sru_debug_package(mp::Package &package)
33 {
34     Z_GDU *zgdu_req = package.request().get();
35     if (zgdu_req && zgdu_req->which == Z_GDU_HTTP_Request)
36     {
37         Z_HTTP_Request* http_req =  zgdu_req->u.HTTP_Request;
38         std::string content = mp_util::http_headers_debug(*http_req);
39         int http_code = 400;
40         mp_util::http_response(package, content, http_code);
41         return true;
42     }
43     package.session().close();
44     return false;
45 }
46
47 mp_util::SRUServerInfo mp_util::get_sru_server_info(mp::Package &package)
48 {
49     mp_util::SRUServerInfo sruinfo;
50
51     // getting host and port info
52     sruinfo.host = "localhost";
53     sruinfo.port = "80";
54
55     // overwriting host and port info if set from HTTP Host header
56     Z_GDU *zgdu_req = package.request().get();
57     if (zgdu_req && zgdu_req->which == Z_GDU_HTTP_Request)
58     {
59         Z_HTTP_Request* http_req =  zgdu_req->u.HTTP_Request;
60         if (http_req)
61         {
62             std::string http_path = http_req->path;
63
64             // taking out GET parameters
65             std::string::size_type ipath = http_path.rfind("?");
66             if (ipath != std::string::npos)
67                 http_path.assign(http_path, 0, ipath);
68
69             // assign to database name
70             if (http_path.size() > 1)
71                 sruinfo.database.assign(http_path, 1, std::string::npos);
72
73             std::string http_host_address
74                 = mp_util::http_header_value(http_req->headers, "Host");
75
76             std::string::size_type iaddress = http_host_address.rfind(":");
77             if (iaddress != std::string::npos)
78             {
79                 sruinfo.host.assign(http_host_address, 0, iaddress);
80                 sruinfo.port.assign(http_host_address, iaddress + 1,
81                                     std::string::npos);
82             }
83         }
84     }
85
86     //std::cout << "sruinfo.database " << sruinfo.database << "\n";
87     //std::cout << "sruinfo.host " << sruinfo.host << "\n";
88     //std::cout << "sruinfo.port " << sruinfo.port << "\n";
89
90     return sruinfo;
91 }
92
93
94 bool mp_util::build_sru_explain(metaproxy_1::Package &package,
95                                 metaproxy_1::odr &odr_en,
96                                 Z_SRW_PDU *sru_pdu_res,
97                                 SRUServerInfo sruinfo,
98                                 const xmlNode *explain,
99                                 Z_SRW_explainRequest const *er_req)
100 {
101
102     // building SRU explain record
103     std::string explain_xml;
104
105     if (explain == 0)
106     {
107         explain_xml
108             = mp_util::to_string(
109                 "<explain  xmlns=\"" + xmlns_explain + "\">\n"
110                 "  <serverInfo protocol='SRU'>\n"
111                 "    <host>")
112             + sruinfo.host
113             + mp_util::to_string("</host>\n"
114                                  "    <port>")
115             + sruinfo.port
116             + mp_util::to_string("</port>\n"
117                                  "    <database>")
118             + sruinfo.database
119             + mp_util::to_string("</database>\n"
120                                  "  </serverInfo>\n"
121                                  "</explain>\n");
122     }
123     else
124     {
125         // make new XML DOC with given explain node
126         xmlDocPtr doc =  xmlNewDoc(BAD_CAST "1.0");
127         xmlDocSetRootElement(doc, (xmlNode*)explain);
128
129         xmlChar *xmlbuff;
130         int xmlbuffsz;
131         xmlDocDumpFormatMemory(doc, &xmlbuff, &xmlbuffsz, 1);
132
133         explain_xml.assign((const char*)xmlbuff, 0, xmlbuffsz);
134     }
135
136
137     // z3950'fy recordPacking
138     int record_packing = Z_SRW_recordPacking_XML;
139     if (er_req && er_req->recordPacking && 's' == *(er_req->recordPacking))
140         record_packing = Z_SRW_recordPacking_string;
141
142     // preparing explain record insert
143     Z_SRW_explainResponse *sru_res = sru_pdu_res->u.explain_response;
144
145     // inserting one and only explain record
146
147     sru_res->record.recordPosition = odr_intdup(odr_en, 1);
148     sru_res->record.recordPacking = record_packing;
149     sru_res->record.recordSchema = (char *)xmlns_explain.c_str();
150     sru_res->record.recordData_len = 1 + explain_xml.size();
151     sru_res->record.recordData_buf
152         = odr_strdupn(odr_en, (const char *)explain_xml.c_str(),
153                       1 + explain_xml.size());
154
155     return true;
156 }
157
158
159 bool mp_util::build_sru_response(mp::Package &package,
160                                  mp::odr &odr_en,
161                                  Z_SOAP *soap,
162                                  const Z_SRW_PDU *sru_pdu_res,
163                                  char *charset,
164                                  const char *stylesheet)
165 {
166
167     // SRU request package translation to Z3950 package
168     //if (sru_pdu_res)
169     //    std::cout << *(const_cast<Z_SRW_PDU *>(sru_pdu_res)) << "\n";
170     //else
171     //    std::cout << "SRU empty\n";
172
173
174     Z_GDU *zgdu_req = package.request().get();
175     if  (zgdu_req && zgdu_req->which == Z_GDU_HTTP_Request)
176     {
177         Z_GDU *zgdu_res //= z_get_HTTP_Response(odr_en, 200);
178             = odr_en.create_HTTP_Response(package.session(),
179                                           zgdu_req->u.HTTP_Request,
180                                           200);
181
182         // adding HTTP response code and headers
183         Z_HTTP_Response * http_res = zgdu_res->u.HTTP_Response;
184         //http_res->code = http_code;
185
186         std::string ctype("text/xml");
187         if (charset)
188         {
189             ctype += "; charset=";
190             ctype += charset;
191         }
192
193         z_HTTP_header_add(odr_en,
194                           &http_res->headers, "Content-Type", ctype.c_str());
195
196         // packaging Z_SOAP into HTML response
197         static Z_SOAP_Handler soap_handlers[4] = {
198             {(char *)YAZ_XMLNS_SRU_v1_1, 0, (Z_SOAP_fun) yaz_srw_codec},
199             {(char *)YAZ_XMLNS_SRU_v1_0, 0,  (Z_SOAP_fun) yaz_srw_codec},
200             {(char *)YAZ_XMLNS_UPDATE_v0_9, 0, (Z_SOAP_fun) yaz_ucp_codec},
201             {0, 0, 0}
202         };
203
204
205         // empty stylesheet means NO stylesheet
206         if (stylesheet && *stylesheet == '\0')
207             stylesheet = 0;
208
209         // encoding SRU package
210
211         soap->u.generic->p  = (void*) sru_pdu_res;
212         //int ret =
213         z_soap_codec_enc_xsl(odr_en, &soap,
214                              &http_res->content_buf, &http_res->content_len,
215                              soap_handlers, charset, stylesheet);
216
217
218         package.response() = zgdu_res;
219         return true;
220     }
221     package.session().close();
222     return false;
223 }
224
225
226
227 Z_SRW_PDU * mp_util::decode_sru_request(mp::Package &package,
228                                         mp::odr &odr_de,
229                                         mp::odr &odr_en,
230                                         Z_SRW_PDU *sru_pdu_res,
231                                         Z_SOAP **soap,
232                                         char *charset,
233                                         char *stylesheet)
234 {
235     Z_GDU *zgdu_req = package.request().get();
236     Z_SRW_PDU *sru_pdu_req = 0;
237
238     //assert((zgdu_req->which == Z_GDU_HTTP_Request));
239
240     //ignoring all non HTTP_Request packages
241     if (!zgdu_req || !(zgdu_req->which == Z_GDU_HTTP_Request))
242     {
243         return 0;
244     }
245
246     Z_HTTP_Request* http_req =  zgdu_req->u.HTTP_Request;
247     if (! http_req)
248         return 0;
249
250     // checking if we got a SRU GET/POST/SOAP HTTP package
251     // closing connection if we did not ...
252     if (0 == yaz_sru_decode(http_req, &sru_pdu_req, soap,
253                             odr_de, &charset,
254                             &(sru_pdu_res->u.response->diagnostics),
255                             &(sru_pdu_res->u.response->num_diagnostics)))
256     {
257         if (sru_pdu_res->u.response->num_diagnostics)
258         {
259             //sru_pdu_res = sru_pdu_res_exp;
260             package.session().close();
261             return 0;
262         }
263         return sru_pdu_req;
264     }
265     else if (0 == yaz_srw_decode(http_req, &sru_pdu_req, soap,
266                                  odr_de, &charset))
267         return sru_pdu_req;
268     else
269     {
270         //sru_pdu_res = sru_pdu_res_exp;
271         package.session().close();
272         return 0;
273     }
274     return 0;
275 }
276
277
278 bool
279 mp_util::check_sru_query_exists(mp::Package &package,
280                                 mp::odr &odr_en,
281                                 Z_SRW_PDU *sru_pdu_res,
282                                 Z_SRW_searchRetrieveRequest const *sr_req)
283 {
284     if ((sr_req->query_type == Z_SRW_query_type_cql && !sr_req->query.cql))
285     {
286         yaz_add_srw_diagnostic(odr_en,
287                                &(sru_pdu_res->u.response->diagnostics),
288                                &(sru_pdu_res->u.response->num_diagnostics),
289                                YAZ_SRW_MANDATORY_PARAMETER_NOT_SUPPLIED,
290                                "query");
291         yaz_add_srw_diagnostic(odr_en,
292                                &(sru_pdu_res->u.response->diagnostics),
293                                &(sru_pdu_res->u.response->num_diagnostics),
294                                YAZ_SRW_QUERY_SYNTAX_ERROR,
295                                "CQL query is empty");
296         return false;
297     }
298     if ((sr_req->query_type == Z_SRW_query_type_xcql && !sr_req->query.xcql))
299     {
300         yaz_add_srw_diagnostic(odr_en,
301                                &(sru_pdu_res->u.response->diagnostics),
302                                &(sru_pdu_res->u.response->num_diagnostics),
303                                YAZ_SRW_QUERY_SYNTAX_ERROR,
304                                "XCQL query is empty");
305         return false;
306     }
307     if ((sr_req->query_type == Z_SRW_query_type_pqf && !sr_req->query.pqf))
308     {
309         yaz_add_srw_diagnostic(odr_en,
310                                &(sru_pdu_res->u.response->diagnostics),
311                                &(sru_pdu_res->u.response->num_diagnostics),
312                                YAZ_SRW_QUERY_SYNTAX_ERROR,
313                                "PQF query is empty");
314         return false;
315     }
316     return true;
317 }
318
319
320 Z_ElementSetNames *
321 mp_util::build_esn_from_schema(mp::odr &odr_en, const char *schema)
322 {
323     if (!schema)
324         return 0;
325
326     Z_ElementSetNames *esn
327         = (Z_ElementSetNames *) odr_malloc(odr_en, sizeof(Z_ElementSetNames));
328     esn->which = Z_ElementSetNames_generic;
329     esn->u.generic = odr_strdup(odr_en, schema);
330     return esn;
331 }
332
333
334 std::ostream& std::operator<<(std::ostream& os, Z_SRW_PDU& srw_pdu)
335 {
336     os << "SRU";
337
338     switch (srw_pdu.which)
339     {
340     case  Z_SRW_searchRetrieve_request:
341         os << " " << "searchRetrieveRequest";
342         {
343             Z_SRW_searchRetrieveRequest *sr = srw_pdu.u.request;
344             if (sr)
345             {
346                 if (sr->database)
347                     os << " " << (sr->database);
348                 else
349                     os << " -";
350                 if (sr->startRecord)
351                     os << " " << *(sr->startRecord);
352                 else
353                     os << " -";
354                 if (sr->maximumRecords)
355                     os << " " << *(sr->maximumRecords);
356                 else
357                     os << " -";
358                 if (sr->recordPacking)
359                     os << " " << (sr->recordPacking);
360                 else
361                     os << " -";
362
363                 if (sr->recordSchema)
364                     os << " " << (sr->recordSchema);
365                 else
366                     os << " -";
367
368                 switch (sr->query_type){
369                 case Z_SRW_query_type_cql:
370                     os << " CQL";
371                     if (sr->query.cql)
372                         os << " " << sr->query.cql;
373                     break;
374                 case Z_SRW_query_type_xcql:
375                     os << " XCQL";
376                     break;
377                 case Z_SRW_query_type_pqf:
378                     os << " PQF";
379                     if (sr->query.pqf)
380                         os << " " << sr->query.pqf;
381                     break;
382                 }
383             }
384         }
385         break;
386     case  Z_SRW_searchRetrieve_response:
387         os << " " << "searchRetrieveResponse";
388         {
389             Z_SRW_searchRetrieveResponse *sr = srw_pdu.u.response;
390             if (sr)
391             {
392                 if (! (sr->num_diagnostics))
393                 {
394                     os << " OK";
395                     if (sr->numberOfRecords)
396                         os << " " << *(sr->numberOfRecords);
397                     else
398                         os << " -";
399                     //if (sr->num_records)
400                     os << " " << (sr->num_records);
401                     //else
402                     //os << " -";
403                     if (sr->nextRecordPosition)
404                         os << " " << *(sr->nextRecordPosition);
405                     else
406                         os << " -";
407                 }
408                 else
409                 {
410                     os << " DIAG";
411                     if (sr->diagnostics && sr->diagnostics->uri)
412                         os << " " << (sr->diagnostics->uri);
413                     else
414                         os << " -";
415                     if (sr->diagnostics && sr->diagnostics->message)
416                         os << " " << (sr->diagnostics->message);
417                     else
418                         os << " -";
419                     if (sr->diagnostics && sr->diagnostics->details)
420                         os << " " << (sr->diagnostics->details);
421                     else
422                         os << " -";
423                 }
424
425
426             }
427         }
428         break;
429     case  Z_SRW_explain_request:
430         os << " " << "explainRequest";
431         break;
432     case  Z_SRW_explain_response:
433         os << " " << "explainResponse";
434         break;
435     case  Z_SRW_scan_request:
436         os << " " << "scanRequest";
437         break;
438     case  Z_SRW_scan_response:
439         os << " " << "scanResponse";
440         break;
441     case  Z_SRW_update_request:
442         os << " " << "updateRequest";
443         break;
444     case  Z_SRW_update_response:
445         os << " " << "updateResponse";
446         break;
447     default:
448         os << " " << "UNKNOWN";
449     }
450
451     return os;
452 }
453
454 /*
455  * Local variables:
456  * c-basic-offset: 4
457  * c-file-style: "Stroustrup"
458  * indent-tabs-mode: nil
459  * End:
460  * vim: shiftwidth=4 tabstop=8 expandtab
461  */
462