Fix: PQF to Solr conversion may produce an invalid Solr query (YAZ-792)
[yaz-moved-to-github.git] / src / rpn2solr.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements RPN to SOLR conversion
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <yaz/rpn2solr.h>
17 #include <yaz/xmalloc.h>
18 #include <yaz/diagbib1.h>
19 #include <yaz/z-core.h>
20 #include <yaz/wrbuf.h>
21
22 static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
23 {
24     int j;
25     int server_choice = 1;
26     for (j = 0; j < attributes->num_attributes; j++)
27     {
28         Z_AttributeElement *ae = attributes->attributes[j];
29         if (*ae->attributeType == 1) /* use attribute */
30         {
31             if (ae->which == Z_AttributeValue_complex)
32             {
33                 Z_ComplexAttribute *ca = ae->value.complex;
34                 int i;
35                 for (i = 0; i < ca->num_list; i++)
36                 {
37                     Z_StringOrNumeric *son = ca->list[i];
38                     if (son->which == Z_StringOrNumeric_string)
39                         return son->u.string;
40                 }
41             }
42             server_choice = 0; /* not serverChoice because we have use attr */
43         }
44     }
45     if (server_choice)
46         return "cql.serverChoice";
47     return 0;
48 }
49
50 static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
51 {
52     int j;
53     for (j = 0; j < attributes->num_attributes; j++)
54     {
55         Z_AttributeElement *ae = attributes->attributes[j];
56         if (*ae->attributeType == 2) /* relation attribute */
57         {
58             if (ae->which == Z_AttributeValue_numeric)
59             {
60                 /* Only support for numeric relation */
61                 Odr_int *relation = ae->value.numeric;
62                 /* map this numeric to representation in SOLR */
63                 switch (*relation)
64                 {
65                     /* Unsure on whether this is the relation attribute constants? */
66                 case Z_ProximityOperator_Prox_lessThan:
67                     return "<";
68                 case Z_ProximityOperator_Prox_lessThanOrEqual:
69                     return "le";
70                 case Z_ProximityOperator_Prox_equal:
71                     return ":";
72                 case Z_ProximityOperator_Prox_greaterThanOrEqual:
73                     return "ge";
74                 case Z_ProximityOperator_Prox_greaterThan:
75                     return ">";
76                 case Z_ProximityOperator_Prox_notEqual:
77                     return 0;
78                 case 100:
79                     /* phonetic is not implemented */
80                     return 0;
81                 case 101:
82                     /* stem is not not implemented */
83                     return 0;
84                 case 102:
85                     /* relevance is supported in SOLR, but not implemented yet */
86                     return 0;
87                 default:
88                     /* Invalid relation */
89                     return 0;
90                 }
91             }
92             else {
93                 /*  Can we have a complex relation value?
94                     Should we implement something?
95                 */
96             }
97         }
98     }
99     return ":";
100 }
101
102 static int check_range(solr_transform_t ct, Z_Complex *q,
103                        Z_AttributesPlusTerm **p_apt1,
104                        Z_AttributesPlusTerm **p_apt2)
105 {
106     Z_Operator *op = q->roperator;
107     if (op->which == Z_Operator_and &&
108         q->s1->which == Z_RPNStructure_simple &&
109         q->s2->which == Z_RPNStructure_simple &&
110         q->s1->u.simple->which == Z_Operand_APT &&
111         q->s2->u.simple->which == Z_Operand_APT)
112     {
113         Z_AttributesPlusTerm *apt1 = q->s1->u.simple->u.attributesPlusTerm;
114         Z_AttributesPlusTerm *apt2 = q->s2->u.simple->u.attributesPlusTerm;
115         const char *i1 = solr_lookup_reverse(ct, "index.", apt1->attributes);
116         const char *i2 = solr_lookup_reverse(ct, "index.", apt2->attributes);
117         const char *rel1 = solr_lookup_reverse(ct, "relation.",
118                                                apt1->attributes);
119         const char *rel2 = solr_lookup_reverse(ct, "relation.",
120                                                apt2->attributes);
121         if (!rel1)
122             rel1 = lookup_relation_index_from_attr(apt1->attributes);
123         if (!rel2)
124             rel2 = lookup_relation_index_from_attr(apt2->attributes);
125         if (!i1)
126             i1 = lookup_index_from_string_attr(apt1->attributes);
127         if (!i2)
128             i2 = lookup_index_from_string_attr(apt2->attributes);
129         if (i1 && i2 && !strcmp(i1, i2) && rel1 && rel2)
130         {
131             if ((rel1[0] == '>' || rel1[0] == 'g') &&
132                 (rel2[0] == '<' || rel2[0] == 'l'))
133             {
134                 *p_apt1 = apt1;
135                 *p_apt2 = apt2;
136                 return 1;
137             }
138             if ((rel2[0] == '>' || rel2[0] == 'g') &&
139                 (rel1[0] == '<' || rel1[0] == 'l'))
140             {
141                 *p_apt1 = apt2;
142                 *p_apt2 = apt1;
143                 return 1;
144             }
145         }
146     }
147     return 0;
148 }
149
150 static int rpn2solr_attr(solr_transform_t ct,
151                          Z_AttributeList *attributes, WRBUF w)
152 {
153     const char *index = solr_lookup_reverse(ct, "index.", attributes);
154     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
155
156     /* if no real match, try string attribute */
157     if (!index)
158         index = lookup_index_from_string_attr(attributes);
159     if (!index)
160         return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
161     /* for serverChoice we omit index+relation+structure */
162     if (strcmp(index, "cql.serverChoice"))
163     {
164         wrbuf_puts(w, index);
165         wrbuf_puts(w, ":");
166         if (structure)
167         {
168             if (strcmp(structure, "*"))
169             {
170                 wrbuf_puts(w, "/");
171                 wrbuf_puts(w, structure);
172                 wrbuf_puts(w, " ");
173             }
174         }
175     }
176     return 0;
177 }
178
179 static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
180 {
181     int j;
182     Z_AttributeList *attributes = apt->attributes;
183     for (j = 0; j < attributes->num_attributes; j++)
184     {
185         Z_AttributeElement *ae = attributes->attributes[j];
186         if (*ae->attributeType == 5) /* truncation attribute */
187         {
188             if (ae->which == Z_AttributeValue_numeric)
189             {
190                 return *(ae->value.numeric);
191             }
192             else if (ae->which == Z_AttributeValue_complex) {
193                 ;
194                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
195                 /* Complex: Shouldn't happen */
196             }
197         }
198     }
199     /* No truncation given */
200     return 0;
201 }
202
203 #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
204
205 static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc)
206 {
207     size_t lterm = 0;
208     const char *sterm = 0;
209     switch (term->which)
210     {
211     case Z_Term_general:
212         lterm = term->u.general->len;
213         sterm = (const char *) term->u.general->buf;
214         break;
215     case Z_Term_numeric:
216         wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
217         break;
218     case Z_Term_characterString:
219         sterm = term->u.characterString;
220         lterm = strlen(sterm);
221         break;
222     default:
223         return YAZ_BIB1_TERM_TYPE_UNSUPP;
224     }
225
226     if (sterm)
227     {
228         size_t i;
229         int must_quote = 0;
230
231         if (lterm == 0)
232             must_quote = 1;
233         else
234         {
235             for (i = 0 ; i < lterm; i++)
236                 if (sterm[i] == ' ')
237                     must_quote = 1;
238         }
239         if (must_quote)
240             wrbuf_puts(w, "\"");
241         if (trunc == 2 || trunc == 3)
242             wrbuf_puts(w, "*");
243         for (i = 0 ; i < lterm; i++)
244         {
245             if (sterm[i] == '\\' && i < lterm - 1)
246             {
247                 i++;
248                 if (strchr(SOLR_SPECIAL, sterm[i]))
249                     wrbuf_putc(w, '\\');
250                 wrbuf_putc(w, sterm[i]);
251             }
252             else if (sterm[i] == '?' && trunc == 104)
253             {
254                 wrbuf_putc(w, '*');
255             }
256             else if (sterm[i] == '#' && trunc == 104)
257             {
258                 wrbuf_putc(w, '?');
259             }
260             else if (strchr(SOLR_SPECIAL, sterm[i]))
261             {
262                 wrbuf_putc(w, '\\');
263                 wrbuf_putc(w, sterm[i]);
264             }
265             else
266                 wrbuf_putc(w, sterm[i]);
267         }
268         if (trunc == 1 || trunc == 3)
269             wrbuf_puts(w, "*");
270         if (must_quote)
271             wrbuf_puts(w, "\"");
272     }
273     return 0;
274 }
275
276 static int rpn2solr_simple(solr_transform_t ct,
277                            void (*pr)(const char *buf, void *client_data),
278                            void *client_data,
279                            Z_AttributesPlusTerm *apt, WRBUF w,
280                            Z_AttributesPlusTerm *apt2)
281  {
282      int ret = 0;
283      Z_Term *term = apt->term;
284      Odr_int trunc = get_truncation(apt);
285      const char *relation2 = 0;
286      const char *relation1 = solr_lookup_reverse(ct, "relation.",
287                                                  apt->attributes);
288      /* Attempt to fix bug #2978: Look for a relation attribute */
289      if (!relation1)
290          relation1 = lookup_relation_index_from_attr(apt->attributes);
291      if (!relation1)
292      {
293          return YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
294      }
295      if (apt2)
296      {
297          relation2 = solr_lookup_reverse(ct, "relation.",
298                                          apt2->attributes);
299          if (!relation2)
300              relation2 = lookup_relation_index_from_attr(apt2->attributes);
301      }
302      wrbuf_rewind(w);
303      ret = rpn2solr_attr(ct, apt->attributes, w);
304      if (ret)
305          return ret;
306      if ((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104)
307              ;
308      else
309      {
310          return YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE;
311      }
312
313      if (!relation1)
314          ret = emit_term(ct, w, term, trunc);
315      else if (relation1[0] == '<' || relation1[0] == 'l')
316      {
317          wrbuf_puts(w, "[* TO ");
318          ret = emit_term(ct, w, term, trunc);
319          if (!strcmp(relation1, "le") || !strcmp(relation1, "<="))
320              wrbuf_puts(w, "]");
321          else
322              wrbuf_puts(w, "}");
323      }
324      else if (relation1[0] == '>' || relation1[0] == 'g')
325      {
326          if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge"))
327              wrbuf_puts(w, "[");
328          else
329              wrbuf_puts(w, "{");
330          ret = emit_term(ct, w, term, trunc);
331          wrbuf_puts(w, " TO ");
332          if (apt2)
333          {
334              emit_term(ct, w, apt2->term, 0);
335              if (!relation2 || !strcmp(relation2, "<=") ||
336                  !strcmp(relation2, "le"))
337                  wrbuf_puts(w, "]");
338              else
339                  wrbuf_puts(w, "}");
340          }
341          else
342              wrbuf_puts(w, "*]");
343      }
344      else
345          ret = emit_term(ct, w, term, trunc);
346      if (ret == 0)
347          pr(wrbuf_cstr(w), client_data);
348      return ret;
349  }
350
351
352 static int rpn2solr_structure(solr_transform_t ct,
353                               void (*pr)(const char *buf, void *client_data),
354                               void *client_data,
355                               Z_RPNStructure *q, int nested,
356                               WRBUF w)
357 {
358     if (q->which == Z_RPNStructure_simple)
359     {
360         if (q->u.simple->which != Z_Operand_APT)
361             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
362         else
363             return rpn2solr_simple(ct, pr, client_data,
364                                    q->u.simple->u.attributesPlusTerm, w, 0);
365     }
366     else
367     {
368         Z_Operator *op = q->u.complex->roperator;
369         Z_AttributesPlusTerm *apt1, *apt2;
370         int r;
371
372         if (check_range(ct, q->u.complex, &apt1, &apt2))
373             return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2);
374         if (nested)
375             pr("(", client_data);
376
377         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
378         if (r)
379             return r;
380         switch (op->which)
381         {
382         case Z_Operator_and:
383             pr(" AND ", client_data);
384             break;
385         case Z_Operator_or:
386             pr(" OR ", client_data);
387             break;
388         case Z_Operator_and_not:
389             pr(" AND NOT ", client_data);
390             break;
391         case Z_Operator_prox:
392             return YAZ_BIB1_UNSUPP_SEARCH;
393         }
394         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
395         if (nested)
396             pr(")", client_data);
397         return r;
398     }
399 }
400
401 int solr_transform_rpn2solr_stream_r(solr_transform_t ct,
402                                      WRBUF addinfo,
403                                      void (*pr)(const char *buf, void *client_data),
404                                      void *client_data,
405                                      Z_RPNQuery *q)
406 {
407     int r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure,
408                                /* nested*/ 0, addinfo);
409     if (!r)
410         wrbuf_rewind(addinfo);
411     return r;
412 }
413
414 int solr_transform_rpn2solr_stream(solr_transform_t ct,
415                                    void (*pr)(const char *buf, void *client_data),
416                                    void *client_data,
417                                    Z_RPNQuery *q)
418 {
419     WRBUF w = wrbuf_alloc();
420     int r = solr_transform_rpn2solr_stream_r(ct, w, pr, client_data, q);
421     if (r)
422         solr_transform_set_error(ct, r, wrbuf_len(w) ? wrbuf_cstr(w) : 0);
423     wrbuf_destroy(w);
424     return r;
425 }
426
427 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
428                                   WRBUF w,
429                                   Z_RPNQuery *q)
430 {
431     return solr_transform_rpn2solr_stream(ct, wrbuf_vp_puts, w, q);
432 }
433
434 /*
435  * Local variables:
436  * c-basic-offset: 4
437  * c-file-style: "Stroustrup"
438  * indent-tabs-mode: nil
439  * End:
440  * vim: shiftwidth=4 tabstop=8 expandtab
441  */
442