Remove some redundant wrbuf_vp_puts alikes
[yaz-moved-to-github.git] / src / rpn2solr.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements RPN to SOLR conversion
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <yaz/rpn2solr.h>
17 #include <yaz/xmalloc.h>
18 #include <yaz/diagbib1.h>
19 #include <yaz/z-core.h>
20 #include <yaz/wrbuf.h>
21
22 static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
23 {
24     int j;
25     int server_choice = 1;
26     for (j = 0; j < attributes->num_attributes; j++)
27     {
28         Z_AttributeElement *ae = attributes->attributes[j];
29         if (*ae->attributeType == 1) /* use attribute */
30         {
31             if (ae->which == Z_AttributeValue_complex)
32             {
33                 Z_ComplexAttribute *ca = ae->value.complex;
34                 int i;
35                 for (i = 0; i < ca->num_list; i++)
36                 {
37                     Z_StringOrNumeric *son = ca->list[i];
38                     if (son->which == Z_StringOrNumeric_string)
39                         return son->u.string;
40                 }
41             }
42             server_choice = 0; /* not serverChoice because we have use attr */
43         }
44     }
45     if (server_choice)
46         return "cql.serverChoice";
47     return 0;
48 }
49
50 static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
51 {
52     int j;
53     for (j = 0; j < attributes->num_attributes; j++)
54     {
55         Z_AttributeElement *ae = attributes->attributes[j];
56         if (*ae->attributeType == 2) /* relation attribute */
57         {
58             if (ae->which == Z_AttributeValue_numeric)
59             {
60                 /* Only support for numeric relation */
61                 Odr_int *relation = ae->value.numeric;
62                 /* map this numeric to representation in SOLR */
63                 switch (*relation)
64                 {
65                     /* Unsure on whether this is the relation attribute constants? */
66                 case Z_ProximityOperator_Prox_lessThan:
67                     return "<";
68                 case Z_ProximityOperator_Prox_lessThanOrEqual:
69                     return "le";
70                 case Z_ProximityOperator_Prox_equal:
71                     return ":";
72                 case Z_ProximityOperator_Prox_greaterThanOrEqual:
73                     return "ge";
74                 case Z_ProximityOperator_Prox_greaterThan:
75                     return ">";
76                 case Z_ProximityOperator_Prox_notEqual:
77                     return 0;
78                 case 100:
79                     /* phonetic is not implemented */
80                     return 0;
81                 case 101:
82                     /* stem is not not implemented */
83                     return 0;
84                 case 102:
85                     /* relevance is supported in SOLR, but not implemented yet */
86                     return 0;
87                 default:
88                     /* Invalid relation */
89                     return 0;
90                 }
91             }
92             else {
93                 /*  Can we have a complex relation value?
94                     Should we implement something?
95                 */
96             }
97         }
98     }
99     return ":";
100 }
101
102 static int check_range(solr_transform_t ct, Z_Complex *q,
103                        Z_AttributesPlusTerm **p_apt1,
104                        Z_AttributesPlusTerm **p_apt2)
105 {
106     Z_Operator *op = q->roperator;
107     if (op->which == Z_Operator_and &&
108         q->s1->which == Z_RPNStructure_simple &&
109         q->s2->which == Z_RPNStructure_simple &&
110         q->s1->u.simple->which == Z_Operand_APT &&
111         q->s2->u.simple->which == Z_Operand_APT)
112     {
113         Z_AttributesPlusTerm *apt1 = q->s1->u.simple->u.attributesPlusTerm;
114         Z_AttributesPlusTerm *apt2 = q->s2->u.simple->u.attributesPlusTerm;
115         const char *i1 = solr_lookup_reverse(ct, "index.", apt1->attributes);
116         const char *i2 = solr_lookup_reverse(ct, "index.", apt2->attributes);
117         const char *rel1 = solr_lookup_reverse(ct, "relation.",
118                                                apt1->attributes);
119         const char *rel2 = solr_lookup_reverse(ct, "relation.",
120                                                apt2->attributes);
121         if (!rel1)
122             rel1 = lookup_relation_index_from_attr(apt1->attributes);
123         if (!rel2)
124             rel2 = lookup_relation_index_from_attr(apt2->attributes);
125         if (!i1)
126             i1 = lookup_index_from_string_attr(apt1->attributes);
127         if (!i2)
128             i2 = lookup_index_from_string_attr(apt2->attributes);
129         if (i1 && i2 && !strcmp(i1, i2) && rel1 && rel2)
130         {
131             if ((rel1[0] == '>' || rel1[0] == 'g') &&
132                 (rel2[0] == '<' || rel2[0] == 'l'))
133             {
134                 *p_apt1 = apt1;
135                 *p_apt2 = apt2;
136                 return 1;
137             }
138             if ((rel2[0] == '>' || rel2[0] == 'g') &&
139                 (rel1[0] == '<' || rel1[0] == 'l'))
140             {
141                 *p_apt1 = apt2;
142                 *p_apt2 = apt1;
143                 return 1;
144             }
145         }
146     }
147     return 0;
148 }
149
150 static int rpn2solr_attr(solr_transform_t ct,
151                          Z_AttributeList *attributes, WRBUF w)
152 {
153     const char *index = solr_lookup_reverse(ct, "index.", attributes);
154     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
155
156     /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */
157     if (!index)
158         index = lookup_index_from_string_attr(attributes);
159     if (!index)
160     {
161         solr_transform_set_error(ct,
162                                  YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
163         return -1;
164     }
165     /* for serverChoice we omit index+relation+structure */
166     if (strcmp(index, "cql.serverChoice"))
167     {
168         wrbuf_puts(w, index);
169         wrbuf_puts(w, ":");
170         if (structure)
171         {
172             if (strcmp(structure, "*"))
173             {
174                 wrbuf_puts(w, "/");
175                 wrbuf_puts(w, structure);
176                 wrbuf_puts(w, " ");
177             }
178         }
179     }
180     return 0;
181 }
182
183 static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
184 {
185     int j;
186     Z_AttributeList *attributes = apt->attributes;
187     for (j = 0; j < attributes->num_attributes; j++)
188     {
189         Z_AttributeElement *ae = attributes->attributes[j];
190         if (*ae->attributeType == 5) /* truncation attribute */
191         {
192             if (ae->which == Z_AttributeValue_numeric)
193             {
194                 return *(ae->value.numeric);
195             }
196             else if (ae->which == Z_AttributeValue_complex) {
197                 ;
198                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
199                 /* Complex: Shouldn't happen */
200             }
201         }
202     }
203     /* No truncation given */
204     return 0;
205 }
206
207 #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
208
209 static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc)
210 {
211     size_t lterm = 0;
212     const char *sterm = 0;
213     switch (term->which)
214     {
215     case Z_Term_general:
216         lterm = term->u.general->len;
217         sterm = (const char *) term->u.general->buf;
218         break;
219     case Z_Term_numeric:
220         wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
221         break;
222     case Z_Term_characterString:
223         sterm = term->u.characterString;
224         lterm = strlen(sterm);
225         break;
226     default:
227         solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
228         return -1;
229     }
230
231     if (sterm)
232     {
233         size_t i;
234         int must_quote = 0;
235
236         for (i = 0 ; i < lterm; i++)
237             if (sterm[i] == ' ')
238                 must_quote = 1;
239         if (must_quote)
240             wrbuf_puts(w, "\"");
241         if (trunc == 2 || trunc == 3)
242             wrbuf_puts(w, "*");
243         for (i = 0 ; i < lterm; i++)
244         {
245             if (sterm[i] == '\\' && i < lterm - 1)
246             {
247                 i++;
248                 if (strchr(SOLR_SPECIAL, sterm[i]))
249                     wrbuf_putc(w, '\\');
250                 wrbuf_putc(w, sterm[i]);
251             }
252             else if (sterm[i] == '?' && trunc == 104)
253             {
254                 wrbuf_putc(w, '*');
255             }
256             else if (sterm[i] == '#' && trunc == 104)
257             {
258                 wrbuf_putc(w, '?');
259             }
260             else if (strchr(SOLR_SPECIAL, sterm[i]))
261             {
262                 wrbuf_putc(w, '\\');
263                 wrbuf_putc(w, sterm[i]);
264             }
265             else
266                 wrbuf_putc(w, sterm[i]);
267         }
268         if (trunc == 1 || trunc == 3)
269             wrbuf_puts(w, "*");
270         if (must_quote)
271             wrbuf_puts(w, "\"");
272     }
273     return 0;
274 }
275
276 static int rpn2solr_simple(solr_transform_t ct,
277                            void (*pr)(const char *buf, void *client_data),
278                            void *client_data,
279                            Z_AttributesPlusTerm *apt, WRBUF w,
280                            Z_AttributesPlusTerm *apt2)
281  {
282      int ret = 0;
283      Z_Term *term = apt->term;
284      Odr_int trunc = get_truncation(apt);
285      const char *relation2 = 0;
286      const char *relation1 = solr_lookup_reverse(ct, "relation.",
287                                                  apt->attributes);
288      /* Attempt to fix bug #2978: Look for a relation attribute */
289      if (!relation1)
290          relation1 = lookup_relation_index_from_attr(apt->attributes);
291      if (!relation1)
292      {
293          solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, 0);
294          return -1;
295      }
296      if (apt2)
297      {
298          relation2 = solr_lookup_reverse(ct, "relation.",
299                                          apt2->attributes);
300          if (!relation2)
301              relation2 = lookup_relation_index_from_attr(apt2->attributes);
302      }
303      wrbuf_rewind(w);
304      ret = rpn2solr_attr(ct, apt->attributes, w);
305      if (ret)
306          return ret;
307      if ((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104)
308              ;
309      else
310      {
311          solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
312          return -1;
313      }
314
315      if (!relation1)
316          ret = emit_term(ct, w, term, trunc);
317      else if (relation1[0] == '<' || relation1[0] == 'l')
318      {
319          wrbuf_puts(w, "[* TO ");
320          ret = emit_term(ct, w, term, trunc);
321          if (!strcmp(relation1, "le") || !strcmp(relation1, "<="))
322              wrbuf_puts(w, "]");
323          else
324              wrbuf_puts(w, "}");
325      }
326      else if (relation1[0] == '>' || relation1[0] == 'g')
327      {
328          if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge"))
329              wrbuf_puts(w, "[");
330          else
331              wrbuf_puts(w, "{");
332          ret = emit_term(ct, w, term, trunc);
333          wrbuf_puts(w, " TO ");
334          if (apt2)
335          {
336              emit_term(ct, w, apt2->term, 0);
337              if (!relation2 || !strcmp(relation2, "<=") ||
338                  !strcmp(relation2, "le"))
339                  wrbuf_puts(w, "]");
340              else
341                  wrbuf_puts(w, "}");
342          }
343          else
344              wrbuf_puts(w, "*]");
345      }
346      else
347          ret = emit_term(ct, w, term, trunc);
348      if (ret == 0)
349          pr(wrbuf_cstr(w), client_data);
350      return ret;
351  }
352
353
354 static int rpn2solr_structure(solr_transform_t ct,
355                               void (*pr)(const char *buf, void *client_data),
356                                void *client_data,
357                               Z_RPNStructure *q, int nested,
358                               WRBUF w)
359 {
360     if (q->which == Z_RPNStructure_simple)
361     {
362         if (q->u.simple->which != Z_Operand_APT)
363         {
364             solr_transform_set_error(
365                 ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0);
366             return -1;
367         }
368         else
369             return rpn2solr_simple(ct, pr, client_data,
370                                    q->u.simple->u.attributesPlusTerm, w, 0);
371     }
372     else
373     {
374         Z_Operator *op = q->u.complex->roperator;
375         Z_AttributesPlusTerm *apt1, *apt2;
376         int r;
377
378         if (check_range(ct, q->u.complex, &apt1, &apt2))
379             return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2);
380         if (nested)
381             pr("(", client_data);
382
383         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
384         if (r)
385             return r;
386         switch(op->which)
387         {
388         case  Z_Operator_and:
389             pr(" AND ", client_data);
390             break;
391         case  Z_Operator_or:
392             pr(" OR ", client_data);
393             break;
394         case  Z_Operator_and_not:
395             pr(" AND NOT ", client_data);
396             break;
397         case  Z_Operator_prox:
398             solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0);
399             return -1;
400         }
401         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
402         if (nested)
403             pr(")", client_data);
404         return r;
405     }
406 }
407
408 int solr_transform_rpn2solr_stream(solr_transform_t ct,
409                                    void (*pr)(const char *buf, void *client_data),
410                                    void *client_data,
411                                    Z_RPNQuery *q)
412 {
413     int r;
414     WRBUF w = wrbuf_alloc();
415     solr_transform_set_error(ct, 0, 0);
416     r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w);
417     wrbuf_destroy(w);
418     return r;
419 }
420
421
422 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
423                                   WRBUF w,
424                                   Z_RPNQuery *q)
425 {
426     return solr_transform_rpn2solr_stream(ct, wrbuf_vp_puts, w, q);
427 }
428
429 /*
430  * Local variables:
431  * c-basic-offset: 4
432  * c-file-style: "Stroustrup"
433  * indent-tabs-mode: nil
434  * End:
435  * vim: shiftwidth=4 tabstop=8 expandtab
436  */
437