Fix: Setting env YAZ_LOG makes YAZ hang YAZ-745
[yaz-moved-to-github.git] / src / rpn2solr.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements RPN to SOLR conversion
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <yaz/rpn2solr.h>
17 #include <yaz/xmalloc.h>
18 #include <yaz/diagbib1.h>
19 #include <yaz/z-core.h>
20 #include <yaz/wrbuf.h>
21
22 static void wrbuf_vputs(const char *buf, void *client_data)
23 {
24     wrbuf_write((WRBUF) client_data, buf, strlen(buf));
25 }
26
27 static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
28 {
29     int j;
30     int server_choice = 1;
31     for (j = 0; j < attributes->num_attributes; j++)
32     {
33         Z_AttributeElement *ae = attributes->attributes[j];
34         if (*ae->attributeType == 1) /* use attribute */
35         {
36             if (ae->which == Z_AttributeValue_complex)
37             {
38                 Z_ComplexAttribute *ca = ae->value.complex;
39                 int i;
40                 for (i = 0; i < ca->num_list; i++)
41                 {
42                     Z_StringOrNumeric *son = ca->list[i];
43                     if (son->which == Z_StringOrNumeric_string)
44                         return son->u.string;
45                 }
46             }
47             server_choice = 0; /* not serverChoice because we have use attr */
48         }
49     }
50     if (server_choice)
51         return "cql.serverChoice";
52     return 0;
53 }
54
55 static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
56 {
57     int j;
58     for (j = 0; j < attributes->num_attributes; j++)
59     {
60         Z_AttributeElement *ae = attributes->attributes[j];
61         if (*ae->attributeType == 2) /* relation attribute */
62         {
63             if (ae->which == Z_AttributeValue_numeric)
64             {
65                 /* Only support for numeric relation */
66                 Odr_int *relation = ae->value.numeric;
67                 /* map this numeric to representation in SOLR */
68                 switch (*relation)
69                 {
70                     /* Unsure on whether this is the relation attribute constants? */
71                 case Z_ProximityOperator_Prox_lessThan:
72                     return "<";
73                 case Z_ProximityOperator_Prox_lessThanOrEqual:
74                     return "le";
75                 case Z_ProximityOperator_Prox_equal:
76                     return ":";
77                 case Z_ProximityOperator_Prox_greaterThanOrEqual:
78                     return "ge";
79                 case Z_ProximityOperator_Prox_greaterThan:
80                     return ">";
81                 case Z_ProximityOperator_Prox_notEqual:
82                     return 0;
83                 case 100:
84                     /* phonetic is not implemented */
85                     return 0;
86                 case 101:
87                     /* stem is not not implemented */
88                     return 0;
89                 case 102:
90                     /* relevance is supported in SOLR, but not implemented yet */
91                     return 0;
92                 default:
93                     /* Invalid relation */
94                     return 0;
95                 }
96             }
97             else {
98                 /*  Can we have a complex relation value?
99                     Should we implement something?
100                 */
101             }
102         }
103     }
104     return ":";
105 }
106
107 static int check_range(solr_transform_t ct, Z_Complex *q,
108                        Z_AttributesPlusTerm **p_apt1,
109                        Z_AttributesPlusTerm **p_apt2)
110 {
111     Z_Operator *op = q->roperator;
112     if (op->which == Z_Operator_and &&
113         q->s1->which == Z_RPNStructure_simple &&
114         q->s2->which == Z_RPNStructure_simple &&
115         q->s1->u.simple->which == Z_Operand_APT &&
116         q->s2->u.simple->which == Z_Operand_APT)
117     {
118         Z_AttributesPlusTerm *apt1 = q->s1->u.simple->u.attributesPlusTerm;
119         Z_AttributesPlusTerm *apt2 = q->s2->u.simple->u.attributesPlusTerm;
120         const char *i1 = solr_lookup_reverse(ct, "index.", apt1->attributes);
121         const char *i2 = solr_lookup_reverse(ct, "index.", apt2->attributes);
122         const char *rel1 = solr_lookup_reverse(ct, "relation.",
123                                                apt1->attributes);
124         const char *rel2 = solr_lookup_reverse(ct, "relation.",
125                                                apt2->attributes);
126         if (!rel1)
127             rel1 = lookup_relation_index_from_attr(apt1->attributes);
128         if (!rel2)
129             rel2 = lookup_relation_index_from_attr(apt2->attributes);
130         if (!i1)
131             i1 = lookup_index_from_string_attr(apt1->attributes);
132         if (!i2)
133             i2 = lookup_index_from_string_attr(apt2->attributes);
134         if (i1 && i2 && !strcmp(i1, i2) && rel1 && rel2)
135         {
136             if ((rel1[0] == '>' || rel1[0] == 'g') &&
137                 (rel2[0] == '<' || rel2[0] == 'l'))
138             {
139                 *p_apt1 = apt1;
140                 *p_apt2 = apt2;
141                 return 1;
142             }
143             if ((rel2[0] == '>' || rel2[0] == 'g') &&
144                 (rel1[0] == '<' || rel1[0] == 'l'))
145             {
146                 *p_apt1 = apt2;
147                 *p_apt2 = apt1;
148                 return 1;
149             }
150         }
151     }
152     return 0;
153 }
154
155 static int rpn2solr_attr(solr_transform_t ct,
156                          Z_AttributeList *attributes, WRBUF w)
157 {
158     const char *index = solr_lookup_reverse(ct, "index.", attributes);
159     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
160
161     /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */
162     if (!index)
163         index = lookup_index_from_string_attr(attributes);
164     if (!index)
165     {
166         solr_transform_set_error(ct,
167                                  YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
168         return -1;
169     }
170     /* for serverChoice we omit index+relation+structure */
171     if (strcmp(index, "cql.serverChoice"))
172     {
173         wrbuf_puts(w, index);
174         wrbuf_puts(w, ":");
175         if (structure)
176         {
177             if (strcmp(structure, "*"))
178             {
179                 wrbuf_puts(w, "/");
180                 wrbuf_puts(w, structure);
181                 wrbuf_puts(w, " ");
182             }
183         }
184     }
185     return 0;
186 }
187
188 static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
189 {
190     int j;
191     Z_AttributeList *attributes = apt->attributes;
192     for (j = 0; j < attributes->num_attributes; j++)
193     {
194         Z_AttributeElement *ae = attributes->attributes[j];
195         if (*ae->attributeType == 5) /* truncation attribute */
196         {
197             if (ae->which == Z_AttributeValue_numeric)
198             {
199                 return *(ae->value.numeric);
200             }
201             else if (ae->which == Z_AttributeValue_complex) {
202                 ;
203                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
204                 /* Complex: Shouldn't happen */
205             }
206         }
207     }
208     /* No truncation given */
209     return 0;
210 }
211
212 #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
213
214 static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc)
215 {
216     size_t lterm = 0;
217     const char *sterm = 0;
218     switch (term->which)
219     {
220     case Z_Term_general:
221         lterm = term->u.general->len;
222         sterm = (const char *) term->u.general->buf;
223         break;
224     case Z_Term_numeric:
225         wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
226         break;
227     case Z_Term_characterString:
228         sterm = term->u.characterString;
229         lterm = strlen(sterm);
230         break;
231     default:
232         solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
233         return -1;
234     }
235
236     if (sterm)
237     {
238         size_t i;
239         int must_quote = 0;
240
241         for (i = 0 ; i < lterm; i++)
242             if (sterm[i] == ' ')
243                 must_quote = 1;
244         if (must_quote)
245             wrbuf_puts(w, "\"");
246         if (trunc == 2 || trunc == 3)
247             wrbuf_puts(w, "*");
248         for (i = 0 ; i < lterm; i++)
249         {
250             if (sterm[i] == '\\' && i < lterm - 1)
251             {
252                 i++;
253                 if (strchr(SOLR_SPECIAL, sterm[i]))
254                     wrbuf_putc(w, '\\');
255                 wrbuf_putc(w, sterm[i]);
256             }
257             else if (sterm[i] == '?' && trunc == 104)
258             {
259                 wrbuf_putc(w, '*');
260             }
261             else if (sterm[i] == '#' && trunc == 104)
262             {
263                 wrbuf_putc(w, '?');
264             }
265             else if (strchr(SOLR_SPECIAL, sterm[i]))
266             {
267                 wrbuf_putc(w, '\\');
268                 wrbuf_putc(w, sterm[i]);
269             }
270             else
271                 wrbuf_putc(w, sterm[i]);
272         }
273         if (trunc == 1 || trunc == 3)
274             wrbuf_puts(w, "*");
275         if (must_quote)
276             wrbuf_puts(w, "\"");
277     }
278     return 0;
279 }
280
281 static int rpn2solr_simple(solr_transform_t ct,
282                            void (*pr)(const char *buf, void *client_data),
283                            void *client_data,
284                            Z_AttributesPlusTerm *apt, WRBUF w,
285                            Z_AttributesPlusTerm *apt2)
286  {
287      int ret = 0;
288      Z_Term *term = apt->term;
289      Odr_int trunc = get_truncation(apt);
290      const char *relation2 = 0;
291      const char *relation1 = solr_lookup_reverse(ct, "relation.",
292                                                  apt->attributes);
293      /* Attempt to fix bug #2978: Look for a relation attribute */
294      if (!relation1)
295          relation1 = lookup_relation_index_from_attr(apt->attributes);
296      if (!relation1)
297      {
298          solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, 0);
299          return -1;
300      }
301      if (apt2)
302      {
303          relation2 = solr_lookup_reverse(ct, "relation.",
304                                          apt2->attributes);
305          if (!relation2)
306              relation2 = lookup_relation_index_from_attr(apt2->attributes);
307      }
308      wrbuf_rewind(w);
309      ret = rpn2solr_attr(ct, apt->attributes, w);
310      if (ret)
311          return ret;
312      if ((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104)
313              ;
314      else
315      {
316          solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
317          return -1;
318      }
319
320      if (!relation1)
321          ret = emit_term(ct, w, term, trunc);
322      else if (relation1[0] == '<' || relation1[0] == 'l')
323      {
324          wrbuf_puts(w, "[* TO ");
325          ret = emit_term(ct, w, term, trunc);
326          if (!strcmp(relation1, "le") || !strcmp(relation1, "<="))
327              wrbuf_puts(w, "]");
328          else
329              wrbuf_puts(w, "}");
330      }
331      else if (relation1[0] == '>' || relation1[0] == 'g')
332      {
333          if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge"))
334              wrbuf_puts(w, "[");
335          else
336              wrbuf_puts(w, "{");
337          ret = emit_term(ct, w, term, trunc);
338          wrbuf_puts(w, " TO ");
339          if (apt2)
340          {
341              emit_term(ct, w, apt2->term, 0);
342              if (!relation2 || !strcmp(relation2, "<=") ||
343                  !strcmp(relation2, "le"))
344                  wrbuf_puts(w, "]");
345              else
346                  wrbuf_puts(w, "}");
347          }
348          else
349              wrbuf_puts(w, "*]");
350      }
351      else
352          ret = emit_term(ct, w, term, trunc);
353      if (ret == 0)
354          pr(wrbuf_cstr(w), client_data);
355      return ret;
356  }
357
358
359 static int rpn2solr_structure(solr_transform_t ct,
360                               void (*pr)(const char *buf, void *client_data),
361                                void *client_data,
362                               Z_RPNStructure *q, int nested,
363                               WRBUF w)
364 {
365     if (q->which == Z_RPNStructure_simple)
366     {
367         if (q->u.simple->which != Z_Operand_APT)
368         {
369             solr_transform_set_error(
370                 ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0);
371             return -1;
372         }
373         else
374             return rpn2solr_simple(ct, pr, client_data,
375                                    q->u.simple->u.attributesPlusTerm, w, 0);
376     }
377     else
378     {
379         Z_Operator *op = q->u.complex->roperator;
380         Z_AttributesPlusTerm *apt1, *apt2;
381         int r;
382
383         if (check_range(ct, q->u.complex, &apt1, &apt2))
384             return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2);
385         if (nested)
386             pr("(", client_data);
387
388         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
389         if (r)
390             return r;
391         switch(op->which)
392         {
393         case  Z_Operator_and:
394             pr(" AND ", client_data);
395             break;
396         case  Z_Operator_or:
397             pr(" OR ", client_data);
398             break;
399         case  Z_Operator_and_not:
400             pr(" AND NOT ", client_data);
401             break;
402         case  Z_Operator_prox:
403             solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0);
404             return -1;
405         }
406         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
407         if (nested)
408             pr(")", client_data);
409         return r;
410     }
411 }
412
413 int solr_transform_rpn2solr_stream(solr_transform_t ct,
414                                    void (*pr)(const char *buf, void *client_data),
415                                    void *client_data,
416                                    Z_RPNQuery *q)
417 {
418     int r;
419     WRBUF w = wrbuf_alloc();
420     solr_transform_set_error(ct, 0, 0);
421     r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w);
422     wrbuf_destroy(w);
423     return r;
424 }
425
426
427 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
428                                   WRBUF w,
429                                   Z_RPNQuery *q)
430 {
431     return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q);
432 }
433
434 /*
435  * Local variables:
436  * c-basic-offset: 4
437  * c-file-style: "Stroustrup"
438  * indent-tabs-mode: nil
439  * End:
440  * vim: shiftwidth=4 tabstop=8 expandtab
441  */
442