332ae88c9d09d7fbc4c6581c922d3538b7ad03bf
[yaz-moved-to-github.git] / src / rpn2solr.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements RPN to SOLR conversion
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <yaz/rpn2solr.h>
17 #include <yaz/xmalloc.h>
18 #include <yaz/diagbib1.h>
19 #include <yaz/z-core.h>
20 #include <yaz/wrbuf.h>
21
22 static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
23 {
24     int j;
25     int server_choice = 1;
26     for (j = 0; j < attributes->num_attributes; j++)
27     {
28         Z_AttributeElement *ae = attributes->attributes[j];
29         if (*ae->attributeType == 1) /* use attribute */
30         {
31             if (ae->which == Z_AttributeValue_complex)
32             {
33                 Z_ComplexAttribute *ca = ae->value.complex;
34                 int i;
35                 for (i = 0; i < ca->num_list; i++)
36                 {
37                     Z_StringOrNumeric *son = ca->list[i];
38                     if (son->which == Z_StringOrNumeric_string)
39                         return son->u.string;
40                 }
41             }
42             server_choice = 0; /* not serverChoice because we have use attr */
43         }
44     }
45     if (server_choice)
46         return "cql.serverChoice";
47     return 0;
48 }
49
50 static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
51 {
52     int j;
53     for (j = 0; j < attributes->num_attributes; j++)
54     {
55         Z_AttributeElement *ae = attributes->attributes[j];
56         if (*ae->attributeType == 2) /* relation attribute */
57         {
58             if (ae->which == Z_AttributeValue_numeric)
59             {
60                 /* Only support for numeric relation */
61                 Odr_int *relation = ae->value.numeric;
62                 /* map this numeric to representation in SOLR */
63                 switch (*relation)
64                 {
65                     /* Unsure on whether this is the relation attribute constants? */
66                 case Z_ProximityOperator_Prox_lessThan:
67                     return "<";
68                 case Z_ProximityOperator_Prox_lessThanOrEqual:
69                     return "le";
70                 case Z_ProximityOperator_Prox_equal:
71                     return ":";
72                 case Z_ProximityOperator_Prox_greaterThanOrEqual:
73                     return "ge";
74                 case Z_ProximityOperator_Prox_greaterThan:
75                     return ">";
76                 case Z_ProximityOperator_Prox_notEqual:
77                     return 0;
78                 case 100:
79                     /* phonetic is not implemented */
80                     return 0;
81                 case 101:
82                     /* stem is not not implemented */
83                     return 0;
84                 case 102:
85                     /* relevance is supported in SOLR, but not implemented yet */
86                     return 0;
87                 default:
88                     /* Invalid relation */
89                     return 0;
90                 }
91             }
92             else {
93                 /*  Can we have a complex relation value?
94                     Should we implement something?
95                 */
96             }
97         }
98     }
99     return ":";
100 }
101
102 static int check_range(solr_transform_t ct, Z_Complex *q,
103                        Z_AttributesPlusTerm **p_apt1,
104                        Z_AttributesPlusTerm **p_apt2)
105 {
106     Z_Operator *op = q->roperator;
107     if (op->which == Z_Operator_and &&
108         q->s1->which == Z_RPNStructure_simple &&
109         q->s2->which == Z_RPNStructure_simple &&
110         q->s1->u.simple->which == Z_Operand_APT &&
111         q->s2->u.simple->which == Z_Operand_APT)
112     {
113         Z_AttributesPlusTerm *apt1 = q->s1->u.simple->u.attributesPlusTerm;
114         Z_AttributesPlusTerm *apt2 = q->s2->u.simple->u.attributesPlusTerm;
115         const char *i1 = solr_lookup_reverse(ct, "index.", apt1->attributes);
116         const char *i2 = solr_lookup_reverse(ct, "index.", apt2->attributes);
117         const char *rel1 = solr_lookup_reverse(ct, "relation.",
118                                                apt1->attributes);
119         const char *rel2 = solr_lookup_reverse(ct, "relation.",
120                                                apt2->attributes);
121         if (!rel1)
122             rel1 = lookup_relation_index_from_attr(apt1->attributes);
123         if (!rel2)
124             rel2 = lookup_relation_index_from_attr(apt2->attributes);
125         if (!i1)
126             i1 = lookup_index_from_string_attr(apt1->attributes);
127         if (!i2)
128             i2 = lookup_index_from_string_attr(apt2->attributes);
129         if (i1 && i2 && !strcmp(i1, i2) && rel1 && rel2)
130         {
131             if ((rel1[0] == '>' || rel1[0] == 'g') &&
132                 (rel2[0] == '<' || rel2[0] == 'l'))
133             {
134                 *p_apt1 = apt1;
135                 *p_apt2 = apt2;
136                 return 1;
137             }
138             if ((rel2[0] == '>' || rel2[0] == 'g') &&
139                 (rel1[0] == '<' || rel1[0] == 'l'))
140             {
141                 *p_apt1 = apt2;
142                 *p_apt2 = apt1;
143                 return 1;
144             }
145         }
146     }
147     return 0;
148 }
149
150 static int rpn2solr_attr(solr_transform_t ct,
151                          Z_AttributeList *attributes, WRBUF w)
152 {
153     const char *index = solr_lookup_reverse(ct, "index.", attributes);
154     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
155
156     /* if no real match, try string attribute */
157     if (!index)
158         index = lookup_index_from_string_attr(attributes);
159     if (!index)
160         return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
161     /* for serverChoice we omit index+relation+structure */
162     if (strcmp(index, "cql.serverChoice"))
163     {
164         wrbuf_puts(w, index);
165         wrbuf_puts(w, ":");
166         if (structure)
167         {
168             if (strcmp(structure, "*"))
169             {
170                 wrbuf_puts(w, "/");
171                 wrbuf_puts(w, structure);
172                 wrbuf_puts(w, " ");
173             }
174         }
175     }
176     return 0;
177 }
178
179 static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
180 {
181     int j;
182     Z_AttributeList *attributes = apt->attributes;
183     for (j = 0; j < attributes->num_attributes; j++)
184     {
185         Z_AttributeElement *ae = attributes->attributes[j];
186         if (*ae->attributeType == 5) /* truncation attribute */
187         {
188             if (ae->which == Z_AttributeValue_numeric)
189             {
190                 return *(ae->value.numeric);
191             }
192             else if (ae->which == Z_AttributeValue_complex) {
193                 ;
194                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
195                 /* Complex: Shouldn't happen */
196             }
197         }
198     }
199     /* No truncation given */
200     return 0;
201 }
202
203 #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
204
205 static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc)
206 {
207     size_t lterm = 0;
208     const char *sterm = 0;
209     switch (term->which)
210     {
211     case Z_Term_general:
212         lterm = term->u.general->len;
213         sterm = (const char *) term->u.general->buf;
214         break;
215     case Z_Term_numeric:
216         wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
217         break;
218     case Z_Term_characterString:
219         sterm = term->u.characterString;
220         lterm = strlen(sterm);
221         break;
222     default:
223         return YAZ_BIB1_TERM_TYPE_UNSUPP;
224     }
225
226     if (sterm)
227     {
228         size_t i;
229         int must_quote = 0;
230
231         for (i = 0 ; i < lterm; i++)
232             if (sterm[i] == ' ')
233                 must_quote = 1;
234         if (must_quote)
235             wrbuf_puts(w, "\"");
236         if (trunc == 2 || trunc == 3)
237             wrbuf_puts(w, "*");
238         for (i = 0 ; i < lterm; i++)
239         {
240             if (sterm[i] == '\\' && i < lterm - 1)
241             {
242                 i++;
243                 if (strchr(SOLR_SPECIAL, sterm[i]))
244                     wrbuf_putc(w, '\\');
245                 wrbuf_putc(w, sterm[i]);
246             }
247             else if (sterm[i] == '?' && trunc == 104)
248             {
249                 wrbuf_putc(w, '*');
250             }
251             else if (sterm[i] == '#' && trunc == 104)
252             {
253                 wrbuf_putc(w, '?');
254             }
255             else if (strchr(SOLR_SPECIAL, sterm[i]))
256             {
257                 wrbuf_putc(w, '\\');
258                 wrbuf_putc(w, sterm[i]);
259             }
260             else
261                 wrbuf_putc(w, sterm[i]);
262         }
263         if (trunc == 1 || trunc == 3)
264             wrbuf_puts(w, "*");
265         if (must_quote)
266             wrbuf_puts(w, "\"");
267     }
268     return 0;
269 }
270
271 static int rpn2solr_simple(solr_transform_t ct,
272                            void (*pr)(const char *buf, void *client_data),
273                            void *client_data,
274                            Z_AttributesPlusTerm *apt, WRBUF w,
275                            Z_AttributesPlusTerm *apt2)
276  {
277      int ret = 0;
278      Z_Term *term = apt->term;
279      Odr_int trunc = get_truncation(apt);
280      const char *relation2 = 0;
281      const char *relation1 = solr_lookup_reverse(ct, "relation.",
282                                                  apt->attributes);
283      /* Attempt to fix bug #2978: Look for a relation attribute */
284      if (!relation1)
285          relation1 = lookup_relation_index_from_attr(apt->attributes);
286      if (!relation1)
287      {
288          return YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
289      }
290      if (apt2)
291      {
292          relation2 = solr_lookup_reverse(ct, "relation.",
293                                          apt2->attributes);
294          if (!relation2)
295              relation2 = lookup_relation_index_from_attr(apt2->attributes);
296      }
297      wrbuf_rewind(w);
298      ret = rpn2solr_attr(ct, apt->attributes, w);
299      if (ret)
300          return ret;
301      if ((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104)
302              ;
303      else
304      {
305          return YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE;
306      }
307
308      if (!relation1)
309          ret = emit_term(ct, w, term, trunc);
310      else if (relation1[0] == '<' || relation1[0] == 'l')
311      {
312          wrbuf_puts(w, "[* TO ");
313          ret = emit_term(ct, w, term, trunc);
314          if (!strcmp(relation1, "le") || !strcmp(relation1, "<="))
315              wrbuf_puts(w, "]");
316          else
317              wrbuf_puts(w, "}");
318      }
319      else if (relation1[0] == '>' || relation1[0] == 'g')
320      {
321          if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge"))
322              wrbuf_puts(w, "[");
323          else
324              wrbuf_puts(w, "{");
325          ret = emit_term(ct, w, term, trunc);
326          wrbuf_puts(w, " TO ");
327          if (apt2)
328          {
329              emit_term(ct, w, apt2->term, 0);
330              if (!relation2 || !strcmp(relation2, "<=") ||
331                  !strcmp(relation2, "le"))
332                  wrbuf_puts(w, "]");
333              else
334                  wrbuf_puts(w, "}");
335          }
336          else
337              wrbuf_puts(w, "*]");
338      }
339      else
340          ret = emit_term(ct, w, term, trunc);
341      if (ret == 0)
342          pr(wrbuf_cstr(w), client_data);
343      return ret;
344  }
345
346
347 static int rpn2solr_structure(solr_transform_t ct,
348                               void (*pr)(const char *buf, void *client_data),
349                               void *client_data,
350                               Z_RPNStructure *q, int nested,
351                               WRBUF w)
352 {
353     if (q->which == Z_RPNStructure_simple)
354     {
355         if (q->u.simple->which != Z_Operand_APT)
356             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
357         else
358             return rpn2solr_simple(ct, pr, client_data,
359                                    q->u.simple->u.attributesPlusTerm, w, 0);
360     }
361     else
362     {
363         Z_Operator *op = q->u.complex->roperator;
364         Z_AttributesPlusTerm *apt1, *apt2;
365         int r;
366
367         if (check_range(ct, q->u.complex, &apt1, &apt2))
368             return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2);
369         if (nested)
370             pr("(", client_data);
371
372         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
373         if (r)
374             return r;
375         switch (op->which)
376         {
377         case Z_Operator_and:
378             pr(" AND ", client_data);
379             break;
380         case Z_Operator_or:
381             pr(" OR ", client_data);
382             break;
383         case Z_Operator_and_not:
384             pr(" AND NOT ", client_data);
385             break;
386         case Z_Operator_prox:
387             return YAZ_BIB1_UNSUPP_SEARCH;
388         }
389         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
390         if (nested)
391             pr(")", client_data);
392         return r;
393     }
394 }
395
396 int solr_transform_rpn2solr_stream(solr_transform_t ct,
397                                    void (*pr)(const char *buf, void *client_data),
398                                    void *client_data,
399                                    Z_RPNQuery *q)
400 {
401     int r;
402     WRBUF w = wrbuf_alloc();
403     r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w);
404     if (r)
405         solr_transform_set_error(ct, r, 0);
406     wrbuf_destroy(w);
407     return r;
408 }
409
410
411 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
412                                   WRBUF w,
413                                   Z_RPNQuery *q)
414 {
415     return solr_transform_rpn2solr_stream(ct, wrbuf_vp_puts, w, q);
416 }
417
418 /*
419  * Local variables:
420  * c-basic-offset: 4
421  * c-file-style: "Stroustrup"
422  * indent-tabs-mode: nil
423  * End:
424  * vim: shiftwidth=4 tabstop=8 expandtab
425  */
426