Refactor for string based queryType (SRU 2.0)
[yaz-moved-to-github.git] / src / rpn2solr.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements RPN to SOLR conversion
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <assert.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <yaz/rpn2solr.h>
17 #include <yaz/xmalloc.h>
18 #include <yaz/diagbib1.h>
19 #include <yaz/z-core.h>
20 #include <yaz/wrbuf.h>
21
22 static void wrbuf_vputs(const char *buf, void *client_data)
23 {
24     wrbuf_write((WRBUF) client_data, buf, strlen(buf));
25 }
26
27 static const char *lookup_index_from_string_attr(Z_AttributeList *attributes)
28 {
29     int j;
30     int server_choice = 1;
31     for (j = 0; j < attributes->num_attributes; j++)
32     {
33         Z_AttributeElement *ae = attributes->attributes[j];
34         if (*ae->attributeType == 1) /* use attribute */
35         {
36             if (ae->which == Z_AttributeValue_complex)
37             {
38                 Z_ComplexAttribute *ca = ae->value.complex;
39                 int i;
40                 for (i = 0; i < ca->num_list; i++)
41                 {
42                     Z_StringOrNumeric *son = ca->list[i];
43                     if (son->which == Z_StringOrNumeric_string)
44                         return son->u.string;
45                 }
46             }
47             server_choice = 0; /* not serverChoice because we have use attr */
48         }
49     }
50     if (server_choice)
51         return "cql.serverChoice";
52     return 0;
53 }
54
55 static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes)
56 {
57     int j;
58     for (j = 0; j < attributes->num_attributes; j++)
59     {
60         Z_AttributeElement *ae = attributes->attributes[j];
61         if (*ae->attributeType == 2) /* relation attribute */
62         {
63             if (ae->which == Z_AttributeValue_numeric)
64             {
65                 /* Only support for numeric relation */
66                 Odr_int *relation = ae->value.numeric;
67                 /* map this numeric to representation in SOLR */
68                 switch (*relation)
69                 {
70                     /* Unsure on whether this is the relation attribute constants? */
71                 case Z_ProximityOperator_Prox_lessThan:
72                     return 0;
73                 case Z_ProximityOperator_Prox_lessThanOrEqual:
74                     return 0;
75                 case Z_ProximityOperator_Prox_equal:
76                     return ":";
77                 case Z_ProximityOperator_Prox_greaterThanOrEqual:
78                     return 0;
79                 case Z_ProximityOperator_Prox_greaterThan:
80                     return 0;
81                 case Z_ProximityOperator_Prox_notEqual:
82                     return 0;
83                 case 100:
84                     /* phonetic is not implemented*/
85                     return 0;
86                 case 101:
87                     /* stem is not not implemented */
88                     return 0;
89                 case 102:
90                     /* relevance is supported in SOLR, but not implemented yet */
91                     return 0;
92                 default:
93                     /* Invalid relation */
94                     return 0;
95                 }
96             }
97             else {
98                 /*  Can we have a complex relation value?
99                     Should we implement something?
100                 */
101             }
102         }
103     }
104     return ":";
105 }
106
107 static int rpn2solr_attr(solr_transform_t ct,
108                          Z_AttributeList *attributes, WRBUF w, char **close_range)
109 {
110     const char *relation = solr_lookup_reverse(ct, "relation.", attributes);
111     const char *index = solr_lookup_reverse(ct, "index.", attributes);
112     const char *structure = solr_lookup_reverse(ct, "structure.", attributes);
113
114     /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */
115     if (!index)
116         index = lookup_index_from_string_attr(attributes);
117
118     /* Attempt to fix bug #2978: Look for a relation attribute */
119     if (!relation)
120         relation = lookup_relation_index_from_attr(attributes);
121
122     if (!index)
123     {
124         solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0);
125         return -1;
126     }
127     /* for serverChoice we omit index+relation+structure */
128     if (strcmp(index, "cql.serverChoice"))
129     {
130         wrbuf_puts(w, index);
131         if (relation)
132         {
133             if (!strcmp(relation, "exact"))
134                 /* TODO Verify if a exact  SOLR exists */
135                 relation = ":";
136             else if (!strcmp(relation, "eq"))
137                 relation = ":";
138             else if (!strcmp(relation, "le")) {
139                 /* TODO Not support as such, but could perhaps be transformed into a range */
140                 relation = ":[* TO ";
141                 *close_range = "]";
142             }
143             else if (!strcmp(relation, "ge")) {
144                 /* TODO Not support as such, but could perhaps be transformed into a range */
145                 relation = ":[";
146                 *close_range = " TO *]";
147             }
148             /* Missing mapping of not equal, phonetic, stem and relevance */
149             wrbuf_puts(w, relation);
150         }
151         else
152             wrbuf_puts(w, ":");
153
154         if (structure)
155         {
156             if (strcmp(structure, "*"))
157             {
158                 wrbuf_puts(w, "/");
159                 wrbuf_puts(w, structure);
160                 wrbuf_puts(w, " ");
161             }
162         }
163 //        if (close_range)
164 //            wrbuf_puts(w, close_range);
165     }
166     return 0;
167 }
168
169 static Odr_int get_truncation(Z_AttributesPlusTerm *apt)
170 {
171     int j;
172     Z_AttributeList *attributes = apt->attributes;
173     for (j = 0; j < attributes->num_attributes; j++)
174     {
175         Z_AttributeElement *ae = attributes->attributes[j];
176         if (*ae->attributeType == 5) /* truncation attribute */
177         {
178             if (ae->which == Z_AttributeValue_numeric)
179             {
180                 return *(ae->value.numeric);
181             }
182             else if (ae->which == Z_AttributeValue_complex) {
183                 ;
184                 //yaz_log(YLOG_DEBUG, "Z_Attribute_complex");
185                 /* Complex: Shouldn't happen */
186             }
187         }
188     }
189     /* No truncation given */
190     return 0;
191 }
192
193 #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\"
194
195 static int rpn2solr_simple(solr_transform_t ct,
196                            void (*pr)(const char *buf, void *client_data),
197                            void *client_data,
198                            Z_Operand *q, WRBUF w)
199 {
200     int ret = 0;
201     if (q->which != Z_Operand_APT)
202     {
203         ret = -1;
204         solr_transform_set_error(ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0);
205     }
206     else
207     {
208         Z_AttributesPlusTerm *apt = q->u.attributesPlusTerm;
209         Z_Term *term = apt->term;
210         const char *sterm = 0;
211         size_t lterm = 0;
212         Odr_int trunc = get_truncation(apt);
213         char *close_range = 0;
214
215         wrbuf_rewind(w);
216         ret = rpn2solr_attr(ct, apt->attributes, w, &close_range);
217
218         if (trunc == 0 || trunc == 1 || trunc == 100 || trunc == 104)
219             ;
220         else
221         {
222             solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0);
223             return -1;
224         }
225         switch (term->which)
226         {
227         case Z_Term_general:
228             lterm = term->u.general->len;
229             sterm = (const char *) term->u.general->buf;
230             break;
231         case Z_Term_numeric:
232             wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric);
233             break;
234         case Z_Term_characterString:
235             sterm = term->u.characterString;
236             lterm = strlen(sterm);
237             break;
238         default:
239             ret = -1;
240             solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0);
241         }
242
243         if (sterm)
244         {
245             size_t i;
246             int must_quote = 0;
247
248             for (i = 0 ; i < lterm; i++)
249                 if (sterm[i] == ' ')
250                     must_quote = 1;
251             if (must_quote)
252                 wrbuf_puts(w, "\"");
253             for (i = 0 ; i < lterm; i++)
254             {
255                 if (sterm[i] == '\\' && i < lterm - 1)
256                 {
257                     i++;
258                     if (strchr(SOLR_SPECIAL, sterm[i]))
259                         wrbuf_putc(w, '\\');
260                     wrbuf_putc(w, sterm[i]);
261                 }
262                 else if (sterm[i] == '?' && trunc == 104)
263                 {
264                     wrbuf_putc(w, '*');
265                 }
266                 else if (sterm[i] == '#' && trunc == 104)
267                 {
268                     wrbuf_putc(w, '?');
269                 }
270                 else if (strchr(SOLR_SPECIAL, sterm[i]))
271                 {
272                     wrbuf_putc(w, '\\');
273                     wrbuf_putc(w, sterm[i]);
274                 }
275                 else
276                     wrbuf_putc(w, sterm[i]);
277             }
278             if (trunc == 1)
279                 wrbuf_puts(w, "*");
280             if (must_quote)
281                 wrbuf_puts(w, "\"");
282             if (close_range)
283                 wrbuf_puts(w, close_range);
284         }
285         if (ret == 0)
286             pr(wrbuf_cstr(w), client_data);
287     }
288     return ret;
289 }
290
291
292 static int rpn2solr_structure(solr_transform_t ct,
293                               void (*pr)(const char *buf, void *client_data),
294                               void *client_data,
295                               Z_RPNStructure *q, int nested,
296                               WRBUF w)
297 {
298     if (q->which == Z_RPNStructure_simple)
299         return rpn2solr_simple(ct, pr, client_data, q->u.simple, w);
300     else
301     {
302         Z_Operator *op = q->u.complex->roperator;
303         int r;
304
305         if (nested)
306             pr("(", client_data);
307
308         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w);
309         if (r)
310             return r;
311         switch(op->which)
312         {
313         case  Z_Operator_and:
314             pr(" AND ", client_data);
315             break;
316         case  Z_Operator_or:
317             pr(" OR ", client_data);
318             break;
319         case  Z_Operator_and_not:
320             pr(" AND NOT ", client_data);
321             break;
322         case  Z_Operator_prox:
323             solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0);
324             return -1;
325         }
326         r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w);
327         if (nested)
328             pr(")", client_data);
329         return r;
330     }
331 }
332
333 int solr_transform_rpn2solr_stream(solr_transform_t ct,
334                                    void (*pr)(const char *buf, void *client_data),
335                                    void *client_data,
336                                    Z_RPNQuery *q)
337 {
338     int r;
339     WRBUF w = wrbuf_alloc();
340     solr_transform_set_error(ct, 0, 0);
341     r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w);
342     wrbuf_destroy(w);
343     return r;
344 }
345
346
347 int solr_transform_rpn2solr_wrbuf(solr_transform_t ct,
348                                   WRBUF w,
349                                   Z_RPNQuery *q)
350 {
351     return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q);
352 }
353
354 /*
355  * Local variables:
356  * c-basic-offset: 4
357  * c-file-style: "Stroustrup"
358  * indent-tabs-mode: nil
359  * End:
360  * vim: shiftwidth=4 tabstop=8 expandtab
361  */
362