3744fe675b2446b32a9e85414a58cec3de47c950
[mp-sparql-moved-to-github.git] / src / sparql.c
1 /**
2  * \file sparql.c
3  * \brief SPARQL
4  */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
/**
 * One configuration entry: a pattern name paired with its value.
 * Entries form a singly linked list in the order they were added.
 */
struct sparql_entry {
    char *pattern;              /* entry key, e.g. "prefix", "form", "index.NAME" */
    char *value;                /* text associated with the key */
    struct sparql_entry *next;  /* following entry, 0 for the last one */
};
16
17 struct yaz_sparql_s {
18     NMEM nmem;
19     struct sparql_entry *conf;
20     struct sparql_entry **last;
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
41                            const char *value)
42 {
43     struct sparql_entry *e;
44     assert(s);
45
46     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
47     e->pattern = nmem_strdup(s->nmem, pattern);
48     e->value = nmem_strdup(s->nmem, value);
49     e->next = 0;
50     *s->last = e;
51     s->last = &e->next;
52     return 0;
53 }
54
55 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
56                               Z_RPNQuery *q)
57 {
58     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
59 }
60
61 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
62 {
63     int j;
64     for (j = 0; j < attributes->num_attributes; j++)
65     {
66         Z_AttributeElement *ae = attributes->attributes[j];
67         if (*ae->attributeType == type)
68         {
69             if (ae->which == Z_AttributeValue_numeric)
70                 return *ae->value.numeric;
71         }
72     }
73     return 0;
74 }
75
76 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
77 {
78     int j;
79     for (j = 0; j < attributes->num_attributes; j++)
80     {
81         Z_AttributeElement *ae = attributes->attributes[j];
82         if (*ae->attributeType == type)
83         {
84             if (ae->which == Z_AttributeValue_complex)
85             {
86                 Z_ComplexAttribute *ca = ae->value.complex;
87                 int i;
88                 for (i = 0; i < ca->num_list; i++)
89                 {
90                     Z_StringOrNumeric *son = ca->list[i];
91                     if (son->which == Z_StringOrNumeric_string)
92                         return son->u.string;
93                 }
94             }
95         }
96     }
97     return 0;
98 }
99
100 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
101                Z_AttributesPlusTerm *q, int indent, int *var_no)
102 {
103     Z_Term *term = q->term;
104     Odr_int v = lookup_attr_numeric(q->attributes, 1);
105     struct sparql_entry *e = 0;
106     const char *cp;
107     const char *use_var = 0;
108     int i;
109
110     wrbuf_puts(res, "  ");
111     for (i = 0; i < indent; i++)
112         wrbuf_puts(res, " ");
113     if (v)
114     {
115         for (e = s->conf; e; e = e->next)
116         {
117             if (!strncmp(e->pattern, "index.", 6))
118             {
119                 char *end = 0;
120                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
121
122                 if (end && *end == '\0' && v == w)
123                     break;
124             }
125         }
126         if (!e)
127         {
128             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
129             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
130         }
131     }
132     else
133     {
134         const char *index_name = lookup_attr_string(q->attributes, 1);
135         if (!index_name)
136             index_name = "any";
137         for (e = s->conf; e; e = e->next)
138         {
139             if (!strncmp(e->pattern, "index.", 6))
140             {
141                 if (!strcmp(e->pattern + 6, index_name))
142                     break;
143             }
144         }
145         if (!e)
146         {
147             wrbuf_puts(addinfo, index_name);
148             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
149         }
150     }
151     assert(e);
152     wrbuf_rewind(addinfo);
153
154     for (cp = e->value; *cp; cp++)
155     {
156         if (strchr(" \t\r\n\f", *cp) && !use_var)
157         {
158             use_var = e->value;
159             if (strchr("$?", e->value[0]))
160             {
161                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
162                 wrbuf_puts(vars, " ");
163             }
164         }
165         if (*cp == '%')
166         {
167             switch (*++cp)
168             {
169             case 's':
170                 wrbuf_puts(addinfo, "\"");
171                 switch (term->which)
172                 {
173                 case Z_Term_general:
174                     wrbuf_json_write(addinfo,
175                                 term->u.general->buf, term->u.general->len);
176                     break;
177                 case Z_Term_numeric:
178                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
179                     break;
180                 case Z_Term_characterString:
181                     wrbuf_json_puts(addinfo, term->u.characterString);
182                     break;
183                 }
184                 wrbuf_puts(addinfo, "\"");
185                 break;
186             case 'd':
187                 switch (term->which)
188                 {
189                 case Z_Term_general:
190                     wrbuf_write(addinfo,
191                                 term->u.general->buf, term->u.general->len);
192                     break;
193                 case Z_Term_numeric:
194                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
195                     break;
196                 case Z_Term_characterString:
197                     wrbuf_puts(addinfo, term->u.characterString);
198                     break;
199                 }
200                 break;
201             case 'v':
202                 wrbuf_printf(addinfo, "?v%d", *var_no);
203                 break;
204             case '%':
205                 wrbuf_putc(addinfo, '%');
206                 break;
207             }
208         }
209         else
210             wrbuf_putc(addinfo, *cp);
211     }
212     wrbuf_puts(res, wrbuf_cstr(addinfo));
213     (*var_no)++;
214     return 0;
215 }
216
217
218 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
219                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
220                          int *var_no)
221 {
222     int i;
223     if (q->which == Z_RPNStructure_complex)
224     {
225         int r;
226         Z_Complex *c = q->u.complex;
227         Z_Operator *op = c->roperator;
228         if (op->which == Z_Operator_and)
229         {
230             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
231             if (r)
232                 return r;
233             wrbuf_puts(res, " .\n");
234             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
235         }
236         else if (op->which == Z_Operator_or)
237         {
238             for (i = 0; i < indent; i++)
239                 wrbuf_puts(res, " ");
240             wrbuf_puts(res, "  {\n");
241             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
242             if (r)
243                 return r;
244             wrbuf_puts(res, "\n");
245             for (i = 0; i < indent; i++)
246                 wrbuf_puts(res, " ");
247             wrbuf_puts(res, "  } UNION {\n");
248             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
249             wrbuf_puts(res, "\n");
250             for (i = 0; i < indent; i++)
251                 wrbuf_puts(res, " ");
252             wrbuf_puts(res, "  }");
253             return r;
254         }
255         else
256         {
257             return YAZ_BIB1_OPERATOR_UNSUPP;
258         }
259     }
260     else
261     {
262         Z_Operand *op = q->u.simple;
263         if (op->which == Z_Operand_APT)
264             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
265                        var_no);
266         else
267             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
268     }
269     return 0;
270 }
271
272 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
273                                WRBUF addinfo,
274                                void (*pr)(const char *buf,
275                                           void *client_data),
276                                void *client_data,
277                                Z_RPNQuery *q)
278 {
279     struct sparql_entry *e;
280     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
281     int r = 0, errors = 0;
282
283     for (e = s->conf; e; e = e->next)
284     {
285         if (!strcmp(e->pattern, "prefix"))
286         {
287             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
288             int no = 0;
289
290             pr("PREFIX", client_data);
291             while (1)
292             {
293                 const char *tok_str;
294                 int token = yaz_tok_move(p);
295                 if (token != YAZ_TOK_STRING)
296                     break;
297                 pr(" ", client_data);
298
299                 tok_str = yaz_tok_parse_string(p);
300                 if (tok_str[0])
301                 {
302                     if (no > 0 && tok_str[0] != '<')
303                         pr("<", client_data);
304                     pr(tok_str, client_data);
305                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
306                         pr(">", client_data);
307                 }
308                 no++;
309             }
310             pr("\n", client_data);
311             yaz_tok_parse_destroy(p);
312         }
313         else if (!strcmp(e->pattern, "criteria"))
314         {
315             ;
316         }
317         else if (!strcmp(e->pattern, "criteria.optional"))
318         {
319             ;
320         }
321         else if (!strncmp(e->pattern, "index.", 6))
322         {
323             ;
324         }
325         else if (!strcmp(e->pattern, "form"))
326         {
327             ;
328         }
329         else if (!strcmp(e->pattern, "modifier"))
330         {
331             ;
332         }
333         else
334         {
335             errors++;
336         }
337     }
338     for (e = s->conf; e; e = e->next)
339     {
340         if (!strcmp(e->pattern, "form"))
341         {
342             pr(e->value, client_data);
343             pr("\n", client_data);
344         }
345     }
346     pr("WHERE {\n", client_data);
347     for (e = s->conf; e; e = e->next)
348     {
349         if (!strcmp(e->pattern, "criteria"))
350         {
351             pr("  ", client_data);
352             pr(e->value, client_data);
353             pr(" .\n", client_data);
354         }
355     }
356     if (!errors)
357     {
358         WRBUF res = wrbuf_alloc();
359         WRBUF vars = wrbuf_alloc();
360         int var_no = 0;
361         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
362         if (r == 0)
363         {
364             WRBUF t_var = wrbuf_alloc();
365             for (e = s->conf; e; e = e->next)
366             {
367                 if (!strcmp(e->pattern, "criteria.optional"))
368                 {
369                     int optional = 1;
370                     size_t i = strlen(e->value), j;
371
372                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
373                         --i;
374                     j = i;
375                     while (i > 0 && !strchr("$?", e->value[i-1]))
376                         --i;
377                     if (i > 0 && j > i)
378                     {
379                         wrbuf_rewind(t_var);
380                         wrbuf_write(t_var, e->value + i, j - i);
381                         wrbuf_puts(t_var, " ");
382                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
383                             optional = 0;
384                     }
385
386                     pr("  ", client_data);
387                     if (optional)
388                         pr("OPTIONAL { ", client_data);
389                     pr(e->value, client_data);
390                     if (optional)
391                         pr(" }", client_data);
392                     pr(" .\n", client_data);
393                 }
394             }
395             pr(wrbuf_cstr(res), client_data);
396             wrbuf_destroy(t_var);
397         }
398         wrbuf_destroy(res);
399         wrbuf_destroy(vars);
400     }
401     pr("\n}\n", client_data);
402
403     for (e = s->conf; e; e = e->next)
404     {
405         if (!strcmp(e->pattern, "modifier"))
406         {
407             pr(e->value, client_data);
408             pr("\n", client_data);
409         }
410     }
411     yaz_tok_cfg_destroy(cfg);
412
413     return errors ? -1 : r;
414 }
415
416 /*
417  * Local variables:
418  * c-basic-offset: 4
419  * c-file-style: "Stroustrup"
420  * indent-tabs-mode: nil
421  * End:
422  * vim: shiftwidth=4 tabstop=8 expandtab
423  */
424