Allow include of db definitions MPSPARQL-15
[mp-sparql-moved-to-github.git] / src / sparql.c
1 /**
2  * \file sparql.c
3  * \brief SPARQL
4  */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
/**
 * \brief One configuration entry; entries form a singly linked list.
 */
struct sparql_entry {
    char *pattern;              /* entry name, e.g. "prefix" or "index.<x>" */
    char *value;                /* text configured for that name */
    struct sparql_entry *next;  /* following entry, 0 at the end of the list */
};
16
17 struct yaz_sparql_s {
18     NMEM nmem;
19     struct sparql_entry *conf;
20     struct sparql_entry **last;
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 void yaz_sparql_include(yaz_sparql_t s, yaz_sparql_t u)
41 {
42     struct sparql_entry *e = u->conf;
43     for (; e; e = e->next)
44         yaz_sparql_add_pattern(s, e->pattern, e->value);
45 }
46
47 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
48                            const char *value)
49 {
50     struct sparql_entry *e;
51     assert(s);
52
53     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
54     e->pattern = nmem_strdup(s->nmem, pattern);
55     e->value = nmem_strdup(s->nmem, value);
56     e->next = 0;
57     *s->last = e;
58     s->last = &e->next;
59     return 0;
60 }
61
62 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
63                               Z_RPNQuery *q)
64 {
65     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
66 }
67
68 int yaz_sparql_from_uri_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
69                               const char *uri, const char *schema)
70 {
71     return yaz_sparql_from_uri_stream(s, addinfo, wrbuf_vp_puts, w, uri,
72                                       schema);
73 }
74
75 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
76 {
77     int j;
78     for (j = 0; j < attributes->num_attributes; j++)
79     {
80         Z_AttributeElement *ae = attributes->attributes[j];
81         if (*ae->attributeType == type)
82         {
83             if (ae->which == Z_AttributeValue_numeric)
84                 return *ae->value.numeric;
85         }
86     }
87     return 0;
88 }
89
90 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
91 {
92     int j;
93     for (j = 0; j < attributes->num_attributes; j++)
94     {
95         Z_AttributeElement *ae = attributes->attributes[j];
96         if (*ae->attributeType == type)
97         {
98             if (ae->which == Z_AttributeValue_complex)
99             {
100                 Z_ComplexAttribute *ca = ae->value.complex;
101                 int i;
102                 for (i = 0; i < ca->num_list; i++)
103                 {
104                     Z_StringOrNumeric *son = ca->list[i];
105                     if (son->which == Z_StringOrNumeric_string)
106                         return son->u.string;
107                 }
108             }
109         }
110     }
111     return 0;
112 }
113
114 static int z_term(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
115                   struct sparql_entry *e, const char *use_var,
116                   Z_Term *term, int indent, int *var_no)
117 {
118     const char *cp;
119     for (cp = e->value; *cp; cp++)
120     {
121         if (strchr(" \t\r\n\f", *cp) && !use_var)
122         {
123             use_var = e->value;
124             if (strchr("$?", e->value[0]))
125             {
126                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
127                 wrbuf_puts(vars, " ");
128             }
129         }
130         if (*cp == '%')
131         {
132             switch (*++cp)
133             {
134             case 's':
135                 wrbuf_puts(addinfo, "\"");
136                 switch (term->which)
137                 {
138                 case Z_Term_general:
139                     wrbuf_json_write(addinfo,
140                                 term->u.general->buf, term->u.general->len);
141                     break;
142                 case Z_Term_numeric:
143                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
144                     break;
145                 case Z_Term_characterString:
146                     wrbuf_json_puts(addinfo, term->u.characterString);
147                     break;
148                 }
149                 wrbuf_puts(addinfo, "\"");
150                 break;
151             case 'u':
152                 wrbuf_puts(addinfo, "<");
153                 switch (term->which)
154                 {
155                 case Z_Term_general:
156                     wrbuf_json_write(addinfo,
157                                 term->u.general->buf, term->u.general->len);
158                     break;
159                 case Z_Term_numeric:
160                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
161                     break;
162                 case Z_Term_characterString:
163                     wrbuf_json_puts(addinfo, term->u.characterString);
164                     break;
165                 }
166                 wrbuf_puts(addinfo, ">");
167                 break;
168             case 'd':
169                 switch (term->which)
170                 {
171                 case Z_Term_general:
172                     wrbuf_write(addinfo,
173                                 term->u.general->buf, term->u.general->len);
174                     break;
175                 case Z_Term_numeric:
176                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
177                     break;
178                 case Z_Term_characterString:
179                     wrbuf_puts(addinfo, term->u.characterString);
180                     break;
181                 }
182                 break;
183             case 'v':
184                 wrbuf_printf(addinfo, "?v%d", *var_no);
185                 break;
186             case '%':
187                 wrbuf_putc(addinfo, '%');
188                 break;
189             }
190         }
191         else
192             wrbuf_putc(addinfo, *cp);
193     }
194     wrbuf_puts(res, wrbuf_cstr(addinfo));
195     return 0;
196 }
197
198 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
199                Z_AttributesPlusTerm *q, int indent, int *var_no)
200 {
201     Odr_int v = lookup_attr_numeric(q->attributes, 1);
202     struct sparql_entry *e = 0;
203     const char *use_var = 0;
204     int i;
205
206     wrbuf_puts(res, "  ");
207     for (i = 0; i < indent; i++)
208         wrbuf_puts(res, " ");
209     if (v)
210     {
211         for (e = s->conf; e; e = e->next)
212         {
213             if (!strncmp(e->pattern, "index.", 6))
214             {
215                 char *end = 0;
216                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
217
218                 if (end && *end == '\0' && v == w)
219                     break;
220             }
221         }
222         if (!e)
223         {
224             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
225             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
226         }
227     }
228     else
229     {
230         const char *index_name = lookup_attr_string(q->attributes, 1);
231         if (!index_name)
232             index_name = "any";
233         for (e = s->conf; e; e = e->next)
234         {
235             if (!strncmp(e->pattern, "index.", 6))
236             {
237                 if (!strcmp(e->pattern + 6, index_name))
238                     break;
239             }
240         }
241         if (!e)
242         {
243             wrbuf_puts(addinfo, index_name);
244             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
245         }
246     }
247     assert(e);
248     wrbuf_rewind(addinfo);
249
250     z_term(s, addinfo, res, vars, e, use_var, q->term, indent, var_no);
251     (*var_no)++;
252     return 0;
253 }
254
255
256 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
257                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
258                          int *var_no)
259 {
260     int i;
261     if (q->which == Z_RPNStructure_complex)
262     {
263         int r;
264         Z_Complex *c = q->u.complex;
265         Z_Operator *op = c->roperator;
266         if (op->which == Z_Operator_and)
267         {
268             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
269             if (r)
270                 return r;
271             wrbuf_puts(res, " .\n");
272             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
273         }
274         else if (op->which == Z_Operator_or)
275         {
276             for (i = 0; i < indent; i++)
277                 wrbuf_puts(res, " ");
278             wrbuf_puts(res, "  {\n");
279             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
280             if (r)
281                 return r;
282             wrbuf_puts(res, "\n");
283             for (i = 0; i < indent; i++)
284                 wrbuf_puts(res, " ");
285             wrbuf_puts(res, "  } UNION {\n");
286             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
287             wrbuf_puts(res, "\n");
288             for (i = 0; i < indent; i++)
289                 wrbuf_puts(res, " ");
290             wrbuf_puts(res, "  }");
291             return r;
292         }
293         else
294         {
295             return YAZ_BIB1_OPERATOR_UNSUPP;
296         }
297     }
298     else
299     {
300         Z_Operand *op = q->u.simple;
301         if (op->which == Z_Operand_APT)
302             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
303                        var_no);
304         else
305             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
306     }
307     return 0;
308 }
309
310 static int emit_prefixes(yaz_sparql_t s,
311                           WRBUF addinfo,
312                           void (*pr)(const char *buf,
313                                      void *client_data),
314                           void *client_data)
315 {
316     struct sparql_entry *e;
317     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
318     int errors = 0;
319     for (e = s->conf; e; e = e->next)
320     {
321         if (!strcmp(e->pattern, "prefix"))
322         {
323             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
324             int no = 0;
325
326             pr("PREFIX", client_data);
327             while (1)
328             {
329                 const char *tok_str;
330                 int token = yaz_tok_move(p);
331                 if (token != YAZ_TOK_STRING)
332                     break;
333                 pr(" ", client_data);
334
335                 tok_str = yaz_tok_parse_string(p);
336                 if (tok_str[0])
337                 {
338                     if (no > 0 && tok_str[0] != '<')
339                         pr("<", client_data);
340                     pr(tok_str, client_data);
341                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
342                         pr(">", client_data);
343                 }
344                 no++;
345             }
346             pr("\n", client_data);
347             yaz_tok_parse_destroy(p);
348         }
349         else if (!strcmp(e->pattern, "criteria"))
350         {
351             ;
352         }
353         else if (!strcmp(e->pattern, "criteria.optional"))
354         {
355             ;
356         }
357         else if (!strncmp(e->pattern, "index.", 6))
358         {
359             ;
360         }
361         else if (!strcmp(e->pattern, "form"))
362         {
363             ;
364         }
365         else if (!strcmp(e->pattern, "modifier"))
366         {
367             ;
368         }
369         else if (!strncmp(e->pattern, "present", 7))
370         {
371             ;
372         }
373         else if (!strncmp(e->pattern, "uri", 3))
374         {
375             ;
376         }
377         else
378         {
379             errors++;
380         }
381     }
382     yaz_tok_cfg_destroy(cfg);
383     return errors;
384 }
385
386 struct sparql_entry *lookup_schema(yaz_sparql_t s, const char *schema)
387 {
388     struct sparql_entry *e;
389
390     for (e = s->conf; e; e = e->next)
391     {
392         if (!strncmp(e->pattern, "present.", 8))
393         {
394             if (!schema || !strcmp(e->pattern + 8, schema))
395                 break;
396         }
397         if (!strncmp(e->pattern, "uri.", 4))
398         {
399             if (!schema || !strcmp(e->pattern + 4, schema))
400                 break;
401         }
402     }
403     return e;
404 }
405
406 int yaz_sparql_lookup_schema(yaz_sparql_t s, const char *schema)
407 {
408     return lookup_schema(s, schema) ? 1 : 0;
409 }
410
411 int yaz_sparql_from_uri_stream(yaz_sparql_t s,
412                                WRBUF addinfo,
413                                void (*pr)(const char *buf, void *client_data),
414                                void *client_data,
415                                const char *uri, const char *schema)
416 {
417     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
418     struct sparql_entry *e = lookup_schema(s, schema);
419     if (!e)
420         errors++;
421     if (!errors)
422     {
423         WRBUF res = wrbuf_alloc();
424         WRBUF vars = wrbuf_alloc();
425         int var_no = 0;
426         Z_Term term;
427
428         term.which = Z_Term_characterString;
429         term.u.characterString = (char *) uri;
430         r = z_term(s, addinfo, res, vars, e, 0, &term, 0, &var_no);
431         if (!r)
432         {
433             pr(wrbuf_cstr(res), client_data);
434             pr("\n", client_data);
435         }
436         wrbuf_destroy(res);
437         wrbuf_destroy(vars);
438     }
439     return errors ? -1 : r;
440 }
441
442 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
443                                WRBUF addinfo,
444                                void (*pr)(const char *buf,
445                                           void *client_data),
446                                void *client_data,
447                                Z_RPNQuery *q)
448 {
449     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
450     struct sparql_entry *e;
451
452     for (e = s->conf; e; e = e->next)
453     {
454         if (!strcmp(e->pattern, "form"))
455         {
456             pr(e->value, client_data);
457             pr("\n", client_data);
458         }
459     }
460     pr("WHERE {\n", client_data);
461     for (e = s->conf; e; e = e->next)
462     {
463         if (!strcmp(e->pattern, "criteria"))
464         {
465             pr("  ", client_data);
466             pr(e->value, client_data);
467             pr(" .\n", client_data);
468         }
469     }
470     if (!errors)
471     {
472         WRBUF res = wrbuf_alloc();
473         WRBUF vars = wrbuf_alloc();
474         int var_no = 0;
475         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
476         if (r == 0)
477         {
478             WRBUF t_var = wrbuf_alloc();
479             for (e = s->conf; e; e = e->next)
480             {
481                 if (!strcmp(e->pattern, "criteria.optional"))
482                 {
483                     int optional = 1;
484                     size_t i = strlen(e->value), j;
485
486                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
487                         --i;
488                     j = i;
489                     while (i > 0 && !strchr("$?", e->value[i-1]))
490                         --i;
491                     if (i > 0 && j > i)
492                     {
493                         wrbuf_rewind(t_var);
494                         wrbuf_write(t_var, e->value + i, j - i);
495                         wrbuf_puts(t_var, " ");
496                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
497                             optional = 0;
498                     }
499
500                     pr("  ", client_data);
501                     if (optional)
502                         pr("OPTIONAL { ", client_data);
503                     pr(e->value, client_data);
504                     if (optional)
505                         pr(" }", client_data);
506                     pr(" .\n", client_data);
507                 }
508             }
509             pr(wrbuf_cstr(res), client_data);
510             wrbuf_destroy(t_var);
511         }
512         wrbuf_destroy(res);
513         wrbuf_destroy(vars);
514     }
515     pr("\n}\n", client_data);
516
517     for (e = s->conf; e; e = e->next)
518     {
519         if (!strcmp(e->pattern, "modifier"))
520         {
521             pr(e->value, client_data);
522             pr("\n", client_data);
523         }
524     }
525     return errors ? -1 : r;
526 }
527
528 /*
529  * Local variables:
530  * c-basic-offset: 4
531  * c-file-style: "Stroustrup"
532  * indent-tabs-mode: nil
533  * End:
534  * vim: shiftwidth=4 tabstop=8 expandtab
535  */
536