e9987a148cd13f7398f37b3155f0c5453ecf679b
[mp-sparql-moved-to-github.git] / src / sparql.c
/**
 * \file sparql.c
 * \brief Conversion of Z39.50 RPN queries and URIs to SPARQL
 */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
/** \brief one configuration entry: a pattern name paired with its value */
struct sparql_entry {
    /* entry name, e.g. "prefix", "form", "criteria" or "index.<name>" */
    char *pattern;
    /* text associated with the pattern */
    char *value;
    /* following entry in insertion order; 0 for the last one */
    struct sparql_entry *next;
};
16
17 struct yaz_sparql_s {
18     NMEM nmem;
19     struct sparql_entry *conf;
20     struct sparql_entry **last;
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
41                            const char *value)
42 {
43     struct sparql_entry *e;
44     assert(s);
45
46     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
47     e->pattern = nmem_strdup(s->nmem, pattern);
48     e->value = nmem_strdup(s->nmem, value);
49     e->next = 0;
50     *s->last = e;
51     s->last = &e->next;
52     return 0;
53 }
54
55 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
56                               Z_RPNQuery *q)
57 {
58     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
59 }
60
61 int yaz_sparql_from_uri_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
62                               const char *uri, const char *schema)
63 {
64     return yaz_sparql_from_uri_stream(s, addinfo, wrbuf_vp_puts, w, uri,
65                                       schema);
66 }
67
68 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
69 {
70     int j;
71     for (j = 0; j < attributes->num_attributes; j++)
72     {
73         Z_AttributeElement *ae = attributes->attributes[j];
74         if (*ae->attributeType == type)
75         {
76             if (ae->which == Z_AttributeValue_numeric)
77                 return *ae->value.numeric;
78         }
79     }
80     return 0;
81 }
82
83 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
84 {
85     int j;
86     for (j = 0; j < attributes->num_attributes; j++)
87     {
88         Z_AttributeElement *ae = attributes->attributes[j];
89         if (*ae->attributeType == type)
90         {
91             if (ae->which == Z_AttributeValue_complex)
92             {
93                 Z_ComplexAttribute *ca = ae->value.complex;
94                 int i;
95                 for (i = 0; i < ca->num_list; i++)
96                 {
97                     Z_StringOrNumeric *son = ca->list[i];
98                     if (son->which == Z_StringOrNumeric_string)
99                         return son->u.string;
100                 }
101             }
102         }
103     }
104     return 0;
105 }
106
107 static int z_term(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
108                   struct sparql_entry *e, const char *use_var,
109                   Z_Term *term, int indent, int *var_no)
110 {
111     const char *cp;
112     for (cp = e->value; *cp; cp++)
113     {
114         if (strchr(" \t\r\n\f", *cp) && !use_var)
115         {
116             use_var = e->value;
117             if (strchr("$?", e->value[0]))
118             {
119                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
120                 wrbuf_puts(vars, " ");
121             }
122         }
123         if (*cp == '%')
124         {
125             switch (*++cp)
126             {
127             case 's':
128                 wrbuf_puts(addinfo, "\"");
129                 switch (term->which)
130                 {
131                 case Z_Term_general:
132                     wrbuf_json_write(addinfo,
133                                 term->u.general->buf, term->u.general->len);
134                     break;
135                 case Z_Term_numeric:
136                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
137                     break;
138                 case Z_Term_characterString:
139                     wrbuf_json_puts(addinfo, term->u.characterString);
140                     break;
141                 }
142                 wrbuf_puts(addinfo, "\"");
143                 break;
144             case 'u':
145                 wrbuf_puts(addinfo, "<");
146                 switch (term->which)
147                 {
148                 case Z_Term_general:
149                     wrbuf_json_write(addinfo,
150                                 term->u.general->buf, term->u.general->len);
151                     break;
152                 case Z_Term_numeric:
153                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
154                     break;
155                 case Z_Term_characterString:
156                     wrbuf_json_puts(addinfo, term->u.characterString);
157                     break;
158                 }
159                 wrbuf_puts(addinfo, ">");
160                 break;
161             case 'd':
162                 switch (term->which)
163                 {
164                 case Z_Term_general:
165                     wrbuf_write(addinfo,
166                                 term->u.general->buf, term->u.general->len);
167                     break;
168                 case Z_Term_numeric:
169                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
170                     break;
171                 case Z_Term_characterString:
172                     wrbuf_puts(addinfo, term->u.characterString);
173                     break;
174                 }
175                 break;
176             case 'v':
177                 wrbuf_printf(addinfo, "?v%d", *var_no);
178                 break;
179             case '%':
180                 wrbuf_putc(addinfo, '%');
181                 break;
182             }
183         }
184         else
185             wrbuf_putc(addinfo, *cp);
186     }
187     wrbuf_puts(res, wrbuf_cstr(addinfo));
188     return 0;
189 }
190
191 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
192                Z_AttributesPlusTerm *q, int indent, int *var_no)
193 {
194     Odr_int v = lookup_attr_numeric(q->attributes, 1);
195     struct sparql_entry *e = 0;
196     const char *use_var = 0;
197     int i;
198
199     wrbuf_puts(res, "  ");
200     for (i = 0; i < indent; i++)
201         wrbuf_puts(res, " ");
202     if (v)
203     {
204         for (e = s->conf; e; e = e->next)
205         {
206             if (!strncmp(e->pattern, "index.", 6))
207             {
208                 char *end = 0;
209                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
210
211                 if (end && *end == '\0' && v == w)
212                     break;
213             }
214         }
215         if (!e)
216         {
217             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
218             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
219         }
220     }
221     else
222     {
223         const char *index_name = lookup_attr_string(q->attributes, 1);
224         if (!index_name)
225             index_name = "any";
226         for (e = s->conf; e; e = e->next)
227         {
228             if (!strncmp(e->pattern, "index.", 6))
229             {
230                 if (!strcmp(e->pattern + 6, index_name))
231                     break;
232             }
233         }
234         if (!e)
235         {
236             wrbuf_puts(addinfo, index_name);
237             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
238         }
239     }
240     assert(e);
241     wrbuf_rewind(addinfo);
242
243     z_term(s, addinfo, res, vars, e, use_var, q->term, indent, var_no);
244     (*var_no)++;
245     return 0;
246 }
247
248
249 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
250                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
251                          int *var_no)
252 {
253     int i;
254     if (q->which == Z_RPNStructure_complex)
255     {
256         int r;
257         Z_Complex *c = q->u.complex;
258         Z_Operator *op = c->roperator;
259         if (op->which == Z_Operator_and)
260         {
261             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
262             if (r)
263                 return r;
264             wrbuf_puts(res, " .\n");
265             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
266         }
267         else if (op->which == Z_Operator_or)
268         {
269             for (i = 0; i < indent; i++)
270                 wrbuf_puts(res, " ");
271             wrbuf_puts(res, "  {\n");
272             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
273             if (r)
274                 return r;
275             wrbuf_puts(res, "\n");
276             for (i = 0; i < indent; i++)
277                 wrbuf_puts(res, " ");
278             wrbuf_puts(res, "  } UNION {\n");
279             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
280             wrbuf_puts(res, "\n");
281             for (i = 0; i < indent; i++)
282                 wrbuf_puts(res, " ");
283             wrbuf_puts(res, "  }");
284             return r;
285         }
286         else
287         {
288             return YAZ_BIB1_OPERATOR_UNSUPP;
289         }
290     }
291     else
292     {
293         Z_Operand *op = q->u.simple;
294         if (op->which == Z_Operand_APT)
295             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
296                        var_no);
297         else
298             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
299     }
300     return 0;
301 }
302
303 static int emit_prefixes(yaz_sparql_t s,
304                           WRBUF addinfo,
305                           void (*pr)(const char *buf,
306                                      void *client_data),
307                           void *client_data)
308 {
309     struct sparql_entry *e;
310     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
311     int errors = 0;
312     for (e = s->conf; e; e = e->next)
313     {
314         if (!strcmp(e->pattern, "prefix"))
315         {
316             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
317             int no = 0;
318
319             pr("PREFIX", client_data);
320             while (1)
321             {
322                 const char *tok_str;
323                 int token = yaz_tok_move(p);
324                 if (token != YAZ_TOK_STRING)
325                     break;
326                 pr(" ", client_data);
327
328                 tok_str = yaz_tok_parse_string(p);
329                 if (tok_str[0])
330                 {
331                     if (no > 0 && tok_str[0] != '<')
332                         pr("<", client_data);
333                     pr(tok_str, client_data);
334                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
335                         pr(">", client_data);
336                 }
337                 no++;
338             }
339             pr("\n", client_data);
340             yaz_tok_parse_destroy(p);
341         }
342         else if (!strcmp(e->pattern, "criteria"))
343         {
344             ;
345         }
346         else if (!strcmp(e->pattern, "criteria.optional"))
347         {
348             ;
349         }
350         else if (!strncmp(e->pattern, "index.", 6))
351         {
352             ;
353         }
354         else if (!strcmp(e->pattern, "form"))
355         {
356             ;
357         }
358         else if (!strcmp(e->pattern, "modifier"))
359         {
360             ;
361         }
362         else if (!strncmp(e->pattern, "uri", 3))
363         {
364             ;
365         }
366         else
367         {
368             errors++;
369         }
370     }
371     yaz_tok_cfg_destroy(cfg);
372     return errors;
373 }
374
375 int yaz_sparql_lookup_schema(yaz_sparql_t s, const char *schema)
376 {
377     struct sparql_entry *e;
378
379     for (e = s->conf; e; e = e->next)
380     {
381         if (!schema && !strcmp(e->pattern, "uri"))
382             break;
383         else if (schema && !strncmp(e->pattern, "uri.", 4))
384         {
385             if (!strcmp(e->pattern + 4, schema))
386                 break;
387         }
388     }
389     return e ? 1 : 0;
390 }
391
392 int yaz_sparql_from_uri_stream(yaz_sparql_t s,
393                                WRBUF addinfo,
394                                void (*pr)(const char *buf, void *client_data),
395                                void *client_data,
396                                const char *uri, const char *schema)
397 {
398     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
399     struct sparql_entry *e;
400
401     for (e = s->conf; e; e = e->next)
402     {
403         if (!schema && !strcmp(e->pattern, "uri"))
404             break;
405         else if (schema && !strncmp(e->pattern, "uri.", 4))
406         {
407             if (!strcmp(e->pattern + 4, schema))
408                 break;
409         }
410     }
411     if (!e)
412         errors++;
413     if (!errors)
414     {
415         WRBUF res = wrbuf_alloc();
416         WRBUF vars = wrbuf_alloc();
417         int var_no = 0;
418         Z_Term term;
419
420         term.which = Z_Term_characterString;
421         term.u.characterString = (char *) uri;
422         r = z_term(s, addinfo, res, vars, e, 0, &term, 0, &var_no);
423         if (!r)
424         {
425             pr(wrbuf_cstr(res), client_data);
426             pr("\n", client_data);
427         }
428         wrbuf_destroy(res);
429         wrbuf_destroy(vars);
430     }
431     return errors ? -1 : r;
432 }
433
434 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
435                                WRBUF addinfo,
436                                void (*pr)(const char *buf,
437                                           void *client_data),
438                                void *client_data,
439                                Z_RPNQuery *q)
440 {
441     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
442     struct sparql_entry *e;
443
444     for (e = s->conf; e; e = e->next)
445     {
446         if (!strcmp(e->pattern, "form"))
447         {
448             pr(e->value, client_data);
449             pr("\n", client_data);
450         }
451     }
452     pr("WHERE {\n", client_data);
453     for (e = s->conf; e; e = e->next)
454     {
455         if (!strcmp(e->pattern, "criteria"))
456         {
457             pr("  ", client_data);
458             pr(e->value, client_data);
459             pr(" .\n", client_data);
460         }
461     }
462     if (!errors)
463     {
464         WRBUF res = wrbuf_alloc();
465         WRBUF vars = wrbuf_alloc();
466         int var_no = 0;
467         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
468         if (r == 0)
469         {
470             WRBUF t_var = wrbuf_alloc();
471             for (e = s->conf; e; e = e->next)
472             {
473                 if (!strcmp(e->pattern, "criteria.optional"))
474                 {
475                     int optional = 1;
476                     size_t i = strlen(e->value), j;
477
478                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
479                         --i;
480                     j = i;
481                     while (i > 0 && !strchr("$?", e->value[i-1]))
482                         --i;
483                     if (i > 0 && j > i)
484                     {
485                         wrbuf_rewind(t_var);
486                         wrbuf_write(t_var, e->value + i, j - i);
487                         wrbuf_puts(t_var, " ");
488                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
489                             optional = 0;
490                     }
491
492                     pr("  ", client_data);
493                     if (optional)
494                         pr("OPTIONAL { ", client_data);
495                     pr(e->value, client_data);
496                     if (optional)
497                         pr(" }", client_data);
498                     pr(" .\n", client_data);
499                 }
500             }
501             pr(wrbuf_cstr(res), client_data);
502             wrbuf_destroy(t_var);
503         }
504         wrbuf_destroy(res);
505         wrbuf_destroy(vars);
506     }
507     pr("\n}\n", client_data);
508
509     for (e = s->conf; e; e = e->next)
510     {
511         if (!strcmp(e->pattern, "modifier"))
512         {
513             pr(e->value, client_data);
514             pr("\n", client_data);
515         }
516     }
517     return errors ? -1 : r;
518 }
519
520 /*
521  * Local variables:
522  * c-basic-offset: 4
523  * c-file-style: "Stroustrup"
524  * indent-tabs-mode: nil
525  * End:
526  * vim: shiftwidth=4 tabstop=8 expandtab
527  */
528