Update schema and improve manual page MPSPARQL-29
[mp-sparql-moved-to-github.git] / src / sparql.c
1 /**
2  * \file sparql.c
3  * \brief SPARQL
4  */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
/**
 * \brief One configuration item, kept in a singly linked list.
 *
 * Both strings are copies owned by the NMEM of the enclosing handle
 * (see yaz_sparql_add_pattern).
 */
struct sparql_entry {
    char *pattern;              /**< key, e.g. "prefix", "form", "index.NAME" */
    char *value;                /**< text associated with the key */
    struct sparql_entry *next;  /**< following item, or 0 at the end */
};
16
17 struct yaz_sparql_s {
18     NMEM nmem;
19     struct sparql_entry *conf;
20     struct sparql_entry **last;
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 void yaz_sparql_include(yaz_sparql_t s, yaz_sparql_t u)
41 {
42     struct sparql_entry *e = u->conf;
43     for (; e; e = e->next)
44         yaz_sparql_add_pattern(s, e->pattern, e->value);
45 }
46
47 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
48                            const char *value)
49 {
50     struct sparql_entry *e;
51     assert(s);
52
53     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
54     e->pattern = nmem_strdup(s->nmem, pattern);
55     e->value = nmem_strdup(s->nmem, value);
56     e->next = 0;
57     *s->last = e;
58     s->last = &e->next;
59     return 0;
60 }
61
62 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
63                               Z_RPNQuery *q)
64 {
65     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
66 }
67
68 int yaz_sparql_from_uri_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
69                               const char *uri, const char *schema)
70 {
71     return yaz_sparql_from_uri_stream(s, addinfo, wrbuf_vp_puts, w, uri,
72                                       schema);
73 }
74
75 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
76 {
77     int j;
78     for (j = 0; j < attributes->num_attributes; j++)
79     {
80         Z_AttributeElement *ae = attributes->attributes[j];
81         if (*ae->attributeType == type)
82         {
83             if (ae->which == Z_AttributeValue_numeric)
84                 return *ae->value.numeric;
85         }
86     }
87     return 0;
88 }
89
90 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
91 {
92     int j;
93     for (j = 0; j < attributes->num_attributes; j++)
94     {
95         Z_AttributeElement *ae = attributes->attributes[j];
96         if (*ae->attributeType == type)
97         {
98             if (ae->which == Z_AttributeValue_complex)
99             {
100                 Z_ComplexAttribute *ca = ae->value.complex;
101                 int i;
102                 for (i = 0; i < ca->num_list; i++)
103                 {
104                     Z_StringOrNumeric *son = ca->list[i];
105                     if (son->which == Z_StringOrNumeric_string)
106                         return son->u.string;
107                 }
108             }
109         }
110     }
111     return 0;
112 }
113
114 static int z_term(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
115                   struct sparql_entry *e, const char *use_var,
116                   Z_Term *term, int indent, int *var_no)
117 {
118     const char *cp;
119     for (cp = e->value; *cp; cp++)
120     {
121         if (strchr(" \t\r\n\f", *cp) && !use_var)
122         {
123             use_var = e->value;
124             if (strchr("$?", e->value[0]))
125             {
126                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
127                 wrbuf_puts(vars, " ");
128             }
129         }
130         if (*cp == '%')
131         {
132             switch (*++cp)
133             {
134             case 's':
135                 wrbuf_puts(addinfo, "\"");
136                 switch (term->which)
137                 {
138                 case Z_Term_general:
139                     wrbuf_json_write(addinfo,
140                                 term->u.general->buf, term->u.general->len);
141                     break;
142                 case Z_Term_numeric:
143                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
144                     break;
145                 case Z_Term_characterString:
146                     wrbuf_json_puts(addinfo, term->u.characterString);
147                     break;
148                 }
149                 wrbuf_puts(addinfo, "\"");
150                 break;
151             case 'u':
152                 wrbuf_puts(addinfo, "<");
153                 switch (term->which)
154                 {
155                 case Z_Term_general:
156                     wrbuf_json_write(addinfo,
157                                 term->u.general->buf, term->u.general->len);
158                     break;
159                 case Z_Term_numeric:
160                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
161                     break;
162                 case Z_Term_characterString:
163                     wrbuf_json_puts(addinfo, term->u.characterString);
164                     break;
165                 }
166                 wrbuf_puts(addinfo, ">");
167                 break;
168             case 't':
169                 switch (term->which)
170                 {
171                 case Z_Term_general:
172                     wrbuf_json_write(addinfo,
173                                 term->u.general->buf, term->u.general->len);
174                     break;
175                 case Z_Term_numeric:
176                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
177                     break;
178                 case Z_Term_characterString:
179                     wrbuf_json_puts(addinfo, term->u.characterString);
180                     break;
181                 }
182                 break;
183             case 'd':
184                 switch (term->which)
185                 {
186                 case Z_Term_general:
187                     wrbuf_write(addinfo,
188                                 term->u.general->buf, term->u.general->len);
189                     break;
190                 case Z_Term_numeric:
191                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
192                     break;
193                 case Z_Term_characterString:
194                     wrbuf_puts(addinfo, term->u.characterString);
195                     break;
196                 }
197                 break;
198             case 'v':
199                 wrbuf_printf(addinfo, "?v%d", *var_no);
200                 break;
201             case '%':
202                 wrbuf_putc(addinfo, '%');
203                 break;
204             }
205         }
206         else
207             wrbuf_putc(addinfo, *cp);
208     }
209     wrbuf_puts(res, wrbuf_cstr(addinfo));
210     return 0;
211 }
212
213 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
214                Z_AttributesPlusTerm *q, int indent, int *var_no)
215 {
216     Odr_int v = lookup_attr_numeric(q->attributes, 1);
217     struct sparql_entry *e = 0;
218     const char *use_var = 0;
219     int i;
220
221     wrbuf_puts(res, "  ");
222     for (i = 0; i < indent; i++)
223         wrbuf_puts(res, " ");
224     if (v)
225     {
226         for (e = s->conf; e; e = e->next)
227         {
228             if (!strncmp(e->pattern, "index.", 6))
229             {
230                 char *end = 0;
231                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
232
233                 if (end && *end == '\0' && v == w)
234                     break;
235             }
236         }
237         if (!e)
238         {
239             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
240             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
241         }
242     }
243     else
244     {
245         const char *index_name = lookup_attr_string(q->attributes, 1);
246         if (!index_name)
247             index_name = "any";
248         for (e = s->conf; e; e = e->next)
249         {
250             if (!strncmp(e->pattern, "index.", 6))
251             {
252                 if (!strcmp(e->pattern + 6, index_name))
253                     break;
254             }
255         }
256         if (!e)
257         {
258             wrbuf_puts(addinfo, index_name);
259             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
260         }
261     }
262     assert(e);
263     wrbuf_rewind(addinfo);
264
265     z_term(s, addinfo, res, vars, e, use_var, q->term, indent, var_no);
266     (*var_no)++;
267     return 0;
268 }
269
270
271 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
272                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
273                          int *var_no)
274 {
275     int i;
276     if (q->which == Z_RPNStructure_complex)
277     {
278         int r;
279         Z_Complex *c = q->u.complex;
280         Z_Operator *op = c->roperator;
281         if (op->which == Z_Operator_and)
282         {
283             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
284             if (r)
285                 return r;
286             wrbuf_puts(res, " .\n");
287             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
288         }
289         else if (op->which == Z_Operator_or)
290         {
291             for (i = 0; i < indent; i++)
292                 wrbuf_puts(res, " ");
293             wrbuf_puts(res, "  {\n");
294             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
295             if (r)
296                 return r;
297             wrbuf_puts(res, "\n");
298             for (i = 0; i < indent; i++)
299                 wrbuf_puts(res, " ");
300             wrbuf_puts(res, "  } UNION {\n");
301             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
302             wrbuf_puts(res, "\n");
303             for (i = 0; i < indent; i++)
304                 wrbuf_puts(res, " ");
305             wrbuf_puts(res, "  }");
306             return r;
307         }
308         else
309         {
310             return YAZ_BIB1_OPERATOR_UNSUPP;
311         }
312     }
313     else
314     {
315         Z_Operand *op = q->u.simple;
316         if (op->which == Z_Operand_APT)
317             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
318                        var_no);
319         else
320             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
321     }
322     return 0;
323 }
324
325 static int emit_prefixes(yaz_sparql_t s,
326                           WRBUF addinfo,
327                           void (*pr)(const char *buf,
328                                      void *client_data),
329                           void *client_data)
330 {
331     struct sparql_entry *e;
332     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
333     int errors = 0;
334     for (e = s->conf; e; e = e->next)
335     {
336         if (!strcmp(e->pattern, "prefix"))
337         {
338             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
339             int no = 0;
340
341             pr("PREFIX", client_data);
342             while (1)
343             {
344                 const char *tok_str;
345                 int token = yaz_tok_move(p);
346                 if (token != YAZ_TOK_STRING)
347                     break;
348                 pr(" ", client_data);
349
350                 tok_str = yaz_tok_parse_string(p);
351                 if (tok_str[0])
352                 {
353                     if (no > 0 && tok_str[0] != '<')
354                         pr("<", client_data);
355                     pr(tok_str, client_data);
356                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
357                         pr(">", client_data);
358                 }
359                 no++;
360             }
361             pr("\n", client_data);
362             yaz_tok_parse_destroy(p);
363         }
364         else if (!strcmp(e->pattern, "criteria"))
365         {
366             ;
367         }
368         else if (!strcmp(e->pattern, "criteria.optional"))
369         {
370             ;
371         }
372         else if (!strncmp(e->pattern, "index.", 6))
373         {
374             ;
375         }
376         else if (!strcmp(e->pattern, "form"))
377         {
378             ;
379         }
380         else if (!strcmp(e->pattern, "modifier"))
381         {
382             ;
383         }
384         else if (!strncmp(e->pattern, "present", 7))
385         {
386             ;
387         }
388         else if (!strncmp(e->pattern, "uri", 3))
389         {
390             ;
391         }
392         else
393         {
394             errors++;
395         }
396     }
397     yaz_tok_cfg_destroy(cfg);
398     return errors;
399 }
400
401 struct sparql_entry *lookup_schema(yaz_sparql_t s, const char *schema)
402 {
403     struct sparql_entry *e;
404
405     for (e = s->conf; e; e = e->next)
406     {
407         if (!strncmp(e->pattern, "present.", 8))
408         {
409             if (!schema || !strcmp(e->pattern + 8, schema))
410                 break;
411         }
412         if (!strncmp(e->pattern, "uri.", 4))
413         {
414             if (!schema || !strcmp(e->pattern + 4, schema))
415                 break;
416         }
417     }
418     return e;
419 }
420
421 int yaz_sparql_lookup_schema(yaz_sparql_t s, const char *schema)
422 {
423     return lookup_schema(s, schema) ? 1 : 0;
424 }
425
426 int yaz_sparql_from_uri_stream(yaz_sparql_t s,
427                                WRBUF addinfo,
428                                void (*pr)(const char *buf, void *client_data),
429                                void *client_data,
430                                const char *uri, const char *schema)
431 {
432     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
433     struct sparql_entry *e = lookup_schema(s, schema);
434     if (!e)
435         errors++;
436     if (!errors)
437     {
438         WRBUF res = wrbuf_alloc();
439         WRBUF vars = wrbuf_alloc();
440         int var_no = 0;
441         Z_Term term;
442
443         term.which = Z_Term_characterString;
444         term.u.characterString = (char *) uri;
445         r = z_term(s, addinfo, res, vars, e, 0, &term, 0, &var_no);
446         if (!r)
447         {
448             pr(wrbuf_cstr(res), client_data);
449             pr("\n", client_data);
450         }
451         wrbuf_destroy(res);
452         wrbuf_destroy(vars);
453     }
454     return errors ? -1 : r;
455 }
456
457 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
458                                WRBUF addinfo,
459                                void (*pr)(const char *buf,
460                                           void *client_data),
461                                void *client_data,
462                                Z_RPNQuery *q)
463 {
464     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
465     struct sparql_entry *e;
466
467     for (e = s->conf; e; e = e->next)
468     {
469         if (!strcmp(e->pattern, "form"))
470         {
471             pr(e->value, client_data);
472             pr("\n", client_data);
473         }
474     }
475     pr("WHERE {\n", client_data);
476     for (e = s->conf; e; e = e->next)
477     {
478         if (!strcmp(e->pattern, "criteria"))
479         {
480             pr("  ", client_data);
481             pr(e->value, client_data);
482             pr(" .\n", client_data);
483         }
484     }
485     if (!errors)
486     {
487         WRBUF res = wrbuf_alloc();
488         WRBUF vars = wrbuf_alloc();
489         int var_no = 0;
490         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
491         if (r == 0)
492         {
493             WRBUF t_var = wrbuf_alloc();
494             for (e = s->conf; e; e = e->next)
495             {
496                 if (!strcmp(e->pattern, "criteria.optional"))
497                 {
498                     int optional = 1;
499                     size_t i = strlen(e->value), j;
500
501                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
502                         --i;
503                     j = i;
504                     while (i > 0 && !strchr("$?", e->value[i-1]))
505                         --i;
506                     if (i > 0 && j > i)
507                     {
508                         wrbuf_rewind(t_var);
509                         wrbuf_write(t_var, e->value + i, j - i);
510                         wrbuf_puts(t_var, " ");
511                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
512                             optional = 0;
513                     }
514
515                     pr("  ", client_data);
516                     if (optional)
517                         pr("OPTIONAL { ", client_data);
518                     pr(e->value, client_data);
519                     if (optional)
520                         pr(" }", client_data);
521                     pr(" .\n", client_data);
522                 }
523             }
524             pr(wrbuf_cstr(res), client_data);
525             wrbuf_destroy(t_var);
526         }
527         wrbuf_destroy(res);
528         wrbuf_destroy(vars);
529     }
530     pr("\n}\n", client_data);
531
532     for (e = s->conf; e; e = e->next)
533     {
534         if (!strcmp(e->pattern, "modifier"))
535         {
536             pr(e->value, client_data);
537             pr("\n", client_data);
538         }
539     }
540     return errors ? -1 : r;
541 }
542
543 void yaz_sparql_explain_indexes( yaz_sparql_t s, WRBUF w, int indent)
544 {
545     char indentspace[200]; // must be enough
546     assert(indent<200);
547     int i;
548     for (i=0; i < indent; i++)
549         indentspace[i] = ' ';
550     indentspace[indent] = '\0';
551
552     struct sparql_entry *e;
553     wrbuf_puts(w,indentspace);
554     wrbuf_puts(w,"<indexInfo>\n");
555
556     for (e = s->conf; e; e = e->next)
557     {
558         /*
559         wrbuf_puts(w,"    <FOO>");
560         wrbuf_xmlputs(w, e->pattern );
561         wrbuf_puts(w,"  : ");
562         wrbuf_xmlputs(w, e->value );
563         wrbuf_puts(w,"    </FOO>\n");
564         */
565         if ( strncmp(e->pattern, "index.", 6 ) == 0 )
566         {
567             wrbuf_puts(w,indentspace);
568             wrbuf_puts(w,"  <index>\n");
569             wrbuf_puts(w,indentspace);
570             wrbuf_puts(w,"    <title>");
571             wrbuf_xmlputs(w, e->pattern + 6);
572             wrbuf_puts(w,"</title>\n");
573             wrbuf_puts(w,indentspace);
574             wrbuf_puts(w,"    <map><name>");
575             wrbuf_xmlputs(w, e->pattern + 6);
576             wrbuf_puts(w,"</name></map>\n");
577             wrbuf_puts(w,indentspace);
578             wrbuf_puts(w,"  </index>\n");
579         }
580     }
581     wrbuf_puts(w,indentspace);
582     wrbuf_puts(w,"</indexInfo>\n");
583 }
584
585 /*
586  * Local variables:
587  * c-basic-offset: 4
588  * c-file-style: "Stroustrup"
589  * indent-tabs-mode: nil
590  * End:
591  * vim: shiftwidth=4 tabstop=8 expandtab
592  */
593