First work on Separate search and present queries
[mp-sparql-moved-to-github.git] / src / sparql.c
1 /**
2  * \file sparql.c
3  * \brief SPARQL
4  */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
/** \brief One configuration entry: a node in a singly linked list */
struct sparql_entry {
    char *pattern;              /* entry name, e.g. "prefix" or "index.x" */
    char *value;                /* text associated with the entry name */
    struct sparql_entry *next;  /* following entry; 0 at the end of list */
};
16
17 struct yaz_sparql_s {
18     NMEM nmem;
19     struct sparql_entry *conf;
20     struct sparql_entry **last;
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
41                            const char *value)
42 {
43     struct sparql_entry *e;
44     assert(s);
45
46     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
47     e->pattern = nmem_strdup(s->nmem, pattern);
48     e->value = nmem_strdup(s->nmem, value);
49     e->next = 0;
50     *s->last = e;
51     s->last = &e->next;
52     return 0;
53 }
54
55 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
56                               Z_RPNQuery *q)
57 {
58     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
59 }
60
61 int yaz_sparql_from_uri_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
62                               const char *uri, const char *schema)
63 {
64     return yaz_sparql_from_uri_stream(s, addinfo, wrbuf_vp_puts, w, uri,
65                                       schema);
66 }
67
68 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
69 {
70     int j;
71     for (j = 0; j < attributes->num_attributes; j++)
72     {
73         Z_AttributeElement *ae = attributes->attributes[j];
74         if (*ae->attributeType == type)
75         {
76             if (ae->which == Z_AttributeValue_numeric)
77                 return *ae->value.numeric;
78         }
79     }
80     return 0;
81 }
82
83 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
84 {
85     int j;
86     for (j = 0; j < attributes->num_attributes; j++)
87     {
88         Z_AttributeElement *ae = attributes->attributes[j];
89         if (*ae->attributeType == type)
90         {
91             if (ae->which == Z_AttributeValue_complex)
92             {
93                 Z_ComplexAttribute *ca = ae->value.complex;
94                 int i;
95                 for (i = 0; i < ca->num_list; i++)
96                 {
97                     Z_StringOrNumeric *son = ca->list[i];
98                     if (son->which == Z_StringOrNumeric_string)
99                         return son->u.string;
100                 }
101             }
102         }
103     }
104     return 0;
105 }
106
107 static int z_term(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
108                   struct sparql_entry *e, const char *use_var,
109                   Z_Term *term, int indent, int *var_no)
110 {
111     const char *cp;
112     for (cp = e->value; *cp; cp++)
113     {
114         if (strchr(" \t\r\n\f", *cp) && !use_var)
115         {
116             use_var = e->value;
117             if (strchr("$?", e->value[0]))
118             {
119                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
120                 wrbuf_puts(vars, " ");
121             }
122         }
123         if (*cp == '%')
124         {
125             switch (*++cp)
126             {
127             case 's':
128                 wrbuf_puts(addinfo, "\"");
129                 switch (term->which)
130                 {
131                 case Z_Term_general:
132                     wrbuf_json_write(addinfo,
133                                 term->u.general->buf, term->u.general->len);
134                     break;
135                 case Z_Term_numeric:
136                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
137                     break;
138                 case Z_Term_characterString:
139                     wrbuf_json_puts(addinfo, term->u.characterString);
140                     break;
141                 }
142                 wrbuf_puts(addinfo, "\"");
143                 break;
144             case 'u':
145                 wrbuf_puts(addinfo, "<");
146                 switch (term->which)
147                 {
148                 case Z_Term_general:
149                     wrbuf_json_write(addinfo,
150                                 term->u.general->buf, term->u.general->len);
151                     break;
152                 case Z_Term_numeric:
153                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
154                     break;
155                 case Z_Term_characterString:
156                     wrbuf_json_puts(addinfo, term->u.characterString);
157                     break;
158                 }
159                 wrbuf_puts(addinfo, ">");
160                 break;
161             case 'd':
162                 switch (term->which)
163                 {
164                 case Z_Term_general:
165                     wrbuf_write(addinfo,
166                                 term->u.general->buf, term->u.general->len);
167                     break;
168                 case Z_Term_numeric:
169                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
170                     break;
171                 case Z_Term_characterString:
172                     wrbuf_puts(addinfo, term->u.characterString);
173                     break;
174                 }
175                 break;
176             case 'v':
177                 wrbuf_printf(addinfo, "?v%d", *var_no);
178                 break;
179             case '%':
180                 wrbuf_putc(addinfo, '%');
181                 break;
182             }
183         }
184         else
185             wrbuf_putc(addinfo, *cp);
186     }
187     wrbuf_puts(res, wrbuf_cstr(addinfo));
188     return 0;
189 }
190
191 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
192                Z_AttributesPlusTerm *q, int indent, int *var_no)
193 {
194     Odr_int v = lookup_attr_numeric(q->attributes, 1);
195     struct sparql_entry *e = 0;
196     const char *use_var = 0;
197     int i;
198
199     wrbuf_puts(res, "  ");
200     for (i = 0; i < indent; i++)
201         wrbuf_puts(res, " ");
202     if (v)
203     {
204         for (e = s->conf; e; e = e->next)
205         {
206             if (!strncmp(e->pattern, "index.", 6))
207             {
208                 char *end = 0;
209                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
210
211                 if (end && *end == '\0' && v == w)
212                     break;
213             }
214         }
215         if (!e)
216         {
217             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
218             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
219         }
220     }
221     else
222     {
223         const char *index_name = lookup_attr_string(q->attributes, 1);
224         if (!index_name)
225             index_name = "any";
226         for (e = s->conf; e; e = e->next)
227         {
228             if (!strncmp(e->pattern, "index.", 6))
229             {
230                 if (!strcmp(e->pattern + 6, index_name))
231                     break;
232             }
233         }
234         if (!e)
235         {
236             wrbuf_puts(addinfo, index_name);
237             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
238         }
239     }
240     assert(e);
241     wrbuf_rewind(addinfo);
242
243     z_term(s, addinfo, res, vars, e, use_var, q->term, indent, var_no);
244     (*var_no)++;
245     return 0;
246 }
247
248
249 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
250                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
251                          int *var_no)
252 {
253     int i;
254     if (q->which == Z_RPNStructure_complex)
255     {
256         int r;
257         Z_Complex *c = q->u.complex;
258         Z_Operator *op = c->roperator;
259         if (op->which == Z_Operator_and)
260         {
261             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
262             if (r)
263                 return r;
264             wrbuf_puts(res, " .\n");
265             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
266         }
267         else if (op->which == Z_Operator_or)
268         {
269             for (i = 0; i < indent; i++)
270                 wrbuf_puts(res, " ");
271             wrbuf_puts(res, "  {\n");
272             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
273             if (r)
274                 return r;
275             wrbuf_puts(res, "\n");
276             for (i = 0; i < indent; i++)
277                 wrbuf_puts(res, " ");
278             wrbuf_puts(res, "  } UNION {\n");
279             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
280             wrbuf_puts(res, "\n");
281             for (i = 0; i < indent; i++)
282                 wrbuf_puts(res, " ");
283             wrbuf_puts(res, "  }");
284             return r;
285         }
286         else
287         {
288             return YAZ_BIB1_OPERATOR_UNSUPP;
289         }
290     }
291     else
292     {
293         Z_Operand *op = q->u.simple;
294         if (op->which == Z_Operand_APT)
295             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
296                        var_no);
297         else
298             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
299     }
300     return 0;
301 }
302
303 static int emit_prefixes(yaz_sparql_t s,
304                           WRBUF addinfo,
305                           void (*pr)(const char *buf,
306                                      void *client_data),
307                           void *client_data)
308 {
309     struct sparql_entry *e;
310     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
311     int errors = 0;
312     for (e = s->conf; e; e = e->next)
313     {
314         if (!strcmp(e->pattern, "prefix"))
315         {
316             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
317             int no = 0;
318
319             pr("PREFIX", client_data);
320             while (1)
321             {
322                 const char *tok_str;
323                 int token = yaz_tok_move(p);
324                 if (token != YAZ_TOK_STRING)
325                     break;
326                 pr(" ", client_data);
327
328                 tok_str = yaz_tok_parse_string(p);
329                 if (tok_str[0])
330                 {
331                     if (no > 0 && tok_str[0] != '<')
332                         pr("<", client_data);
333                     pr(tok_str, client_data);
334                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
335                         pr(">", client_data);
336                 }
337                 no++;
338             }
339             pr("\n", client_data);
340             yaz_tok_parse_destroy(p);
341         }
342         else if (!strcmp(e->pattern, "criteria"))
343         {
344             ;
345         }
346         else if (!strcmp(e->pattern, "criteria.optional"))
347         {
348             ;
349         }
350         else if (!strncmp(e->pattern, "index.", 6))
351         {
352             ;
353         }
354         else if (!strcmp(e->pattern, "form"))
355         {
356             ;
357         }
358         else if (!strcmp(e->pattern, "modifier"))
359         {
360             ;
361         }
362         else if (!strncmp(e->pattern, "uri", 3))
363         {
364             ;
365         }
366         else
367         {
368             errors++;
369         }
370     }
371     yaz_tok_cfg_destroy(cfg);
372     return errors;
373 }
374
375 int yaz_sparql_from_uri_stream(yaz_sparql_t s,
376                                WRBUF addinfo,
377                                void (*pr)(const char *buf, void *client_data),
378                                void *client_data,
379                                const char *uri, const char *schema)
380 {
381     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
382     struct sparql_entry *e;
383
384     for (e = s->conf; e; e = e->next)
385     {
386         if (!schema && !strcmp(e->pattern, "uri"))
387             break;
388         else if (schema && !strncmp(e->pattern, "uri.", 4))
389         {
390             if (!strcmp(e->pattern + 4, schema))
391                 break;
392         }
393     }
394     if (!e)
395         errors++;
396     if (!errors)
397     {
398         WRBUF res = wrbuf_alloc();
399         WRBUF vars = wrbuf_alloc();
400         int var_no = 0;
401         Z_Term term;
402
403         term.which = Z_Term_characterString;
404         term.u.characterString = (char *) uri;
405         r = z_term(s, addinfo, res, vars, e, 0, &term, 0, &var_no);
406         if (!r)
407         {
408             pr(wrbuf_cstr(res), client_data);
409             pr("\n", client_data);
410         }
411         wrbuf_destroy(res);
412         wrbuf_destroy(vars);
413     }
414     return errors ? -1 : r;
415 }
416
417 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
418                                WRBUF addinfo,
419                                void (*pr)(const char *buf,
420                                           void *client_data),
421                                void *client_data,
422                                Z_RPNQuery *q)
423 {
424     int r = 0, errors = emit_prefixes(s, addinfo, pr, client_data);
425     struct sparql_entry *e;
426
427     for (e = s->conf; e; e = e->next)
428     {
429         if (!strcmp(e->pattern, "form"))
430         {
431             pr(e->value, client_data);
432             pr("\n", client_data);
433         }
434     }
435     pr("WHERE {\n", client_data);
436     for (e = s->conf; e; e = e->next)
437     {
438         if (!strcmp(e->pattern, "criteria"))
439         {
440             pr("  ", client_data);
441             pr(e->value, client_data);
442             pr(" .\n", client_data);
443         }
444     }
445     if (!errors)
446     {
447         WRBUF res = wrbuf_alloc();
448         WRBUF vars = wrbuf_alloc();
449         int var_no = 0;
450         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
451         if (r == 0)
452         {
453             WRBUF t_var = wrbuf_alloc();
454             for (e = s->conf; e; e = e->next)
455             {
456                 if (!strcmp(e->pattern, "criteria.optional"))
457                 {
458                     int optional = 1;
459                     size_t i = strlen(e->value), j;
460
461                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
462                         --i;
463                     j = i;
464                     while (i > 0 && !strchr("$?", e->value[i-1]))
465                         --i;
466                     if (i > 0 && j > i)
467                     {
468                         wrbuf_rewind(t_var);
469                         wrbuf_write(t_var, e->value + i, j - i);
470                         wrbuf_puts(t_var, " ");
471                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
472                             optional = 0;
473                     }
474
475                     pr("  ", client_data);
476                     if (optional)
477                         pr("OPTIONAL { ", client_data);
478                     pr(e->value, client_data);
479                     if (optional)
480                         pr(" }", client_data);
481                     pr(" .\n", client_data);
482                 }
483             }
484             pr(wrbuf_cstr(res), client_data);
485             wrbuf_destroy(t_var);
486         }
487         wrbuf_destroy(res);
488         wrbuf_destroy(vars);
489     }
490     pr("\n}\n", client_data);
491
492     for (e = s->conf; e; e = e->next)
493     {
494         if (!strcmp(e->pattern, "modifier"))
495         {
496             pr(e->value, client_data);
497             pr("\n", client_data);
498         }
499     }
500     return errors ? -1 : r;
501 }
502
503 /*
504  * Local variables:
505  * c-basic-offset: 4
506  * c-file-style: "Stroustrup"
507  * indent-tabs-mode: nil
508  * End:
509  * vim: shiftwidth=4 tabstop=8 expandtab
510  */
511