%u variable: for <urls> MPSPARQL-12
[mp-sparql-moved-to-github.git] / src / sparql.c
1 /**
2  * \file sparql.c
3  * \brief SPARQL
4  */
5
6 #include <assert.h>
7 #include <yaz/diagbib1.h>
8 #include <yaz/tokenizer.h>
9 #include "sparql.h"
10
11 struct sparql_entry { /* one configuration entry; entries form a singly linked list */
12     char *pattern; /* entry key, e.g. "prefix", "form", "criteria", "modifier" or "index.<name>" */
13     char *value; /* text associated with the key */
14     struct sparql_entry *next; /* following entry in insertion order, 0 for the last one */
15 };
16
17 struct yaz_sparql_s { /* state behind a yaz_sparql_t handle */
18     NMEM nmem; /* pool from which the handle and all of its entries are allocated */
19     struct sparql_entry *conf; /* first configuration entry, 0 while the list is empty */
20     struct sparql_entry **last; /* link slot that receives the next appended entry */
21 };
22
23 yaz_sparql_t yaz_sparql_create(void)
24 {
25     NMEM nmem = nmem_create();
26     yaz_sparql_t s = (yaz_sparql_t) nmem_malloc(nmem, sizeof *s);
27
28     s->nmem = nmem;
29     s->conf = 0;
30     s->last = &s->conf;
31     return s;
32 }
33
34 void yaz_sparql_destroy(yaz_sparql_t s)
35 {
36     if (s)
37         nmem_destroy(s->nmem);
38 }
39
40 int yaz_sparql_add_pattern(yaz_sparql_t s, const char *pattern,
41                            const char *value)
42 {
43     struct sparql_entry *e;
44     assert(s);
45
46     e = (struct sparql_entry *) nmem_malloc(s->nmem, sizeof(*e));
47     e->pattern = nmem_strdup(s->nmem, pattern);
48     e->value = nmem_strdup(s->nmem, value);
49     e->next = 0;
50     *s->last = e;
51     s->last = &e->next;
52     return 0;
53 }
54
55 int yaz_sparql_from_rpn_wrbuf(yaz_sparql_t s, WRBUF addinfo, WRBUF w,
56                               Z_RPNQuery *q)
57 {
58     return yaz_sparql_from_rpn_stream(s, addinfo, wrbuf_vp_puts, w, q);
59 }
60
61 static Odr_int lookup_attr_numeric(Z_AttributeList *attributes, int type)
62 {
63     int j;
64     for (j = 0; j < attributes->num_attributes; j++)
65     {
66         Z_AttributeElement *ae = attributes->attributes[j];
67         if (*ae->attributeType == type)
68         {
69             if (ae->which == Z_AttributeValue_numeric)
70                 return *ae->value.numeric;
71         }
72     }
73     return 0;
74 }
75
76 static const char *lookup_attr_string(Z_AttributeList *attributes, int type)
77 {
78     int j;
79     for (j = 0; j < attributes->num_attributes; j++)
80     {
81         Z_AttributeElement *ae = attributes->attributes[j];
82         if (*ae->attributeType == type)
83         {
84             if (ae->which == Z_AttributeValue_complex)
85             {
86                 Z_ComplexAttribute *ca = ae->value.complex;
87                 int i;
88                 for (i = 0; i < ca->num_list; i++)
89                 {
90                     Z_StringOrNumeric *son = ca->list[i];
91                     if (son->which == Z_StringOrNumeric_string)
92                         return son->u.string;
93                 }
94             }
95         }
96     }
97     return 0;
98 }
99
100 static int apt(yaz_sparql_t s, WRBUF addinfo, WRBUF res, WRBUF vars,
101                Z_AttributesPlusTerm *q, int indent, int *var_no)
102 {
103     Z_Term *term = q->term;
104     Odr_int v = lookup_attr_numeric(q->attributes, 1);
105     struct sparql_entry *e = 0;
106     const char *cp;
107     const char *use_var = 0;
108     int i;
109
110     wrbuf_puts(res, "  ");
111     for (i = 0; i < indent; i++)
112         wrbuf_puts(res, " ");
113     if (v)
114     {
115         for (e = s->conf; e; e = e->next)
116         {
117             if (!strncmp(e->pattern, "index.", 6))
118             {
119                 char *end = 0;
120                 Odr_int w = odr_strtol(e->pattern + 6, &end, 10);
121
122                 if (end && *end == '\0' && v == w)
123                     break;
124             }
125         }
126         if (!e)
127         {
128             wrbuf_printf(addinfo, ODR_INT_PRINTF, v);
129             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
130         }
131     }
132     else
133     {
134         const char *index_name = lookup_attr_string(q->attributes, 1);
135         if (!index_name)
136             index_name = "any";
137         for (e = s->conf; e; e = e->next)
138         {
139             if (!strncmp(e->pattern, "index.", 6))
140             {
141                 if (!strcmp(e->pattern + 6, index_name))
142                     break;
143             }
144         }
145         if (!e)
146         {
147             wrbuf_puts(addinfo, index_name);
148             return YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
149         }
150     }
151     assert(e);
152     wrbuf_rewind(addinfo);
153
154     for (cp = e->value; *cp; cp++)
155     {
156         if (strchr(" \t\r\n\f", *cp) && !use_var)
157         {
158             use_var = e->value;
159             if (strchr("$?", e->value[0]))
160             {
161                 wrbuf_write(vars, e->value + 1, cp - e->value - 1);
162                 wrbuf_puts(vars, " ");
163             }
164         }
165         if (*cp == '%')
166         {
167             switch (*++cp)
168             {
169             case 's':
170                 wrbuf_puts(addinfo, "\"");
171                 switch (term->which)
172                 {
173                 case Z_Term_general:
174                     wrbuf_json_write(addinfo,
175                                 term->u.general->buf, term->u.general->len);
176                     break;
177                 case Z_Term_numeric:
178                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
179                     break;
180                 case Z_Term_characterString:
181                     wrbuf_json_puts(addinfo, term->u.characterString);
182                     break;
183                 }
184                 wrbuf_puts(addinfo, "\"");
185                 break;
186             case 'u':
187                 wrbuf_puts(addinfo, "<");
188                 switch (term->which)
189                 {
190                 case Z_Term_general:
191                     wrbuf_json_write(addinfo,
192                                 term->u.general->buf, term->u.general->len);
193                     break;
194                 case Z_Term_numeric:
195                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
196                     break;
197                 case Z_Term_characterString:
198                     wrbuf_json_puts(addinfo, term->u.characterString);
199                     break;
200                 }
201                 wrbuf_puts(addinfo, ">");
202                 break;
203             case 'd':
204                 switch (term->which)
205                 {
206                 case Z_Term_general:
207                     wrbuf_write(addinfo,
208                                 term->u.general->buf, term->u.general->len);
209                     break;
210                 case Z_Term_numeric:
211                     wrbuf_printf(addinfo, ODR_INT_PRINTF, *term->u.numeric);
212                     break;
213                 case Z_Term_characterString:
214                     wrbuf_puts(addinfo, term->u.characterString);
215                     break;
216                 }
217                 break;
218             case 'v':
219                 wrbuf_printf(addinfo, "?v%d", *var_no);
220                 break;
221             case '%':
222                 wrbuf_putc(addinfo, '%');
223                 break;
224             }
225         }
226         else
227             wrbuf_putc(addinfo, *cp);
228     }
229     wrbuf_puts(res, wrbuf_cstr(addinfo));
230     (*var_no)++;
231     return 0;
232 }
233
234
235 static int rpn_structure(yaz_sparql_t s, WRBUF addinfo,
236                          WRBUF res, WRBUF vars, Z_RPNStructure *q, int indent,
237                          int *var_no)
238 {
239     int i;
240     if (q->which == Z_RPNStructure_complex)
241     {
242         int r;
243         Z_Complex *c = q->u.complex;
244         Z_Operator *op = c->roperator;
245         if (op->which == Z_Operator_and)
246         {
247             r = rpn_structure(s, addinfo, res, vars, c->s1, indent, var_no);
248             if (r)
249                 return r;
250             wrbuf_puts(res, " .\n");
251             return rpn_structure(s, addinfo, res, vars, c->s2, indent, var_no);
252         }
253         else if (op->which == Z_Operator_or)
254         {
255             for (i = 0; i < indent; i++)
256                 wrbuf_puts(res, " ");
257             wrbuf_puts(res, "  {\n");
258             r = rpn_structure(s, addinfo, res, vars, c->s1, indent + 1, var_no);
259             if (r)
260                 return r;
261             wrbuf_puts(res, "\n");
262             for (i = 0; i < indent; i++)
263                 wrbuf_puts(res, " ");
264             wrbuf_puts(res, "  } UNION {\n");
265             r = rpn_structure(s, addinfo, res, vars, c->s2, indent + 1, var_no);
266             wrbuf_puts(res, "\n");
267             for (i = 0; i < indent; i++)
268                 wrbuf_puts(res, " ");
269             wrbuf_puts(res, "  }");
270             return r;
271         }
272         else
273         {
274             return YAZ_BIB1_OPERATOR_UNSUPP;
275         }
276     }
277     else
278     {
279         Z_Operand *op = q->u.simple;
280         if (op->which == Z_Operand_APT)
281             return apt(s, addinfo, res, vars, op->u.attributesPlusTerm, indent,
282                        var_no);
283         else
284             return YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM;
285     }
286     return 0;
287 }
288
289 int yaz_sparql_from_rpn_stream(yaz_sparql_t s,
290                                WRBUF addinfo,
291                                void (*pr)(const char *buf,
292                                           void *client_data),
293                                void *client_data,
294                                Z_RPNQuery *q)
295 {
296     struct sparql_entry *e;
297     yaz_tok_cfg_t cfg = yaz_tok_cfg_create();
298     int r = 0, errors = 0;
299
300     for (e = s->conf; e; e = e->next)
301     {
302         if (!strcmp(e->pattern, "prefix"))
303         {
304             yaz_tok_parse_t p = yaz_tok_parse_buf(cfg, e->value);
305             int no = 0;
306
307             pr("PREFIX", client_data);
308             while (1)
309             {
310                 const char *tok_str;
311                 int token = yaz_tok_move(p);
312                 if (token != YAZ_TOK_STRING)
313                     break;
314                 pr(" ", client_data);
315
316                 tok_str = yaz_tok_parse_string(p);
317                 if (tok_str[0])
318                 {
319                     if (no > 0 && tok_str[0] != '<')
320                         pr("<", client_data);
321                     pr(tok_str, client_data);
322                     if (no > 0 && tok_str[strlen(tok_str)-1] != '>')
323                         pr(">", client_data);
324                 }
325                 no++;
326             }
327             pr("\n", client_data);
328             yaz_tok_parse_destroy(p);
329         }
330         else if (!strcmp(e->pattern, "criteria"))
331         {
332             ;
333         }
334         else if (!strcmp(e->pattern, "criteria.optional"))
335         {
336             ;
337         }
338         else if (!strncmp(e->pattern, "index.", 6))
339         {
340             ;
341         }
342         else if (!strcmp(e->pattern, "form"))
343         {
344             ;
345         }
346         else if (!strcmp(e->pattern, "modifier"))
347         {
348             ;
349         }
350         else
351         {
352             errors++;
353         }
354     }
355     for (e = s->conf; e; e = e->next)
356     {
357         if (!strcmp(e->pattern, "form"))
358         {
359             pr(e->value, client_data);
360             pr("\n", client_data);
361         }
362     }
363     pr("WHERE {\n", client_data);
364     for (e = s->conf; e; e = e->next)
365     {
366         if (!strcmp(e->pattern, "criteria"))
367         {
368             pr("  ", client_data);
369             pr(e->value, client_data);
370             pr(" .\n", client_data);
371         }
372     }
373     if (!errors)
374     {
375         WRBUF res = wrbuf_alloc();
376         WRBUF vars = wrbuf_alloc();
377         int var_no = 0;
378         r = rpn_structure(s, addinfo, res, vars, q->RPNStructure, 0, &var_no);
379         if (r == 0)
380         {
381             WRBUF t_var = wrbuf_alloc();
382             for (e = s->conf; e; e = e->next)
383             {
384                 if (!strcmp(e->pattern, "criteria.optional"))
385                 {
386                     int optional = 1;
387                     size_t i = strlen(e->value), j;
388
389                     while (i > 0 && strchr(" \t\r\n\f", e->value[i-1]))
390                         --i;
391                     j = i;
392                     while (i > 0 && !strchr("$?", e->value[i-1]))
393                         --i;
394                     if (i > 0 && j > i)
395                     {
396                         wrbuf_rewind(t_var);
397                         wrbuf_write(t_var, e->value + i, j - i);
398                         wrbuf_puts(t_var, " ");
399                         if (strstr(wrbuf_cstr(vars), wrbuf_cstr(t_var)))
400                             optional = 0;
401                     }
402
403                     pr("  ", client_data);
404                     if (optional)
405                         pr("OPTIONAL { ", client_data);
406                     pr(e->value, client_data);
407                     if (optional)
408                         pr(" }", client_data);
409                     pr(" .\n", client_data);
410                 }
411             }
412             pr(wrbuf_cstr(res), client_data);
413             wrbuf_destroy(t_var);
414         }
415         wrbuf_destroy(res);
416         wrbuf_destroy(vars);
417     }
418     pr("\n}\n", client_data);
419
420     for (e = s->conf; e; e = e->next)
421     {
422         if (!strcmp(e->pattern, "modifier"))
423         {
424             pr(e->value, client_data);
425             pr("\n", client_data);
426         }
427     }
428     yaz_tok_cfg_destroy(cfg);
429
430     return errors ? -1 : r;
431 }
432
433 /*
434  * Local variables:
435  * c-basic-offset: 4
436  * c-file-style: "Stroustrup"
437  * indent-tabs-mode: nil
438  * End:
439  * vim: shiftwidth=4 tabstop=8 expandtab
440  */
441