New metadata facility "icurule" for normalizing metadata text PAZ-1002
[pazpar2-moved-to-github.git] / src / pazpar2_config.c
1 /* This file is part of Pazpar2.
2    Copyright (C) Index Data
3
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <string.h>
25 #include <assert.h>
26
27 #include <libxml/parser.h>
28 #include <libxml/tree.h>
29 #include <libxml/xinclude.h>
30
31 #include <yaz/yaz-util.h>
32 #include <yaz/nmem.h>
33 #include <yaz/snprintf.h>
34 #include <yaz/tpath.h>
35 #include <yaz/xml_include.h>
36
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #if HAVE_UNISTD_H
40 #include <unistd.h>
41 #endif
42 #include "ppmutex.h"
43 #include "incref.h"
44 #include "pazpar2_config.h"
45 #include "service_xslt.h"
46 #include "settings.h"
47 #include "eventl.h"
48 #include "http.h"
49
50 struct conf_config
51 {
52     NMEM nmem; /* for conf_config and servers memory */
53     struct conf_server *servers;
54
55     int no_threads;
56     WRBUF confdir;
57     char *path;
58     iochan_man_t iochan_man;
59     database_hosts_t database_hosts;
60 };
61
62 struct service_xslt
63 {
64     char *id;
65     xsltStylesheetPtr xsp;
66     struct service_xslt *next;
67 };
68
69 static char *xml_context(const xmlNode *ptr, char *res, size_t len)
70 {
71     xmlAttr *attr = ptr->properties;
72     size_t off = len - 1;
73
74     res[off] = '\0';
75     for (; attr; attr = attr->next)
76     {
77         size_t l = strlen((const char *) attr->name);
78         if (off <= l + 1)
79             break;
80         off = off - l;
81         memcpy(res + off, attr->name, l);
82         res[--off] = '@';
83     }
84     while (ptr && ptr->type == XML_ELEMENT_NODE)
85     {
86         size_t l = strlen((const char *) ptr->name);
87         if (off <= l + 1)
88             break;
89
90         off = off - l;
91         memcpy(res + off, ptr->name, l);
92         res[--off] = '/';
93
94         ptr = ptr->parent;
95     }
96     return res + off;
97 }
98
99 struct conf_service *service_init(struct conf_server *server,
100                                          int num_metadata, int num_sortkeys,
101                                          const char *service_id)
102 {
103     struct conf_service * service = 0;
104     NMEM nmem = nmem_create();
105
106
107     service = nmem_malloc(nmem, sizeof(struct conf_service));
108     service->mutex = 0;
109     service->ref_count = 1;
110     service->nmem = nmem;
111     service->next = 0;
112     service->databases = 0;
113     service->xslt_list = 0;
114     service->ccl_bibset = 0;
115     service->server = server;
116     service->session_timeout = 60; /* default session timeout */
117     service->z3950_session_timeout = 180;
118     service->z3950_operation_timeout = 30;
119     service->rank_cluster = 1;
120     service->rank_debug = 0;
121     service->rank_follow = 0.0;
122     service->rank_lead = 0.0;
123     service->rank_length = 2;
124
125     service->charsets = 0;
126
127     service->id = service_id ? nmem_strdup(nmem, service_id) : 0;
128
129     // Setup a dictionary from server.
130     service->dictionary = 0;
131
132     service->settings = nmem_malloc(nmem, sizeof(*service->settings));
133     service->settings->num_settings = PZ_MAX_EOF;
134     service->settings->settings = nmem_malloc(nmem, sizeof(struct setting*) * service->settings->num_settings);
135     memset(service->settings->settings, 0, sizeof(struct setting*) * service->settings->num_settings);
136     //  inherit_server_settings_values(service);
137
138     service->next = 0;
139
140     service->num_metadata = num_metadata;
141
142     service->metadata = 0;
143     if (service->num_metadata)
144         service->metadata
145             = nmem_malloc(nmem,
146                           sizeof(struct conf_metadata) * service->num_metadata);
147     service->num_sortkeys = num_sortkeys;
148
149     service->default_sort = nmem_strdup(nmem, "relevance");
150     service->sortkeys = 0;
151     if (service->num_sortkeys)
152         service->sortkeys
153             = nmem_malloc(nmem,
154                           sizeof(struct conf_sortkey) * service->num_sortkeys);
155     service->xml_node = 0;
156     return service;
157 }
158
159 static struct conf_metadata* conf_service_add_metadata(
160     struct conf_service *service,
161     int field_id,
162     const char *name,
163     enum conf_metadata_type type,
164     enum conf_metadata_merge merge,
165     enum conf_setting_type setting,
166     int brief,
167     int termlist,
168     const char *rank,
169     int sortkey_offset,
170     enum conf_metadata_mergekey mt,
171     const char *facetrule,
172     const char *limitmap,
173     const char *limitcluster,
174     const char *icurule
175     )
176 {
177     struct conf_metadata * md = 0;
178     NMEM nmem = service->nmem;
179
180     if (!service->metadata || !service->num_metadata
181         || field_id < 0  || !(field_id < service->num_metadata))
182         return 0;
183
184     md = service->metadata + field_id;
185     assert(nmem && md && name);
186
187     md->name = nmem_strdup(nmem, name);
188
189     md->type = type;
190
191     // enforcing that type_year is always range_merge
192     if (md->type == Metadata_type_year)
193         md->merge = Metadata_merge_range;
194     else
195         md->merge = merge;
196
197     md->setting = setting;
198     md->brief = brief;
199     md->termlist = termlist;
200     md->rank = nmem_strdup_null(nmem, rank);
201     md->sortkey_offset = sortkey_offset;
202     md->mergekey = mt;
203     md->facetrule = nmem_strdup_null(nmem, facetrule);
204     md->limitmap = nmem_strdup_null(nmem, limitmap);
205     md->limitcluster = nmem_strdup_null(nmem, limitcluster);
206     md->icurule = nmem_strdup_null(nmem, icurule);
207     return md;
208 }
209
210 static struct conf_sortkey *conf_service_add_sortkey(
211     struct conf_service *service,
212     int field_id,
213     const char *name,
214     enum conf_metadata_type type)
215 {
216     struct conf_sortkey *sk = 0;
217     NMEM nmem = service->nmem;
218
219     if (!service->sortkeys || !service->num_sortkeys
220         || field_id < 0 || !(field_id < service->num_sortkeys))
221         return 0;
222
223     sk = service->sortkeys + field_id;
224
225     assert(nmem && sk && name);
226
227     sk->name = nmem_strdup(nmem, name);
228     sk->type = type;
229     return sk;
230 }
231
232 int conf_service_metadata_field_id(struct conf_service *service,
233                                    const char * name)
234 {
235     int i = 0;
236
237     if (!service || !service->metadata || !service->num_metadata)
238         return -1;
239
240     for (i = 0; i < service->num_metadata; i++)
241         if (!strcmp(name, (service->metadata[i]).name))
242             return i;
243     return -1;
244 }
245
246 int conf_service_sortkey_field_id(struct conf_service *service,
247                                   const char * name)
248 {
249     int i = 0;
250
251     if (!service || !service->sortkeys || !service->num_sortkeys)
252         return -1;
253
254     for (i = 0; i < service->num_sortkeys; i++)
255         if (!strcmp(name, (service->sortkeys[i]).name))
256             return i;
257     return -1;
258 }
259
260 static void conf_dir_path(struct conf_config *config, WRBUF w, const char *src)
261 {
262     char full_path[1024];
263     if (yaz_filepath_resolve(src, config->path,
264                              wrbuf_len(config->confdir) > 0 ?
265                              wrbuf_cstr(config->confdir) : ".",
266                              full_path))
267     {
268         wrbuf_puts(w, full_path);
269     }
270     else
271     {
272         yaz_log(YLOG_WARN, "File not found: fname=%s path=%s base=%s", src,
273                 config->path, wrbuf_cstr(config->confdir));
274         wrbuf_puts(w, src);
275     }
276 }
277
278 void service_destroy(struct conf_service *service)
279 {
280     if (service)
281     {
282         if (!pazpar2_decref(&service->ref_count, service->mutex))
283         {
284             service_xslt_destroy(service);
285             pp2_charset_fact_destroy(service->charsets);
286             ccl_qual_rm(&service->ccl_bibset);
287             yaz_mutex_destroy(&service->mutex);
288             nmem_destroy(service->nmem);
289         }
290     }
291 }
292
293 void service_incref(struct conf_service *service)
294 {
295     pazpar2_incref(&service->ref_count, service->mutex);
296 }
297
298 static int parse_metadata(struct conf_service *service, xmlNode *n,
299                           int *md_node, int *sk_node)
300 {
301     enum conf_metadata_type type = Metadata_type_generic;
302     enum conf_metadata_merge merge = Metadata_merge_no;
303     enum conf_setting_type setting = Metadata_setting_no;
304     enum conf_metadata_mergekey mergekey_type = Metadata_mergekey_no;
305     int brief = 0;
306     int termlist = 0;
307     int sortkey_offset = 0;
308     xmlChar *xml_name = 0;
309     xmlChar *xml_brief = 0;
310     xmlChar *xml_sortkey = 0;
311     xmlChar *xml_merge = 0;
312     xmlChar *xml_type = 0;
313     xmlChar *xml_termlist = 0;
314     xmlChar *xml_rank = 0;
315     xmlChar *xml_setting = 0;
316     xmlChar *xml_mergekey = 0;
317     xmlChar *xml_limitmap = 0;
318     xmlChar *xml_limitcluster = 0;
319     xmlChar *xml_icu_chain = 0;
320     xmlChar *xml_icurule = 0;
321
322     struct _xmlAttr *attr;
323
324     assert(service);
325
326     for (attr = n->properties; attr; attr = attr->next)
327     {
328         if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
329             attr->children && attr->children->type == XML_TEXT_NODE)
330             xml_name = attr->children->content;
331         else if (!xmlStrcmp(attr->name, BAD_CAST "brief") &&
332                  attr->children && attr->children->type == XML_TEXT_NODE)
333             xml_brief = attr->children->content;
334         else if (!xmlStrcmp(attr->name, BAD_CAST "sortkey") &&
335                  attr->children && attr->children->type == XML_TEXT_NODE)
336             xml_sortkey = attr->children->content;
337         else if (!xmlStrcmp(attr->name, BAD_CAST "merge") &&
338                  attr->children && attr->children->type == XML_TEXT_NODE)
339             xml_merge = attr->children->content;
340         else if (!xmlStrcmp(attr->name, BAD_CAST "type") &&
341                  attr->children && attr->children->type == XML_TEXT_NODE)
342             xml_type = attr->children->content;
343         else if (!xmlStrcmp(attr->name, BAD_CAST "termlist") &&
344                  attr->children && attr->children->type == XML_TEXT_NODE)
345             xml_termlist = attr->children->content;
346         else if (!xmlStrcmp(attr->name, BAD_CAST "rank") &&
347                  attr->children && attr->children->type == XML_TEXT_NODE)
348             xml_rank = attr->children->content;
349         else if (!xmlStrcmp(attr->name, BAD_CAST "setting") &&
350                  attr->children && attr->children->type == XML_TEXT_NODE)
351             xml_setting = attr->children->content;
352         else if (!xmlStrcmp(attr->name, BAD_CAST "mergekey") &&
353                  attr->children && attr->children->type == XML_TEXT_NODE)
354             xml_mergekey = attr->children->content;
355         else if (!xmlStrcmp(attr->name, BAD_CAST "facetrule") &&
356                  attr->children && attr->children->type == XML_TEXT_NODE)
357             xml_icu_chain = attr->children->content;
358         else if (!xmlStrcmp(attr->name, BAD_CAST "limitmap") &&
359                  attr->children && attr->children->type == XML_TEXT_NODE)
360             xml_limitmap = attr->children->content;
361         else if (!xmlStrcmp(attr->name, BAD_CAST "limitcluster") &&
362                  attr->children && attr->children->type == XML_TEXT_NODE)
363             xml_limitcluster = attr->children->content;
364         else if (!xmlStrcmp(attr->name, BAD_CAST "icurule") &&
365                  attr->children && attr->children->type == XML_TEXT_NODE)
366             xml_icurule = attr->children->content;
367         else
368         {
369             yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name);
370             return -1;
371         }
372     }
373
374     // now do the parsing logic
375     if (!xml_name)
376     {
377         yaz_log(YLOG_FATAL, "Must specify name in metadata element");
378         return -1;
379     }
380     if (xml_brief)
381     {
382         if (!strcmp((const char *) xml_brief, "yes"))
383             brief = 1;
384         else if (strcmp((const char *) xml_brief, "no"))
385         {
386             yaz_log(YLOG_FATAL, "metadata/brief must be yes or no");
387             return -1;
388         }
389     }
390
391     if (xml_termlist)
392     {
393         if (!strcmp((const char *) xml_termlist, "yes"))
394             termlist = 1;
395         else if (strcmp((const char *) xml_termlist, "no"))
396         {
397             yaz_log(YLOG_FATAL, "metadata/termlist must be yes or no");
398             return -1;
399         }
400     }
401
402     if (xml_type)
403     {
404         if (!strcmp((const char *) xml_type, "generic"))
405             type = Metadata_type_generic;
406         else if (!strcmp((const char *) xml_type, "year"))
407             type = Metadata_type_year;
408         else if (!strcmp((const char *) xml_type, "date"))
409             type = Metadata_type_date;
410         else if (!strcmp((const char *) xml_type, "float"))
411             type = Metadata_type_float;
412         else
413         {
414             yaz_log(YLOG_FATAL,
415                     "Unknown value for metadata/type: %s", xml_type);
416             return -1;
417         }
418     }
419
420     if (xml_merge)
421     {
422         if (!strcmp((const char *) xml_merge, "no"))
423             merge = Metadata_merge_no;
424         else if (!strcmp((const char *) xml_merge, "unique"))
425             merge = Metadata_merge_unique;
426         else if (!strcmp((const char *) xml_merge, "longest"))
427             merge = Metadata_merge_longest;
428         else if (!strcmp((const char *) xml_merge, "range"))
429             merge = Metadata_merge_range;
430         else if (!strcmp((const char *) xml_merge, "all"))
431             merge = Metadata_merge_all;
432         else if (!strcmp((const char *) xml_merge, "first"))
433             merge = Metadata_merge_first;
434         else
435         {
436             yaz_log(YLOG_FATAL,
437                     "Unknown value for metadata/merge: %s", xml_merge);
438             return -1;
439         }
440     }
441
442     if (xml_setting)
443     {
444         if (!strcmp((const char *) xml_setting, "no"))
445             setting = Metadata_setting_no;
446         else if (!strcmp((const char *) xml_setting, "postproc"))
447             setting = Metadata_setting_postproc;
448         else if (!strcmp((const char *) xml_setting, "parameter"))
449             setting = Metadata_setting_parameter;
450         else
451         {
452             yaz_log(YLOG_FATAL,
453                     "Unknown value for metadata/setting: %s", xml_setting);
454             return -1;
455         }
456     }
457
458     // add a sortkey if so specified
459     if (xml_sortkey && strcmp((const char *) xml_sortkey, "no"))
460     {
461         enum conf_metadata_type sk_type = type;
462         if (merge == Metadata_merge_no)
463         {
464             yaz_log(YLOG_FATAL,
465                     "Can't specify sortkey on a non-merged field");
466             return -1;
467         }
468         if (!strcmp((const char *) xml_sortkey, "yes"))
469             ;
470         if (!strcmp((const char *) xml_sortkey, "numeric"))
471             ;
472         else if (!strcmp((const char *) xml_sortkey, "skiparticle"))
473         {
474             if (sk_type == Metadata_type_generic)
475                 sk_type = Metadata_type_skiparticle;
476             else
477             {
478                 yaz_log(YLOG_FATAL,
479                         "skiparticle only supported for type=generic: %s",
480                     xml_type);
481                 return -1;
482             }
483         }
484         else
485         {
486             yaz_log(YLOG_FATAL,
487                     "Unknown sortkey in metadata element: %s",
488                     xml_sortkey);
489             return -1;
490         }
491         sortkey_offset = *sk_node;
492
493         conf_service_add_sortkey(service, *sk_node,
494                                  (const char *) xml_name, sk_type);
495         (*sk_node)++;
496     }
497     else
498         sortkey_offset = -1;
499
500     if (xml_mergekey)
501     {
502         if (!strcmp((const char *) xml_mergekey, "required"))
503             mergekey_type = Metadata_mergekey_required;
504         else if (!strcmp((const char *) xml_mergekey, "optional"))
505             mergekey_type = Metadata_mergekey_optional;
506         else if (!strcmp((const char *) xml_mergekey, "no"))
507             mergekey_type = Metadata_mergekey_no;
508         else
509         {
510             yaz_log(YLOG_FATAL, "Unknown value for mergekey: %s", xml_mergekey);
511             return -1;
512         }
513     }
514
515     // metadata known, assign values
516     conf_service_add_metadata(service, *md_node,
517                               (const char *) xml_name,
518                               type, merge, setting,
519                               brief, termlist,
520                               (const char *) xml_rank, sortkey_offset,
521                               mergekey_type,
522                               (const char *) xml_icu_chain,
523                               (const char *) xml_limitmap,
524                               (const char *) xml_limitcluster,
525                               (const char *) xml_icurule
526         );
527     (*md_node)++;
528     return 0;
529 }
530
531
532 static void count_metadata(xmlNode *n, int *num_metadata, int *num_sortkeys)
533 {
534     xmlChar *sortkey = xmlGetProp(n, (xmlChar *) "sortkey");
535     (*num_metadata)++;
536
537     if (sortkey && strcmp((const char *) sortkey, "no"))
538         (*num_sortkeys)++;
539     xmlFree(sortkey);
540 }
541
542
543 static struct conf_service *service_create_static(struct conf_server *server,
544                                                   xmlNode *node,
545                                                   const char *service_id)
546 {
547     xmlNode *n;
548     int md_node = 0;
549     int sk_node = 0;
550
551     struct conf_service *service = 0;
552     int num_metadata = 0;
553     int num_sortkeys = 0;
554     int got_settings = 0;
555
556     // count num_metadata and num_sortkeys
557     for (n = node->children; n; n = n->next)
558         if (n->type == XML_ELEMENT_NODE && !strcmp((const char *)
559                                                    n->name, "metadata"))
560         {
561             if (n->children) // This is a <metadata> container, look at its contents.
562             {
563                 xmlNode *m;
564                 for (m = n->children; m; m = m->next)
565                 {
566                     if (m->type == XML_ELEMENT_NODE &&
567                             !strcmp((const char *) m->name, "metadata"))
568                         count_metadata(m, &num_metadata, &num_sortkeys);
569                 }
570             }
571             else // This is a metadata-element proper, count it right away.
572                 count_metadata(n, &num_metadata, &num_sortkeys);
573         }
574
575     service = service_init(server, num_metadata, num_sortkeys, service_id);
576
577     for (n = node->children; n; n = n->next)
578     {
579         if (n->type != XML_ELEMENT_NODE)
580             continue;
581         if (!strcmp((const char *) n->name, "timeout"))
582         {
583             xmlChar *src = xmlGetProp(n, (xmlChar *) "session");
584             if (src)
585             {
586                 service->session_timeout = atoi((const char *) src);
587                 xmlFree(src);
588                 if (service->session_timeout < 9)
589                 {
590                     yaz_log(YLOG_FATAL, "session timeout out of range");
591                     return 0;
592                 }
593             }
594             src = xmlGetProp(n, (xmlChar *) "z3950_operation");
595             if (src)
596             {
597                 service->z3950_operation_timeout = atoi((const char *) src);
598                 xmlFree(src);
599                 if (service->z3950_session_timeout < 9)
600                 {
601                     yaz_log(YLOG_FATAL, "Z39.50 operation timeout out of range");
602                     return 0;
603                 }
604             }
605             src = xmlGetProp(n, (xmlChar *) "z3950_session");
606             if (src)
607             {
608                 service->z3950_session_timeout = atoi((const char *) src);
609                 xmlFree(src);
610                 if (service->z3950_session_timeout < 9)
611                 {
612                     yaz_log(YLOG_FATAL, "Z39.50 session timeout out of range");
613                     return 0;
614                 }
615             }
616         }
617         else if (!strcmp((const char *) n->name, "ccldirective"))
618         {
619             char *name;
620             char *value;
621             if (!service->ccl_bibset)
622                 service->ccl_bibset = ccl_qual_mk();
623             name = (char *) xmlGetProp(n, (xmlChar *) "name");
624             if (!name)
625             {
626                 yaz_log(YLOG_FATAL, "ccldirective: missing @name");
627                 return 0;
628             }
629             value = (char *) xmlGetProp(n, (xmlChar *) "value");
630             if (!value)
631             {
632                 xmlFree(name);
633                 yaz_log(YLOG_FATAL, "ccldirective: missing @value");
634                 return 0;
635             }
636             ccl_qual_add_special(service->ccl_bibset, name, value);
637             xmlFree(value);
638             xmlFree(name);
639         }
640         else if (!strcmp((const char *) n->name, "settings"))
641             got_settings++;
642         else if (!strcmp((const char *) n->name, "icu_chain"))
643         {
644             if (!service->charsets)
645                 service->charsets = pp2_charset_fact_create();
646             if (pp2_charset_fact_define(service->charsets, n, 0))
647             {
648                 yaz_log(YLOG_FATAL, "ICU chain definition error");
649                 return 0;
650             }
651         }
652         else if (!strcmp((const char *) n->name, "relevance")
653                  || !strcmp((const char *) n->name, "sort")
654                  || !strcmp((const char *) n->name, "mergekey")
655                  || !strcmp((const char *) n->name, "facet"))
656
657         {
658             if (!service->charsets)
659                 service->charsets = pp2_charset_fact_create();
660             if (pp2_charset_fact_define(service->charsets, n,
661                                         (const char *) n->name))
662             {
663                 yaz_log(YLOG_FATAL, "ICU chain definition error");
664                 return 0;
665             }
666         }
667         else if (!strcmp((const char *) n->name, (const char *) "metadata"))
668         {
669             if (n->children) // This is a <metadata> container, look at its content.
670             {
671                 xmlNode *m;
672                 for (m = n->children; m; m = m->next)
673                     if ((!strcmp((const char *) m->name, (const char *) "metadata")))
674                         if (parse_metadata(service, m, &md_node, &sk_node))
675                             return 0;
676             }
677             else // This is a metadata-element proper, count it right away.
678                 if (parse_metadata(service, n, &md_node, &sk_node))
679                     return 0;
680         }
681         else if (!strcmp((const char *) n->name, (const char *) "xslt"))
682         {
683             if (service_xslt_config(service, n))
684                 return 0;
685         }
686         else if (!strcmp((const char *) n->name, "rank"))
687         {
688             char *rank_cluster = (char *) xmlGetProp(n, (xmlChar *) "cluster");
689             char *rank_debug = (char *) xmlGetProp(n, (xmlChar *) "debug");
690             char *rank_follow = (char *) xmlGetProp(n, (xmlChar *) "follow");
691             char *rank_lead = (char *) xmlGetProp(n, (xmlChar *) "lead");
692             char *rank_length= (char *) xmlGetProp(n, (xmlChar *) "length");
693             if (rank_cluster)
694             {
695                 if (!strcmp(rank_cluster, "yes"))
696                     service->rank_cluster = 1;
697                 else if (!strcmp(rank_cluster, "no"))
698                     service->rank_cluster = 0;
699                 else
700                 {
701                     yaz_log(YLOG_FATAL, "service: rank@cluster boolean");
702                     return 0;
703                 }
704             }
705             if (rank_debug)
706             {
707                 if (!strcmp(rank_debug, "yes"))
708                     service->rank_debug = 1;
709                 else if (!strcmp(rank_debug, "no"))
710                     service->rank_debug = 0;
711                 else
712                 {
713                     yaz_log(YLOG_FATAL, "service: rank@debug boolean");
714                     return 0;
715                 }
716             }
717             if (rank_follow)
718             {
719                 service->rank_follow = atof(rank_follow);
720             }
721             if (rank_lead)
722             {
723                 service->rank_lead = atof(rank_lead);
724             }
725             if (rank_length)
726             {
727                 if (!strcmp(rank_length, "linear"))
728                     service->rank_length = 2;
729                 else if (!strcmp(rank_length, "log"))
730                     service->rank_length = 1;
731                 else if (!strcmp(rank_length, "none"))
732                     service->rank_length = 0;
733                 else
734                 {
735                     yaz_log(YLOG_FATAL, "service: rank@length linear|log|none");
736                     return 0;
737                 }
738             }
739             xmlFree(rank_cluster);
740             xmlFree(rank_debug);
741             xmlFree(rank_follow);
742             xmlFree(rank_lead);
743             xmlFree(rank_length);
744         }
745         else if (!strcmp((const char *) n->name, "sort-default"))
746         {
747             char *default_sort = (char *) xmlGetProp(n, (xmlChar *) "field");
748
749             if (default_sort && strcmp(default_sort, "")) {
750                 service->default_sort = nmem_strdup(service->nmem, default_sort);
751                 yaz_log(YLOG_LOG, "service %s: default sort order configured to: %s",
752                         service_id ? service_id : "unnamed", default_sort);
753             }
754             else
755             {
756                 yaz_log(YLOG_FATAL, "default sort order is invalid: %s", default_sort);
757                 return 0;
758             }
759             xmlFree(default_sort);
760         }
761         else
762         {
763             char tmp[80];
764             yaz_log(YLOG_FATAL, "Bad element: %s . Context: %s", n->name,
765                     xml_context(n, tmp, sizeof tmp));
766             return 0;
767         }
768     }
769     if (got_settings)
770     {
771         int pass;
772         /* metadata has been read.. Consider now settings */
773         init_settings(service);
774         for (pass = 1; pass <= 2; pass++)
775         {
776             for (n = node->children; n; n = n->next)
777             {
778                 if (n->type != XML_ELEMENT_NODE)
779                     continue;
780                 if (!strcmp((const char *) n->name, "settings"))
781                 {
782                     int ret;
783                     xmlChar *src = xmlGetProp(n, (xmlChar *) "src");
784                     if (src)
785                     {
786                         WRBUF w = wrbuf_alloc();
787                         conf_dir_path(server->config, w, (const char *) src);
788                         ret = settings_read_file(service, wrbuf_cstr(w), pass);
789                         wrbuf_destroy(w);
790                         xmlFree(src);
791                     }
792                     else
793                     {
794                         ret = settings_read_node(service, n, pass);
795                     }
796                     if (ret)
797                         return 0;
798                 }
799             }
800         }
801     }
802
803     {
804         xmlBufferPtr buf = xmlBufferCreate();
805         xmlNodeDump(buf, node->doc, node, 0, 0);
806         service->xml_node =
807             nmem_strdupn(service->nmem, (const char *) buf->content, buf->use);
808         xmlBufferFree(buf);
809     }
810     return service;
811 }
812
813 static int inherit_server_settings(struct conf_service *s)
814 {
815     int ret = 0;
816     struct conf_server *server = s->server;
817     if (!s->dictionary) /* service has no config settings ? */
818     {
819         if (server->settings_fname)
820         {
821             /* inherit settings from server */
822             init_settings(s);
823             if (settings_read_file(s, server->settings_fname, 1))
824                 ret = -1;
825             if (settings_read_file(s, server->settings_fname, 2))
826                 ret = -1;
827         }
828         else
829         {
830             yaz_log(YLOG_WARN, "server '%s' has no settings", s->id ? s->id : "unnamed");
831             init_settings(s);
832         }
833     }
834
835     /* use relevance/sort/mergekey/facet from server if not defined
836        for this service.. */
837     if (!s->charsets)
838     {
839         if (server->charsets)
840         {
841             s->charsets = server->charsets;
842             pp2_charset_fact_incref(s->charsets);
843         }
844         else
845         {
846             s->charsets = pp2_charset_fact_create();
847         }
848     }
849     return ret;
850 }
851
852 struct conf_service *service_create(struct conf_server *server,
853                                     xmlNode *node)
854 {
855     struct conf_service *service = service_create_static(server, node, 0);
856     if (service)
857     {
858         inherit_server_settings(service);
859         assert(service->mutex == 0);
860         pazpar2_mutex_create(&service->mutex, "conf");
861     }
862     return service;
863 }
864
865 static struct conf_server *server_create(struct conf_config *config,
866                                          NMEM nmem, xmlNode *node)
867 {
868     xmlNode *n;
869     struct conf_server *server = nmem_malloc(nmem, sizeof(struct conf_server));
870     xmlChar *server_id = xmlGetProp(node, (xmlChar *) "id");
871
872     server->host = "@";
873     server->port = 0;
874     server->proxy_host = 0;
875     server->proxy_port = 0;
876     server->myurl = 0;
877     server->service = 0;
878     server->config = config;
879     server->next = 0;
880     server->charsets = 0;
881     server->http_server = 0;
882     server->iochan_man = 0;
883     server->database_hosts = config->database_hosts;
884     server->settings_fname = 0;
885
886     if (server_id)
887     {
888         server->server_id = nmem_strdup(nmem, (const char *)server_id);
889         xmlFree(server_id);
890     }
891     else
892         server->server_id = 0;
893     for (n = node->children; n; n = n->next)
894     {
895         if (n->type != XML_ELEMENT_NODE)
896             continue;
897         if (!strcmp((const char *) n->name, "listen"))
898         {
899             xmlChar *port = xmlGetProp(n, (xmlChar *) "port");
900             xmlChar *host = xmlGetProp(n, (xmlChar *) "host");
901
902             if (port)
903                 server->port = nmem_strdup(nmem, (const char *) port);
904             if (host)
905                 server->host = nmem_strdup(nmem, (const char *) host);
906
907             xmlFree(port);
908             xmlFree(host);
909         }
910         else if (!strcmp((const char *) n->name, "proxy"))
911         {
912             xmlChar *port = xmlGetProp(n, (xmlChar *) "port");
913             xmlChar *host = xmlGetProp(n, (xmlChar *) "host");
914             xmlChar *myurl = xmlGetProp(n, (xmlChar *) "myurl");
915             if (port)
916                 server->proxy_port = atoi((const char *) port);
917             if (host)
918                 server->proxy_host = nmem_strdup(nmem, (const char *) host);
919             if (myurl)
920                 server->myurl = nmem_strdup(nmem, (const char *) myurl);
921             xmlFree(port);
922             xmlFree(host);
923             xmlFree(myurl);
924         }
925         else if (!strcmp((const char *) n->name, "settings"))
926         {
927             xmlChar *src = xmlGetProp(n, (xmlChar *) "src");
928             WRBUF w;
929             if (!src)
930             {
931                 yaz_log(YLOG_FATAL, "Missing src attribute for settings");
932                 return 0;
933             }
934             if (server->settings_fname)
935             {
936                 xmlFree(src);
937                 yaz_log(YLOG_FATAL, "Can't repeat 'settings'");
938                 return 0;
939             }
940             w = wrbuf_alloc();
941             conf_dir_path(config, w, (const char *) src);
942             server->settings_fname = nmem_strdup(nmem, wrbuf_cstr(w));
943             wrbuf_destroy(w);
944             xmlFree(src);
945         }
946         else if (!strcmp((const char *) n->name, "icu_chain"))
947         {
948             if (!server->charsets)
949                 server->charsets = pp2_charset_fact_create();
950             if (pp2_charset_fact_define(server->charsets, n, 0))
951             {
952                 yaz_log(YLOG_FATAL, "ICU chain definition error");
953                 return 0;
954             }
955         }
956         else if (!strcmp((const char *) n->name, "relevance")
957                  || !strcmp((const char *) n->name, "sort")
958                  || !strcmp((const char *) n->name, "mergekey")
959                  || !strcmp((const char *) n->name, "facet"))
960         {
961             if (!server->charsets)
962                 server->charsets = pp2_charset_fact_create();
963             if (pp2_charset_fact_define(server->charsets, n,
964                                         (const char *) n->name))
965             {
966                 yaz_log(YLOG_FATAL, "ICU chain definition error");
967                 return 0;
968             }
969         }
970         else if (!strcmp((const char *) n->name, "service"))
971         {
972             char *service_id = (char *)
973                 xmlGetProp(n, (xmlChar *) "id");
974
975             struct conf_service **sp = &server->service;
976             for (; *sp; sp = &(*sp)->next)
977                 if ((*sp)->id && service_id &&
978                     0 == strcmp((*sp)->id, service_id))
979                 {
980                     yaz_log(YLOG_FATAL, "Duplicate service: %s", service_id);
981                     break;
982                 }
983                 else if (!(*sp)->id && !service_id)
984                 {
985                     yaz_log(YLOG_FATAL, "Duplicate unnamed service");
986                     break;
987                 }
988
989             if (*sp)  /* service already exist */
990             {
991                 xmlFree(service_id);
992                 return 0;
993             }
994             else
995             {
996                 struct conf_service *s = service_create_static(server, n,
997                                                                service_id);
998                 xmlFree(service_id);
999                 if (!s)
1000                     return 0;
1001                 *sp = s;
1002             }
1003         }
1004         else
1005         {
1006             yaz_log(YLOG_FATAL, "Bad element: %s", n->name);
1007             return 0;
1008         }
1009     }
1010     if (!server->port)
1011     {
1012         yaz_log(YLOG_FATAL, "No listening port given");
1013         return 0;
1014     }
1015     if (server->service)
1016     {
1017         struct conf_service *s;
1018         for (s = server->service; s; s = s->next)
1019             inherit_server_settings(s);
1020     }
1021     return server;
1022 }
1023
1024 WRBUF conf_get_fname(struct conf_config *config, const char *fname)
1025 {
1026     WRBUF w = wrbuf_alloc();
1027
1028     conf_dir_path(config, w, fname);
1029     return w;
1030 }
1031
1032 struct conf_service *locate_service(struct conf_server *server,
1033                                     const char *service_id)
1034 {
1035     struct conf_service *s = server->service;
1036     for (; s; s = s->next)
1037         if (s->id && service_id && 0 == strcmp(s->id, service_id))
1038             break;
1039         else if (!s->id && !service_id)
1040             break;
1041     if (s)
1042         service_incref(s);
1043     return s;
1044 }
1045
1046 static void info_service_metadata(struct conf_service *service, WRBUF w)
1047 {
1048     int i;
1049     struct conf_metadata *md;
1050
1051     if (service->num_metadata)
1052     {
1053         for (i = 0; i < service->num_metadata; i++)
1054         {
1055             md = &(service->metadata[i]);
1056             wrbuf_puts(w, "   <metadata");
1057             if (md->name) {
1058                 wrbuf_puts(w, " name=\"");
1059                 wrbuf_xmlputs(w, md->name);
1060                 wrbuf_puts(w, "\"");
1061             }
1062             if (md->brief) {
1063                 wrbuf_puts(w, " brief=\"yes\"");
1064             }
1065             if (md->termlist) {
1066                 wrbuf_puts(w, " termlist=\"yes\"");
1067             }
1068             if (md->rank) {
1069                 wrbuf_puts(w, " rank=\"");
1070                 wrbuf_xmlputs(w, md->rank);
1071                 wrbuf_puts(w, "\"");
1072             }
1073             if (md->sortkey_offset > 0) {
1074                 wrbuf_puts(w, " sortkey=\"");
1075                 switch (service->sortkeys[md->sortkey_offset].type)
1076                 {
1077                 case Metadata_type_relevance:
1078                     wrbuf_puts(w, "relevance");
1079                     break;
1080                 case Metadata_type_skiparticle:
1081                     wrbuf_puts(w, "skiparticle");
1082                     break;
1083                 case Metadata_type_position:
1084                     wrbuf_puts(w, "position");
1085                     break;
1086                 default:
1087                     wrbuf_puts(w, "yes");
1088                     break;
1089                 }
1090                 wrbuf_puts(w, "\"");
1091             }
1092             switch (md->type)
1093             {
1094             case Metadata_type_generic:
1095             case Metadata_type_skiparticle:
1096                 break;
1097             case Metadata_type_year:
1098                 wrbuf_puts(w, " type=\"year\"");
1099                 break;
1100             case Metadata_type_date:
1101                 wrbuf_puts(w, " type=\"date\"");
1102                 break;
1103             case Metadata_type_float:
1104                 wrbuf_puts(w, " type=\"float\"");
1105                 break;
1106             case Metadata_type_relevance:
1107             case Metadata_type_position:
1108                 break;
1109             }
1110             switch (md->merge)
1111             {
1112             case Metadata_merge_no:
1113                 break;
1114             case Metadata_merge_unique:
1115                 wrbuf_puts(w, " merge=\"unique\"");
1116                 break;
1117             case Metadata_merge_longest:
1118                 wrbuf_puts(w, " merge=\"longest\"");
1119                 break;
1120             case Metadata_merge_range:
1121                 wrbuf_puts(w, " merge=\"range\"");
1122                 break;
1123             case Metadata_merge_all:
1124                 wrbuf_puts(w, " merge=\"all\"");
1125                 break;
1126             case Metadata_merge_first:
1127                 wrbuf_puts(w, " merge=\"first\"");
1128                 break;
1129             }
1130             switch (md->mergekey)
1131             {
1132             case Metadata_mergekey_no:
1133                 break;
1134             case Metadata_mergekey_optional:
1135                 wrbuf_puts(w, " mergekey=\"optional\"");
1136                 break;
1137             case Metadata_mergekey_required:
1138                 wrbuf_puts(w, " mergekey=\"required\"");
1139                 break;
1140             }
1141             wrbuf_puts(w, " />\n");
1142         }
1143     }
1144 }
1145
1146 static void info_service_databases(struct conf_service *service, WRBUF w)
1147 {
1148     struct database *db;
1149     struct setting *s;
1150     int i;
1151
1152     if (service->databases)
1153     {
1154         wrbuf_puts(w, "   <databases>\n");
1155         for(db = service->databases; db; db = db->next)
1156         {
1157             wrbuf_puts(w, "    <database");
1158             if (db->id)
1159             {
1160                 wrbuf_puts(w, " id=\"");
1161                 wrbuf_printf(w, "%s", db->id);
1162                 wrbuf_puts(w, "\"");
1163             }
1164             wrbuf_puts(w, ">\n");
1165             for (i = 0; i < db->num_settings; i++)
1166             {
1167                 s = db->settings[i];
1168                 while (s != NULL)
1169                 {
1170                     wrbuf_puts(w, "     <setting");
1171                     wrbuf_puts(w, " name=\"");
1172                     wrbuf_xmlputs(w, s->name);
1173                     wrbuf_puts(w, "\"");
1174                     wrbuf_puts(w, " value=\"");
1175                     wrbuf_xmlputs(w, s->value);
1176                     wrbuf_puts(w, "\"");
1177                     wrbuf_puts(w, " />\n");
1178                     s = s->next;
1179                 }
1180             }
1181             wrbuf_puts(w, "    </database>\n");
1182         }
1183         wrbuf_puts(w, "   </databases>\n");
1184     }
1185 }
1186
1187 void info_services(struct conf_server *server, WRBUF w)
1188 {
1189     struct conf_service *s = server->service;
1190     int i;
1191     struct setting *S;
1192
1193     wrbuf_puts(w, " <services>\n");
1194     for (; s; s = s->next)
1195     {
1196         wrbuf_puts(w, "  <service");
1197         if (s->id)
1198         {
1199             wrbuf_puts(w, " id=\"");
1200             wrbuf_xmlputs(w, s->id);
1201             wrbuf_puts(w, "\"");
1202         }
1203         wrbuf_puts(w, ">\n");
1204         if (s->settings)
1205         {
1206             for (i=0; i<s->settings->num_settings; i++)
1207             {
1208                 S = s->settings->settings[i];
1209                 while (S != NULL) {
1210                     wrbuf_puts(w, "   <setting");
1211                     wrbuf_puts(w, " name=\"");
1212                     wrbuf_xmlputs(w,  S->name);
1213                     wrbuf_puts(w, "\"");
1214                     wrbuf_puts(w, " value=\"");
1215                     wrbuf_xmlputs(w, S->value);
1216                     wrbuf_puts(w, "\"");
1217                     if (S->target) {
1218                         wrbuf_puts(w, " target=\"");
1219                         wrbuf_xmlputs(w, S->target);
1220                         wrbuf_puts(w, "\"");
1221                     }
1222
1223                     wrbuf_puts(w, " />\n");
1224
1225                     S = S->next;
1226                 }
1227             }
1228         }
1229         info_service_metadata(s, w);
1230         info_service_databases(s, w);
1231         wrbuf_puts(w, "  </service>");
1232
1233         wrbuf_puts(w, "\n");
1234     }
1235     wrbuf_puts(w, " </services>\n");
1236 }
1237
1238 static int parse_config(struct conf_config *config, xmlNode *root)
1239 {
1240     xmlNode *n;
1241
1242     for (n = root->children; n; n = n->next)
1243     {
1244         if (n->type != XML_ELEMENT_NODE)
1245             continue;
1246         if (!strcmp((const char *) n->name, "server"))
1247         {
1248             struct conf_server *tmp = server_create(config, config->nmem, n);
1249             if (!tmp)
1250                 return -1;
1251             tmp->next = config->servers;
1252             config->servers = tmp;
1253         }
1254         else if (!strcmp((const char *) n->name, "threads"))
1255         {
1256             xmlChar *number = xmlGetProp(n, (xmlChar *) "number");
1257             if (number)
1258             {
1259                 config->no_threads = atoi((const char *) number);
1260                 xmlFree(number);
1261             }
1262         }
1263         else if (!strcmp((const char *) n->name, "file"))
1264         {
1265             xmlChar *path = xmlGetProp(n, (xmlChar *) "path");
1266             if (path)
1267             {
1268                 config->path = nmem_strdup(config->nmem, (const char *) path);
1269                 xmlFree(path);
1270             }
1271         }
1272         else if (!strcmp((const char *) n->name, "targetprofiles"))
1273         {
1274             yaz_log(YLOG_FATAL, "targetprofiles unsupported here. Must be part of service");
1275             return -1;
1276
1277         }
1278         else
1279         {
1280             yaz_log(YLOG_FATAL, "Bad element: %s", n->name);
1281             return -1;
1282         }
1283     }
1284     return 0;
1285 }
1286
1287 struct conf_config *config_create(const char *fname)
1288 {
1289     xmlDoc *doc = xmlReadFile(fname,
1290                               NULL,
1291                               XML_PARSE_XINCLUDE
1292                               + XML_PARSE_NSCLEAN + XML_PARSE_NONET);
1293     xmlNode *n;
1294     const char *p;
1295     int r;
1296     NMEM nmem = nmem_create();
1297     struct conf_config *config = nmem_malloc(nmem, sizeof(struct conf_config));
1298
1299     xmlSubstituteEntitiesDefault(1);
1300     xmlLoadExtDtdDefaultValue = 1;
1301     if (!doc)
1302     {
1303         yaz_log(YLOG_FATAL, "Failed to read %s", fname);
1304         nmem_destroy(nmem);
1305         return 0;
1306     }
1307
1308     // Perform XInclude.
1309     r = xmlXIncludeProcess(doc);
1310     if (r == -1)
1311     {
1312         yaz_log(YLOG_FATAL, "XInclude processing failed");
1313         return 0;
1314     }
1315
1316     config->nmem = nmem;
1317     config->servers = 0;
1318     config->path = nmem_strdup(nmem, ".");
1319     config->no_threads = 0;
1320     config->iochan_man = 0;
1321     config->database_hosts = database_hosts_create();
1322
1323     config->confdir = wrbuf_alloc();
1324     if ((p = strrchr(fname,
1325 #ifdef WIN32
1326                      '\\'
1327 #else
1328                      '/'
1329 #endif
1330              )))
1331     {
1332         int len = p - fname;
1333         wrbuf_write(config->confdir, fname, len);
1334     }
1335     wrbuf_puts(config->confdir, "");
1336
1337     n = xmlDocGetRootElement(doc);
1338     r = yaz_xml_include_simple(n, wrbuf_cstr(config->confdir));
1339     if (r == 0) /* OK */
1340     {
1341         yaz_log(YLOG_LOG, "Configuration %s after include processing",
1342                 fname);
1343 #if LIBXML_VERSION >= 20600
1344         xmlDocFormatDump(yaz_log_file(), doc, 0);
1345 #else
1346         xmlDocDump(yaz_log_file(), doc);
1347 #endif
1348         r = parse_config(config, n);
1349     }
1350     xmlFreeDoc(doc);
1351
1352     if (r)
1353     {
1354         config_destroy(config);
1355         return 0;
1356     }
1357     return config;
1358 }
1359
1360 void server_destroy(struct conf_server *server)
1361 {
1362     struct conf_service *s = server->service;
1363     while (s)
1364     {
1365         struct conf_service *s_next = s->next;
1366         service_destroy(s);
1367         s = s_next;
1368     }
1369     pp2_charset_fact_destroy(server->charsets);
1370     http_server_destroy(server->http_server);
1371 }
1372
1373 void config_destroy(struct conf_config *config)
1374 {
1375     if (config)
1376     {
1377         struct conf_server *server = config->servers;
1378         iochan_man_destroy(&config->iochan_man);
1379         while (server)
1380         {
1381             struct conf_server *s_next = server->next;
1382             server_destroy(server);
1383             server = s_next;
1384             database_hosts_destroy(&config->database_hosts);
1385         }
1386         wrbuf_destroy(config->confdir);
1387         nmem_destroy(config->nmem);
1388     }
1389 }
1390
1391 void config_stop_listeners(struct conf_config *conf)
1392 {
1393     struct conf_server *ser;
1394     for (ser = conf->servers; ser; ser = ser->next)
1395         http_close_server(ser);
1396 }
1397
1398 void config_process_events(struct conf_config *conf)
1399 {
1400     struct conf_server *ser;
1401
1402     for (ser = conf->servers; ser; ser = ser->next)
1403     {
1404         struct conf_service *s = ser->service;
1405
1406         for (;s ; s = s->next)
1407         {
1408             assert(s->mutex == 0);
1409             pazpar2_mutex_create(&s->mutex, "service");
1410         }
1411         http_mutex_init(ser);
1412     }
1413     iochan_man_events(conf->iochan_man);
1414 }
1415
1416 int config_start_listeners(struct conf_config *conf,
1417                            const char *listener_override,
1418                            const char *record_fname)
1419 {
1420     struct conf_server *ser;
1421
1422     conf->iochan_man = iochan_man_create(conf->no_threads);
1423     for (ser = conf->servers; ser; ser = ser->next)
1424     {
1425         WRBUF w;
1426         int r;
1427
1428         ser->iochan_man = conf->iochan_man;
1429         if (listener_override)
1430         {
1431             const char *cp = strrchr(listener_override, ':');
1432             if (cp)
1433             {
1434                 ser->host = nmem_strdupn(conf->nmem, listener_override,
1435                                          cp - listener_override);
1436                 ser->port = nmem_strdup(conf->nmem, cp + 1);
1437             }
1438             else
1439             {
1440                 ser->host = "@";
1441                 ser->port = nmem_strdup(conf->nmem, listener_override);
1442             }
1443             listener_override = 0; /* only first server is overriden */
1444         }
1445         r = http_init(ser, record_fname);
1446         if (r)
1447             return -1;
1448
1449         w = wrbuf_alloc();
1450         if (ser->proxy_host || ser->proxy_port)
1451         {
1452             if (ser->proxy_host)
1453                 wrbuf_puts(w, ser->proxy_host);
1454             if (ser->proxy_port)
1455             {
1456                 if (wrbuf_len(w))
1457                     wrbuf_puts(w, ":");
1458                 wrbuf_printf(w, "%d", ser->proxy_port);
1459             }
1460         }
1461         if (wrbuf_len(w))
1462             http_set_proxyaddr(wrbuf_cstr(w), ser);
1463         wrbuf_destroy(w);
1464     }
1465     return 0;
1466 }
1467
1468 /*
1469  * Local variables:
1470  * c-basic-offset: 4
1471  * c-file-style: "Stroustrup"
1472  * indent-tabs-mode: nil
1473  * End:
1474  * vim: shiftwidth=4 tabstop=8 expandtab
1475  */
1476