New metadata facility "icurule" for normalizing metadata text PAZ-1002
[pazpar2-moved-to-github.git] / src / pazpar2_config.c
index 92717e2..e23c740 100644 (file)
@@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <libxml/parser.h>
 #include <libxml/tree.h>
+#include <libxml/xinclude.h>
 
 #include <yaz/yaz-util.h>
 #include <yaz/nmem.h>
@@ -53,6 +54,7 @@ struct conf_config
 
     int no_threads;
     WRBUF confdir;
+    char *path;
     iochan_man_t iochan_man;
     database_hosts_t database_hosts;
 };
@@ -168,7 +170,8 @@ static struct conf_metadata* conf_service_add_metadata(
     enum conf_metadata_mergekey mt,
     const char *facetrule,
     const char *limitmap,
-    const char *limitcluster
+    const char *limitcluster,
+    const char *icurule
     )
 {
     struct conf_metadata * md = 0;
@@ -200,6 +203,7 @@ static struct conf_metadata* conf_service_add_metadata(
     md->facetrule = nmem_strdup_null(nmem, facetrule);
     md->limitmap = nmem_strdup_null(nmem, limitmap);
     md->limitcluster = nmem_strdup_null(nmem, limitcluster);
+    md->icurule = nmem_strdup_null(nmem, icurule);
     return md;
 }
 
@@ -255,13 +259,20 @@ int conf_service_sortkey_field_id(struct conf_service *service,
 
 static void conf_dir_path(struct conf_config *config, WRBUF w, const char *src)
 {
-    if (config->confdir && wrbuf_len(config->confdir) > 0 &&
-        !yaz_is_abspath(src))
+    char full_path[1024]; /* output buffer handed to yaz_filepath_resolve(); appended to w on success */
+    if (yaz_filepath_resolve(src, config->path, /* look src up using the configured search path */
+                             wrbuf_len(config->confdir) > 0 ? /* NOTE(review): the removed lines NULL-checked config->confdir first; confirm it is always allocated */
+                             wrbuf_cstr(config->confdir) : ".", /* base: confdir when non-empty, otherwise "." */
+                             full_path))
     {
-        wrbuf_printf(w, "%s/%s", wrbuf_cstr(config->confdir), src);
+        wrbuf_puts(w, full_path); /* resolved: append the name written into full_path */
     }
     else
+    { /* not resolved: log a warning, then fall back to appending src unchanged */
+        yaz_log(YLOG_WARN, "File not found: fname=%s path=%s base=%s", src,
+                config->path, wrbuf_cstr(config->confdir));
         wrbuf_puts(w, src);
+    }
 }
 
 void service_destroy(struct conf_service *service)
@@ -306,6 +317,7 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
     xmlChar *xml_limitmap = 0;
     xmlChar *xml_limitcluster = 0;
     xmlChar *xml_icu_chain = 0;
+    xmlChar *xml_icurule = 0;
 
     struct _xmlAttr *attr;
 
@@ -349,6 +361,9 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
         else if (!xmlStrcmp(attr->name, BAD_CAST "limitcluster") &&
                  attr->children && attr->children->type == XML_TEXT_NODE)
             xml_limitcluster = attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "icurule") &&
+                 attr->children && attr->children->type == XML_TEXT_NODE)
+            xml_icurule = attr->children->content;
         else
         {
             yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name);
@@ -506,11 +521,25 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
                               mergekey_type,
                               (const char *) xml_icu_chain,
                               (const char *) xml_limitmap,
-                              (const char *) xml_limitcluster);
+                              (const char *) xml_limitcluster,
+                              (const char *) xml_icurule
+        );
     (*md_node)++;
     return 0;
 }
 
+
+static void count_metadata(xmlNode *n, int *num_metadata, int *num_sortkeys) /* tally one metadata element n into the two caller-owned counters */
+{
+    xmlChar *sortkey = xmlGetProp(n, (xmlChar *) "sortkey"); /* value of the "sortkey" attribute; released with xmlFree() below */
+    (*num_metadata)++; /* n always counts as one metadata entry */
+
+    if (sortkey && strcmp((const char *) sortkey, "no")) /* attribute present and its value is anything other than "no" */
+        (*num_sortkeys)++;
+    xmlFree(sortkey); /* reached on every path, including when sortkey is NULL */
+}
+
+
 static struct conf_service *service_create_static(struct conf_server *server,
                                                   xmlNode *node,
                                                   const char *service_id)
@@ -529,11 +558,18 @@ static struct conf_service *service_create_static(struct conf_server *server,
         if (n->type == XML_ELEMENT_NODE && !strcmp((const char *)
                                                    n->name, "metadata"))
         {
-            xmlChar *sortkey = xmlGetProp(n, (xmlChar *) "sortkey");
-            num_metadata++;
-            if (sortkey && strcmp((const char *) sortkey, "no"))
-                num_sortkeys++;
-            xmlFree(sortkey);
+            if (n->children) // This is a <metadata> container, look at its contents.
+            {
+                xmlNode *m;
+                for (m = n->children; m; m = m->next)
+                {
+                    if (m->type == XML_ELEMENT_NODE &&
+                            !strcmp((const char *) m->name, "metadata"))
+                        count_metadata(m, &num_metadata, &num_sortkeys);
+                }
+            }
+            else // This is a metadata-element proper, count it right away.
+                count_metadata(n, &num_metadata, &num_sortkeys);
         }
 
     service = service_init(server, num_metadata, num_sortkeys, service_id);
@@ -621,8 +657,8 @@ static struct conf_service *service_create_static(struct conf_server *server,
         {
             if (!service->charsets)
                 service->charsets = pp2_charset_fact_create();
-            if (pp2_charset_fact_define(service->charsets,
-                                        n->children, (const char *) n->name))
+            if (pp2_charset_fact_define(service->charsets, n,
+                                        (const char *) n->name))
             {
                 yaz_log(YLOG_FATAL, "ICU chain definition error");
                 return 0;
@@ -630,8 +666,17 @@ static struct conf_service *service_create_static(struct conf_server *server,
         }
         else if (!strcmp((const char *) n->name, (const char *) "metadata"))
         {
-            if (parse_metadata(service, n, &md_node, &sk_node))
-                return 0;
+            if (n->children) // This is a <metadata> container, look at its content.
+            {
+                xmlNode *m;
+                for (m = n->children; m; m = m->next)
+                    if ((!strcmp((const char *) m->name, (const char *) "metadata")))
+                        if (parse_metadata(service, m, &md_node, &sk_node))
+                            return 0;
+            }
+            else // This is a metadata-element proper, parse it right away.
+                if (parse_metadata(service, n, &md_node, &sk_node))
+                    return 0;
         }
         else if (!strcmp((const char *) n->name, (const char *) "xslt"))
         {
@@ -651,7 +696,7 @@ static struct conf_service *service_create_static(struct conf_server *server,
                     service->rank_cluster = 1;
                 else if (!strcmp(rank_cluster, "no"))
                     service->rank_cluster = 0;
-                else 
+                else
                 {
                     yaz_log(YLOG_FATAL, "service: rank@cluster boolean");
                     return 0;
@@ -682,7 +727,7 @@ static struct conf_service *service_create_static(struct conf_server *server,
                 if (!strcmp(rank_length, "linear"))
                     service->rank_length = 2;
                 else if (!strcmp(rank_length, "log"))
-                    service->rank_length = 1; 
+                    service->rank_length = 1;
                 else if (!strcmp(rank_length, "none"))
                     service->rank_length = 0;
                 else
@@ -915,8 +960,8 @@ static struct conf_server *server_create(struct conf_config *config,
         {
             if (!server->charsets)
                 server->charsets = pp2_charset_fact_create();
-            if (pp2_charset_fact_define(server->charsets,
-                                        n->children, (const char *) n->name))
+            if (pp2_charset_fact_define(server->charsets, n,
+                                        (const char *) n->name))
             {
                 yaz_log(YLOG_FATAL, "ICU chain definition error");
                 return 0;
@@ -1215,6 +1260,15 @@ static int parse_config(struct conf_config *config, xmlNode *root)
                 xmlFree(number);
             }
         }
+        else if (!strcmp((const char *) n->name, "file"))
+        {
+            xmlChar *path = xmlGetProp(n, (xmlChar *) "path");
+            if (path)
+            {
+                config->path = nmem_strdup(config->nmem, (const char *) path);
+                xmlFree(path);
+            }
+        }
         else if (!strcmp((const char *) n->name, "targetprofiles"))
         {
             yaz_log(YLOG_FATAL, "targetprofiles unsupported here. Must be part of service");
@@ -1230,9 +1284,12 @@ static int parse_config(struct conf_config *config, xmlNode *root)
     return 0;
 }
 
-struct conf_config *config_create(const char *fname, int verbose)
+struct conf_config *config_create(const char *fname)
 {
-    xmlDoc *doc = xmlParseFile(fname);
+    xmlDoc *doc = xmlReadFile(fname,
+                              NULL,
+                              XML_PARSE_XINCLUDE
+                              + XML_PARSE_NSCLEAN + XML_PARSE_NONET);
     xmlNode *n;
     const char *p;
     int r;
@@ -1248,8 +1305,17 @@ struct conf_config *config_create(const char *fname, int verbose)
         return 0;
     }
 
+    // Perform XInclude.
+    r = xmlXIncludeProcess(doc);
+    if (r == -1)
+    {
+        yaz_log(YLOG_FATAL, "XInclude processing failed");
+        return 0;
+    }
+
     config->nmem = nmem;
     config->servers = 0;
+    config->path = nmem_strdup(nmem, ".");
     config->no_threads = 0;
     config->iochan_man = 0;
     config->database_hosts = database_hosts_create();
@@ -1272,16 +1338,13 @@ struct conf_config *config_create(const char *fname, int verbose)
     r = yaz_xml_include_simple(n, wrbuf_cstr(config->confdir));
     if (r == 0) /* OK */
     {
-        if (verbose)
-        {
-            yaz_log(YLOG_LOG, "Configuration %s after include processing",
-                    fname);
+        yaz_log(YLOG_LOG, "Configuration %s after include processing",
+                fname);
 #if LIBXML_VERSION >= 20600
-            xmlDocFormatDump(yaz_log_file(), doc, 0);
+        xmlDocFormatDump(yaz_log_file(), doc, 0);
 #else
-            xmlDocDump(yaz_log_file(), doc);
+        xmlDocDump(yaz_log_file(), doc);
 #endif
-        }
         r = parse_config(config, n);
     }
     xmlFreeDoc(doc);