Merge branch 'paz-1002-a'
author Adam Dickmeiss <adam@indexdata.dk>
Tue, 12 May 2015 11:14:17 +0000 (13:14 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Tue, 12 May 2015 11:14:17 +0000 (13:14 +0200)
12 files changed:
configure.ac
debian/control
doc/pazpar2_conf.xml
pazpar2.spec
src/charsets.c
src/pazpar2_config.c
src/pazpar2_config.h
src/record.h
src/session.c
src/settings.c
src/settings.h
test/test_icu.cfg

index b40a776..85ef2db 100644 (file)
@@ -24,7 +24,7 @@ AC_LANG(C)
 
 AC_C_INLINE
 
-YAZ_INIT([static icu],[5.7.0])
+YAZ_INIT([static icu],[5.12.0])
 if test -z "$YAZLIB"; then
        AC_MSG_ERROR([YAZ development libraries missing])
 fi
index c50b234..2ef845d 100644 (file)
@@ -4,7 +4,7 @@ Priority: extra
 Maintainer: Adam Dickmeiss <adam@indexdata.dk>
 Build-Depends: debhelper (>= 7),
        autotools-dev,
-       libyaz5-dev (>= 5.7.0),
+       libyaz5-dev (>= 5.12.0),
        docbook-xsl
 Standards-Version: 3.7.2
 
index 8d64acc..86dbdc8 100644 (file)
          </varlistentry>
 
          <varlistentry>
+          <term id="icurule">icurule</term>
+          <listitem>
+           <para>
+            Specifies the ICU rule set to be used for normalizing
+            metadata text. The "display" part of the rule is kept
+            in the returned metadata record (record+show commands), while
+            the end result - the normalized text - is used for performing
+            within-cluster merge (unique, longest, etc). If the icurule is
+            omitted, type generic (text) is converted as follows:
+            any of the characters "<literal> ,/.:([</literal>" are
+            chopped off the prefix and suffix of the text content
+            <emphasis>unless</emphasis> it includes the
+            characters "<literal>://</literal>" (URL).
+           </para>
+           <note>
+            <para>
+             Requires Pazpar2 1.9.0 or later.
+            </para>
+           </note>
+          </listitem>
+         </varlistentry>
+
+         <varlistentry>
           <term>setting</term>
           <listitem>
            <para>
index 1cdd1b5..8d34f23 100644 (file)
@@ -8,11 +8,11 @@ Group: Applications/Internet
 Vendor: Index Data ApS <info@indexdata.dk>
 Source: pazpar2-%{version}.tar.gz
 BuildRoot: %{_tmppath}/%{name}-%{version}-root
-BuildRequires: libyaz5-devel >= 5.7.0
+BuildRequires: libyaz5-devel >= 5.12.0
 Packager: Adam Dickmeiss <adam@indexdata.dk>
 URL: http://www.indexdata.com/pazpar2
 Summary: pazpar2 daemon
-Requires: libyaz5 >= 5.7.0
+Requires: libyaz5 >= 5.12.0
 Requires: pazpar2-xsl
 
 %description
index 61b6f19..b36d7e2 100644 (file)
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/wrbuf.h>
 #include <yaz/log.h>
 #include <yaz/yaz-version.h>
+#include <yaz/xml_get.h>
 #include <ctype.h>
 #include <assert.h>
 #include <string.h>
@@ -166,11 +167,10 @@ int pp2_charset_fact_add(pp2_charset_fact_t pft,
 }
 
 int pp2_charset_fact_define(pp2_charset_fact_t pft,
-                            xmlNode *xml_node, const char *default_id)
+                            xmlNode *xml_node, const char *id)
 {
     int r;
     pp2_charset_t pct;
-    xmlChar *id = 0;
 
     assert(xml_node);
 
@@ -190,20 +190,17 @@ int pp2_charset_fact_define(pp2_charset_fact_t pft,
     pct = pp2_charset_create_xml(xml_node);
     if (!pct)
         return -1;
-    if (!default_id)
+    if (!id)
     {
-        id = xmlGetProp(xml_node, (xmlChar*) "id");
+        id = yaz_xml_get_prop(xml_node, "id");
         if (!id)
         {
             yaz_log(YLOG_WARN, "Missing id for icu_chain");
             pp2_charset_destroy(pct);
             return -1;
         }
-        default_id = (const char *) id;
     }
-    r = pp2_charset_fact_add(pft, pct, default_id);
-    if (id)
-        xmlFree(id);
+    r = pp2_charset_fact_add(pft, pct, id);
     return r;
 }
 
index dbfd850..e23c740 100644 (file)
@@ -170,7 +170,8 @@ static struct conf_metadata* conf_service_add_metadata(
     enum conf_metadata_mergekey mt,
     const char *facetrule,
     const char *limitmap,
-    const char *limitcluster
+    const char *limitcluster,
+    const char *icurule
     )
 {
     struct conf_metadata * md = 0;
@@ -202,6 +203,7 @@ static struct conf_metadata* conf_service_add_metadata(
     md->facetrule = nmem_strdup_null(nmem, facetrule);
     md->limitmap = nmem_strdup_null(nmem, limitmap);
     md->limitcluster = nmem_strdup_null(nmem, limitcluster);
+    md->icurule = nmem_strdup_null(nmem, icurule);
     return md;
 }
 
@@ -315,6 +317,7 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
     xmlChar *xml_limitmap = 0;
     xmlChar *xml_limitcluster = 0;
     xmlChar *xml_icu_chain = 0;
+    xmlChar *xml_icurule = 0;
 
     struct _xmlAttr *attr;
 
@@ -358,6 +361,9 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
         else if (!xmlStrcmp(attr->name, BAD_CAST "limitcluster") &&
                  attr->children && attr->children->type == XML_TEXT_NODE)
             xml_limitcluster = attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "icurule") &&
+                 attr->children && attr->children->type == XML_TEXT_NODE)
+            xml_icurule = attr->children->content;
         else
         {
             yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name);
@@ -515,7 +521,9 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
                               mergekey_type,
                               (const char *) xml_icu_chain,
                               (const char *) xml_limitmap,
-                              (const char *) xml_limitcluster);
+                              (const char *) xml_limitcluster,
+                              (const char *) xml_icurule
+        );
     (*md_node)++;
     return 0;
 }
index 1343790..83d7314 100644 (file)
@@ -82,6 +82,7 @@ struct conf_metadata
 
     char *limitmap;  // Should be expanded into service-wide default e.g. pz:limitmap:<name>=value setting
     char *limitcluster;
+    char *icurule;
 };
 
 
index cb8df1e..0f1f249 100644 (file)
@@ -27,6 +27,7 @@ struct conf_service;
 union data_types {
     struct {
         const char *disp;
+        const char *norm;
         const char *sort;
         const char *snippet;
     } text;
index 325ba28..3dcf40f 100644 (file)
@@ -56,6 +56,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/querytowrbuf.h>
 #include <yaz/oid_db.h>
 #include <yaz/snprintf.h>
+#include <yaz/xml_get.h>
 
 #define USE_TIMING 0
 #if USE_TIMING
@@ -149,30 +150,19 @@ static void session_leave(struct session *s, const char *caller)
         session_log(s, YLOG_DEBUG, "Session unlock by %s", caller);
 }
 
-static void session_normalize_facet(struct session *s,
-                                    const char *type, const char *value,
-                                    WRBUF display_wrbuf, WRBUF facet_wrbuf)
+static int run_icu(struct session *s, const char *icu_chain_id,
+                   const char *value,
+                   WRBUF norm_wr, WRBUF disp_wr)
 {
-    struct conf_service *service = s->service;
-    pp2_charset_token_t prt;
     const char *facet_component;
-    int i;
-    const char *icu_chain_id = 0;
-
-    for (i = 0; i < service->num_metadata; i++)
-        if (!strcmp((service->metadata + i)->name, type))
-            icu_chain_id = (service->metadata + i)->facetrule;
-    if (!icu_chain_id)
-        icu_chain_id = "facet";
-    prt = pp2_charset_token_create(service->charsets, icu_chain_id);
+    struct conf_service *service = s->service;
+    pp2_charset_token_t prt =
+        pp2_charset_token_create(service->charsets, icu_chain_id);
     if (!prt)
     {
         session_log(s, YLOG_FATAL,
-                    "Unknown ICU chain '%s' for facet of type '%s'",
-                icu_chain_id, type);
-        wrbuf_destroy(facet_wrbuf);
-        wrbuf_destroy(display_wrbuf);
-        return;
+                    "Unknown ICU chain '%s'", icu_chain_id);
+        return 0;
     }
     pp2_charset_token_first(prt, value, 0);
     while ((facet_component = pp2_charset_token_next(prt)))
@@ -180,19 +170,37 @@ static void session_normalize_facet(struct session *s,
         const char *display_component;
         if (*facet_component)
         {
-            if (wrbuf_len(facet_wrbuf))
-                wrbuf_puts(facet_wrbuf, " ");
-            wrbuf_puts(facet_wrbuf, facet_component);
+            if (wrbuf_len(norm_wr))
+                wrbuf_puts(norm_wr, " ");
+            wrbuf_puts(norm_wr, facet_component);
         }
         display_component = pp2_get_display(prt);
         if (display_component)
         {
-            if (wrbuf_len(display_wrbuf))
-                wrbuf_puts(display_wrbuf, " ");
-            wrbuf_puts(display_wrbuf, display_component);
+            if (wrbuf_len(disp_wr))
+                wrbuf_puts(disp_wr, " ");
+            wrbuf_puts(disp_wr, display_component);
         }
     }
     pp2_charset_token_destroy(prt);
+    return 1;
+}
+
+static void session_normalize_facet(struct session *s,
+                                    const char *type, const char *value,
+                                    WRBUF display_wrbuf, WRBUF facet_wrbuf)
+{
+    struct conf_service *service = s->service;
+    int i;
+    const char *icu_chain_id = 0;
+
+    for (i = 0; i < service->num_metadata; i++)
+        if (!strcmp((service->metadata + i)->name, type))
+            icu_chain_id = (service->metadata + i)->facetrule;
+    if (!icu_chain_id)
+        icu_chain_id = "facet";
+
+    run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf);
 }
 
 void add_facet(struct session *s, const char *type, const char *value, int count)
@@ -1470,7 +1478,8 @@ void statistics(struct session *se, struct statistics *stat)
 }
 
 static struct record_metadata *record_metadata_init(
-    NMEM nmem, const char *value, enum conf_metadata_type type,
+    NMEM nmem, const char *value, const char *norm,
+    enum conf_metadata_type type,
     struct _xmlAttr *attr)
 {
     struct record_metadata *rec_md = record_metadata_create(nmem);
@@ -1500,11 +1509,20 @@ static struct record_metadata *record_metadata_init(
     {
     case Metadata_type_generic:
     case Metadata_type_skiparticle:
-        if (strstr(value, "://")) /* looks like a URL */
+        if (norm)
+        {
             rec_md->data.text.disp = nmem_strdup(nmem, value);
+            rec_md->data.text.norm = nmem_strdup(nmem, norm);
+        }
         else
-            rec_md->data.text.disp =
-                normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+        {
+            if (strstr(value, "://")) /* looks like a URL */
+                rec_md->data.text.disp = nmem_strdup(nmem, value);
+            else
+                rec_md->data.text.disp =
+                    normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+            rec_md->data.text.norm = rec_md->data.text.disp;
+        }
         rec_md->data.text.sort = 0;
         rec_md->data.text.snippet = 0;
         break;
@@ -1564,7 +1582,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
             continue;
         if (!strcmp((const char *) n->name, "metadata"))
         {
-            xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
+            const char *type = yaz_xml_get_prop(n, "type");
             if (type == NULL) {
                 yaz_log(YLOG_FATAL, "Missing type attribute on metadata element. Skipping!");
             }
@@ -1583,7 +1601,6 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                 if (value)
                     xmlFree(value);
             }
-            xmlFree(type);
         }
     }
     return no_found;
@@ -1596,7 +1613,7 @@ static const char *get_mergekey(xmlDoc *doc, xmlNode *root,
 {
     char *mergekey_norm = 0;
     WRBUF norm_wr = wrbuf_alloc();
-    xmlChar *mergekey;
+    const char *mergekey;
 
     if (session_mergekey)
     {
@@ -1608,10 +1625,9 @@ static const char *get_mergekey(xmlDoc *doc, xmlNode *root,
         for (i = 0; i < num; i++)
             get_mergekey_from_doc(doc, root, values[i], service, norm_wr);
     }
-    else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
+    else if ((mergekey = yaz_xml_get_prop(root, "mergekey")))
     {
-        mergekey_norm_wr(service->charsets, norm_wr, (const char *) mergekey);
-        xmlFree(mergekey);
+        mergekey_norm_wr(service->charsets, norm_wr, mergekey);
     }
     else
     {
@@ -1678,7 +1694,7 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb)
             continue;
         if (!strcmp((const char *) n->name, "metadata"))
         {
-            xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
+            const char *type = yaz_xml_get_prop(n, "type");
             if (type)
             {
                 size_t len;
@@ -1706,7 +1722,6 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb)
                     }
                     xmlFree(value);
                 }
-                xmlFree(type);
             }
         }
     }
@@ -1714,6 +1729,8 @@ static int check_record_filter(xmlNode *root, struct session_database *sdb)
 }
 
 static int ingest_to_cluster(struct client *cl,
+                             WRBUF wrbuf_disp,
+                             WRBUF wrbuf_norm,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
@@ -1726,6 +1743,7 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
 {
     int ret = 0;
     struct session *se = client_get_session(cl);
+    WRBUF wrbuf_disp, wrbuf_norm;
 
     if (!check_record_filter(root, sdb))
     {
@@ -1734,11 +1752,15 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
                     record_no, sdb->database->id);
         return 0;
     }
+    wrbuf_disp = wrbuf_alloc();
+    wrbuf_norm = wrbuf_alloc();
     session_enter(se, "ingest_sub_record");
     if (client_get_session(cl) == se && se->relevance)
-        ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
+        ret = ingest_to_cluster(cl, wrbuf_disp, wrbuf_norm,
+                                xdoc, root, record_no, mergekeys);
     session_leave(se, "ingest_sub_record");
-
+    wrbuf_destroy(wrbuf_norm);
+    wrbuf_destroy(wrbuf_disp);
     return ret;
 }
 
@@ -2019,14 +2041,14 @@ static int check_limit_local(struct client *cl,
 }
 
 static int ingest_to_cluster(struct client *cl,
+                             WRBUF wrbuf_disp,
+                             WRBUF wrbuf_norm,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
                              struct record_metadata_attr *merge_keys)
 {
     xmlNode *n;
-    xmlChar *type = 0;
-    xmlChar *value = 0;
     struct session *se = client_get_session(cl);
     struct conf_service *service = se->service;
     int term_factor = 1;
@@ -2043,12 +2065,6 @@ static int ingest_to_cluster(struct client *cl,
 
     for (n = root->children; n; n = n->next)
     {
-        if (type)
-            xmlFree(type);
-        if (value)
-            xmlFree(value);
-        type = value = 0;
-
         if (n->type != XML_ELEMENT_NODE)
             continue;
         if (!strcmp((const char *) n->name, "metadata"))
@@ -2057,20 +2073,12 @@ static int ingest_to_cluster(struct client *cl,
             struct record_metadata **wheretoput = 0;
             struct record_metadata *rec_md = 0;
             int md_field_id = -1;
+            xmlChar *value0;
+            const char *type = yaz_xml_get_prop(n, "type");
 
-            type = xmlGetProp(n, (xmlChar *) "type");
-            value = xmlNodeListGetString(xdoc, n->children, 1);
             if (!type)
                 continue;
-            if (!value || !*value)
-            {
-                xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty");
-                if (!empty)
-                    continue;
-                if (value)
-                    xmlFree(value);
-                value = empty;
-            }
+
             md_field_id
                 = conf_service_metadata_field_id(service, (const char *) type);
             if (md_field_id < 0)
@@ -2084,15 +2092,30 @@ static int ingest_to_cluster(struct client *cl,
                 continue;
             }
 
+            wrbuf_rewind(wrbuf_disp);
+            value0 = xmlNodeListGetString(xdoc, n->children, 1);
+            if (!value0 || !*value0)
+            {
+                const char *empty = yaz_xml_get_prop(n, "empty");
+                if (!empty)
+                    continue;
+                wrbuf_puts(wrbuf_disp, (const char *) empty);
+            }
+            else
+            {
+                wrbuf_puts(wrbuf_disp, (const char *) value0);
+            }
+            if (value0)
+                xmlFree(value0);
             ser_md = &service->metadata[md_field_id];
 
             // non-merged metadata
-            rec_md = record_metadata_init(se->nmem, (const char *) value,
+            rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), 0,
                                           ser_md->type, n->properties);
             if (!rec_md)
             {
                 session_log(se, YLOG_WARN, "bad metadata data '%s' "
-                            "for element '%s'", value, type);
+                            "for element '%s'", wrbuf_cstr(wrbuf_disp), type);
                 continue;
             }
 
@@ -2100,7 +2123,7 @@ static int ingest_to_cluster(struct client *cl,
             {
                 WRBUF w = wrbuf_alloc();
                 if (relevance_snippet(se->relevance,
-                                      (char*) value, ser_md->name, w))
+                                      wrbuf_cstr(wrbuf_disp), ser_md->name, w))
                     rec_md->data.text.snippet = nmem_strdup(se->nmem,
                                                             wrbuf_cstr(w));
                 wrbuf_destroy(w);
@@ -2116,20 +2139,12 @@ static int ingest_to_cluster(struct client *cl,
 
     if (check_limit_local(cl, record, record_no))
     {
-        if (type)
-            xmlFree(type);
-        if (value)
-            xmlFree(value);
         return -2;
     }
     cluster = reclist_insert(se->reclist, se->relevance, service, record,
                              merge_keys, &se->total_merged);
     if (!cluster)
     {
-        if (type)
-            xmlFree(type);
-        if (value)
-            xmlFree(value);
         return 0; // complete match with existing record
     }
 
@@ -2167,13 +2182,6 @@ static int ingest_to_cluster(struct client *cl,
     // now parsing XML record and adding data to cluster or record metadata
     for (n = root->children; n; n = n->next)
     {
-        pp2_charset_token_t prt;
-        if (type)
-            xmlFree(type);
-        if (value)
-            xmlFree(value);
-        type = value = 0;
-
         if (n->type != XML_ELEMENT_NODE)
             continue;
         if (!strcmp((const char *) n->name, "metadata"))
@@ -2185,12 +2193,12 @@ static int ingest_to_cluster(struct client *cl,
             int md_field_id = -1;
             int sk_field_id = -1;
             const char *rank = 0;
-            xmlChar *xml_rank = 0;
-
-            type = xmlGetProp(n, (xmlChar *) "type");
-            value = xmlNodeListGetString(xdoc, n->children, 1);
+            const char *xml_rank = 0;
+            const char *type = 0;
+            xmlChar *value0;
 
-            if (!type || !value || !*value)
+            type = yaz_xml_get_prop(n, "type");
+            if (!type)
                 continue;
 
             md_field_id
@@ -2206,12 +2214,39 @@ static int ingest_to_cluster(struct client *cl,
                 ser_sk = &service->sortkeys[sk_field_id];
             }
 
-            // merged metadata
-            rec_md = record_metadata_init(se->nmem, (const char *) value,
-                                          ser_md->type, 0);
+            wrbuf_rewind(wrbuf_disp);
+            wrbuf_rewind(wrbuf_norm);
 
-            // see if the field was not in cluster already (from beginning)
+            value0 = xmlNodeListGetString(xdoc, n->children, 1);
+            if (!value0 || !*value0)
+            {
+                if (value0)
+                    xmlFree(value0);
+                continue;
+            }
+
+            if (ser_md->icurule)
+            {
+                run_icu(se, ser_md->icurule, (const char *) value0,
+                        wrbuf_norm, wrbuf_disp);
+                yaz_log(YLOG_LOG, "run_icu input=%s norm=%s disp=%s",
+                        (const char *) value0,
+                        wrbuf_cstr(wrbuf_norm), wrbuf_cstr(wrbuf_disp));
+                rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+                                              wrbuf_cstr(wrbuf_norm),
+                                              ser_md->type, 0);
+            }
+            else
+            {
+                wrbuf_puts(wrbuf_disp, (const char *) value0);
+                rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+                                              0,
+                                              ser_md->type, 0);
+            }
 
+            xmlFree(value0);
+
+            // see if the field was not in cluster already (from beginning)
             if (!rec_md)
                 continue;
 
@@ -2234,7 +2269,7 @@ static int ingest_to_cluster(struct client *cl,
             }
             else
             {
-                xml_rank = xmlGetProp(n, (xmlChar *) "rank");
+                xml_rank = yaz_xml_get_prop(n, "rank");
                 rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
             }
 
@@ -2253,8 +2288,8 @@ static int ingest_to_cluster(struct client *cl,
             {
                 while (*wheretoput)
                 {
-                    if (!strcmp((const char *) (*wheretoput)->data.text.disp,
-                                rec_md->data.text.disp))
+                    if (!strcmp((const char *) (*wheretoput)->data.text.norm,
+                                rec_md->data.text.norm))
                         break;
                     wheretoput = &(*wheretoput)->next;
                 }
@@ -2264,12 +2299,13 @@ static int ingest_to_cluster(struct client *cl,
             else if (ser_md->merge == Metadata_merge_longest)
             {
                 if (!*wheretoput
-                    || strlen(rec_md->data.text.disp)
-                    > strlen((*wheretoput)->data.text.disp))
+                    || strlen(rec_md->data.text.norm)
+                    > strlen((*wheretoput)->data.text.norm))
                 {
                     *wheretoput = rec_md;
                     if (ser_sk)
                     {
+                        pp2_charset_token_t prt;
                         const char *sort_str = 0;
                         int skip_article =
                             ser_sk->type == Metadata_type_skiparticle;
@@ -2333,7 +2369,8 @@ static int ingest_to_cluster(struct client *cl,
             if (rank)
             {
                 relevance_countwords(se->relevance, cluster,
-                                     (char *) value, rank, ser_md->name);
+                                     wrbuf_cstr(wrbuf_disp),
+                                     rank, ser_md->name);
             }
             // construct facets ... unless the client already has reported them
             if (ser_md->termlist && !client_has_facet(cl, (char *) type))
@@ -2351,15 +2388,8 @@ static int ingest_to_cluster(struct client *cl,
                     }
                 }
                 else
-                    add_facet(se, (char *) type, (char *) value, term_factor);
+                    add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor);
             }
-
-            // cleaning up
-            if (xml_rank)
-                xmlFree(xml_rank);
-            xmlFree(type);
-            xmlFree(value);
-            type = value = 0;
         }
         else
         {
@@ -2369,11 +2399,6 @@ static int ingest_to_cluster(struct client *cl,
             se->number_of_warnings_unknown_elements++;
         }
     }
-    if (type)
-        xmlFree(type);
-    if (value)
-        xmlFree(value);
-
     nmem_destroy(ingest_nmem);
     xfree(metadata0);
     relevance_donerecord(se->relevance, cluster);
index ae92345..1c52b30 100644 (file)
@@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <stdio.h>
 #include <sys/types.h>
 #include <yaz/dirent.h>
+#include <yaz/xml_get.h>
 #include <stdlib.h>
 #include <sys/stat.h>
 
@@ -176,11 +177,10 @@ int settings_read_node_x(xmlNode *n,
                                      struct setting *set))
 {
     int ret_val = 0; /* success */
-    char *namea = (char *) xmlGetProp(n, (xmlChar *) "name");
-    char *targeta = (char *) xmlGetProp(n, (xmlChar *) "target");
-    char *valuea = (char *) xmlGetProp(n, (xmlChar *) "value");
-    char *usera = (char *) xmlGetProp(n, (xmlChar *) "user");
-    char *precedencea = (char *) xmlGetProp(n, (xmlChar *) "precedence");
+    const char *namea = yaz_xml_get_prop(n, "name");
+    const char *targeta = yaz_xml_get_prop(n, "target");
+    const char *valuea = yaz_xml_get_prop(n, "value");
+    const char *precedencea = yaz_xml_get_prop(n, "precedence");
 
     for (n = n->children; n; n = n->next)
     {
@@ -190,11 +190,10 @@ int settings_read_node_x(xmlNode *n,
         {
             xmlNode *root = n->children;
             struct setting set;
-            char *name = (char *) xmlGetProp(n, (xmlChar *) "name");
-            char *target = (char *) xmlGetProp(n, (xmlChar *) "target");
-            char *value = (char *) xmlGetProp(n, (xmlChar *) "value");
-            char *user = (char *) xmlGetProp(n, (xmlChar *) "user");
-            char *precedence = (char *) xmlGetProp(n, (xmlChar *) "precedence");
+            const char *name = yaz_xml_get_prop(n, "name");
+            const char *target = yaz_xml_get_prop(n, "target");
+            const char *value = yaz_xml_get_prop(n, "value");
+            const char *precedence = yaz_xml_get_prop(n, "precedence");
             xmlChar *buf_out = 0;
 
             set.next = 0;
@@ -231,7 +230,7 @@ int settings_read_node_x(xmlNode *n,
                     xmlDocSetRootElement(doc, xmlCopyNode(root, 1));
                     xmlDocDumpMemory(doc, &buf_out, &len_out);
                     /* xmlDocDumpMemory 0-terminates */
-                    set.value = (char *) buf_out;
+                    set.value = (const char *) buf_out;
                     xmlFreeDoc(doc);
                 }
             }
@@ -248,11 +247,6 @@ int settings_read_node_x(xmlNode *n,
                 ret_val = -1;
             }
             xmlFree(buf_out);
-            xmlFree(name);
-            xmlFree(precedence);
-            xmlFree(value);
-            xmlFree(user);
-            xmlFree(target);
         }
         else
         {
@@ -261,11 +255,6 @@ int settings_read_node_x(xmlNode *n,
             ret_val = -1;
         }
     }
-    xmlFree(namea);
-    xmlFree(precedencea);
-    xmlFree(valuea);
-    xmlFree(usera);
-    xmlFree(targeta);
     return ret_val;
 }
 
index cd01937..6378a8a 100644 (file)
@@ -65,9 +65,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 struct setting
 {
     int precedence;
-    char *target;
-    char *name;
-    char *value;
+    const char *target;
+    const char *name;
+    const char *value;
     struct setting *next;
 };
 
index e3c1e89..1d8cc9c 100644 (file)
@@ -32,8 +32,9 @@
     </icu_chain>
 
     <icu_chain id="mychain" locale="en">
+      <transliterate>[[:WhiteSpace:][,.!;]]* } [$] > ;</transliterate>
       <display/>
-      <transform rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+      <casemap rule="l"/>
     </icu_chain>
 
     <icu_chain id="facet-author" locale="en">
@@ -56,7 +57,7 @@
       <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
       <metadata name="id"/>
       <metadata name="lccn" merge="unique"/>
-      <metadata name="description" brief="yes" merge="longest" rank="3"/>
+      <metadata name="description" brief="yes" merge="longest" rank="3" icurule="mychain"/>
       
       <metadata name="test-usersetting" brief="yes" setting="postproc"/>
       <metadata name="test" setting="parameter"/>