Metadata 'skiparticle' works for ICU normalization
[pazpar2-moved-to-github.git] / src / logic.c
index 0d578a0..c07d303 100644 (file)
@@ -822,9 +822,31 @@ void pazpar2_event_loop()
 }
 
 static struct record_metadata *record_metadata_init(
-    NMEM nmem, const char *value, enum conf_metadata_type type)
+    NMEM nmem, const char *value, enum conf_metadata_type type,
+    struct _xmlAttr *attr)
 {
     struct record_metadata *rec_md = record_metadata_create(nmem);
+    struct record_metadata_attr **attrp = &rec_md->attributes;
+    
+    for (; attr; attr = attr->next)
+    {
+        if (attr->children && attr->children->content)
+        {
+            if (strcmp((const char *) attr->name, "type"))
+            {  /* skip the "type" attribute.. Its value is already part of
+                  the element in output (md-%s) and so repeating it here
+                  is redundant */
+                *attrp = nmem_malloc(nmem, sizeof(**attrp));
+                (*attrp)->name =
+                    nmem_strdup(nmem, (const char *) attr->name);
+                (*attrp)->value =
+                    nmem_strdup(nmem, (const char *) attr->children->content);
+                attrp = &(*attrp)->next;
+            }
+        }
+    }
+    *attrp = 0;
+
     if (type == Metadata_type_generic)
     {
         char *p = nmem_strdup(nmem, value);
@@ -873,7 +895,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                     pp2_relevance_token_t prt =
                         pp2_relevance_tokenize(
                             service->mergekey_pct,
-                            (const char *) value);
+                            (const char *) value, 0);
                     
                     wrbuf_puts(norm_wr, name);
                     wrbuf_puts(norm_wr, "=");
@@ -913,7 +935,7 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
         pp2_relevance_token_t prt =
             pp2_relevance_tokenize(
                 service->mergekey_pct,
-                (const char *) mergekey);
+                (const char *) mergekey, 0);
         
         while ((norm_str = pp2_relevance_token_next(prt)))
         {
@@ -1123,7 +1145,7 @@ struct record *ingest_record(struct client *cl, const char *rec,
 
             // non-merged metadata
             rec_md = record_metadata_init(se->nmem, (const char *) value,
-                                          ser_md->type);
+                                          ser_md->type, n->properties);
             if (!rec_md)
             {
                 yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
@@ -1137,7 +1159,7 @@ struct record *ingest_record(struct client *cl, const char *rec,
 
             // merged metadata
             rec_md = record_metadata_init(se->nmem, (const char *) value,
-                                          ser_md->type);
+                                          ser_md->type, 0);
             wheretoput = &cluster->metadata[md_field_id];
 
             // and polulate with data:
@@ -1175,11 +1197,11 @@ struct record *ingest_record(struct client *cl, const char *rec,
                          
                         prt = pp2_relevance_tokenize(
                             service->sort_pct,
-                            rec_md->data.text.disp);
+                            rec_md->data.text.disp, skip_article);
 
                         pp2_relevance_token_next(prt);
                          
-                        sort_str = pp2_get_sort(prt, skip_article);
+                        sort_str = pp2_get_sort(prt);
                          
                         cluster->sortkeys[sk_field_id]->text.disp = 
                             rec_md->data.text.disp;
@@ -1230,7 +1252,8 @@ struct record *ingest_record(struct client *cl, const char *rec,
             // ranking of _all_ fields enabled ... 
             if (ser_md->rank)
                 relevance_countwords(se->relevance, cluster, 
-                                     (char *) value, ser_md->rank);
+                                     (char *) value, ser_md->rank,
+                                     ser_md->name);
 
             // construct facets ... 
             if (ser_md->termlist)