Start work on multiple mergekeys - tests do not pass
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 27 Nov 2013 12:11:36 +0000 (13:11 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 27 Nov 2013 12:11:36 +0000 (13:11 +0100)
src/reclists.c
src/reclists.h
src/record.h
src/session.c

index b16a60e..8e7522a 100644 (file)
@@ -50,6 +50,22 @@ struct reclist_bucket
     struct reclist_sortparms *sort_parms;
 };
 
+static void append_merge_keys(struct record_metadata_attr **p,
+                              struct record_metadata_attr *a,
+                              NMEM nmem)
+{   /* Append a copy of every node of list `a` to the end of the list
+     * whose head pointer is stored at `*p`.
+     *
+     * p    - address of the destination list head; may point to a null
+     *        head, in which case the copies become the whole list.
+     * a    - source list to copy; may be null, in which case nothing
+     *        is appended.
+     * nmem - pool passed to nmem_malloc / nmem_strdup_null for the new
+     *        nodes and their strings.
+     *
+     * Only the `name`, `value` and `next` members are written on each
+     * new node. The source nodes themselves are not linked in.
+     */
+    while (*p)  /* walk to the terminating link of the destination list */
+        p = &(*p)->next;
+    for (; a; a = a->next)
+    {
+        *p = (struct record_metadata_attr *) nmem_malloc(nmem, sizeof(**p));
+        /* NOTE(review): nmem_strdup_null presumably tolerates a null
+         * argument; the callers visible in this change set name to 0.
+         * Confirm against the YAZ NMEM API. */
+        (*p)->name = nmem_strdup_null(nmem, a->name);
+        (*p)->value = nmem_strdup_null(nmem, a->value);
+        p = &(*p)->next;  /* next copy (or the terminator) goes here */
+    }
+    *p = 0;  /* terminate: the last new node's next is otherwise unset */
+}
+
 struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms,
                                                   struct conf_service *service)
 {
@@ -346,43 +362,55 @@ int reclist_get_num_records(struct reclist *l)
 struct record_cluster *reclist_insert(struct reclist *l,
                                       struct conf_service *service,
                                       struct record *record,
-                                      const char *merge_key, int *total)
+                                      struct record_metadata_attr *merge_keys,
+                                      int *total)
 {
-    unsigned int bucket;
-    struct reclist_bucket **p;
     struct record_cluster *cluster = 0;
+    struct record_metadata_attr *mkl = merge_keys;
+    struct reclist_bucket **p;
 
     assert(service);
     assert(l);
     assert(record);
-    assert(merge_key);
+    assert(merge_keys);
     assert(total);
 
-    bucket = jenkins_hash((unsigned char*) merge_key) % l->hash_size;
-
     yaz_mutex_enter(l->mutex);
-    for (p = &l->hashtable[bucket]; *p; p = &(*p)->hash_next)
+
+    for (; mkl; mkl = mkl->next)
     {
-        // We found a matching record. Merge them
-        if (!strcmp(merge_key, (*p)->record->merge_key))
-        {
-            struct record **re;
+        const char *merge_key = mkl->value;
+        unsigned int bucket =
+            jenkins_hash((unsigned char*) merge_key) % l->hash_size;
 
-            cluster = (*p)->record;
-            for (re = &cluster->records; *re; re = &(*re)->next)
+        for (p = &l->hashtable[bucket]; *p; p = &(*p)->hash_next)
+        {
+            struct record_metadata_attr *mkr = (*p)->record->merge_keys;
+            for (; mkr; mkr = mkr->next)
             {
-                if ((*re)->client == record->client &&
-                    record_compare(record, *re, service))
+                // We found a matching record. Merge them
+                if (!strcmp(merge_key, mkr->value))
                 {
-                    yaz_mutex_leave(l->mutex);
-                    return 0;
+                    struct record **re;
+
+                    cluster = (*p)->record;
+                    for (re = &cluster->records; *re; re = &(*re)->next)
+                    {
+                        if ((*re)->client == record->client &&
+                            record_compare(record, *re, service))
+                        {
+                            yaz_mutex_leave(l->mutex);
+                            return 0;
+                        }
+                    }
+                    *re = record;
+                    record->next = 0;
+                    goto out;
                 }
             }
-            *re = record;
-            record->next = 0;
-            break;
         }
     }
+out:
     if (!cluster)
     {
         struct reclist_bucket *new =
@@ -394,10 +422,13 @@ struct record_cluster *reclist_insert(struct reclist *l,
         new->record = cluster;
         new->hash_next = 0;
         cluster->records = record;
-        cluster->merge_key = nmem_strdup(l->nmem, merge_key);
+
+        cluster->merge_keys = 0;
+        append_merge_keys(&cluster->merge_keys, merge_keys, l->nmem);
+
         cluster->relevance_score = 0;
         cluster->term_frequency_vec = 0;
-        cluster->recid = nmem_strdup(l->nmem, merge_key);
+        cluster->recid = merge_keys->value;
         (*total)++;
         cluster->metadata =
             nmem_malloc(l->nmem,
index ba495bd..769b0c2 100644 (file)
@@ -41,7 +41,8 @@ void reclist_limit(struct reclist *l, struct session *session, int lazy);
 struct record_cluster *reclist_insert(struct reclist *tl,
                                       struct conf_service *service,
                                       struct record  *record,
-                                      const char *merge_key, int *total);
+                                      struct record_metadata_attr *merge_keys, 
+                                      int *total);
 void reclist_sort(struct reclist *l, struct reclist_sortparms *parms);
 struct record_cluster *reclist_read_record(struct reclist *l);
 void reclist_enter(struct reclist *l);
index f2761f5..fc01b5c 100644 (file)
@@ -82,7 +82,9 @@ struct record_cluster
     // Array mirrors list of metadata fields in config
     struct record_metadata **metadata;
     union data_types **sortkeys;
-    char *merge_key;
+    // char *merge_key;
+    struct record_metadata_attr *merge_keys;
+
     int relevance_score;
     int *term_frequency_vec;
     float *term_frequency_vecf;
index 9091b6b..9e52d59 100644 (file)
@@ -1667,12 +1667,12 @@ static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm);
+                             struct record_metadata_attr *mergekey);
 
 static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
                              int record_no, NMEM nmem,
                              struct session_database *sdb,
-                             const char **mergekey_norm)
+                             struct record_metadata_attr *mergekeys)
 {
     int ret = 0;
     struct session *se = client_get_session(cl);
@@ -1687,20 +1687,9 @@ static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
                     record_no, sdb->database->id);
         return 0;
     }
-    if (!*mergekey_norm)
-    {
-        *mergekey_norm = get_mergekey(xdoc, root, cl, record_no, service, nmem,
-                                      se->mergekey);
-    }
-    if (!*mergekey_norm)
-    {
-        session_log(se, YLOG_WARN, "Got no mergekey for record no %d from %s",
-                    record_no, sdb->database->id);
-        return -1;
-    }
     session_enter(se, "ingest_sub_record");
     if (client_get_session(cl) == se && se->relevance)
-        ret = ingest_to_cluster(cl, xdoc, root, record_no, *mergekey_norm);
+        ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
     session_leave(se, "ingest_sub_record");
 
     return ret;
@@ -1724,7 +1713,6 @@ int ingest_record(struct client *cl, const char *rec,
     xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
     int r = 0;
     xmlNode *root;
-    const char *mergekey_norm = 0;
 
     if (!xdoc)
         return -1;
@@ -1740,19 +1728,41 @@ int ingest_record(struct client *cl, const char *rec,
 
     if (!strcmp((const char *) root->name, "cluster"))
     {
-        for (root = root->children; root; root = root->next)
-            if (root->type == XML_ELEMENT_NODE)
+        xmlNode *sroot;
+        for (sroot = root->children; sroot; sroot = sroot->next)
+            if (sroot->type == XML_ELEMENT_NODE)
             {
-                r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
-                    &mergekey_norm);
+                const char *mergekey_norm =
+                    get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
+                         se->mergekey);
+
+                struct record_metadata_attr *mk = (struct record_metadata_attr*)
+                    nmem_malloc(nmem, sizeof(*mk));
+                mk->name = 0;
+                mk->value = nmem_strdup(nmem, mergekey_norm);
+                mk->next = 0;
+
+                r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
+                                      mk);
                 if (r)
                     break;
             }
     }
     else if (!strcmp((const char *) root->name, "record"))
     {
-        r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb,
-                              &mergekey_norm);
+        const char *mergekey_norm =
+            get_mergekey(xdoc, root, cl, record_no, service, nmem,
+                         se->mergekey);
+        if (mergekey_norm)
+        {
+            struct record_metadata_attr *mk = (struct record_metadata_attr*)
+                nmem_malloc(nmem, sizeof(*mk));
+            mk->name = 0;
+            mk->value = nmem_strdup(nmem, mergekey_norm);
+            mk->next = 0;
+
+            r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk);
+        }
     }
     else
     {
@@ -1926,7 +1936,7 @@ static int ingest_to_cluster(struct client *cl,
                              xmlDoc *xdoc,
                              xmlNode *root,
                              int record_no,
-                             const char *mergekey_norm)
+                             struct record_metadata_attr *merge_keys)
 {
     xmlNode *n;
     xmlChar *type = 0;
@@ -2029,7 +2039,7 @@ static int ingest_to_cluster(struct client *cl,
         return -2;
     }
     cluster = reclist_insert(se->reclist, service, record,
-                             mergekey_norm, &se->total_merged);
+                             merge_keys, &se->total_merged);
     if (!cluster)
         return 0; // complete match with existing record