Got to the point where I can start testing sorting of records in the record list.
author Marc Cromme <marc@indexdata.dk>
Wed, 25 Apr 2007 13:28:55 +0000 (13:28 +0000)
committer Marc Cromme <marc@indexdata.dk>
Wed, 25 Apr 2007 13:28:55 +0000 (13:28 +0000)
But there is a nasty segfault in reclist_sort(list, sort_parms) which needs to be sorted out before proceeding - probably a global parameter referenced someplace. Just checking in to save the work so far; continuing tomorrow.

src/test_relevance.c

index ec393f3..a7683a5 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: test_relevance.c,v 1.9 2007-04-25 07:00:33 marc Exp $
+/* $Id: test_relevance.c,v 1.10 2007-04-25 13:28:55 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -37,236 +37,142 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "reclists.h"
 
 
-void test_relevance(int argc, char **argv)
+void test_relevance_7bit(int argc, char **argv)
 {
   NMEM         nmem = nmem_create();
-  int numrecs = 10;
 
-  const char * queryterms[] = 
-      {"abe", "fisk", 0};
-  //    {"ål", "økologi", "æble", 0};
-
-  //struct record_cluster *cluster = 0;
   struct conf_service *service = 0; 
-  struct reclist *list = 0;
-  struct record *record = 0;
-  //const char *mergekey = "amergekey";
-  //int total = 0;
-
-  struct relevance *rel = 0;
-  //struct client *client = 0;
-  
-
-  rel = relevance_create(nmem, queryterms, numrecs);
-  YAZ_CHECK(rel);
-  
-  list = reclist_create(nmem, numrecs);
-  YAZ_CHECK(list);
+  service =  conf_service_create(nmem, 1, 2);
 
-  service =  conf_service_create(nmem, 4, 3);
-  YAZ_CHECK(service);
-
-  YAZ_CHECK(conf_service_add_metadata(nmem, service, 0, "title",
+  conf_service_add_metadata(nmem, service, 0, "title",
                             Metadata_type_generic, Metadata_merge_unique,
-                            1, 1, 1, 0));
-
-  YAZ_CHECK(conf_service_add_metadata(nmem, service, 1, "author",
-                            Metadata_type_generic, Metadata_merge_longest,
-                            1, 1, 1, 0));
-
-  YAZ_CHECK(conf_service_add_metadata(nmem, service, 2, "isbn",
-                            Metadata_type_number, Metadata_merge_no,
-                            1, 1, 1, 0));
+                            1, 1, 1, 0);
 
-  YAZ_CHECK(conf_service_add_metadata(nmem, service, 3, "year",
-                            Metadata_type_year, Metadata_merge_range,
-                            1, 1, 1, 0));
-
-  YAZ_CHECK(conf_service_add_sortkey(nmem, service, 0, "relevance",
-                                     Metadata_sortkey_relevance));
-
-  YAZ_CHECK(conf_service_add_sortkey(nmem, service, 1, "title",
-                                     Metadata_sortkey_string));
+  conf_service_add_sortkey(nmem, service, 0, "relevance",
+                           Metadata_sortkey_relevance);
   
-  YAZ_CHECK(conf_service_add_sortkey(nmem, service, 2, "year",
-                                     Metadata_sortkey_numeric));
+  conf_service_add_sortkey(nmem, service, 1, "title",
+                           Metadata_sortkey_string);
   
 
 
 
-  // testing record things
-  record = record_create(nmem, 4, 3);
-  YAZ_CHECK(record);
-
+  // setting up records
+  
   // why on earth do we have a client dangeling from the record ??
   // record->client = client;
 
-  char * bla = "blabla";
-  union data_types data_text;
-  data_text.text = bla;
+  union data_types data_ape = {"ape"};
+  union data_types data_bee = {"bee"};
+  union data_types data_fish = {"fish"};
+  union data_types data_zebra = {"zebra"};
+  
 
+  //union data_types data_year;
+  //data_num.number.min = 2005;
+  //data_num.number.max = 2007;
+
+  int no_recs = 4;
+
+  const char *mk_ape_fish = "ape fish";
+  struct record *rec_ape_fish = 0;
+  rec_ape_fish 
+      = record_create(nmem, service->num_metadata, service->num_sortkeys);
+  record_add_metadata(nmem, rec_ape_fish, service, "title", data_ape);
+  record_assign_sortkey(nmem, rec_ape_fish, service, "relevance", data_ape);
+  record_assign_sortkey(nmem, rec_ape_fish, service, "title", data_ape);
+  record_add_metadata(nmem, rec_ape_fish, service, "title", data_fish);
+  YAZ_CHECK(rec_ape_fish);  
+
+  const char *mk_bee_fish = "bee fish";
+  struct record *rec_bee_fish = 0;
+  rec_bee_fish 
+      = record_create(nmem, service->num_metadata, service->num_sortkeys);
+  record_add_metadata(nmem, rec_bee_fish, service, "title", data_bee);
+  record_assign_sortkey(nmem, rec_bee_fish, service, "relevance", data_bee);
+  record_assign_sortkey(nmem, rec_bee_fish, service, "title", data_bee);
+  record_add_metadata(nmem, rec_bee_fish, service, "title", data_fish);
+  YAZ_CHECK(rec_bee_fish);
+  const char *mk_fish_bee = "fish bee";
+  struct record *rec_fish_bee = 0;
+  rec_fish_bee 
+      = record_create(nmem, service->num_metadata, service->num_sortkeys);
+  record_add_metadata(nmem, rec_fish_bee, service, "title", data_fish);
+  record_assign_sortkey(nmem, rec_fish_bee, service, "relevance", data_fish);
+  record_assign_sortkey(nmem, rec_fish_bee, service, "title", data_fish);
+  record_add_metadata(nmem, rec_fish_bee, service, "title", data_bee);
+  YAZ_CHECK(rec_fish_bee);
   
-  union data_types data_num;
-  data_num.number.min = 2;
-  data_num.number.max = 5;
+  const char *mk_zebra_bee = "zebra bee";
+  struct record *rec_zebra_bee = 0;
+    rec_zebra_bee 
+      = record_create(nmem, service->num_metadata, service->num_sortkeys);
+  record_add_metadata(nmem, rec_zebra_bee, service, "title", data_zebra);
+  record_assign_sortkey(nmem, rec_zebra_bee, service, "relevance", data_zebra);
+  record_assign_sortkey(nmem, rec_zebra_bee, service, "title", data_zebra);
+  record_add_metadata(nmem, rec_zebra_bee, service, "title", data_bee);
+  YAZ_CHECK(rec_zebra_bee);
 
-  struct record_metadata * tmp_md = 0;
-  tmp_md = record_metadata_insert(nmem, &(record->metadata[0]), data_text);
-  YAZ_CHECK(tmp_md);
-  tmp_md = record_metadata_insert(nmem, &tmp_md, data_text);
-  YAZ_CHECK(tmp_md);
+  
+  struct reclist *list = 0;
+  list = reclist_create(nmem, no_recs);
+  YAZ_CHECK(list);
 
-  YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num));
-  YAZ_CHECK(record_add_metadata_field_id(nmem, record, 3, data_num));
+  int no_merged = 0;
 
-  YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text));
-  YAZ_CHECK(record_add_metadata(nmem, record, service, "author", data_text));
 
+  const char * queryterms[] = 
+      {"ape", "fish", 0};
+  //    {"ål", "økologi", "æble", 0};
 
-  YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 0, data_text));
-  YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 1, data_text));
-  YAZ_CHECK(record_assign_sortkey_field_id(nmem, record, 2, data_num));
 
-  YAZ_CHECK(record_assign_sortkey(nmem, record, service, "relevance", data_text));
-  YAZ_CHECK(record_assign_sortkey(nmem, record, service, "title", data_text));
-  YAZ_CHECK(record_assign_sortkey(nmem, record, service, "year", data_num));
+  struct relevance *rel = 0;
+  rel = relevance_create(nmem, queryterms, no_recs);
+  YAZ_CHECK(rel);
+  
+  struct record_cluster *cluster = 0;
 
-   
-   
 
+  // insert records into recordlist and get clusters 
+  // since metadata keys differ, we get multiple clusters ?? 
+  cluster 
+      = reclist_insert(list, service, rec_ape_fish, mk_ape_fish, &no_merged);
+  YAZ_CHECK(cluster);
+  relevance_newrec(rel, cluster);
 
+  cluster 
+      = reclist_insert(list, service, rec_bee_fish, mk_bee_fish, &no_merged);
+  YAZ_CHECK(cluster);
+  relevance_newrec(rel, cluster);
 
+  cluster 
+      = reclist_insert(list, service, rec_fish_bee, mk_fish_bee, &no_merged);
+  YAZ_CHECK(cluster);
+  relevance_newrec(rel, cluster);
 
-  // now we need to put some actual data into the record ... how ??
-  // there is a hell of a lot spagetti code in logic.c ingest_record()
-  // line 694 ff snippet from there:
-  // this code needs to be analyzed and the fundamental stuff extracted
+  cluster 
+      = reclist_insert(list, service, rec_zebra_bee, mk_zebra_bee, &no_merged);
+  YAZ_CHECK(cluster);
+  relevance_newrec(rel, cluster);
 
-#if 0 
-  service->metadata[imeta].name;
-  md = &service->metadata[imeta];
-   if (md->sortkey_offset >= 0)
-                        sk = &service->sortkeys[md->sortkey_offset];
 
-  // Find out where we are putting it           if (md->merge == Metadata_merge_no)
-                wheretoput = &res->metadata[imeta];
-            else
-                wheretoput = &cluster->metadata[imeta];
-            
-            
-            // Put it there
-            newm = nmem_malloc(se->nmem, sizeof(struct record_metadata));
-            newm->next = 0;
-            if (md->type == Metadata_type_generic)
-            {
-                char *p, *pe;
-                for (p = (char *) value; *p && isspace(*p); p++)
-                    ;
-                for (pe = p + strlen(p) - 1;
-                        pe > p && strchr(" ,/.:([", *pe); pe--)
-                    *pe = '\0';
-                newm->data.text = nmem_strdup(se->nmem, p);
-
-            }
-            else if (md->type == Metadata_type_year)
-            {
-                if (extract_years((char *) value, &first, &last) < 0)
-                    continue;
-            }
-            else
-            {
-                yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type);
-                continue;
-            }
-            if (md->type == Metadata_type_year && md->merge != Metadata_merge_range)
-            {
-                yaz_log(YLOG_WARN, "Only range merging supported for years");
-                continue;
-            }
-            if (md->merge == Metadata_merge_unique)
-            {
-                struct record_metadata *mnode;
-                for (mnode = *wheretoput; mnode; mnode = mnode->next)
-                    if (!strcmp((const char *) mnode->data.text, newm->data.text))
-                        break;
-                if (!mnode)
-                {
-                    newm->next = *wheretoput;
-                    *wheretoput = newm;
-                }
-            }
-            else if (md->merge == Metadata_merge_longest)
-            {
-                if (!*wheretoput ||
-                        strlen(newm->data.text) > strlen((*wheretoput)->data.text))
-                {
-                    *wheretoput = newm;
-                    if (sk)
-                    {
-                        char *s = nmem_strdup(se->nmem, newm->data.text);
-                        if (!cluster->sortkeys[md->sortkey_offset])
-                            cluster->sortkeys[md->sortkey_offset] = 
-                                nmem_malloc(se->nmem, sizeof(union data_types));
-                        normalize_mergekey(s,
-                                (sk->type == Metadata_sortkey_skiparticle));
-                        cluster->sortkeys[md->sortkey_offset]->text = s;
-                    }
-                }
-            }
-            else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
-            {
-                newm->next = *wheretoput;
-                *wheretoput = newm;
-            }
-            else if (md->merge == Metadata_merge_range)
-            {
-                assert(md->type == Metadata_type_year);
-                if (!*wheretoput)
-                {
-                    *wheretoput = newm;
-                    (*wheretoput)->data.number.min = first;
-                    (*wheretoput)->data.number.max = last;
-                    if (sk)
-                        cluster->sortkeys[md->sortkey_offset] = &newm->data;
-                }
-                else
-                {
-                    if (first < (*wheretoput)->data.number.min)
-                        (*wheretoput)->data.number.min = first;
-                    if (last > (*wheretoput)->data.number.max)
-                        (*wheretoput)->data.number.max = last;
-                }
-            if (md->rank)
-                relevance_countwords(se->relevance, cluster, 
-                                     (char *) value, md->rank);
-            if (md->termlist)
-            {
-                if (md->type == Metadata_type_year)
-                {
-                    char year[64];
-                    sprintf(year, "%d", last);
-                    add_facet(se, (char *) type, year);
-                    if (first != last)
-                    {
-                        sprintf(year, "%d", first);
-                        add_facet(se, (char *) type, year);
-                    }
-                }
-                else
-                    add_facet(se, (char *) type, (char *) value);
-            }
-#endif
+  YAZ_CHECK(no_recs == no_merged);
 
-  //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
-  //normalize_mergekey((char *) mergekey_norm, 0);
+  // now sorting according to sorting criteria, here ascending title
+  struct reclist_sortparms *sort_parms = 0;
+  reclist_sortparms_insert(nmem, &sort_parms, service, "title", 1);
+  //reclist_sortparms_insert(nmem, &sort_parms, service, "relevance", 1);
+
+  // crashes with a fat segmentation fault! To be traced tomorrow
+  //reclist_sort(list, sort_parms);
+  
+  
 
 
-#if 0
-  // insert one record into recordlist/cluster - what's a cluster, exactly??
-  cluster = reclist_insert(list, service, record, (char *)mergekey, &total);
-  relevance_newrec(rel, cluster);
-#endif
+                        
+  //mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
+  //normalize_mergekey((char *) mergekey_norm, 0);
 
 
 
@@ -295,7 +201,7 @@ int main(int argc, char **argv)
     YAZ_CHECK_LOG(); 
 
 
-    test_relevance(argc, argv); 
+    test_relevance_7bit(argc, argv); 
 
     
     YAZ_CHECK_TERM;