SOLR + facets: use Odr_int for counts
authorAdam Dickmeiss <adam@indexdata.dk>
Mon, 16 May 2011 13:54:00 +0000 (15:54 +0200)
committerAdam Dickmeiss <adam@indexdata.dk>
Mon, 16 May 2011 13:54:00 +0000 (15:54 +0200)
Use Odr_int rather than int for hit counts/occurrences, since int is usually
limited to 2^31-1. Also use facet_term_create_cstr to create Z_FacetTerms, so
that the count is an Odr_int and the intermediate Z_Term(s) are avoided.

include/yaz/facet.h
src/facet.c
src/solr.c
src/srw.c
test/test_solr.c
ztest/ztest.c

index 5ac83ce..2ff9bc1 100644 (file)
@@ -69,10 +69,7 @@ void yaz_facet_attr_get_z_attributes(const Z_AttributeList *attributes,
                                     struct yaz_facet_attr *av);
 
 YAZ_EXPORT
-Z_Term *term_create(ODR odr, const char *cstr);
-
-YAZ_EXPORT
-Z_FacetTerm* facet_term_create(ODR odr, Z_Term *term, int freq);
+Z_FacetTerm *facet_term_create_cstr(ODR odr, const char *cstr, Odr_int freq);
 
 YAZ_EXPORT
 Z_FacetField* facet_field_create(ODR odr, Z_AttributeList *attributes,
index 588e802..7092768 100644 (file)
@@ -17,6 +17,7 @@
 #include <yaz/oid_db.h>
 #include <yaz/oid_std.h>
 #include <yaz/otherinfo.h>
+#include <yaz/pquery.h>
 #include <assert.h>
 
 void yaz_oi_set_facetlist(
@@ -199,6 +200,7 @@ void yaz_facet_attr_get_z_attributes(const Z_AttributeList *attributes,
     return;
 } /* facetattrs */
 
+#if 0
 Z_Term *term_create(ODR odr, const char *cstr)
 {
     Z_Term *term = odr_malloc(odr, sizeof(*term));
@@ -215,6 +217,16 @@ Z_FacetTerm* facet_term_create(ODR odr, Z_Term *term, int freq)
     *facet_term->count = freq;
     return facet_term;
 }
+#endif
+
+Z_FacetTerm *facet_term_create_cstr(ODR odr, const char *cstr, Odr_int freq)
+{ /* Builds a Z_FacetTerm from the C string cstr and the count freq; returns the new facet term. */
+    Z_FacetTerm *facet_term = odr_malloc(odr, sizeof(*facet_term)); /* obtained via odr; the result is used without a NULL check */
+    Z_Term *term = z_Term_create(odr, Z_Term_general, cstr, strlen(cstr)); /* cstr must be non-NULL and NUL-terminated because strlen() is applied to it */
+    facet_term->term = term;
+    facet_term->count = odr_intdup(odr, freq); /* freq is an Odr_int; the value returned by odr_intdup() is stored as the count */
+    return facet_term;
+}
 
 Z_FacetField* facet_field_create(ODR odr, Z_AttributeList *attributes,
                                  int num_terms)
index c13c4ed..701e9f3 100644 (file)
@@ -118,7 +118,7 @@ static int yaz_solr_decode_result(ODR o, xmlNodePtr ptr,
     return -1;
 }
 
-static const char *get_facet_term_count(xmlNodePtr node, int *freq)
+static const char *get_facet_term_count(xmlNodePtr node, Odr_int *freq)
 {
     const char *term = yaz_element_attribute_value_get(node, "int", "name");
     xmlNodePtr child;
@@ -131,7 +131,7 @@ static const char *get_facet_term_count(xmlNodePtr node, int *freq)
         if (child->type == XML_TEXT_NODE)
             wrbuf_puts(wrbuf, (const char *) child->content);
     }
-    *freq = atoi(wrbuf_cstr(wrbuf));
+    *freq = odr_atoi(wrbuf_cstr(wrbuf));
     wrbuf_destroy(wrbuf);
     return term;
 }
@@ -154,11 +154,10 @@ Z_FacetField *yaz_solr_decode_facet_field(ODR o, xmlNodePtr ptr,
     index = 0;
     for (node = ptr->children; node; node = node->next)
     {
-        int count = 0;
+        Odr_int count = 0;
         const char *term = get_facet_term_count(node, &count);
         facet_field_term_set(o, facet_field,
-                             facet_term_create(o, term_create(o, term), count),
-                             index);
+                             facet_term_create_cstr(o, term, count), index);
         index++;
     }
     return facet_field;
index 76ca208..3bf499c 100644 (file)
--- a/src/srw.c
+++ b/src/srw.c
@@ -447,14 +447,14 @@ static int yaz_srw_versions(ODR o, xmlNodePtr pptr,
 
 Z_FacetTerm *yaz_sru_proxy_get_facet_term_count(ODR odr, xmlNodePtr node)
 {
-    int freq;
+    Odr_int freq;
     xmlNodePtr child;
     WRBUF wrbuf = wrbuf_alloc();
+    Z_FacetTerm *facet_term;
     const char *freq_string = yaz_element_attribute_value_get(
         node, "facetvalue", "est_representation");
-    Z_Term *term;
     if (freq_string)
-        freq =  atoi(freq_string);
+        freq = odr_atoi(freq_string);
     else
         freq = -1;
 
@@ -463,10 +463,9 @@ Z_FacetTerm *yaz_sru_proxy_get_facet_term_count(ODR odr, xmlNodePtr node)
         if (child->type == XML_TEXT_NODE)
             wrbuf_puts(wrbuf, (const char *) child->content);
     }
-    term = term_create(odr, wrbuf_cstr(wrbuf));
-    yaz_log(YLOG_DEBUG, "sru-proxy facet: %s %d", wrbuf_cstr(wrbuf), freq);
+    facet_term = facet_term_create_cstr(odr, wrbuf_cstr(wrbuf), freq);
     wrbuf_destroy(wrbuf);
-    return facet_term_create(odr, term, freq);
+    return facet_term;
 };
 
 static Z_FacetField *yaz_sru_proxy_decode_facet_field(ODR odr, xmlNodePtr ptr)
index 123d553..5f69531 100644 (file)
@@ -164,8 +164,8 @@ void tst_decoding(void)
 {
 #if YAZ_HAVE_XML2
     ODR odr = odr_createmem(ODR_DECODE);
-
     Z_SRW_searchRetrieveResponse *response;
+
     YAZ_CHECK(check_response(
                   odr, 
                   "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
@@ -182,8 +182,150 @@ void tst_decoding(void)
     YAZ_CHECK_EQ(response->num_diagnostics, 0);
     YAZ_CHECK(response->diagnostics == 0);
     YAZ_CHECK(response->nextRecordPosition == 0);
+    YAZ_CHECK(response->facetList == 0);
 
     odr_reset(odr);
+
+    YAZ_CHECK(
+        check_response(
+            odr, 
+            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+            "<response><lst name=\"responseHeader\">"
+            "<int name=\"status\">0</int><int name=\"QTime\">2</int>"
+            "<lst name=\"params\"><str name=\"facet\">true</str>"
+            "<str name=\"facet.mincount\">1</str><str name=\"start\">0</str>"
+            "<str name=\"q\">@attr 1=title solr</str>"
+            "<str name=\"f.date.facet.limit\">5</str>"
+            "<str name=\"facet.field\">date</str>"
+            "<str name=\"rows\">1</str></lst>"
+            "</lst><result name=\"response\" numFound=\"91000000000\" start=\"0\">"
+            "<doc><str name=\"author\">Alenius, Hans,</str>"
+            "<str name=\"author-date\">1937-</str>"
+            "<str name=\"author-title\"/>"
+            "<arr name=\"date\"><str>1969</str></arr>"
+            "<str name=\"id\">   73857731 </str>"
+            "<arr name=\"lccn\"><str>   73857731 </str></arr>"
+            "<arr name=\"medium\"><str>book</str></arr>"
+            "<arr name=\"medium_exact\"><str>book</str></arr>"
+            "<arr name=\"physical-accomp\"><str/></arr>"
+            "<arr name=\"physical-dimensions\"><str>20 cm.</str></arr>"
+            "<arr name=\"physical-extent\"><str>140, (1) p.</str></arr>"
+            "<arr name=\"physical-format\"><str>illus.</str></arr>"
+            "<arr name=\"physical-specified\"><str/></arr>"
+            "<arr name=\"physical-unitsize\"><str/></arr>"
+            "<arr name=\"physical-unittype\"><str/></arr>"
+            "<arr name=\"publication-date\"><str>1969.</str></arr>"
+            "<arr name=\"publication-name\"><str>Norstedt,</str></arr>"
+            "<arr name=\"publication-place\"><str>Stockholm,</str></arr>"
+            "<arr name=\"subject\"><str>Photography</str><str>Artistic</str></arr>"
+            "<arr name=\"subject-long\"><str>Photography, Artistic.</str></arr>"
+            "<arr name=\"subject_exact\"><str>Photography</str><str>Artistic</str></arr>"
+            "<arr name=\"system-control-nr\"><str>(OCoLC)36247690</str></arr>"
+            "<str name=\"title\">Solring.</str><str name=\"title-complete\">Solring.</str>"
+            "<str name=\"title-dates\"/><str name=\"title-medium\"/>"
+            "<str name=\"title-number-section\"/><str name=\"title-remainder\"/>"
+            "<str name=\"title-responsibility\"/><str name=\"title_exact\">Solring.</str>"
+            "</doc></result><lst name=\"facet_counts\">"
+            "<lst name=\"facet_queries\"/>"
+            "<lst name=\"facet_fields\">"
+            "<lst name=\"date\"><int name=\"1978\">5000000000</int><int name=\"1983\">4</int>"
+            "<int name=\"1987\">4</int><int name=\"1988\">4</int>"
+            "<int name=\"2003\">3</int></lst></lst><lst name=\"facet_dates\"/>"
+            "</lst></response>", &response));
+#if HAVE_LONG_LONG
+    YAZ_CHECK(*response->numberOfRecords == 91000000000LL);
+#endif
+    YAZ_CHECK_EQ(response->num_records, 1);
+    YAZ_CHECK(response->records);
+    if (response->records)
+    {
+        const char *doc =
+            "<doc><str name=\"author\">Alenius, Hans,</str>"
+            "<str name=\"author-date\">1937-</str>"
+            "<str name=\"author-title\"/>"
+            "<arr name=\"date\"><str>1969</str></arr>"
+            "<str name=\"id\">   73857731 </str>"
+            "<arr name=\"lccn\"><str>   73857731 </str></arr>"
+            "<arr name=\"medium\"><str>book</str></arr>"
+            "<arr name=\"medium_exact\"><str>book</str></arr>"
+            "<arr name=\"physical-accomp\"><str/></arr>"
+            "<arr name=\"physical-dimensions\"><str>20 cm.</str></arr>"
+            "<arr name=\"physical-extent\"><str>140, (1) p.</str></arr>"
+            "<arr name=\"physical-format\"><str>illus.</str></arr>"
+            "<arr name=\"physical-specified\"><str/></arr>"
+            "<arr name=\"physical-unitsize\"><str/></arr>"
+            "<arr name=\"physical-unittype\"><str/></arr>"
+            "<arr name=\"publication-date\"><str>1969.</str></arr>"
+            "<arr name=\"publication-name\"><str>Norstedt,</str></arr>"
+            "<arr name=\"publication-place\"><str>Stockholm,</str></arr>"
+            "<arr name=\"subject\"><str>Photography</str><str>Artistic</str></arr>"
+            "<arr name=\"subject-long\"><str>Photography, Artistic.</str></arr>"
+            "<arr name=\"subject_exact\"><str>Photography</str><str>Artistic</str></arr>"
+            "<arr name=\"system-control-nr\"><str>(OCoLC)36247690</str></arr>"
+            "<str name=\"title\">Solring.</str><str name=\"title-complete\">Solring.</str>"
+            "<str name=\"title-dates\"/><str name=\"title-medium\"/>"
+            "<str name=\"title-number-section\"/><str name=\"title-remainder\"/>"
+            "<str name=\"title-responsibility\"/><str name=\"title_exact\">Solring.</str>"
+            "</doc>";
+
+        Z_SRW_record *record = response->records;
+        
+        YAZ_CHECK(record->recordData_len == strlen(doc) &&
+                  !memcmp(record->recordData_buf, doc, record->recordData_len));
+    }
+    YAZ_CHECK_EQ(response->num_diagnostics, 0);
+    YAZ_CHECK(response->diagnostics == 0);
+    YAZ_CHECK(response->nextRecordPosition == 0);
+
+    YAZ_CHECK(response->facetList);
+    if (response->facetList)
+    {
+        Z_FacetList *facetList = response->facetList;
+
+        YAZ_CHECK(facetList->num == 1);
+        if (facetList->num == 1)
+        {
+            Z_FacetField *facetField = facetList->elements[0];
+            int i;
+
+            YAZ_CHECK(facetField->num_terms == 5);
+            if (facetField->num_terms == 5)
+            {
+                for (i = 0; i < facetField->num_terms; i++)
+                {
+                    YAZ_CHECK(
+                        facetField->terms[i] &&
+                        facetField->terms[i]->term &&
+                        facetField->terms[i]->term->which == Z_Term_general);
+                }
+#if HAVE_LONG_LONG
+                YAZ_CHECK(*facetField->terms[0]->count == 5000000000LL);
+#endif
+                YAZ_CHECK(facetField->terms[0]->term->u.general->len == 4
+                          && !memcmp(facetField->terms[0]->term->u.general->buf,
+                                     "1978", 4));
+                YAZ_CHECK(*facetField->terms[1]->count == 4);
+                YAZ_CHECK(facetField->terms[1]->term->u.general->len == 4
+                          && !memcmp(facetField->terms[1]->term->u.general->buf,
+                                     "1983", 4));
+                YAZ_CHECK(*facetField->terms[2]->count == 4);
+                YAZ_CHECK(facetField->terms[2]->term->u.general->len == 4
+                          && !memcmp(facetField->terms[2]->term->u.general->buf,
+                                     "1987", 4));
+                YAZ_CHECK(*facetField->terms[3]->count == 4);
+                YAZ_CHECK(facetField->terms[3]->term->u.general->len == 4
+                          && !memcmp(facetField->terms[3]->term->u.general->buf,
+                                     "1988", 4));
+                YAZ_CHECK(*facetField->terms[4]->count == 3);
+                YAZ_CHECK(facetField->terms[4]->term->u.general->len == 4
+                          && !memcmp(facetField->terms[4]->term->u.general->buf,
+                                     "2003", 4));
+            }
+        }
+    }
+
+    odr_reset(odr);
+
     odr_destroy(odr);
 #endif
 }
index 3c96d91..ff8a6b1 100644 (file)
@@ -230,19 +230,20 @@ static void do_delay(const struct delay *delayp)
     }
 }
 
-static void addterms(ODR odr, Z_FacetField *facet_field, const char *facet_name) {
+static void addterms(ODR odr, Z_FacetField *facet_field, const char *facet_name)
+{
     int index;
     int freq = 100;
     int length = strlen(facet_name) + 10;
     char *key = odr_malloc(odr, length);
     key[0] = '\0';
-    for (index = 0; index < facet_field->num_terms; index++) {
-        Z_Term *term;
+    for (index = 0; index < facet_field->num_terms; index++)
+    {
         Z_FacetTerm *facet_term;
         sprintf(key, "%s%d", facet_name, index);
         yaz_log(YLOG_DEBUG, "facet add term %s %d %s", facet_name, index, key);
-        term = term_create(odr, key);
-        facet_term = facet_term_create(odr, term, freq);
+        
+        facet_term = facet_term_create_cstr(odr, key, freq);
         freq = freq - 10 ;
         facet_field_term_set(odr, facet_field, facet_term, index);
     }