Rename some charset functions
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 14 Sep 2011 07:37:54 +0000 (09:37 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 14 Sep 2011 07:37:54 +0000 (09:37 +0200)
The "relevance" name was appropriate when the charset system was only
used to normalize relevance terms, but it is inappropriate now that the
system is used to normalize four different types of terms.

src/charsets.c
src/charsets.h
src/relevance.c
src/session.c

index 397e73b..0199514 100644 (file)
@@ -45,32 +45,32 @@ static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node);
 static pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn);
 static pp2_charset_t pp2_charset_create_a_to_z(void);
 static void pp2_charset_destroy(pp2_charset_t pct);
-static pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct);
+static pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct);
 
 /* charset handle */
 struct pp2_charset_s {
-    const char *(*token_next_handler)(pp2_relevance_token_t prt);
-    const char *(*get_sort_handler)(pp2_relevance_token_t prt);
-    const char *(*get_display_handler)(pp2_relevance_token_t prt);
+    const char *(*token_next_handler)(pp2_charset_token_t prt);
+    const char *(*get_sort_handler)(pp2_charset_token_t prt);
+    const char *(*get_display_handler)(pp2_charset_token_t prt);
 #if YAZ_HAVE_ICU
     struct icu_chain * icu_chn;
     UErrorCode icu_sts;
 #endif
 };
 
-static const char *pp2_relevance_token_null(pp2_relevance_token_t prt);
-static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt);
-static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt);
-static const char *pp2_get_display_ascii(pp2_relevance_token_t prt);
+static const char *pp2_charset_token_null(pp2_charset_token_t prt);
+static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt);
+static const char *pp2_get_sort_ascii(pp2_charset_token_t prt);
+static const char *pp2_get_display_ascii(pp2_charset_token_t prt);
 
 #if YAZ_HAVE_ICU
-static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt);
-static const char *pp2_get_sort_icu(pp2_relevance_token_t prt);
-static const char *pp2_get_display_icu(pp2_relevance_token_t prt);
+static const char *pp2_charset_token_icu(pp2_charset_token_t prt);
+static const char *pp2_get_sort_icu(pp2_charset_token_t prt);
+static const char *pp2_get_display_icu(pp2_charset_token_t prt);
 #endif
 
 /* tokenizer handle */
-struct pp2_relevance_token_s {
+struct pp2_charset_token_s {
     const char *cp;     /* unnormalized buffer we're tokenizing */
     const char *last_cp;  /* pointer to last token we're dealing with */
     pp2_charset_t pct;  /* our main charset handle (type+config) */
@@ -218,7 +218,7 @@ pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node)
 pp2_charset_t pp2_charset_create_a_to_z(void)
 {
     pp2_charset_t pct = pp2_charset_create(0);
-    pct->token_next_handler = pp2_relevance_token_a_to_z;
+    pct->token_next_handler = pp2_charset_token_a_to_z;
     return pct;
 }
 
@@ -226,7 +226,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn)
 {
     pp2_charset_t pct = xmalloc(sizeof(*pct));
 
-    pct->token_next_handler = pp2_relevance_token_null;
+    pct->token_next_handler = pp2_charset_token_null;
     pct->get_sort_handler  = pp2_get_sort_ascii;
     pct->get_display_handler  = pp2_get_display_ascii;
 #if YAZ_HAVE_ICU
@@ -235,7 +235,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn)
     {
         pct->icu_chn = icu_chn;
         pct->icu_sts = U_ZERO_ERROR;
-        pct->token_next_handler = pp2_relevance_token_icu;
+        pct->token_next_handler = pp2_charset_token_icu;
         pct->get_sort_handler = pp2_get_sort_icu;
         pct->get_display_handler = pp2_get_display_icu;
     }
@@ -251,19 +251,19 @@ void pp2_charset_destroy(pp2_charset_t pct)
     xfree(pct);
 }
 
-pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft,
-                                           const char *id)
+pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft,
+                                               const char *id)
 {
     struct pp2_charset_entry *pce;
     for (pce = pft->list; pce; pce = pce->next)
         if (!strcmp(id, pce->name))
-            return pp2_relevance_tokenize(pce->pct);
+            return pp2_charset_tokenize(pce->pct);
     return 0;
 }
 
-pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct)
+pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct)
 {
-    pp2_relevance_token_t prt = xmalloc(sizeof(*prt));
+    pp2_charset_token_t prt = xmalloc(sizeof(*prt));
 
     assert(pct);
 
@@ -281,9 +281,8 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct)
     return prt;
 }
 
-void pp2_relevance_first(pp2_relevance_token_t prt,
-                         const char *buf,
-                         int skip_article)
+void pp2_charset_token_first(pp2_charset_token_t prt,
+                             const char *buf, int skip_article)
 { 
     if (skip_article)
     {
@@ -313,7 +312,7 @@ void pp2_relevance_first(pp2_relevance_token_t prt,
 #endif // YAZ_HAVE_ICU
 }
 
-void pp2_relevance_token_destroy(pp2_relevance_token_t prt)
+void pp2_charset_token_destroy(pp2_charset_token_t prt)
 {
     assert(prt);
 #if YAZ_HAVE_ICU
@@ -327,18 +326,18 @@ void pp2_relevance_token_destroy(pp2_relevance_token_t prt)
     xfree(prt);
 }
 
-const char *pp2_relevance_token_next(pp2_relevance_token_t prt)
+const char *pp2_charset_token_next(pp2_charset_token_t prt)
 {
     assert(prt);
     return (prt->pct->token_next_handler)(prt);
 }
 
-const char *pp2_get_sort(pp2_relevance_token_t prt)
+const char *pp2_get_sort(pp2_charset_token_t prt)
 {
     return prt->pct->get_sort_handler(prt);
 }
 
-const char *pp2_get_display(pp2_relevance_token_t prt)
+const char *pp2_get_display(pp2_charset_token_t prt)
 {
     return prt->pct->get_display_handler(prt);
 }
@@ -347,7 +346,7 @@ const char *pp2_get_display(pp2_relevance_token_t prt)
 /* original tokenizer with our tokenize interface, but we
    add +1 to ensure no '\0' are in our string (except for EOF)
 */
-static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt)
+static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt)
 {
     const char *cp = prt->cp;
     int c;
@@ -374,7 +373,7 @@ static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt)
     return wrbuf_cstr(prt->norm_str);
 }
 
-static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt)
+static const char *pp2_get_sort_ascii(pp2_charset_token_t prt)
 {
     if (prt->last_cp == 0)
         return 0;
@@ -391,7 +390,7 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt)
     }
 }
 
-static const char *pp2_get_display_ascii(pp2_relevance_token_t prt)
+static const char *pp2_get_display_ascii(pp2_charset_token_t prt)
 {
     if (prt->last_cp == 0)
         return 0;
@@ -401,7 +400,7 @@ static const char *pp2_get_display_ascii(pp2_relevance_token_t prt)
     }
 }
 
-static const char *pp2_relevance_token_null(pp2_relevance_token_t prt)
+static const char *pp2_charset_token_null(pp2_charset_token_t prt)
 {
     const char *cp = prt->cp;
 
@@ -413,7 +412,7 @@ static const char *pp2_relevance_token_null(pp2_relevance_token_t prt)
 }
 
 #if YAZ_HAVE_ICU
-static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
+static const char *pp2_charset_token_icu(pp2_charset_token_t prt)
 {
     if (icu_iter_next(prt->iter))
     {
@@ -422,12 +421,12 @@ static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
     return 0;
 }
 
-static const char *pp2_get_sort_icu(pp2_relevance_token_t prt)
+static const char *pp2_get_sort_icu(pp2_charset_token_t prt)
 {
     return icu_iter_get_sortkey(prt->iter);
 }
 
-static const char *pp2_get_display_icu(pp2_relevance_token_t prt)
+static const char *pp2_get_display_icu(pp2_charset_token_t prt)
 {
     return icu_iter_get_display(prt->iter);
 }
index cc9f269..1a7381f 100644 (file)
@@ -27,25 +27,25 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/wrbuf.h>
 #include <yaz/xmltypes.h>
 
-typedef struct pp2_relevance_token_s *pp2_relevance_token_t;
+typedef struct pp2_charset_token_s *pp2_charset_token_t;
 typedef struct pp2_charset_fact_s *pp2_charset_fact_t;
 
-void pp2_relevance_first(pp2_relevance_token_t prt,
-                         const char *buf,
-                         int skip_article);
-
-void pp2_relevance_token_destroy(pp2_relevance_token_t prt);
-const char *pp2_relevance_token_next(pp2_relevance_token_t prt);
-const char *pp2_get_sort(pp2_relevance_token_t prt);
-const char *pp2_get_display(pp2_relevance_token_t prt);
-
 pp2_charset_fact_t pp2_charset_fact_create(void);
 void pp2_charset_fact_destroy(pp2_charset_fact_t pft);
 int pp2_charset_fact_define(pp2_charset_fact_t pft,
                             xmlNode *xml_node, const char *default_id);
-pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft,
-                                           const char *id);
 void pp2_charset_fact_incref(pp2_charset_fact_t pft);
+pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft,
+                                             const char *id);
+
+void pp2_charset_token_first(pp2_charset_token_t prt,
+                             const char *buf,
+                             int skip_article);
+void pp2_charset_token_destroy(pp2_charset_token_t prt);
+const char *pp2_charset_token_next(pp2_charset_token_t prt);
+const char *pp2_get_sort(pp2_charset_token_t prt);
+const char *pp2_get_display(pp2_charset_token_t prt);
+
 #endif
 
 /*
index 4df7750..708f2ba 100644 (file)
@@ -33,7 +33,7 @@ struct relevance
     int *doc_frequency_vec;
     int vec_len;
     struct word_entry *entries;
-    pp2_relevance_token_t prt;
+    pp2_charset_token_t prt;
     NMEM nmem;
 };
 
@@ -68,7 +68,7 @@ int word_entry_match(struct word_entry *entries, const char *norm_str)
     return 0;
 }
 
-static struct word_entry *build_word_entries(pp2_relevance_token_t prt,
+static struct word_entry *build_word_entries(pp2_charset_token_t prt,
                                              NMEM nmem,
                                              const char **terms)
 {
@@ -80,8 +80,8 @@ static struct word_entry *build_word_entries(pp2_relevance_token_t prt,
     {
         const char *norm_str;
 
-        pp2_relevance_first(prt, *p, 0);
-        while ((norm_str = pp2_relevance_token_next(prt)))
+        pp2_charset_token_first(prt, *p, 0);
+        while ((norm_str = pp2_charset_token_next(prt)))
             add_word_entry(nmem, &entries, norm_str, termno);
         termno++;
     }
@@ -95,11 +95,11 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
     const char *norm_str;
     int i, length = 0;
 
-    pp2_relevance_first(r->prt, words, 0);
+    pp2_charset_token_first(r->prt, words, 0);
     for (i = 1; i < r->vec_len; i++)
         mult[i] = 0;
 
-    while ((norm_str = pp2_relevance_token_next(r->prt)))
+    while ((norm_str = pp2_charset_token_next(r->prt)))
     {
         int res = word_entry_match(r->entries, norm_str);
         if (res)
@@ -133,7 +133,7 @@ struct relevance *relevance_create(pp2_charset_fact_t pft,
     res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
     memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int));
     res->nmem = nmem;
-    res->prt = pp2_relevance_create(pft, "relevance");
+    res->prt = pp2_charset_token_create(pft, "relevance");
     res->entries = build_word_entries(res->prt, nmem, terms);
     return res;
 }
@@ -142,7 +142,7 @@ void relevance_destroy(struct relevance **rp)
 {
     if (*rp)
     {
-        pp2_relevance_token_destroy((*rp)->prt);
+        pp2_charset_token_destroy((*rp)->prt);
         *rp = 0;
     }
 }
index d56b396..00b70bb 100644 (file)
@@ -188,7 +188,7 @@ void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num)
 void add_facet(struct session *s, const char *type, const char *value, int count)
 {
     struct conf_service *service = s->service;
-    pp2_relevance_token_t prt;
+    pp2_charset_token_t prt;
     const char *facet_component;
     WRBUF facet_wrbuf = wrbuf_alloc();
     WRBUF display_wrbuf = wrbuf_alloc();
@@ -202,7 +202,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count
 
     if (!icu_chain_id)
         icu_chain_id = "facet";
-    prt = pp2_relevance_create(service->charsets, icu_chain_id);
+    prt = pp2_charset_token_create(service->charsets, icu_chain_id);
     if (!prt)
     {
         yaz_log(YLOG_FATAL, "Unknown ICU chain '%s' for facet of type '%s'",
@@ -211,8 +211,8 @@ void add_facet(struct session *s, const char *type, const char *value, int count
         wrbuf_destroy(display_wrbuf);
         return;
     }
-    pp2_relevance_first(prt, value, 0);
-    while ((facet_component = pp2_relevance_token_next(prt)))
+    pp2_charset_token_first(prt, value, 0);
+    while ((facet_component = pp2_charset_token_next(prt)))
     {
         const char *display_component;
         if (*facet_component)
@@ -229,7 +229,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count
             wrbuf_puts(display_wrbuf, display_component);
         }
     }
-    pp2_relevance_token_destroy(prt);
+    pp2_charset_token_destroy(prt);
  
     yaz_log(YLOG_LOG, "facet norm=%s", wrbuf_cstr(facet_wrbuf));
     yaz_log(YLOG_LOG, "facet display=%s", wrbuf_cstr(display_wrbuf));
@@ -1135,15 +1135,15 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                 if (value)
                 {
                     const char *norm_str;
-                    pp2_relevance_token_t prt =
-                        pp2_relevance_create(service->charsets, "mergekey");
+                    pp2_charset_token_t prt =
+                        pp2_charset_token_create(service->charsets, "mergekey");
                     
-                    pp2_relevance_first(prt, (const char *) value, 0);
+                    pp2_charset_token_first(prt, (const char *) value, 0);
                     if (wrbuf_len(norm_wr) > 0)
                         wrbuf_puts(norm_wr, " ");
                     wrbuf_puts(norm_wr, name);
                     while ((norm_str =
-                            pp2_relevance_token_next(prt)))
+                            pp2_charset_token_next(prt)))
                     {
                         if (*norm_str)
                         {
@@ -1152,7 +1152,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                         }
                     }
                     xmlFree(value);
-                    pp2_relevance_token_destroy(prt);
+                    pp2_charset_token_destroy(prt);
                     no_found++;
                 }
             }
@@ -1174,11 +1174,11 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
     if (mergekey)
     {
         const char *norm_str;
-        pp2_relevance_token_t prt =
-            pp2_relevance_create(service->charsets, "mergekey");
+        pp2_charset_token_t prt =
+            pp2_charset_token_create(service->charsets, "mergekey");
 
-        pp2_relevance_first(prt, (const char *) mergekey, 0);
-        while ((norm_str = pp2_relevance_token_next(prt)))
+        pp2_charset_token_first(prt, (const char *) mergekey, 0);
+        while ((norm_str = pp2_charset_token_next(prt)))
         {
             if (*norm_str)
             {
@@ -1187,7 +1187,7 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
                 wrbuf_puts(norm_wr, norm_str);
             }
         }
-        pp2_relevance_token_destroy(prt);
+        pp2_charset_token_destroy(prt);
         xmlFree(mergekey);
     }
     else
@@ -1386,7 +1386,7 @@ static int ingest_to_cluster(struct client *cl,
     // now parsing XML record and adding data to cluster or record metadata
     for (n = root->children; n; n = n->next)
     {
-        pp2_relevance_token_t prt;
+        pp2_charset_token_t prt;
         if (type)
             xmlFree(type);
         if (value)
@@ -1481,12 +1481,13 @@ static int ingest_to_cluster(struct client *cl,
                                 nmem_malloc(se->nmem, 
                                             sizeof(union data_types));
                          
-                        prt = pp2_relevance_create(service->charsets, "sort");
+                        prt =
+                            pp2_charset_token_create(service->charsets, "sort");
 
-                        pp2_relevance_first(prt, rec_md->data.text.disp,
-                                            skip_article);
+                        pp2_charset_token_first(prt, rec_md->data.text.disp,
+                                                skip_article);
 
-                        pp2_relevance_token_next(prt);
+                        pp2_charset_token_next(prt);
                          
                         sort_str = pp2_get_sort(prt);
                          
@@ -1500,7 +1501,7 @@ static int ingest_to_cluster(struct client *cl,
                         }
                         cluster->sortkeys[sk_field_id]->text.sort = 
                             nmem_strdup(se->nmem, sort_str);
-                        pp2_relevance_token_destroy(prt);
+                        pp2_charset_token_destroy(prt);
                     }
                 }
             }