Happy new year
[idzebra-moved-to-github.git] / index / rpnscan.c
index 6d575df..d18588e 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: rpnscan.c,v 1.22 2007-11-13 13:41:51 adam Exp $
-   Copyright (C) 1995-2007
-   Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2009 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -45,14 +42,28 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                 char *termz, zebra_map_t zm)
 {
-    char termz0[IT_MAX_WORD];
+    char term_utf8[IT_MAX_WORD];
 
-    if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
+    if (zapt_term_to_utf8(zh, zapt, term_utf8) == ZEBRA_FAIL)
         return ZEBRA_FAIL;    /* error */
+    else if (zebra_maps_is_icu(zm))
+    {
+        const char *res_buf;
+        size_t res_len;
+        zebra_map_tokenize_start(zm, term_utf8, strlen(term_utf8));
+        
+        if (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
+        {
+            memcpy(termz, res_buf, res_len);
+            termz[res_len] = '\0';
+        }
+        else
+            termz[0] = '\0';
+    }
     else
     {
         const char **map;
-        const char *cp = (const char *) termz0;
+        const char *cp = (const char *) term_utf8;
         const char *cp_end = cp + strlen(cp);
         const char *src;
         int i = 0;
@@ -79,31 +90,6 @@ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     return ZEBRA_OK;
 }
 
-static void count_set(ZebraHandle zh, RSET rset, zint *count, zint approx_limit)
-{
-    zint psysno = 0;
-    struct it_key key;
-    RSFD rfd;
-
-    yaz_log(YLOG_DEBUG, "count_set");
-
-    rset->hits_limit = approx_limit;
-
-    *count = 0;
-    rfd = rset_open(rset, RSETF_READ);
-    while (rset_read(rfd, &key,0 /* never mind terms */))
-    {
-        if (key.mem[0] != psysno)
-        {
-            psysno = key.mem[0];
-           if (rfd->counted_items >= rset->hits_limit)
-               break;
-        }
-    }
-    rset_close(rfd);
-    *count = rset->hits_count;
-}
-
 static void get_first_snippet_from_rset(ZebraHandle zh, 
                                         RSET rset, zebra_snippets *snippets, 
                                         zint *sysno)
@@ -238,13 +224,11 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem,
             rset = rset_create_and(nmem, kc, kc->scope, 2, rsets);
         }
         /* count it */
-        count_set(zh, rset, &count, approx_limit);
+        zebra_count_set(zh, rset, &count, approx_limit);
 
         if (pos != -1)
         {
             zint sysno;
-            int code = -1;
-            zebra_snippets *rec_snippets = zebra_snippets_create();
             zebra_snippets *hit_snippets = zebra_snippets_create();
 
             glist[pos].term = 0;
@@ -252,22 +236,38 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem,
             
             get_first_snippet_from_rset(zh, rset, hit_snippets, &sysno);
             if (sysno)
-                code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
-         
-            if (code == 0)
             {
-                const struct zebra_snippet_word *w = 
-                    zebra_snippets_lookup(rec_snippets, hit_snippets);
-                if (w)
+                zebra_snippets *rec_snippets = zebra_snippets_create();
+                int code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
+                if (code == 0)
                 {
-                    glist[pos].display_term = odr_strdup(stream, w->term);
+                    const struct zebra_snippet_word *w = 
+                        zebra_snippets_lookup(rec_snippets, hit_snippets);
+                    if (w)
+                    {
+                        glist[pos].display_term = odr_strdup(stream, w->term);
+                    }
+                    else
+                    {
+                        yaz_log(YLOG_WARN, "zebra_snippets_lookup failed for pos=%d", pos);
+                    }
                 }
+                zebra_snippets_destroy(rec_snippets);
             }
+            if (zebra_term_untrans_iconv(zh, stream->mem, index_type,
+                                         &glist[pos].term, term))
+            {
+                /* untrans failed: fall back to display_term (may be 0) */
+                glist[pos].term = glist[pos].display_term;
+            }
+
             if (!glist[pos].term)
-                zebra_term_untrans_iconv(zh, stream->mem, index_type,
-                                         &glist[pos].term, term);
+            {
+                yaz_log(YLOG_WARN, "Could not generate scan term for pos=%d",
+                        pos);
+                glist[pos].term = "None";
+            }
             glist[pos].occurrences = count;
-            zebra_snippets_destroy(rec_snippets);
             zebra_snippets_destroy(hit_snippets);
         }
         rset_delete(rset);
@@ -542,11 +542,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
        zebra_setError(zh, YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED, 0);
         return ZEBRA_FAIL;
     }
-    if (sort_flag)
-    {
-        return rpn_facet(zh, stream, zapt, attributeset, position, num_entries,
-                         list, is_partial, set_name);
-    }
     for (base_no = 0; base_no < num_bases; base_no++)
     {
        int ord;