Scan now returns displayTerm, which is extracted from the original record.
author Adam Dickmeiss <adam@indexdata.dk>
Tue, 21 Aug 2007 13:27:04 +0000 (13:27 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Tue, 21 Aug 2007 13:27:04 +0000 (13:27 +0000)
Goodbye to @'s - for scan. Bug #1411.

include/idzebra/api.h
include/idzebra/snippet.h
index/extract.c
index/index.h
index/retrieve.c
index/rpnscan.c
index/zebrasrv.c
util/snippet.c

index f38e657..4a75217 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: api.h,v 1.51 2007-05-21 11:54:59 adam Exp $
+/* $Id: api.h,v 1.52 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -65,6 +65,7 @@ typedef struct {
 typedef struct {
     zint occurrences;    /* scan term occurrences */
     char *term;          /* scan term string */
+    char *display_term;  /* display scan term entry */
 } ZebraScanEntry;
 
 /** \var ZebraHandle
index 474b9a4..60a4ecf 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: snippet.h,v 1.9 2007-08-21 11:06:46 adam Exp $
+/* $Id: snippet.h,v 1.10 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -81,6 +81,10 @@ void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit,
                          int before, int after);
 
 
+YAZ_EXPORT
+const struct zebra_snippet_word *zebra_snippets_lookup(
+    const zebra_snippets *doc, const zebra_snippets *hit);
+
 YAZ_END_CDECL
 
 #endif
index 64e39d3..d9ed681 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: extract.c,v 1.259 2007-08-21 11:06:47 adam Exp $
+/* $Id: extract.c,v 1.260 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -128,9 +128,75 @@ struct snip_rec_info {
 };
 
 
-static void snippet_add_complete_field(RecWord *p)
+static void snippet_add_complete_field(RecWord *p, int ord)
 {
+    struct snip_rec_info *h = p->extractCtrl->handle;
+    ZebraHandle zh = h->zh;
+
+    const char *b = p->term_buf;
+    char buf[IT_MAX_WORD+1];
+    const char **map = 0;
+    int i = 0, remain = p->term_len;
+    const char *start = b;
+    const char *last = 0;
+
+    if (remain > 0)
+       map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1);
+
+    while (remain > 0 && i < IT_MAX_WORD)
+    {
+       while (map && *map && **map == *CHR_SPACE)
+       {
+           remain = p->term_len - (b - p->term_buf);
+
+            if (i == 0)
+                start = b;  /* set to first non-ws area */
+           if (remain > 0)
+           {
+               int first = i ? 0 : 1;  /* first position */
+
+               map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, 
+                                       &b, remain, first);
+           }
+           else
+               map = 0;
+       }
+       if (!map)
+           break;
+
+       if (i && i < IT_MAX_WORD)
+           buf[i++] = *CHR_SPACE;
+       while (map && *map && **map != *CHR_SPACE)
+       {
+           const char *cp = *map;
 
+           if (**map == *CHR_CUT)
+           {
+               i = 0;
+           }
+           else
+           {
+               if (i >= IT_MAX_WORD)
+                   break;
+               while (i < IT_MAX_WORD && *cp)
+                   buf[i++] = *(cp++);
+           }
+            last = b;
+           remain = p->term_len  - (b - p->term_buf);
+           if (remain > 0)
+           {
+               map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b,
+                                       remain, 0);
+           }
+           else
+               map = 0;
+       }
+    }
+    if (!i)
+       return;
+    if (last && start != last)
+        zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
+                               start, last - start);
 }
 
 static void snippet_add_incomplete_field(RecWord *p, int ord)
@@ -220,7 +286,7 @@ static void snippet_token_add(RecWord *p)
             zei, zinfo_index_category_index, p->index_type, p->index_name);
 
         if (zebra_maps_is_complete (h->zh->reg->zebra_maps, p->index_type))
-            snippet_add_complete_field (p);
+            snippet_add_complete_field (p, ch);
         else
             snippet_add_incomplete_field(p, ch);
     }
index 8738d1a..8478bde 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: index.h,v 1.199 2007-08-21 11:06:47 adam Exp $
+/* $Id: index.h,v 1.200 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -321,6 +321,9 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn,
                      struct ZebraRecStream *stream, RecType rt,
                      void *recTypeClientData);
 
+int zebra_get_rec_snippets(ZebraHandle zh, zint sysno,
+                           zebra_snippets *snippets);
+
 void zebra_index_merge(ZebraHandle zh);
 
 ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, 
index d3ca5ad..8957c3c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: retrieve.c,v 1.71 2007-08-21 11:06:47 adam Exp $
+/* $Id: retrieve.c,v 1.72 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -444,17 +444,11 @@ static void snippet_xml_record(ZebraHandle zh, WRBUF wrbuf, zebra_snippets *doc)
     wrbuf_printf(wrbuf, "</record>");
 }
 
-int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname,
-                                zint sysno, ODR odr,
-                                const char *elemsetname,
-                                const Odr_oid *input_format,
-                                const Odr_oid **output_format,
-                                char **rec_bufp, int *rec_lenp)
+int zebra_get_rec_snippets(ZebraHandle zh, zint sysno,
+                           zebra_snippets *snippets)
 {
     int return_code = 0;
-    Record rec;
-    
-    rec = rec_get(zh->reg->records, sysno);
+    Record rec = rec_get(zh->reg->records, sysno);
     if (!rec)
     {
         yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
@@ -466,60 +460,73 @@ int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname,
         void *recTypeClientData;
         RecType rt = recType_byName(zh->reg->recTypes, zh->res,
                                     file_type, &recTypeClientData);
-        zebra_snippets *hit_snippet = zebra_snippets_create();
-        WRBUF wrbuf = wrbuf_alloc();
 
-        zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet);
-        
         if (!rt)
             return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
         else
         {
             struct ZebraRecStream stream;
-            
             return_code = zebra_create_record_stream(zh, &rec, &stream);
             if (return_code == 0)
             {
-                zebra_snippets *rec_snippet = zebra_snippets_create();
-                extract_snippet(zh, rec_snippet, &stream,
+                extract_snippet(zh, snippets, &stream,
                                 rt, recTypeClientData);
 
-#if 0
-                /* for debugging purposes */
-                yaz_log(YLOG_LOG, "---------------------------");
-                yaz_log(YLOG_LOG, "REC SNIPPET:");
-                zebra_snippets_log(rec_snippet, YLOG_LOG, 1);
-                yaz_log(YLOG_LOG, "---------------------------");
-                yaz_log(YLOG_LOG, "HIT SNIPPET:");
-                zebra_snippets_log(hit_snippet, YLOG_LOG, 1);
-#endif
+                stream.destroy(&stream);
+            }
+        }
+        rec_free(&rec);
+    }
+    return return_code;
+}
+
+int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname,
+                                zint sysno, ODR odr,
+                                const char *elemsetname,
+                                const Odr_oid *input_format,
+                                const Odr_oid **output_format,
+                                char **rec_bufp, int *rec_lenp)
+{
+    zebra_snippets *rec_snippets = zebra_snippets_create();
+    int return_code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
+
+    if (!return_code)
+    {
+        WRBUF wrbuf = wrbuf_alloc();
+        zebra_snippets *hit_snippet = zebra_snippets_create();
 
-                zebra_snippets_ring(rec_snippet, hit_snippet, 5, 5);
+        zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet);
 
 #if 0
-                yaz_log(YLOG_LOG, "---------------------------");
-                yaz_log(YLOG_LOG, "RING SNIPPET:");
-                zebra_snippets_log(rec_snippet, YLOG_LOG, 1);
+        /* for debugging purposes */
+        yaz_log(YLOG_LOG, "---------------------------");
+        yaz_log(YLOG_LOG, "REC SNIPPET:");
+        zebra_snippets_log(rec_snippet, YLOG_LOG, 1);
+        yaz_log(YLOG_LOG, "---------------------------");
+        yaz_log(YLOG_LOG, "HIT SNIPPET:");
+        zebra_snippets_log(hit_snippet, YLOG_LOG, 1);
 #endif
-                
-                snippet_xml_record(zh, wrbuf, rec_snippet);
-
-                *output_format = yaz_oid_recsyn_xml;
-
-                
-                zebra_snippets_destroy(rec_snippet);
-            }
-            stream.destroy(&stream);
-        }
+        
+        zebra_snippets_ring(rec_snippets, hit_snippet, 5, 5);
+        
+#if 0
+        yaz_log(YLOG_LOG, "---------------------------");
+        yaz_log(YLOG_LOG, "RING SNIPPET:");
+        zebra_snippets_log(rec_snippets, YLOG_LOG, 1);
+#endif
+        snippet_xml_record(zh, wrbuf, rec_snippets);
+        
+        *output_format = yaz_oid_recsyn_xml;
+        
         if (return_code == 0)
         {
             *rec_lenp = wrbuf_len(wrbuf);
             *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf));
         }
         wrbuf_destroy(wrbuf);
-        rec_free(&rec);
         zebra_snippets_destroy(hit_snippet);
     }
+    zebra_snippets_destroy(rec_snippets);
     return return_code;
 }
 
index 7800b11..4fbb8a5 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: rpnscan.c,v 1.11 2007-05-09 07:07:18 adam Exp $
+/* $Id: rpnscan.c,v 1.12 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -106,11 +106,44 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count)
     *count = rset->hits_count;
 }
 
+static void get_first_snippet_from_rset(RSET rset, zebra_snippets *snippets, 
+                                        zint *sysno)
+{
+    struct it_key key;
+    RSFD rfd;
+    TERMID termid;
+
+    yaz_log(YLOG_DEBUG, "get_first_snippet_from_rset");
+
+    rfd = rset_open(rset, RSETF_READ);
+    *sysno = 0;
+    while (rset_read(rfd, &key, &termid))
+    {
+        if (key.mem[0] != *sysno)
+        {
+            if (*sysno)
+                break;
+            *sysno = key.mem[0];
+        }
+        if (termid)
+        {
+            struct ord_list *ol;
+            for (ol = termid->ol; ol; ol = ol->next)
+            {
+                zebra_snippets_append(snippets, key.mem[key.len-1], 0,
+                                      ol->ord, termid->name);
+            }
+        }
+    }
+    rset_close (rfd);
+}
+
 struct scan2_info_entry {
     WRBUF term;
     char prefix[20];
     ISAM_P isam_p;
     int pos_to_save;
+    int ord;
 };
 
 static int scan_handle2(char *name, const char *info, int pos, void *client)
@@ -153,11 +186,17 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem,
     {
         if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0)
         {
-            RSET rset_t = rset_trunc(
+            struct ord_list *ol = ord_list_create(nmem);
+            RSET rset_t;
+
+            ol = ord_list_append(nmem, ol, ar[i].ord);
+
+            assert(ol);
+            rset_t = rset_trunc(
                     zh, &ar[i].isam_p, 1,
                     wrbuf_buf(ar[i].term), wrbuf_len(ar[i].term),
-                    NULL, 0, zapt->term->which, nmem, 
-                    kc, kc->scope, 0, index_type, 
+                    NULL, 1, zapt->term->which, nmem, 
+                    kc, kc->scope, ol, index_type, 
                     0 /* hits_limit */,
                     0 /* term_ref_id_str */);
             if (!rset)
@@ -188,17 +227,42 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem,
         }
         /* count it */
         count_set(zh, rset, &count);
-        rset_delete(rset);
-        if (count > 0)
+
+        if (pos != -1)
         {
-            if (pos != -1)
+            zint sysno;
+            int code = -1;
+            zebra_snippets *rec_snippets = zebra_snippets_create();
+            zebra_snippets *hit_snippets = zebra_snippets_create();
+
+            glist[pos].term = 0;
+            glist[pos].display_term = 0;
+            
+            get_first_snippet_from_rset(rset, hit_snippets, &sysno);
+            if (sysno)
+                code = zebra_get_rec_snippets(zh, sysno, rec_snippets);
+         
+            if (code == 0)
             {
+                const struct zebra_snippet_word *w = 
+                    zebra_snippets_lookup(rec_snippets, hit_snippets);
+                if (w)
+                {
+                    glist[pos].display_term = odr_strdup(stream, w->term);
+                }
+            }
+            if (!glist[pos].term)
                 zebra_term_untrans_iconv(zh, stream->mem, index_type,
                                          &glist[pos].term, term);
-                glist[pos].occurrences = count;
-            }
-            return 1;
+            glist[pos].occurrences = count;
+            zebra_snippets_destroy(rec_snippets);
+            zebra_snippets_destroy(hit_snippets);
         }
+        rset_delete(rset);
+        if (count > 0)
+            return 1;
+        else
+            return 0;
     }
     return 0;
 }
@@ -252,6 +316,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem,
         wrbuf_rewind(ar[i].term);
         wrbuf_puts(ar[i].term, termz + prefix_len);
         ar[i].isam_p = 0;
+        ar[i].ord = ords[i];
     }
     /** deal with terms before position .. */
     /* the glist index starts at zero (unlike scan positions */
@@ -317,6 +382,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem,
         wrbuf_rewind(ar[i].term);
         wrbuf_puts(ar[i].term, termz + prefix_len);
         ar[i].isam_p = 0;
+        ar[i].ord = ords[i];
     }
 
     after_pos = 1;  /* immediate term first.. */
index 25f2d27..939c7f0 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zebrasrv.c,v 1.16 2007-05-21 11:54:59 adam Exp $
+/* $Id: zebrasrv.c,v 1.17 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -383,6 +383,7 @@ static int bend_scan (void *handle, bend_scan_rr *r)
        for (i = 0; i < r->num_entries; i++)
        {
            r->entries[i].term = entries[i].term;
+           r->entries[i].display_term = entries[i].display_term;
            r->entries[i].occurrences =
                 CAST_ZINT_TO_INT(entries[i].occurrences);
        }
index b299e5f..10e69c0 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: snippet.c,v 1.13 2007-08-21 11:06:47 adam Exp $
+/* $Id: snippet.c,v 1.14 2007-08-21 13:27:04 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -209,6 +209,25 @@ static void zebra_snippets_clear(zebra_snippets *sn)
     }
 }
 
+const struct zebra_snippet_word *zebra_snippets_lookup(
+    const zebra_snippets *doc, const zebra_snippets *hit)
+{
+    const zebra_snippet_word *hit_w;
+    for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next)
+    {
+       const zebra_snippet_word *doc_w;
+        for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next)
+        {
+            if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno
+                && !doc_w->ws)
+            {
+                return doc_w;
+            }
+        }
+    }
+    return 0;
+}
+
 void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit,
                          int before, int after)
 {