New attribute @fields for snippets that indicates match in other fields.
authorAdam Dickmeiss <adam@indexdata.dk>
Thu, 23 Oct 2008 10:09:34 +0000 (12:09 +0200)
committerAdam Dickmeiss <adam@indexdata.dk>
Thu, 23 Oct 2008 10:09:34 +0000 (12:09 +0200)
Snippets now have a new attribute @fields which lists one or
more names of other fields where the snippet *also* occurs. This is useful
for any:w searches where one wants to know which other fields contain
identical terms. Note that this requires that the indexing uses the
same sequence numbers. For DOM this means specifying the indexes
in the same <z:index> directive.

include/idzebra/snippet.h
index/retrieve.c
test/xslt/dom1.c

index fa77bb3..b2eb902 100644 (file)
@@ -25,12 +25,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 YAZ_BEGIN_CDECL
 
 struct zebra_snippet_word {
-    zint seqno;
-    int ord;
-    char *term;
-    int match;
-    int mark;
-    int ws;
+    zint seqno;  /**< sequence number */
+    int ord;     /**< ordinal, i.e. database,field,type */
+    char *term;  /**< term itself */
+    int match;   /**< both part and real match */
+    int mark;    /**< part of snippet */
+    int ws;      /**< white space flag (not indexed material) */
     struct zebra_snippet_word *next;
     struct zebra_snippet_word *prev;
 };
index 78a489b..eb0b65f 100644 (file)
@@ -484,6 +484,41 @@ static void retrieve_puts_int(WRBUF wrbuf, const char *name,
 }
 
 
+static void snippet_check_fields(ZebraHandle zh, WRBUF wrbuf,
+                                 zebra_snippets *doc,
+                                 const zebra_snippet_word *doc_w,
+                                 const char *w_index_type)
+{
+    /* Beginning of a snippet: append a fields="..." attribute to wrbuf
+       naming the other fields in which the snippet also occurs */
+    const zebra_snippet_word *w;
+    int no = 0;  /* number of field names written so far */
+    for (w = zebra_snippets_constlist(doc); w; w = w->next)
+    {
+        /* same sequence number but a different ordinal (other field)? */
+        if (w->seqno == doc_w->seqno && w->ord != doc_w->ord)
+        {
+            const char *index_type;
+            const char *db = 0;
+            const char *string_index = 0;
+            
+            zebraExplain_lookup_ord(zh->reg->zei, w->ord, 
+                                    &index_type, &db, &string_index);
+            /* only report fields that have the same index type */
+            if (!strcmp(w_index_type, index_type))
+            {
+                if (no == 0)  /* first name also opens the attribute */
+                    wrbuf_printf(wrbuf, " fields=\"%s", string_index);
+                else          /* later names are space separated */
+                    wrbuf_printf(wrbuf, " %s", string_index);
+                no++;
+            }
+        }
+    }
+    if (no)  /* close the attribute only if it was opened */
+        wrbuf_printf(wrbuf, "\"");
+}
+
 static void snippet_xml_record(ZebraHandle zh, WRBUF wrbuf, zebra_snippets *doc)
 {
     const zebra_snippet_word *doc_w;
@@ -503,8 +538,11 @@ static void snippet_xml_record(ZebraHandle zh, WRBUF wrbuf, zebra_snippets *doc)
 
             if (mark_state == 0)
             {
+                
                 wrbuf_printf(wrbuf, "  <snippet name=\"%s\"",  string_index);
-                wrbuf_printf(wrbuf, " type=\"%s\">", index_type);
+                wrbuf_printf(wrbuf, " type=\"%s\"", index_type);
+                snippet_check_fields(zh, wrbuf, doc, doc_w, index_type);
+                wrbuf_printf(wrbuf, ">");
             }
             if (doc_w->match)
                 wrbuf_puts(wrbuf, "<s>");
index e22dbfd..1d25fff 100644 (file)
@@ -131,10 +131,10 @@ void tst(int argc, char **argv)
     YAZ_CHECK_EQ(tl_fetch_first_compare(
                      zh, "zebra::snippet", yaz_oid_recsyn_xml,
                      "<record xmlns=\"http://www.indexdata.com/zebra/\">\n"
-                     "  <snippet name=\"any\" type=\"w\">"
+                     "  <snippet name=\"any\" type=\"w\" fields=\"title\">"
                      "Selected Prose of <s>Oscar</s> Wilde"
                      "</snippet>\n"
-                     "  <snippet name=\"any\" type=\"w\">"
+                     "  <snippet name=\"any\" type=\"w\" fields=\"creator\">"
                      "Wilde, <s>Oscar</s>, 1854-1900"
                      "</snippet>\n"
                      "</record>"),