Added snippet utilities and snippet window implementation.

[idzebra-moved-to-github.git] / recctrl / xslt.c
diff --git a/recctrl/xslt.c b/recctrl/xslt.c

index 9c7de49..be771d3 100644 (file)
--- a/recctrl/xslt.c
+++ b/recctrl/xslt.c
@@ -1,4 +1,4 @@
-/* $Id: xslt.c,v 1.6 2005-05-31 17:36:16 adam Exp $
+/* $Id: xslt.c,v 1.8 2005-06-07 11:36:38 adam Exp $
     Copyright (C) 1995-2005
     Index Data ApS
  
@@ -46,7 +46,8 @@ struct filter_schema {
  struct filter_info {
      xmlDocPtr doc;
      char *fname;
-    int split_depth;
+    const char *split_level;
+    const char *split_path;
      ODR odr;
      struct filter_schema *schemas;
      xmlTextReaderPtr reader;
@@ -56,6 +57,16 @@ struct filter_info {
  #define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1"
  static const char *zebra_index_ns = ZEBRA_INDEX_NS;
  
+/* Append one (name, value) pair to the NULL-terminated list 'params'.
+ *
+ * Both pointers are stored exactly as passed in: nothing is copied and
+ * the value is not quoted or otherwise transformed here.  The list is
+ * re-terminated after the new pair.  No bounds checking is done, so the
+ * caller must have room for two more entries plus the terminator.
+ * 'odr' is part of the signature but is not used by this function.
+ */
+static void set_param_xml(const char **params, const char *name,
+                         const char *value, ODR odr)
+{
+    const char **slot;
+
+    /* advance to the terminating NULL entry */
+    for (slot = params; *slot; slot++)
+        ;
+    *slot++ = name;
+    *slot++ = value;
+    *slot = 0;
+}
+
  static void set_param_str(const char **params, const char *name,
                           const char *value, ODR odr)
  {
@@ -86,7 +97,8 @@ static void *filter_init_xslt(Res res, RecType recType)
      struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
      tinfo->reader = 0;
      tinfo->fname = 0;
-    tinfo->split_depth = 0;
+    tinfo->split_level = 0;
+    tinfo->split_path = 0;
      tinfo->odr = odr_createmem(ODR_ENCODE);
      tinfo->doc = 0;
      tinfo->schemas = 0;
@@ -97,7 +109,7 @@ static void *filter_init_xslt1(Res res, RecType recType)
  {
      struct filter_info *tinfo = (struct filter_info *)
         filter_init_xslt(res, recType);
-    tinfo->split_depth = 1;
+    tinfo->split_level = "1";
      return tinfo;
  }
  
@@ -144,8 +156,9 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
         return ZEBRA_FAIL;
      for (ptr = ptr->children; ptr; ptr = ptr->next)
      {
-       if (ptr->type == XML_ELEMENT_NODE &&
-           !strcmp(ptr->name, "schema"))
+       if (ptr->type != XML_ELEMENT_NODE)
+           continue;
+       if (!strcmp(ptr->name, "schema"))
         {
             struct _xmlAttr *attr;
             struct filter_schema *schema = xmalloc(sizeof(*schema));
@@ -168,6 +181,20 @@ static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
                     xsltParseStylesheetFile(
                         (const xmlChar*) schema->stylesheet);
         }
+       else if (!strcmp(ptr->name, "split"))
+       {
+           struct _xmlAttr *attr;
+           for (attr = ptr->properties; attr; attr = attr->next)
+           {
+               attr_content(attr, "level", &tinfo->split_level);
+               attr_content(attr, "path", &tinfo->split_path);
+           }
+       }
+       else
+       {
+           yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
+           return ZEBRA_FAIL;
+       }
      }
      return ZEBRA_OK;
  }
@@ -312,6 +339,7 @@ static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
  static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
  {
      int ret;
+    int split_depth = 0;
      if (p->first_record)
      {
         if (tinfo->reader)
@@ -325,12 +353,15 @@ static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
      if (!tinfo->reader)
         return RECCTRL_EXTRACT_ERROR_GENERIC;
  
+    if (tinfo->split_level)
+       split_depth = atoi(tinfo->split_level);
      ret = xmlTextReaderRead(tinfo->reader);
      while (ret == 1) {
         int type = xmlTextReaderNodeType(tinfo->reader);
         int depth = xmlTextReaderDepth(tinfo->reader);
-       if (tinfo->split_depth == 0 ||
-           (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
+       if (split_depth == 0 ||
+           (split_depth > 0 &&
+            type == XML_READER_TYPE_ELEMENT && split_depth == depth))
         {
             xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
             xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
@@ -371,7 +402,7 @@ static int filter_extract(void *clientData, struct recExtractCtrl *p)
  
      odr_reset(tinfo->odr);
  
-    if (tinfo->split_depth == 0)
+    if (tinfo->split_level == 0 && tinfo->split_path == 0)
         return extract_full(tinfo, p);
      else
      {
@@ -391,6 +422,51 @@ static int ioclose_ret(void *context)
  }
  
  
+/* Build a textual snippet for the record being retrieved.
+ *
+ * A snippet window is computed from p->doc_snippet and p->hit_snippet
+ * (third argument 10 -- presumably the window size; confirm against
+ * zebra_snippets_window).  Words are then emitted, each followed by a
+ * space, until a word with a different 'ord' than the first one seen is
+ * reached.  Matching words are wrapped in '*'.  With the "#if 1"
+ * branches active the result is enclosed in single quotes; the "#else"
+ * branches hold an alternative XML rendering that is compiled out.
+ *
+ * Returns a copy of the text made with odr_strdup() on p->odr.
+ *
+ * NOTE(review): a term that itself contains an apostrophe would end the
+ * quoted string early -- confirm whether terms can contain one.
+ * NOTE(review): 0 doubles as "ord not yet seen"; if 0 is a valid ord the
+ * first-ord check is deferred to the next word -- confirm.
+ */
+static const char *snippet_doc(struct recRetrieveCtrl *p)
+{
+    WRBUF out = wrbuf_alloc();
+    zebra_snippets *window =
+        zebra_snippets_window(p->doc_snippet, p->hit_snippet, 10);
+    zebra_snippet_word *word = zebra_snippets_list(window);
+    const char *result;
+    int first_ord = 0;
+
+#if 1
+    wrbuf_printf(out, "\'");
+#else
+    wrbuf_printf(out, "<snippet>\n");
+#endif
+    while (word)
+    {
+        /* only words sharing the ord of the first word are emitted */
+        if (first_ord == 0)
+            first_ord = word->ord;
+        else if (first_ord != word->ord)
+            break;
+#if 1
+        {
+            /* same marker on both sides of a matching term */
+            const char *mark = word->match ? "*" : "";
+
+            wrbuf_printf(out, "%s%s%s ", mark, word->term, mark);
+        }
+#else
+        wrbuf_printf(out, " <term %s ord='%d' seqno='%d'>",
+                     (word->match ? "match='1'" : ""),
+                     word->ord, word->seqno);
+        wrbuf_xmlputs(out, word->term);
+        wrbuf_printf(out, "</term>\n");
+#endif
+        word = word->next;
+    }
+#if 1
+    wrbuf_printf(out, "\'");
+#else
+    wrbuf_printf(out, "</snippet>\n");
+#endif
+    /* copy into ODR memory before the work buffer is released */
+    result = odr_strdup(p->odr, wrbuf_buf(out));
+
+    zebra_snippets_destroy(window);
+    wrbuf_free(out, 1);
+    return result;
+}
+
  static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
  {
      const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
@@ -426,6 +502,7 @@ static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
         set_param_int(params, "score", p->score, p->odr);
      set_param_int(params, "size", p->recordSize, p->odr);
      
+    set_param_xml(params, "snippet", snippet_doc(p), p->odr);
      doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
                     0 /* URL */,
                     0 /* encoding */,