-/* $Id: xslt.c,v 1.6 2005-05-31 17:36:16 adam Exp $
+/* $Id: xslt.c,v 1.8 2005-06-07 11:36:38 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
struct filter_info {
xmlDocPtr doc;
char *fname;
- int split_depth;
+ const char *split_level;
+ const char *split_path;
ODR odr;
struct filter_schema *schemas;
xmlTextReaderPtr reader;
#define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1"
static const char *zebra_index_ns = ZEBRA_INDEX_NS;
+/* Append one name/value pair to a null-terminated parameter vector.
+ *
+ * params: vector of alternating name/value pointers ending in a null
+ *         entry; the caller must have room for two additional entries
+ *         after the current terminator.
+ * name:   parameter name; the pointer is stored, not copied.
+ * value:  parameter value; stored exactly as given, without quoting
+ *         or copying.
+ * odr:    not used by this function.
+ */
+static void set_param_xml(const char **params, const char *name,
+                          const char *value, ODR odr)
+{
+    const char **slot = params;
+
+    /* walk forward to the terminating null entry */
+    for (; *slot; slot++)
+        ;
+    *slot++ = name;
+    *slot++ = value;
+    *slot = 0;   /* re-terminate the vector after the new pair */
+}
+
static void set_param_str(const char **params, const char *name,
const char *value, ODR odr)
{
struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
tinfo->reader = 0;
tinfo->fname = 0;
- tinfo->split_depth = 0;
+ tinfo->split_level = 0;
+ tinfo->split_path = 0;
tinfo->odr = odr_createmem(ODR_ENCODE);
tinfo->doc = 0;
tinfo->schemas = 0;
{
struct filter_info *tinfo = (struct filter_info *)
filter_init_xslt(res, recType);
- tinfo->split_depth = 1;
+ tinfo->split_level = "1";
return tinfo;
}
return ZEBRA_FAIL;
for (ptr = ptr->children; ptr; ptr = ptr->next)
{
- if (ptr->type == XML_ELEMENT_NODE &&
- !strcmp(ptr->name, "schema"))
+ if (ptr->type != XML_ELEMENT_NODE)
+ continue;
+ if (!strcmp(ptr->name, "schema"))
{
struct _xmlAttr *attr;
struct filter_schema *schema = xmalloc(sizeof(*schema));
xsltParseStylesheetFile(
(const xmlChar*) schema->stylesheet);
}
+ else if (!strcmp(ptr->name, "split"))
+ {
+ struct _xmlAttr *attr;
+ for (attr = ptr->properties; attr; attr = attr->next)
+ {
+ attr_content(attr, "level", &tinfo->split_level);
+ attr_content(attr, "path", &tinfo->split_path);
+ }
+ }
+ else
+ {
+ yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
+ return ZEBRA_FAIL;
+ }
}
return ZEBRA_OK;
}
static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
{
int ret;
+ int split_depth = 0;
if (p->first_record)
{
if (tinfo->reader)
if (!tinfo->reader)
return RECCTRL_EXTRACT_ERROR_GENERIC;
+ if (tinfo->split_level)
+ split_depth = atoi(tinfo->split_level);
ret = xmlTextReaderRead(tinfo->reader);
while (ret == 1) {
int type = xmlTextReaderNodeType(tinfo->reader);
int depth = xmlTextReaderDepth(tinfo->reader);
- if (tinfo->split_depth == 0 ||
- (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
+ if (split_depth == 0 ||
+ (split_depth > 0 &&
+ type == XML_READER_TYPE_ELEMENT && split_depth == depth))
{
xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
odr_reset(tinfo->odr);
- if (tinfo->split_depth == 0)
+ if (tinfo->split_level == 0 && tinfo->split_path == 0)
return extract_full(tinfo, p);
else
{
}
+/* Build a short text snippet for the record described by p.
+ *
+ * A window (size argument 10) is computed from p->doc_snippet and
+ * p->hit_snippet. Words from that window are written out, each followed
+ * by a space, until a word is met whose ord differs from the first
+ * non-zero ord seen. Words flagged as matches are wrapped in '*'.
+ * The whole text is enclosed in single quotes.
+ *
+ * The result is duplicated into p->odr with odr_strdup; the temporary
+ * buffer and the window are released before returning.
+ *
+ * The #else branches hold an alternative XML rendering that is
+ * currently compiled out.
+ *
+ * NOTE(review): a term containing a single quote would end the quoted
+ * text early - confirm how the consumer of this value parses it.
+ */
+static const char *snippet_doc(struct recRetrieveCtrl *p)
+{
+    WRBUF out = wrbuf_alloc();
+    zebra_snippets *window =
+        zebra_snippets_window(p->doc_snippet, p->hit_snippet, 10);
+    zebra_snippet_word *word = zebra_snippets_list(window);
+    const char *result;
+    int first_ord = 0;
+
+#if 1
+    wrbuf_printf(out, "\'");
+#else
+    wrbuf_printf(out, "<snippet>\n");
+#endif
+    while (word)
+    {
+        /* stop once the ord changes from the one we locked onto */
+        if (first_ord != 0 && first_ord != word->ord)
+            break;
+        if (first_ord == 0)
+            first_ord = word->ord;
+#if 1
+        {
+            const char *mark = word->match ? "*" : "";
+
+            wrbuf_printf(out, "%s%s%s ", mark, word->term, mark);
+        }
+#else
+        wrbuf_printf(out, " <term %s ord='%d' seqno='%d'>",
+                     (word->match ? "match='1'" : ""),
+                     word->ord, word->seqno);
+        wrbuf_xmlputs(out, word->term);
+        wrbuf_printf(out, "</term>\n");
+#endif
+        word = word->next;
+    }
+#if 1
+    wrbuf_printf(out, "\'");
+#else
+    wrbuf_printf(out, "</snippet>\n");
+#endif
+    /* copy into ODR memory before the work buffer is freed */
+    result = odr_strdup(p->odr, wrbuf_buf(out));
+
+    zebra_snippets_destroy(window);
+    wrbuf_free(out, 1);
+    return result;
+}
+
static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
set_param_int(params, "score", p->score, p->odr);
set_param_int(params, "size", p->recordSize, p->odr);
+ set_param_xml(params, "snippet", snippet_doc(p), p->odr);
doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
0 /* URL */,
0 /* encoding */,