+ yaz_log(YLOG_WARN, "%s dom filter: "
+ "No root for index XML record");
+ }
+}
+
+
+/*
+ * DOM filter style indexing: fetch the text value of a named attribute.
+ *
+ * Compares attr->name against name (via XML_STRCMP).  When the names
+ * match and the attribute's first child is a text node, *dst_content is
+ * pointed at that child's content (no copy is made) and 1 is returned.
+ * In every other case *dst_content is left untouched and 0 is returned.
+ */
+static int attr_content_xml(struct _xmlAttr *attr, const char *name,
+                            xmlChar **dst_content)
+{
+    if (0 != XML_STRCMP(attr->name, name))
+        return 0;
+    if (!attr->children)
+        return 0;
+    if (attr->children->type != XML_TEXT_NODE)
+        return 0;
+
+    *dst_content = attr->children->content;
+    return 1;
+}
+
+
+/*
+ * Copy the byte range [begin, end) into dst as a NUL-terminated string.
+ * The copy is truncated to dst_size - 1 bytes so that an over-long token
+ * can never overrun the destination buffer.
+ */
+static void index_copy_token(xmlChar *dst, size_t dst_size,
+                             const xmlChar *begin, const xmlChar *end)
+{
+    size_t len = (size_t) (end - begin);
+
+    if (len >= dst_size)
+        len = dst_size - 1;
+    memcpy(dst, begin, len);
+    dst[len] = '\0';
+}
+
+/*
+ * DOM filter style indexing: index the text content of node under every
+ * index named in index_p.
+ *
+ * index_p is a space separated list of "name" or "name:type" items and
+ * may not start with ' ' or ':'.  For each item one token holding the
+ * complete node text is handed to extctr->tokenAdd.  When an item carries
+ * a type, only its first character is stored in recWord.index_type; an
+ * item without a type leaves index_type as it was (either the value set
+ * by extctr->init or the type of a preceding item).
+ *
+ * Nothing is indexed when index_p is NULL, when the node content cannot
+ * be retrieved, or when the node text is empty.
+ */
+static void index_value_of(struct filter_info *tinfo,
+                           struct recExtractCtrl *extctr,
+                           xmlNodePtr node,
+                           xmlChar * index_p)
+{
+    xmlChar *text;
+    size_t text_len;
+
+    if (!index_p)
+        return;
+
+    /* xmlNodeGetContent allocates; it may also fail and return NULL */
+    text = xmlNodeGetContent(node);
+    if (!text)
+        return;
+    text_len = strlen((const char *)text);
+
+    /* if there is no text, we do not need to proceed */
+    if (text_len)
+    {
+        const xmlChar *look = index_p;
+        xmlChar index[256];
+        xmlChar type[256];
+
+        /* assigning text to be indexed */
+        RecWord recWord;
+        (*extctr->init)(extctr, &recWord);
+        recWord.term_buf = (const char *)text;
+        recWord.term_len = text_len;
+
+        /* parsing all index name/type pairs */
+        while (*look && ' ' != *look && ':' != *look)
+        {
+            const xmlChar *bval = look;
+
+            /* parsing one index name */
+            while (*look && ':' != *look && ' ' != *look)
+                look++;
+            index_copy_token(index, sizeof(index), bval, look);
+
+            /* parsing one index type, if existing */
+            *type = '\0';
+            if (':' == *look)
+            {
+                look++;
+                bval = look;
+                while (*look && ' ' != *look)
+                    look++;
+                index_copy_token(type, sizeof(type), bval, look);
+            }
+
+            /* actually indexing the text given */
+            yaz_log(YLOG_DEBUG, "%s dom filter: "
+                    "INDEX '%s:%s' '%s'",
+                    tinfo->fname, (const char *)index,
+                    (const char *)type, (const char *)text);
+
+            recWord.index_name = (const char *)index;
+            if (*type)
+                recWord.index_type = *type;
+            (extctr->tokenAdd)(&recWord);
+
+            /* eat all separating whitespace so that items after a run
+               of spaces are not silently dropped */
+            while (' ' == *look)
+                look++;
+        }
+    }
+
+    xmlFree(text);
+}
+
+
+/*
+ * DOM filter style indexing: store record level information in extctr.
+ *
+ * id_p    when non-NULL and non-empty, its first whitespace delimited
+ *         word (at most 255 characters) is written to
+ *         extctr->match_criteria
+ * rank_p  when non-NULL and non-empty, converted with atozint and stored
+ *         in extctr->staticrank; an absent or empty rank leaves the
+ *         current staticrank untouched
+ * type_p  only written to the debug log; acting on record types such as
+ *         "update" or "delete" is not implemented here
+ *
+ * NOTE(review): the "%255s" conversion assumes extctr->match_criteria
+ * holds at least 256 bytes -- confirm against its declaration.
+ */
+static void set_record_info(struct filter_info *tinfo,
+                            struct recExtractCtrl *extctr,
+                            xmlChar * id_p,
+                            xmlChar * rank_p,
+                            xmlChar * type_p)
+{
+    /* never hand a NULL pointer to a %s conversion */
+    yaz_log(YLOG_DEBUG, "%s dom filter: "
+            "RECORD id=%s rank=%s type=%s",
+            tinfo->fname,
+            id_p ? (const char *)id_p : "(null)",
+            rank_p ? (const char *)rank_p : "(null)",
+            type_p ? (const char *)type_p : "(null)");
+
+    if (id_p && *id_p)
+        sscanf((const char *)id_p, "%255s", extctr->match_criteria);
+
+    if (rank_p && *rank_p)
+        extctr->staticrank = atozint((const char *)rank_p);
+}
+
+
+/*
+ * DOM filter style indexing: handle an element in the zebra namespace.
+ *
+ * Nodes that are not elements, or whose namespace href differs from
+ * zebra_dom_ns, are ignored.  Otherwise:
+ *   <index name="...">   the node text is indexed via index_value_of
+ *                        using the index list found in @name
+ *   <record id= rank= type=>
+ *                        the attribute values are collected and passed
+ *                        to set_record_info; a @type other than "update"
+ *                        is reported once as not implemented
+ * Unknown attributes and unknown element names are logged as warnings.
+ *
+ * Every string returned by xmlGetNodePath is allocated and is released
+ * with xmlFree after it has been logged.
+ */
+static void process_xml_element_zebra_node(struct filter_info *tinfo,
+                                           struct recExtractCtrl *extctr,
+                                           xmlNodePtr node)
+{
+    struct _xmlAttr *attr;
+    xmlChar *path;
+
+    if (node->type != XML_ELEMENT_NODE)
+        return;
+    if (!node->ns || 0 != XML_STRCMP(node->ns->href, zebra_dom_ns))
+        return;
+
+    if (0 == XML_STRCMP(node->name, "index"))
+    {
+        xmlChar *index_p = 0;
+
+        for (attr = node->properties; attr; attr = attr->next)
+        {
+            if (attr_content_xml(attr, "name", &index_p))
+            {
+                index_value_of(tinfo, extctr, node, index_p);
+            }
+            else
+            {
+                path = xmlGetNodePath(node);
+                yaz_log(YLOG_WARN,"%s dom filter: "
+                        "%s bad attribute @%s, expected @name",
+                        tinfo->fname,
+                        path ? (const char *)path : "(null)",
+                        (const char *)attr->name);
+                if (path)
+                    xmlFree(path);
+            }
+        }
+    }
+    else if (0 == XML_STRCMP(node->name, "record"))
+    {
+        xmlChar *id_p = 0;
+        xmlChar *rank_p = 0;
+        xmlChar *type_p = 0;
+
+        for (attr = node->properties; attr; attr = attr->next)
+        {
+            if (attr_content_xml(attr, "id", &id_p))
+                ;
+            else if (attr_content_xml(attr, "rank", &rank_p))
+                ;
+            else if (attr_content_xml(attr, "type", &type_p))
+            {
+                /* checked here, while attr really is the type attribute,
+                   so the warning is issued once and names @type */
+                if (0 != strcmp("update", (const char *)type_p))
+                {
+                    path = xmlGetNodePath(node);
+                    yaz_log(YLOG_WARN,"%s dom filter: "
+                            "%s attribute @%s,"
+                            " only implemented '@type='update'",
+                            tinfo->fname,
+                            path ? (const char *)path : "(null)",
+                            (const char *)attr->name);
+                    if (path)
+                        xmlFree(path);
+                }
+            }
+            else
+            {
+                path = xmlGetNodePath(node);
+                yaz_log(YLOG_WARN,"%s dom filter: "
+                        "%s bad attribute @%s,"
+                        " expected @id|@rank|@type",
+                        tinfo->fname,
+                        path ? (const char *)path : "(null)",
+                        (const char *)attr->name);
+                if (path)
+                    xmlFree(path);
+            }
+        }
+        set_record_info(tinfo, extctr, id_p, rank_p, type_p);
+    }
+    else
+    {
+        path = xmlGetNodePath(node);
+        yaz_log(YLOG_WARN,"%s dom filter: "
+                "%s bad element <%s>,"
+                " expected <record>|<index> in namespace '%s'",
+                tinfo->fname,
+                path ? (const char *)path : "(null)",
+                (const char *)node->name, zebra_dom_ns);
+        if (path)
+            xmlFree(path);
+    }
+}
+
+
+/*
+ * Log that the processing instruction content pi_p could not be parsed
+ * from position look onwards.  The node path obtained from
+ * xmlGetNodePath is allocated and is freed again after logging.
+ */
+static void pi_warn_unparsed(struct filter_info *tinfo, xmlNodePtr node,
+                             const xmlChar *pi_p, const xmlChar *look)
+{
+    xmlChar *path = xmlGetNodePath(node);
+
+    yaz_log(YLOG_WARN,"%s dom filter: "
+            "%s content '%s', can not parse '%s'",
+            tinfo->fname, path ? (const char *)path : "(null)",
+            (const char *)pi_p, (const char *)look);
+    if (path)
+        xmlFree(path);
+}
+
+/*
+ * Read one value ending at the next ' ' or end of string, starting at
+ * *look_pp.  The value is stored NUL-terminated in dst, truncated to
+ * dst_size - 1 bytes, and *look_pp is advanced past the whole value.
+ */
+static void pi_read_value(xmlChar **look_pp, xmlChar *dst, size_t dst_size)
+{
+    xmlChar *bval = *look_pp;
+    xmlChar *eval = bval;
+    size_t len;
+
+    while (*eval && ' ' != *eval)
+        eval++;
+
+    len = (size_t) (eval - bval);
+    if (len >= dst_size)
+        len = dst_size - 1;
+    memcpy(dst, bval, len);
+    dst[len] = '\0';
+
+    *look_pp = eval;
+}
+
+/*
+ * DOM filter style indexing: interpret a processing instruction node.
+ *
+ * Only PIs whose target equals zebra_pi_name are looked at.  Two content
+ * forms are understood:
+ *   record [id=VALUE] [rank=VALUE]
+ *        the values are passed on to set_record_info (type is passed
+ *        as 0); a value that is not given is passed as an empty string
+ *   index NAME[:TYPE] ...
+ *        *index_pp is set to the text following "index"; the pointer
+ *        refers into node->content and is not a copy
+ * Any other content, or trailing text that cannot be parsed, is logged
+ * as a warning and otherwise ignored.
+ */
+static void process_xml_pi_node(struct filter_info *tinfo,
+                                struct recExtractCtrl *extctr,
+                                xmlNodePtr node,
+                                xmlChar **index_pp)
+{
+    xmlChar *pi_p;
+    xmlChar *look;
+
+    /* if right PI name, continue parsing PI */
+    if (0 != strcmp(zebra_pi_name, (const char *)node->name))
+        return;
+
+    pi_p = node->content;
+    if (!pi_p)
+    {
+        /* a PI without any content carries no instruction */
+        pi_warn_unparsed(tinfo, node, (const xmlChar *)"",
+                         (const xmlChar *)"");
+        return;
+    }
+    look = pi_p;
+
+    /* parsing PI record instructions */
+    if (0 == strncmp((const char *)look, "record", 6))
+    {
+        xmlChar id[256];
+        xmlChar rank[256];
+
+        *id = '\0';
+        *rank = '\0';
+
+        look += 6;
+
+        /* eat whitespace */
+        while (' ' == *look)
+            look++;
+
+        /* parse possible id */
+        if (0 == strncmp((const char *)look, "id=", 3))
+        {
+            look += 3;
+            pi_read_value(&look, id, sizeof(id));
+        }
+
+        /* eat whitespace */
+        while (' ' == *look)
+            look++;
+
+        /* parse possible rank; "rank=" is five characters long */
+        if (0 == strncmp((const char *)look, "rank=", 5))
+        {
+            look += 5;
+            pi_read_value(&look, rank, sizeof(rank));
+        }
+
+        /* eat whitespace, including trailing blanks */
+        while (' ' == *look)
+            look++;
+
+        if ('\0' != *look)
+            pi_warn_unparsed(tinfo, node, pi_p, look);
+        else
+            set_record_info(tinfo, extctr, id, rank, 0);
+    }
+    /* parsing index instruction */
+    else if (0 == strncmp((const char *)look, "index", 5))
+    {
+        look += 5;
+
+        /* eat whitespace */
+        while (' ' == *look)
+            look++;
+
+        /* export index instructions to outside */
+        *index_pp = look;
+    }
+    else
+    {
+        pi_warn_unparsed(tinfo, node, pi_p, look);
+    }
+}
+
+/*
+ * DOM filter style indexing: walk an element and its descendants.
+ *
+ * The element itself is first offered to process_xml_element_zebra_node.
+ * Its children are then visited in document order:
+ *   - a processing instruction is given to process_xml_pi_node, which
+ *     may leave an index instruction behind for a following element
+ *   - an element is indexed with such a pending instruction, if any
+ *     (the instruction is consumed by the first element that follows),
+ *     and is then processed recursively
+ *   - all other node types are skipped
+ */
+static void process_xml_element_node(struct filter_info *tinfo,
+                                     struct recExtractCtrl *extctr,
+                                     xmlNodePtr node)
+{
+    /* index instruction from a PI, waiting for the next element node */
+    xmlChar *pending_index = 0;
+    xmlNodePtr child;
+
+    /* set record data or index the node if it is a zebra element */
+    process_xml_element_zebra_node(tinfo, extctr, node);
+
+    child = node->children;
+    while (child)
+    {
+        switch (child->type)
+        {
+        case XML_PI_NODE:
+            process_xml_pi_node(tinfo, extctr, child, &pending_index);
+            break;
+        case XML_ELEMENT_NODE:
+            if (pending_index)
+            {
+                index_value_of(tinfo, extctr, child, pending_index);
+                pending_index = 0;
+            }
+            process_xml_element_node(tinfo, extctr, child);
+            break;
+        default:
+            /* text, comments etc. carry no instructions */
+            break;
+        }
+        child = child->next;
+    }
+}
+
+
+/* DOM filter style indexing */
+static void extract_dom_doc_node(struct filter_info *tinfo,
+ struct recExtractCtrl *extctr,
+ xmlDocPtr doc)
+{
+ /* yaz_log(YLOG_DEBUG,"DOC %s\n", xmlGetNodePath((xmlNodePtr)doc)); */
+
+ xmlChar *buf_out;
+ int len_out;
+ if (extctr->flagShowRecords){
+ xmlDocDumpMemory(doc, &buf_out, &len_out);
+ fwrite(buf_out, len_out, 1, stdout);
+ xmlFree(buf_out);