+ /* parsing PI record instructions */
+ if (0 == strncmp((const char *)look, "record", 6))
+ {
+ char id[256];
+ char rank[256];
+ char type[256];
+
+ *id = '\0';
+ *rank = '\0';
+ *type = '\0';
+ look += 6;
+ while (*look)
+ if (attr_content_pi(&look, "id", id, sizeof(id)))
+ ;
+ else if (attr_content_pi(&look, "rank", rank, sizeof(rank)))
+ ;
+ else if (attr_content_pi(&look, "type", type, sizeof(type)))
+ {
+ dom_log(YLOG_WARN, tinfo, node,
+ "content '%s', can not parse '%s'",
+ pi_p, look);
+ break;
+ }
+ set_record_info(tinfo, extctr, node, id, rank, type);
+ }
+ /* parsing index instruction */
+ else if (0 == strncmp((const char *)look, "index", 5))
+ {
+ look += 5;
+
+ /* eat whitespace */
+ while (*look && ' ' == *look)
+ look++;
+
+ /* export index instructions to outside */
+ *index_pp = look;
+ }
+ else
+ {
+ dom_log(YLOG_WARN, tinfo, node,
+ "content '%s', can not parse '%s'",
+ pi_p, look);
+ }
+ }
+}
+
+/* DOM filter style indexing: recursive walk over one node and its subtree */
+static void process_xml_element_node(struct filter_info *tinfo,
+                                     struct recExtractCtrl *extctr,
+                                     RecWord* recword,
+                                     xmlNodePtr node)
+{
+    /* index instruction handed back by a PI child; it is kept until the
+       next element sibling picks it up */
+    const char *pending_index = 0;
+    xmlNodePtr kid;
+
+    /* the node itself goes to the zebra-namespace handler before any of
+       its children are looked at */
+    process_xml_element_zebra_node(tinfo, extctr, recword, node);
+
+    /* visit the children in sibling order */
+    kid = node->children;
+    while (kid)
+    {
+        switch (kid->type)
+        {
+        case XML_PI_NODE:
+            /* the PI handler may leave an instruction in pending_index */
+            process_xml_pi_node(tinfo, extctr, kid, &pending_index);
+            break;
+        case XML_ELEMENT_NODE:
+            /* a pending instruction applies to this element only:
+               use it once, then forget it */
+            if (pending_index)
+            {
+                index_value_of(tinfo, extctr, recword, kid, pending_index);
+                pending_index = 0;
+            }
+            /* descend into the element */
+            process_xml_element_node(tinfo, extctr, recword, kid);
+            break;
+        default:
+            /* every other node type is passed over; a pending
+               instruction survives until the next element */
+            break;
+        }
+        kid = kid->next;
+    }
+}
+
+
+/* DOM filter style indexing: start the node walk at the document itself */
+static void extract_dom_doc_node(struct filter_info *tinfo,
+                                 struct recExtractCtrl *extctr,
+                                 xmlDocPtr doc)
+{
+    /* one RecWord, initialized a single time here and then passed by
+       address to the walk so all terms reuse it */
+    RecWord word;
+    /* the document pointer is handed to the walk as a plain node pointer */
+    xmlNodePtr root = (xmlNodePtr) doc;
+
+    extctr->init(extctr, &word);
+
+    process_xml_element_node(tinfo, extctr, &word, root);
+}
+
+
+
+
+static int convert_extract_doc(struct filter_info *tinfo,
+ struct filter_input *input,
+ struct recExtractCtrl *p,
+ xmlDocPtr doc)
+