-/* $Id: zinfo.c,v 1.58 2006-05-10 08:13:23 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: zinfo.c,v 1.68 2006-08-14 10:40:15 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
for more details.
You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
*/
#include <sys/types.h>
struct zebSUInfo {
int index_type;
+ zinfo_index_category_t cat; /* index category: index, sort, alwaysmatches or anchor */
#define ZEB_SU_SET_USE 1
#define ZEB_SU_STR 2
int which;
union {
char *str;
- struct {
- int set;
- int use;
- } su;
} u;
int ordinal;
+ zint doc_occurrences; /* document occurrence counter, stored as "dococcurrences" */
+ zint term_occurrences; /* term occurrence counter, stored as "termoccurrences" */
};
struct zebSUInfoB {
struct zebDatabaseInfoB *curDatabaseInfo;
zebAccessInfo accessInfo;
char date[15]; /* YYYY MMDD HH MM SS */
- int (*updateFunc)(void *handle, Record drec, data1_node *n);
+ ZebraExplainUpdateFunc *updateFunc;
void *updateHandle;
};
Res res,
int writeFlag,
void *updateHandle,
- int (*updateFunc)(void *handle, Record drec, data1_node *n))
+ ZebraExplainUpdateFunc *updateFunc)
{
Record trec;
ZebraExplainInfo zei;
zei->records = records;
zei->nmem = nmem;
zei->dh = dh;
+
+ data1_get_absyn (zei->dh, "explain", DATA1_XPATH_INDEXING_DISABLE);
+
zei->attsets = NULL;
zei->res = res;
zei->categoryList = (struct zebraCategoryListInfo *)
"attrlist");
for (np = node_list->child; np; np = np->next)
{
- data1_node *node_set = NULL;
- data1_node *node_use = NULL;
data1_node *node_str = NULL;
data1_node *node_ordinal = NULL;
data1_node *node_type = NULL;
+ data1_node *node_cat = NULL;
+ data1_node *node_doc_occurrences = NULL;
+ data1_node *node_term_occurrences = NULL;
data1_node *np2;
- char oid_str[128];
- int oid_str_len;
if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "attr"))
continue;
if (np2->which != DATA1N_tag || !np2->child ||
np2->child->which != DATA1N_data)
continue;
- if (!strcmp(np2->u.tag.tag, "set"))
- node_set = np2->child;
- else if (!strcmp(np2->u.tag.tag, "use"))
- node_use = np2->child;
- else if (!strcmp(np2->u.tag.tag, "str"))
+ if (!strcmp(np2->u.tag.tag, "str"))
node_str = np2->child;
else if (!strcmp(np2->u.tag.tag, "ordinal"))
node_ordinal = np2->child;
else if (!strcmp(np2->u.tag.tag, "type"))
node_type = np2->child;
+ else if (!strcmp(np2->u.tag.tag, "cat"))
+ node_cat = np2->child;
+ else if (!strcmp(np2->u.tag.tag, "dococcurrences"))
+ node_doc_occurrences = np2->child;
+ else if (!strcmp(np2->u.tag.tag, "termoccurrences"))
+ node_term_occurrences = np2->child;
+ else
+ {
+ yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails",
+ np2->u.tag.tag);
+ }
}
assert(node_ordinal);
yaz_log(YLOG_WARN, "Missing attribute 'type' in attribute info");
(*zsuip)->info.index_type = 'w';
}
-
- if (node_set && node_use)
- {
- (*zsuip)->info.which = ZEB_SU_SET_USE;
-
- oid_str_len = node_set->u.data.len;
- if (oid_str_len >= (int) sizeof(oid_str))
- oid_str_len = sizeof(oid_str)-1;
- memcpy(oid_str, node_set->u.data.data, oid_str_len);
- oid_str[oid_str_len] = '\0';
-
- (*zsuip)->info.u.su.set = oid_getvalbyname(oid_str);
-
- (*zsuip)->info.u.su.use = atoi_n(node_use->u.data.data,
- node_use->u.data.len);
- yaz_log(YLOG_DEBUG, "set=%d use=%d ordinal=%d",
- (*zsuip)->info.u.su.set, (*zsuip)->info.u.su.use,
- (*zsuip)->info.ordinal);
- }
- else if (node_str)
+ if (node_cat && node_cat->u.data.len > 0)
+ {
+     /* Map the text of the "cat" element to an index category.  The text
+      * is handled by explicit length, so a name only matches when the
+      * lengths agree as well; strncmp() on its own would also accept any
+      * proper prefix such as "i" or "so".  Unrecognized names are logged
+      * and treated as the plain index category. */
+     const char *cat_str = node_cat->u.data.data;
+     size_t cat_len = (size_t) node_cat->u.data.len;
+     zinfo_index_category_t cat;
+
+     if (cat_len == sizeof("index") - 1
+         && !strncmp(cat_str, "index", cat_len))
+         cat = zinfo_index_category_index;
+     else if (cat_len == sizeof("sort") - 1
+              && !strncmp(cat_str, "sort", cat_len))
+         cat = zinfo_index_category_sort;
+     else if (cat_len == sizeof("alwaysmatches") - 1
+              && !strncmp(cat_str, "alwaysmatches", cat_len))
+         cat = zinfo_index_category_alwaysmatches;
+     else if (cat_len == sizeof("anchor") - 1
+              && !strncmp(cat_str, "anchor", cat_len))
+         cat = zinfo_index_category_anchor;
+     else
+     {
+         yaz_log(YLOG_WARN, "Bad index category '%.*s'",
+                 node_cat->u.data.len, node_cat->u.data.data);
+         cat = zinfo_index_category_index;
+     }
+     (*zsuip)->info.cat = cat;
+ }
+ else
+     (*zsuip)->info.cat = zinfo_index_category_index; /* no "cat" element: default */
+
+ if (node_doc_occurrences)
+ {
+ data1_node *np = node_doc_occurrences;
+ (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data,
+ np->u.data.len);
+ }
+ if (node_term_occurrences)
+ {
+ data1_node *np = node_term_occurrences;
+ (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data,
+ np->u.data.len);
+ }
+ if (node_str)
{
(*zsuip)->info.which = ZEB_SU_STR;
return 0;
}
-static void writeAttributeValueDetails (ZebraExplainInfo zei,
- zebAttributeDetails zad,
- data1_node *node_atvs, data1_attset *attset)
-
-{
- struct zebSUInfoB *zsui;
- int set_ordinal = attset->reference;
- data1_attset_child *c;
-
- for (c = attset->children; c; c = c->next)
- writeAttributeValueDetails (zei, zad, node_atvs, c->child);
- for (zsui = zad->SUInfo; zsui; zsui = zsui->next)
- {
- if (zsui->info.which == ZEB_SU_SET_USE &&
- set_ordinal == zsui->info.u.su.set)
- {
- data1_node *node_attvalue, *node_value;
- node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue",
- 0 /* attr */, node_atvs);
- node_value = data1_mk_tag (zei->dh, zei->nmem, "value",
- 0 /* attr */, node_attvalue);
- data1_mk_tag_data_int (zei->dh, node_value, "numeric",
- zsui->info.u.su.use, zei->nmem);
- }
- }
-}
static void zebraExplain_writeCategoryList (ZebraExplainInfo zei,
struct zebraCategoryListInfo *zcl,
char *sgml_buf;
int sgml_len;
Record drec;
- data1_node *node_adinfo, *node_list, *node_zebra, *node_attributesBySet;
+ data1_node *node_adinfo, *node_list, *node_zebra;
struct zebSUInfoB *zsui;
- int set_min;
if (!zad->dirty)
return;
record count, etc. is affected */
if (key_flush)
(*zei->updateFunc)(zei->updateHandle, drec, zad->data1_tree);
-
- node_attributesBySet = data1_mk_tag_uni (zei->dh, zei->nmem,
- "attributesBySet", node_adinfo);
- set_min = -1;
- while (1)
- {
- data1_node *node_asd;
- data1_attset *attset;
- int set_ordinal = -1;
- for (zsui = zad->SUInfo; zsui; zsui = zsui->next)
- {
- if (zsui->info.which == ZEB_SU_SET_USE &&
- (set_ordinal < 0 || set_ordinal > zsui->info.u.su.set)
- && zsui->info.u.su.set > set_min)
- set_ordinal = zsui->info.u.su.set;
- }
- if (set_ordinal < 0)
- break;
- set_min = set_ordinal;
- node_asd = data1_mk_tag (zei->dh, zei->nmem,
- "attributeSetDetails",
- 0 /* attr */, node_attributesBySet);
-
- attset = data1_attset_search_id (zei->dh, set_ordinal);
- if (!attset)
- {
- zebraExplain_loadAttsets (zei->dh, zei->res);
- attset = data1_attset_search_id (zei->dh, set_ordinal);
- }
- if (attset)
- {
- int oid[OID_SIZE];
- oident oe;
-
- oe.proto = PROTO_Z3950;
- oe.oclass = CLASS_ATTSET;
- oe.value = (enum oid_value) set_ordinal;
-
- if (oid_ent_to_oid (&oe, oid))
- {
- data1_node *node_abt, *node_atd, *node_atvs;
- data1_mk_tag_data_oid (zei->dh, node_asd, "oid",
- oid, zei->nmem);
-
- node_abt = data1_mk_tag (zei->dh, zei->nmem,
- "attributesByType",
- 0 /*attr */, node_asd);
- node_atd = data1_mk_tag (zei->dh, zei->nmem,
- "attributeTypeDetails",
- 0 /* attr */, node_abt);
- data1_mk_tag_data_int (zei->dh, node_atd,
- "type", 1, zei->nmem);
- node_atvs = data1_mk_tag (zei->dh, zei->nmem,
- "attributeValues",
- 0 /* attr */, node_atd);
- writeAttributeValueDetails (zei, zad, node_atvs, attset);
- }
- }
- }
/* zebra info (private) */
node_zebra = data1_mk_tag_uni (zei->dh, zei->nmem,
"zebraInfo", node_adinfo);
"attrlist", node_zebra);
for (zsui = zad->SUInfo; zsui; zsui = zsui->next)
{
- struct oident oident;
- int oid[OID_SIZE];
data1_node *node_attr;
char index_type_str[2];
-
node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", 0 /* attr */,
node_list);
index_type_str[1] = '\0';
data1_mk_tag_data_text (zei->dh, node_attr, "type",
index_type_str, zei->nmem);
- if (zsui->info.which == ZEB_SU_SET_USE)
- {
- oident.proto = PROTO_Z3950;
- oident.oclass = CLASS_ATTSET;
- oident.value = (enum oid_value) zsui->info.u.su.set;
- oid_ent_to_oid (&oident, oid);
-
- data1_mk_tag_data_text (zei->dh, node_attr, "set",
- oident.desc, zei->nmem);
- data1_mk_tag_data_int (zei->dh, node_attr, "use",
- zsui->info.u.su.use, zei->nmem);
- }
- else if (zsui->info.which == ZEB_SU_STR)
+ if (zsui->info.which == ZEB_SU_STR)
{
data1_mk_tag_data_text (zei->dh, node_attr, "str",
zsui->info.u.str, zei->nmem);
}
data1_mk_tag_data_int (zei->dh, node_attr, "ordinal",
zsui->info.ordinal, zei->nmem);
+
+ data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences",
+ zsui->info.doc_occurrences, zei->nmem);
+ data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences",
+ zsui->info.term_occurrences, zei->nmem);
+ switch(zsui->info.cat)
+ {
+ case zinfo_index_category_index:
+ data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+ "index", zei->nmem); break;
+ case zinfo_index_category_sort:
+ data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+ "sort", zei->nmem); break;
+ case zinfo_index_category_alwaysmatches:
+ data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+ "alwaysmatches", zei->nmem); break;
+ case zinfo_index_category_anchor:
+ data1_mk_tag_data_text (zei->dh, node_attr, "cat",
+ "anchor", zei->nmem); break;
+ }
}
/* convert to "SGML" and write it */
#if ZINFO_DEBUG
rec_put (zei->records, &trec);
}
-int zebraExplain_lookup_attr_su_any_index(ZebraExplainInfo zei,
- int set, int use)
-{
- struct zebSUInfoB *zsui;
-
- assert (zei->curDatabaseInfo);
- for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo;
- zsui; zsui=zsui->next)
- if (zsui->info.which == ZEB_SU_SET_USE &&
- zsui->info.u.su.use == use && zsui->info.u.su.set == set)
- return zsui->info.ordinal;
- return -1;
-}
-
-int zebraExplain_lookup_attr_su(ZebraExplainInfo zei, int index_type,
- int set, int use)
-{
- struct zebSUInfoB *zsui;
-
- assert (zei->curDatabaseInfo);
- for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo;
- zsui; zsui=zsui->next)
- if (zsui->info.index_type == index_type &&
- zsui->info.which == ZEB_SU_SET_USE &&
- zsui->info.u.su.use == use && zsui->info.u.su.set == set)
- return zsui->info.ordinal;
- return -1;
-}
-
-int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type,
+/* Looks up a string-named index in the current database.
+ * An entry matches when its index type and category equal index_type and
+ * cat, it is of kind ZEB_SU_STR, and yaz_matchstr() reports its name equal
+ * to str.  A matching entry is moved to the head of the SUInfo list.
+ * Returns the entry's ordinal, or -1 when nothing matches. */
+int zebraExplain_lookup_attr_str(ZebraExplainInfo zei,
+ zinfo_index_category_t cat,
+ int index_type,
const char *str)
{
- struct zebSUInfoB *zsui;
+ struct zebSUInfoB **zsui; /* address of the link that points at the current entry */
assert (zei->curDatabaseInfo);
- for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo;
- zsui; zsui=zsui->next)
- if (zsui->info.index_type == index_type &&
- zsui->info.which == ZEB_SU_STR && !strcmp(zsui->info.u.str, str))
- return zsui->info.ordinal;
+ for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo;
+ *zsui; zsui = &(*zsui)->next)
+ if ((*zsui)->info.index_type == index_type
+ && (*zsui)->info.cat == cat
+ && (*zsui)->info.which == ZEB_SU_STR
+ && !yaz_matchstr((*zsui)->info.u.str, str))
+ {
+ struct zebSUInfoB *zsui_this = *zsui;
+
+ /* take it out of the list and move to front: unlink through the
+ * predecessor's link, then re-insert as the new list head */
+ *zsui = (*zsui)->next;
+ zsui_this->next = zei->curDatabaseInfo->attributeDetails->SUInfo;
+ zei->curDatabaseInfo->attributeDetails->SUInfo = zsui_this;
+
+ return zsui_this->info.ordinal;
+ }
return -1;
}
}
return 0;
}
-
-int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord,
- int *index_type,
- const char **db,
- int *set, int *use,
- const char **string_index)
+
+/* Finds the index entry with ordinal ord by scanning the SUInfo list of
+ * every database in zei->databaseInfo.  Attribute details whose readFlag
+ * is set are handed to zebraExplain_readAttributeDetails() before their
+ * list is scanned.  A matching entry is moved to the head of its list.
+ * If dirty_mark is non-zero the owning attribute details are flagged
+ * dirty; if db is non-NULL it receives the owning database name.
+ * Returns the entry, or 0 when no database holds that ordinal.
+ * NOTE(review): only attributeDetails->dirty is set here, whereas
+ * zebraExplain_add_sui_info also sets zei->dirty - confirm this suffices. */
+struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord,
+ int dirty_mark,
+ const char **db)
{
struct zebDatabaseInfoB *zdb;
- if (set)
- *set = -1;
- if (use)
- *use = -1;
+ for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+ {
+ struct zebSUInfoB **zsui; /* address of the link that points at the current entry */
+
+ if (zdb->attributeDetails->readFlag)
+ zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
+
+ for (zsui = &zdb->attributeDetails->SUInfo; *zsui;
+ zsui = &(*zsui)->next)
+ if ((*zsui)->info.ordinal == ord)
+ {
+ struct zebSUInfoB *zsui_this = *zsui;
+
+ /* take it out of the list and move to front */
+ *zsui = (*zsui)->next;
+ zsui_this->next = zdb->attributeDetails->SUInfo;
+ zdb->attributeDetails->SUInfo = zsui_this;
+
+ if (dirty_mark)
+ zdb->attributeDetails->dirty = 1;
+ if (db)
+ *db = zdb->databaseName;
+ return zsui_this;
+ }
+ }
+ return 0;
+}
+
+
+
+/* Adds term_delta and doc_delta to the occurrence counters of the index
+ * with ordinal ord.  The lookup is made with dirty_mark set, so the owning
+ * attribute details are flagged dirty.
+ * Returns 0 on success, -1 when no index has that ordinal. */
+int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord,
+                                        int term_delta, int doc_delta)
+{
+    struct zebSUInfoB *entry = zebraExplain_get_sui_info(zei, ord, 1, 0);
+
+    if (!entry)
+        return -1;
+
+    entry->info.term_occurrences += term_delta;
+    entry->info.doc_occurrences += doc_delta;
+    return 0;
+}
+
+/* Fetches both occurrence counters of the index with ordinal ord.
+ * term_occurrences and doc_occurrences are optional outputs: either may
+ * be NULL when the caller does not need that value (matching how
+ * zebraExplain_lookup_ord treats its out-parameters).  The outputs are
+ * left untouched when the ordinal is unknown.
+ * Returns 0 on success, -1 when no index has that ordinal. */
+int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord,
+                                     zint *term_occurrences,
+                                     zint *doc_occurrences)
+{
+    struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0);
+
+    if (!zsui)
+        return -1;
+
+    if (term_occurrences)
+        *term_occurrences = zsui->info.term_occurrences;
+    if (doc_occurrences)
+        *doc_occurrences = zsui->info.doc_occurrences;
+    return 0;
+}
+
+/* Returns the document occurrence counter of the index with ordinal ord,
+ * or 0 when no index has that ordinal. */
+zint zebraExplain_ord_get_doc_occurrences(ZebraExplainInfo zei, int ord)
+{
+    struct zebSUInfoB *entry = zebraExplain_get_sui_info(zei, ord, 0, 0);
+
+    return entry ? entry->info.doc_occurrences : 0;
+}
+
+/* Returns the term occurrence counter of the index with ordinal ord,
+ * or 0 when no index has that ordinal. */
+zint zebraExplain_ord_get_term_occurrences(ZebraExplainInfo zei, int ord)
+{
+    struct zebSUInfoB *entry = zebraExplain_get_sui_info(zei, ord, 0, 0);
+
+    return entry ? entry->info.term_occurrences : 0;
+}
+/* Resolves ordinal ord to its index type, owning database name and, for
+ * ZEB_SU_STR entries, its index name.  All three out-parameters are
+ * optional (may be NULL).  *index_type and *string_index are reset to 0
+ * before the lookup; *db is written only when the ordinal is found.
+ * Returns 0 when the ordinal is found, -1 otherwise. */
+int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord,
+ int *index_type,
+ const char **db,
+ const char **string_index)
+{
+ struct zebSUInfoB *zsui;
+
if (index_type)
*index_type = 0;
if (string_index)
*string_index = 0;
- for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+ zsui = zebraExplain_get_sui_info(zei, ord, 0, db); /* dirty_mark 0: read-only lookup */
+ if (zsui)
{
- struct zebSUInfoB *zsui;
-
- if (zdb->attributeDetails->readFlag)
- zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
-
- for (zsui = zdb->attributeDetails->SUInfo; zsui; zsui = zsui->next)
- if (zsui->info.ordinal == ord)
- {
- if (db)
- *db = zdb->databaseName;
- if (zsui->info.which == ZEB_SU_SET_USE)
- {
- if (set)
- *set = zsui->info.u.su.set;
- if (use)
- *use = zsui->info.u.su.use;
- }
-
- if (zsui->info.which == ZEB_SU_STR)
- if (string_index)
- *string_index = zsui->info.u.str;
-
- if (index_type)
- *index_type = zsui->info.index_type;
- return 0;
- }
+ if (zsui->info.which == ZEB_SU_STR)
+ if (string_index)
+ *string_index = zsui->info.u.str; /* points at the entry's own string, not a copy */
+ if (index_type)
+ *index_type = zsui->info.index_type;
+ return 0;
}
return -1;
}
+
+
zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei,
zebAccessObject *op,
Odr_oid *oid)
}
}
-int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type,
- int set, int use)
+/* Allocates a new index entry from zei->nmem and links it at the head of
+ * the current database's SUInfo list, marking both the attribute details
+ * and zei dirty.  The entry gets the given index_type and cat, zeroed
+ * occurrence counters and the next free ordinal (zei->ordinalSU is then
+ * incremented).  info.which and info.u are not set here; the caller is
+ * expected to fill them in.  Returns the new entry. */
+struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei,
+ zinfo_index_category_t cat,
+ int index_type)
{
struct zebSUInfoB *zsui;
assert (zei->curDatabaseInfo);
- zebraExplain_addAttributeSet (zei, set);
zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui));
zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo;
zei->curDatabaseInfo->attributeDetails->SUInfo = zsui;
zei->curDatabaseInfo->attributeDetails->dirty = 1;
zei->dirty = 1;
zsui->info.index_type = index_type;
- zsui->info.which = ZEB_SU_SET_USE;
- zsui->info.u.su.set = set;
- zsui->info.u.su.use = use;
+ zsui->info.cat = cat;
+ zsui->info.doc_occurrences = 0; /* a new index starts with no recorded occurrences */
+ zsui->info.term_occurrences = 0;
zsui->info.ordinal = (zei->ordinalSU)++;
- return zsui->info.ordinal;
+ return zsui;
}
-int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type,
+/* Registers a new string-named index in the current database.
+ * The entry is created by zebraExplain_add_sui_info() with the given
+ * category and index type, then tagged ZEB_SU_STR and given a copy of
+ * index_name made with nmem_strdup() on zei->nmem.
+ * Returns the ordinal assigned to the new index. */
+int zebraExplain_add_attr_str(ZebraExplainInfo zei,
+ zinfo_index_category_t cat,
+ int index_type,
const char *index_name)
{
- struct zebSUInfoB *zsui;
+ struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, cat, index_type); /* allocates, links and numbers the entry */
- assert (zei->curDatabaseInfo);
- zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui));
- zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo;
- zei->curDatabaseInfo->attributeDetails->SUInfo = zsui;
- zei->curDatabaseInfo->attributeDetails->dirty = 1;
- zei->dirty = 1;
- zsui->info.index_type = index_type;
zsui->info.which = ZEB_SU_STR;
zsui->info.u.str = nmem_strdup(zei->nmem, index_name);
- zsui->info.ordinal = (zei->ordinalSU)++;
return zsui->info.ordinal;
}