X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzinfo.c;h=908fdd3c59d9e9d3c6ea465c06c42d3fc561f1ad;hb=b88909df16157ed1e7859bc3fad6b01520d4865e;hp=223508931990c30ea878e39793442c8eb25a2829;hpb=3ffa2e910d57f2842c7e9ed12b3f60d24e187826;p=idzebra-moved-to-github.git diff --git a/index/zinfo.c b/index/zinfo.c index 2235089..908fdd3 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,5 +1,5 @@ -/* $Id: zinfo.c,v 1.59 2006-05-10 09:08:55 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: zinfo.c,v 1.66 2006-06-13 12:02:12 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -38,12 +38,10 @@ struct zebSUInfo { int which; union { char *str; - struct { - int set; - int use; - } su; } u; int ordinal; + zint doc_occurrences; + zint term_occurrences; }; struct zebSUInfoB { @@ -116,7 +114,7 @@ struct zebraExplainInfo { struct zebDatabaseInfoB *curDatabaseInfo; zebAccessInfo accessInfo; char date[15]; /* YYYY MMDD HH MM SS */ - int (*updateFunc)(void *handle, Record drec, data1_node *n); + ZebraExplainUpdateFunc *updateFunc; void *updateHandle; }; @@ -338,7 +336,7 @@ ZebraExplainInfo zebraExplain_open( Res res, int writeFlag, void *updateHandle, - int (*updateFunc)(void *handle, Record drec, data1_node *n)) + ZebraExplainUpdateFunc *updateFunc) { Record trec; ZebraExplainInfo zei; @@ -361,6 +359,9 @@ ZebraExplainInfo zebraExplain_open( zei->records = records; zei->nmem = nmem; zei->dh = dh; + + data1_get_absyn (zei->dh, "explain", DATA1_XPATH_INDEXING_DISABLE); + zei->attsets = NULL; zei->res = res; zei->categoryList = (struct zebraCategoryListInfo *) @@ -589,14 +590,12 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, "attrlist"); for (np = node_list->child; np; np = np->next) { - data1_node *node_set = NULL; - data1_node *node_use = NULL; data1_node *node_str = NULL; data1_node *node_ordinal = NULL; data1_node *node_type = NULL; + data1_node *node_doc_occurrences = NULL; + data1_node *node_term_occurrences = NULL; data1_node *np2; - char oid_str[128]; - int oid_str_len; if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "attr")) continue; @@ -605,16 +604,21 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, if (np2->which != DATA1N_tag || !np2->child || np2->child->which != DATA1N_data) continue; - if (!strcmp(np2->u.tag.tag, "set")) - node_set = np2->child; - else if (!strcmp(np2->u.tag.tag, "use")) - node_use = np2->child; - else if (!strcmp(np2->u.tag.tag, "str")) + if (!strcmp(np2->u.tag.tag, "str")) node_str = np2->child; else if (!strcmp(np2->u.tag.tag, "ordinal")) node_ordinal = np2->child; else if (!strcmp(np2->u.tag.tag, "type")) node_type = np2->child; + else if (!strcmp(np2->u.tag.tag, "dococcurrences")) + node_doc_occurrences = np2->child; + else if (!strcmp(np2->u.tag.tag, "termoccurrences")) + node_term_occurrences = np2->child; + else + { + yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails", + np2->u.tag.tag); + } } assert(node_ordinal); @@ -629,25 +633,19 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, (*zsuip)->info.index_type = 'w'; } - if (node_set && node_use) - { - (*zsuip)->info.which = ZEB_SU_SET_USE; - - oid_str_len = node_set->u.data.len; - if (oid_str_len >= (int) sizeof(oid_str)) - oid_str_len = sizeof(oid_str)-1; - memcpy(oid_str, node_set->u.data.data, oid_str_len); - oid_str[oid_str_len] = '\0'; - - (*zsuip)->info.u.su.set = oid_getvalbyname(oid_str); - - (*zsuip)->info.u.su.use = atoi_n(node_use->u.data.data, - node_use->u.data.len); - yaz_log(YLOG_DEBUG, "set=%d use=%d ordinal=%d", - (*zsuip)->info.u.su.set, (*zsuip)->info.u.su.use, - (*zsuip)->info.ordinal); - } - else if (node_str) + if (node_doc_occurrences) + { + data1_node *np = node_doc_occurrences; + (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_term_occurrences) + { + data1_node *np = node_term_occurrences; + (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_str) { (*zsuip)->info.which = ZEB_SU_STR; @@ -940,32 +938,6 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, return 0; } -static void writeAttributeValueDetails (ZebraExplainInfo zei, - zebAttributeDetails zad, - data1_node *node_atvs, data1_attset *attset) - -{ - struct zebSUInfoB *zsui; - int set_ordinal = attset->reference; - data1_attset_child *c; - - for (c = attset->children; c; c = c->next) - writeAttributeValueDetails (zei, zad, node_atvs, c->child); - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - if (zsui->info.which == ZEB_SU_SET_USE && - set_ordinal == zsui->info.u.su.set) - { - data1_node *node_attvalue, *node_value; - node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue", - 0 /* attr */, node_atvs); - node_value = data1_mk_tag (zei->dh, zei->nmem, "value", - 0 /* attr */, node_attvalue); - data1_mk_tag_data_int (zei->dh, node_value, "numeric", - zsui->info.u.su.use, zei->nmem); - } - } -} static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, struct zebraCategoryListInfo *zcl, @@ -1039,9 +1011,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, char *sgml_buf; int sgml_len; Record drec; - data1_node *node_adinfo, *node_list, *node_zebra, *node_attributesBySet; + data1_node *node_adinfo, *node_list, *node_zebra; struct zebSUInfoB *zsui; - int set_min; if (!zad->dirty) return; @@ -1067,65 +1038,6 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, record count, etc. is affected */ if (key_flush) (*zei->updateFunc)(zei->updateHandle, drec, zad->data1_tree); - - node_attributesBySet = data1_mk_tag_uni (zei->dh, zei->nmem, - "attributesBySet", node_adinfo); - set_min = -1; - while (1) - { - data1_node *node_asd; - data1_attset *attset; - int set_ordinal = -1; - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - if (zsui->info.which == ZEB_SU_SET_USE && - (set_ordinal < 0 || set_ordinal > zsui->info.u.su.set) - && zsui->info.u.su.set > set_min) - set_ordinal = zsui->info.u.su.set; - } - if (set_ordinal < 0) - break; - set_min = set_ordinal; - node_asd = data1_mk_tag (zei->dh, zei->nmem, - "attributeSetDetails", - 0 /* attr */, node_attributesBySet); - - attset = data1_attset_search_id (zei->dh, set_ordinal); - if (!attset) - { - zebraExplain_loadAttsets (zei->dh, zei->res); - attset = data1_attset_search_id (zei->dh, set_ordinal); - } - if (attset) - { - int oid[OID_SIZE]; - oident oe; - - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = (enum oid_value) set_ordinal; - - if (oid_ent_to_oid (&oe, oid)) - { - data1_node *node_abt, *node_atd, *node_atvs; - data1_mk_tag_data_oid (zei->dh, node_asd, "oid", - oid, zei->nmem); - - node_abt = data1_mk_tag (zei->dh, zei->nmem, - "attributesByType", - 0 /*attr */, node_asd); - node_atd = data1_mk_tag (zei->dh, zei->nmem, - "attributeTypeDetails", - 0 /* attr */, node_abt); - data1_mk_tag_data_int (zei->dh, node_atd, - "type", 1, zei->nmem); - node_atvs = data1_mk_tag (zei->dh, zei->nmem, - "attributeValues", - 0 /* attr */, node_atd); - writeAttributeValueDetails (zei, zad, node_atvs, attset); - } - } - } /* zebra info (private) */ node_zebra = data1_mk_tag_uni (zei->dh, zei->nmem, "zebraInfo", node_adinfo); @@ -1133,11 +1045,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, "attrlist", node_zebra); for (zsui = zad->SUInfo; zsui; zsui = zsui->next) { - struct oident oident; - int oid[OID_SIZE]; data1_node *node_attr; char index_type_str[2]; - node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", 0 /* attr */, node_list); @@ -1146,25 +1055,18 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, index_type_str[1] = '\0'; data1_mk_tag_data_text (zei->dh, node_attr, "type", index_type_str, zei->nmem); - if (zsui->info.which == ZEB_SU_SET_USE) - { - oident.proto = PROTO_Z3950; - oident.oclass = CLASS_ATTSET; - oident.value = (enum oid_value) zsui->info.u.su.set; - oid_ent_to_oid (&oident, oid); - - data1_mk_tag_data_text (zei->dh, node_attr, "set", - oident.desc, zei->nmem); - data1_mk_tag_data_int (zei->dh, node_attr, "use", - zsui->info.u.su.use, zei->nmem); - } - else if (zsui->info.which == ZEB_SU_STR) + if (zsui->info.which == ZEB_SU_STR) { data1_mk_tag_data_text (zei->dh, node_attr, "str", zsui->info.u.str, zei->nmem); } data1_mk_tag_data_int (zei->dh, node_attr, "ordinal", zsui->info.ordinal, zei->nmem); + + data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences", + zsui->info.doc_occurrences, zei->nmem); + data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences", + zsui->info.term_occurrences, zei->nmem); } /* convert to "SGML" and write it */ #if ZINFO_DEBUG @@ -1404,48 +1306,6 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) rec_put (zei->records, &trec); } -int zebraExplain_lookup_attr_su_any_index(ZebraExplainInfo zei, - int set, int use) -{ - struct zebSUInfoB *zsui; - - assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.which == ZEB_SU_SET_USE && - zsui->info.u.su.use == use && zsui->info.u.su.set == set) - return zsui->info.ordinal; - return -1; -} - -int zebraExplain_lookup_attr_su(ZebraExplainInfo zei, int index_type, - int set, int use) -{ - struct zebSUInfoB **zsui; - -#if 0 - yaz_log(YLOG_LOG, "lookup_attr_su index_type=%d set=%d use=%d", - index_type, set, use); -#endif - assert (zei->curDatabaseInfo); - for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo; - *zsui; zsui = &(*zsui)->next) - if ((*zsui)->info.index_type == index_type && - (*zsui)->info.which == ZEB_SU_SET_USE && - (*zsui)->info.u.su.use == use && (*zsui)->info.u.su.set == set) - { - struct zebSUInfoB *zsui_this = *zsui; - - /* take it out of the list and move to front */ - *zsui = (*zsui)->next; - zsui_this->next = zei->curDatabaseInfo->attributeDetails->SUInfo; - zei->curDatabaseInfo->attributeDetails->SUInfo = zsui_this; - - return zsui_this->info.ordinal; - } - return -1; -} - int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, const char *str) { @@ -1456,7 +1316,7 @@ int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, *zsui; zsui = &(*zsui)->next) if ((*zsui)->info.index_type == index_type && (*zsui)->info.which == ZEB_SU_STR - && !strcmp((*zsui)->info.u.str, str)) + && !yaz_matchstr((*zsui)->info.u.str, str)) { struct zebSUInfoB *zsui_this = *zsui; @@ -1482,56 +1342,114 @@ int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle, } return 0; } - -int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, - int *index_type, - const char **db, - int *set, int *use, - const char **string_index) + + +struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord, + int dirty_mark, + const char **db) { struct zebDatabaseInfoB *zdb; - if (set) - *set = -1; - if (use) - *use = -1; + for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) + { + struct zebSUInfoB **zsui; + + if (zdb->attributeDetails->readFlag) + zebraExplain_readAttributeDetails (zei, zdb->attributeDetails); + + for (zsui = &zdb->attributeDetails->SUInfo; *zsui; + zsui = &(*zsui)->next) + if ((*zsui)->info.ordinal == ord) + { + struct zebSUInfoB *zsui_this = *zsui; + + /* take it out of the list and move to front */ + *zsui = (*zsui)->next; + zsui_this->next = zdb->attributeDetails->SUInfo; + zdb->attributeDetails->SUInfo = zsui_this; + + if (dirty_mark) + zdb->attributeDetails->dirty = 1; + if (db) + *db = zdb->databaseName; + return zsui_this; + } + } + return 0; +} + + + +int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord, + int term_delta, int doc_delta) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 1, 0); + if (zsui) + { + zsui->info.term_occurrences += term_delta; + zsui->info.doc_occurrences += doc_delta; + return 0; + } + return -1; +} + +int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord, + zint *term_occurrences, + zint *doc_occurrences) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + { + *term_occurrences = zsui->info.term_occurrences; + *doc_occurrences = zsui->info.doc_occurrences; + return 0; + } + return -1; +} + +zint zebraExplain_ord_get_doc_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.doc_occurrences; + return 0; +} + +zint zebraExplain_ord_get_term_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.term_occurrences; + return 0; +} + +int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, + int *index_type, + const char **db, + const char **string_index) +{ + struct zebSUInfoB *zsui; + if (index_type) *index_type = 0; if (string_index) *string_index = 0; - for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) + zsui = zebraExplain_get_sui_info(zei, ord, 0, db); + if (zsui) { - struct zebSUInfoB *zsui; - - if (zdb->attributeDetails->readFlag) - zebraExplain_readAttributeDetails (zei, zdb->attributeDetails); - - for (zsui = zdb->attributeDetails->SUInfo; zsui; zsui = zsui->next) - if (zsui->info.ordinal == ord) - { - if (db) - *db = zdb->databaseName; - if (zsui->info.which == ZEB_SU_SET_USE) - { - if (set) - *set = zsui->info.u.su.set; - if (use) - *use = zsui->info.u.su.use; - } - - if (zsui->info.which == ZEB_SU_STR) - if (string_index) - *string_index = zsui->info.u.str; - - if (index_type) - *index_type = zsui->info.index_type; - return 0; - } + if (zsui->info.which == ZEB_SU_STR) + if (string_index) + *string_index = zsui->info.u.str; + if (index_type) + *index_type = zsui->info.index_type; + return 0; } return -1; } + + zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei, zebAccessObject *op, Odr_oid *oid) @@ -1570,41 +1488,31 @@ void zebraExplain_addAttributeSet (ZebraExplainInfo zei, int set) } } -int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type, - int set, int use) +struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei, + int index_type) { struct zebSUInfoB *zsui; assert (zei->curDatabaseInfo); - zebraExplain_addAttributeSet (zei, set); zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui)); zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo; zei->curDatabaseInfo->attributeDetails->SUInfo = zsui; zei->curDatabaseInfo->attributeDetails->dirty = 1; zei->dirty = 1; zsui->info.index_type = index_type; - zsui->info.which = ZEB_SU_SET_USE; - zsui->info.u.su.set = set; - zsui->info.u.su.use = use; + zsui->info.doc_occurrences = 0; + zsui->info.term_occurrences = 0; zsui->info.ordinal = (zei->ordinalSU)++; - return zsui->info.ordinal; + return zsui; } int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type, const char *index_name) { - struct zebSUInfoB *zsui; + struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, index_type); - assert (zei->curDatabaseInfo); - zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui)); - zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo; - zei->curDatabaseInfo->attributeDetails->SUInfo = zsui; - zei->curDatabaseInfo->attributeDetails->dirty = 1; - zei->dirty = 1; - zsui->info.index_type = index_type; zsui->info.which = ZEB_SU_STR; zsui->info.u.str = nmem_strdup(zei->nmem, index_name); - zsui->info.ordinal = (zei->ordinalSU)++; return zsui->info.ordinal; }