X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzinfo.c;h=908fdd3c59d9e9d3c6ea465c06c42d3fc561f1ad;hb=b88909df16157ed1e7859bc3fad6b01520d4865e;hp=740f7f01d08041ef8b6bb84cdf082996e66052cc;hpb=1d09966e51904c44ed82eaa920ffc9fbcc087541;p=idzebra-moved-to-github.git diff --git a/index/zinfo.c b/index/zinfo.c index 740f7f0..908fdd3 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,5 +1,5 @@ -/* $Id: zinfo.c,v 1.54 2006-02-09 08:31:02 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: zinfo.c,v 1.66 2006-06-13 12:02:12 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -38,12 +38,10 @@ struct zebSUInfo { int which; union { char *str; - struct { - int set; - int use; - } su; } u; int ordinal; + zint doc_occurrences; + zint term_occurrences; }; struct zebSUInfoB { @@ -116,7 +114,7 @@ struct zebraExplainInfo { struct zebDatabaseInfoB *curDatabaseInfo; zebAccessInfo accessInfo; char date[15]; /* YYYY MMDD HH MM SS */ - int (*updateFunc)(void *handle, Record drec, data1_node *n); + ZebraExplainUpdateFunc *updateFunc; void *updateHandle; }; @@ -150,11 +148,15 @@ static Record createRecord(Records records, SYSNO *sysno) if (*sysno) { rec = rec_get(records, *sysno); + if (!rec) + return 0; xfree(rec->info[recInfo_storeData]); } else { rec = rec_new(records); + if (!rec) + return 0; *sysno = rec->sysno; rec->info[recInfo_fileType] = @@ -334,7 +336,7 @@ ZebraExplainInfo zebraExplain_open( Res res, int writeFlag, void *updateHandle, - int (*updateFunc)(void *handle, Record drec, data1_node *n)) + ZebraExplainUpdateFunc *updateFunc) { Record trec; ZebraExplainInfo zei; @@ -357,6 +359,9 @@ ZebraExplainInfo zebraExplain_open( zei->records = records; zei->nmem = nmem; zei->dh = dh; + + data1_get_absyn (zei->dh, "explain", DATA1_XPATH_INDEXING_DISABLE); + zei->attsets = NULL; zei->res = res; zei->categoryList = (struct zebraCategoryListInfo *) @@ -520,6 +525,12 @@ ZebraExplainInfo zebraExplain_open( /* write now because we want to be sure about the sysno */ trec = rec_new(records); + if (!trec) + { + yaz_log(YLOG_FATAL, "Cannot create root Explain record"); + nmem_destroy(zei->nmem); + return 0; + } trec->info[recInfo_fileType] = rec_strdup("grs.sgml", &trec->size[recInfo_fileType]); trec->info[recInfo_databaseName] = @@ -529,11 +540,11 @@ ZebraExplainInfo zebraExplain_open( trec->info[recInfo_storeData] = (char *) xmalloc(sgml_len); memcpy(trec->info[recInfo_storeData], sgml_buf, sgml_len); trec->size[recInfo_storeData] = sgml_len; - + rec_put(records, &trec); rec_rm(&trec); - } + zebraExplain_newDatabase(zei, "IR-Explain-1", 0); if (!zei->categoryList->dirty) @@ -579,14 +590,12 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, "attrlist"); for (np = node_list->child; np; np = np->next) { - data1_node *node_set = NULL; - data1_node *node_use = NULL; data1_node *node_str = NULL; data1_node *node_ordinal = NULL; data1_node *node_type = NULL; + data1_node *node_doc_occurrences = NULL; + data1_node *node_term_occurrences = NULL; data1_node *np2; - char oid_str[128]; - int oid_str_len; if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "attr")) continue; @@ -595,16 +604,21 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, if (np2->which != DATA1N_tag || !np2->child || np2->child->which != DATA1N_data) continue; - if (!strcmp(np2->u.tag.tag, "set")) - node_set = np2->child; - else if (!strcmp(np2->u.tag.tag, "use")) - node_use = np2->child; - else if (!strcmp(np2->u.tag.tag, "str")) + if (!strcmp(np2->u.tag.tag, "str")) node_str = np2->child; else if (!strcmp(np2->u.tag.tag, "ordinal")) node_ordinal = np2->child; else if (!strcmp(np2->u.tag.tag, "type")) node_type = np2->child; + else if (!strcmp(np2->u.tag.tag, "dococcurrences")) + node_doc_occurrences = np2->child; + else if (!strcmp(np2->u.tag.tag, "termoccurrences")) + node_term_occurrences = np2->child; + else + { + yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails", + np2->u.tag.tag); + } } assert(node_ordinal); @@ -619,25 +633,19 @@ static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, (*zsuip)->info.index_type = 'w'; } - if (node_set && node_use) - { - (*zsuip)->info.which = ZEB_SU_SET_USE; - - oid_str_len = node_set->u.data.len; - if (oid_str_len >= (int) sizeof(oid_str)) - oid_str_len = sizeof(oid_str)-1; - memcpy(oid_str, node_set->u.data.data, oid_str_len); - oid_str[oid_str_len] = '\0'; - - (*zsuip)->info.u.su.set = oid_getvalbyname(oid_str); - - (*zsuip)->info.u.su.use = atoi_n(node_use->u.data.data, - node_use->u.data.len); - yaz_log(YLOG_DEBUG, "set=%d use=%d ordinal=%d", - (*zsuip)->info.u.su.set, (*zsuip)->info.u.su.use, - (*zsuip)->info.ordinal); - } - else if (node_str) + if (node_doc_occurrences) + { + data1_node *np = node_doc_occurrences; + (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_term_occurrences) + { + data1_node *np = node_term_occurrences; + (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_str) { (*zsuip)->info.which = ZEB_SU_STR; @@ -930,32 +938,6 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, return 0; } -static void writeAttributeValueDetails (ZebraExplainInfo zei, - zebAttributeDetails zad, - data1_node *node_atvs, data1_attset *attset) - -{ - struct zebSUInfoB *zsui; - int set_ordinal = attset->reference; - data1_attset_child *c; - - for (c = attset->children; c; c = c->next) - writeAttributeValueDetails (zei, zad, node_atvs, c->child); - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - if (zsui->info.which == ZEB_SU_SET_USE && - set_ordinal == zsui->info.u.su.set) - { - data1_node *node_attvalue, *node_value; - node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue", - 0 /* attr */, node_atvs); - node_value = data1_mk_tag (zei->dh, zei->nmem, "value", - 0 /* attr */, node_attvalue); - data1_mk_tag_data_int (zei->dh, node_value, "numeric", - zsui->info.u.su.use, zei->nmem); - } - } -} static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, struct zebraCategoryListInfo *zcl, @@ -986,6 +968,8 @@ static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, #endif drec = createRecord (zei->records, &sysno); + if (!drec) + return; node_ci = data1_search_tag (zei->dh, node_categoryList, "/categoryList"); @@ -1027,9 +1011,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, char *sgml_buf; int sgml_len; Record drec; - data1_node *node_adinfo, *node_list, *node_zebra, *node_attributesBySet; + data1_node *node_adinfo, *node_list, *node_zebra; struct zebSUInfoB *zsui; - int set_min; if (!zad->dirty) return; @@ -1040,6 +1023,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, #endif drec = createRecord (zei->records, &zad->sysno); + if (!drec) + return; assert (zad->data1_tree); node_adinfo = data1_search_tag (zei->dh, zad->data1_tree, @@ -1053,65 +1038,6 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, record count, etc. is affected */ if (key_flush) (*zei->updateFunc)(zei->updateHandle, drec, zad->data1_tree); - - node_attributesBySet = data1_mk_tag_uni (zei->dh, zei->nmem, - "attributesBySet", node_adinfo); - set_min = -1; - while (1) - { - data1_node *node_asd; - data1_attset *attset; - int set_ordinal = -1; - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - if (zsui->info.which == ZEB_SU_SET_USE && - (set_ordinal < 0 || set_ordinal > zsui->info.u.su.set) - && zsui->info.u.su.set > set_min) - set_ordinal = zsui->info.u.su.set; - } - if (set_ordinal < 0) - break; - set_min = set_ordinal; - node_asd = data1_mk_tag (zei->dh, zei->nmem, - "attributeSetDetails", - 0 /* attr */, node_attributesBySet); - - attset = data1_attset_search_id (zei->dh, set_ordinal); - if (!attset) - { - zebraExplain_loadAttsets (zei->dh, zei->res); - attset = data1_attset_search_id (zei->dh, set_ordinal); - } - if (attset) - { - int oid[OID_SIZE]; - oident oe; - - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = (enum oid_value) set_ordinal; - - if (oid_ent_to_oid (&oe, oid)) - { - data1_node *node_abt, *node_atd, *node_atvs; - data1_mk_tag_data_oid (zei->dh, node_asd, "oid", - oid, zei->nmem); - - node_abt = data1_mk_tag (zei->dh, zei->nmem, - "attributesByType", - 0 /*attr */, node_asd); - node_atd = data1_mk_tag (zei->dh, zei->nmem, - "attributeTypeDetails", - 0 /* attr */, node_abt); - data1_mk_tag_data_int (zei->dh, node_atd, - "type", 1, zei->nmem); - node_atvs = data1_mk_tag (zei->dh, zei->nmem, - "attributeValues", - 0 /* attr */, node_atd); - writeAttributeValueDetails (zei, zad, node_atvs, attset); - } - } - } /* zebra info (private) */ node_zebra = data1_mk_tag_uni (zei->dh, zei->nmem, "zebraInfo", node_adinfo); @@ -1119,11 +1045,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, "attrlist", node_zebra); for (zsui = zad->SUInfo; zsui; zsui = zsui->next) { - struct oident oident; - int oid[OID_SIZE]; data1_node *node_attr; char index_type_str[2]; - node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", 0 /* attr */, node_list); @@ -1132,25 +1055,18 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, index_type_str[1] = '\0'; data1_mk_tag_data_text (zei->dh, node_attr, "type", index_type_str, zei->nmem); - if (zsui->info.which == ZEB_SU_SET_USE) - { - oident.proto = PROTO_Z3950; - oident.oclass = CLASS_ATTSET; - oident.value = (enum oid_value) zsui->info.u.su.set; - oid_ent_to_oid (&oident, oid); - - data1_mk_tag_data_text (zei->dh, node_attr, "set", - oident.desc, zei->nmem); - data1_mk_tag_data_int (zei->dh, node_attr, "use", - zsui->info.u.su.use, zei->nmem); - } - else if (zsui->info.which == ZEB_SU_STR) + if (zsui->info.which == ZEB_SU_STR) { data1_mk_tag_data_text (zei->dh, node_attr, "str", zsui->info.u.str, zei->nmem); } data1_mk_tag_data_int (zei->dh, node_attr, "ordinal", zsui->info.ordinal, zei->nmem); + + data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences", + zsui->info.doc_occurrences, zei->nmem); + data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences", + zsui->info.term_occurrences, zei->nmem); } /* convert to "SGML" and write it */ #if ZINFO_DEBUG @@ -1182,6 +1098,8 @@ static void zebraExplain_writeDatabase (ZebraExplainInfo zei, yaz_log(YLOG_LOG, "zebraExplain_writeDatabase %s", zdi->databaseName); #endif drec = createRecord (zei->records, &zdi->sysno); + if (!drec) + return; assert (zdi->data1_database); node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database, @@ -1272,6 +1190,8 @@ static void zebraExplain_writeAttributeSet (ZebraExplainInfo zei, #endif drec = createRecord (zei->records, &o->sysno); + if (!drec) + return; node_root = data1_read_sgml (zei->dh, zei->nmem, "AttributeSetInfo\n" @@ -1386,46 +1306,27 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) rec_put (zei->records, &trec); } -int zebraExplain_lookup_attr_su_any_index(ZebraExplainInfo zei, - int set, int use) -{ - struct zebSUInfoB *zsui; - - assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.which == ZEB_SU_SET_USE && - zsui->info.u.su.use == use && zsui->info.u.su.set == set) - return zsui->info.ordinal; - return -1; -} - -int zebraExplain_lookup_attr_su(ZebraExplainInfo zei, int index_type, - int set, int use) -{ - struct zebSUInfoB *zsui; - - assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.index_type == index_type && - zsui->info.which == ZEB_SU_SET_USE && - zsui->info.u.su.use == use && zsui->info.u.su.set == set) - return zsui->info.ordinal; - return -1; -} - int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, int index_type, const char *str) { - struct zebSUInfoB *zsui; + struct zebSUInfoB **zsui; assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.index_type == index_type && - zsui->info.which == ZEB_SU_STR && !strcmp(zsui->info.u.str, str)) - return zsui->info.ordinal; + for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo; + *zsui; zsui = &(*zsui)->next) + if ((*zsui)->info.index_type == index_type + && (*zsui)->info.which == ZEB_SU_STR + && !yaz_matchstr((*zsui)->info.u.str, str)) + { + struct zebSUInfoB *zsui_this = *zsui; + + /* take it out of the list and move to front */ + *zsui = (*zsui)->next; + zsui_this->next = zei->curDatabaseInfo->attributeDetails->SUInfo; + zei->curDatabaseInfo->attributeDetails->SUInfo = zsui_this; + + return zsui_this->info.ordinal; + } return -1; } @@ -1441,36 +1342,114 @@ int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle, } return 0; } - -int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, - int *index_type, - const char **db, - int *set, int *use) + + +struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord, + int dirty_mark, + const char **db) { struct zebDatabaseInfoB *zdb; + for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) { - struct zebSUInfoB *zsui = zdb->attributeDetails->SUInfo; - for ( ;zsui; zsui = zsui->next) - if (zsui->info.ordinal == ord) - { - if (db) - *db = zdb->databaseName; - if (zsui->info.which == ZEB_SU_SET_USE) - { - if (set) - *set = zsui->info.u.su.set; - if (use) - *use = zsui->info.u.su.use; - } - if (index_type) - *index_type = zsui->info.index_type; - return 0; - } + struct zebSUInfoB **zsui; + + if (zdb->attributeDetails->readFlag) + zebraExplain_readAttributeDetails (zei, zdb->attributeDetails); + + for (zsui = &zdb->attributeDetails->SUInfo; *zsui; + zsui = &(*zsui)->next) + if ((*zsui)->info.ordinal == ord) + { + struct zebSUInfoB *zsui_this = *zsui; + + /* take it out of the list and move to front */ + *zsui = (*zsui)->next; + zsui_this->next = zdb->attributeDetails->SUInfo; + zdb->attributeDetails->SUInfo = zsui_this; + + if (dirty_mark) + zdb->attributeDetails->dirty = 1; + if (db) + *db = zdb->databaseName; + return zsui_this; + } + } + return 0; +} + + + +int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord, + int term_delta, int doc_delta) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 1, 0); + if (zsui) + { + zsui->info.term_occurrences += term_delta; + zsui->info.doc_occurrences += doc_delta; + return 0; + } + return -1; +} + +int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord, + zint *term_occurrences, + zint *doc_occurrences) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + { + *term_occurrences = zsui->info.term_occurrences; + *doc_occurrences = zsui->info.doc_occurrences; + return 0; } return -1; } +zint zebraExplain_ord_get_doc_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.doc_occurrences; + return 0; +} + +zint zebraExplain_ord_get_term_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.term_occurrences; + return 0; +} + +int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, + int *index_type, + const char **db, + const char **string_index) +{ + struct zebSUInfoB *zsui; + + if (index_type) + *index_type = 0; + if (string_index) + *string_index = 0; + + zsui = zebraExplain_get_sui_info(zei, ord, 0, db); + if (zsui) + { + if (zsui->info.which == ZEB_SU_STR) + if (string_index) + *string_index = zsui->info.u.str; + if (index_type) + *index_type = zsui->info.index_type; + return 0; + } + return -1; +} + + + zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei, zebAccessObject *op, Odr_oid *oid) @@ -1509,41 +1488,31 @@ void zebraExplain_addAttributeSet (ZebraExplainInfo zei, int set) } } -int zebraExplain_add_attr_su(ZebraExplainInfo zei, int index_type, - int set, int use) +struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei, + int index_type) { struct zebSUInfoB *zsui; assert (zei->curDatabaseInfo); - zebraExplain_addAttributeSet (zei, set); zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui)); zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo; zei->curDatabaseInfo->attributeDetails->SUInfo = zsui; zei->curDatabaseInfo->attributeDetails->dirty = 1; zei->dirty = 1; zsui->info.index_type = index_type; - zsui->info.which = ZEB_SU_SET_USE; - zsui->info.u.su.set = set; - zsui->info.u.su.use = use; + zsui->info.doc_occurrences = 0; + zsui->info.term_occurrences = 0; zsui->info.ordinal = (zei->ordinalSU)++; - return zsui->info.ordinal; + return zsui; } int zebraExplain_add_attr_str(ZebraExplainInfo zei, int index_type, const char *index_name) { - struct zebSUInfoB *zsui; + struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, index_type); - assert (zei->curDatabaseInfo); - zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui)); - zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo; - zei->curDatabaseInfo->attributeDetails->SUInfo = zsui; - zei->curDatabaseInfo->attributeDetails->dirty = 1; - zei->dirty = 1; - zsui->info.index_type = index_type; zsui->info.which = ZEB_SU_STR; zsui->info.u.str = nmem_strdup(zei->nmem, index_name); - zsui->info.ordinal = (zei->ordinalSU)++; return zsui->info.ordinal; } @@ -1629,3 +1598,11 @@ void zebraExplain_loadAttsets (data1_handle dh, Res res) If the database doesn't exist globally (in TargetInfo) an AttributeSetInfo must be added (globally). */ +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +