X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzinfo.c;h=9684111d02958cd53208d93641262ca05f31b0a4;hp=34b990ea79898706a580c593e6f3bf209181f2d9;hb=c33ea56e3771c3b80ba66ef8fda3a09cad171ebb;hpb=6c9fcd3b5d3108702fa1ffc92dab4ab6060f9a19 diff --git a/index/zinfo.c b/index/zinfo.c index 34b990e..9684111 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,5 +1,5 @@ -/* $Id: zinfo.c,v 1.42 2005-01-15 19:38:29 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: zinfo.c,v 1.68 2006-08-14 10:40:15 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -15,13 +15,14 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ -#include +#include #include +#include #include #include @@ -31,9 +32,17 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define ZINFO_DEBUG 0 struct zebSUInfo { - int set; - int use; + int index_type; + zinfo_index_category_t cat; +#define ZEB_SU_SET_USE 1 +#define ZEB_SU_STR 2 + int which; + union { + char *str; + } u; int ordinal; + zint doc_occurrences; + zint term_occurrences; }; struct zebSUInfoB { @@ -65,6 +74,7 @@ typedef struct { struct zebDatabaseInfoB { zebAttributeDetails attributeDetails; + int ordinalDatabase; char *databaseName; data1_node *data1_database; zint recordCount; /* records in db */ @@ -90,6 +100,7 @@ struct zebraCategoryListInfo { struct zebraExplainInfo { int ordinalSU; + int ordinalDatabase; zint runNumber; int dirty; int write_flag; @@ -104,57 +115,61 @@ struct zebraExplainInfo { struct zebDatabaseInfoB *curDatabaseInfo; zebAccessInfo accessInfo; char date[15]; /* YYYY MMDD HH MM SS */ - int (*updateFunc)(void *handle, Record drec, data1_node *n); + ZebraExplainUpdateFunc *updateFunc; void *updateHandle; }; -static void zebraExplain_initCommonInfo (ZebraExplainInfo zei, data1_node *n); -static void zebraExplain_initAccessInfo (ZebraExplainInfo zei, data1_node *n); +static void zebraExplain_initCommonInfo(ZebraExplainInfo zei, data1_node *n); +static void zebraExplain_initAccessInfo(ZebraExplainInfo zei, data1_node *n); -static data1_node *read_sgml_rec (data1_handle dh, NMEM nmem, Record rec) +static data1_node *read_sgml_rec(data1_handle dh, NMEM nmem, Record rec) { - return data1_read_sgml (dh, nmem, rec->info[recInfo_storeData]); + return data1_read_sgml(dh, nmem, rec->info[recInfo_storeData]); } -static void zebraExplain_writeDatabase (ZebraExplainInfo zei, +static void zebraExplain_writeDatabase(ZebraExplainInfo zei, struct zebDatabaseInfoB *zdi, int key_flush); -static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, +static void zebraExplain_writeAttributeDetails(ZebraExplainInfo zei, zebAttributeDetails zad, const char *databaseName, int key_flush); -static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush); -static void zebraExplain_writeAttributeSet (ZebraExplainInfo zei, +static void zebraExplain_writeTarget(ZebraExplainInfo zei, int key_flush); +static void zebraExplain_writeAttributeSet(ZebraExplainInfo zei, zebAccessObject o, int key_flush); -static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, +static void zebraExplain_writeCategoryList(ZebraExplainInfo zei, struct zebraCategoryListInfo *zcl, int key_flush); -static Record createRecord (Records records, SYSNO *sysno) +static Record createRecord(Records records, SYSNO *sysno) { Record rec; if (*sysno) { - rec = rec_get (records, *sysno); - xfree (rec->info[recInfo_storeData]); + rec = rec_get(records, *sysno); + if (!rec) + return 0; + xfree(rec->info[recInfo_storeData]); } else { - rec = rec_new (records); + rec = rec_new(records); + if (!rec) + return 0; *sysno = rec->sysno; rec->info[recInfo_fileType] = - rec_strdup ("grs.sgml", &rec->size[recInfo_fileType]); + rec_strdup("grs.sgml", &rec->size[recInfo_fileType]); rec->info[recInfo_databaseName] = - rec_strdup ("IR-Explain-1", + rec_strdup("IR-Explain-1", &rec->size[recInfo_databaseName]); } return rec; } -void zebraExplain_flush (ZebraExplainInfo zei, void *handle) +void zebraExplain_flush(ZebraExplainInfo zei, void *handle) { if (!zei) return; @@ -167,43 +182,43 @@ void zebraExplain_flush (ZebraExplainInfo zei, void *handle) /* write each database info record */ for (zdi = zei->databaseInfo; zdi; zdi = zdi->next) { - zebraExplain_writeDatabase (zei, zdi, 1); - zebraExplain_writeAttributeDetails (zei, zdi->attributeDetails, + zebraExplain_writeDatabase(zei, zdi, 1); + zebraExplain_writeAttributeDetails(zei, zdi->attributeDetails, zdi->databaseName, 1); } - zebraExplain_writeTarget (zei, 1); - zebraExplain_writeCategoryList (zei, + zebraExplain_writeTarget(zei, 1); + zebraExplain_writeCategoryList(zei, zei->categoryList, 1); - assert (zei->accessInfo); + assert(zei->accessInfo); for (o = zei->accessInfo->attributeSetIds; o; o = o->next) if (!o->sysno) - zebraExplain_writeAttributeSet (zei, o, 1); + zebraExplain_writeAttributeSet(zei, o, 1); for (o = zei->accessInfo->schemas; o; o = o->next) if (!o->sysno) { -/* zebraExplain_writeSchema (zei, o, 1); */ +/* zebraExplain_writeSchema(zei, o, 1); */ } for (zdi = zei->databaseInfo; zdi; zdi = zdi->next) { - zebraExplain_writeDatabase (zei, zdi, 0); - zebraExplain_writeAttributeDetails (zei, zdi->attributeDetails, + zebraExplain_writeDatabase(zei, zdi, 0); + zebraExplain_writeAttributeDetails(zei, zdi->attributeDetails, zdi->databaseName, 0); } - zebraExplain_writeTarget (zei, 0); + zebraExplain_writeTarget(zei, 0); } } -void zebraExplain_close (ZebraExplainInfo zei) +void zebraExplain_close(ZebraExplainInfo zei) { #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_close"); + yaz_log(YLOG_LOG, "zebraExplain_close"); #endif if (!zei) return; - zebraExplain_flush (zei, zei->updateHandle); - nmem_destroy (zei->nmem); + zebraExplain_flush(zei, zei->updateHandle); + nmem_destroy(zei->nmem); } void zebraExplain_mergeOids (ZebraExplainInfo zei, data1_node *n, @@ -218,25 +233,25 @@ void zebraExplain_mergeOids (ZebraExplainInfo zei, data1_node *n, Odr_oid *oid; zebAccessObject ao; - if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "oid")) + if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "oid")) continue; len = np->child->u.data.len; if (len > 63) len = 63; - memcpy (str, np->child->u.data.data, len); + memcpy(str, np->child->u.data.data, len); str[len] = '\0'; - oid = odr_getoidbystr_nmem (zei->nmem, str); + oid = odr_getoidbystr_nmem(zei->nmem, str); for (ao = *op; ao; ao = ao->next) - if (!oid_oidcmp (oid, ao->oid)) + if (!oid_oidcmp(oid, ao->oid)) { ao->sysno = 1; break; } if (!ao) { - ao = (zebAccessObject) nmem_malloc (zei->nmem, sizeof(*ao)); + ao = (zebAccessObject) nmem_malloc(zei->nmem, sizeof(*ao)); ao->handle = NULL; ao->sysno = 1; ao->oid = oid; @@ -246,7 +261,7 @@ void zebraExplain_mergeOids (ZebraExplainInfo zei, data1_node *n, } } -void zebraExplain_mergeAccessInfo (ZebraExplainInfo zei, data1_node *n, +void zebraExplain_mergeAccessInfo(ZebraExplainInfo zei, data1_node *n, zebAccessInfo *accessInfo) { data1_node *np; @@ -254,19 +269,19 @@ void zebraExplain_mergeAccessInfo (ZebraExplainInfo zei, data1_node *n, if (!n) { *accessInfo = (zebAccessInfo) - nmem_malloc (zei->nmem, sizeof(**accessInfo)); + nmem_malloc(zei->nmem, sizeof(**accessInfo)); (*accessInfo)->attributeSetIds = NULL; (*accessInfo)->schemas = NULL; } else { - if (!(n = data1_search_tag (zei->dh, n->child, "accessInfo"))) + if (!(n = data1_search_tag(zei->dh, n->child, "accessInfo"))) return; - if ((np = data1_search_tag (zei->dh, n->child, "attributeSetIds"))) - zebraExplain_mergeOids (zei, np, + if ((np = data1_search_tag(zei->dh, n->child, "attributeSetIds"))) + zebraExplain_mergeOids(zei, np, &(*accessInfo)->attributeSetIds); - if ((np = data1_search_tag (zei->dh, n->child, "schemas"))) - zebraExplain_mergeOids (zei, np, + if ((np = data1_search_tag(zei->dh, n->child, "schemas"))) + zebraExplain_mergeOids(zei, np, &(*accessInfo)->schemas); } } @@ -317,114 +332,119 @@ explain: nextResultSetPosition = 2 */ -ZebraExplainInfo zebraExplain_open ( +ZebraExplainInfo zebraExplain_open( Records records, data1_handle dh, Res res, int writeFlag, void *updateHandle, - int (*updateFunc)(void *handle, Record drec, data1_node *n)) + ZebraExplainUpdateFunc *updateFunc) { Record trec; ZebraExplainInfo zei; struct zebDatabaseInfoB **zdip; time_t our_time; struct tm *tm; - NMEM nmem = nmem_create (); + NMEM nmem = nmem_create(); #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_open wr=%d", writeFlag); + yaz_log(YLOG_LOG, "zebraExplain_open wr=%d", writeFlag); #endif - zei = (ZebraExplainInfo) nmem_malloc (nmem, sizeof(*zei)); + zei = (ZebraExplainInfo) nmem_malloc(nmem, sizeof(*zei)); + zei->databaseInfo = 0; zei->write_flag = writeFlag; zei->updateHandle = updateHandle; zei->updateFunc = updateFunc; zei->dirty = 0; + zei->ordinalDatabase = 1; zei->curDatabaseInfo = NULL; zei->records = records; zei->nmem = nmem; zei->dh = dh; + + data1_get_absyn (zei->dh, "explain", DATA1_XPATH_INDEXING_DISABLE); + zei->attsets = NULL; zei->res = res; zei->categoryList = (struct zebraCategoryListInfo *) - nmem_malloc (zei->nmem, sizeof(*zei->categoryList)); + nmem_malloc(zei->nmem, sizeof(*zei->categoryList)); zei->categoryList->sysno = 0; zei->categoryList->dirty = 0; zei->categoryList->data1_categoryList = NULL; - if ( atoi (res_get_def (res, "notimestamps", "0") )== 0) + if ( atoi(res_get_def(res, "notimestamps", "0") )== 0) { - time (&our_time); - tm = localtime (&our_time); - sprintf (zei->date, "%04d%02d%02d%02d%02d%02d", + time(&our_time); + tm = localtime(&our_time); + sprintf(zei->date, "%04d%02d%02d%02d%02d%02d", tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); } else { - sprintf (zei->date, "%04d%02d%02d%02d%02d%02d", + sprintf(zei->date, "%04d%02d%02d%02d%02d%02d", 0, 0, 0, 0, 0, 0); } zdip = &zei->databaseInfo; - trec = rec_get (records, 1); /* get "root" record */ + trec = rec_get_root(records); /* get "root" record */ zei->ordinalSU = 1; zei->runNumber = 0; - zebraExplain_mergeAccessInfo (zei, 0, &zei->accessInfo); + zebraExplain_mergeAccessInfo(zei, 0, &zei->accessInfo); if (trec) /* targetInfo already exists ... */ { data1_node *node_tgtinfo, *node_zebra, *node_list, *np; - zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec); + zei->data1_target = read_sgml_rec(zei->dh, zei->nmem, trec); #if 0 if (!zei->data1_target || !zei->data1_target->u.root.absyn) #else if (!zei->data1_target) #endif { - yaz_log (YLOG_FATAL, "Explain schema missing. Check profilePath"); - nmem_destroy (zei->nmem); + yaz_log(YLOG_FATAL, "Explain schema missing. Check profilePath"); + nmem_destroy(zei->nmem); return 0; } #if ZINFO_DEBUG - data1_pr_tree (zei->dh, zei->data1_target, stderr); + data1_pr_tree(zei->dh, zei->data1_target, stderr); #endif - node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target, + node_tgtinfo = data1_search_tag(zei->dh, zei->data1_target, "/targetInfo"); - zebraExplain_mergeAccessInfo (zei, node_tgtinfo, + zebraExplain_mergeAccessInfo(zei, node_tgtinfo, &zei->accessInfo); - node_zebra = data1_search_tag (zei->dh, node_tgtinfo->child, + node_zebra = data1_search_tag(zei->dh, node_tgtinfo->child, "zebraInfo"); np = 0; if (node_zebra) { - node_list = data1_search_tag (zei->dh, node_zebra->child, + node_list = data1_search_tag(zei->dh, node_zebra->child, "databaseList"); if (node_list) np = node_list->child; } - for (; np; np = np->next) + for(; np; np = np->next) { data1_node *node_name = NULL; data1_node *node_id = NULL; data1_node *node_aid = NULL; data1_node *np2; - if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "database")) + if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "database")) continue; - for (np2 = np->child; np2; np2 = np2->next) + for(np2 = np->child; np2; np2 = np2->next) { if (np2->which != DATA1N_tag) continue; - if (!strcmp (np2->u.tag.tag, "name")) + if (!strcmp(np2->u.tag.tag, "name")) node_name = np2->child; - else if (!strcmp (np2->u.tag.tag, "id")) + else if (!strcmp(np2->u.tag.tag, "id")) node_id = np2->child; - else if (!strcmp (np2->u.tag.tag, "attributeDetailsId")) + else if (!strcmp(np2->u.tag.tag, "attributeDetailsId")) node_aid = np2->child; } - assert (node_id && node_name && node_aid); + assert(node_id && node_name && node_aid); - *zdip = (struct zebDatabaseInfoB *) - nmem_malloc (zei->nmem, sizeof(**zdip)); + *zdip =(struct zebDatabaseInfoB *) + nmem_malloc(zei->nmem, sizeof(**zdip)); (*zdip)->readFlag = 1; (*zdip)->dirty = 0; (*zdip)->data1_database = NULL; @@ -434,8 +454,8 @@ ZebraExplainInfo zebraExplain_open ( (*zdip)->databaseName = (char *) nmem_malloc (zei->nmem, 1+node_name->u.data.len); - memcpy ((*zdip)->databaseName, node_name->u.data.data, - node_name->u.data.len); + memcpy((*zdip)->databaseName, node_name->u.data.data, + node_name->u.data.len); (*zdip)->databaseName[node_name->u.data.len] = '\0'; (*zdip)->sysno = atoi_zn (node_id->u.data.data, node_id->u.data.len); @@ -451,21 +471,27 @@ ZebraExplainInfo zebraExplain_open ( } if (node_zebra) { - np = data1_search_tag (zei->dh, node_zebra->child, - "ordinalSU"); + np = data1_search_tag(zei->dh, node_zebra->child, + "ordinalSU"); np = np->child; assert (np && np->which == DATA1N_data); - zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len); + zei->ordinalSU = atoi_n(np->u.data.data, np->u.data.len); - np = data1_search_tag (zei->dh, node_zebra->child, + np = data1_search_tag(zei->dh, node_zebra->child, + "ordinalDatabase"); + np = np->child; + assert (np && np->which == DATA1N_data); + zei->ordinalDatabase = atoi_n(np->u.data.data, np->u.data.len); + + np = data1_search_tag(zei->dh, node_zebra->child, "runNumber"); np = np->child; assert (np && np->which == DATA1N_data); - zei->runNumber = atoi_zn (np->u.data.data, np->u.data.len); - yaz_log (YLOG_DEBUG, "read runnumber=" ZINT_FORMAT, zei->runNumber); + zei->runNumber = atoi_zn(np->u.data.data, np->u.data.len); + yaz_log(YLOG_DEBUG, "read runnumber=" ZINT_FORMAT, zei->runNumber); *zdip = NULL; } - rec_rm (&trec); + rec_rm(&trec); } else /* create initial targetInfo */ { @@ -478,7 +504,7 @@ ZebraExplainInfo zebraExplain_open ( int sgml_len; zei->data1_target = - data1_read_sgml (zei->dh, zei->nmem, + data1_read_sgml(zei->dh, zei->nmem, "TargetInfo\n" "Zebra\n" "1\n" @@ -487,34 +513,40 @@ ZebraExplainInfo zebraExplain_open ( "\n" ); if (!zei->data1_target) { - yaz_log (YLOG_FATAL, "Explain schema missing. Check profilePath"); - nmem_destroy (zei->nmem); + yaz_log(YLOG_FATAL, "Explain schema missing. Check profilePath"); + nmem_destroy(zei->nmem); return 0; } - node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target, + node_tgtinfo = data1_search_tag(zei->dh, zei->data1_target, "/targetInfo"); - assert (node_tgtinfo); + assert(node_tgtinfo); - zebraExplain_initCommonInfo (zei, node_tgtinfo); - zebraExplain_initAccessInfo (zei, node_tgtinfo); + zebraExplain_initCommonInfo(zei, node_tgtinfo); + zebraExplain_initAccessInfo(zei, node_tgtinfo); /* write now because we want to be sure about the sysno */ - trec = rec_new (records); + trec = rec_new(records); + if (!trec) + { + yaz_log(YLOG_FATAL, "Cannot create root Explain record"); + nmem_destroy(zei->nmem); + return 0; + } trec->info[recInfo_fileType] = - rec_strdup ("grs.sgml", &trec->size[recInfo_fileType]); + rec_strdup("grs.sgml", &trec->size[recInfo_fileType]); trec->info[recInfo_databaseName] = - rec_strdup ("IR-Explain-1", &trec->size[recInfo_databaseName]); + rec_strdup("IR-Explain-1", &trec->size[recInfo_databaseName]); sgml_buf = data1_nodetoidsgml(dh, zei->data1_target, 0, &sgml_len); - trec->info[recInfo_storeData] = (char *) xmalloc (sgml_len); - memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len); + trec->info[recInfo_storeData] = (char *) xmalloc(sgml_len); + memcpy(trec->info[recInfo_storeData], sgml_buf, sgml_len); trec->size[recInfo_storeData] = sgml_len; - - rec_put (records, &trec); - rec_rm (&trec); - + + rec_put(records, &trec); + rec_rm(&trec); } - zebraExplain_newDatabase (zei, "IR-Explain-1", 0); + + zebraExplain_newDatabase(zei, "IR-Explain-1", 0); if (!zei->categoryList->dirty) { @@ -523,23 +555,23 @@ ZebraExplainInfo zebraExplain_open ( zcl->dirty = 1; zcl->data1_categoryList = - data1_read_sgml (zei->dh, zei->nmem, + data1_read_sgml(zei->dh, zei->nmem, "CategoryList\n" "\n"); if (zcl->data1_categoryList) { - node_cl = data1_search_tag (zei->dh, zcl->data1_categoryList, + node_cl = data1_search_tag(zei->dh, zcl->data1_categoryList, "/categoryList"); - assert (node_cl); - zebraExplain_initCommonInfo (zei, node_cl); + assert(node_cl); + zebraExplain_initCommonInfo(zei, node_cl); } } } return zei; } -static void zebraExplain_readAttributeDetails (ZebraExplainInfo zei, +static void zebraExplain_readAttributeDetails(ZebraExplainInfo zei, zebAttributeDetails zad) { Record rec; @@ -547,57 +579,116 @@ static void zebraExplain_readAttributeDetails (ZebraExplainInfo zei, data1_node *node_adinfo, *node_zebra, *node_list, *np; assert (zad->sysno); - rec = rec_get (zei->records, zad->sysno); + rec = rec_get(zei->records, zad->sysno); - zad->data1_tree = read_sgml_rec (zei->dh, zei->nmem, rec); + zad->data1_tree = read_sgml_rec(zei->dh, zei->nmem, rec); - node_adinfo = data1_search_tag (zei->dh, zad->data1_tree, + node_adinfo = data1_search_tag(zei->dh, zad->data1_tree, "/attributeDetails"); - node_zebra = data1_search_tag (zei->dh, node_adinfo->child, + node_zebra = data1_search_tag(zei->dh, node_adinfo->child, "zebraInfo"); - node_list = data1_search_tag (zei->dh, node_zebra->child, + node_list = data1_search_tag(zei->dh, node_zebra->child, "attrlist"); for (np = node_list->child; np; np = np->next) { - data1_node *node_set = NULL; - data1_node *node_use = NULL; + data1_node *node_str = NULL; data1_node *node_ordinal = NULL; + data1_node *node_type = NULL; + data1_node *node_cat = NULL; + data1_node *node_doc_occurrences = NULL; + data1_node *node_term_occurrences = NULL; data1_node *np2; - char oid_str[128]; - int oid_str_len; - if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "attr")) + if (np->which != DATA1N_tag || strcmp(np->u.tag.tag, "attr")) continue; for (np2 = np->child; np2; np2 = np2->next) { if (np2->which != DATA1N_tag || !np2->child || np2->child->which != DATA1N_data) continue; - if (!strcmp (np2->u.tag.tag, "set")) - node_set = np2->child; - else if (!strcmp (np2->u.tag.tag, "use")) - node_use = np2->child; - else if (!strcmp (np2->u.tag.tag, "ordinal")) + if (!strcmp(np2->u.tag.tag, "str")) + node_str = np2->child; + else if (!strcmp(np2->u.tag.tag, "ordinal")) node_ordinal = np2->child; + else if (!strcmp(np2->u.tag.tag, "type")) + node_type = np2->child; + else if (!strcmp(np2->u.tag.tag, "cat")) + node_cat = np2->child; + else if (!strcmp(np2->u.tag.tag, "dococcurrences")) + node_doc_occurrences = np2->child; + else if (!strcmp(np2->u.tag.tag, "termoccurrences")) + node_term_occurrences = np2->child; + else + { + yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails", + np2->u.tag.tag); + } } - assert (node_set && node_use && node_ordinal); - - oid_str_len = node_set->u.data.len; - if (oid_str_len >= (int) sizeof(oid_str)) - oid_str_len = sizeof(oid_str)-1; - memcpy (oid_str, node_set->u.data.data, oid_str_len); - oid_str[oid_str_len] = '\0'; + assert(node_ordinal); *zsuip = (struct zebSUInfoB *) - nmem_malloc (zei->nmem, sizeof(**zsuip)); - (*zsuip)->info.set = oid_getvalbyname (oid_str); + nmem_malloc(zei->nmem, sizeof(**zsuip)); - (*zsuip)->info.use = atoi_n (node_use->u.data.data, - node_use->u.data.len); + if (node_type && node_type->u.data.len > 0) + (*zsuip)->info.index_type = node_type->u.data.data[0]; + else + { + yaz_log(YLOG_WARN, "Missing attribute 'type' in attribute info"); + (*zsuip)->info.index_type = 'w'; + } + if (node_cat && node_cat->u.data.len > 0) + { + zinfo_index_category_t cat; + + data1_node *np = node_cat; + if (!strncmp(np->u.data.data, "index", np->u.data.len)) + cat = zinfo_index_category_index; + else if (!strncmp(np->u.data.data, "sort", np->u.data.len)) + cat = zinfo_index_category_sort; + else if (!strncmp(np->u.data.data, "alwaysmatches", + np->u.data.len)) + cat = zinfo_index_category_alwaysmatches; + else if (!strncmp(np->u.data.data, "anchor", + np->u.data.len)) + cat = zinfo_index_category_anchor; + else + { + yaz_log(YLOG_WARN, "Bad index cateogry '%.*s'", + np->u.data.len, np->u.data.data); + cat = zinfo_index_category_index; + } + (*zsuip)->info.cat = cat; + } + else + (*zsuip)->info.cat = zinfo_index_category_index; + + if (node_doc_occurrences) + { + data1_node *np = node_doc_occurrences; + (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_term_occurrences) + { + data1_node *np = node_term_occurrences; + (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data, + np->u.data.len); + } + if (node_str) + { + (*zsuip)->info.which = ZEB_SU_STR; + + (*zsuip)->info.u.str = nmem_strdupn(zei->nmem, + node_str->u.data.data, + node_str->u.data.len); + } + else + { + yaz_log(YLOG_WARN, "Missing set/use/str in attribute info"); + continue; + } (*zsuip)->info.ordinal = atoi_n (node_ordinal->u.data.data, node_ordinal->u.data.len); - yaz_log (YLOG_DEBUG, "set=%d use=%d ordinal=%d", - (*zsuip)->info.set, (*zsuip)->info.use, (*zsuip)->info.ordinal); zsuip = &(*zsuip)->next; } *zsuip = NULL; @@ -629,6 +720,14 @@ static void zebraExplain_readDatabase (ZebraExplainInfo zei, && np->child && np->child->which == DATA1N_data) zdi->recordBytes = atoi_zn (np->child->u.data.data, np->child->u.data.len); + + if (node_zebra + && (np = data1_search_tag (zei->dh, node_zebra->child, + "ordinalDatabase")) + && np->child && np->child->which == DATA1N_data) + zdi->ordinalDatabase = atoi_n(np->child->u.data.data, + np->child->u.data.len); + if ((np = data1_search_tag (zei->dh, node_dbinfo->child, "recordCount")) && (np = data1_search_tag (zei->dh, np->child, @@ -703,19 +802,19 @@ int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database) if (!zdi) return -1; #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_curDatabase: %s", database); + yaz_log(YLOG_LOG, "zebraExplain_curDatabase: %s", database); #endif if (zdi->readFlag) { #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_readDatabase: %s", database); + yaz_log(YLOG_LOG, "zebraExplain_readDatabase: %s", database); #endif zebraExplain_readDatabase (zei, zdi); } if (zdi->attributeDetails->readFlag) { #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_readAttributeDetails: %s", database); + yaz_log(YLOG_LOG, "zebraExplain_readAttributeDetails: %s", database); #endif zebraExplain_readAttributeDetails (zei, zdi->attributeDetails); } @@ -787,7 +886,7 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, database_n = database; #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_newDatabase: %s", database); + yaz_log(YLOG_LOG, "zebraExplain_newDatabase: %s", database); #endif assert (zei); for (zdi = zei->databaseInfo; zdi; zdi=zdi->next) @@ -807,6 +906,8 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, zdi->readFlag = 0; zdi->databaseName = nmem_strdup (zei->nmem, database_n); + zdi->ordinalDatabase = zei->ordinalDatabase++; + zebraExplain_mergeAccessInfo (zei, 0, &zdi->accessInfo); assert (zei->dh); @@ -866,30 +967,6 @@ int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database, return 0; } -static void writeAttributeValueDetails (ZebraExplainInfo zei, - zebAttributeDetails zad, - data1_node *node_atvs, data1_attset *attset) - -{ - struct zebSUInfoB *zsui; - int set_ordinal = attset->reference; - data1_attset_child *c; - - for (c = attset->children; c; c = c->next) - writeAttributeValueDetails (zei, zad, node_atvs, c->child); - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - data1_node *node_attvalue, *node_value; - if (set_ordinal != zsui->info.set) - continue; - node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue", - 0 /* attr */, node_atvs); - node_value = data1_mk_tag (zei->dh, zei->nmem, "value", - 0 /* attr */, node_attvalue); - data1_mk_tag_data_int (zei->dh, node_value, "numeric", - zsui->info.use, zei->nmem); - } -} static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, struct zebraCategoryListInfo *zcl, @@ -916,10 +993,12 @@ static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, node_categoryList = zcl->data1_categoryList; #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_writeCategoryList"); + yaz_log(YLOG_LOG, "zebraExplain_writeCategoryList"); #endif drec = createRecord (zei->records, &sysno); + if (!drec) + return; node_ci = data1_search_tag (zei->dh, node_categoryList, "/categoryList"); @@ -961,19 +1040,20 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, char *sgml_buf; int sgml_len; Record drec; - data1_node *node_adinfo, *node_list, *node_zebra, *node_attributesBySet; + data1_node *node_adinfo, *node_list, *node_zebra; struct zebSUInfoB *zsui; - int set_min; if (!zad->dirty) return; zad->dirty = 0; #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_writeAttributeDetails"); + yaz_log(YLOG_LOG, "zebraExplain_writeAttributeDetails"); #endif drec = createRecord (zei->records, &zad->sysno); + if (!drec) + return; assert (zad->data1_tree); node_adinfo = data1_search_tag (zei->dh, zad->data1_tree, @@ -987,64 +1067,6 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, record count, etc. is affected */ if (key_flush) (*zei->updateFunc)(zei->updateHandle, drec, zad->data1_tree); - - node_attributesBySet = data1_mk_tag_uni (zei->dh, zei->nmem, - "attributesBySet", node_adinfo); - set_min = -1; - while (1) - { - data1_node *node_asd; - data1_attset *attset; - int set_ordinal = -1; - for (zsui = zad->SUInfo; zsui; zsui = zsui->next) - { - if ((set_ordinal < 0 || set_ordinal > zsui->info.set) - && zsui->info.set > set_min) - set_ordinal = zsui->info.set; - } - if (set_ordinal < 0) - break; - set_min = set_ordinal; - node_asd = data1_mk_tag (zei->dh, zei->nmem, - "attributeSetDetails", - 0 /* attr */, node_attributesBySet); - - attset = data1_attset_search_id (zei->dh, set_ordinal); - if (!attset) - { - zebraExplain_loadAttsets (zei->dh, zei->res); - attset = data1_attset_search_id (zei->dh, set_ordinal); - } - if (attset) - { - int oid[OID_SIZE]; - oident oe; - - oe.proto = PROTO_Z3950; - oe.oclass = CLASS_ATTSET; - oe.value = (enum oid_value) set_ordinal; - - if (oid_ent_to_oid (&oe, oid)) - { - data1_node *node_abt, *node_atd, *node_atvs; - data1_mk_tag_data_oid (zei->dh, node_asd, "oid", - oid, zei->nmem); - - node_abt = data1_mk_tag (zei->dh, zei->nmem, - "attributesByType", - 0 /*attr */, node_asd); - node_atd = data1_mk_tag (zei->dh, zei->nmem, - "attributeTypeDetails", - 0 /* attr */, node_abt); - data1_mk_tag_data_int (zei->dh, node_atd, - "type", 1, zei->nmem); - node_atvs = data1_mk_tag (zei->dh, zei->nmem, - "attributeValues", - 0 /* attr */, node_atd); - writeAttributeValueDetails (zei, zad, node_atvs, attset); - } - } - } /* zebra info (private) */ node_zebra = data1_mk_tag_uni (zei->dh, zei->nmem, "zebraInfo", node_adinfo); @@ -1052,24 +1074,43 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, "attrlist", node_zebra); for (zsui = zad->SUInfo; zsui; zsui = zsui->next) { - struct oident oident; - int oid[OID_SIZE]; data1_node *node_attr; + char index_type_str[2]; node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", 0 /* attr */, node_list); - - oident.proto = PROTO_Z3950; - oident.oclass = CLASS_ATTSET; - oident.value = (enum oid_value) zsui->info.set; - oid_ent_to_oid (&oident, oid); - - data1_mk_tag_data_text (zei->dh, node_attr, "set", - oident.desc, zei->nmem); - data1_mk_tag_data_int (zei->dh, node_attr, "use", - zsui->info.use, zei->nmem); + + index_type_str[0] = zsui->info.index_type; + index_type_str[1] = '\0'; + data1_mk_tag_data_text (zei->dh, node_attr, "type", + index_type_str, zei->nmem); + if (zsui->info.which == ZEB_SU_STR) + { + data1_mk_tag_data_text (zei->dh, node_attr, "str", + zsui->info.u.str, zei->nmem); + } data1_mk_tag_data_int (zei->dh, node_attr, "ordinal", zsui->info.ordinal, zei->nmem); + + data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences", + zsui->info.doc_occurrences, zei->nmem); + data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences", + zsui->info.term_occurrences, zei->nmem); + switch(zsui->info.cat) + { + case zinfo_index_category_index: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "index", zei->nmem); break; + case zinfo_index_category_sort: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "sort", zei->nmem); break; + case zinfo_index_category_alwaysmatches: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "alwaysmatches", zei->nmem); break; + case zinfo_index_category_anchor: + data1_mk_tag_data_text (zei->dh, node_attr, "cat", + "anchor", zei->nmem); break; + } } /* convert to "SGML" and write it */ #if ZINFO_DEBUG @@ -1098,9 +1139,11 @@ static void zebraExplain_writeDatabase (ZebraExplainInfo zei, zdi->dirty = 0; #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_writeDatabase %s", zdi->databaseName); + yaz_log(YLOG_LOG, "zebraExplain_writeDatabase %s", zdi->databaseName); #endif drec = createRecord (zei->records, &zdi->sysno); + if (!drec) + return; assert (zdi->data1_database); node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database, @@ -1125,6 +1168,10 @@ static void zebraExplain_writeDatabase (ZebraExplainInfo zei, "zebraInfo", node_dbinfo); data1_mk_tag_data_zint (zei->dh, node_zebra, "recordBytes", zdi->recordBytes, zei->nmem); + + data1_mk_tag_data_zint(zei->dh, node_zebra, + "ordinalDatabase", zdi->ordinalDatabase, zei->nmem); + /* convert to "SGML" and write it */ #if ZINFO_DEBUG data1_pr_tree (zei->dh, zdi->data1_database, stderr); @@ -1182,11 +1229,13 @@ static void zebraExplain_writeAttributeSet (ZebraExplainInfo zei, attset = data1_attset_search_id (zei->dh, entp->value); #if ZINFO_DEBUG - yaz_log (YLOG_LOG, "zebraExplain_writeAttributeSet %s", + yaz_log(YLOG_LOG, "zebraExplain_writeAttributeSet %s", attset ? attset->name : ""); #endif drec = createRecord (zei->records, &o->sysno); + if (!drec) + return; node_root = data1_read_sgml (zei->dh, zei->nmem, "AttributeSetInfo\n" @@ -1248,7 +1297,7 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) return; zei->dirty = 0; - trec = rec_get (zei->records, 1); + trec = rec_get_root(zei->records); xfree (trec->info[recInfo_storeData]); node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target, @@ -1283,6 +1332,9 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) data1_mk_tag_data_int (zei->dh, node_zebra, "ordinalSU", zei->ordinalSU, zei->nmem); + data1_mk_tag_data_int (zei->dh, node_zebra, "ordinalDatabase", + zei->ordinalDatabase, zei->nmem); + data1_mk_tag_data_zint (zei->dh, node_zebra, "runNumber", zei->runNumber, zei->nmem); @@ -1298,15 +1350,30 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) rec_put (zei->records, &trec); } -int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use) +int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, + const char *str) { - struct zebSUInfoB *zsui; + struct zebSUInfoB **zsui; assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.use == use && zsui->info.set == set) - return zsui->info.ordinal; + for (zsui = &zei->curDatabaseInfo->attributeDetails->SUInfo; + *zsui; zsui = &(*zsui)->next) + if ((*zsui)->info.index_type == index_type + && (*zsui)->info.cat == cat + && (*zsui)->info.which == ZEB_SU_STR + && !yaz_matchstr((*zsui)->info.u.str, str)) + { + struct zebSUInfoB *zsui_this = *zsui; + + /* take it out of the list and move to front */ + *zsui = (*zsui)->next; + zsui_this->next = zei->curDatabaseInfo->attributeDetails->SUInfo; + zei->curDatabaseInfo->attributeDetails->SUInfo = zsui_this; + + return zsui_this->info.ordinal; + } return -1; } @@ -1322,26 +1389,114 @@ int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle, } return 0; } - -int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, - const char **db, int *set, int *use) + + +struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord, + int dirty_mark, + const char **db) { struct zebDatabaseInfoB *zdb; + for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) { - struct zebSUInfoB *zsui = zdb->attributeDetails->SUInfo; - for ( ;zsui; zsui = zsui->next) - if (zsui->info.ordinal == ord) - { - *db = zdb->databaseName; - *set = zsui->info.set; - *use = zsui->info.use; - return 0; - } + struct zebSUInfoB **zsui; + + if (zdb->attributeDetails->readFlag) + zebraExplain_readAttributeDetails (zei, zdb->attributeDetails); + + for (zsui = &zdb->attributeDetails->SUInfo; *zsui; + zsui = &(*zsui)->next) + if ((*zsui)->info.ordinal == ord) + { + struct zebSUInfoB *zsui_this = *zsui; + + /* take it out of the list and move to front */ + *zsui = (*zsui)->next; + zsui_this->next = zdb->attributeDetails->SUInfo; + zdb->attributeDetails->SUInfo = zsui_this; + + if (dirty_mark) + zdb->attributeDetails->dirty = 1; + if (db) + *db = zdb->databaseName; + return zsui_this; + } + } + return 0; +} + + + +int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord, + int term_delta, int doc_delta) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 1, 0); + if (zsui) + { + zsui->info.term_occurrences += term_delta; + zsui->info.doc_occurrences += doc_delta; + return 0; + } + return -1; +} + +int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord, + zint *term_occurrences, + zint *doc_occurrences) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + { + *term_occurrences = zsui->info.term_occurrences; + *doc_occurrences = zsui->info.doc_occurrences; + return 0; } return -1; } +zint zebraExplain_ord_get_doc_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.doc_occurrences; + return 0; +} + +zint zebraExplain_ord_get_term_occurrences(ZebraExplainInfo zei, int ord) +{ + struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0); + if (zsui) + return zsui->info.term_occurrences; + return 0; +} + +int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, + int *index_type, + const char **db, + const char **string_index) +{ + struct zebSUInfoB *zsui; + + if (index_type) + *index_type = 0; + if (string_index) + *string_index = 0; + + zsui = zebraExplain_get_sui_info(zei, ord, 0, db); + if (zsui) + { + if (zsui->info.which == ZEB_SU_STR) + if (string_index) + *string_index = zsui->info.u.str; + if (index_type) + *index_type = zsui->info.index_type; + return 0; + } + return -1; +} + + + zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei, zebAccessObject *op, Odr_oid *oid) @@ -1380,24 +1535,35 @@ void zebraExplain_addAttributeSet (ZebraExplainInfo zei, int set) } } -int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use) +struct zebSUInfoB *zebraExplain_add_sui_info(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type) { struct zebSUInfoB *zsui; assert (zei->curDatabaseInfo); - for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; - zsui; zsui=zsui->next) - if (zsui->info.use == use && zsui->info.set == set) - return -1; - zebraExplain_addAttributeSet (zei, set); zsui = (struct zebSUInfoB *) nmem_malloc (zei->nmem, sizeof(*zsui)); zsui->next = zei->curDatabaseInfo->attributeDetails->SUInfo; zei->curDatabaseInfo->attributeDetails->SUInfo = zsui; zei->curDatabaseInfo->attributeDetails->dirty = 1; zei->dirty = 1; - zsui->info.set = set; - zsui->info.use = use; + zsui->info.index_type = index_type; + zsui->info.cat = cat; + zsui->info.doc_occurrences = 0; + zsui->info.term_occurrences = 0; zsui->info.ordinal = (zei->ordinalSU)++; + return zsui; +} + +int zebraExplain_add_attr_str(ZebraExplainInfo zei, + zinfo_index_category_t cat, + int index_type, + const char *index_name) +{ + struct zebSUInfoB *zsui = zebraExplain_add_sui_info(zei, cat, index_type); + + zsui->info.which = ZEB_SU_STR; + zsui->info.u.str = nmem_strdup(zei->nmem, index_name); return zsui->info.ordinal; } @@ -1452,6 +1618,7 @@ RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec) recordAttr->recordSize = 0; recordAttr->recordOffset = 0; recordAttr->runNumber = zei->runNumber; + recordAttr->staticrank = 0; return recordAttr; } @@ -1459,7 +1626,14 @@ static void att_loadset(void *p, const char *n, const char *name) { data1_handle dh = (data1_handle) p; if (!data1_get_attset (dh, name)) - yaz_log (YLOG_WARN, "Directive attset failed for %s", name); + yaz_log(YLOG_WARN, "Directive attset failed for %s", name); +} + +int zebraExplain_get_database_ord(ZebraExplainInfo zei) +{ + if (!zei->curDatabaseInfo) + return -1; + return zei->curDatabaseInfo->ordinalDatabase; } void zebraExplain_loadAttsets (data1_handle dh, Res res) @@ -1475,3 +1649,11 @@ void zebraExplain_loadAttsets (data1_handle dh, Res res) If the database doesn't exist globally (in TargetInfo) an AttributeSetInfo must be added (globally). */ +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +