X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzebraapi.c;h=c6e9668896a6d825a6a0c1d362af8bcf6dd7ed9f;hp=420861ff8c769fc80fa4bf37999ee4e14bfecf93;hb=250de4ed23a44f5eb3552db317eef0d0fbe3265c;hpb=16853a7593f10680ea8d6895aa0720b9af3779e6 diff --git a/index/zebraapi.c b/index/zebraapi.c index 420861f..c6e9668 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1994-2010 Index Data + Copyright (C) 2004-2013 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -93,7 +96,7 @@ static ZEBRA_RES zebra_flush_reg(ZebraHandle zh) return ZEBRA_OK; } -static struct zebra_register *zebra_register_open(ZebraService zs, +static struct zebra_register *zebra_register_open(ZebraService zs, const char *name, int rw, int useshadow, Res res, @@ -130,7 +133,7 @@ ZebraHandle zebra_open(ZebraService zs, Res res) zh->destroyed = 0; zh->errCode = 0; zh->errString = 0; - zh->res = 0; + zh->res = 0; zh->session_res = res_open(zs->global_res, res); zh->user_perm = 0; zh->dbaccesslist = 0; @@ -212,6 +215,8 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) if (configName) yaz_log(YLOG_LOG, "config %s", configName); + yaz_log_xml_errors(0, YLOG_LOG); + if ((res = res_open(def_res, over_res))) { const char *passwd_plain = 0; @@ -242,13 +247,13 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) zh = xmalloc(sizeof(*zh)); zh->global_res = res; zh->sessions = 0; - + if (zebra_chdir(zh)) { xfree(zh); return 0; } - + zebra_mutex_cond_init(&zh->session_lock); passwd_plain = res_get(zh->global_res, "passwd"); passwd_encrypt = res_get(zh->global_res, "passwd.c"); @@ -256,7 +261,7 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) if (!passwd_plain && !passwd_encrypt) zh->passwd_db = NULL; - else + else { zh->passwd_db = passwd_db_open(); if (!zh->passwd_db) @@ -340,7 +345,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, ZEBRA_RES ret = ZEBRA_OK; ASSERTZS; - + reg = xmalloc(sizeof(*reg)); assert(name); @@ -353,7 +358,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, yaz_log(YLOG_DEBUG, "zebra_register_open rw=%d useshadow=%d p=%p n=%s rp=%s", rw, useshadow, reg, name, reg_path ? reg_path : "(none)"); - + reg->dh = data1_create(); if (!reg->dh) { @@ -413,7 +418,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, reg->isamc = 0; reg->isamb = 0; reg->zei = 0; - + /* installing rank classes */ zebraRankInstall(reg, rank_1_class); zebraRankInstall(reg, rank_2_class); @@ -468,7 +473,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, ret = ZEBRA_FAIL; } - + if (res_get_match(res, "sortindex", "f", "f")) sort_type = ZEBRA_SORT_TYPE_FLAT; else if (res_get_match(res, "sortindex", "i", "f")) @@ -510,7 +515,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, if (res_get_match(res, "isam", "b", ISAM_DEFAULT)) { struct ISAMC_M_s isamc_m; - + if (!(reg->isamb = isamb_open(reg->bfs, "isamb", rw, key_isamc_m(res, &isamc_m), 0))) { @@ -521,7 +526,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, if (res_get_match(res, "isam", "bc", ISAM_DEFAULT)) { struct ISAMC_M_s isamc_m; - + if (!(reg->isamb = isamb_open(reg->bfs, "isamb", rw, key_isamc_m(res, &isamc_m), 1))) { @@ -532,7 +537,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, if (res_get_match(res, "isam", "null", ISAM_DEFAULT)) { struct ISAMC_M_s isamc_m; - + if (!(reg->isamb = isamb_open(reg->bfs, "isamb", rw, key_isamc_m(res, &isamc_m), -1))) { @@ -551,7 +556,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, ret = ZEBRA_FAIL; } } - + if (ret != ZEBRA_OK) { zebra_register_close(zs, reg); @@ -590,7 +595,7 @@ static void zebra_register_close(ZebraService zs, struct zebra_register *reg) yaz_log(YLOG_DEBUG, "zebra_register_close p=%p", reg); reg->stop_flag = 0; zebra_chdir(zs); - + zebraExplain_close(reg->zei); dict_close(reg->dict); if (reg->matchDict) @@ -626,7 +631,7 @@ ZEBRA_RES zebra_stop(ZebraService zs) { zebra_close(zs->sessions); } - + zebra_mutex_cond_destroy(&zs->session_lock); if (zs->passwd_db) @@ -641,7 +646,7 @@ ZEBRA_RES zebra_stop(ZebraService zs) yaz_timing_get_real(zs->timing), yaz_timing_get_user(zs->timing), yaz_timing_get_sys(zs->timing)); - + yaz_timing_destroy(&zs->timing); xfree(zs); @@ -658,7 +663,7 @@ ZEBRA_RES zebra_close(ZebraHandle zh) ZEBRA_CHECK_HANDLE(zh); zh->errCode = 0; - + zs = zh->service; yaz_log(YLOG_DEBUG, "zebra_close zh=%p", zh); resultSetDestroy(zh, -1, 0, 0); @@ -778,7 +783,7 @@ static void zebra_select_register(ZebraHandle zh, const char *new_reg) zh->path_reg = 0; if (zh->service->path_root) { - zh->path_reg = xmalloc(strlen(zh->service->path_root) + + zh->path_reg = xmalloc(strlen(zh->service->path_root) + strlen(zh->reg_name) + 3); strcpy(zh->path_reg, zh->service->path_root); if (*zh->reg_name) @@ -788,7 +793,7 @@ static void zebra_select_register(ZebraHandle zh, const char *new_reg) } } zebra_open_res(zh); - + if (zh->lock_normal) zebra_lock_destroy(zh->lock_normal); zh->lock_normal = 0; @@ -801,13 +806,13 @@ static void zebra_select_register(ZebraHandle zh, const char *new_reg) { char fname[512]; const char *lock_area = res_get(zh->res, "lockDir"); - + if (!lock_area && zh->path_reg) res_set(zh->res, "lockDir", zh->path_reg); sprintf(fname, "norm.%s.LCK", zh->reg_name); zh->lock_normal = zebra_lock_create(res_get(zh->res, "lockDir"), fname); - + sprintf(fname, "shadow.%s.LCK", zh->reg_name); zh->lock_shadow = zebra_lock_create(res_get(zh->res, "lockDir"), fname); @@ -840,7 +845,7 @@ static void zebra_select_register(ZebraHandle zh, const char *new_reg) } if (zh->res) { - if (res_get_int(zh->res, "segment", &zh->m_segment_indexing) == + if (res_get_int(zh->res, "segment", &zh->m_segment_indexing) == ZEBRA_OK) { yaz_log(YLOG_DEBUG, "segment indexing set and is %d", @@ -858,7 +863,7 @@ void map_basenames_func(void *vp, const char *name, const char *value) assert(value); assert(name); assert(vp); - + no = sscanf(value, "%127s %127s %127s %127s %127s %127s %127s %127s %127s", fromdb, todb[0], todb[1], todb[2], todb[3], todb[4], @@ -874,7 +879,7 @@ void map_basenames_func(void *vp, const char *name, const char *value) { if (p->new_num_bases == p->new_num_max) return; - p->new_basenames[(p->new_num_bases)++] = + p->new_basenames[(p->new_num_bases)++] = nmem_strdup(p->mem, todb[i]); } return; @@ -917,11 +922,11 @@ void map_basenames(ZebraHandle zh, ODR stream, info.mem = stream->mem; res_trav(zh->session_res, "mapdb", &info, map_basenames_func); - + for (i = 0; inum_bases; i++) if (p->basenames[i] && p->new_num_bases < p->new_num_max) { - p->new_basenames[(p->new_num_bases)++] = + p->new_basenames[(p->new_num_bases)++] = nmem_strdup(p->mem, p->basenames[i]); } *num_bases = info.new_num_bases; @@ -953,7 +958,7 @@ ZEBRA_RES zebra_select_databases(ZebraHandle zh, int num_bases, yaz_log(log_level, "zebra_select_databases n=%d [0]=%s", num_bases,basenames[0]); zh->errCode = 0; - + if (num_bases < 1) { zh->errCode = YAZ_BIB1_COMBI_OF_SPECIFIED_DATABASES_UNSUPP; @@ -988,7 +993,7 @@ ZEBRA_RES zebra_select_databases(ZebraHandle zh, int num_bases, for (i = 0; i < zh->num_basenames; i++) xfree(zh->basenames[i]); xfree(zh->basenames); - + zh->num_basenames = num_bases; zh->basenames = xmalloc(zh->num_basenames * sizeof(*zh->basenames)); for (i = 0; i < zh->num_basenames; i++) @@ -1076,7 +1081,7 @@ ZEBRA_RES zebra_search_RPN_x(ZebraHandle zh, ODR o, Z_RPNQuery *query, int *partial_resultset) { ZEBRA_RES r; - + ZEBRA_CHECK_HANDLE(zh); assert(o); @@ -1090,7 +1095,7 @@ ZEBRA_RES zebra_search_RPN_x(ZebraHandle zh, ODR o, Z_RPNQuery *query, if (zebra_begin_read(zh) == ZEBRA_FAIL) return ZEBRA_FAIL; - r = resultSetAddRPN(zh, odr_extract_mem(o), query, + r = resultSetAddRPN(zh, odr_extract_mem(o), query, zh->num_basenames, zh->basenames, setname, hits, estimated_hit_count); @@ -1134,7 +1139,7 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream, setname); return ZEBRA_FAIL; } - + if (zebra_begin_read(zh) == ZEBRA_FAIL) return ZEBRA_FAIL; @@ -1177,7 +1182,7 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream, /* we disable hit snippets for now. It does not work well and it slows retrieval down a lot */ #if 0 - zebra_snippets_hit_vector(zh, setname, poset[i].sysno, + zebra_snippets_hit_vector(zh, setname, poset[i].sysno, hit_snippet); #endif wrbuf_rewind(addinfo_w); @@ -1187,7 +1192,7 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream, stream, input_format, comp, &recs[i].format, &buf, &len, &recs[i].base, addinfo_w); - + if (wrbuf_len(addinfo_w)) recs[i].errString = odr_strdup(stream, wrbuf_cstr(addinfo_w)); @@ -1233,7 +1238,7 @@ ZEBRA_RES zebra_scan_PQF(ZebraHandle zh, ODR stream, const char *query, Z_AttributesPlusTerm *zapt; Odr_oid *attributeSet; ZEBRA_RES res; - + if (!(zapt = yaz_pqf_scan(pqf_parser, stream, &attributeSet, query))) { res = ZEBRA_FAIL; @@ -1342,7 +1347,7 @@ int zebra_errCode(ZebraHandle zh) return zh->errCode; } yaz_log(log_level, "zebra_errCode: o"); - return 0; + return 0; } const char *zebra_errString(ZebraHandle zh) @@ -1373,7 +1378,7 @@ ZEBRA_RES zebra_auth(ZebraHandle zh, const char *user, const char *pass) ZEBRA_CHECK_HANDLE(zh); zs = zh->service; - + sprintf(u, "perm.%.30s", user ? user : "anonymous"); p = res_get(zs->global_res, u); xfree(zh->user_perm); @@ -1389,7 +1394,7 @@ ZEBRA_RES zebra_auth(ZebraHandle zh, const char *user, const char *pass) /* users that don't require a password .. */ if (zh->user_perm && strchr(zh->user_perm, 'a')) return ZEBRA_OK; - + if (!zs->passwd_db || !passwd_db_auth(zs->passwd_db, user, pass)) return ZEBRA_OK; return ZEBRA_FAIL; @@ -1398,7 +1403,7 @@ ZEBRA_RES zebra_auth(ZebraHandle zh, const char *user, const char *pass) ZEBRA_RES zebra_admin_import_begin(ZebraHandle zh, const char *database, const char *record_type) { - yaz_log(log_level, "zebra_admin_import_begin db=%s rt=%s", + yaz_log(log_level, "zebra_admin_import_begin db=%s rt=%s", database, record_type); if (zebra_select_database(zh, database) == ZEBRA_FAIL) return ZEBRA_FAIL; @@ -1431,9 +1436,9 @@ ZEBRA_RES zebra_admin_import_segment(ZebraHandle zh, Z_Segment *segment) { Odr_oct *oct = fragment->u.notExternallyTagged; sysno = 0; - + if(zebra_update_record( - zh, + zh, action_update, 0, /* record Type */ &sysno, @@ -1503,7 +1508,7 @@ static int delete_SU_handle(void *handle, int ord, assert(zh->reg->isamb); assert(zh->reg->records); dict_delete_subtree(zh->reg->dict, ord_buf, - zh, + zh, !strcmp(string_index, "_ALLRECORDS") ? delete_w_all_handle : delete_w_handle); return 0; @@ -1565,7 +1570,7 @@ ZEBRA_RES zebra_create_database(ZebraHandle zh, const char *db) return ZEBRA_FAIL; /* announce database */ - if (zebraExplain_newDatabase(zh->reg->zei, db, 0 + if (zebraExplain_newDatabase(zh->reg->zei, db, 0 /* explainDatabase */)) { if (zebra_end_trans(zh) != ZEBRA_OK) @@ -1606,7 +1611,7 @@ int zebra_string_norm(ZebraHandle zh, const char *index_type, \param zh Zebra handle \param val state \param seqno sequence number - + val is one of: d=writing to shadow(shadow enabled); writing to register (shadow disabled) o=reading only @@ -1624,7 +1629,11 @@ static void zebra_set_state(ZebraHandle zh, int val, int seqno) sprintf(state_fname, "state.%s.LCK", zh->reg_name); fname = zebra_mk_fname(res_get(zh->res, "lockDir"), state_fname); f = fopen(fname, "w"); - + if (!f) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s w", state_fname); + exit(1); + } yaz_log(YLOG_DEBUG, "zebra_set_state: %c %d %ld", val, seqno, p); fprintf(f, "%c %d %ld\n", val, seqno, p); fclose(f); @@ -1673,7 +1682,7 @@ static void read_res_for_transaction(ZebraHandle zh) const char *group = res_get(zh->res, "group"); const char *v; /* FIXME - do we still use groups ?? */ - + zh->m_group = group; v = res_get_prefix(zh->res, "followLinks", group, "1"); zh->m_follow_links = atoi(v); @@ -1728,7 +1737,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) int seqno = 0; char val = '?'; const char *rval = 0; - + (zh->trans_no++); if (zh->trans_w_no) { @@ -1753,14 +1762,14 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) zh->records_deleted = 0; zh->records_processed = 0; zh->records_skipped = 0; - + #if HAVE_SYS_TIMES_H times(&zh->tms1); #endif /* lock */ if (zh->shadow_enable) rval = res_get(zh->res, "shadow"); - + if (rval) { zebra_lock_r(zh->lock_normal); @@ -1801,16 +1810,12 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) } zebra_set_state(zh, 'd', seqno); - + zh->reg = zebra_register_open(zh->service, zh->reg_name, 1, rval ? 1 : 0, zh->res, zh->path_reg); - if (zh->reg) - zh->reg->seqno = seqno; - else + if (!zh->reg) { - zebra_set_state(zh, 'o', seqno); - zebra_unlock(zh->lock_shadow); zebra_unlock(zh->lock_normal); @@ -1822,6 +1827,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) yaz_log(YLOG_FATAL, "%s", zh->errString); return ZEBRA_FAIL; } + zh->reg->seqno = seqno; zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]); } else @@ -1829,9 +1835,9 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) int dirty = 0; char val; int seqno; - + (zh->trans_no)++; - + if (zh->trans_no != 1) { return zebra_flush_reg(zh); @@ -1854,7 +1860,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) zebra_get_state(zh, &val, &seqno); if (val == 'd') val = 'o'; - + if (!zh->reg) dirty = 1; else if (seqno != zh->reg->seqno) @@ -1871,12 +1877,12 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) } if (!dirty) return ZEBRA_OK; - + if (val == 'c') zebra_lock_r(zh->lock_shadow); else zebra_lock_r(zh->lock_normal); - + if (zh->reg) { resultSetInvalidate(zh); @@ -1948,29 +1954,29 @@ ZEBRA_RES zebra_end_transaction(ZebraHandle zh, ZebraTransactionStatus *status) { /* release write lock */ zh->trans_no--; zh->trans_w_no = 0; - + yaz_log(YLOG_DEBUG, "zebra_end_trans"); rval = res_get(zh->res, "shadow"); - + zebraExplain_runNumberIncrement(zh->reg->zei, 1); - + zebra_flush_reg(zh); - + resultSetInvalidate(zh); zebra_register_close(zh->service, zh->reg); zh->reg = 0; - + yaz_log(YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d " - ZINT_FORMAT"/"ZINT_FORMAT"/"ZINT_FORMAT, + ZINT_FORMAT"/"ZINT_FORMAT"/"ZINT_FORMAT, zh->records_processed, zh->records_inserted, zh->records_updated, zh->records_deleted); - + status->processed = zh->records_processed; status->inserted = zh->records_inserted; status->updated = zh->records_updated; status->deleted = zh->records_deleted; - + zebra_get_state(zh, &val, &seqno); if (val != 'd') { @@ -1983,14 +1989,14 @@ ZEBRA_RES zebra_end_transaction(ZebraHandle zh, ZebraTransactionStatus *status) zebra_set_state(zh, 'o', seqno); zebra_unlock(zh->lock_shadow); zebra_unlock(zh->lock_normal); - + } #if HAVE_SYS_TIMES_H times(&zh->tms2); yaz_log(log_level, "user/system: %ld/%ld", (long) (zh->tms2.tms_utime - zh->tms1.tms_utime), (long) (zh->tms2.tms_stime - zh->tms1.tms_stime)); - + status->utime = (long) (zh->tms2.tms_utime - zh->tms1.tms_utime); status->stime = (long) (zh->tms2.tms_stime - zh->tms1.tms_stime); #endif @@ -2054,7 +2060,7 @@ static ZEBRA_RES zebra_commit_ex(ZebraHandle zh, int clean_only) zh->errCode = YAZ_BIB1_DATABASE_UNAVAILABLE; return ZEBRA_FAIL; } - rval = res_get(zh->res, "shadow"); + rval = res_get(zh->res, "shadow"); if (!rval) { yaz_log(YLOG_WARN, "Cannot perform commit - No shadow area defined"); @@ -2094,7 +2100,7 @@ static ZEBRA_RES zebra_commit_ex(ZebraHandle zh, int clean_only) else { zebra_set_state(zh, 'c', seqno); - + yaz_log(log_level, "commit start"); if (bf_commitExec(bfs)) res = ZEBRA_FAIL; @@ -2103,10 +2109,10 @@ static ZEBRA_RES zebra_commit_ex(ZebraHandle zh, int clean_only) { seqno++; zebra_set_state(zh, 'o', seqno); - + zebra_unlock(zh->lock_shadow); zebra_unlock(zh->lock_normal); - + zebra_lock_w(zh->lock_shadow); bf_commitClean(bfs, rval); zebra_unlock(zh->lock_shadow); @@ -2170,7 +2176,7 @@ ZEBRA_RES zebra_init(ZebraHandle zh) } if (rval && *rval) bf_cache(bfs, rval); - + bf_reset(bfs); bfs_destroy(bfs); zebra_set_state(zh, 'o', 0); @@ -2194,30 +2200,297 @@ ZEBRA_RES zebra_compact(ZebraHandle zh) return ZEBRA_OK; } -ZEBRA_RES zebra_register_check(ZebraHandle zh) +#define ZEBRA_CHECK_DICT 1 +#define ZEBRA_CHECK_ISAM 2 + +static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec, + zint *no_keys, int message_limit, + unsigned flags, + zint *no_long_dict_entries, + zint *no_failed_dict_lookups, + zint *no_invalid_keys, + zint *no_invalid_dict_infos, + zint *no_invalid_isam_entries) +{ + ZEBRA_RES res = ZEBRA_OK; + zebra_rec_keys_t keys = zebra_rec_keys_open(); + zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], 0); + + *no_keys = 0; + if (!zebra_rec_keys_rewind(keys)) + { + ; + } + else + { + size_t slen; + const char *str; + struct it_key key_in; + NMEM nmem = nmem_create(); + + while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) + { + int do_fail = 0; + int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + char ord_buf[IT_MAX_WORD+20]; + int ord_len = key_SU_encode(ord, ord_buf); + char *info = 0; + + (*no_keys)++; + + if (key_in.len < 2 || key_in.len > IT_KEY_LEVEL_MAX) + { + res = ZEBRA_FAIL; + (*no_invalid_keys)++; + if (*no_invalid_keys <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": unexpected key length %d", + rec->sysno, key_in.len); + } + } + if (ord_len + slen >= sizeof(ord_buf)-1) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + /* so bad it can not fit into our ord_buf */ + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, ord_len, (int) slen); + } + continue; + } + memcpy(ord_buf + ord_len, str, slen); + ord_buf[ord_len + slen] = '\0'; + if (ord_len + slen >= IT_MAX_WORD) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + if ((flags & ZEBRA_CHECK_DICT) == 0) + continue; + info = dict_lookup(zh->reg->dict, ord_buf); + if (!info) + { + res = ZEBRA_FAIL; + (*no_failed_dict_lookups)++; + if (*no_failed_dict_lookups <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": term do not exist in dictionary", rec->sysno); + } + } + else if (flags & ZEBRA_CHECK_ISAM) + { + ISAM_P pos; + + if (*info != sizeof(pos)) + { + res = ZEBRA_FAIL; + (*no_invalid_dict_infos)++; + if (*no_invalid_dict_infos <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + else + { + int scope = 1; + memcpy(&pos, info+1, sizeof(pos)); + if (zh->reg->isamb) + { + ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos, + scope); + if (!ispt) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_open entry " ZINT_FORMAT + " not found", + rec->sysno, pos); + } + } + else if (zh->m_staticrank) + { + isamb_pp_close(ispt); + } + else + { + struct it_key until_key; + struct it_key isam_key; + int r; + int i = 0; + + until_key.len = key_in.len - 1; + for (i = 0; i < until_key.len; i++) + until_key.mem[i] = key_in.mem[i+1]; + + if (until_key.mem[0] == 0) + until_key.mem[0] = rec->sysno; + r = isamb_pp_forward(ispt, &isam_key, &until_key); + if (r != 1) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_forward " ZINT_FORMAT + " returned no entry", + rec->sysno, pos); + } + } + else + { + int cmp = key_compare(&until_key, &isam_key); + if (cmp != 0) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries + <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " + ZINT_FORMAT + ": isamb_pp_forward " + ZINT_FORMAT + " returned different entry", + rec->sysno, pos); + + key_logdump_txt(YLOG_LOG, + &until_key, + "until"); + + key_logdump_txt(YLOG_LOG, + &isam_key, + "isam"); + + } + } + } + isamb_pp_close(ispt); + } + + } + } + } + if (do_fail) + { + zebra_it_key_str_dump(zh, &key_in, str, + slen, nmem, YLOG_LOG); + nmem_reset(nmem); + } + } + nmem_destroy(nmem); + } + zebra_rec_keys_close(keys); + return res; +} + +ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec) { ZEBRA_RES res = ZEBRA_FAIL; + unsigned flags = 0; + int message_limit = 10; + + if (!spec || *spec == '\0' + || !strcmp(spec, "dict") || !strcmp(spec, "default")) + flags = ZEBRA_CHECK_DICT; + else if (!strcmp(spec, "isam") || !strcmp(spec, "full")) + flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM; + else if (!strcmp(spec, "quick")) + flags = 0; + else + return ZEBRA_FAIL; + + yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u message_limit=%d", + flags, message_limit); if (zebra_begin_read(zh) == ZEBRA_OK) { - zint no_records = 0; + zint no_records_total = 0; + zint no_records_fail = 0; + zint total_keys = 0; + if (zh->reg) { Record rec = rec_get_root(zh->reg->records); - + + zint no_long_dict_entries = 0; + zint no_failed_dict_lookups = 0; + zint no_invalid_keys = 0; + zint no_invalid_dict_infos = 0; + zint no_invalid_isam_entries = 0; + + res = ZEBRA_OK; while (rec) { Record r1; - + zint no_keys; + + if (zebra_record_check(zh, rec, &no_keys, message_limit, + flags, + &no_long_dict_entries, + &no_failed_dict_lookups, + &no_invalid_keys, + &no_invalid_dict_infos, + &no_invalid_isam_entries + ) + != ZEBRA_OK) + { + res = ZEBRA_FAIL; + no_records_fail++; + } + r1 = rec_get_next(zh->reg->records, rec); rec_free(&rec); rec = r1; - no_records++; + no_records_total++; + total_keys += no_keys; } - res = ZEBRA_OK; + yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT, + no_records_total); + yaz_log(YLOG_LOG, "records fail: " ZINT_FORMAT, + no_records_fail); + yaz_log(YLOG_LOG, "total keys: " ZINT_FORMAT, + total_keys); + yaz_log(YLOG_LOG, "long dict entries: " ZINT_FORMAT, + no_long_dict_entries); + if (flags & ZEBRA_CHECK_DICT) + { + yaz_log(YLOG_LOG, "failed dict lookups: " ZINT_FORMAT, + no_failed_dict_lookups); + yaz_log(YLOG_LOG, "invalid dict infos: " ZINT_FORMAT, + no_invalid_dict_infos); + } + if (flags & ZEBRA_CHECK_ISAM) + yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT, + no_invalid_isam_entries); } zebra_end_read(zh); - yaz_log(YLOG_LOG, ZINT_FORMAT " records scanned", no_records); } + yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res); return res; } @@ -2253,7 +2526,7 @@ ZEBRA_RES zebra_octet_term_encoding(ZebraHandle zh, const char *encoding) yaz_iconv_close(zh->iconv_to_utf8); if (zh->iconv_from_utf8 != 0) yaz_iconv_close(zh->iconv_from_utf8); - + zh->iconv_to_utf8 = yaz_iconv_open("UTF-8", encoding); if (zh->iconv_to_utf8 == 0) @@ -2324,15 +2597,15 @@ void zebra_set_shadow_enable(ZebraHandle zh, int value) ZEBRA_RES zebra_add_record(ZebraHandle zh, const char *buf, int buf_size) { - return zebra_update_record(zh, action_update, + return zebra_update_record(zh, action_update, 0 /* record type */, 0 /* sysno */ , - 0 /* match */, + 0 /* match */, 0 /* fname */, buf, buf_size); } -ZEBRA_RES zebra_update_record(ZebraHandle zh, +ZEBRA_RES zebra_update_record(ZebraHandle zh, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match, @@ -2354,22 +2627,22 @@ ZEBRA_RES zebra_update_record(ZebraHandle zh, if (zebra_begin_trans(zh, 1) == ZEBRA_FAIL) return ZEBRA_FAIL; - res = zebra_buffer_extract_record(zh, buf, buf_size, + res = zebra_buffer_extract_record(zh, buf, buf_size, action, recordType, - sysno, - match, + sysno, + match, fname); if (zebra_end_trans(zh) != ZEBRA_OK) { yaz_log(YLOG_WARN, "zebra_end_trans failed"); res = ZEBRA_FAIL; } - return res; + return res; } /* --------------------------------------------------------------------------- - Searching + Searching */ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query, @@ -2389,9 +2662,9 @@ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query, assert(setname); yaz_log(log_level, "zebra_search_PQF s=%s q=%s", setname, pqf_query); - + query = p_query_rpn(odr, pqf_query); - + if (!query) { yaz_log(YLOG_WARN, "bad query %s\n", pqf_query); @@ -2400,7 +2673,7 @@ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query, } else res = zebra_search_RPN(zh, odr, query, setname, &lhits); - + odr_destroy(odr); yaz_log(log_level, "Hits: " ZINT_FORMAT, lhits); @@ -2417,7 +2690,7 @@ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query, int zebra_sort_by_specstr(ZebraHandle zh, ODR stream, const char *sort_spec, const char *output_setname, - const char **input_setnames) + const char **input_setnames) { int num_input_setnames = 0; int sort_status = 0; @@ -2436,17 +2709,17 @@ int zebra_sort_by_specstr(ZebraHandle zh, ODR stream, zh->errCode = YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE; return -1; } - - /* we can do this, since the perl typemap code for char** will + + /* we can do this, since the perl typemap code for char** will put a NULL at the end of list */ while (input_setnames[num_input_setnames]) num_input_setnames++; if (zebra_begin_read(zh)) return -1; - + resultSetSort(zh, stream->mem, num_input_setnames, input_setnames, output_setname, sort_sequence, &sort_status); - + zebra_end_read(zh); return sort_status; } @@ -2498,7 +2771,7 @@ void zebra_setError_zint(ZebraHandle zh, int code, zint i) void zebra_lock_prefix(Res res, char *path) { const char *lock_dir = res_get_def(res, "lockDir", ""); - + strcpy(path, lock_dir); if (*path && path[strlen(path)-1] != '/') strcat(path, "/");