X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzebraapi.c;h=6415172a486d70eda08dae9a9c28429660130687;hp=b4cade11ab5c928df8f3436a3c8db822aa25b965;hb=ec6ef89747394edb31aedfce9d585b5b34191154;hpb=6a0f9234f945bc4956e2bcef75f715661a9eba9a diff --git a/index/zebraapi.c b/index/zebraapi.c index b4cade1..6415172 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1994-2009 Index Data + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -61,20 +64,24 @@ static ZEBRA_RES zebra_check_handle(ZebraHandle zh) #define ZEBRA_CHECK_HANDLE(zh) if (zebra_check_handle(zh) != ZEBRA_OK) return ZEBRA_FAIL -static void zebra_chdir(ZebraService zs) +static int zebra_chdir(ZebraService zs) { const char *dir ; + int r; ASSERTZS; yaz_log(log_level, "zebra_chdir"); dir = res_get(zs->global_res, "chdir"); if (!dir) - return; + return 0; yaz_log(YLOG_DEBUG, "chdir %s", dir); #ifdef WIN32 - _chdir(dir); + r = _chdir(dir); #else - chdir(dir); + r = chdir(dir); #endif + if (r) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "chdir %s", dir); + return r; } static ZEBRA_RES zebra_flush_reg(ZebraHandle zh) @@ -199,11 +206,14 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) log_level = yaz_log_module_level("zebraapi"); log_level_initialized = 1; } - + + *system_str = '\0'; + *version_str = '\0'; zebra_get_version(version_str, system_str); - yaz_log(YLOG_LOG, "zebra_start %s %s", version_str, - configName ? configName : ""); + yaz_log(YLOG_LOG, "zebra_start %s %s", version_str, system_str); + if (configName) + yaz_log(YLOG_LOG, "config %s", configName); if ((res = res_open(def_res, over_res))) { @@ -236,7 +246,11 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) zh->global_res = res; zh->sessions = 0; - zebra_chdir(zh); + if (zebra_chdir(zh)) + { + xfree(zh); + return 0; + } zebra_mutex_cond_init(&zh->session_lock); passwd_plain = res_get(zh->global_res, "passwd"); @@ -323,9 +337,8 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, { struct zebra_register *reg; int record_compression = REC_COMPRESS_NONE; - const char *recordCompression = 0; + const char *compression_str = 0; const char *profilePath; - char cwd[1024]; int sort_type = ZEBRA_SORT_TYPE_FLAT; ZEBRA_RES ret = ZEBRA_OK; @@ -371,7 +384,6 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, } } - getcwd(cwd, sizeof(cwd)-1); profilePath = res_get_def(res, "profilePath", 0); data1_set_tabpath(reg->dh, profilePath); @@ -407,14 +419,29 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, /* installing rank classes */ zebraRankInstall(reg, rank_1_class); + zebraRankInstall(reg, rank_2_class); zebraRankInstall(reg, rank_similarity_class); zebraRankInstall(reg, rank_static_class); - recordCompression = res_get_def(res, "recordCompression", "none"); - if (!strcmp(recordCompression, "none")) + compression_str = res_get_def(res, "recordCompression", "none"); + if (!strcmp(compression_str, "none")) record_compression = REC_COMPRESS_NONE; - if (!strcmp(recordCompression, "bzip2")) + else if (!strcmp(compression_str, "bzip2")) record_compression = REC_COMPRESS_BZIP2; + else if (!strcmp(compression_str, "zlib")) + record_compression = REC_COMPRESS_ZLIB; + else + { + yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str); + ret = ZEBRA_FAIL; + } + + if (!rec_check_compression_method(record_compression)) + { + yaz_log(YLOG_FATAL, "unsupported recordCompression: %s", + compression_str); + ret = ZEBRA_FAIL; + } { const char *index_fname = res_get_def(res, "index", "default.idx"); @@ -1600,7 +1627,11 @@ static void zebra_set_state(ZebraHandle zh, int val, int seqno) sprintf(state_fname, "state.%s.LCK", zh->reg_name); fname = zebra_mk_fname(res_get(zh->res, "lockDir"), state_fname); f = fopen(fname, "w"); - + if (!f) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s w", state_fname); + exit(1); + } yaz_log(YLOG_DEBUG, "zebra_set_state: %c %d %ld", val, seqno, p); fprintf(f, "%c %d %ld\n", val, seqno, p); fclose(f); @@ -1624,7 +1655,11 @@ static void zebra_get_state(ZebraHandle zh, char *val, int *seqno) if (f) { - fscanf(f, "%c %d", val, seqno); + if (fscanf(f, "%c %d", val, seqno) != 2) + { + yaz_log(YLOG_ERRNO|YLOG_WARN, "fscan fail %s", + state_fname); + } fclose(f); } xfree(fname); @@ -1781,8 +1816,6 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) zh->reg->seqno = seqno; else { - zebra_set_state(zh, 'o', seqno); - zebra_unlock(zh->lock_shadow); zebra_unlock(zh->lock_normal); @@ -2166,6 +2199,300 @@ ZEBRA_RES zebra_compact(ZebraHandle zh) return ZEBRA_OK; } +#define ZEBRA_CHECK_DICT 1 +#define ZEBRA_CHECK_ISAM 2 + +static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec, + zint *no_keys, int message_limit, + unsigned flags, + zint *no_long_dict_entries, + zint *no_failed_dict_lookups, + zint *no_invalid_keys, + zint *no_invalid_dict_infos, + zint *no_invalid_isam_entries) +{ + ZEBRA_RES res = ZEBRA_OK; + zebra_rec_keys_t keys = zebra_rec_keys_open(); + zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], 0); + + *no_keys = 0; + if (!zebra_rec_keys_rewind(keys)) + { + ; + } + else + { + size_t slen; + const char *str; + struct it_key key_in; + NMEM nmem = nmem_create(); + + while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) + { + int do_fail = 0; + int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + char ord_buf[IT_MAX_WORD+20]; + int ord_len = key_SU_encode(ord, ord_buf); + char *info = 0; + + (*no_keys)++; + + if (key_in.len < 2 || key_in.len > IT_KEY_LEVEL_MAX) + { + res = ZEBRA_FAIL; + (*no_invalid_keys)++; + if (*no_invalid_keys <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": unexpected key length %d", + rec->sysno, key_in.len); + } + } + if (ord_len + slen >= sizeof(ord_buf)-1) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + /* so bad it can not fit into our ord_buf */ + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, ord_len, (int) slen); + } + continue; + } + memcpy(ord_buf + ord_len, str, slen); + ord_buf[ord_len + slen] = '\0'; + if (ord_len + slen >= IT_MAX_WORD) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + if ((flags & ZEBRA_CHECK_DICT) == 0) + continue; + info = dict_lookup(zh->reg->dict, ord_buf); + if (!info) + { + res = ZEBRA_FAIL; + (*no_failed_dict_lookups)++; + if (*no_failed_dict_lookups <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": term do not exist in dictionary", rec->sysno); + } + } + else if (flags & ZEBRA_CHECK_ISAM) + { + ISAM_P pos; + + if (*info != sizeof(pos)) + { + res = ZEBRA_FAIL; + (*no_invalid_dict_infos)++; + if (*no_invalid_dict_infos <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + else + { + int scope = 1; + memcpy(&pos, info+1, sizeof(pos)); + if (zh->reg->isamb) + { + ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos, + scope); + if (!ispt) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_open entry " ZINT_FORMAT + " not found", + rec->sysno, pos); + } + } + else if (zh->m_staticrank) + { + isamb_pp_close(ispt); + } + else + { + struct it_key until_key; + struct it_key isam_key; + int r; + int i = 0; + + until_key.len = key_in.len - 1; + for (i = 0; i < until_key.len; i++) + until_key.mem[i] = key_in.mem[i+1]; + + if (until_key.mem[0] == 0) + until_key.mem[0] = rec->sysno; + r = isamb_pp_forward(ispt, &isam_key, &until_key); + if (r != 1) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_forward " ZINT_FORMAT + " returned no entry", + rec->sysno, pos); + } + } + else + { + int cmp = key_compare(&until_key, &isam_key); + if (cmp != 0) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries + <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " + ZINT_FORMAT + ": isamb_pp_forward " + ZINT_FORMAT + " returned different entry", + rec->sysno, pos); + + key_logdump_txt(YLOG_LOG, + &until_key, + "until"); + + key_logdump_txt(YLOG_LOG, + &isam_key, + "isam"); + + } + } + } + isamb_pp_close(ispt); + } + + } + } + } + if (do_fail) + { + zebra_it_key_str_dump(zh, &key_in, str, + slen, nmem, YLOG_LOG); + nmem_reset(nmem); + } + } + nmem_destroy(nmem); + } + zebra_rec_keys_close(keys); + return res; +} + +ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec) +{ + ZEBRA_RES res = ZEBRA_FAIL; + unsigned flags = 0; + int message_limit = 10; + + if (!spec || *spec == '\0' + || !strcmp(spec, "dict") || !strcmp(spec, "default")) + flags = ZEBRA_CHECK_DICT; + else if (!strcmp(spec, "isam") || !strcmp(spec, "full")) + flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM; + else if (!strcmp(spec, "quick")) + flags = 0; + else + return ZEBRA_FAIL; + + yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u message_limit=%d", + flags, message_limit); + if (zebra_begin_read(zh) == ZEBRA_OK) + { + zint no_records_total = 0; + zint no_records_fail = 0; + zint total_keys = 0; + + if (zh->reg) + { + Record rec = rec_get_root(zh->reg->records); + + zint no_long_dict_entries = 0; + zint no_failed_dict_lookups = 0; + zint no_invalid_keys = 0; + zint no_invalid_dict_infos = 0; + zint no_invalid_isam_entries = 0; + + res = ZEBRA_OK; + while (rec) + { + Record r1; + zint no_keys; + + if (zebra_record_check(zh, rec, &no_keys, message_limit, + flags, + &no_long_dict_entries, + &no_failed_dict_lookups, + &no_invalid_keys, + &no_invalid_dict_infos, + &no_invalid_isam_entries + ) + != ZEBRA_OK) + { + res = ZEBRA_FAIL; + no_records_fail++; + } + + r1 = rec_get_next(zh->reg->records, rec); + rec_free(&rec); + rec = r1; + no_records_total++; + total_keys += no_keys; + } + yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT, + no_records_total); + yaz_log(YLOG_LOG, "records fail: " ZINT_FORMAT, + no_records_fail); + yaz_log(YLOG_LOG, "total keys: " ZINT_FORMAT, + total_keys); + yaz_log(YLOG_LOG, "long dict entries: " ZINT_FORMAT, + no_long_dict_entries); + if (flags & ZEBRA_CHECK_DICT) + { + yaz_log(YLOG_LOG, "failed dict lookups: " ZINT_FORMAT, + no_failed_dict_lookups); + yaz_log(YLOG_LOG, "invalid dict infos: " ZINT_FORMAT, + no_invalid_dict_infos); + } + if (flags & ZEBRA_CHECK_ISAM) + yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT, + no_invalid_isam_entries); + } + zebra_end_read(zh); + } + yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res); + return res; +} + void zebra_result(ZebraHandle zh, int *code, char **addinfo) { yaz_log(log_level, "zebra_result");