X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzebraapi.c;h=2e64dd0e241bd470ad9f970ae66f4baa8c6bc572;hp=adc9b6a02df676ffaba957cc6e4dc7df51b25d34;hb=3776ae570114d93706ff320ef573d202e908f98e;hpb=f3f94568869a1ae0402bfa4f0dea4d80b09695df diff --git a/index/zebraapi.c b/index/zebraapi.c index adc9b6a..2e64dd0 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1995-2008 Index Data + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -61,20 +64,24 @@ static ZEBRA_RES zebra_check_handle(ZebraHandle zh) #define ZEBRA_CHECK_HANDLE(zh) if (zebra_check_handle(zh) != ZEBRA_OK) return ZEBRA_FAIL -static void zebra_chdir(ZebraService zs) +static int zebra_chdir(ZebraService zs) { const char *dir ; + int r; ASSERTZS; yaz_log(log_level, "zebra_chdir"); dir = res_get(zs->global_res, "chdir"); if (!dir) - return; + return 0; yaz_log(YLOG_DEBUG, "chdir %s", dir); #ifdef WIN32 - _chdir(dir); + r = _chdir(dir); #else - chdir(dir); + r = chdir(dir); #endif + if (r) + yaz_log(YLOG_FATAL|YLOG_ERRNO, "chdir %s", dir); + return r; } static ZEBRA_RES zebra_flush_reg(ZebraHandle zh) @@ -199,11 +206,14 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) log_level = yaz_log_module_level("zebraapi"); log_level_initialized = 1; } - + + *system_str = '\0'; + *version_str = '\0'; zebra_get_version(version_str, system_str); - yaz_log(YLOG_LOG, "zebra_start %s %s", version_str, - configName ? configName : ""); + yaz_log(YLOG_LOG, "zebra_start %s %s", version_str, system_str); + if (configName) + yaz_log(YLOG_LOG, "config %s", configName); if ((res = res_open(def_res, over_res))) { @@ -236,7 +246,11 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res) zh->global_res = res; zh->sessions = 0; - zebra_chdir(zh); + if (zebra_chdir(zh)) + { + xfree(zh); + return 0; + } zebra_mutex_cond_init(&zh->session_lock); passwd_plain = res_get(zh->global_res, "passwd"); @@ -323,9 +337,8 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, { struct zebra_register *reg; int record_compression = REC_COMPRESS_NONE; - const char *recordCompression = 0; + const char *compression_str = 0; const char *profilePath; - char cwd[1024]; int sort_type = ZEBRA_SORT_TYPE_FLAT; ZEBRA_RES ret = ZEBRA_OK; @@ -371,7 +384,6 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, } } - getcwd(cwd, sizeof(cwd)-1); profilePath = res_get_def(res, "profilePath", 0); data1_set_tabpath(reg->dh, profilePath); @@ -407,14 +419,29 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, /* installing rank classes */ zebraRankInstall(reg, rank_1_class); + zebraRankInstall(reg, rank_2_class); zebraRankInstall(reg, rank_similarity_class); zebraRankInstall(reg, rank_static_class); - recordCompression = res_get_def(res, "recordCompression", "none"); - if (!strcmp(recordCompression, "none")) + compression_str = res_get_def(res, "recordCompression", "none"); + if (!strcmp(compression_str, "none")) record_compression = REC_COMPRESS_NONE; - if (!strcmp(recordCompression, "bzip2")) + else if (!strcmp(compression_str, "bzip2")) record_compression = REC_COMPRESS_BZIP2; + else if (!strcmp(compression_str, "zlib")) + record_compression = REC_COMPRESS_ZLIB; + else + { + yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str); + ret = ZEBRA_FAIL; + } + + if (!rec_check_compression_method(record_compression)) + { + yaz_log(YLOG_FATAL, "unsupported recordCompression: %s", + compression_str); + ret = ZEBRA_FAIL; + } { const char *index_fname = res_get_def(res, "index", "default.idx"); @@ -1427,7 +1454,6 @@ int delete_w_handle(const char *info, void *handle) { ZebraHandle zh = (ZebraHandle) handle; ISAM_P pos; - ASSERTZH; if (*info == sizeof(pos)) { @@ -1437,6 +1463,32 @@ int delete_w_handle(const char *info, void *handle) return 0; } +int delete_w_all_handle(const char *info, void *handle) +{ + ZebraHandle zh = (ZebraHandle) handle; + ISAM_P pos; + + if (*info == sizeof(pos)) + { + ISAMB_PP pt; + memcpy(&pos, info+1, sizeof(pos)); + pt = isamb_pp_open(zh->reg->isamb, pos, 2); + if (pt) + { + struct it_key key; + key.mem[0] = 0; + while (isamb_pp_read(pt, &key)) + { + Record rec; + rec = rec_get(zh->reg->records, key.mem[0]); + rec_del(zh->reg->records, &rec); + } + isamb_pp_close(pt); + } + } + return delete_w_handle(info, handle); +} + static int delete_SU_handle(void *handle, int ord, const char *index_type, const char *string_index, zinfo_index_category_t cat) @@ -1445,23 +1497,18 @@ static int delete_SU_handle(void *handle, int ord, char ord_buf[20]; int ord_len; #if 0 - const char *index_type = 0; - const char *db = 0; - const char *string_index = 0; - zebraExplain_lookup_ord(zh->reg->zei, ord, - &index_type, &db, &string_index); - yaz_log(YLOG_LOG, - "delete_SU_handle:: ord=%d index_type=%s db=%s string_index=%s", - ord, index_type, db, string_index); -#endif yaz_log(YLOG_LOG, "ord=%d index_type=%s index=%s cat=%d", ord, index_type, string_index, (int) cat); +#endif ord_len = key_SU_encode(ord, ord_buf); ord_buf[ord_len] = '\0'; assert(zh->reg->isamb); + assert(zh->reg->records); dict_delete_subtree(zh->reg->dict, ord_buf, - zh, delete_w_handle); + zh, + !strcmp(string_index, "_ALLRECORDS") ? + delete_w_all_handle : delete_w_handle); return 0; } @@ -1580,7 +1627,11 @@ static void zebra_set_state(ZebraHandle zh, int val, int seqno) sprintf(state_fname, "state.%s.LCK", zh->reg_name); fname = zebra_mk_fname(res_get(zh->res, "lockDir"), state_fname); f = fopen(fname, "w"); - + if (!f) + { + yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s w", state_fname); + exit(1); + } yaz_log(YLOG_DEBUG, "zebra_set_state: %c %d %ld", val, seqno, p); fprintf(f, "%c %d %ld\n", val, seqno, p); fclose(f); @@ -1604,7 +1655,11 @@ static void zebra_get_state(ZebraHandle zh, char *val, int *seqno) if (f) { - fscanf(f, "%c %d", val, seqno); + if (fscanf(f, "%c %d", val, seqno) != 2) + { + yaz_log(YLOG_ERRNO|YLOG_WARN, "fscan fail %s", + state_fname); + } fclose(f); } xfree(fname); @@ -1757,12 +1812,8 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) zh->reg = zebra_register_open(zh->service, zh->reg_name, 1, rval ? 1 : 0, zh->res, zh->path_reg); - if (zh->reg) - zh->reg->seqno = seqno; - else + if (!zh->reg) { - zebra_set_state(zh, 'o', seqno); - zebra_unlock(zh->lock_shadow); zebra_unlock(zh->lock_normal); @@ -1774,6 +1825,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw) yaz_log(YLOG_FATAL, "%s", zh->errString); return ZEBRA_FAIL; } + zh->reg->seqno = seqno; zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]); } else @@ -2146,6 +2198,300 @@ ZEBRA_RES zebra_compact(ZebraHandle zh) return ZEBRA_OK; } +#define ZEBRA_CHECK_DICT 1 +#define ZEBRA_CHECK_ISAM 2 + +static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec, + zint *no_keys, int message_limit, + unsigned flags, + zint *no_long_dict_entries, + zint *no_failed_dict_lookups, + zint *no_invalid_keys, + zint *no_invalid_dict_infos, + zint *no_invalid_isam_entries) +{ + ZEBRA_RES res = ZEBRA_OK; + zebra_rec_keys_t keys = zebra_rec_keys_open(); + zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], 0); + + *no_keys = 0; + if (!zebra_rec_keys_rewind(keys)) + { + ; + } + else + { + size_t slen; + const char *str; + struct it_key key_in; + NMEM nmem = nmem_create(); + + while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) + { + int do_fail = 0; + int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + char ord_buf[IT_MAX_WORD+20]; + int ord_len = key_SU_encode(ord, ord_buf); + char *info = 0; + + (*no_keys)++; + + if (key_in.len < 2 || key_in.len > IT_KEY_LEVEL_MAX) + { + res = ZEBRA_FAIL; + (*no_invalid_keys)++; + if (*no_invalid_keys <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": unexpected key length %d", + rec->sysno, key_in.len); + } + } + if (ord_len + slen >= sizeof(ord_buf)-1) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + /* so bad it can not fit into our ord_buf */ + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, ord_len, (int) slen); + } + continue; + } + memcpy(ord_buf + ord_len, str, slen); + ord_buf[ord_len + slen] = '\0'; + if (ord_len + slen >= IT_MAX_WORD) + { + res = ZEBRA_FAIL; + (*no_long_dict_entries)++; + if (*no_long_dict_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + if ((flags & ZEBRA_CHECK_DICT) == 0) + continue; + info = dict_lookup(zh->reg->dict, ord_buf); + if (!info) + { + res = ZEBRA_FAIL; + (*no_failed_dict_lookups)++; + if (*no_failed_dict_lookups <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": term do not exist in dictionary", rec->sysno); + } + } + else if (flags & ZEBRA_CHECK_ISAM) + { + ISAM_P pos; + + if (*info != sizeof(pos)) + { + res = ZEBRA_FAIL; + (*no_invalid_dict_infos)++; + if (*no_invalid_dict_infos <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } + } + else + { + int scope = 1; + memcpy(&pos, info+1, sizeof(pos)); + if (zh->reg->isamb) + { + ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos, + scope); + if (!ispt) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_open entry " ZINT_FORMAT + " not found", + rec->sysno, pos); + } + } + else if (zh->m_staticrank) + { + isamb_pp_close(ispt); + } + else + { + struct it_key until_key; + struct it_key isam_key; + int r; + int i = 0; + + until_key.len = key_in.len - 1; + for (i = 0; i < until_key.len; i++) + until_key.mem[i] = key_in.mem[i+1]; + + if (until_key.mem[0] == 0) + until_key.mem[0] = rec->sysno; + r = isamb_pp_forward(ispt, &isam_key, &until_key); + if (r != 1) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": isamb_pp_forward " ZINT_FORMAT + " returned no entry", + rec->sysno, pos); + } + } + else + { + int cmp = key_compare(&until_key, &isam_key); + if (cmp != 0) + { + res = ZEBRA_FAIL; + (*no_invalid_isam_entries)++; + if (*no_invalid_isam_entries + <= message_limit) + { + do_fail = 1; + yaz_log(YLOG_WARN, "Record " + ZINT_FORMAT + ": isamb_pp_forward " + ZINT_FORMAT + " returned different entry", + rec->sysno, pos); + + key_logdump_txt(YLOG_LOG, + &until_key, + "until"); + + key_logdump_txt(YLOG_LOG, + &isam_key, + "isam"); + + } + } + } + isamb_pp_close(ispt); + } + + } + } + } + if (do_fail) + { + zebra_it_key_str_dump(zh, &key_in, str, + slen, nmem, YLOG_LOG); + nmem_reset(nmem); + } + } + nmem_destroy(nmem); + } + zebra_rec_keys_close(keys); + return res; +} + +ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec) +{ + ZEBRA_RES res = ZEBRA_FAIL; + unsigned flags = 0; + int message_limit = 10; + + if (!spec || *spec == '\0' + || !strcmp(spec, "dict") || !strcmp(spec, "default")) + flags = ZEBRA_CHECK_DICT; + else if (!strcmp(spec, "isam") || !strcmp(spec, "full")) + flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM; + else if (!strcmp(spec, "quick")) + flags = 0; + else + return ZEBRA_FAIL; + + yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u message_limit=%d", + flags, message_limit); + if (zebra_begin_read(zh) == ZEBRA_OK) + { + zint no_records_total = 0; + zint no_records_fail = 0; + zint total_keys = 0; + + if (zh->reg) + { + Record rec = rec_get_root(zh->reg->records); + + zint no_long_dict_entries = 0; + zint no_failed_dict_lookups = 0; + zint no_invalid_keys = 0; + zint no_invalid_dict_infos = 0; + zint no_invalid_isam_entries = 0; + + res = ZEBRA_OK; + while (rec) + { + Record r1; + zint no_keys; + + if (zebra_record_check(zh, rec, &no_keys, message_limit, + flags, + &no_long_dict_entries, + &no_failed_dict_lookups, + &no_invalid_keys, + &no_invalid_dict_infos, + &no_invalid_isam_entries + ) + != ZEBRA_OK) + { + res = ZEBRA_FAIL; + no_records_fail++; + } + + r1 = rec_get_next(zh->reg->records, rec); + rec_free(&rec); + rec = r1; + no_records_total++; + total_keys += no_keys; + } + yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT, + no_records_total); + yaz_log(YLOG_LOG, "records fail: " ZINT_FORMAT, + no_records_fail); + yaz_log(YLOG_LOG, "total keys: " ZINT_FORMAT, + total_keys); + yaz_log(YLOG_LOG, "long dict entries: " ZINT_FORMAT, + no_long_dict_entries); + if (flags & ZEBRA_CHECK_DICT) + { + yaz_log(YLOG_LOG, "failed dict lookups: " ZINT_FORMAT, + no_failed_dict_lookups); + yaz_log(YLOG_LOG, "invalid dict infos: " ZINT_FORMAT, + no_invalid_dict_infos); + } + if (flags & ZEBRA_CHECK_ISAM) + yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT, + no_invalid_isam_entries); + } + zebra_end_read(zh); + } + yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res); + return res; +} + void zebra_result(ZebraHandle zh, int *code, char **addinfo) { yaz_log(log_level, "zebra_result"); @@ -2281,7 +2627,6 @@ ZEBRA_RES zebra_update_record(ZebraHandle zh, return ZEBRA_FAIL; res = zebra_buffer_extract_record(zh, buf, buf_size, action, - 0, /* test_mode */ recordType, sysno, match, @@ -2433,6 +2778,7 @@ void zebra_lock_prefix(Res res, char *path) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab