X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzebraapi.c;h=885c684ba32ab201f9f8241fd4a505f8f7a2874f;hp=2b69a2d36199707dae6ca4f5d61bd6f30aa710ef;hb=45a75e3bd29f9cd44bb7880544ad3864f5c6e87d;hpb=5478f209e90c32b8083c9b230de618599df93ff6

Review note: the last hunk differs from the commit referenced above. In
zebra_record_check() the length test now runs before the memcpy() into
ord_buf, and both added functions carry header comments. The other hunks
are unchanged.

diff --git a/index/zebraapi.c b/index/zebraapi.c
index 2b69a2d..885c684 100644
--- a/index/zebraapi.c
+++ b/index/zebraapi.c
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 1994-2009 Index Data
+   Copyright (C) 1994-2010 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -334,7 +334,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 {
     struct zebra_register *reg;
     int record_compression = REC_COMPRESS_NONE;
-    const char *recordCompression = 0;
+    const char *compression_str = 0;
     const char *profilePath;
     int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
@@ -420,11 +420,25 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     zebraRankInstall(reg, rank_similarity_class);
     zebraRankInstall(reg, rank_static_class);
 
-    recordCompression = res_get_def(res, "recordCompression", "none");
-    if (!strcmp(recordCompression, "none"))
+    compression_str = res_get_def(res, "recordCompression", "none");
+    if (!strcmp(compression_str, "none"))
         record_compression = REC_COMPRESS_NONE;
-    if (!strcmp(recordCompression, "bzip2"))
+    else if (!strcmp(compression_str, "bzip2"))
         record_compression = REC_COMPRESS_BZIP2;
+    else if (!strcmp(compression_str, "zlib"))
+        record_compression = REC_COMPRESS_ZLIB;
+    else
+    {
+        yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str);
+        ret = ZEBRA_FAIL;
+    }
+
+    if (!rec_check_compression_method(record_compression))
+    {
+        yaz_log(YLOG_FATAL, "unsupported recordCompression: %s",
+                compression_str);
+        ret = ZEBRA_FAIL;
+    }
 
     {
         const char *index_fname = res_get_def(res, "index", "default.idx");
@@ -2180,6 +2194,124 @@ ZEBRA_RES zebra_compact(ZebraHandle zh)
     return ZEBRA_OK;
 }
 
+/* Check that every key stored with a record resolves to a term in the
+ * register dictionary.
+ *
+ * The record's recInfo_delKeys buffer is read key by key. For each key
+ * the lookup term is built from key_SU_encode(key_in.mem[0]) followed by
+ * the key string, and is looked up with dict_lookup(). Terms that would
+ * not fit in IT_MAX_WORD bytes are counted and skipped. Both kinds of
+ * problem are reported per record with YLOG_WARN.
+ *
+ * zh       handle whose dictionary (zh->reg->dict) is consulted
+ * rec      record whose stored keys are checked
+ * no_keys  out: number of keys read from the record
+ *
+ * Returns ZEBRA_OK if the key buffer could be rewound and scanned, and
+ * ZEBRA_FAIL otherwise (*no_keys is then 0). Missing or over-long terms
+ * only produce warnings; they do not change the return value.
+ */
+static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec,
+                                    zint *no_keys)
+{
+    ZEBRA_RES res = ZEBRA_FAIL;
+    zebra_rec_keys_t keys = zebra_rec_keys_open();
+    zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
+                           rec->size[recInfo_delKeys], 0);
+
+    *no_keys = 0;
+    if (zebra_rec_keys_rewind(keys))
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        int no_long_dict_entries = 0;
+        int no_failed_dict_lookup = 0;
+
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
+            int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+            char ord_buf[IT_MAX_WORD];
+            int ord_len = key_SU_encode(ord, ord_buf);
+
+            /* Test the length BEFORE copying: ord_buf holds only
+             * IT_MAX_WORD bytes, so copying an over-long key first
+             * would write past the end of the buffer. */
+            if (ord_len + slen >= IT_MAX_WORD)
+                ++no_long_dict_entries;
+            else
+            {
+                memcpy(ord_buf + ord_len, str, slen);
+                ord_buf[ord_len + slen] = '\0';
+                if (!dict_lookup(zh->reg->dict, ord_buf))
+                    no_failed_dict_lookup++;
+            }
+            (*no_keys)++;
+        }
+        if (no_long_dict_entries)
+        {
+            yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT
+                    " has %d dictionary entries that are too long",
+                    rec->sysno, no_long_dict_entries);
+        }
+        if (no_failed_dict_lookup)
+        {
+            yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT
+                    " has %d terms that do not exist in dictionary",
+                    rec->sysno, no_failed_dict_lookup);
+        }
+        res = ZEBRA_OK;
+    }
+    zebra_rec_keys_close(keys);
+    return res;
+}
+
+/* Walk all records of the register, check each record's keys against
+ * the dictionary and log the totals.
+ *
+ * Iteration starts at rec_get_root() and follows rec_get_next() until
+ * it yields a null record; each record is freed once it has been
+ * checked. The record and key counts are logged at YLOG_LOG level.
+ *
+ * Returns ZEBRA_OK when zebra_begin_read() succeeded and zh->reg is
+ * set, ZEBRA_FAIL otherwise. The results of the individual record
+ * checks are not propagated into the return value.
+ */
+ZEBRA_RES zebra_register_check(ZebraHandle zh)
+{
+    ZEBRA_RES res = ZEBRA_FAIL;
+    if (zebra_begin_read(zh) == ZEBRA_OK)
+    {
+        zint no_records = 0;
+        zint total_keys = 0;
+        if (zh->reg)
+        {
+            Record rec = rec_get_root(zh->reg->records);
+
+            while (rec)
+            {
+                Record r1;
+                zint no_keys;
+
+                /* no_keys is always assigned by the callee, also when
+                 * it returns ZEBRA_FAIL. */
+                zebra_record_check(zh, rec, &no_keys);
+                /* Fetch the successor before rec is released. */
+                r1 = rec_get_next(zh->reg->records, rec);
+                rec_free(&rec);
+                rec = r1;
+                no_records++;
+                total_keys += no_keys;
+            }
+            res = ZEBRA_OK;
+            yaz_log(YLOG_LOG, "records: " ZINT_FORMAT, no_records);
+            yaz_log(YLOG_LOG, "keys: " ZINT_FORMAT, total_keys);
+        }
+        zebra_end_read(zh);
+    }
+    return res;
+}
+
 void zebra_result(ZebraHandle zh, int *code, char **addinfo)
 {
     yaz_log(log_level, "zebra_result");