+#define ZEBRA_CHECK_DICT 1 /* check flag: look every record term up in the dictionary */
+#define ZEBRA_CHECK_ISAM 2 /* check flag: also verify the ISAM position of a found dictionary entry */
+
+/**
+ * Check the keys stored with a single record against the register.
+ *
+ * The keys are read back from the record's recInfo_delKeys buffer. For each
+ * key the function verifies that
+ *  - the key length is within [2, IT_KEY_LEVEL_MAX];
+ *  - the encoded ordinal plus term fits a dictionary entry;
+ *  - (ZEBRA_CHECK_DICT) the term can be looked up in the dictionary;
+ *  - (ZEBRA_CHECK_DICT and ZEBRA_CHECK_ISAM) the dictionary info has the
+ *    size of an ISAM_P, that position can be opened in the isamb register
+ *    and, unless static rank is enabled, a forward search finds exactly
+ *    the record's key.
+ *
+ * \param zh            Zebra handle; zh->reg must be valid
+ * \param rec           record whose keys are checked
+ * \param no_keys       set to the number of keys read for this record
+ * \param message_limit per-category limit on the number of warnings logged
+ * \param flags         bitwise OR of ZEBRA_CHECK_DICT / ZEBRA_CHECK_ISAM
+ * \param no_long_dict_entries    incremented per over-long term/ordinal
+ * \param no_failed_dict_lookups  incremented per term missing in dictionary
+ * \param no_invalid_keys         incremented per key with bad length
+ * \param no_invalid_dict_infos   incremented per dict info of wrong size
+ * \param no_invalid_isam_entries incremented per failed ISAM verification
+ * \return ZEBRA_OK if no problem was found, ZEBRA_FAIL otherwise
+ *
+ * The counters are cumulative across calls; the caller initialises them.
+ */
+static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec,
+                                    zint *no_keys, int message_limit,
+                                    unsigned flags,
+                                    zint *no_long_dict_entries,
+                                    zint *no_failed_dict_lookups,
+                                    zint *no_invalid_keys,
+                                    zint *no_invalid_dict_infos,
+                                    zint *no_invalid_isam_entries)
+{
+    ZEBRA_RES res = ZEBRA_OK;
+    zebra_rec_keys_t keys = zebra_rec_keys_open();
+    zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
+                           rec->size[recInfo_delKeys], 0);
+
+    *no_keys = 0;
+    if (zebra_rec_keys_rewind(keys))
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        NMEM nmem = nmem_create();
+
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
+            int do_fail = 0;
+            int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+            char ord_buf[IT_MAX_WORD+20];
+            int ord_len = key_SU_encode(ord, ord_buf);
+            char *info = 0;
+            /* remembered so the ISAM check below never copies a key
+               whose length is out of range */
+            int key_len_ok =
+                key_in.len >= 2 && key_in.len <= IT_KEY_LEVEL_MAX;
+
+            (*no_keys)++;
+
+            if (!key_len_ok)
+            {
+                res = ZEBRA_FAIL;
+                (*no_invalid_keys)++;
+                if (*no_invalid_keys <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": unexpected key length %d",
+                            rec->sysno, key_in.len);
+                }
+            }
+            if (ord_len + slen >= sizeof(ord_buf)-1)
+            {
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    /* so bad it can not fit into our ord_buf */
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, ord_len, (int) slen);
+                }
+                /* NOTE(review): this continue bypasses the key dump at the
+                   end of the loop even though do_fail was set - confirm
+                   whether that is intended */
+                continue;
+            }
+            memcpy(ord_buf + ord_len, str, slen);
+            ord_buf[ord_len + slen] = '\0';
+            if (slen > IT_MAX_WORD || ord_len > 4)
+            {
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, (int) ord_len, (int) slen);
+                }
+            }
+            /* NOTE(review): without ZEBRA_CHECK_DICT the key dump below is
+               skipped as well, even for keys flagged above - confirm */
+            if ((flags & ZEBRA_CHECK_DICT) == 0)
+                continue;
+            info = dict_lookup(zh->reg->dict, ord_buf);
+            if (!info)
+            {
+                res = ZEBRA_FAIL;
+                (*no_failed_dict_lookups)++;
+                if (*no_failed_dict_lookups <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": term do not exist in dictionary", rec->sysno);
+                }
+            }
+            else if (flags & ZEBRA_CHECK_ISAM)
+            {
+                ISAM_P pos;
+
+                /* first byte of the dictionary info is the payload size */
+                if (*info != sizeof(pos))
+                {
+                    res = ZEBRA_FAIL;
+                    (*no_invalid_dict_infos)++;
+                    if (*no_invalid_dict_infos <= message_limit)
+                    {
+                        do_fail = 1;
+                        yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                ": dictionary info size %d, expected %d",
+                                rec->sysno, (int) *info, (int) sizeof(pos));
+                    }
+                }
+                else
+                {
+                    int scope = 1;
+                    memcpy(&pos, info+1, sizeof(pos));
+                    if (zh->reg->isamb)
+                    {
+                        ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos,
+                                                      scope);
+                        if (!ispt)
+                        {
+                            res = ZEBRA_FAIL;
+                            (*no_invalid_isam_entries)++;
+                            if (*no_invalid_isam_entries <= message_limit)
+                            {
+                                do_fail = 1;
+                                yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                        ": isamb_pp_open entry " ZINT_FORMAT
+                                        " not found",
+                                        rec->sysno, pos);
+                            }
+                        }
+                        else if (zh->m_staticrank || !key_len_ok)
+                        {
+                            /* No forward search: with static rank only the
+                               open is verified, and a key of invalid length
+                               (already counted above) must not be copied
+                               into until_key. */
+                            isamb_pp_close(ispt);
+                        }
+                        else
+                        {
+                            struct it_key until_key;
+                            struct it_key isam_key;
+                            int r;
+                            int i;
+
+                            /* ISAM key is the record key without the
+                               leading ordinal */
+                            until_key.len = key_in.len - 1;
+                            for (i = 0; i < until_key.len; i++)
+                                until_key.mem[i] = key_in.mem[i+1];
+
+                            if (until_key.mem[0] == 0)
+                                until_key.mem[0] = rec->sysno;
+                            r = isamb_pp_forward(ispt, &isam_key, &until_key);
+                            if (r != 1)
+                            {
+                                res = ZEBRA_FAIL;
+                                (*no_invalid_isam_entries)++;
+                                if (*no_invalid_isam_entries <= message_limit)
+                                {
+                                    do_fail = 1;
+                                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                            ": isamb_pp_forward " ZINT_FORMAT
+                                            " returned no entry",
+                                            rec->sysno, pos);
+                                }
+                            }
+                            else
+                            {
+                                int cmp = key_compare(&until_key, &isam_key);
+                                if (cmp != 0)
+                                {
+                                    res = ZEBRA_FAIL;
+                                    (*no_invalid_isam_entries)++;
+                                    if (*no_invalid_isam_entries
+                                        <= message_limit)
+                                    {
+                                        do_fail = 1;
+                                        yaz_log(YLOG_WARN, "Record "
+                                                ZINT_FORMAT
+                                                ": isamb_pp_forward "
+                                                ZINT_FORMAT
+                                                " returned different entry",
+                                                rec->sysno, pos);
+
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &until_key,
+                                                        "until");
+
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &isam_key,
+                                                        "isam");
+
+                                    }
+                                }
+                            }
+                            isamb_pp_close(ispt);
+                        }
+
+                    }
+                }
+            }
+            if (do_fail)
+            {
+                /* dump the offending key once per logged warning */
+                zebra_it_key_str_dump(zh, &key_in, str,
+                                      slen, nmem, YLOG_LOG);
+                nmem_reset(nmem);
+            }
+        }
+        nmem_destroy(nmem);
+    }
+    zebra_rec_keys_close(keys);
+    return res;
+}
+
+/**
+ * Check all records of the register and log a summary.
+ *
+ * \param zh   Zebra handle
+ * \param spec check level: NULL, "", "dict" or "default" checks dictionary
+ *             lookups; "isam" or "full" additionally checks ISAM entries;
+ *             "quick" checks only the keys themselves. Any other value is
+ *             logged as a warning and rejected.
+ * \return ZEBRA_OK if the register could be opened for reading, zh->reg is
+ *         set and every record passed; ZEBRA_FAIL otherwise
+ */
+ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec)
+{
+    ZEBRA_RES res = ZEBRA_FAIL;
+    unsigned flags = 0;
+
+    if (!spec || *spec == '\0'
+        || !strcmp(spec, "dict") || !strcmp(spec, "default"))
+        flags = ZEBRA_CHECK_DICT;
+    else if (!strcmp(spec, "isam") || !strcmp(spec, "full"))
+        flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM;
+    else if (!strcmp(spec, "quick"))
+        flags = 0;
+    else
+    {
+        yaz_log(YLOG_WARN, "Unknown check spec: %s", spec);
+        return ZEBRA_FAIL;
+    }
+
+    yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u", flags);
+    if (zebra_begin_read(zh) == ZEBRA_OK)
+    {
+        zint no_records_total = 0;
+        zint no_records_fail = 0;
+        zint total_keys = 0;
+        int message_limit = zh->m_file_verbose_limit;
+
+        if (zh->reg)
+        {
+            Record rec = rec_get_root(zh->reg->records);
+
+            /* problem counters, accumulated over all records */
+            zint no_long_dict_entries = 0;
+            zint no_failed_dict_lookups = 0;
+            zint no_invalid_keys = 0;
+            zint no_invalid_dict_infos = 0;
+            zint no_invalid_isam_entries = 0;
+
+            res = ZEBRA_OK;
+            while (rec)
+            {
+                Record r1;
+                zint no_keys;
+
+                if (zebra_record_check(zh, rec, &no_keys, message_limit,
+                                       flags,
+                                       &no_long_dict_entries,
+                                       &no_failed_dict_lookups,
+                                       &no_invalid_keys,
+                                       &no_invalid_dict_infos,
+                                       &no_invalid_isam_entries
+                        )
+                    != ZEBRA_OK)
+                {
+                    res = ZEBRA_FAIL;
+                    no_records_fail++;
+                }
+
+                /* fetch the successor before releasing the current record */
+                r1 = rec_get_next(zh->reg->records, rec);
+                rec_free(&rec);
+                rec = r1;
+                no_records_total++;
+                total_keys += no_keys;
+            }
+            yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT,
+                    no_records_total);
+            yaz_log(YLOG_LOG, "records fail: " ZINT_FORMAT,
+                    no_records_fail);
+            yaz_log(YLOG_LOG, "total keys: " ZINT_FORMAT,
+                    total_keys);
+            /* invalid keys fail the check, so report them in the summary */
+            yaz_log(YLOG_LOG, "invalid keys: " ZINT_FORMAT,
+                    no_invalid_keys);
+            yaz_log(YLOG_LOG, "long dict entries: " ZINT_FORMAT,
+                    no_long_dict_entries);
+            if (flags & ZEBRA_CHECK_DICT)
+            {
+                yaz_log(YLOG_LOG, "failed dict lookups: " ZINT_FORMAT,
+                        no_failed_dict_lookups);
+                yaz_log(YLOG_LOG, "invalid dict infos: " ZINT_FORMAT,
+                        no_invalid_dict_infos);
+            }
+            if (flags & ZEBRA_CHECK_ISAM)
+                yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT,
+                        no_invalid_isam_entries);
+        }
+        zebra_end_read(zh);
+    }
+    yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res);
+    return res;
+}
+