Verbose level for register check
[idzebra-moved-to-github.git] / index / zebraapi.c
index 7e2382d..26e2bbe 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 1994-2009 Index Data
+   Copyright (C) 1994-2010 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -69,7 +69,7 @@ static int zebra_chdir(ZebraService zs)
     yaz_log(log_level, "zebra_chdir");
     dir = res_get(zs->global_res, "chdir");
     if (!dir)
-       return;
+       return 0;
     yaz_log(YLOG_DEBUG, "chdir %s", dir);
 #ifdef WIN32
     r = _chdir(dir);
@@ -334,7 +334,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 {
     struct zebra_register *reg;
     int record_compression = REC_COMPRESS_NONE;
-    const char *recordCompression = 0;
+    const char *compression_str = 0;
     const char *profilePath;
     int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
@@ -420,11 +420,25 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     zebraRankInstall(reg, rank_similarity_class);
     zebraRankInstall(reg, rank_static_class);
 
-    recordCompression = res_get_def(res, "recordCompression", "none");
-    if (!strcmp(recordCompression, "none"))
+    compression_str = res_get_def(res, "recordCompression", "none");
+    if (!strcmp(compression_str, "none"))
        record_compression = REC_COMPRESS_NONE;
-    if (!strcmp(recordCompression, "bzip2"))
+    else if (!strcmp(compression_str, "bzip2"))
        record_compression = REC_COMPRESS_BZIP2;
+    else if (!strcmp(compression_str, "zlib"))
+       record_compression = REC_COMPRESS_ZLIB;
+    else
+    {
+        yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str);
+        ret = ZEBRA_FAIL;
+    }
+
+    if (!rec_check_compression_method(record_compression))
+    {
+        yaz_log(YLOG_FATAL, "unsupported recordCompression: %s",
+                compression_str);
+        ret = ZEBRA_FAIL;
+    }
 
     {
        const char *index_fname = res_get_def(res, "index", "default.idx");
@@ -1634,7 +1648,7 @@ static void zebra_get_state(ZebraHandle zh, char *val, int *seqno)
 
     if (f)
     {
-        if (fscanf(f, "%c %d", val, seqno))
+        if (fscanf(f, "%c %d", val, seqno) != 2)
         {
             yaz_log(YLOG_ERRNO|YLOG_WARN, "fscan fail %s",
                     state_fname);
@@ -2180,6 +2194,141 @@ ZEBRA_RES zebra_compact(ZebraHandle zh)
     return ZEBRA_OK;
 }
 
+static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec, /* check the keys stored with one record against the dictionary */
+                                    zint *no_keys, int verbose_level) /* *no_keys: out, number of keys read; verbose_level >= 1 logs each problem found */
+{
+    ZEBRA_RES res = ZEBRA_FAIL; /* only becomes ZEBRA_OK when the key loop ends with every problem counter at zero */
+    zebra_rec_keys_t keys = zebra_rec_keys_open();
+    zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], /* keys are read from the record's recInfo_delKeys buffer */
+                           rec->size[recInfo_delKeys], 0);
+    
+    *no_keys = 0; /* always initialised, so the caller may use it even when ZEBRA_FAIL is returned */
+    if (!zebra_rec_keys_rewind(keys)) /* NOTE(review): presumably "no keys to read" - confirm; result stays ZEBRA_FAIL */
+    {
+        ;
+    }
+    else
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        int no_long_dict_entries = 0; /* entries of IT_MAX_WORD bytes or more, including those too long for ord_buf */
+        int no_failed_dict_lookup = 0; /* entries for which dict_lookup returned null */
+        int no_invalid_keys = 0; /* keys whose len is outside 2..4 */
+        NMEM nmem = nmem_create();
+
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
+            int do_log = 0; /* set when a warning was logged and the key should be dumped below */
+            int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+            char ord_buf[IT_MAX_WORD+20]; /* holds the encoded ord followed by the term and a terminating NUL */
+            int ord_len = key_SU_encode(ord, ord_buf);
+            char *info = 0;
+
+            (*no_keys)++;
+
+            if (ord_len + slen >= sizeof(ord_buf)-1) /* would not fit in ord_buf: count it and skip the lookup */
+            {
+                if (verbose_level >= 1)
+                {
+                    /* so bad it can not fit into our ord_buf */
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, ord_len, (int) slen);
+                }
+                ++no_long_dict_entries;
+                continue; /* this path never reaches the key dump at the end of the loop body */
+            }
+            memcpy(ord_buf + ord_len, str, slen); /* lookup string = encoded ord + term bytes */
+            ord_buf[ord_len + slen] = '\0'; 
+            if (ord_len + slen >= IT_MAX_WORD) /* fits in ord_buf but reaches IT_MAX_WORD: counted, lookup still attempted */
+            {
+                if (verbose_level >= 1)
+                {
+                    do_log = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT 
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, (int) ord_len, (int) slen);
+                }
+                ++no_long_dict_entries;
+            }
+            info = dict_lookup(zh->reg->dict, ord_buf);
+            if (!info) /* a null lookup result is counted as a failed lookup */
+            {
+                if (verbose_level >= 1)
+                {
+                    do_log = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": term do not exist in dictionary", rec->sysno);
+                }
+                no_failed_dict_lookup++;
+            }
+            if (key_in.len < 2 || key_in.len > 4) /* only key lengths 2, 3 and 4 are accepted */
+            {
+                if (verbose_level >= 1)
+                {
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": unexpected key length %d",
+                            rec->sysno, key_in.len);
+                    do_log = 1;
+                }
+                no_invalid_keys++;
+            }
+            if (do_log) /* dump the key for which a warning was logged above */
+            {
+                zebra_it_key_str_dump(zh, &key_in, str,
+                                      slen, nmem, YLOG_LOG);
+                nmem_reset(nmem); /* NOTE(review): presumably releases memory used by the dump - confirm */
+            }
+        }
+        if (!no_long_dict_entries && !no_failed_dict_lookup && !no_invalid_keys) /* success requires zero problems of every kind */
+            res = ZEBRA_OK;
+        nmem_destroy(nmem);
+    }
+    zebra_rec_keys_close(keys);
+    return res;
+}
+
+ZEBRA_RES zebra_register_check(ZebraHandle zh, int verbose_level) /* run zebra_record_check on every record of the register and log totals */
+{
+    ZEBRA_RES res = ZEBRA_FAIL; /* returned unchanged when zebra_begin_read fails or zh->reg is null */
+    if (zebra_begin_read(zh) == ZEBRA_OK)
+    {
+        zint no_records_total = 0; /* records visited */
+        zint no_records_fail = 0; /* records for which zebra_record_check did not return ZEBRA_OK */
+        zint total_keys = 0; /* sum of the per-record key counts */
+        if (zh->reg)
+        {
+            Record rec = rec_get_root(zh->reg->records); /* iteration starts at the record returned by rec_get_root */
+            
+            res = ZEBRA_OK; /* set back to ZEBRA_FAIL as soon as one record fails its check */
+            while (rec)
+            {
+                Record r1;
+                zint no_keys;
+    
+                if (zebra_record_check(zh, rec, &no_keys, verbose_level) /* verbose_level is passed through unchanged */
+                    != ZEBRA_OK)
+                {
+                    res = ZEBRA_FAIL;
+                    no_records_fail++;
+                }
+
+                r1 = rec_get_next(zh->reg->records, rec); /* fetch the next record before the current one is freed */
+                rec_free(&rec);
+                rec = r1;
+                no_records_total++;
+                total_keys += no_keys; /* added for failing records as well as passing ones */
+            }
+            yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT, no_records_total);
+            yaz_log(YLOG_LOG, "records fail:  " ZINT_FORMAT, no_records_fail);
+            yaz_log(YLOG_LOG, "keys:    " ZINT_FORMAT, total_keys);
+        }
+        zebra_end_read(zh); /* called whenever zebra_begin_read succeeded, including when zh->reg is null */
+    }
+    return res;
+}
+
 void zebra_result(ZebraHandle zh, int *code, char **addinfo)
 {
     yaz_log(log_level, "zebra_result");