Don't reset reg state if register cannot be opened
[idzebra-moved-to-github.git] / index / zebraapi.c
index 885c684..6415172 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 1994-2010 Index Data
+   Copyright (C) 1994-2011 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <assert.h>
 #include <stdio.h>
 #include <limits.h>
@@ -1624,7 +1627,11 @@ static void zebra_set_state(ZebraHandle zh, int val, int seqno)
     sprintf(state_fname, "state.%s.LCK", zh->reg_name);
     fname = zebra_mk_fname(res_get(zh->res, "lockDir"), state_fname);
     f = fopen(fname, "w");
-
+    if (!f)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s w", state_fname);
+        exit(1); 
+    }
     yaz_log(YLOG_DEBUG, "zebra_set_state: %c %d %ld", val, seqno, p);
     fprintf(f, "%c %d %ld\n", val, seqno, p);
     fclose(f);
@@ -1809,8 +1816,6 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw)
             zh->reg->seqno = seqno;
         else
         {
-            zebra_set_state(zh, 'o', seqno);
-            
             zebra_unlock(zh->lock_shadow);
             zebra_unlock(zh->lock_normal);
 
@@ -2194,10 +2199,19 @@ ZEBRA_RES zebra_compact(ZebraHandle zh)
     return ZEBRA_OK;
 }
 
+#define ZEBRA_CHECK_DICT 1
+#define ZEBRA_CHECK_ISAM 2
+
 static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec,
-                                    zint *no_keys)
+                                    zint *no_keys, int message_limit,
+                                    unsigned flags,
+                                    zint *no_long_dict_entries,
+                                    zint *no_failed_dict_lookups,
+                                    zint *no_invalid_keys,
+                                    zint *no_invalid_dict_infos,
+                                    zint *no_invalid_isam_entries)
 {
-    ZEBRA_RES res = ZEBRA_FAIL;
+    ZEBRA_RES res = ZEBRA_OK;
     zebra_rec_keys_t keys = zebra_rec_keys_open();
     zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
                            rec->size[recInfo_delKeys], 0);
@@ -2212,78 +2226,270 @@ static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec,
         size_t slen;
         const char *str;
         struct it_key key_in;
-        int no_long_dict_entries = 0;
-        int no_failed_dict_lookup = 0;
+        NMEM nmem = nmem_create();
 
         while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
         {
+            int do_fail = 0;
             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
-            char ord_buf[IT_MAX_WORD];
+            char ord_buf[IT_MAX_WORD+20];
             int ord_len = key_SU_encode(ord, ord_buf);
-            
+            char *info = 0;
+
+            (*no_keys)++;
+
+            if (key_in.len < 2 || key_in.len > IT_KEY_LEVEL_MAX)
+            {
+                res = ZEBRA_FAIL;
+                (*no_invalid_keys)++;
+                if (*no_invalid_keys <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": unexpected key length %d",
+                            rec->sysno, key_in.len);
+                }
+            }
+            if (ord_len + slen >= sizeof(ord_buf)-1)
+            {
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    /* entry is too long to fit into ord_buf */
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, ord_len, (int) slen);
+                }
+                continue;
+            }
             memcpy(ord_buf + ord_len, str, slen);
             ord_buf[ord_len + slen] = '\0'; 
             if (ord_len + slen >= IT_MAX_WORD)
-                ++no_long_dict_entries;
-            else
             {
-                char *info = dict_lookup(zh->reg->dict, ord_buf);
-                if (!info)
-                    no_failed_dict_lookup++;
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT 
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, (int) ord_len, (int) slen);
+                }
+            }
+            if ((flags & ZEBRA_CHECK_DICT) == 0)
+                continue;
+            info = dict_lookup(zh->reg->dict, ord_buf);
+            if (!info)
+            {
+                res = ZEBRA_FAIL;
+                (*no_failed_dict_lookups)++;
+                if (*no_failed_dict_lookups <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": term do not exist in dictionary", rec->sysno);
+                }
+            }
+            else if (flags & ZEBRA_CHECK_ISAM)
+            {
+                ISAM_P pos;
+
+                if (*info != sizeof(pos))
+                {
+                    res = ZEBRA_FAIL;
+                    (*no_invalid_dict_infos)++;
+                    if (*no_invalid_dict_infos <= message_limit)
+                    {
+                        do_fail = 1;
+                        yaz_log(YLOG_WARN, "Record " ZINT_FORMAT 
+                                ": long dictionary entry %d + %d",
+                                rec->sysno, (int) ord_len, (int) slen);
+                    }
+                }
                 else
                 {
-                    ;
+                    int scope = 1;
+                    memcpy(&pos, info+1, sizeof(pos));
+                    if (zh->reg->isamb)
+                    {
+                        ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos,
+                                                      scope);
+                        if (!ispt)
+                        {
+                            res = ZEBRA_FAIL;
+                            (*no_invalid_isam_entries)++;
+                            if (*no_invalid_isam_entries <= message_limit)
+                            {
+                                do_fail = 1;
+                                yaz_log(YLOG_WARN, "Record " ZINT_FORMAT 
+                                        ": isamb_pp_open entry " ZINT_FORMAT
+                                        " not found",
+                                        rec->sysno, pos);
+                            }
+                        }
+                        else if (zh->m_staticrank)
+                        {
+                            isamb_pp_close(ispt);
+                        }
+                        else
+                        {
+                            struct it_key until_key;
+                            struct it_key isam_key;
+                            int r;
+                            int i = 0;
+                            
+                            until_key.len = key_in.len - 1;
+                            for (i = 0; i < until_key.len; i++)
+                                until_key.mem[i] = key_in.mem[i+1];
+                            
+                            if (until_key.mem[0] == 0)
+                                until_key.mem[0] = rec->sysno;
+                            r = isamb_pp_forward(ispt, &isam_key, &until_key);
+                            if (r != 1)
+                            {
+                                res = ZEBRA_FAIL;
+                                (*no_invalid_isam_entries)++;
+                                if (*no_invalid_isam_entries <= message_limit)
+                                {
+                                    do_fail = 1;
+                                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT 
+                                            ": isamb_pp_forward " ZINT_FORMAT
+                                            " returned no entry",
+                                            rec->sysno, pos);
+                                }
+                            }
+                            else
+                            {
+                                int cmp = key_compare(&until_key, &isam_key);
+                                if (cmp != 0)
+                                {
+                                    res = ZEBRA_FAIL;
+                                    (*no_invalid_isam_entries)++;
+                                    if (*no_invalid_isam_entries
+                                        <= message_limit)
+                                    {
+                                        do_fail = 1;
+                                        yaz_log(YLOG_WARN, "Record "
+                                                ZINT_FORMAT 
+                                                ": isamb_pp_forward "
+                                                ZINT_FORMAT
+                                                " returned different entry",
+                                                rec->sysno, pos);
+
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &until_key,
+                                                        "until");
+
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &isam_key,
+                                                        "isam");
+
+                                    }
+                                }
+                            }
+                            isamb_pp_close(ispt);
+                        }
+
+                    }
                 }
             }
-            (*no_keys)++;
+            if (do_fail)
+            {
+                zebra_it_key_str_dump(zh, &key_in, str,
+                                      slen, nmem, YLOG_LOG);
+                nmem_reset(nmem);
+            }
         }
-        if (no_long_dict_entries)
-        {
-            yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT
-                    " has %d dictionary entries that are too long",
-                    rec->sysno, no_long_dict_entries);
-        }            
-        if (no_failed_dict_lookup)
-        {
-            yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT
-                    " has %d terms that do not exist in dictionary",
-                    rec->sysno, no_failed_dict_lookup);
-        }            
-        res = ZEBRA_OK;
+        nmem_destroy(nmem);
     }
     zebra_rec_keys_close(keys);
     return res;
 }
 
-ZEBRA_RES zebra_register_check(ZebraHandle zh)
+ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec)
 {
     ZEBRA_RES res = ZEBRA_FAIL;
+    unsigned flags = 0;
+    int message_limit = 10;
+    
+    if (!spec || *spec == '\0'
+        || !strcmp(spec, "dict") || !strcmp(spec, "default"))
+        flags = ZEBRA_CHECK_DICT;
+    else if (!strcmp(spec, "isam") || !strcmp(spec, "full"))
+        flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM;
+    else if (!strcmp(spec, "quick"))
+        flags = 0;
+    else
+        return ZEBRA_FAIL;
+
+    yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u message_limit=%d",
+            flags, message_limit);
     if (zebra_begin_read(zh) == ZEBRA_OK)
     {
-        zint no_records = 0;
+        zint no_records_total = 0;
+        zint no_records_fail = 0;
         zint total_keys = 0;
+
         if (zh->reg)
         {
             Record rec = rec_get_root(zh->reg->records);
             
+            zint no_long_dict_entries = 0;
+            zint no_failed_dict_lookups = 0;
+            zint no_invalid_keys = 0;
+            zint no_invalid_dict_infos = 0;
+            zint no_invalid_isam_entries = 0;
+
+            res = ZEBRA_OK;
             while (rec)
             {
                 Record r1;
                 zint no_keys;
-    
-                zebra_record_check(zh, rec, &no_keys);
+
+                if (zebra_record_check(zh, rec, &no_keys, message_limit,
+                                       flags,
+                                       &no_long_dict_entries,
+                                       &no_failed_dict_lookups,
+                                       &no_invalid_keys,
+                                       &no_invalid_dict_infos,
+                                       &no_invalid_isam_entries
+                        )
+                    != ZEBRA_OK)
+                {
+                    res = ZEBRA_FAIL;
+                    no_records_fail++;
+                }
+
                 r1 = rec_get_next(zh->reg->records, rec);
                 rec_free(&rec);
                 rec = r1;
-                no_records++;
+                no_records_total++;
                 total_keys += no_keys;
             }
-            res = ZEBRA_OK;
-            yaz_log(YLOG_LOG, "records: " ZINT_FORMAT, no_records);
-            yaz_log(YLOG_LOG, "keys:    " ZINT_FORMAT, total_keys);
+            yaz_log(YLOG_LOG, "records total:        " ZINT_FORMAT,
+                    no_records_total);
+            yaz_log(YLOG_LOG, "records fail:         " ZINT_FORMAT,
+                    no_records_fail);
+            yaz_log(YLOG_LOG, "total keys:           " ZINT_FORMAT,
+                    total_keys);
+            yaz_log(YLOG_LOG, "long dict entries:    " ZINT_FORMAT,
+                    no_long_dict_entries);
+            if (flags & ZEBRA_CHECK_DICT)
+            {
+                yaz_log(YLOG_LOG, "failed dict lookups:  " ZINT_FORMAT,
+                        no_failed_dict_lookups);
+                yaz_log(YLOG_LOG, "invalid dict infos:   " ZINT_FORMAT,
+                        no_invalid_dict_infos);
+            }
+            if (flags & ZEBRA_CHECK_ISAM)
+                yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT,
+                        no_invalid_isam_entries);
         }
         zebra_end_read(zh);
     }
+    yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res);
     return res;
 }