Refactor if stmt a bit
[idzebra-moved-to-github.git] / index / zebraapi.c
index dda541f..2e64dd0 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: zebraapi.c,v 1.269 2007-12-20 11:15:42 adam Exp $
-   Copyright (C) 1995-2007
-   Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2011 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <assert.h>
 #include <stdio.h>
 #include <limits.h>
@@ -64,20 +64,24 @@ static ZEBRA_RES zebra_check_handle(ZebraHandle zh)
 
 #define ZEBRA_CHECK_HANDLE(zh) if (zebra_check_handle(zh) != ZEBRA_OK) return ZEBRA_FAIL
 
-static void zebra_chdir(ZebraService zs)
+static int zebra_chdir(ZebraService zs) /* chdir to the "chdir" resource */
 {
     const char *dir ;
+    int r; /* result of chdir()/_chdir() */
     ASSERTZS;
     yaz_log(log_level, "zebra_chdir");
     dir = res_get(zs->global_res, "chdir");
     if (!dir)
-       return;
+       return 0; /* no "chdir" resource set: nothing to do */
     yaz_log(YLOG_DEBUG, "chdir %s", dir);
 #ifdef WIN32
-    _chdir(dir);
+    r = _chdir(dir);
 #else
-    chdir(dir);
+    r = chdir(dir);
 #endif
+    if (r) /* non-zero result: log it together with errno */
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "chdir %s", dir);
+    return r; /* the chdir()/_chdir() result; caller treats non-zero as failure */
 }
 
 static ZEBRA_RES zebra_flush_reg(ZebraHandle zh)
@@ -202,11 +206,14 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res)
         log_level = yaz_log_module_level("zebraapi");
         log_level_initialized = 1;
     }
-    
+
+    *system_str = '\0';
+    *version_str = '\0';
     zebra_get_version(version_str, system_str);
 
-    yaz_log(YLOG_LOG, "zebra_start %s %s", version_str,
-           configName ? configName : "");
+    yaz_log(YLOG_LOG, "zebra_start %s %s", version_str, system_str);
+    if (configName)
+        yaz_log(YLOG_LOG, "config %s", configName);
 
     if ((res = res_open(def_res, over_res)))
     {
@@ -239,7 +246,11 @@ ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res)
         zh->global_res = res;
         zh->sessions = 0;
         
-        zebra_chdir(zh);
+        if (zebra_chdir(zh))
+        {
+            xfree(zh);
+            return 0;
+        }
         
         zebra_mutex_cond_init(&zh->session_lock);
        passwd_plain = res_get(zh->global_res, "passwd");
@@ -326,9 +337,8 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 {
     struct zebra_register *reg;
     int record_compression = REC_COMPRESS_NONE;
-    const char *recordCompression = 0;
+    const char *compression_str = 0;
     const char *profilePath;
-    char cwd[1024];
     int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
 
@@ -374,7 +384,6 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
        }
     }
 
-    getcwd(cwd, sizeof(cwd)-1);
     profilePath = res_get_def(res, "profilePath", 0);
 
     data1_set_tabpath(reg->dh, profilePath);
@@ -410,14 +419,29 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     
     /* installing rank classes */
     zebraRankInstall(reg, rank_1_class);
+    zebraRankInstall(reg, rank_2_class);
     zebraRankInstall(reg, rank_similarity_class);
     zebraRankInstall(reg, rank_static_class);
 
-    recordCompression = res_get_def(res, "recordCompression", "none");
-    if (!strcmp(recordCompression, "none"))
+    compression_str = res_get_def(res, "recordCompression", "none");
+    if (!strcmp(compression_str, "none"))
        record_compression = REC_COMPRESS_NONE;
-    if (!strcmp(recordCompression, "bzip2"))
+    else if (!strcmp(compression_str, "bzip2"))
        record_compression = REC_COMPRESS_BZIP2;
+    else if (!strcmp(compression_str, "zlib"))
+       record_compression = REC_COMPRESS_ZLIB;
+    else
+    {
+        yaz_log(YLOG_FATAL, "invalid recordCompression: %s", compression_str);
+        ret = ZEBRA_FAIL;
+    }
+
+    if (!rec_check_compression_method(record_compression))
+    {
+        yaz_log(YLOG_FATAL, "unsupported recordCompression: %s",
+                compression_str);
+        ret = ZEBRA_FAIL;
+    }
 
     {
        const char *index_fname = res_get_def(res, "index", "default.idx");
@@ -426,6 +450,10 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
            if (zebra_maps_read_file(reg->zebra_maps, index_fname) != ZEBRA_OK)
                ret = ZEBRA_FAIL;
        }
+        else
+        {
+            zebra_maps_define_default_sort(reg->zebra_maps);
+        }
     }
 
     if (!(reg->records = rec_open(reg->bfs, rw, record_compression)))
@@ -448,6 +476,8 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
         sort_type = ZEBRA_SORT_TYPE_FLAT;
     else if (res_get_match(res, "sortindex", "i", "f"))
         sort_type = ZEBRA_SORT_TYPE_ISAMB;
+    else if (res_get_match(res, "sortindex", "m", "f"))
+        sort_type = ZEBRA_SORT_TYPE_MULTI;
     else
     {
        yaz_log(YLOG_WARN, "bad_value for 'sortindex'");
@@ -1124,11 +1154,18 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
     }
     else
     {
-       for (i = 0; i<num_recs; i++)
+        WRBUF addinfo_w = wrbuf_alloc();
+       for (i = 0; i < num_recs; i++)
        {
+            recs[i].errCode = 0;
+            recs[i].errString = 0;
+            recs[i].format = 0;
+            recs[i].len = 0;
+            recs[i].buf = 0;
+            recs[i].base = 0;
+            recs[i].sysno = poset[i].sysno;
            if (poset[i].term)
            {
-               recs[i].errCode = 0;
                recs[i].format = yaz_oid_recsyn_sutrs;
                recs[i].len = strlen(poset[i].term);
                recs[i].buf = poset[i].term;
@@ -1146,13 +1183,17 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                zebra_snippets_hit_vector(zh, setname, poset[i].sysno, 
                                          hit_snippet);
 #endif
+                wrbuf_rewind(addinfo_w);
                recs[i].errCode =
                    zebra_record_fetch(zh, setname,
                                        poset[i].sysno, poset[i].score,
                                       stream, input_format, comp,
                                       &recs[i].format, &buf, &len,
-                                      &recs[i].base, &recs[i].errString);
+                                      &recs[i].base, addinfo_w);
                
+                if (wrbuf_len(addinfo_w))
+                    recs[i].errString =
+                        odr_strdup(stream, wrbuf_cstr(addinfo_w));
                recs[i].len = len;
                if (len > 0)
                {
@@ -1162,7 +1203,6 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                else
                    recs[i].buf = buf;
                 recs[i].score = poset[i].score;
-                recs[i].sysno = poset[i].sysno;
                zebra_snippets_destroy(hit_snippet);
            }
            else
@@ -1176,14 +1216,10 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                    ret = ZEBRA_FAIL;
                    break;
                }
-               recs[i].buf = 0;  /* no record and no error issued */
-               recs[i].len = 0;
-               recs[i].errCode = 0;
-               recs[i].format = 0;
-               recs[i].sysno = 0;
            }
        }
        zebra_meta_records_destroy(zh, poset, num_recs);
+        wrbuf_destroy(addinfo_w);
     }
     zebra_end_read(zh);
     xfree(pos_array);
@@ -1418,7 +1454,6 @@ int delete_w_handle(const char *info, void *handle)
 {
     ZebraHandle zh = (ZebraHandle) handle;
     ISAM_P pos;
-    ASSERTZH;
 
     if (*info == sizeof(pos))
     {
@@ -1428,18 +1463,52 @@ int delete_w_handle(const char *info, void *handle)
     return 0;
 }
 
-static int delete_SU_handle(void *handle, int ord)
+/* dict_delete_subtree callback: deletes each record listed, then the list. */
+int delete_w_all_handle(const char *info, void *handle)
+{
+    ZebraHandle zh = (ZebraHandle) handle;
+    ISAM_P pos;
+    if (*info == sizeof(pos))
+    {
+        ISAMB_PP pt;
+        memcpy(&pos, info+1, sizeof(pos));
+        pt = isamb_pp_open(zh->reg->isamb, pos, 2);
+        if (pt)
+        {
+            struct it_key key;
+            key.mem[0] = 0;
+            while (isamb_pp_read(pt, &key))
+            {
+                /* key.mem[0] is used as the record's sysno */
+                Record rec = rec_get(zh->reg->records, key.mem[0]);
+                if (rec) /* rec_get yields NULL when the record is gone */
+                    rec_del(zh->reg->records, &rec);
+            }
+            isamb_pp_close(pt);
+        }
+    }
+    return delete_w_handle(info, handle);
+}
+
+static int delete_SU_handle(void *handle, int ord, /* drop ord's subtree */
+                            const char *index_type, const char *string_index,
+                            zinfo_index_category_t cat)
 {
     ZebraHandle zh = (ZebraHandle) handle;
     char ord_buf[20];
     int ord_len;
-
+#if 0 /* disabled debug trace of the callback arguments */
+    yaz_log(YLOG_LOG, "ord=%d index_type=%s index=%s cat=%d", ord,
+            index_type, string_index, (int) cat);
+#endif
     ord_len = key_SU_encode(ord, ord_buf);
     ord_buf[ord_len] = '\0';
 
     assert(zh->reg->isamb);
+    assert(zh->reg->records); /* delete_w_all_handle deletes from it */
     dict_delete_subtree(zh->reg->dict, ord_buf,
-                       zh, delete_w_handle);
+                       zh, /* "_ALLRECORDS" index: delete its records too */
+                        !strcmp(string_index, "_ALLRECORDS") ?
+                        delete_w_all_handle : delete_w_handle);
     return 0;
 }
 
@@ -1558,7 +1627,11 @@ static void zebra_set_state(ZebraHandle zh, int val, int seqno)
     sprintf(state_fname, "state.%s.LCK", zh->reg_name);
     fname = zebra_mk_fname(res_get(zh->res, "lockDir"), state_fname);
     f = fopen(fname, "w");
-
+    if (!f)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s w", state_fname);
+        exit(1); 
+    }
     yaz_log(YLOG_DEBUG, "zebra_set_state: %c %d %ld", val, seqno, p);
     fprintf(f, "%c %d %ld\n", val, seqno, p);
     fclose(f);
@@ -1582,7 +1655,11 @@ static void zebra_get_state(ZebraHandle zh, char *val, int *seqno)
 
     if (f)
     {
-        fscanf(f, "%c %d", val, seqno);
+        if (fscanf(f, "%c %d", val, seqno) != 2)
+        {
+            yaz_log(YLOG_ERRNO|YLOG_WARN, "fscan fail %s",
+                    state_fname);
+        }
         fclose(f);
     }
     xfree(fname);
@@ -1735,12 +1812,8 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw)
         zh->reg = zebra_register_open(zh->service, zh->reg_name,
                                      1, rval ? 1 : 0, zh->res,
                                      zh->path_reg);
-        if (zh->reg)
-            zh->reg->seqno = seqno;
-        else
+        if (!zh->reg)
         {
-            zebra_set_state(zh, 'o', seqno);
-            
             zebra_unlock(zh->lock_shadow);
             zebra_unlock(zh->lock_normal);
 
@@ -1752,6 +1825,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw)
             yaz_log(YLOG_FATAL, "%s", zh->errString);
             return ZEBRA_FAIL;
         }
+        zh->reg->seqno = seqno;
        zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]);
     }
     else
@@ -2124,6 +2198,300 @@ ZEBRA_RES zebra_compact(ZebraHandle zh)
     return ZEBRA_OK;
 }
 
+#define ZEBRA_CHECK_DICT 1
+#define ZEBRA_CHECK_ISAM 2
+
+/* Verifies the keys kept in a record's recInfo_delKeys buffer against the
+   dictionary and, optionally, the ISAM; ZEBRA_FAIL on any mismatch. */
+static ZEBRA_RES zebra_record_check(ZebraHandle zh, Record rec,
+                                    zint *no_keys, int message_limit,
+                                    unsigned flags,
+                                    zint *no_long_dict_entries,
+                                    zint *no_failed_dict_lookups,
+                                    zint *no_invalid_keys,
+                                    zint *no_invalid_dict_infos,
+                                    zint *no_invalid_isam_entries)
+{
+    ZEBRA_RES res = ZEBRA_OK;
+    zebra_rec_keys_t keys = zebra_rec_keys_open();
+    zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys],
+                           rec->size[recInfo_delKeys], 0);
+
+    *no_keys = 0;
+    if (!zebra_rec_keys_rewind(keys))
+    {
+        ;
+    }
+    else
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        NMEM nmem = nmem_create();
+
+        while (zebra_rec_keys_read(keys, &str, &slen, &key_in))
+        {
+            int do_fail = 0;
+            int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
+            char ord_buf[IT_MAX_WORD+20];
+            int ord_len = key_SU_encode(ord, ord_buf);
+            char *info = 0;
+
+            (*no_keys)++;
+
+            if (key_in.len < 2 || key_in.len > IT_KEY_LEVEL_MAX)
+            {
+                res = ZEBRA_FAIL;
+                (*no_invalid_keys)++;
+                if (*no_invalid_keys <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": unexpected key length %d",
+                            rec->sysno, key_in.len);
+                }
+            }
+            if (ord_len + slen >= sizeof(ord_buf)-1)
+            {
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    /* so bad it can not fit into our ord_buf */
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, ord_len, (int) slen);
+                }
+                continue;
+            }
+            memcpy(ord_buf + ord_len, str, slen);
+            ord_buf[ord_len + slen] = '\0';
+            if (ord_len + slen >= IT_MAX_WORD)
+            {
+                res = ZEBRA_FAIL;
+                (*no_long_dict_entries)++;
+                if (*no_long_dict_entries <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": long dictionary entry %d + %d",
+                            rec->sysno, (int) ord_len, (int) slen);
+                }
+            }
+            if ((flags & ZEBRA_CHECK_DICT) == 0)
+                continue;
+            info = dict_lookup(zh->reg->dict, ord_buf);
+            if (!info)
+            {
+                res = ZEBRA_FAIL;
+                (*no_failed_dict_lookups)++;
+                if (*no_failed_dict_lookups <= message_limit)
+                {
+                    do_fail = 1;
+                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                            ": term do not exist in dictionary", rec->sysno);
+                }
+            }
+            /* bad-length keys were counted above; do not copy from them */
+            else if ((flags & ZEBRA_CHECK_ISAM)
+                     && key_in.len >= 2 && key_in.len <= IT_KEY_LEVEL_MAX)
+            {
+                ISAM_P pos;
+
+                if (*info != sizeof(pos))
+                {
+                    res = ZEBRA_FAIL;
+                    (*no_invalid_dict_infos)++;
+                    if (*no_invalid_dict_infos <= message_limit)
+                    {
+                        do_fail = 1;
+                        yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                ": invalid dictionary info size %d",
+                                rec->sysno, (int) *info);
+                    }
+                }
+                else
+                {
+                    int scope = 1;
+                    memcpy(&pos, info+1, sizeof(pos));
+                    if (zh->reg->isamb)
+                    {
+                        ISAMB_PP ispt = isamb_pp_open(zh->reg->isamb, pos,
+                                                      scope);
+                        if (!ispt)
+                        {
+                            res = ZEBRA_FAIL;
+                            (*no_invalid_isam_entries)++;
+                            if (*no_invalid_isam_entries <= message_limit)
+                            {
+                                do_fail = 1;
+                                yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                        ": isamb_pp_open entry " ZINT_FORMAT
+                                        " not found",
+                                        rec->sysno, pos);
+                            }
+                        }
+                        else if (zh->m_staticrank)
+                        {
+                            isamb_pp_close(ispt);
+                        }
+                        else
+                        {
+                            struct it_key until_key;
+                            struct it_key isam_key;
+                            int r;
+                            int i = 0;
+
+                            until_key.len = key_in.len - 1;
+                            for (i = 0; i < until_key.len; i++)
+                                until_key.mem[i] = key_in.mem[i+1];
+
+                            if (until_key.mem[0] == 0)
+                                until_key.mem[0] = rec->sysno;
+                            r = isamb_pp_forward(ispt, &isam_key, &until_key);
+                            if (r != 1)
+                            {
+                                res = ZEBRA_FAIL;
+                                (*no_invalid_isam_entries)++;
+                                if (*no_invalid_isam_entries <= message_limit)
+                                {
+                                    do_fail = 1;
+                                    yaz_log(YLOG_WARN, "Record " ZINT_FORMAT
+                                            ": isamb_pp_forward " ZINT_FORMAT
+                                            " returned no entry",
+                                            rec->sysno, pos);
+                                }
+                            }
+                            else
+                            {
+                                int cmp = key_compare(&until_key, &isam_key);
+                                if (cmp != 0)
+                                {
+                                    res = ZEBRA_FAIL;
+                                    (*no_invalid_isam_entries)++;
+                                    if (*no_invalid_isam_entries
+                                        <= message_limit)
+                                    {
+                                        do_fail = 1;
+                                        yaz_log(YLOG_WARN, "Record "
+                                                ZINT_FORMAT
+                                                ": isamb_pp_forward "
+                                                ZINT_FORMAT
+                                                " returned different entry",
+                                                rec->sysno, pos);
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &until_key,
+                                                        "until");
+                                        key_logdump_txt(YLOG_LOG,
+                                                        &isam_key,
+                                                        "isam");
+                                    }
+                                }
+                            }
+                            isamb_pp_close(ispt);
+                        }
+                    }
+                }
+            }
+            if (do_fail)
+            {
+                zebra_it_key_str_dump(zh, &key_in, str,
+                                      slen, nmem, YLOG_LOG);
+                nmem_reset(nmem);
+            }
+        }
+        nmem_destroy(nmem);
+    }
+    zebra_rec_keys_close(keys);
+    return res;
+}
+
+/* Walks the register's records and checks each one; spec picks the level. */
+ZEBRA_RES zebra_register_check(ZebraHandle zh, const char *spec)
+{
+    ZEBRA_RES res = ZEBRA_FAIL; /* becomes ZEBRA_OK only once zh->reg is seen */
+    unsigned flags = 0;
+    int message_limit = 10; /* passed on to zebra_record_check */
+    
+    if (!spec || *spec == '\0'
+        || !strcmp(spec, "dict") || !strcmp(spec, "default"))
+        flags = ZEBRA_CHECK_DICT; /* NULL or empty spec behaves like "dict" */
+    else if (!strcmp(spec, "isam") || !strcmp(spec, "full"))
+        flags = ZEBRA_CHECK_DICT|ZEBRA_CHECK_ISAM;
+    else if (!strcmp(spec, "quick"))
+        flags = 0; /* neither dictionary nor ISAM checks */
+    else
+        return ZEBRA_FAIL; /* unrecognised spec */
+
+    yaz_log(YLOG_LOG, "zebra_register_check begin flags=%u message_limit=%d",
+            flags, message_limit);
+    if (zebra_begin_read(zh) == ZEBRA_OK)
+    {
+        zint no_records_total = 0;
+        zint no_records_fail = 0;
+        zint total_keys = 0;
+
+        if (zh->reg)
+        {
+            Record rec = rec_get_root(zh->reg->records);
+            zint no_long_dict_entries = 0;
+            zint no_failed_dict_lookups = 0;
+            zint no_invalid_keys = 0;
+            zint no_invalid_dict_infos = 0;
+            zint no_invalid_isam_entries = 0;
+
+            res = ZEBRA_OK;
+            while (rec)
+            {
+                Record r1;
+                zint no_keys;
+
+                if (zebra_record_check(zh, rec, &no_keys, message_limit,
+                                       flags,
+                                       &no_long_dict_entries,
+                                       &no_failed_dict_lookups,
+                                       &no_invalid_keys,
+                                       &no_invalid_dict_infos,
+                                       &no_invalid_isam_entries
+                        )
+                    != ZEBRA_OK)
+                {
+                    res = ZEBRA_FAIL; /* one bad record fails the whole check */
+                    no_records_fail++;
+                }
+
+                r1 = rec_get_next(zh->reg->records, rec); /* before freeing rec */
+                rec_free(&rec);
+                rec = r1;
+                no_records_total++;
+                total_keys += no_keys;
+            }
+            yaz_log(YLOG_LOG, "records total:        " ZINT_FORMAT,
+                    no_records_total);
+            yaz_log(YLOG_LOG, "records fail:         " ZINT_FORMAT,
+                    no_records_fail);
+            yaz_log(YLOG_LOG, "total keys:           " ZINT_FORMAT,
+                    total_keys);
+            yaz_log(YLOG_LOG, "long dict entries:    " ZINT_FORMAT,
+                    no_long_dict_entries);
+            if (flags & ZEBRA_CHECK_DICT)
+            {
+                yaz_log(YLOG_LOG, "failed dict lookups:  " ZINT_FORMAT,
+                        no_failed_dict_lookups);
+                yaz_log(YLOG_LOG, "invalid dict infos:   " ZINT_FORMAT,
+                        no_invalid_dict_infos);
+            }
+            if (flags & ZEBRA_CHECK_ISAM)
+                yaz_log(YLOG_LOG, "invalid isam entries: " ZINT_FORMAT,
+                        no_invalid_isam_entries);
+        }
+        zebra_end_read(zh);
+    }
+    yaz_log(YLOG_LOG, "zebra_register_check end ret=%d", res);
+    return res;
+}
+
 void zebra_result(ZebraHandle zh, int *code, char **addinfo)
 {
     yaz_log(log_level, "zebra_result");
@@ -2259,7 +2627,6 @@ ZEBRA_RES zebra_update_record(ZebraHandle zh,
        return ZEBRA_FAIL;
     res = zebra_buffer_extract_record(zh, buf, buf_size, 
                                       action,
-                                      0, /* test_mode */
                                       recordType,
                                       sysno,   
                                       match, 
@@ -2411,6 +2778,7 @@ void zebra_lock_prefix(Res res, char *path)
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab