Zebra returns character encoding as part of init response even if
[idzebra-moved-to-github.git] / index / zebraapi.c
index d8857c1..10a4954 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: zebraapi.c,v 1.234 2006-11-21 22:17:49 adam Exp $
-   Copyright (C) 1995-2006
+/* $Id: zebraapi.c,v 1.256 2007-05-21 11:54:59 adam Exp $
+   Copyright (C) 1995-2007
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -40,6 +40,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "orddict.h"
 #include <charmap.h>
 #include <idzebra/api.h>
+#include <yaz/oid_db.h>
 
 #define DEFAULT_APPROX_LIMIT 2000000000
 
@@ -98,6 +99,12 @@ static struct zebra_register *zebra_register_open(ZebraService zs,
                                                  const char *reg_path);
 static void zebra_register_close(ZebraService zs, struct zebra_register *reg);
 
+const char *zebra_get_encoding(ZebraHandle zh)
+{
+    assert(zh && zh->session_res);
+    return res_get_def(zh->session_res, "encoding", "ISO-8859-1");
+}
+
 ZebraHandle zebra_open(ZebraService zs, Res res)
 {
     ZebraHandle zh;
@@ -143,7 +150,10 @@ ZebraHandle zebra_open(ZebraService zs, Res res)
     zh->m_staticrank = 0;
     zh->m_segment_indexing = 0;
 
-    default_encoding = res_get_def(zh->session_res, "encoding", "ISO-8859-1");
+    zh->break_handler_func = 0;
+    zh->break_handler_data = 0;
+
+    default_encoding = zebra_get_encoding(zh);
 
     zh->iconv_to_utf8 =
         yaz_iconv_open ("UTF-8", default_encoding);
@@ -174,12 +184,12 @@ ZebraHandle zebra_open(ZebraService zs, Res res)
     return zh;
 }
 
-ZebraService zebra_start (const char *configName)
+ZebraService zebra_start(const char *configName)
 {
     return zebra_start_res(configName, 0, 0);
 }
 
-ZebraService zebra_start_res (const char *configName, Res def_res, Res over_res)
+ZebraService zebra_start_res(const char *configName, Res def_res, Res over_res)
 {
     Res res;
 
@@ -209,7 +219,19 @@ ZebraService zebra_start_res (const char *configName, Res def_res, Res over_res)
                res_close(res);
                return 0;
            }
+            if (zebra_check_res(res))
+            {
+                yaz_log(YLOG_FATAL, "Configuration error(s) for %s",
+                        configName);
+                res_close(res); /* release res, as the read-failure path above does */
+                return 0;
+            }
        }
+        else
+        {
+            zebra_check_res(res);
+        }
+
        zh = xmalloc(sizeof(*zh));
         zh->global_res = res;
         zh->sessions = 0;
@@ -247,6 +268,7 @@ ZebraService zebra_start_res (const char *configName, Res def_res, Res over_res)
            }
        }
 
+        zh->timing = yaz_timing_create();
         zh->path_root = res_get (zh->global_res, "root");
        zh->nmem = nmem_create();
        zh->record_classes = recTypeClass_create (zh->global_res, zh->nmem);
@@ -303,6 +325,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     const char *recordCompression = 0;
     const char *profilePath;
     char cwd[1024];
+    int sort_type = ZEBRA_SORT_TYPE_FLAT;
     ZEBRA_RES ret = ZEBRA_OK;
 
     ASSERTZS;
@@ -348,8 +371,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
     }
 
     getcwd(cwd, sizeof(cwd)-1);
-    profilePath = res_get_def(res, "profilePath", DEFAULT_PROFILE_PATH);
-    yaz_log(YLOG_DEBUG, "profilePath=%s cwd=%s", profilePath, cwd);
+    profilePath = res_get_def(res, "profilePath", 0);
 
     data1_set_tabpath (reg->dh, profilePath);
     data1_set_tabroot (reg->dh, reg_path);
@@ -375,7 +397,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
 
     reg->records = 0;
     reg->dict = 0;
-    reg->sortIdx = 0;
+    reg->sort_index = 0;
     reg->isams = 0;
     reg->matchDict = 0;
     reg->isamc = 0;
@@ -417,9 +439,22 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name,
        yaz_log (YLOG_WARN, "dict_open failed");
        ret = ZEBRA_FAIL;
     }
-    if (!(reg->sortIdx = sortIdx_open (reg->bfs, rw)))
+
+    
+    if (res_get_match (res, "sortindex", "f", "f"))
+        sort_type = ZEBRA_SORT_TYPE_FLAT;
+    else if (res_get_match (res, "sortindex", "i", "f"))
+        sort_type = ZEBRA_SORT_TYPE_ISAMB;
+    else
     {
-       yaz_log (YLOG_WARN, "sortIdx_open failed");
+       yaz_log (YLOG_WARN, "bad_value for 'sortindex'");
+       ret = ZEBRA_FAIL;
+    }
+
+
+    if (!(reg->sort_index = zebra_sort_open(reg->bfs, rw, sort_type)))
+    {
+       yaz_log (YLOG_WARN, "zebra_sort_open failed");
        ret = ZEBRA_FAIL;
     }
     if (res_get_match (res, "isam", "s", ISAM_DEFAULT))
@@ -530,7 +565,7 @@ static void zebra_register_close(ZebraService zs, struct zebra_register *reg)
     dict_close (reg->dict);
     if (reg->matchDict)
        dict_close (reg->matchDict);
-    sortIdx_close (reg->sortIdx);
+    zebra_sort_close(reg->sort_index);
     if (reg->isams)
        isams_close (reg->isams);
     if (reg->isamc)
@@ -557,8 +592,6 @@ ZEBRA_RES zebra_stop(ZebraService zs)
 {
     if (!zs)
         return ZEBRA_OK;
-    yaz_log (log_level, "zebra_stop");
-
     while (zs->sessions)
     {
         zebra_close (zs->sessions);
@@ -572,6 +605,15 @@ ZEBRA_RES zebra_stop(ZebraService zs)
     recTypeClass_destroy(zs->record_classes);
     nmem_destroy(zs->nmem);
     res_close (zs->global_res);
+
+    yaz_timing_stop(zs->timing);
+    yaz_log (YLOG_LOG, "zebra_stop: %4.2f %4.2f %4.2f",
+             yaz_timing_get_real(zs->timing),
+             yaz_timing_get_user(zs->timing),
+             yaz_timing_get_sys(zs->timing));
+    
+
+    yaz_timing_destroy(&zs->timing);
     xfree(zs);
     return ZEBRA_OK;
 }
@@ -983,8 +1025,25 @@ ZEBRA_RES zebra_set_approx_limit(ZebraHandle zh, zint approx_limit)
     return ZEBRA_OK;
 }
 
-ZEBRA_RES zebra_search_RPN(ZebraHandle zh, ODR o, Z_RPNQuery *query,
-                          const char *setname, zint *hits)
+void zebra_set_partial_result(ZebraHandle zh)
+{
+    zh->partial_result = 1;
+}
+
+
+ZEBRA_RES zebra_set_break_handler(ZebraHandle zh,
+                                  int (*f)(void *client_data),
+                                  void *client_data)
+{
+    zh->break_handler_func = f;
+    zh->break_handler_data = client_data;
+    return ZEBRA_OK;
+}
+
+ZEBRA_RES zebra_search_RPN_x(ZebraHandle zh, ODR o, Z_RPNQuery *query,
+                             const char *setname, zint *hits,
+                             int *estimated_hit_count,
+                             int *partial_resultset)
 {
     ZEBRA_RES r;
     
@@ -995,23 +1054,35 @@ ZEBRA_RES zebra_search_RPN(ZebraHandle zh, ODR o, Z_RPNQuery *query,
     assert(hits);
     assert(setname);
     yaz_log(log_level, "zebra_search_rpn");
-    zh->hits = 0;
-    *hits = 0;
+
+    zh->partial_result = 0;
 
     if (zebra_begin_read(zh) == ZEBRA_FAIL)
        return ZEBRA_FAIL;
 
     r = resultSetAddRPN(zh, odr_extract_mem(o), query, 
-                       zh->num_basenames, zh->basenames, setname);
+                       zh->num_basenames, zh->basenames, setname,
+                        hits, estimated_hit_count);
+
+    *partial_resultset = zh->partial_result;
     zebra_end_read(zh);
-    *hits = zh->hits;
     return r;
 }
 
+ZEBRA_RES zebra_search_RPN(ZebraHandle zh, ODR o, Z_RPNQuery *query,
+                           const char *setname, zint *hits)
+{
+    int estimated_hit_count;
+    int partial_resultset;
+    return zebra_search_RPN_x(zh, o, query, setname, hits,
+                              &estimated_hit_count,
+                              &partial_resultset);
+}
+
 ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                                 const char *setname,
                                 Z_RecordComposition *comp,
-                                oid_value input_format, int num_recs,
+                                const Odr_oid *input_format, int num_recs,
                                 ZebraRetrievalRecord *recs)
 {
     ZebraMetaRecord *poset;
@@ -1055,7 +1126,7 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
            if (poset[i].term)
            {
                recs[i].errCode = 0;
-               recs[i].format = VAL_SUTRS;
+               recs[i].format = yaz_oid_recsyn_sutrs;
                recs[i].len = strlen(poset[i].term);
                recs[i].buf = poset[i].term;
                recs[i].base = poset[i].db;
@@ -1066,9 +1137,12 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                int len = 0;
                zebra_snippets *hit_snippet = zebra_snippets_create();
 
+                /* we disable hit snippets for now. It does not work well
+                 and it slows retrieval down a lot */
+#if 0
                zebra_snippets_hit_vector(zh, setname, poset[i].sysno, 
                                          hit_snippet);
-
+#endif
                recs[i].errCode =
                    zebra_record_fetch(zh, poset[i].sysno, poset[i].score,
                                       hit_snippet,
@@ -1102,7 +1176,7 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream,
                recs[i].buf = 0;  /* no record and no error issued */
                recs[i].len = 0;
                recs[i].errCode = 0;
-               recs[i].format = VAL_NONE;
+               recs[i].format = 0;
                recs[i].sysno = 0;
            }
        }
@@ -1121,7 +1195,7 @@ ZEBRA_RES zebra_scan_PQF(ZebraHandle zh, ODR stream, const char *query,
 {
     YAZ_PQF_Parser pqf_parser = yaz_pqf_create ();
     Z_AttributesPlusTerm *zapt;
-    int *attributeSet;
+    Odr_oid *attributeSet;
     ZEBRA_RES res;
     
     if (!(zapt = yaz_pqf_scan(pqf_parser, stream, &attributeSet, query)))
@@ -1130,15 +1204,17 @@ ZEBRA_RES zebra_scan_PQF(ZebraHandle zh, ODR stream, const char *query,
        zh->errCode = YAZ_BIB1_SCAN_MALFORMED_SCAN;
     }
     else
-       res = zebra_scan(zh, stream, zapt, VAL_BIB1,
+    {
+       res = zebra_scan(zh, stream, zapt, yaz_oid_attset_bib_1,
                         position, num_entries, entries, is_partial,
                         setname);
+    }
     yaz_pqf_destroy (pqf_parser);
     return res;
 }
 
 ZEBRA_RES zebra_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
-                    oid_value attributeset,
+                    const Odr_oid *attributeset,
                     int *position,
                     int *num_entries, ZebraScanEntry **entries,
                     int *is_partial,
@@ -1333,13 +1409,14 @@ ZEBRA_RES zebra_admin_import_segment (ZebraHandle zh, Z_Segment *segment)
                Odr_oct *oct = fragment->u.notExternallyTagged;
                sysno = 0;
                
-               if (zebra_update_record(zh, 
-                                       0, /* record Type */
-                                       &sysno,
-                                       0, /* match */
-                                       0, /* fname */
-                                       (const char *) oct->buf, oct->len,
-                                       0) == ZEBRA_FAIL)
+               if (zebra_update_record(
+                        zh, 
+                        action_update,
+                        0, /* record Type */
+                        &sysno,
+                        0, /* match */
+                        0, /* fname */
+                        (const char *) oct->buf, oct->len) == ZEBRA_FAIL)
                    res = ZEBRA_FAIL;
            }
        }
@@ -1347,99 +1424,6 @@ ZEBRA_RES zebra_admin_import_segment (ZebraHandle zh, Z_Segment *segment)
     return res;
 }
 
-ZEBRA_RES zebra_admin_exchange_record(ZebraHandle zh,
-                                     const char *rec_buf,
-                                     size_t rec_len,
-                                     const char *recid_buf, size_t recid_len,
-                                     int action)
-    /* 1 = insert. Fail it already exists */
-    /* 2 = replace. Fail it does not exist */
-    /* 3 = delete. Fail if does not exist */
-    /* 4 = update. Insert/replace */
-{
-    ZEBRA_RES res;
-    zint sysno = 0;
-    char *rinfo = 0;
-    char recid_z[256];
-    int db_ord;
-    ZEBRA_CHECK_HANDLE(zh);
-    assert(action>0 && action <=4);
-    assert(rec_buf);
-
-    yaz_log(log_level, "zebra_admin_exchange_record ac=%d", action);
-
-    if (!recid_buf || recid_len <= 0 || recid_len >= sizeof(recid_z))
-    {
-       zebra_setError(zh, YAZ_BIB1_ES_IMMEDIATE_EXECUTION_FAILED,
-                      "no record ID or empty record ID");
-        return ZEBRA_FAIL;
-    }
-
-    memcpy (recid_z, recid_buf, recid_len);
-    recid_z[recid_len] = 0;
-
-    if (zebra_begin_trans(zh, 1) == ZEBRA_FAIL)
-       return ZEBRA_FAIL;
-
-    db_ord = zebraExplain_get_database_ord(zh->reg->zei);
-    rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord, recid_z);
-    if (rinfo)
-    {
-        if (action == 1)  /* fail if insert */
-        {
-           if (zebra_end_trans(zh) != ZEBRA_OK)
-               yaz_log(YLOG_WARN, "zebra_end_trans failed");
-           zebra_setError(zh, YAZ_BIB1_ES_IMMEDIATE_EXECUTION_FAILED,
-                          "Cannot insert record: already exist");
-           return ZEBRA_FAIL;
-       }
-
-        memcpy (&sysno, rinfo+1, sizeof(sysno));
-    }
-    else
-    {
-        if (action == 2 || action == 3) /* fail if delete or update */
-        {
-           if (zebra_end_trans(zh) != ZEBRA_OK)
-               yaz_log(YLOG_WARN, "zebra_end_trans failed");
-           zebra_setError(zh, YAZ_BIB1_ES_IMMEDIATE_EXECUTION_FAILED,
-                          "Cannot delete/update record: does not exist");
-            return ZEBRA_FAIL;
-       }
-       action = 1;  /* make it an insert (if it's an update).. */
-    }
-    res = zebra_buffer_extract_record(zh, rec_buf, rec_len,
-                                      action == 3 ? 1 : 0 /* delete flag */,
-                                      0, /* test mode */
-                                      0, /* recordType */
-                                      &sysno, 
-                                      0, /* match */
-                                      0, /* fname */
-                                      0, /* force update */
-                                      1  /* allow update */
-       );
-    if (res == ZEBRA_FAIL)
-    {
-       zebra_setError(zh, YAZ_BIB1_ES_IMMEDIATE_EXECUTION_FAILED,
-                      "Unable to parse record");
-    }
-    if (action == 1)
-    {
-        dict_insert_ord(zh->reg->matchDict, db_ord, recid_z,
-                       sizeof(sysno), &sysno);
-    }
-    else if (action == 3)
-    {
-        dict_delete_ord(zh->reg->matchDict, db_ord, recid_z);
-    }
-    if (zebra_end_trans(zh) != ZEBRA_OK)
-    {
-       yaz_log(YLOG_WARN, "zebra_end_trans failed");
-       res = ZEBRA_FAIL;
-    }
-    return res;
-}
-
 int delete_w_handle(const char *info, void *handle)
 {
     ZebraHandle zh = (ZebraHandle) handle;
@@ -1538,9 +1522,9 @@ ZEBRA_RES zebra_create_database (ZebraHandle zh, const char *db)
     return zebra_end_trans (zh);
 }
 
-int zebra_string_norm (ZebraHandle zh, unsigned reg_id,
-                      const char *input_str, int input_len,
-                      char *output_str, int output_len)
+int zebra_string_norm(ZebraHandle zh, unsigned reg_id,
+                     const char *input_str, int input_len,
+                     char *output_str, int output_len)
 {
     WRBUF wrbuf;
     ASSERTZH;
@@ -1568,9 +1552,9 @@ int zebra_string_norm (ZebraHandle zh, unsigned reg_id,
     \param seqno sequence number
     
     val is one of:
-    d=writing to shadow(dirty)
-    o=no writing, 
-    c=commit
+    d=writing to shadow(shadow enabled); writing to register (shadow disabled)
+    o=reading only
+    c=commit (writing to register, reading from shadow, shadow mode only)
 */
 static void zebra_set_state (ZebraHandle zh, int val, int seqno)
 {
@@ -1649,7 +1633,7 @@ static void read_res_for_transaction(ZebraHandle zh)
     v = res_get_prefix(zh->res, "openRW", group, "1");
     zh->m_flag_rw = atoi(v);
 
-    v = res_get_prefix(zh->res, "fileVerboseLimit", group, "100000");
+    v = res_get_prefix(zh->res, "fileVerboseLimit", group, "1000");
     zh->m_file_verbose_limit = atoi(v);
 }
 
@@ -1708,6 +1692,7 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw)
         zh->records_updated = 0;
         zh->records_deleted = 0;
         zh->records_processed = 0;
+        zh->records_skipped = 0;
         
 #if HAVE_SYS_TIMES_H
         times (&zh->tms1);
@@ -1730,6 +1715,11 @@ ZEBRA_RES zebra_begin_trans(ZebraHandle zh, int rw)
         if (val != 'o')
         {
             /* either we didn't finish commit or shadow is dirty */
+            if (!rval)
+            {
+                yaz_log(YLOG_WARN, "previous transaction did not finish "
+                        "(shadow disabled)");
+            }
             zebra_unlock (zh->lock_shadow);
             zebra_unlock (zh->lock_normal);
             if (zebra_commit (zh))
@@ -2015,6 +2005,7 @@ static ZEBRA_RES zebra_commit_ex(ZebraHandle zh, int clean_only)
 
     if (val == 'd')
     {
+        /* shadow area is dirty and so we must throw it away */
         yaz_log(YLOG_WARN, "previous transaction didn't reach commit");
         clean_only = 1;
     }
@@ -2231,48 +2222,20 @@ void zebra_set_shadow_enable (ZebraHandle zh, int value)
 ZEBRA_RES zebra_add_record(ZebraHandle zh,
                            const char *buf, int buf_size)
 {
-    return zebra_update_record(zh, 0, 0 /* sysno */, 0, 0, buf, buf_size, 0);
-}
-
-ZEBRA_RES zebra_insert_record(ZebraHandle zh, 
-                             const char *recordType,
-                             zint *sysno, const char *match,
-                             const char *fname,
-                             const char *buf, int buf_size, int force_update)
-{
-    ZEBRA_RES res;
-    ASSERTZH;
-    assert(sysno);
-    assert(buf);
-    yaz_log(log_level, "zebra_insert_record sysno=" ZINT_FORMAT, *sysno);
-
-    if (buf_size < 1)
-       buf_size = strlen(buf);
-
-    if (zebra_begin_trans(zh, 1) == ZEBRA_FAIL)
-       return ZEBRA_FAIL;
-    res = zebra_buffer_extract_record(zh, buf, buf_size, 
-                                      0, /* delete_flag  */
-                                      0, /* test_mode */
-                                      recordType,
-                                      sysno,   
-                                      match, fname,
-                                      0, 
-                                      0); /* allow_update */
-    if (zebra_end_trans(zh) != ZEBRA_OK)
-    {
-       yaz_log(YLOG_WARN, "zebra_end_trans failed");
-       res = ZEBRA_FAIL;
-    }
-    return res; 
+    return zebra_update_record(zh, action_update, 
+                               0 /* record type */,
+                               0 /* sysno */ ,
+                               0 /* match */, 
+                               0 /* fname */,
+                               buf, buf_size);
 }
 
 ZEBRA_RES zebra_update_record(ZebraHandle zh, 
+                              enum zebra_recctrl_action_t action,
                               const char *recordType,
                               zint *sysno, const char *match,
                               const char *fname,
-                              const char *buf, int buf_size,
-                              int force_update)
+                              const char *buf, int buf_size)
 {
     ZEBRA_RES res;
 
@@ -2284,18 +2247,18 @@ ZEBRA_RES zebra_update_record(ZebraHandle zh,
     if (sysno)
        yaz_log(log_level, " sysno=" ZINT_FORMAT, *sysno);
 
-    if (buf_size < 1) buf_size = strlen(buf);
+    if (buf_size < 1)
+        buf_size = strlen(buf);
 
     if (zebra_begin_trans(zh, 1) == ZEBRA_FAIL)
        return ZEBRA_FAIL;
     res = zebra_buffer_extract_record(zh, buf, buf_size, 
-                                      0, /* delete_flag */
+                                      action,
                                       0, /* test_mode */
                                       recordType,
                                       sysno,   
-                                      match, fname,
-                                      force_update, 
-                                      1); /* allow_update */
+                                      match, 
+                                      fname);
     if (zebra_end_trans(zh) != ZEBRA_OK)
     {
        yaz_log(YLOG_WARN, "zebra_end_trans failed");
@@ -2304,42 +2267,6 @@ ZEBRA_RES zebra_update_record(ZebraHandle zh,
     return res; 
 }
 
-ZEBRA_RES zebra_delete_record(ZebraHandle zh, 
-                              const char *recordType,
-                              zint *sysno, const char *match,
-                              const char *fname,
-                              const char *buf, int buf_size,
-                              int force_update) 
-{
-    ZEBRA_RES res;
-
-    ZEBRA_CHECK_HANDLE(zh);
-
-    assert(buf);
-    yaz_log(log_level, "zebra_delete_record");
-    if (sysno)
-       yaz_log(log_level, " sysno=" ZINT_FORMAT, *sysno);
-
-    if (buf_size < 1) buf_size = strlen(buf);
-
-    if (zebra_begin_trans(zh, 1) == ZEBRA_FAIL)
-       return ZEBRA_FAIL;
-    res = zebra_buffer_extract_record(zh, buf, buf_size,
-                                      1, /* delete_flag */
-                                      0, /* test_mode */
-                                      recordType,
-                                      sysno,
-                                      match,fname,
-                                      force_update,
-                                      1); /* allow_update */
-    if (zebra_end_trans(zh) != ZEBRA_OK)
-    {
-       yaz_log(YLOG_WARN, "zebra_end_trans failed");
-       res = ZEBRA_FAIL;
-    }
-    return res;
-}
-
 /* ---------------------------------------------------------------------------
   Searching 
 */
@@ -2352,6 +2279,7 @@ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query,
     Z_RPNQuery *query;
     ODR odr;
 
+
     ZEBRA_CHECK_HANDLE(zh);
 
     odr = odr_createmem(ODR_ENCODE);
@@ -2361,7 +2289,7 @@ ZEBRA_RES zebra_search_PQF(ZebraHandle zh, const char *pqf_query,
 
     yaz_log(log_level, "zebra_search_PQF s=%s q=%s", setname, pqf_query);
     
-    query = p_query_rpn (odr, PROTO_Z3950, pqf_query);
+    query = p_query_rpn(odr, pqf_query);
     
     if (!query)
     {