Use Odr_oid for OIDs. Require YAZ 3.0.2 or later.
[idzebra-moved-to-github.git] / index / rpnsearch.c
index 909e9cc..5e1b7e2 100644 (file)
@@ -1,5 +1,5 @@
-/* $Id: rpnsearch.c,v 1.1 2006-09-21 08:56:52 adam Exp $
-   Copyright (C) 1995-2006
+/* $Id: rpnsearch.c,v 1.12 2007-05-08 12:50:04 adam Exp $
+   Copyright (C) 1995-2007
    Index Data ApS
 
 This file is part of the Zebra server.
@@ -78,19 +78,25 @@ struct grep_info {
     ISAM_P *isam_p_buf;
     int isam_p_size;        
     int isam_p_indx;
+    int trunc_max;
     ZebraHandle zh;
     int reg_type;
     ZebraSet termset;
 };        
 
-static void add_isam_p(const char *name, const char *info,
-                      struct grep_info *p)
+static int add_isam_p(const char *name, const char *info,
+                      struct grep_info *p)
 {
     if (!log_level_set)
     {
         log_level_rpn = yaz_log_module_level("rpn");
         log_level_set = 1;
     }
+    /* we may have to stop this madness.. NOTE: -1 so that if
+       truncmax == trunxlimit we do *not* generate result sets */
+    if (p->isam_p_indx >= p->trunc_max - 1)
+        return 1;
+
     if (p->isam_p_indx == p->isam_p_size)
     {
         ISAM_P *new_isam_p_buf;
@@ -140,12 +146,12 @@ static void add_isam_p(const char *name, const char *info,
                         index_name, term_tmp);
     }
     (p->isam_p_indx)++;
+    return 0;
 }
 
 static int grep_handle(char *name, const char *info, void *p)
 {
-    add_isam_p(name, info, (struct grep_info *) p);
-    return 0;
+    return add_isam_p(name, info, (struct grep_info *) p); /* pass on add_isam_p's status: 1 once the trunc_max cap is reached, 0 otherwise (previously always 0) */
 }
 
 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
@@ -697,7 +703,7 @@ void string_rel_add_char(char **term_p, const char *src, int *indx)
  */
 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, char *term_dict,
-                          oid_value attributeSet,
+                          const Odr_oid *attributeSet,
                           int reg_type, int space_split, char *term_dst,
                           int *error_code)
 {
@@ -734,6 +740,10 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             *term_tmp++ = '[';
 
             *term_tmp++ = '^';
+
+            *term_tmp++ = 1;
+            *term_tmp++ = FIRST_IN_FIELD_CHAR;
+
             string_rel_add_char(&term_tmp, term_component, &i);
             *term_tmp++ = '-';
 
@@ -746,6 +756,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
         *term_tmp++ = ')';
         *term_tmp = '\0';
+        yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
         break;
     case 2:
         if (!term_100(zh->reg->zebra_maps, reg_type,
@@ -764,6 +775,10 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             *term_tmp++ = '[';
 
             *term_tmp++ = '^';
+
+            *term_tmp++ = 1;
+            *term_tmp++ = FIRST_IN_FIELD_CHAR;
+
             string_rel_add_char(&term_tmp, term_component, &i);
             *term_tmp++ = '-';
 
@@ -882,7 +897,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
-                            oid_value attributeSet, NMEM stream,
+                            const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             int reg_type, int complete_flag,
                             int num_bases, char **basenames,
@@ -940,7 +955,7 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh,
 static ZEBRA_RES term_trunc(ZebraHandle zh,
                            Z_AttributesPlusTerm *zapt,
                            const char **term_sub, 
-                           oid_value attributeSet, NMEM stream,
+                           const Odr_oid *attributeSet, NMEM stream,
                            struct grep_info *grep_info,
                            int reg_type, int complete_flag,
                            int num_bases, char **basenames,
@@ -980,7 +995,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh,
 
 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
-                            oid_value attributeSet, NMEM stream,
+                            const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             int reg_type, int complete_flag,
                             int num_bases, char **basenames,
@@ -1168,7 +1183,9 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                              grep_info, &max_pos, 
                              ord_len /* number of "exact" chars */,
                              grep_handle);
-        if (r)
+        if (r == 1)
+            zebra_set_partial_result(zh);
+        else if (r)
             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
     }
     if (!bases_ok)
@@ -1193,47 +1210,59 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
                                   struct grep_info *grep_info,
                                   int reg_type)
 {
-    AttrType termset;
-    int termset_value_numeric;
-    const char *termset_value_string;
-
 #ifdef TERM_COUNT
     grep_info->term_no = 0;
 #endif
+    grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
     grep_info->isam_p_size = 0;
     grep_info->isam_p_buf = NULL;
     grep_info->zh = zh;
     grep_info->reg_type = reg_type;
     grep_info->termset = 0;
-    if (!zapt)
-        return ZEBRA_OK;
-    attr_init_APT(&termset, zapt, 8);
-    termset_value_numeric =
-        attr_find_ex(&termset, NULL, &termset_value_string);
-    if (termset_value_numeric != -1)
+    if (zapt)
     {
+        AttrType truncmax;
+        int truncmax_value;
+
+        attr_init_APT(&truncmax, zapt, 13);
+        truncmax_value = attr_find(&truncmax, NULL);
+        if (truncmax_value != -1)
+            grep_info->trunc_max = truncmax_value;
+    }
+    if (zapt)
+    {
+        AttrType termset;
+        int termset_value_numeric;
+        const char *termset_value_string;
+
+        attr_init_APT(&termset, zapt, 8);
+        termset_value_numeric =
+            attr_find_ex(&termset, NULL, &termset_value_string);
+        if (termset_value_numeric != -1)
+        {
 #if TERMSET_DISABLE
-        zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
-        return ZEBRA_FAIL;
+            zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
+            return ZEBRA_FAIL;
 #else
-        char resname[32];
-        const char *termset_name = 0;
-        if (termset_value_numeric != -2)
-        {
-    
-            sprintf(resname, "%d", termset_value_numeric);
-            termset_name = resname;
-        }
-        else
+            char resname[32];
+            const char *termset_name = 0;
+            if (termset_value_numeric != -2)
+            {
+                
+                sprintf(resname, "%d", termset_value_numeric);
+                termset_name = resname;
+            }
+            else
             termset_name = termset_value_string;
-        yaz_log(log_level_rpn, "creating termset set %s", termset_name);
-        grep_info->termset = resultSetAdd(zh, termset_name, 1);
-        if (!grep_info->termset)
-        {
-           zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
-            return ZEBRA_FAIL;
-        }
+            yaz_log(log_level_rpn, "creating termset set %s", termset_name);
+            grep_info->termset = resultSetAdd(zh, termset_name, 1);
+            if (!grep_info->termset)
+            {
+                zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
+                return ZEBRA_FAIL;
+            }
 #endif
+        }
     }
     return ZEBRA_OK;
 }
@@ -1259,7 +1288,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
                                 Z_AttributesPlusTerm *zapt,
                                 const char *termz,
-                                oid_value attributeSet,
+                                const Odr_oid *attributeSet,
                                 NMEM stream,
                                 int reg_type, int complete_flag,
                                 const char *rank_type,
@@ -1321,7 +1350,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh,
 
 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
                                          Z_AttributesPlusTerm *zapt,
-                                         oid_value attributeSet,
+                                         const Odr_oid *attributeSet,
                                          int reg_type,
                                          int num_bases, char **basenames,
                                          NMEM rset_nmem,
@@ -1413,7 +1442,7 @@ static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
                                       Z_AttributesPlusTerm *zapt,
                                       const char *termz_org,
-                                      oid_value attributeSet,
+                                      const Odr_oid *attributeSet,
                                       NMEM stream,
                                       int reg_type, int complete_flag,
                                       const char *rank_type,
@@ -1473,7 +1502,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
                                        Z_AttributesPlusTerm *zapt,
                                        const char *termz_org,
-                                       oid_value attributeSet,
+                                       const Odr_oid *attributeSet,
                                        NMEM stream,
                                        int reg_type, int complete_flag,
                                        const char *rank_type,
@@ -1540,7 +1569,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
                                         Z_AttributesPlusTerm *zapt,
                                         const char *termz_org,
-                                        oid_value attributeSet,
+                                        const Odr_oid *attributeSet,
                                         NMEM stream,
                                         int reg_type, int complete_flag,
                                         const char *rank_type, 
@@ -1609,7 +1638,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                            const char **term_sub,
                            char *term_dict,
-                           oid_value attributeSet,
+                           const Odr_oid *attributeSet,
                            struct grep_info *grep_info,
                            int *max_pos,
                            int reg_type,
@@ -1683,7 +1712,10 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
                           0, grep_handle);
-    if (r)
+
+    if (r == 1)
+        zebra_set_partial_result(zh);
+    else if (r)
         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
     return 1;
@@ -1691,7 +1723,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                              const char **term_sub, 
-                             oid_value attributeSet, NMEM stream,
+                             const Odr_oid *attributeSet, NMEM stream,
                              struct grep_info *grep_info,
                              int reg_type, int complete_flag,
                              int num_bases, char **basenames,
@@ -1768,7 +1800,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
                                        Z_AttributesPlusTerm *zapt,
                                        const char *termz,
-                                       oid_value attributeSet,
+                                       const Odr_oid *attributeSet,
                                        NMEM stream,
                                        int reg_type, int complete_flag,
                                        const char *rank_type, 
@@ -1848,32 +1880,46 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
                                      Z_AttributesPlusTerm *zapt,
                                      const char *termz,
-                                     oid_value attributeSet,
+                                     const Odr_oid *attributeSet,
                                      NMEM stream,
                                      const char *rank_type, NMEM rset_nmem,
                                      RSET *rset,
                                      struct rset_key_control *kc)
 {
-    RSFD rsfd;
-    struct it_key key;
-    int sys;
-    *rset = rset_create_temp(rset_nmem, kc, kc->scope,
-                             res_get (zh->res, "setTmpDir"),0 );
-    rsfd = rset_open(*rset, RSETF_WRITE);
+    Record rec; /* Builds *rset for a term that names one record by system number; rec is only an existence probe */
+    zint sysno = atozint(termz); /* the term text is parsed as the system number to match */
     
-    sys = atoi(termz);
-    if (sys <= 0)
-        sys = 1;
-    key.mem[0] = sys;
-    key.mem[1] = 1;
-    key.len = 2;
-    rset_write (rsfd, &key);
-    rset_close (rsfd);
+    if (sysno <= 0)
+        sysno = 0; /* non-positive input collapses to 0, which is tested as "no hit" below */
+    rec = rec_get(zh->reg->records, sysno); /* NOTE(review): also called when sysno is 0 - presumably yields NULL; confirm */
+    if (!rec)
+        sysno = 0; /* no record fetched: handle it like an invalid number */
+
+    rec_free(&rec); /* NOTE(review): reached with rec == NULL as well; assumes rec_free accepts that - confirm */
+
+    if (sysno <= 0) /* invalid number or record not found */
+    {
+        *rset = rset_create_null(rset_nmem, kc, 0); /* null rset; the removed code forced sysno 1 in this case */
+    }
+    else
+    {
+        RSFD rsfd;
+        struct it_key key;
+        *rset = rset_create_temp(rset_nmem, kc, kc->scope,
+                                 res_get(zh->res, "setTmpDir"), 0);
+        rsfd = rset_open(*rset, RSETF_WRITE);
+        
+        key.mem[0] = sysno; /* single key: mem[0] carries the sysno, mem[1] is fixed to 1, len is 2 */
+        key.mem[1] = 1;
+        key.len = 2;
+        rset_write(rsfd, &key); /* the temp rset receives exactly this one key */
+        rset_close(rsfd);
+    }
     return ZEBRA_OK;
 }
 
 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                              oid_value attributeSet, NMEM stream,
+                              const Odr_oid *attributeSet, NMEM stream,
                               Z_SortKeySpecList *sort_sequence,
                               const char *rank_type,
                               NMEM rset_nmem,
@@ -1885,8 +1931,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     AttrType sort_relation_type;
     Z_SortKeySpec *sks;
     Z_SortKey *sk;
-    int oid[OID_SIZE];
-    oident oe;
     char termz[20];
     
     attr_init_APT(&sort_relation_type, zapt, 7);
@@ -1910,12 +1954,6 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         i = 0;
     sprintf(termz, "%d", i);
 
-    oe.proto = PROTO_Z3950;
-    oe.oclass = CLASS_ATTSET;
-    oe.value = attributeSet;
-    if (!oid_ent_to_oid (&oe, oid))
-        return ZEBRA_FAIL;
-
     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
     sks->sortElement = (Z_SortElement *)
         nmem_malloc(stream, sizeof(*sks->sortElement));
@@ -1926,7 +1964,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     sk->u.sortAttributes = (Z_SortAttributes *)
         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
 
-    sk->u.sortAttributes->id = oid;
+    sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
     sk->u.sortAttributes->list = zapt->attributes;
 
     sks->sortRelation = (int *)
@@ -1951,11 +1989,11 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 
 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                           oid_value attributeSet,
+                           const Odr_oid *attributeSet,
                            struct xpath_location_step *xpath, int max,
                            NMEM mem)
 {
-    oid_value curAttributeSet = attributeSet;
+    const Odr_oid *curAttributeSet = attributeSet;
     AttrType use;
     const char *use_string = 0;
     
@@ -2129,26 +2167,25 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                         cp++;
                     }
                 }
-                wrbuf_puts(wbuf, "");
                 rset_attr = xpath_trunc(
-                    zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
+                    zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, 
                     rset_nmem, kc);
-                wrbuf_free(wbuf, 1);
+                wrbuf_destroy(wbuf);
             } 
             else 
             {
                 if (!first_path)
                 {
-                    wrbuf_free(xpath_rev, 1);
+                    wrbuf_destroy(xpath_rev);
                     continue;
                 }
             }
-            yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
-                    wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
+            yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
+                    wrbuf_cstr(xpath_rev));
             if (wrbuf_len(xpath_rev))
             {
                 rset_start_tag = xpath_trunc(zh, stream, '0', 
-                                             wrbuf_buf(xpath_rev),
+                                             wrbuf_cstr(xpath_rev),
                                              ZEBRA_XPATH_ELM_BEGIN, 
                                              rset_nmem, kc);
                 if (always_matches)
@@ -2156,7 +2193,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                 else
                 {
                     rset_end_tag = xpath_trunc(zh, stream, '0', 
-                                               wrbuf_buf(xpath_rev),
+                                               wrbuf_cstr(xpath_rev),
                                                ZEBRA_XPATH_ELM_END, 
                                                rset_nmem, kc);
                     
@@ -2165,7 +2202,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                                                rset_end_tag, rset_attr);
                 }
             }
-            wrbuf_free(xpath_rev, 1);
+            wrbuf_destroy(xpath_rev);
             first_path = 0;
         }
     }
@@ -2176,7 +2213,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
 #define MAX_XPATH_STEPS 10
 
 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                               oid_value attributeSet, NMEM stream,
+                               const Odr_oid *attributeSet, NMEM stream,
                                Z_SortKeySpecList *sort_sequence,
                                int num_bases, char **basenames, 
                                NMEM rset_nmem,
@@ -2301,7 +2338,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
-                                     oid_value attributeSet, 
+                                     const Odr_oid *attributeSet, 
                                      NMEM stream, NMEM rset_nmem,
                                      Z_SortKeySpecList *sort_sequence,
                                      int num_bases, char **basenames,
@@ -2309,8 +2346,39 @@ static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
                                      Z_Operator *parent_op,
                                      struct rset_key_control *kc);
 
+ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
+                                   zint *approx_limit)
+{ /* Walks the RPN tree under zs; for each APT operand that carries attribute 12, stores its value in *approx_limit. zh is only forwarded to the recursive calls. */
+    ZEBRA_RES res = ZEBRA_OK; /* no statement in this function sets a failure, so the result is always ZEBRA_OK */
+    if (zs->which == Z_RPNStructure_complex)
+    {
+        if (res == ZEBRA_OK) /* always true here: res was just initialised */
+            res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
+                                           approx_limit);
+        if (res == ZEBRA_OK) /* s2 is visited after s1, so a value found under s2 overwrites one from s1 */
+            res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
+                                           approx_limit);
+    }
+    else if (zs->which == Z_RPNStructure_simple)
+    {
+        if (zs->u.simple->which == Z_Operand_APT) /* other operand kinds are ignored */
+        {
+            Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
+            AttrType global_hits_limit_attr;
+            int l; /* attr_find result; an int, while *approx_limit is a zint */
+            
+            attr_init_APT(&global_hits_limit_attr, zapt, 12); /* 12 is presumably the attribute type number - confirm against the attribute tables */
+            
+            l = attr_find(&global_hits_limit_attr, NULL);
+            if (l != -1) /* -1 is treated as "attribute not supplied" */
+                *approx_limit = l; /* otherwise *approx_limit keeps whatever the caller put there */
+        }
+    }
+    return res;
+}
+
 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
-                        oid_value attributeSet, 
+                        const Odr_oid *attributeSet, 
                         NMEM stream, NMEM rset_nmem,
                         Z_SortKeySpecList *sort_sequence,
                         int num_bases, char **basenames,
@@ -2347,7 +2415,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
 }
 
 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
-                              oid_value attributeSet, 
+                              const Odr_oid *attributeSet, 
                               NMEM stream, NMEM rset_nmem,
                               Z_SortKeySpecList *sort_sequence,
                               int num_bases, char **basenames,