X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=80d66c76b4f31362d946d2a526ac1670c625fb06;hb=587c1d5cac956ec0cf482fc14ecea40240e85fd8;hp=6f084f1316be07e9c7a2f1691a95434af9fc8007;hpb=896c0427df9d8eff5de6a1735dcd992e067df844;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index 6f084f1..80d66c7 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.120 2002-08-02 19:26:56 adam Exp $ +/* $Id: zrpn.c,v 1.125 2002-10-03 10:16:23 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -46,7 +46,20 @@ struct rpn_char_map_info { static const char **rpn_char_map_handler (void *vp, const char **from, int len) { struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp; - return zebra_maps_input (p->zm, p->reg_type, from, len); + const char **out = zebra_maps_input (p->zm, p->reg_type, from, len); +#if 0 + if (out && *out) + { + const char *outp = *out; + yaz_log (LOG_LOG, "---"); + while (*outp) + { + yaz_log (LOG_LOG, "%02X", *outp); + outp++; + } + } +#endif + return out; } static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type, @@ -167,17 +180,18 @@ struct grep_info { static void term_untrans (ZebraHandle zh, int reg_type, char *dst, const char *src) { + int len = 0; while (*src) { const char *cp = zebra_maps_output (zh->reg->zebra_maps, reg_type, &src); - if (!cp) - *dst++ = *src++; + if (!cp && len < IT_MAX_WORD-1) + dst[len++] = *src++; else - while (*cp) - *dst++ = *cp++; + while (*cp && len < IT_MAX_WORD-1) + dst[len++] = *cp++; } - *dst = '\0'; + dst[len] = '\0'; } static void add_isam_p (const char *name, const char *info, @@ -220,7 +234,7 @@ static void add_isam_p (const char *name, const char *info, { const char *db; int set, use; - char term_tmp[512]; + char term_tmp[IT_MAX_WORD]; int su_code = 0; int len = key_SU_decode (&su_code, name); @@ -266,7 +280,7 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, return *s0; } -#define REGEX_CHARS "[]()|.*+!" +#define REGEX_CHARS " []()|.*+?!" /* term_100: handle term, where trunc=none (no operators at all) */ static int term_100 (ZebraMaps zebra_maps, int reg_type, @@ -982,11 +996,11 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, attributeSet, reg_type, space_split, term_dst)) return 0; - logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len); + logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len); r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) - logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); + logf (LOG_WARN, "dict_lookup_grep fail %d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; @@ -1096,8 +1110,9 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, } -static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz) +/* convert APT search term to UTF8 */ +static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz) { size_t sizez; Z_Term *term = zapt->term; @@ -1105,8 +1120,7 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (term->which) { case Z_Term_general: -#if HAVE_ICONV_H - if (zh->iconv_to_utf8 != (iconv_t)(-1)) + if (zh->iconv_to_utf8 != 0) { char *inbuf = term->u.general->buf; size_t inleft = term->u.general->len; @@ -1114,24 +1128,24 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, size_t outleft = IT_MAX_WORD-1; size_t ret; - yaz_log (LOG_DEBUG, "converting general from ISO-8859-1"); - ret = iconv(zh->iconv_to_utf8, &inbuf, &inleft, + ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t)(-1)) { - ret = iconv(zh->iconv_to_utf8, 0, 0, 0, 0); + ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0); zh->errCode = 125; return -1; } *outbuf = 0; - return 0; } -#endif - sizez = term->u.general->len; - if (sizez > IT_MAX_WORD-1) - sizez = IT_MAX_WORD-1; - memcpy (termz, term->u.general->buf, sizez); - termz[sizez] = '\0'; + else + { + sizez = term->u.general->len; + if (sizez > IT_MAX_WORD-1) + sizez = IT_MAX_WORD-1; + memcpy (termz, term->u.general->buf, sizez); + termz[sizez] = '\0'; + } break; case Z_Term_characterString: sizez = strlen(term->u.characterString); @@ -1142,38 +1156,47 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, break; default: zh->errCode = 124; + return -1; } return 0; } -static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz, int reg_type) +/* convert APT SCAN term to internal cmap */ +static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, + char *termz, int reg_type) { - Z_Term *term = zapt->term; - const char **map; - const char *cp = (const char *) term->u.general->buf; - const char *cp_end = cp + term->u.general->len; - const char *src; - int i = 0; - const char *space_map = NULL; - int len; - - while ((len = (cp_end - cp)) > 0) + char termz0[IT_MAX_WORD]; + + if (zapt_term_to_utf8(zh, zapt, termz0)) + return -1; /* error */ + else { - map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len); - if (**map == *CHR_SPACE) - space_map = *map; - else + const char **map; + const char *cp = (const char *) termz0; + const char *cp_end = cp + strlen(cp); + const char *src; + int i = 0; + const char *space_map = NULL; + int len; + + while ((len = (cp_end - cp)) > 0) { - if (i && space_map) - for (src = space_map; *src; src++) + map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len); + if (**map == *CHR_SPACE) + space_map = *map; + else + { + if (i && space_map) + for (src = space_map; *src; src++) + termz[i++] = *src; + space_map = NULL; + for (src = *map; *src; src++) termz[i++] = *src; - space_map = NULL; - for (src = *map; *src; src++) - termz[i++] = *src; + } } + termz[i] = '\0'; } - termz[i] = '\0'; + return 0; } static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, @@ -2136,12 +2159,28 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, int prefix_len = 0; int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use); int ord_len, i, r, max_pos; + int term_type = Z_Term_characterString; + const char *flags = "void"; if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream)) - return 0; + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (term, strlen(term), + flags, term_type); + parms.rset_term->nn = 0; + return rset_create (rset_kind_null, &parms); + } if (ord < 0) - return 0; + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (term, strlen(term), + flags, term_type); + parms.rset_term->nn = 0; + return rset_create (rset_kind_null, &parms); + } if (prefix_len) term_dict[prefix_len++] = '|'; else @@ -2166,7 +2205,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, grep_info.isam_p_indx); rset = rset_trunc (zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), - "void", 1, Z_Term_characterString); + flags, 1, term_type); grep_info_delete (&grep_info); return rset; } @@ -2294,7 +2333,7 @@ static RSET rpn_search_xpath (ZebraHandle zh, rset_end_tag = xpath_trunc(zh, stream, '0', xpath_rev, 2, curAttributeSet); - + parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; parms.rset_l = rset_start_tag; @@ -2337,7 +2376,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, logf (LOG_DEBUG, "search_type=%s", search_type); logf (LOG_DEBUG, "rank_type=%s", rank_type); - if (trans_term (zh, zapt, termz)) + if (zapt_term_to_utf8(zh, zapt, termz)) return 0; if (sort_flag) @@ -2612,12 +2651,33 @@ static int scan_handle (char *name, const char *info, int pos, void *client) static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type, char **dst, const char *src) { - char term_dst[1024]; - - term_untrans (zh, reg_type, term_dst, src); + char term_src[IT_MAX_WORD]; + char term_dst[IT_MAX_WORD]; - *dst = (char *) nmem_malloc (stream, strlen(term_dst)+1); - strcpy (*dst, term_dst); + term_untrans (zh, reg_type, term_src, src); + + if (zh->iconv_from_utf8 != 0) + { + int len; + char *inbuf = term_src; + size_t inleft = strlen(term_src); + char *outbuf = term_dst; + size_t outleft = sizeof(term_dst)-1; + size_t ret; + + ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft, + &outbuf, &outleft); + if (ret == (size_t)(-1)) + len = 0; + else + len = outbuf - term_dst; + *dst = nmem_malloc (stream, len + 1); + if (len > 0) + memcpy (*dst, term_dst, len); + (*dst)[len] = '\0'; + } + else + *dst = nmem_strdup (stream, term_src); } static void count_set (RSET r, int *count) @@ -2761,7 +2821,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, termz[prefix_len] = 0; strcpy (scan_info->prefix, termz); - trans_scan_term (zh, zapt, termz+prefix_len, reg_id); + if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id)) + return ; dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp, scan_info, scan_handle);