X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=e7580a1654dc7552633f5e41a354558de123a328;hb=0df6c383d656dba662eb9d9acafed01a0973fcff;hp=a5617dccbfe3612968e066244c1ebab5c30b6acb;hpb=5d51844baabbb37b83afdd9b10db0a6c53cd1905;p=idzebra-moved-to-github.git diff --git a/index/zrpn.c b/index/zrpn.c index a5617dc..e7580a1 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,10 +1,21 @@ /* - * Copyright (C) 1995-1998, Index Data I/S + * Copyright (C) 1995-1998, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.80 1998-06-23 15:33:34 adam + * Revision 1.83 1998-09-02 13:53:19 adam + * Extra parameter decode added to search routines to implement + * persistent queries. + * + * Revision 1.82 1998/06/26 11:16:40 quinn + * Added support (un-optimised) for left and left/right truncation + * + * Revision 1.81 1998/06/24 12:16:14 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.80 1998/06/23 15:33:34 adam * Added feature to specify sort criteria in query (type 7 specifies * sort flags). 
* @@ -720,20 +731,33 @@ static void gen_regular_rel (char *dst, int val, int islt) strcat (dst, "))"); } +void string_rel_add_char (char **term_p, const char *src, int *indx) +{ + if (src[*indx] == '\\') + *(*term_p)++ = src[(*indx)++]; + *(*term_p)++ = src[(*indx)++]; +} + +/* + * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+) + * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+) + * >= abc ([b-].*|a[c-].*|ab[c-].*) + * ([^-a].*|a[^-b].*|ab[c-].*) + * < abc ([-0].*|a[-a].*|ab[-b].*) + * ([^a-].*|a[^b-].*|ab[^c-].*) + * <= abc ([-0].*|a[-a].*|ab[-b].*|abc) + * ([^a-].*|a[^b-].*|ab[^c-].*|abc) + */ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - char *term_dict, - oid_value attributeSet, - struct grep_info *grep_info, - int *max_pos, - int reg_type, - char *term_dst) + const char **term_sub, char *term_dict, + oid_value attributeSet, + int reg_type, int space_split, char *term_dst) { AttrType relation; int relation_value; - int term_value; - int r; + int i; char *term_tmp = term_dict + strlen(term_dict); + char term_component[256]; attr_init (&relation, zapt, 2); relation_value = attr_find (&relation, NULL); @@ -742,52 +766,141 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <"); - gen_regular_rel (term_tmp, term_value-1, 1); + + *term_tmp++ = '('; + for (i = 0; term_component[i]; ) + { + int j = 0; + + if (i) + *term_tmp++ = '|'; + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + + *term_tmp++ = '['; + + *term_tmp++ = '^'; + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + } + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 2: 
- if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <="); - gen_regular_rel (term_tmp, term_value, 1); - break; - case 4: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) - return 0; - term_value = atoi (term_tmp); - logf (LOG_DEBUG, "Relation >="); - gen_regular_rel (term_tmp, term_value, 0); + + *term_tmp++ = '('; + for (i = 0; term_component[i]; ) + { + int j = 0; + + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + *term_tmp++ = '^'; + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + + *term_tmp++ = '|'; + } + for (i = 0; term_component[i]; ) + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 5: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation >"); - gen_regular_rel (term_tmp, term_value+1, 0); + + *term_tmp++ = '('; + for (i = 0; term_component[i];) + { + int j = 0; + + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + *term_tmp++ = '^'; + *term_tmp++ = '-'; + string_rel_add_char (&term_tmp, term_component, &i); + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + + *term_tmp++ = '|'; + } + for (i = 0; term_component[i];) + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '.'; + *term_tmp++ = '+'; + *term_tmp++ = ')'; + *term_tmp = '\0'; + break; + case 4: + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) + return 0; + logf (LOG_DEBUG, 
"Relation >="); + + *term_tmp++ = '('; + for (i = 0; term_component[i];) + { + int j = 0; + + if (i) + *term_tmp++ = '|'; + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + if (term_component[i+1]) + { + *term_tmp++ = '^'; + *term_tmp++ = '-'; + string_rel_add_char (&term_tmp, term_component, &i); + } + else + { + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + } + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + } + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 3: default: logf (LOG_DEBUG, "Relation ="); - *term_tmp = '('; - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp+1, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; + strcat (term_tmp, "("); + strcat (term_tmp, term_component); strcat (term_tmp, ")"); } - logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); - if (r) - logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); - logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); return 1; } @@ -798,7 +911,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, int num_bases, char **basenames, char *term_dst) { - char term_dict[2*IT_MAX_WORD+2]; + char term_dict[2*IT_MAX_WORD+4000]; int j, r, base_no; AttrType truncation; int truncation_value; @@ -882,20 +995,14 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, case -1: /* not specified */ case 100: /* do not truncate */ if (!string_relation (zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, - reg_type, term_dst)) + attributeSet, + reg_type, space_split, term_dst)) return 0; -#if 0 - term_dict[j++] = '('; - if (!term_100 (zh->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - return 0; - strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, - 
&max_pos, 0, grep_handle); + logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, + 0, grep_handle); if (r) - logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r); -#endif + logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; @@ -906,8 +1013,24 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; - case 2: /* left truncation */ + case 2: /* left truncation */ + term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; + if (!term_100 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) + return 0; + strcat (term_dict, ")"); + dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + break; case 3: /* left&right truncation */ + term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*'; + if (!term_100 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) + return 0; + strcat (term_dict, ".*)"); + dict_lookup_grep (zh->dict, term_dict, 0, grep_info, + &max_pos, 0, grep_handle); + break; zh->errCode = 120; return -1; case 101: /* process # in term */ @@ -1894,7 +2017,7 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs, return r; } -void rpn_search (ZebraHandle zh, ODR stream, +void rpn_search (ZebraHandle zh, ODR stream, ODR decode, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname) {