From 55a5cde7eb23fb9aa5a8386d34bb1b6e131c19d8 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 24 Jun 1998 12:16:09 +0000 Subject: [PATCH] Support for relations on text operands. Open range support in DFA module (i.e. [-j], [g-]). --- CHANGELOG | 8 +++ dfa/dfa.c | 40 +++++++---- dict/lookgrep.c | 15 +++- index/zebraapi.c | 10 ++- index/zrpn.c | 204 ++++++++++++++++++++++++++++++++++++++++-------------- index/zserver.c | 9 ++- index/zserver.h | 10 ++- 7 files changed, 221 insertions(+), 75 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2f2bd84..c783420 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Added support for relational operators on text when using +RPN queries. + +Added support for sort specifications in RPN queries. Type 7 +specifies 'sort' where value 1=ascending, value 2=descending. +The use attribute specifies the field criteria as usual. +The term specifies priority (0=first, 1=second). + Changed the way use attributes are specified in the recordId specification. diff --git a/dfa/dfa.c b/dfa/dfa.c index 072c397..e1e1c45 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.21 1998-06-22 11:33:39 adam + * Revision 1.22 1998-06-24 12:16:10 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.21 1998/06/22 11:33:39 adam * Added two type casts. 
* * Revision 1.20 1998/06/08 14:40:44 adam @@ -423,18 +427,28 @@ static int read_charset (struct DFA_parse *parse_info) { if (!esc0 && ch0 == ']') break; - if (parse_info->cmap) - { - const char **mapto; - char mapfrom[2]; - const char *mcp = mapfrom; - mapfrom[0] = ch0; - mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); - assert (mapto); - ch0 = mapto[0][0]; - } - add_BSet (parse_info->charset, parse_info->look_chars, ch0); - ch1 = nextchar_set (parse_info, &esc1); + if (!esc0 && ch0 == '-') + { + ch1 = ch0; + esc1 = esc0; + ch0 = 1; + add_BSet (parse_info->charset, parse_info->look_chars, ch0); + } + else + { + if (parse_info->cmap) + { + const char **mapto; + char mapfrom[2]; + const char *mcp = mapfrom; + mapfrom[0] = ch0; + mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); + assert (mapto); + ch0 = mapto[0][0]; + } + add_BSet (parse_info->charset, parse_info->look_chars, ch0); + ch1 = nextchar_set (parse_info, &esc1); + } if (!esc1 && ch1 == '-') { int open_range = 0; diff --git a/dict/lookgrep.c b/dict/lookgrep.c index 3e094e1..39d2b3f 100644 --- a/dict/lookgrep.c +++ b/dict/lookgrep.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: lookgrep.c,v $ - * Revision 1.20 1997-10-27 14:33:03 adam + * Revision 1.21 1998-06-24 12:16:12 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.20 1997/10/27 14:33:03 adam * Moved towards generic character mapping depending on "structure" * field in abstract syntax file. Fixed a few memory leaks. 
Fixed * bug with negative integers when doing searches with relational @@ -419,6 +423,13 @@ int dict_lookup_grep (Dict dict, const char *pattern, int range, void *client, struct DFA *dfa = dfa_init(); int i, d; +#if 0 + debug_dfa_trav = 1; + debug_dfa_tran = 1; + debug_dfa_followpos = 1; + dfa_verbose = 1; +#endif + logf (LOG_DEBUG, "dict_lookup_grep range=%d", range); for (i = 0; pattern[i]; i++) { diff --git a/index/zebraapi.c b/index/zebraapi.c index 94a31f9..4e5d2fd 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebraapi.c,v $ - * Revision 1.6 1998-06-22 11:36:47 adam + * Revision 1.7 1998-06-24 12:16:13 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.6 1998/06/22 11:36:47 adam * Added authentication check facility to zebra. * * Revision 1.5 1998/06/13 00:14:08 adam @@ -279,8 +283,8 @@ void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, } void zebra_sort (ZebraHandle zh, ODR stream, - int num_input_setnames, char **input_setnames, - char *output_setname, Z_SortKeySpecList *sort_sequence, + int num_input_setnames, const char **input_setnames, + const char *output_setname, Z_SortKeySpecList *sort_sequence, int *sort_status) { zh->errCode = 0; diff --git a/index/zrpn.c b/index/zrpn.c index a5617dc..5a1b42a 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.80 1998-06-23 15:33:34 adam + * Revision 1.81 1998-06-24 12:16:14 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.80 1998/06/23 15:33:34 adam * Added feature to specify sort criteria in query (type 7 specifies * sort flags). 
* @@ -720,20 +724,33 @@ static void gen_regular_rel (char *dst, int val, int islt) strcat (dst, "))"); } +void string_rel_add_char (char **term_p, const char *src, int *indx) +{ + if (src[*indx] == '\\') + *(*term_p)++ = src[(*indx)++]; + *(*term_p)++ = src[(*indx)++]; +} + +/* + * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+) + * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+) + * >= abc ([b-].*|a[c-].*|ab[c-].*) + * ([^-a].*|a[^-b].*|ab[c-].*) + * < abc ([-0].*|a[-a].*|ab[-b].*) + * ([^a-].*|a[^b-].*|ab[^c-].*) + * <= abc ([-0].*|a[-a].*|ab[-b].*|abc) + * ([^a-].*|a[^b-].*|ab[^c-].*|abc) + */ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, - const char **term_sub, - char *term_dict, - oid_value attributeSet, - struct grep_info *grep_info, - int *max_pos, - int reg_type, - char *term_dst) + const char **term_sub, char *term_dict, + oid_value attributeSet, + int reg_type, int space_split, char *term_dst) { AttrType relation; int relation_value; - int term_value; - int r; + int i; char *term_tmp = term_dict + strlen(term_dict); + char term_component[256]; attr_init (&relation, zapt, 2); relation_value = attr_find (&relation, NULL); @@ -742,52 +759,141 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <"); - gen_regular_rel (term_tmp, term_value-1, 1); + + *term_tmp++ = '('; + for (i = 0; term_component[i]; ) + { + int j = 0; + + if (i) + *term_tmp++ = '|'; + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + + *term_tmp++ = '['; + + *term_tmp++ = '^'; + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + } + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 2: 
- if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <="); - gen_regular_rel (term_tmp, term_value, 1); - break; - case 4: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) - return 0; - term_value = atoi (term_tmp); - logf (LOG_DEBUG, "Relation >="); - gen_regular_rel (term_tmp, term_value, 0); + + *term_tmp++ = '('; + for (i = 0; term_component[i]; ) + { + int j = 0; + + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + *term_tmp++ = '^'; + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + + *term_tmp++ = '|'; + } + for (i = 0; term_component[i]; ) + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 5: - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; - term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation >"); - gen_regular_rel (term_tmp, term_value+1, 0); + + *term_tmp++ = '('; + for (i = 0; term_component[i];) + { + int j = 0; + + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + *term_tmp++ = '^'; + *term_tmp++ = '-'; + string_rel_add_char (&term_tmp, term_component, &i); + + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + + *term_tmp++ = '|'; + } + for (i = 0; term_component[i];) + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '.'; + *term_tmp++ = '+'; + *term_tmp++ = ')'; + *term_tmp = '\0'; + break; + case 4: + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) + return 0; + logf (LOG_DEBUG, 
"Relation >="); + + *term_tmp++ = '('; + for (i = 0; term_component[i];) + { + int j = 0; + + if (i) + *term_tmp++ = '|'; + while (j < i) + string_rel_add_char (&term_tmp, term_component, &j); + *term_tmp++ = '['; + + if (term_component[i+1]) + { + *term_tmp++ = '^'; + *term_tmp++ = '-'; + string_rel_add_char (&term_tmp, term_component, &i); + } + else + { + string_rel_add_char (&term_tmp, term_component, &i); + *term_tmp++ = '-'; + } + *term_tmp++ = ']'; + *term_tmp++ = '.'; + *term_tmp++ = '*'; + } + *term_tmp++ = ')'; + *term_tmp = '\0'; break; case 3: default: logf (LOG_DEBUG, "Relation ="); - *term_tmp = '('; - if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp+1, 1, - term_dst)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component, + space_split, term_dst)) return 0; + strcat (term_tmp, "("); + strcat (term_tmp, term_component); strcat (term_tmp, ")"); } - logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos, - 0, grep_handle); - if (r) - logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); - logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx); return 1; } @@ -798,7 +904,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, int num_bases, char **basenames, char *term_dst) { - char term_dict[2*IT_MAX_WORD+2]; + char term_dict[2*IT_MAX_WORD+4000]; int j, r, base_no; AttrType truncation; int truncation_value; @@ -882,20 +988,14 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, case -1: /* not specified */ case 100: /* do not truncate */ if (!string_relation (zh, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, - reg_type, term_dst)) + attributeSet, + reg_type, space_split, term_dst)) return 0; -#if 0 - term_dict[j++] = '('; - if (!term_100 (zh->zebra_maps, reg_type, - &termp, term_dict + j, space_split, term_dst)) - return 0; - strcat (term_dict, ")"); - r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, - 
&max_pos, 0, grep_handle); + logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len); + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, + 0, grep_handle); if (r) - logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r); -#endif + logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; diff --git a/index/zserver.c b/index/zserver.c index 4718912..34e7406 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.60 1998-06-22 11:36:49 adam + * Revision 1.61 1998-06-24 12:16:15 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.60 1998/06/22 11:36:49 adam * Added authentication check facility to zebra. * * Revision 1.59 1998/06/12 12:22:13 adam @@ -386,7 +390,8 @@ int bend_sort (void *handle, bend_sort_rr *rr) { ZebraHandle zh = handle; - zebra_sort (zh, rr->stream, rr->num_input_setnames, rr->input_setnames, + zebra_sort (zh, rr->stream, + rr->num_input_setnames, (const char **) rr->input_setnames, rr->output_setname, rr->sort_sequence, &rr->sort_status); rr->errcode = zh->errCode; rr->errstring = zh->errString; diff --git a/index/zserver.h b/index/zserver.h index 7337b77..51b59bb 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.35 1998-06-23 15:33:35 adam + * Revision 1.36 1998-06-24 12:16:16 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.35 1998/06/23 15:33:35 adam * Added feature to specify sort criteria in query (type 7 specifies * sort flags). 
* @@ -230,8 +234,8 @@ void resultSetSort (ZebraHandle zh, ODR stream, Z_SortKeySpecList *sort_sequence, int *sort_status); void zebra_sort (ZebraHandle zh, ODR stream, - int num_input_setnames, char **input_setnames, - char *output_setname, Z_SortKeySpecList *sort_sequence, + int num_input_setnames, const char **input_setnames, + const char *output_setname, Z_SortKeySpecList *sort_sequence, int *sort_status); void zlog_rpn (Z_RPNQuery *rpn); -- 1.7.10.4