X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fzebramap.c;h=976f5b43e28629257ecf3cc88a9fd6087bd3ca76;hb=89d059a3e4541ff00bbb4193ba34c1dbe1e21928;hp=4e24daa67ca596f132dac88dde5ffe8c6c009c15;hpb=01ddc55fd5a59535e229c09440cfdadccadf3555;p=idzebra-moved-to-github.git diff --git a/util/zebramap.c b/util/zebramap.c index 4e24daa..976f5b4 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -4,7 +4,25 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.c,v $ - * Revision 1.16 1999-09-07 07:19:21 adam + * Revision 1.22 2001-11-14 22:06:27 adam + * Rank-weight may be controlled via query. + * + * Revision 1.21 2001/01/22 10:42:56 adam + * Added numerical sort. + * + * Revision 1.20 2000/03/02 14:35:19 adam + * Added structure year and date. + * + * Revision 1.19 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.18 1999/10/15 08:27:46 adam + * Fixed replace handler. 8-bit fix. + * + * Revision 1.17 1999/09/08 12:13:21 adam + * Fixed minor bug "replace"-mappings. Removed some logging messages. + * + * Revision 1.16 1999/09/07 07:19:21 adam * Work on character mapping. Implemented replace rules. * * Revision 1.15 1999/05/26 07:49:14 adam @@ -63,15 +81,17 @@ #include #include -#include +#include #include #include #define ZEBRA_MAP_TYPE_SORT 1 #define ZEBRA_MAP_TYPE_INDEX 2 +#define ZEBRA_REPLACE_ANY 300 + struct zm_token { - char *token_from; + int *token_from; char *token_to; int token_min; struct zm_token *next; @@ -166,6 +186,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) (*zm)->maptab = NULL; (*zm)->completeness = 0; (*zm)->positioned = 0; + (*zm)->replace_tokens = 0; } else if (zm && !yaz_matchstr (argv[0], "charmap") && argc == 2) { @@ -187,27 +208,43 @@ static void zebra_map_read (ZebraMaps zms, const char *name) else if (zm && !yaz_matchstr (argv[0], "replace") && argc >= 2) { struct zm_token *token = nmem_malloc (zms->nmem, sizeof(*token)); - char *cp, *dp; token->next = (*zm)->replace_tokens; (*zm)->replace_tokens = token; - dp = token->token_from = nmem_strdup (zms->nmem, cp = argv[1]); - while (*cp) - { - if (*cp == '$') - { - *dp++ = ' '; - cp++; - } - else - *dp++ = zebra_prim(&cp); +#if 0 + logf (LOG_LOG, "replace %s", argv[1]); +#endif + token->token_from = 0; + if (argc >= 2) + { + char *cp = argv[1]; + int *dp = token->token_from = (int *) + nmem_malloc (zms->nmem, (1+strlen(cp))*sizeof(int)); + while (*cp) + if (*cp == '$') + { + *dp++ = ' '; + cp++; + } + else if (*cp == '.') + { + *dp++ = ZEBRA_REPLACE_ANY; + cp++; + } + else + { + *dp++ = zebra_prim(&cp); +#if 0 + logf (LOG_LOG, " char %2X %c", dp[-1], dp[-1]); +#endif + } + *dp = '\0'; } - *dp = '\0'; - if (argc >= 3) { - dp = token->token_to = nmem_strdup (zms->nmem, cp = argv[2]); + char *cp = argv[2]; + char *dp = token->token_to = + nmem_malloc (zms->nmem, strlen(cp)+1); while (*cp) - { if (*cp == '$') { *dp++ = ' '; @@ -215,7 +252,6 @@ static void zebra_map_read (ZebraMaps zms, const char *name) } else *dp++ = zebra_prim(&cp); - } *dp = '\0'; } else @@ -279,7 +315,7 @@ chrmaptab zebra_charmap_get (ZebraMaps zms, unsigned reg_id) logf (LOG_WARN, "Unknown register type: %c", reg_id); zm->reg_id = reg_id; - zm->maptab_name = NULL; + zm->maptab_name = nmem_strdup (zms->nmem, "@"); zm->maptab = NULL; zm->type = ZEBRA_MAP_TYPE_INDEX; zm->completeness = 0; @@ -465,36 +501,48 @@ int zebra_maps_is_sort (ZebraMaps zms, unsigned reg_id) return 0; } -int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes) +int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes, + int *numerical) { AttrType use; + AttrType structure; + int structure_value; attr_init_AttrList (&use, sortAttributes->list, 1); + attr_init_AttrList (&structure, sortAttributes->list, 4); + *numerical = 0; + structure_value = attr_find (&structure, 0); + if (structure_value == 109) + *numerical = 1; return attr_find (&use, NULL); } int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, - unsigned *reg_id, char **search_type, char **rank_type, + unsigned *reg_id, char **search_type, char *rank_type, int *complete_flag, int *sort_flag) { AttrType completeness; AttrType structure; AttrType relation; AttrType sort_relation; + AttrType weight; int completeness_value; int structure_value; int relation_value; int sort_relation_value; + int weight_value; attr_init_APT (&structure, zapt, 4); attr_init_APT (&completeness, zapt, 6); attr_init_APT (&relation, zapt, 2); attr_init_APT (&sort_relation, zapt, 7); + attr_init_APT (&weight, zapt, 9); completeness_value = attr_find (&completeness, NULL); structure_value = attr_find (&structure, NULL); relation_value = attr_find (&relation, NULL); sort_relation_value = attr_find (&sort_relation, NULL); + weight_value = attr_find (&weight, NULL); if (completeness_value == 2 || completeness_value == 3) *complete_flag = 1; @@ -504,10 +552,13 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, *sort_flag = (sort_relation_value > 0) ? 1 : 0; *search_type = "phrase"; - *rank_type = "void"; + strcpy (rank_type, "void"); if (relation_value == 102) - *rank_type = "rank"; - + { + if (weight_value == -1) + weight_value == 34; + sprintf (rank_type, "rank,%d", weight_value); + } if (*complete_flag) *reg_id = 'p'; else @@ -545,6 +596,14 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, *reg_id = '0'; *search_type = "phrase"; break; + case 4: /* year */ + *reg_id = 'y'; + *search_type = "phrase"; + break; + case 5: /* date */ + *reg_id = 'd'; + *search_type = "phrase"; + break; default: return -1; } @@ -561,11 +620,10 @@ WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list, wrbuf_rewind(zms->wrbuf_1); wrbuf_write(zms->wrbuf_1, input_str, input_len); - if (!zm->replace_tokens) + if (!zm || !zm->replace_tokens) return zms->wrbuf_1; - -#if 0 - logf (LOG_LOG, "zebra_replace"); + +#if 0 logf (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1), wrbuf_buf(zms->wrbuf_1)); #endif @@ -615,8 +673,8 @@ int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list, if (i+j < 0 || j+i >= input_len) c = ' '; else - c = tolower(input_str[j+i]); - if (token->token_from[j] == '.') + c = input_str[j+i] & 255; + if (token->token_from[j] == ZEBRA_REPLACE_ANY) { if (c == ' ') break; @@ -625,7 +683,9 @@ int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list, else { if (c != token->token_from[j]) + { break; + } if (!replace_done) { const char *cp = token->token_to;