X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fzebramap.c;h=e05bdcefdde6c78f46ae9ad826209eedc3664001;hb=56795fa9571dcb939b249bcd654ae5094fee835f;hp=4e24daa67ca596f132dac88dde5ffe8c6c009c15;hpb=01ddc55fd5a59535e229c09440cfdadccadf3555;p=idzebra-moved-to-github.git diff --git a/util/zebramap.c b/util/zebramap.c index 4e24daa..e05bdce 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -4,7 +4,19 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.c,v $ - * Revision 1.16 1999-09-07 07:19:21 adam + * Revision 1.20 2000-03-02 14:35:19 adam + * Added structure year and date. + * + * Revision 1.19 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.18 1999/10/15 08:27:46 adam + * Fixed replace handler. 8-bit fix. + * + * Revision 1.17 1999/09/08 12:13:21 adam + * Fixed minor bug "replace"-mappings. Removed some logging messages. + * + * Revision 1.16 1999/09/07 07:19:21 adam * Work on character mapping. Implemented replace rules. * * Revision 1.15 1999/05/26 07:49:14 adam @@ -63,15 +75,17 @@ #include #include -#include +#include #include #include #define ZEBRA_MAP_TYPE_SORT 1 #define ZEBRA_MAP_TYPE_INDEX 2 +#define ZEBRA_REPLACE_ANY 300 + struct zm_token { - char *token_from; + int *token_from; char *token_to; int token_min; struct zm_token *next; @@ -166,6 +180,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) (*zm)->maptab = NULL; (*zm)->completeness = 0; (*zm)->positioned = 0; + (*zm)->replace_tokens = 0; } else if (zm && !yaz_matchstr (argv[0], "charmap") && argc == 2) { @@ -187,27 +202,43 @@ static void zebra_map_read (ZebraMaps zms, const char *name) else if (zm && !yaz_matchstr (argv[0], "replace") && argc >= 2) { struct zm_token *token = nmem_malloc (zms->nmem, sizeof(*token)); - char *cp, *dp; token->next = (*zm)->replace_tokens; (*zm)->replace_tokens = token; - dp = token->token_from = nmem_strdup (zms->nmem, cp = argv[1]); - while (*cp) - { - if (*cp == '$') - { - *dp++ = ' '; - cp++; - } - else - *dp++ = zebra_prim(&cp); +#if 0 + logf (LOG_LOG, "replace %s", argv[1]); +#endif + token->token_from = 0; + if (argc >= 2) + { + char *cp = argv[1]; + int *dp = token->token_from = (int *) + nmem_malloc (zms->nmem, (1+strlen(cp))*sizeof(int)); + while (*cp) + if (*cp == '$') + { + *dp++ = ' '; + cp++; + } + else if (*cp == '.') + { + *dp++ = ZEBRA_REPLACE_ANY; + cp++; + } + else + { + *dp++ = zebra_prim(&cp); +#if 0 + logf (LOG_LOG, " char %2X %c", dp[-1], dp[-1]); +#endif + } + *dp = '\0'; } - *dp = '\0'; - if (argc >= 3) { - dp = token->token_to = nmem_strdup (zms->nmem, cp = argv[2]); + char *cp = argv[2]; + char *dp = token->token_to = + nmem_malloc (zms->nmem, strlen(cp)+1); while (*cp) - { if (*cp == '$') { *dp++ = ' '; @@ -215,7 +246,6 @@ static void zebra_map_read (ZebraMaps zms, const char *name) } else *dp++ = zebra_prim(&cp); - } *dp = '\0'; } else @@ -279,7 +309,7 @@ chrmaptab zebra_charmap_get (ZebraMaps zms, unsigned reg_id) logf (LOG_WARN, "Unknown register type: %c", reg_id); zm->reg_id = reg_id; - zm->maptab_name = NULL; + zm->maptab_name = nmem_strdup (zms->nmem, "@"); zm->maptab = NULL; zm->type = ZEBRA_MAP_TYPE_INDEX; zm->completeness = 0; @@ -545,6 +575,14 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, *reg_id = '0'; *search_type = "phrase"; break; + case 4: /* year */ + *reg_id = 'y'; + *search_type = "phrase"; + break; + case 5: /* date */ + *reg_id = 'd'; + *search_type = "phrase"; + break; default: return -1; } @@ -561,11 +599,10 @@ WRBUF zebra_replace(ZebraMaps zms, unsigned reg_id, const char *ex_list, wrbuf_rewind(zms->wrbuf_1); wrbuf_write(zms->wrbuf_1, input_str, input_len); - if (!zm->replace_tokens) + if (!zm || !zm->replace_tokens) return zms->wrbuf_1; - -#if 0 - logf (LOG_LOG, "zebra_replace"); + +#if 0 logf (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1), wrbuf_buf(zms->wrbuf_1)); #endif @@ -615,8 +652,8 @@ int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list, if (i+j < 0 || j+i >= input_len) c = ' '; else - c = tolower(input_str[j+i]); - if (token->token_from[j] == '.') + c = input_str[j+i] & 255; + if (token->token_from[j] == ZEBRA_REPLACE_ANY) { if (c == ' ') break; @@ -625,7 +662,9 @@ int zebra_replace_sub(ZebraMaps zms, unsigned reg_id, const char *ex_list, else { if (c != token->token_from[j]) + { break; + } if (!replace_done) { const char *cp = token->token_to;