From 195222aff7f9fa54caaef052c3fdd15f47f4703d Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 18 Nov 1997 10:05:08 +0000 Subject: [PATCH] Changed character map facility so that admin can specify character mapping files for each register type, w, p, etc. --- CHANGELOG | 6 +-- include/zebramap.h | 9 +++- index/Makefile | 13 ++++- index/main.c | 8 +++- index/zserver.c | 9 +++- rset/rsrel.c | 8 +++- tab/default.idx | 19 ++++++++ util/zebramap.c | 135 +++++++++++++++++++++++++++++++++++----------------- 8 files changed, 151 insertions(+), 56 deletions(-) create mode 100644 tab/default.idx diff --git a/CHANGELOG b/CHANGELOG index d471554..444e181 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,10 +1,10 @@ +Moved towards generic character mapping. Configuration file default.idx +specifies character map files for register types w, p, u, etc. + Implemented "begin variant" for the sgml.regx - filter. Fixed a few memory leaks. -Moved towards generic character mapping. Type w, p uses -string.chr, u uses urx.chr and n uses numeric.chr. - Added support for C++, headers uses extern "C" for public definitions. New filter grs.marc. that reads MARC records in the ISO2709 diff --git a/include/zebramap.h b/include/zebramap.h index da7fc7c..4439f1a 100644 --- a/include/zebramap.h +++ b/include/zebramap.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.h,v $ - * Revision 1.2 1997-10-29 12:02:47 adam + * Revision 1.3 1997-11-18 10:05:08 adam + * Changed character map facility so that admin can specify character + * mapping files for each register type, w, p, etc. + * + * Revision 1.2 1997/10/29 12:02:47 adam * Added missing prototype. * * Revision 1.1 1997/10/27 14:33:04 adam @@ -19,13 +23,14 @@ #define ZEBRAMAP_H #include +#include #ifdef __cplusplus extern "C" { #endif typedef struct zebra_maps *ZebraMaps; -ZebraMaps zebra_maps_open (const char *tabpath); +ZebraMaps zebra_maps_open (const char *tabpath, Res res); void zebra_maps_close (ZebraMaps zm); diff --git a/index/Makefile b/index/Makefile index b97c001..3b7f2cd 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995-1996, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.44 1997-10-27 14:33:04 adam Exp $ +# $Id: Makefile,v 1.45 1997-11-18 10:05:08 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -17,6 +17,7 @@ INCLUDE=-I../include $(YAZINC) TPROG1=zebraidx TPROG2=kdump TPROG3=zebrasrv +TPROG4=hlvltest DEFS=$(INCLUDE) O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ symtab.o recindex.o recstat.o lockutil.o lockidx.o \ @@ -24,6 +25,7 @@ O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o attribute.o recindex.o \ zlogs.o lockutil.o locksrv.o zinfo.o trunc.o +O4 = hlvl.o hlvltest.o kcompare.o CPP=$(CC) -E all: $(TPROG1) $(TPROG2) $(TPROG3) @@ -47,6 +49,15 @@ $(TPROG3): $(O3) \ ../lib/isamc.a ../lib/bfile.a ../lib/dfa.a ../lib/zebrautl.a \ $(YAZLIB) $(OSILIB) $(NETLIB) -lm +$(TPROG4): $(O4) \ + ../lib/rset.a ../lib/dict.a ../lib/isam.a ../lib/recctrl.a \ + ../lib/isamc.a ../lib/bfile.a ../lib/dfa.a ../lib/zebrautl.a \ + $(YAZLIB) + $(CC) $(CFLAGS) -o $(TPROG4) $(O4) \ + ../lib/rset.a ../lib/dict.a ../lib/isam.a ../lib/recctrl.a \ + ../lib/isamc.a ../lib/bfile.a ../lib/dfa.a ../lib/zebrautl.a \ + $(YAZLIB) $(OSILIB) $(NETLIB) -lm + .c.o: $(CC) -c $(DEFS) $(CFLAGS) $< diff --git a/index/main.c b/index/main.c index 5add43e..7870d52 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.52 1997-10-31 12:34:04 adam + * Revision 1.53 1997-11-18 10:05:08 adam + * Changed character map facility so that admin can specify character + * mapping files for each register type, w, p, etc. + * + * Revision 1.52 1997/10/31 12:34:04 adam * Added missing nmem_init. * * Revision 1.51 1997/10/27 14:33:05 adam @@ -293,7 +297,7 @@ int main (int argc, char **argv) bf_lockDir (rGroupDef.bfs, res_get (common_resource, "lockDir")); rGroupDef.zebra_maps = zebra_maps_open (res_get( - common_resource, "profilePath")); + common_resource, "profilePath"), common_resource); } if (!strcmp (arg, "update")) cmd = 'u'; diff --git a/index/zserver.c b/index/zserver.c index bd89d6e..af81ffa 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.51 1997-10-27 14:33:06 adam + * Revision 1.52 1997-11-18 10:05:08 adam + * Changed character map facility so that admin can specify character + * mapping files for each register type, w, p, etc. + * + * Revision 1.51 1997/10/27 14:33:06 adam * Moved towards generic character mapping depending on "structure" * field in abstract syntax file. Fixed a few memory leaks. Fixed * bug with negative integers when doing searches with relational @@ -327,7 +331,8 @@ bend_initresult *bend_init (bend_initrequest *q) zi->records = NULL; zi->odr = odr_createmem (ODR_ENCODE); zi->registered_sets = NULL; - zi->zebra_maps = zebra_maps_open (res_get(zi->res, "profilePath")); + zi->zebra_maps = zebra_maps_open (res_get(zi->res, "profilePath"), + zi->res); return r; } diff --git a/rset/rsrel.c b/rset/rsrel.c index 1c1c6c0..d344b01 100644 --- a/rset/rsrel.c +++ b/rset/rsrel.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsrel.c,v $ - * Revision 1.20 1997-10-31 12:37:55 adam + * Revision 1.21 1997-11-18 10:05:08 adam + * Changed character map facility so that admin can specify character + * mapping files for each register type, w, p, etc. + * + * Revision 1.20 1997/10/31 12:37:55 adam * Code calls xfree() instead of free(). * * Revision 1.19 1997/10/01 11:44:06 adam @@ -173,7 +177,7 @@ static int qcomp (const void *p1, const void *p2) qsort_info->key_buf + i2*qsort_info->key_size); } -#define NEW_RANKING 1 +#define NEW_RANKING 0 #define SCORE_SHOW 0.0 /* base score for showing up */ #define SCORE_COOC 0.3 /* component dependent on co-oc */ diff --git a/tab/default.idx b/tab/default.idx new file mode 100644 index 0000000..c908a85 --- /dev/null +++ b/tab/default.idx @@ -0,0 +1,19 @@ +index w +completeness 0 +charmap string.chr + +index p +completeness 1 +charmap string.chr + +index u +completeness 0 +charmap urx.chr + +index n +completeness 0 +charmap numeric.chr + +index 0 +completeness 0 +charmap @ diff --git a/util/zebramap.c b/util/zebramap.c index 8fe8041..c553f2c 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.c,v $ - * Revision 1.3 1997-11-17 15:35:26 adam + * Revision 1.4 1997-11-18 10:05:08 adam + * Changed character map facility so that admin can specify character + * mapping files for each register type, w, p, etc. + * + * Revision 1.3 1997/11/17 15:35:26 adam * Bug fix. Relation=relevance wasn't observed. * * Revision 1.2 1997/10/31 12:39:30 adam @@ -27,7 +31,9 @@ struct zebra_map { int reg_type; + int completeness; chrmaptab maptab; + const char *maptab_name; struct zebra_map *next; }; @@ -35,6 +41,8 @@ struct zebra_maps { char *tabpath; NMEM nmem; struct zebra_map *map_list; + char temp_map_str[2]; + const char *temp_map_ptr[2]; }; void zebra_maps_close (ZebraMaps zms) @@ -52,75 +60,111 @@ void zebra_maps_close (ZebraMaps zms) xfree (zms); } -ZebraMaps zebra_maps_open (const char *tabpath) +static void zebra_map_read (ZebraMaps zms, const char *name) +{ + FILE *f; + char line[512]; + char *argv[10]; + int argc; + struct zebra_map **zm = 0; + + if (!(f = yaz_path_fopen(zms->tabpath, name, "r"))) + { + logf(LOG_WARN|LOG_ERRNO, "%s", name); + return ; + } + while ((argc = readconf_line(f, line, 512, argv, 10))) + { + if (!strcmp (argv[0], "index") && argc == 2) + { + if (!zm) + zm = &zms->map_list; + else + zm = &(*zm)->next; + *zm = nmem_malloc (zms->nmem, sizeof(**zm)); + (*zm)->reg_type = argv[1][0]; + (*zm)->maptab_name = NULL; + (*zm)->maptab = NULL; + (*zm)->completeness = 0; + } + else if (zm && !strcmp (argv[0], "charmap") && argc == 2) + { + (*zm)->maptab_name = nmem_strdup (zms->nmem, argv[1]); + } + else if (zm && !strcmp (argv[0], "completeness") && argc == 2) + { + (*zm)->completeness = atoi (argv[1]); + } + } + if (zm) + (*zm)->next = NULL; + fclose (f); +} +static void zms_map_handle (void *p, const char *name, const char *value) +{ + ZebraMaps zms = p; + + zebra_map_read (zms, value); +} + +ZebraMaps zebra_maps_open (const char *tabpath, Res res) { ZebraMaps zms = xmalloc (sizeof(*zms)); zms->nmem = nmem_create (); zms->tabpath = nmem_strdup (zms->nmem, tabpath); zms->map_list = NULL; + + zms->temp_map_str[0] = '\0'; + zms->temp_map_str[1] = '\0'; + + zms->temp_map_ptr[0] = zms->temp_map_str; + zms->temp_map_ptr[1] = NULL; + + if (!res_trav (res, "index", zms, zms_map_handle)) + zebra_map_read (zms, "default.idx"); return zms; } chrmaptab zebra_map_get (ZebraMaps zms, int reg_type) { - char name[512]; struct zebra_map *zm; - + for (zm = zms->map_list; zm; zm = zm->next) - { if (reg_type == zm->reg_type) - return zm->maptab; + break; + if (!zm) + { + logf (LOG_WARN, "unknown register type: %c", reg_type); + return NULL; } - *name = '\0'; - switch (reg_type) + if (!zm->maptab) { - case 'w': - case 'p': - strcat (name, "string"); - break; - case 'n': - strcat (name, "numeric"); - break; - case 'u': - strcat (name, "urx"); - break; - default: - strcat (name, "null"); + if (!strcmp (zm->maptab_name, "@")) + return NULL; + if (!(zm->maptab = chrmaptab_create (zms->tabpath, + zm->maptab_name, 0))) + logf(LOG_WARN, "Failed to read character table %s", + zm->maptab_name); + else + logf(LOG_DEBUG, "Read character table %s", zm->maptab_name); } - strcat (name, ".chr"); - - zm = xmalloc (sizeof(*zm)); - zm->reg_type = reg_type; - zm->next = zms->map_list; - zms->map_list = zm; - if (!(zm->maptab = chrmaptab_create (zms->tabpath, name, 0))) - logf(LOG_WARN, "Failed to read character table %s", name); - else - logf(LOG_DEBUG, "Read character table %s", name); return zm->maptab; } const char **zebra_maps_input (ZebraMaps zms, int reg_type, const char **from, int len) { - static char str[2] = {0,0}; - static const char *buf[2] = {0,0}; chrmaptab maptab; maptab = zebra_map_get (zms, reg_type); if (maptab) return chr_map_input(maptab, from, len); - - if (isalnum(**from)) - { - str[0] = isupper(**from) ? tolower(**from) : **from; - buf[0] = str; - } - else - buf[0] = (char*) CHR_SPACE; + + zms->temp_map_str[0] = **from; + (*from)++; - return buf; + return zms->temp_map_ptr; } const char *zebra_maps_output(ZebraMaps zms, int reg_type, const char **from) @@ -204,9 +248,12 @@ static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, /* ------------------------------------ */ int zebra_maps_is_complete (ZebraMaps zms, int reg_type) -{ - if (reg_type == 'p') - return 1; +{ + struct zebra_map *zm; + + for (zm = zms->map_list; zm; zm = zm->next) + if (reg_type == zm->reg_type) + return zm->completeness; return 0; } -- 1.7.10.4