From 5c693d36af8be6f6642257160b3c6441d2e2d762 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 10 Feb 1998 12:03:04 +0000 Subject: [PATCH] Implemented Sort. --- CHANGELOG | 8 ++ doc/zebra.sgml | 37 +++++-- include/recctrl.h | 13 ++- include/sortidx.h | 36 ++++++ include/zebramap.h | 10 +- include/zebraver.h | 9 +- index/Makefile | 6 +- index/extract.c | 181 ++++++++++++++++++++++++++++-- index/sortidx.c | 112 +++++++++++++++++++ index/zrpn.c | 37 ++++--- index/zserver.c | 35 ++++-- index/zserver.h | 16 ++- index/zsets.c | 312 +++++++++++++++++++++++++++++++++++++++++++++++----- recctrl/recgrs.c | 135 +++-------------------- recctrl/rectext.c | 18 +-- tab/default.idx | 7 +- util/zebramap.c | 104 +++++++++++++----- 17 files changed, 838 insertions(+), 238 deletions(-) create mode 100644 include/sortidx.h create mode 100644 index/sortidx.c diff --git a/CHANGELOG b/CHANGELOG index 7fd0e9b..b0e76dd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Implemented Z39.50 Sort. Zebra's sort handler uses use attributes to +specify a "sort register". Refer to the gils sample records which +refer to index type "s" which is specified as "sort" in the default.idx +file. Each sort criteria can either be Ascending or Descending and +at most three sort elements can be specified. + +Bug fix: Character mapping didn't work for text files. + --- 1.0b1 1998/1/29 Simple ranked searches now return correct number of hits. diff --git a/doc/zebra.sgml b/doc/zebra.sgml index 232bd17..e204727 100644 --- a/doc/zebra.sgml +++ b/doc/zebra.sgml @@ -1,14 +1,14 @@
Zebra Server - Administrators's Guide and Reference <author><htmlurl url="http://www.indexdata.dk/" name="Index Data">, <tt><htmlurl url="mailto:info@indexdata.dk" name="info@indexdata.dk"></> -<date>$Revision: 1.39 $ +<date>$Revision: 1.40 $ <abstract> The Zebra information server combines a versatile fielded/free-text search engine with a Z39.50-1995 frontend to provide a powerful and flexible @@ -91,13 +91,15 @@ schema on the fly. Supports approximate matching in registers (ie. spelling mistakes, etc). -<item> +</itemize> + +<p> Protocol support: <itemize> <item> -Protocol facilities: Init, Search, Retrieve, Browse. +Protocol facilities: Init, Search, Retrieve, Browse and Sort. <item> Piggy-backed presents are honored in the search-request. @@ -135,8 +137,6 @@ requires MSVC++ to compile the system (we use version 5.0). </itemize> -</itemize> - <sect1>Future Work <p> @@ -1094,6 +1094,22 @@ processed in the same way as operands in a query (see above). Currently, only the term and the globalOccurrences are returned with the TermInfo structure. +<sect2>Sort + +<p> +Z39.50 specifies three different types of sort criteria. +Of these, Zebra supports the attribute specification type in which +case the use attribute specifies the "Sort register". +Sort registers are created for those fields that are of type "sort" in +the default.idx file. +The corresponding character mapping file in default.idx specifies the +ordinal of each character used in the actual sort. + +Z39.50 allows the client to specify sorting on one or more input +result sets and one output result set. +Zebra supports sorting on one result set only, which may or may not +be the same as the output result set. + <sect2>Close <p> @@ -2052,11 +2068,18 @@ of the .idx file is as follows <descrip> <tag>index <it/field type code/</tag>This directive introduces a new -index code. The argument is a one-character code to be used in the +search index code. 
The argument is a one-character code to be used in the .abs files to select this particular index type. An index, roughly, corresponds to a particular structure attribute during search. Refer to section <ref id="search" name="Search">. +<tag>sort <it/field type code/</tag>This directive introduces a +sort index. The argument is a one-character code to be used in the +.abs files to select this particular index type. The corresponding +use attribute must be used in the sort request to refer to this +particular sort index. The corresponding character map (see below) +is used in the sort process. + <tag>completeness <it/boolean/</tag>This directive enables or disables complete field indexing. The value of the <it/boolean/ should be 0 (disable) or 1. If completeness is enabled, the index entry will diff --git a/include/recctrl.h b/include/recctrl.h index 722bbf7..9000fda 100644 --- a/include/recctrl.h +++ b/include/recctrl.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recctrl.h,v $ - * Revision 1.22 1997-10-27 14:33:04 adam + * Revision 1.23 1998-02-10 12:03:05 adam + * Implemented Sort. + * + * Revision 1.22 1997/10/27 14:33:04 adam * Moved towards generic character mapping depending on "structure" * field in abstract syntax file. Fixed a few memory leaks. 
Fixed * bug with negative integers when doing searches with relational @@ -104,7 +107,9 @@ typedef struct { int attrUse; unsigned reg_type; char *string; - int seqno; + int length; + int seqno; + ZebraMaps zebra_maps; } RecWord; /* Extract record control */ @@ -116,8 +121,8 @@ struct recExtractCtrl { void (*endf)(void *fh, off_t offset); /* end of record position */ off_t offset; /* start offset */ char *subType; - void (*init)(RecWord *p); - void (*add)(const RecWord *p); + void (*init)(struct recExtractCtrl *p, RecWord *w); + void (*add)(RecWord *p); ZebraMaps zebra_maps; int flagShowRecords; data1_handle dh; diff --git a/include/sortidx.h b/include/sortidx.h new file mode 100644 index 0000000..1daac80 --- /dev/null +++ b/include/sortidx.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 1994-1998, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: sortidx.h,v $ + * Revision 1.1 1998-02-10 12:03:05 adam + * Implemented Sort. + * + */ + +#ifndef SORTIDX_H +#define SORTIDX_H + +#include <bfile.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define SORT_IDX_ENTRYSIZE 64 + +typedef struct sortIdx *SortIdx; + +SortIdx sortIdx_open (BFiles bfs, int write_flag); +void sortIdx_close (SortIdx si); +int sortIdx_type (SortIdx si, int type); +void sortIdx_sysno (SortIdx si, int sysno); +void sortIdx_add (SortIdx si, const char *buf, int len); +void sortIdx_read (SortIdx si, char *buf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/zebramap.h b/include/zebramap.h index 4439f1a..07e6908 100644 --- a/include/zebramap.h +++ b/include/zebramap.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.h,v $ - * Revision 1.3 1997-11-18 10:05:08 adam + * Revision 1.4 1998-02-10 12:03:05 adam + * Implemented Sort. 
+ * + * Revision 1.3 1997/11/18 10:05:08 adam * Changed character map facility so that admin can specify character * mapping files for each register type, w, p, etc. * @@ -41,7 +44,10 @@ const char *zebra_maps_output(ZebraMaps, int reg_type, const char **from); int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, int *reg_type, char **search_type, int *complete_flag); +int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes); + int zebra_maps_is_complete (ZebraMaps zms, int structure); +int zebra_maps_is_sort (ZebraMaps zms, int reg_type); #ifdef __cplusplus } #endif diff --git a/include/zebraver.h b/include/zebraver.h index f51ebe1..3d69834 100644 --- a/include/zebraver.h +++ b/include/zebraver.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebraver.h,v $ - * Revision 1.11 1998-01-29 13:33:04 adam + * Revision 1.12 1998-02-10 12:03:05 adam + * Implemented Sort. + * + * Revision 1.11 1998/01/29 13:33:04 adam * 1.0b1. * * Revision 1.10 1997/02/12 20:45:21 adam @@ -40,10 +43,10 @@ */ #ifndef ZEBRAVER -#define ZEBRAVER "1.0b1" +#define ZEBRAVER "1.0b2" #endif #ifndef ZEBRADATE -#define ZEBRADATE "$Date: 1998-01-29 13:33:04 $" +#define ZEBRADATE "$Date: 1998-02-10 12:03:05 $" #endif diff --git a/index/Makefile b/index/Makefile index f0f89ad..ac8cba6 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995-1998, Index Data I/S # All rights reserved. 
# Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.47 1998-01-30 15:31:49 adam Exp $ +# $Id: Makefile,v 1.48 1998-02-10 12:03:05 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -20,10 +20,10 @@ TPROG4=hlvltest DEFS=$(INCLUDE) O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ symtab.o recindex.o recstat.o lockutil.o lockidx.o \ - zinfo.o invstat.o + zinfo.o invstat.o sortidx.o O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o attribute.o recindex.o \ - zlogs.o lockutil.o locksrv.o zinfo.o trunc.o + zlogs.o lockutil.o locksrv.o zinfo.o trunc.o sortidx.o O4 = hlvl.o hlvltest.o kcompare.o CPP=$(CC) -E diff --git a/index/extract.c b/index/extract.c index db1c9c7..829bb68 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.77 1998-01-12 15:04:08 adam + * Revision 1.78 1998-02-10 12:03:05 adam + * Implemented Sort. + * + * Revision 1.77 1998/01/12 15:04:08 adam * The test option (-s) only uses read-lock (and not write lock). 
* * Revision 1.76 1997/10/27 14:33:04 adam @@ -290,6 +293,8 @@ #include <fcntl.h> #include <recctrl.h> +#include <charmap.h> +#include <sortidx.h> #include "index.h" #include "zinfo.h" @@ -297,6 +302,7 @@ static Dict matchDict; static Records records = NULL; +static SortIdx sortIdx = NULL; static char **key_buf; static size_t ptr_top; @@ -355,6 +361,7 @@ int key_open (BFiles bfs, int mem, int rw) dict_close (matchDict); return -1; } + sortIdx = sortIdx_open (bfs, 1); return 0; } @@ -535,19 +542,29 @@ int key_close () #endif rec_close (&records); dict_close (matchDict); + sortIdx_close (sortIdx); logRecord (1); return key_file_no; } -static void wordInit (RecWord *p) +static void wordInit (struct recExtractCtrl *p, RecWord *w) { - p->attrSet = 1; - p->attrUse = 1016; - p->reg_type = 'w'; + w->zebra_maps = p->zebra_maps; + w->attrSet = 1; + w->attrUse = 1016; + w->reg_type = 'w'; } -struct recKeys { +static struct sortKey { + char *string; + int length; + int attrSet; + int attrUse; + struct sortKey *next; +} *sortKeys = NULL; + +static struct recKeys { int buf_used; int buf_max; char *buf; @@ -556,12 +573,11 @@ struct recKeys { int prevSeqNo; } reckeys; -static void addRecordKey (const RecWord *p) +static void addIndexString (RecWord *p, const char *string, int length) { char *dst; char attrSet; short attrUse; - size_t i; int lead = 0; int diff = 0; @@ -609,8 +625,8 @@ static void addRecordKey (const RecWord *p) dst += sizeof(attrUse); } *dst++ = p->reg_type; - for (i = 0; p->string[i] && i < IT_MAX_WORD-3; i++) - *dst++ = p->string[i]; + memcpy (dst, string, length); + dst += length; *dst++ = '\0'; if (!diff) @@ -619,6 +635,149 @@ static void addRecordKey (const RecWord *p) dst += sizeof(p->seqno); } reckeys.buf_used = dst - reckeys.buf; + (p->seqno)++; +} + +static void addSortString (RecWord *p, const char *string, int length) +{ + struct sortKey *sk; + + for (sk = sortKeys; sk; sk = sk->next) + if (sk->attrSet == p->attrSet && sk->attrUse == p->attrUse) + 
return; + + sk = xmalloc (sizeof(*sk)); + sk->next = sortKeys; + sortKeys = sk; + + sk->string = xmalloc (p->length); + sk->length = p->length; + memcpy (sk->string, p->string, p->length); + sk->attrSet = p->attrSet; + sk->attrUse = p->attrUse; +} + +static void addString (RecWord *p, const char *string, int length) +{ + if (zebra_maps_is_sort (p->zebra_maps, p->reg_type)) + addSortString (p, string, length); + else + addIndexString (p, string, length); +} + +static void addIncompleteField (RecWord *p) +{ + const char *b = p->string; + int remain = p->length; + const char **map = 0; + + if (remain > 0) + map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); + + while (map) + { + char buf[IT_MAX_WORD+1]; + int i, remain; + + /* Skip spaces */ + while (map && *map && **map == *CHR_SPACE) + { + remain = p->length - (b - p->string); + if (remain > 0) + map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); + else + map = 0; + } + if (!map) + break; + i = 0; + while (map && *map && **map != *CHR_SPACE) + { + const char *cp = *map; + + while (i < IT_MAX_WORD && *cp) + buf[i++] = *(cp++); + remain = p->length - (b - p->string); + if (remain > 0) + map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); + else + map = 0; + } + if (!i) + return; + addString (p, buf, i); + } +} + +static void addCompleteField (RecWord *p) +{ + const char *b = p->string; + char buf[IT_MAX_WORD+1]; + const char **map = 0; + int i = 0, remain = p->length; + + if (remain > 0) + map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain); + + while (remain > 0 && i < IT_MAX_WORD) + { + while (map && *map && **map == *CHR_SPACE) + { + remain = p->length - (b - p->string); + if (remain > 0) + map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain); + else + map = 0; + } + if (!map) + break; + + if (i && i < IT_MAX_WORD) + buf[i++] = *CHR_SPACE; + while (map && *map && **map != *CHR_SPACE) + { + const char *cp = *map; + + if (i >= IT_MAX_WORD) + break; + while 
(i < IT_MAX_WORD && *cp) + buf[i++] = *(cp++); + remain = p->length - (b - p->string); + if (remain > 0) + map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, + remain); + else + map = 0; + } + } + if (!i) + return; + addString (p, buf, i); +} + +static void addRecordKey (RecWord *p) +{ + if (zebra_maps_is_complete (p->zebra_maps, p->reg_type)) + addCompleteField (p); + else + addIncompleteField(p); +} + +static void flushSortKeys (SYSNO sysno, int cmd) +{ + struct sortKey *sk = sortKeys; + + sortIdx_sysno (sortIdx, sysno); + while (sk) + { + struct sortKey *sk_next = sk->next; + sortIdx_type (sortIdx, sk->attrUse); + sortIdx_add (sortIdx, sk->string, sk->length); + xfree (sk->string); + xfree (sk); + sk = sk_next; + } + sortKeys = NULL; } static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, @@ -1096,6 +1255,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, dict_insert (matchDict, matchStr, sizeof(*sysno), sysno); } flushRecordKeys (*sysno, 1, &reckeys, rGroup->databaseName); + flushSortKeys (*sysno, 1); records_inserted++; } @@ -1108,6 +1268,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, assert (rec); delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; + flushSortKeys (*sysno, 0); flushRecordKeys (*sysno, 0, &delkeys, rec->info[recInfo_databaseName]); if (deleteFlag) { diff --git a/index/sortidx.c b/index/sortidx.c new file mode 100644 index 0000000..4980a9a --- /dev/null +++ b/index/sortidx.c @@ -0,0 +1,112 @@ + +#include <string.h> + +#include <log.h> +#include <bfile.h> +#include <sortidx.h> + +#define SORT_IDX_BLOCKSIZE 64 + +struct sortFileHead { + int sysno_max; +}; + +struct sortFile { + int type; + BFile bf; + struct sortFile *next; + struct sortFileHead head; +}; + +struct sortIdx { + BFiles bfs; + int write_flag; + int sysno; + char *entry_buf; + struct sortFile *current_file; + struct sortFile *files; +}; + +SortIdx sortIdx_open (BFiles bfs, int write_flag) +{ 
+ SortIdx si = xmalloc (sizeof(*si)); + si->bfs = bfs; + si->write_flag = write_flag; + si->current_file = NULL; + si->files = NULL; + si->entry_buf = xmalloc (SORT_IDX_ENTRYSIZE); + return si; +} + +void sortIdx_close (SortIdx si) +{ + struct sortFile *sf = si->files; + while (sf) + { + struct sortFile *sf_next = sf->next; + if (sf->bf) + bf_close (sf->bf); + xfree (sf); + sf = sf_next; + } + xfree (si->entry_buf); + xfree (si); +} + +int sortIdx_type (SortIdx si, int type) +{ + char fname[80]; + struct sortFile *sf; + if (si->current_file && si->current_file->type == type) + return 0; + for (sf = si->files; sf; sf = sf->next) + if (sf->type == type) + { + si->current_file = sf; + return 0; + } + sf = xmalloc (sizeof(*sf)); + sf->type = type; + sf->bf = NULL; + sf->next = si->files; + si->current_file = si->files = sf; + sprintf (fname, "sort%d", type); + logf (LOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag); + sf->bf = bf_open (si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag); + if (!sf->bf) + return -1; + if (!bf_read (sf->bf, 0, 0, sizeof(sf->head), &sf->head)) + { + sf->head.sysno_max = 0; + if (!si->write_flag) + return -1; + } + return 0; +} + +void sortIdx_sysno (SortIdx si, int sysno) +{ + si->sysno = sysno; +} + +void sortIdx_add (SortIdx si, const char *buf, int len) +{ + if (!si->current_file || !si->current_file->bf) + return; + if (len > SORT_IDX_ENTRYSIZE) + { + len = SORT_IDX_ENTRYSIZE; + memcpy (si->entry_buf, buf, len); + } + else + { + memcpy (si->entry_buf, buf, len); + memset (si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); + } + bf_write (si->current_file->bf, si->sysno+1, 0, 0, si->entry_buf); +} + +void sortIdx_read (SortIdx si, char *buf) +{ + bf_read (si->current_file->bf, si->sysno+1, 0, 0, buf); +} diff --git a/index/zrpn.c b/index/zrpn.c index fc4dcca..a5eac1e 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.73 1998-01-29 13:40:11 adam + * 
Revision 1.74 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.73 1998/01/29 13:40:11 adam * Better logging for scan service. * * Revision 1.72 1998/01/07 13:53:41 adam @@ -1271,7 +1274,7 @@ static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId) } static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, - oid_value attributeSet, + oid_value attributeSet, ODR stream, int num_bases, char **basenames) { RSET r = NULL; @@ -1283,14 +1286,14 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1, - attributeSet, + attributeSet, stream, num_bases, basenames); if (bool_parms.rset_l == NULL) return NULL; if (rset_is_ranked(bool_parms.rset_l)) soft = 1; bool_parms.rset_r = rpn_search_structure (zi, zs->u.complex->s2, - attributeSet, + attributeSet, stream, num_bases, basenames); if (bool_parms.rset_r == NULL) { @@ -1321,7 +1324,7 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, } if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word) { - char *val = odr_malloc (zi->odr, 16); + char *val = odr_malloc (stream, 16); zi->errCode = 132; zi->errString = val; sprintf (val, "%d", *zop->u.prox->proximityUnitCode); @@ -1425,7 +1428,7 @@ static void count_set (RSET r, int *count) logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *count); } -int rpn_search (ZServerInfo *zi, +int rpn_search (ZServerInfo *zi, ODR stream, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname, int *hits) { @@ -1441,7 +1444,7 @@ int rpn_search (ZServerInfo *zi, attrset = oid_getentbyoid (rpn->attributeSetId); attributeSet = attrset->value; - rset = rpn_search_structure (zi, rpn->RPNStructure, attributeSet, + rset = rpn_search_structure (zi, rpn->RPNStructure, attributeSet, stream, num_bases, basenames); if (!rset) return zi->errCode; @@ -1490,10 +1493,10 @@ static int scan_handle (char *name, const char *info, int pos, void *client) } 
-static void scan_term_untrans (ZServerInfo *zi, int reg_type, +static void scan_term_untrans (ZServerInfo *zi, ODR stream, int reg_type, char **dstp, const char *src) { - char *dst = odr_malloc (zi->odr, strlen(src)*2+1); + char *dst = odr_malloc (stream, strlen(src)*2+1); *dstp = dst; while (*src) @@ -1505,7 +1508,7 @@ static void scan_term_untrans (ZServerInfo *zi, int reg_type, *dst = '\0'; } -int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, oid_value attributeset, int num_bases, char **basenames, int *position, int *num_entries, struct scan_entry **list, @@ -1582,7 +1585,7 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return zi->errCode = 113; before = pos-1; after = 1+num-pos; - scan_info_array = odr_malloc (zi->odr, ord_no * sizeof(*scan_info_array)); + scan_info_array = odr_malloc (stream, ord_no * sizeof(*scan_info_array)); for (i = 0; i < ord_no; i++) { int j, prefix_len = 0; @@ -1594,9 +1597,9 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, scan_info->before = before; scan_info->after = after; - scan_info->odr = zi->odr; + scan_info->odr = stream; - scan_info->list = odr_malloc (zi->odr, (before+after)* + scan_info->list = odr_malloc (stream, (before+after)* sizeof(*scan_info->list)); for (j = 0; j<before+after; j++) scan_info->list[j].term = NULL; @@ -1610,7 +1613,7 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, dict_scan (zi->dict, termz, &before_tmp, &after_tmp, scan_info, scan_handle); } - glist = odr_malloc (zi->odr, (before+after)*sizeof(*glist)); + glist = odr_malloc (stream, (before+after)*sizeof(*glist)); for (i = 0; i < ord_no; i++) ptr[i] = before; @@ -1634,7 +1637,8 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, } if (j0 == -1) break; - scan_term_untrans (zi, reg_type, &glist[i+before].term, mterm); + scan_term_untrans (zi, stream, reg_type, + &glist[i+before].term, mterm); rset = rset_trunc (zi, 
&scan_info_array[j0].list[ptr[j0]].isam_p, 1); ptr[j0]++; @@ -1692,7 +1696,8 @@ int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, if (j0 == -1) break; - scan_term_untrans (zi, reg_type, &glist[before-1-i].term, mterm); + scan_term_untrans (zi, stream, reg_type, + &glist[before-1-i].term, mterm); rset = rset_trunc (zi, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1); diff --git a/index/zserver.c b/index/zserver.c index 002989f..4c1ec26 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.54 1998-01-29 13:39:13 adam + * Revision 1.55 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.54 1998/01/29 13:39:13 adam * Compress ISAM is default. * * Revision 1.53 1998/01/12 15:04:09 adam @@ -248,6 +251,7 @@ static int register_lock (ZServerInfo *zi) { zebTargetInfo_close (zi->zti, 0); dict_close (zi->dict); + sortIdx_close (zi->sortIdx); if (zi->isam) is_close (zi->isam); if (zi->isamc) @@ -259,6 +263,8 @@ static int register_lock (ZServerInfo *zi) zi->records = rec_open (zi->bfs, 0); if (!(zi->dict = dict_open (zi->bfs, FNAME_DICT, 40, 0))) return -1; + if (!(zi->sortIdx = sortIdx_open (zi->bfs, 0))) + return -1; zi->isam = NULL; zi->isamc = NULL; if (!res_get_match (zi->res, "isam", "i", NULL)) @@ -306,6 +312,8 @@ static void register_unlock (ZServerInfo *zi) zebra_server_unlock (zi, zi->registerState); } +static int bend_sort (void *handle, bend_sort_rr *rr); + bend_initresult *bend_init (bend_initrequest *q) { bend_initresult *r = odr_malloc (q->stream, sizeof(*r)); @@ -315,6 +323,7 @@ bend_initresult *bend_init (bend_initrequest *q) r->errcode = 0; r->errstring = 0; r->handle = zi; + q->bend_sort = bend_sort; logf (LOG_DEBUG, "bend_init"); @@ -336,7 +345,6 @@ bend_initresult *bend_init (bend_initrequest *q) zi->registerChange = 0; zi->records = NULL; - zi->odr = odr_createmem (ODR_ENCODE); zi->registered_sets = NULL; zi->zebra_maps = zebra_maps_open 
(res_get(zi->res, "profilePath"), zi->res); @@ -353,7 +361,6 @@ bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd) r->hits = 0; register_lock (zi); - odr_reset (zi->odr); zi->errCode = 0; zi->errString = NULL; @@ -361,7 +368,7 @@ bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd) switch (q->query->which) { case Z_Query_type_1: case Z_Query_type_101: - r->errcode = rpn_search (zi, q->query->u.type_1, + r->errcode = rpn_search (zi, q->stream, q->query->u.type_1, q->num_bases, q->basenames, q->setname, &r->hits); r->errstring = zi->errString; @@ -515,7 +522,6 @@ bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num) r->last_in_set = 0; r->basename = "base"; - odr_reset (zi->odr); zi->errCode = 0; positions[0] = q->number; @@ -558,13 +564,12 @@ bend_scanresult *bend_scan (void *handle, bend_scanrequest *q, int *num) int status; register_lock (zi); - odr_reset (zi->odr); zi->errCode = 0; zi->errString = 0; r->term_position = q->term_position; r->num_entries = q->num_entries; - r->errcode = rpn_scan (zi, q->term, + r->errcode = rpn_scan (zi, q->stream, q->term, q->attributeset, q->num_bases, q->basenames, &r->term_position, @@ -584,6 +589,7 @@ void bend_close (void *handle) resultSetDestroy (zi); zebTargetInfo_close (zi->zti, 0); dict_close (zi->dict); + sortIdx_close (zi->sortIdx); if (zi->isam) is_close (zi->isam); if (zi->isamc) @@ -591,7 +597,6 @@ void bend_close (void *handle) rec_close (&zi->records); register_unlock (zi); } - odr_destroy (zi->odr); zebra_maps_close (zi->zebra_maps); bfs_destroy (zi->bfs); data1_destroy (zi->dh); @@ -620,6 +625,20 @@ static void pre_init (struct statserv_options_block *sob) } #endif +int bend_sort (void *handle, bend_sort_rr *rr) +{ + ZServerInfo *zi = handle; + +#if 1 + register_lock (zi); + + resultSetSort (zi, rr); + + register_unlock (zi); +#endif + return 0; +} + int main (int argc, char **argv) { struct statserv_options_block *sob; diff --git 
a/index/zserver.h b/index/zserver.h index 13e8901..ab1e985 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.28 1998-01-29 13:40:11 adam + * Revision 1.29 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.28 1998/01/29 13:40:11 adam * Better logging for scan service. * * Revision 1.27 1997/10/27 14:33:06 adam @@ -115,6 +118,7 @@ #include <backend.h> #include <rset.h> +#include <sortidx.h> #include "index.h" #include "zinfo.h" @@ -127,6 +131,7 @@ typedef struct ZServerSet_ { char *name; RSET rset; int size; + struct zset_sort_info *sort_info; struct ZServerSet_ *next; } ZServerSet; @@ -135,12 +140,12 @@ typedef struct { time_t registerChange; ZServerSet *sets; Dict dict; + SortIdx sortIdx; ISAM isam; ISAMC isamc; Records records; int errCode; char *errString; - ODR odr; ZebTargetInfo *zti; data1_handle dh; data1_attset *registered_sets; @@ -157,11 +162,11 @@ typedef struct { ZebraMaps zebra_maps; } ZServerInfo; -int rpn_search (ZServerInfo *zi, +int rpn_search (ZServerInfo *zi, ODR stream, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname, int *hits); -int rpn_scan (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, oid_value attributeset, int num_bases, char **basenames, int *position, int *num_entries, struct scan_entry **list, @@ -177,6 +182,9 @@ void resultSetDestroy (ZServerInfo *zi); ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, int num, int *positions); void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num); + +int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr); + void zlog_rpn (Z_RPNQuery *rpn); void zlog_scan (Z_AttributesPlusTerm *zapt, oid_value ast); diff --git a/index/zsets.c b/index/zsets.c index b43ea18..f087500 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1995, Index 
Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.12 1997-09-25 14:57:36 adam + * Revision 1.13 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.12 1997/09/25 14:57:36 adam * Windows NT port. * * Revision 1.11 1996/12/23 15:30:46 adam @@ -58,6 +61,32 @@ #include "zserver.h" #include <rstemp.h> +#define SORT_IDX_ENTRYSIZE 64 +#define ZSET_SORT_MAX_LEVEL 3 + +struct zset_sort_entry { + int sysno; + char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE]; +}; + +struct zset_sort_info { + int max_entries; + int num_entries; + struct zset_sort_entry **entries; +}; + +void resultSetSortReset (struct zset_sort_info **si) +{ + int i; + if (!*si) + return ; + for (i = 0; i<(*si)->num_entries; i++) + xfree ((*si)->entries[i]); + xfree ((*si)->entries); + xfree (*si); + *si = NULL; +} + ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset) { ZServerSet *s; @@ -65,18 +94,22 @@ ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset) for (s = zi->sets; s; s = s->next) if (!strcmp (s->name, name)) { + logf (LOG_DEBUG, "updating result set %s", name); if (!ov) return NULL; + resultSetSortReset (&s->sort_info); rset_delete (s->rset); s->rset = rset; return s; } + logf (LOG_DEBUG, "adding result set %s", name); s = xmalloc (sizeof(*s)); s->next = zi->sets; zi->sets = s; s->name = xmalloc (strlen(name)+1); strcpy (s->name, name); s->rset = rset; + s->sort_info = NULL; return s; } @@ -90,6 +123,7 @@ ZServerSet *resultSetGet (ZServerInfo *zi, const char *name) return NULL; } + void resultSetDestroy (ZServerInfo *zi) { ZServerSet *s, *s1; @@ -97,6 +131,7 @@ void resultSetDestroy (ZServerInfo *zi) for (s = zi->sets; s; s = s1) { s1 = s->next; + resultSetSortReset (&s->sort_info); rset_delete (s->rset); xfree (s->name); xfree (s); @@ -110,40 +145,79 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, ZServerSet 
*sset; ZServerSetSysno *sr; RSET rset; - int num_i = 0; - int position = 0; - int psysno = 0; - struct it_key key; - RSFD rfd; + int i; + struct zset_sort_info *sort_info; if (!(sset = resultSetGet (zi, name))) return NULL; if (!(rset = sset->rset)) return NULL; - logf (LOG_DEBUG, "resultSetRecordGet"); sr = xmalloc (sizeof(*sr) * num); - rfd = rset_open (rset, RSETF_READ|RSETF_SORT_RANK); - while (rset_read (rset, rfd, &key)) + for (i = 0; i<num; i++) { - if (key.sysno != psysno) - { - psysno = key.sysno; - position++; - assert (num_i < num); - if (position == positions[num_i]) - { - sr[num_i].sysno = psysno; - rset_score (rset, rfd, &sr[num_i].score); - if (++num_i == num) - break; - } - } + sr[i].sysno = 0; + sr[i].score = -1; } - rset_close (rset, rfd); - while (num_i < num) + sort_info = sset->sort_info; + if (sort_info) { - sr[num_i].sysno = 0; - num_i++; + int position; + + for (i = 0; i<num; i++) + { + position = positions[i]; + if (position <= sort_info->num_entries) + { + logf (LOG_DEBUG, "got pos=%d (sorted)", position); + sr[i].sysno = sort_info->entries[position-1]->sysno; + } + } + } + /* did we really get all entries using sort ? 
*/ + for (i = 0; i<num; i++) + { + if (!sr[i].sysno) + break; + } + if (i < num) /* nope, get the rest, unsorted - sorry */ + { + int position = 0; + int num_i = 0; + int psysno = 0; + RSFD rfd; + struct it_key key; + + if (sort_info) + position = sort_info->num_entries; + while (num_i < num && positions[num_i] < position) + num_i++; + rfd = rset_open (rset, RSETF_READ|RSETF_SORT_RANK); + while (num_i < num && rset_read (rset, rfd, &key)) + { + if (key.sysno != psysno) + { + psysno = key.sysno; + if (sort_info) + { + /* determine we alreay have this in our set */ + for (i = sort_info->num_entries; --i >= 0; ) + if (psysno == sort_info->entries[i]->sysno) + break; + if (i >= 0) + continue; + } + position++; + assert (num_i < num); + if (position == positions[num_i]) + { + sr[num_i].sysno = psysno; + logf (LOG_DEBUG, "got pos=%d (unsorted)", position); + rset_score (rset, rfd, &sr[num_i].score); + num_i++; + } + } + } + rset_close (rset, rfd); } return sr; } @@ -152,3 +226,187 @@ void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num) { xfree (records); } + +struct sortKey { + int relation; + int attrUse; +}; + +void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset, + struct sortKey *criteria, int num_criteria, + int sysno) +{ + struct zset_sort_entry this_entry; + struct zset_sort_entry *new_entry = NULL; + struct zset_sort_info *sort_info = sset->sort_info; + int i, j; + + sortIdx_sysno (zi->sortIdx, sysno); + for (i = 0; i<num_criteria; i++) + { + sortIdx_type (zi->sortIdx, criteria[i].attrUse); + sortIdx_read (zi->sortIdx, this_entry.buf[i]); + } + i = sort_info->num_entries; + while (--i >= 0) + { + int rel = 0; + for (j = 0; j<num_criteria; j++) + { + rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j], + SORT_IDX_ENTRYSIZE); + if (rel) + break; + } + if (rel) + { + if (criteria[j].relation == 'D') + if (rel > 0) + break; + if (criteria[j].relation == 'A') + if (rel < 0) + break; + } + } + j = sort_info->max_entries-1; + if 
(i == j) + return; + ++i; + new_entry = sort_info->entries[j]; + while (j != i) + { + sort_info->entries[j] = sort_info->entries[j-1]; + --j; + } + sort_info->entries[j] = new_entry; + assert (new_entry); + if (sort_info->num_entries != sort_info->max_entries) + (sort_info->num_entries)++; + for (i = 0; i<num_criteria; i++) + memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE); + new_entry->sysno = sysno; +} + +int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr) +{ + ZServerSet *sset; + RSET rset; + int i, psysno = 0; + struct it_key key; + struct sortKey sort_criteria[3]; + int num_criteria; + RSFD rfd; + + if (rr->num_input_setnames == 0) + { + rr->errcode = 208; + return 0; + } + if (rr->num_input_setnames > 1) + { + rr->errcode = 230; + return 0; + } + sset = resultSetGet (zi, rr->input_setnames[0]); + if (!sset) + { + rr->errcode = 30; + rr->errstring = rr->input_setnames[0]; + return 0; + } + if (!(rset = sset->rset)) + { + rr->errcode = 30; + rr->errstring = rr->input_setnames[0]; + return 0; + } + num_criteria = rr->sort_sequence->num_specs; + if (num_criteria > 3) + num_criteria = 3; + for (i = 0; i < num_criteria; i++) + { + Z_SortKeySpec *sks = rr->sort_sequence->specs[i]; + Z_SortKey *sk; + + if (*sks->sortRelation == Z_SortRelation_ascending) + sort_criteria[i].relation = 'A'; + else if (*sks->sortRelation == Z_SortRelation_descending) + sort_criteria[i].relation = 'D'; + else + { + rr->errcode = 214; + return 0; + } + if (sks->sortElement->which == Z_SortElement_databaseSpecific) + { + rr->errcode = 210; + return 0; + } + else if (sks->sortElement->which != Z_SortElement_generic) + { + rr->errcode = 237; + return 0; + } + sk = sks->sortElement->u.generic; + switch (sk->which) + { + case Z_SortKey_sortField: + logf (LOG_DEBUG, "Sort: key %d is of type sortField", i+1); + rr->errcode = 207; + return 0; + case Z_SortKey_elementSpec: + logf (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1); + return 0; + case 
Z_SortKey_sortAttributes: + logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1); + sort_criteria[i].attrUse = + zebra_maps_sort (zi->zebra_maps, sk->u.sortAttributes); + logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse); + if (sort_criteria[i].attrUse == -1) + { + rr->errcode = 116; + return 0; + } + if (sortIdx_type (zi->sortIdx, sort_criteria[i].attrUse)) + { + rr->errcode = 207; + return 0; + } + break; + } + } + if (strcmp (rr->output_setname, rr->input_setnames[0])) + { + rset = rset_dup (rset); + sset = resultSetAdd (zi, rr->output_setname, 1, rset); + } + resultSetSortReset (&sset->sort_info); + + sset->sort_info = xmalloc (sizeof(*sset->sort_info)); + sset->sort_info->max_entries = 10; + sset->sort_info->num_entries = 0; + sset->sort_info->entries = xmalloc (sizeof(*sset->sort_info->entries) * + sset->sort_info->max_entries); + for (i = 0; i<sset->sort_info->max_entries; i++) + sset->sort_info->entries[i] = + xmalloc (sizeof(*sset->sort_info->entries[i])); + + + rfd = rset_open (rset, RSETF_READ|RSETF_SORT_SYSNO); + while (rset_read (rset, rfd, &key)) + { + if (key.sysno != psysno) + { + psysno = key.sysno; + resultSetInsertSort (zi, sset, + sort_criteria, num_criteria, psysno); + } + } + rset_close (rset, rfd); + + rr->errcode = 0; + rr->sort_status = Z_SortStatus_success; + + return 0; +} + diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 947afbb..76e0918 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.16 1998-01-29 13:38:17 adam + * Revision 1.17 1998-02-10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.16 1998/01/29 13:38:17 adam * Fixed problem with mapping to record with unknown schema. 
* * Revision 1.15 1998/01/26 10:37:57 adam @@ -152,7 +155,6 @@ #include <oid.h> #include <recctrl.h> -#include <charmap.h> #include "grsread.h" #define GRS_MAX_WORD 512 @@ -192,118 +194,10 @@ static void grs_init(void) { } -static void dumpkeys_incomplete_field(data1_node *n, struct recExtractCtrl *p, - data1_att *att, int reg_type) -{ - const char *b = n->u.data.data; - int remain; - const char **map = 0; - - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input(p->zebra_maps, reg_type, &b, remain); - - while (map) - { - RecWord wrd; - char buf[GRS_MAX_WORD+1]; - int i, remain; - - /* Skip spaces */ - while (map && *map && **map == *CHR_SPACE) - { - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input(p->zebra_maps, reg_type, &b, remain); - else - map = 0; - } - if (!map) - break; - i = 0; - while (map && *map && **map != *CHR_SPACE) - { - const char *cp = *map; - - while (i < GRS_MAX_WORD && *cp) - buf[i++] = *(cp++); - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input(p->zebra_maps, reg_type, &b, remain); - else - map = 0; - } - if (!i) - return; - buf[i] = '\0'; - (*p->init)(&wrd); /* set defaults */ - wrd.reg_type = reg_type; - wrd.seqno = seqno++; - wrd.string = buf; - wrd.attrSet = att->parent->ordinal; - wrd.attrUse = att->locals->local; - (*p->add)(&wrd); - } -} - -static void dumpkeys_complete_field(data1_node *n, struct recExtractCtrl *p, - data1_att *att, int reg_type) -{ - const char *b = n->u.data.data; - char buf[GRS_MAX_WORD+1]; - const char **map = 0; - RecWord wrd; - int i = 0, remain; - - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input (p->zebra_maps, reg_type, &b, remain); - - while (remain > 0 && i < GRS_MAX_WORD) - { - while (map && *map && **map == *CHR_SPACE) - { - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input(p->zebra_maps, reg_type, &b, 
remain); - else - map = 0; - } - if (!map) - break; - - if (i && i < GRS_MAX_WORD) - buf[i++] = *CHR_SPACE; - while (map && *map && **map != *CHR_SPACE) - { - const char *cp = *map; - - if (i >= GRS_MAX_WORD) - break; - while (i < GRS_MAX_WORD && *cp) - buf[i++] = *(cp++); - remain = n->u.data.len - (b - n->u.data.data); - if (remain > 0) - map = zebra_maps_input (p->zebra_maps, reg_type, &b, remain); - else - map = 0; - } - } - if (!i) - return; - buf[i] = '\0'; - (*p->init)(&wrd); - - wrd.reg_type = reg_type; - wrd.seqno = seqno++; - wrd.string = buf; - wrd.attrSet = att->parent->ordinal; - wrd.attrUse = att->locals->local; - (*p->add)(&wrd); -} - static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) { + RecWord wrd; + (*p->init)(p, &wrd); /* set defaults */ for (; n; n = n->next) { if (p->flagShowRecords) /* display element description to user */ @@ -391,13 +285,16 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level) tlist->att->name, tlist->att->value); } else - if (zebra_maps_is_complete (p->zebra_maps, - *tlist->structure)) - dumpkeys_complete_field(n, p, tlist->att, - *tlist->structure); - else - dumpkeys_incomplete_field(n, p, tlist->att, - *tlist->structure); + { + wrd.reg_type = *tlist->structure; + wrd.seqno = seqno; + wrd.string = n->u.data.data; + wrd.length = n->u.data.len; + wrd.attrSet = tlist->att->parent->ordinal; + wrd.attrUse = tlist->att->locals->local; + (*p->add)(&wrd); + seqno = wrd.seqno; + } } } if (p->flagShowRecords && n->which == DATA1N_root) diff --git a/recctrl/rectext.c b/recctrl/rectext.c index 7c7c9c0..9993d70 100644 --- a/recctrl/rectext.c +++ b/recctrl/rectext.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rectext.c,v $ - * Revision 1.5 1997-10-27 14:33:06 adam + * Revision 1.6 1998-02-10 12:03:06 adam + * Implemented Sort. 
+ * + * Revision 1.5 1997/10/27 14:33:06 adam * Moved towards generic character mapping depending on "structure" * field in abstract syntax file. Fixed a few memory leaks. Fixed * bug with negative integers when doing searches with relational @@ -99,31 +102,28 @@ void buf_close (struct buf_info *fi) static int text_extract (struct recExtractCtrl *p) { - char w[256]; + char w[512]; RecWord recWord; int r, seqno = 1; struct buf_info *fi = buf_open (p); - (*p->init)(&recWord); + (*p->init)(p, &recWord); recWord.reg_type = 'w'; do { int i = 0; r = buf_read (fi, w); - while (r > 0 && i < 255 && isalnum(w[i])) + while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r') { i++; r = buf_read (fi, w + i); } if (i) { - int j; - for (j = 0; j<i; j++) - w[j] = tolower(w[j]); - w[i] = 0; recWord.seqno = seqno++; recWord.string = w; + recWord.length = i; (*p->add)(&recWord); } } while (r > 0); diff --git a/tab/default.idx b/tab/default.idx index 3554de4..bb142cf 100644 --- a/tab/default.idx +++ b/tab/default.idx @@ -1,5 +1,5 @@ # Zebra indexes as referred to from the *.abs-files. -# $Id: default.idx,v 1.3 1998-01-26 10:36:49 adam Exp $ +# $Id: default.idx,v 1.4 1998-02-10 12:03:06 adam Exp $ # # Traditional word index index w @@ -25,3 +25,8 @@ charmap numeric.chr index 0 completeness 0 charmap @ + +# Sort register (no mapping at all) +sort s +completeness 1 +charmap @ diff --git a/util/zebramap.c b/util/zebramap.c index fa170e5..8fd2227 100644 --- a/util/zebramap.c +++ b/util/zebramap.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zebramap.c,v $ - * Revision 1.6 1998-01-29 13:36:01 adam + * Revision 1.7 1998-02-10 12:03:07 adam + * Implemented Sort. + * + * Revision 1.6 1998/01/29 13:36:01 adam * Structure word-list, free-form-text and document-text all * trigger ranked search. 
* @@ -39,6 +42,7 @@ struct zebra_map { int reg_type; int completeness; + int sort_flag; chrmaptab maptab; const char *maptab_name; struct zebra_map *next; @@ -50,6 +54,7 @@ struct zebra_maps { struct zebra_map *map_list; char temp_map_str[2]; const char *temp_map_ptr[2]; + struct zebra_map **lookup_array; }; void zebra_maps_close (ZebraMaps zms) @@ -71,7 +76,7 @@ static void zebra_map_read (ZebraMaps zms, const char *name) char line[512]; char *argv[10]; int argc; - struct zebra_map **zm = 0; + struct zebra_map **zm = 0, *zp; if (!(f = yaz_path_fopen(zms->tabpath, name, "r"))) { @@ -90,6 +95,20 @@ static void zebra_map_read (ZebraMaps zms, const char *name) (*zm)->reg_type = argv[1][0]; (*zm)->maptab_name = NULL; (*zm)->maptab = NULL; + (*zm)->sort_flag = 0; + (*zm)->completeness = 0; + } + else if (!strcmp (argv[0], "sort") && argc == 2) + { + if (!zm) + zm = &zms->map_list; + else + zm = &(*zm)->next; + *zm = nmem_malloc (zms->nmem, sizeof(**zm)); + (*zm)->reg_type = argv[1][0]; + (*zm)->maptab_name = NULL; + (*zm)->sort_flag = 1; + (*zm)->maptab = NULL; (*zm)->completeness = 0; } else if (zm && !strcmp (argv[0], "charmap") && argc == 2) @@ -104,7 +123,11 @@ static void zebra_map_read (ZebraMaps zms, const char *name) if (zm) (*zm)->next = NULL; fclose (f); + + for (zp = zms->map_list; zp; zp = zp->next) + zms->lookup_array[zp->reg_type & 255] = zp; } + static void zms_map_handle (void *p, const char *name, const char *value) { ZebraMaps zms = p; @@ -115,6 +138,7 @@ static void zms_map_handle (void *p, const char *name, const char *value) ZebraMaps zebra_maps_open (const char *tabpath, Res res) { ZebraMaps zms = xmalloc (sizeof(*zms)); + int i; zms->nmem = nmem_create (); zms->tabpath = nmem_strdup (zms->nmem, tabpath); @@ -125,19 +149,24 @@ ZebraMaps zebra_maps_open (const char *tabpath, Res res) zms->temp_map_ptr[0] = zms->temp_map_str; zms->temp_map_ptr[1] = NULL; - + + zms->lookup_array = + nmem_malloc (zms->nmem, sizeof(*zms->lookup_array)*256); + for (i = 
0; i<256; i++) + zms->lookup_array[i] = 0; if (!res_trav (res, "index", zms, zms_map_handle)) zebra_map_read (zms, "default.idx"); return zms; } -chrmaptab zebra_map_get (ZebraMaps zms, int reg_type) +struct zebra_map *zebra_map_get (ZebraMaps zms, int reg_type) { - struct zebra_map *zm; - - for (zm = zms->map_list; zm; zm = zm->next) - if (reg_type == zm->reg_type) - break; + return zms->lookup_array[reg_type]; +} + +chrmaptab zebra_charmap_get (ZebraMaps zms, int reg_type) +{ + struct zebra_map *zm = zebra_map_get (zms, reg_type); if (!zm) { logf (LOG_WARN, "Unknown register type: %c", reg_type); @@ -162,7 +191,7 @@ const char **zebra_maps_input (ZebraMaps zms, int reg_type, { chrmaptab maptab; - maptab = zebra_map_get (zms, reg_type); + maptab = zebra_charmap_get (zms, reg_type); if (maptab) return chr_map_input(maptab, from, len); @@ -178,7 +207,7 @@ const char *zebra_maps_output(ZebraMaps zms, int reg_type, const char **from) unsigned char i = (unsigned char) **from; static char buf[2] = {0,0}; - maptab = zebra_map_get (zms, reg_type); + maptab = zebra_charmap_get (zms, reg_type); if (maptab) return chr_map_output (maptab, from, 1); (*from)++; @@ -193,16 +222,17 @@ typedef struct { int type; int major; int minor; - Z_AttributesPlusTerm *zapt; + Z_AttributeElement **attributeList; + int num_attributes; } AttrType; static int attr_find (AttrType *src, oid_value *attributeSetP) { - while (src->major < src->zapt->num_attributes) + while (src->major < src->num_attributes) { Z_AttributeElement *element; - element = src->zapt->attributeList[src->major]; + element = src->attributeList[src->major]; if (src->type == *element->attributeType) { switch (element->which) @@ -241,10 +271,20 @@ static int attr_find (AttrType *src, oid_value *attributeSetP) return -1; } -static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, - int type) +static void attr_init_APT (AttrType *src, Z_AttributesPlusTerm *zapt, int type) +{ + + src->attributeList = zapt->attributeList; + 
src->num_attributes = zapt->num_attributes; + src->type = type; + src->major = 0; + src->minor = 0; +} + +static void attr_init_AttrList (AttrType *src, Z_AttributeList *list, int type) { - src->zapt = zapt; + src->attributeList = list->attributes; + src->num_attributes = list->num_attributes; src->type = type; src->major = 0; src->minor = 0; @@ -254,14 +294,28 @@ static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, int zebra_maps_is_complete (ZebraMaps zms, int reg_type) { - struct zebra_map *zm; - - for (zm = zms->map_list; zm; zm = zm->next) - if (reg_type == zm->reg_type) - return zm->completeness; + struct zebra_map *zm = zebra_map_get (zms, reg_type); + if (zm) + return zm->completeness; return 0; } +int zebra_maps_is_sort (ZebraMaps zms, int reg_type) +{ + struct zebra_map *zm = zebra_map_get (zms, reg_type); + if (zm) + return zm->sort_flag; + return 0; +} + +int zebra_maps_sort (ZebraMaps zms, Z_SortAttributes *sortAttributes) +{ + AttrType use; + attr_init_AttrList (&use, sortAttributes->list, 1); + + return attr_find (&use, NULL); +} + int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, int *reg_type, char **search_type, int *complete_flag) { @@ -272,9 +326,9 @@ int zebra_maps_attr (ZebraMaps zms, Z_AttributesPlusTerm *zapt, int structure_value; int relation_value; - attr_init (&structure, zapt, 4); - attr_init (&completeness, zapt, 6); - attr_init (&relation, zapt, 2); + attr_init_APT (&structure, zapt, 4); + attr_init_APT (&completeness, zapt, 6); + attr_init_APT (&relation, zapt, 2); completeness_value = attr_find (&completeness, NULL); structure_value = attr_find (&structure, NULL); -- 1.7.10.4