From: Adam Dickmeiss Date: Thu, 5 Mar 1998 08:45:11 +0000 (+0000) Subject: New result set model and modular ranking system. Moved towards X-Git-Tag: ZEBRA.1.0~228 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=ce3907338568fce46c5751e7e1091a5ad1c8e291 New result set model and modular ranking system. Moved towards descent server API. System information stored as "SGML" records. --- diff --git a/CHANGELOG b/CHANGELOG index 86a3076..dcdc5c1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,16 @@ +Changed the way Zebra keeps its maintenance information. Records +in "SGML" notation using an EXPLAIN schema is now used when +appropriate. + +Bug fix: Index didn't handle update/insert/delete of the same +record (i.e. same recordId) in one run (one invocation of zebraidx). +Only the first occurence of a record is considered. + +Most searches now return correct number of hits. + +New modular ranking system. Interested programmers are encouraged to +inspect rank1.c and improve the algorithm. + Bug fix: Lock files weren't removed as they should when using NT. Implemented Z39.50 Sort. Zebra's sort handler uses use attributes to diff --git a/Makefile b/Makefile index f3bafaf..a47ab32 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -# Copyright (C) 1994-1997, Index Data I/S +# Copyright (C) 1994-1998, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.60 1997-09-17 12:19:05 adam Exp $ +# $Id: Makefile,v 1.61 1998-03-05 08:45:11 adam Exp $ SHELL=/bin/sh MAKE=make diff --git a/TODO b/TODO index 868ccbc..120027b 100644 --- a/TODO +++ b/TODO @@ -1,18 +1,26 @@ -Zebra TODO $Id: TODO,v 1.5 1998-01-29 13:40:27 adam Exp $ +Zebra TODO $Id: TODO,v 1.6 1998-03-05 08:45:11 adam Exp $ Make regx-filter thread safe. -Better ranking in searches. Admin should be able specify initial - weight to certain fields. +Size of sort entries should be configurable. + +Use first field in sorting, i.e. author. 
-Search result should hold information about hits for each term, - especially when using ranked search. +System number sorting. -Admin should be able to specify set/attr when none is specified - (currently bib1,use=any is used). +Configurable default sorting criteria - used when query is not ranked. + +Configurable default search attributes. + +Better ranking in searches. Admin should be able specify initial + weight to certain fields. Explain support - including d1 to grs (d1_grs.c) in YAZ; Zebra to auto-generate explain information depending on data1 system. ISAMC optimization: indirect block with pointers to all blocks in chain. The initial block should include the count as well. + +Create surrogate diagnostic when ONE record is missing insead of a +non-surrogate diagnostic. + diff --git a/index/Makefile b/index/Makefile index ac8cba6..fb699a0 100644 --- a/index/Makefile +++ b/index/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995-1998, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.48 1998-02-10 12:03:05 adam Exp $ +# $Id: Makefile,v 1.49 1998-03-05 08:45:11 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -23,11 +23,12 @@ O1 = main.o dir.o dirs.o trav.o extract.o kinput.o kcompare.o \ zinfo.o invstat.o sortidx.o O2 = kdump.o O3 = zserver.o kcompare.o zrpn.o zsets.o attribute.o recindex.o \ - zlogs.o lockutil.o locksrv.o zinfo.o trunc.o sortidx.o + zlogs.o lockutil.o locksrv.o zinfo.o trunc.o sortidx.o rank1.o zebraapi.o \ + retrieve.o O4 = hlvl.o hlvltest.o kcompare.o CPP=$(CC) -E -all: $(TPROG1) $(TPROG3) +all: $(TPROG1) $(TPROG2) $(TPROG3) $(TPROG1): $(O1) ../lib/dict.a ../lib/isam.a ../lib/isamc.a ../lib/recctrl.a \ ../lib/bfile.a ../lib/dfa.a ../lib/zebrautl.a $(YAZLIB) diff --git a/index/attribute.c b/index/attribute.c index fb15f20..3548f3a 100644 --- a/index/attribute.c +++ b/index/attribute.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights 
reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: attribute.c,v $ - * Revision 1.7 1997-10-29 12:05:01 adam + * Revision 1.8 1998-03-05 08:45:11 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.7 1997/10/29 12:05:01 adam * Server produces diagnostic "Unsupported Attribute Set" when appropriate. * * Revision 1.6 1997/09/17 12:19:11 adam @@ -39,7 +43,7 @@ static void att_loadset(void *p, const char *n, const char *name) { data1_attset *cnew; - ZServerInfo *zi = p; + ZebraHandle zi = p; if (!(cnew = data1_read_attset(zi->dh, (char*) name))) { @@ -50,7 +54,7 @@ static void att_loadset(void *p, const char *n, const char *name) zi->registered_sets = cnew; } -static void load_atts(ZServerInfo *zi) +static void load_atts(ZebraHandle zi) { res_trav(zi->res, "attset", zi, att_loadset); } @@ -72,7 +76,7 @@ static data1_att *getatt(data1_attset *p, int att) return 0; } -int att_getentbyatt(ZServerInfo *zi, attent *res, oid_value set, int att) +int att_getentbyatt(ZebraHandle zi, attent *res, oid_value set, int att) { data1_att *r; data1_attset *p; @@ -81,7 +85,7 @@ int att_getentbyatt(ZServerInfo *zi, attent *res, oid_value set, int att) load_atts(zi); for (p = zi->registered_sets; p; p = p->next) if (p->reference == set) - break;; + break; if (!p) return -2; if (!(r = getatt(p, att))) diff --git a/index/extract.c b/index/extract.c index ae8a6c4..32aed36 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.79 1998-02-17 10:32:52 adam + * Revision 1.80 1998-03-05 08:45:11 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.79 1998/02/17 10:32:52 adam * Fixed bug: binary files weren't opened with flag b on NT. 
* * Revision 1.78 1998/02/10 12:03:05 adam @@ -318,7 +322,7 @@ static int records_updated = 0; static int records_deleted = 0; static int records_processed = 0; -static ZebTargetInfo *zti = NULL; +static ZebraExplainInfo zti = NULL; static void logRecord (int showFlag) { @@ -332,7 +336,7 @@ static void logRecord (int showFlag) } } -int key_open (BFiles bfs, int mem, int rw) +int key_open (BFiles bfs, int mem, int rw, data1_handle dh) { if (!mem) mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024; @@ -357,7 +361,7 @@ int key_open (BFiles bfs, int mem, int rw) dict_close (matchDict); return -1; } - zti = zebTargetInfo_open (records, rw); + zti = zebraExplain_open (records, dh, rw); if (!zti) { rec_close (&records); @@ -536,13 +540,13 @@ void key_flush (void) key_buf_used = 0; } -int key_close () +int key_close (int rw) { key_flush (); xfree (key_buf); -#if 1 - zebTargetInfo_close (zti, 1); -#endif + if (rw) + zebraExplain_runNumberIncrement (zti, 1); + zebraExplain_close (zti, rw); rec_close (&records); dict_close (matchDict); sortIdx_close (sortIdx); @@ -792,11 +796,12 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, int seqno = 0; int off = 0; - if (zebTargetInfo_curDatabase (zti, databaseName)) + if (zebraExplain_curDatabase (zti, databaseName)) { - if (zebTargetInfo_newDatabase (zti, databaseName)) + if (zebraExplain_newDatabase (zti, databaseName)) abort (); } + zebraExplain_recordCountIncrement (zti, cmd ? 
1 : -1); while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; @@ -820,9 +825,9 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, ++ptr_i; key_buf[ptr_top-ptr_i] = (char*)key_buf + key_buf_used; - ch = zebTargetInfo_lookupSU (zti, attrSet, attrUse); + ch = zebraExplain_lookupSU (zti, attrSet, attrUse); if (ch < 0) - ch = zebTargetInfo_addSU (zti, attrSet, attrUse); + ch = zebraExplain_addSU (zti, attrSet, attrUse); assert (ch > 0); ((char*) key_buf) [key_buf_used++] = ch; while (*src) @@ -848,7 +853,7 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, } static const char **searchRecordKey (struct recKeys *reckeys, - int attrSetS, int attrUseS) + int attrSetS, int attrUseS) { static const char *ws[32]; int off = 0; @@ -1147,6 +1152,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, char *subType) { struct recExtractCtrl extractCtrl; + RecordAttr *recordAttr; int r; char *matchStr; SYSNO sysnotmp; @@ -1252,8 +1258,11 @@ static int recordExtract (SYSNO *sysno, const char *fname, logf (LOG_LOG, "add %s %s %ld", rGroup->recordType, fname, (long) recordOffset); rec = rec_new (records); + *sysno = rec->sysno; + recordAttr = rec_init_attr (zti, rec); + if (matchStr) { dict_insert (matchDict, matchStr, sizeof(*sysno), sysno); @@ -1270,6 +1279,17 @@ static int recordExtract (SYSNO *sysno, const char *fname, rec = rec_get (records, *sysno); assert (rec); + + recordAttr = rec_init_attr (zti, rec); + + if (recordAttr->runNumber == zebraExplain_runNumberIncrement (zti, 0)) + { + logf (LOG_LOG, "skipped %s %s %ld", rGroup->recordType, + fname, (long) recordOffset); + rec_rm (&rec); + logRecord (0); + return 1; + } delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; flushSortKeys (*sysno, 0); @@ -1293,6 +1313,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, dict_delete (matchDict, matchStr); rec_del (records, &rec); } + rec_rm (&rec); 
logRecord (0); return 1; } @@ -1347,25 +1368,33 @@ static int recordExtract (SYSNO *sysno, const char *fname, rec->size[recInfo_delKeys] = 0; } + /* save file size of original record */ + zebraExplain_recordBytesIncrement (zti, - recordAttr->recordSize); + recordAttr->recordSize = fi->file_moffset - recordOffset; + if (!recordAttr->recordSize) + recordAttr->recordSize = fi->file_max - recordOffset; + zebraExplain_recordBytesIncrement (zti, recordAttr->recordSize); + + /* set run-number for this record */ + recordAttr->runNumber = zebraExplain_runNumberIncrement (zti, 0); + /* update store data */ xfree (rec->info[recInfo_storeData]); if (rGroup->flagStoreData == 1) { - int size = fi->file_moffset - recordOffset; - if (!size) - size = fi->file_max - recordOffset; - rec->size[recInfo_storeData] = size; - rec->info[recInfo_storeData] = xmalloc (size); + rec->size[recInfo_storeData] = recordAttr->recordSize; + rec->info[recInfo_storeData] = xmalloc (recordAttr->recordSize); if (lseek (fi->fd, recordOffset, SEEK_SET) < 0) { logf (LOG_ERRNO|LOG_FATAL, "seek to %ld in %s", fname, (long) recordOffset); exit (1); } - if (read (fi->fd, rec->info[recInfo_storeData], size) < size) + if (read (fi->fd, rec->info[recInfo_storeData], recordAttr->recordSize) + < recordAttr->recordSize) { logf (LOG_ERRNO|LOG_FATAL, "read %d bytes of %s", - fi->file_max, fname); + recordAttr->recordSize, fname); exit (1); } } @@ -1380,11 +1409,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, rec_strdup (rGroup->databaseName, &rec->size[recInfo_databaseName]); /* update offset */ - xfree (rec->info[recInfo_offset]); - - rec->size[recInfo_offset] = sizeof(recordOffset); - rec->info[recInfo_offset] = xmalloc (sizeof(recordOffset)); - memcpy (rec->info[recInfo_offset], &recordOffset, sizeof(recordOffset)); + recordAttr->recordOffset = recordOffset; /* commit this record */ rec_put (records, &rec); diff --git a/index/index.h b/index/index.h index b43fd42..bfd6923 100644 --- a/index/index.h 
+++ b/index/index.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: index.h,v $ - * Revision 1.56 1998-01-12 15:04:08 adam + * Revision 1.57 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.56 1998/01/12 15:04:08 adam * The test option (-s) only uses read-lock (and not write lock). * * Revision 1.55 1997/10/27 14:33:04 adam @@ -274,8 +278,8 @@ void repositoryAdd (struct recordGroup *rGroup); void repositoryDelete (struct recordGroup *rGroup); void repositoryShow (struct recordGroup *rGroup); -int key_open (BFiles bfs, int mem, int rw); -int key_close (void); +int key_open (BFiles bfs, int mem, int rw, data1_handle); +int key_close (int rw); int key_compare (const void *p1, const void *p2); int key_get_pos (const void *p); int key_compare_it (const void *p1, const void *p2); @@ -325,5 +329,4 @@ int zebra_unlock (ZebraLockHandle h); int zebra_lock_fd (ZebraLockHandle h); void zebra_lock_prefix (Res res, char *dst); - extern Res common_resource; diff --git a/index/kcompare.c b/index/kcompare.c index ca513ab..e259f77 100644 --- a/index/kcompare.c +++ b/index/kcompare.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: kcompare.c,v $ - * Revision 1.22 1997-09-22 12:39:06 adam + * Revision 1.23 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.22 1997/09/22 12:39:06 adam * Added get_pos method for the ranked result sets. 
* * Revision 1.21 1997/09/17 12:19:13 adam @@ -279,7 +283,6 @@ ISAMC_M key_isamc_m (Res res) me->debug = atoi(res_get_def (res, "isamcDebug", "0")); - logf (LOG_LOG, "ISAMC system active"); return me; } diff --git a/index/kdump.c b/index/kdump.c index 40ab26f..3cff3d4 100644 --- a/index/kdump.c +++ b/index/kdump.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: kdump.c,v $ - * Revision 1.14 1997-10-27 14:33:04 adam + * Revision 1.15 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.14 1997/10/27 14:33:04 adam * Moved towards generic character mapping depending on "structure" * field in abstract syntax file. Fixed a few memory leaks. Fixed * bug with negative integers when doing searches with relational @@ -132,15 +136,16 @@ int main (int argc, char **argv) char *key_fname = NULL; char key_string[IT_MAX_WORD]; char key_info[256]; + ZebraMaps zm; FILE *inf; + Res res = NULL; struct it_key prevk; - chrmaptab map = 0; prevk.sysno = 0; prevk.seqno = 0; prog = *argv; - while ((ret = options ("m:v:", argv, argc, &arg)) != -2) + while ((ret = options ("c:v:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -150,12 +155,12 @@ int main (int argc, char **argv) { log_init (log_mask_str(arg), prog, NULL); } - else if (ret == 'm') + else if (ret == 'c') { - if (!(map = chrmaptab_create (NULL, arg, 0))) - { - logf(LOG_FATAL, "Failed to open maptab"); - exit(1); + if (!(res = res_open (arg))) + { + logf(LOG_FATAL, "Failed to open resource file %s", arg); + exit (1); } } else @@ -166,9 +171,12 @@ int main (int argc, char **argv) } if (!key_fname) { - fprintf (stderr, "kdump [-m maptab -v log] file\n"); + fprintf (stderr, "kdump [-c config] [-v log] file\n"); exit (1); } + if (!res) + res = res_open ("zebra.cfg"); + zm = zebra_maps_open (res); if (!(inf = fopen (key_fname, "r"))) { logf (LOG_FATAL|LOG_ERRNO, "fopen %s", key_fname); @@ -179,26 
+187,26 @@ int main (int argc, char **argv) struct it_key k; int op; char keybuf[IT_MAX_WORD+1]; + char *to = keybuf; + const char *from = key_string; + int usedb_type = from[0]; + int reg_type = from[1]; op = key_info[0]; memcpy (&k, 1+key_info, sizeof(k)); - if (map) - { - char *to = keybuf, *from = key_string; - while (*from) - { - char *res = chr_map_output(map, from, 1); - while (*res) - *(to++) = *(res++); - } - *to = '\0'; + from += 2; + while (*from) + { + const char *res = zebra_maps_output (zm, reg_type, &from); + while (*res) + *(to++) = *(res++); } - else - strcpy(keybuf, key_string); - printf ("%7d op=%d s=%-5d %s\n", k.sysno, op, k.seqno, - keybuf); + *to = '\0'; + printf ("%c %3d %c %7d %5d %s\n", reg_type, usedb_type, op ? 'i':'d', + k.sysno, k.seqno, keybuf); } + zebra_maps_close (zm); if (fclose (inf)) { logf (LOG_FATAL|LOG_ERRNO, "fclose %s", key_fname); diff --git a/index/kinput.c b/index/kinput.c index 781ec9a..4ee3dec 100644 --- a/index/kinput.c +++ b/index/kinput.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: kinput.c,v $ - * Revision 1.27 1998-02-17 10:32:52 adam + * Revision 1.28 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.27 1998/02/17 10:32:52 adam * Fixed bug: binary files weren't opened with flag b on NT. 
* * Revision 1.26 1998/01/29 13:39:13 adam @@ -451,7 +455,6 @@ int heap_inpc (struct heap_info *hi) char *dict_info; strcpy (this_name, hci.cur_name); - logf (LOG_DEBUG, "inserting %s", 1+hci.cur_name); assert (hci.cur_name[1]); no_diffs++; if ((dict_info = dict_lookup (hi->dict, hci.cur_name))) @@ -525,7 +528,6 @@ int heap_inp (struct heap_info *hi) if ((info = dict_lookup (hi->dict, cur_name))) { ISAM_P isam_p, isam_p2; - logf (LOG_DEBUG, "updating %s", 1+cur_name); memcpy (&isam_p, info+1, sizeof(ISAM_P)); isam_p2 = is_merge (hi->isam, isam_p, nmemb, key_buf); if (!isam_p2) @@ -544,7 +546,6 @@ int heap_inp (struct heap_info *hi) else { ISAM_P isam_p; - logf (LOG_DEBUG, "inserting %s", 1+cur_name); no_insertions++; isam_p = is_merge (hi->isam, 0, nmemb, key_buf); dict_insert (hi->dict, cur_name, sizeof(ISAM_P), &isam_p); diff --git a/index/locksrv.c b/index/locksrv.c index 2b17ba0..57f106e 100644 --- a/index/locksrv.c +++ b/index/locksrv.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: locksrv.c,v $ - * Revision 1.10 1997-09-29 09:08:36 adam + * Revision 1.11 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.10 1997/09/29 09:08:36 adam * Revised locking system to be thread safe for the server. 
* * Revision 1.9 1997/09/25 14:54:43 adam @@ -54,7 +58,7 @@ #include "zserver.h" -int zebra_server_lock_init (ZServerInfo *zi) +int zebra_server_lock_init (ZebraHandle zi) { char path_prefix[1024]; @@ -70,7 +74,7 @@ int zebra_server_lock_init (ZServerInfo *zi) return 0; } -int zebra_server_lock_destroy (ZServerInfo *zi) +int zebra_server_lock_destroy (ZebraHandle zi) { xfree (zi->server_path_prefix); zebra_lock_destroy (zi->server_lock_cmt); @@ -79,7 +83,7 @@ int zebra_server_lock_destroy (ZServerInfo *zi) return 0; } -int zebra_server_lock (ZServerInfo *zi, int commitPhase) +int zebra_server_lock (ZebraHandle zi, int commitPhase) { if (!zi->server_lock_cmt) { @@ -115,7 +119,7 @@ int zebra_server_lock (ZServerInfo *zi, int commitPhase) return 0; } -void zebra_server_unlock (ZServerInfo *zi, int commitPhase) +void zebra_server_unlock (ZebraHandle zi, int commitPhase) { if (zi->server_lock_org == NULL) return; @@ -131,7 +135,7 @@ void zebra_server_unlock (ZServerInfo *zi, int commitPhase) } } -int zebra_server_lock_get_state (ZServerInfo *zi, time_t *timep) +int zebra_server_lock_get_state (ZebraHandle zi, time_t *timep) { char path[1024]; char buf[256]; diff --git a/index/main.c b/index/main.c index 123887a..f622834 100644 --- a/index/main.c +++ b/index/main.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: main.c,v $ - * Revision 1.55 1998-01-26 10:37:34 adam + * Revision 1.56 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.55 1998/01/26 10:37:34 adam * Minor changes. 
* * Revision 1.54 1998/01/12 15:04:08 adam @@ -297,8 +301,7 @@ int main (int argc, char **argv) bf_lockDir (rGroupDef.bfs, res_get (common_resource, "lockDir")); - rGroupDef.zebra_maps = zebra_maps_open (res_get( - common_resource, "profilePath"), common_resource); + rGroupDef.zebra_maps = zebra_maps_open (common_resource); } if (!strcmp (arg, "update")) cmd = 'u'; @@ -400,28 +403,31 @@ int main (int argc, char **argv) switch (cmd) { case 'u': - if (!key_open (rGroup.bfs, mem_max, rGroup.flagRw)) + if (!key_open (rGroup.bfs, mem_max, rGroup.flagRw, + rGroup.dh)) { logf (LOG_LOG, "updating %s", rGroup.path); repositoryUpdate (&rGroup); - nsections = key_close (); + nsections = key_close (rGroup.flagRw); } break; case 'U': - if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw)) + if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw, + rGroup.dh)) { logf (LOG_LOG, "updating (pass 1) %s", rGroup.path); repositoryUpdate (&rGroup); - key_close (); + key_close (rGroup.flagRw); } nsections = 0; break; case 'd': - if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw)) + if (!key_open (rGroup.bfs,mem_max, rGroup.flagRw, + rGroup.dh)) { logf (LOG_LOG, "deleting %s", rGroup.path); repositoryDelete (&rGroup); - nsections = key_close (); + nsections = key_close (rGroup.flagRw); } break; case 's': diff --git a/index/rank1.c b/index/rank1.c new file mode 100644 index 0000000..3369c79 --- /dev/null +++ b/index/rank1.c @@ -0,0 +1,185 @@ +/* + * Copyright (C) 1998, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: rank1.c,v $ + * Revision 1.1 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. 
+ * + */ + +#include +#include +#ifdef WINDOWS +#include +#else +#include +#endif + +#include "zserver.h" + +struct rank_class_info { + int dummy; +}; + +struct rank_term_info { + int local_occur; + int global_occur; + int global_inv; + int rank_flag; +}; + +struct rank_set_info { + int last_pos; + int no_entries; + int no_rank_entries; + struct rank_term_info *entries; +}; + +static int log2_int (unsigned g) +{ + int n = 0; + while ((g = g>>1)) + n++; + return n; +} + +/* + * create: Creates/Initialises this rank handler. This routine is + * called exactly once. The routine returns the class_handle. + */ +static void *create (ZebraHandle zh) +{ + struct rank_class_info *ci = xmalloc (sizeof(*ci)); + + logf (LOG_DEBUG, "rank-1 create"); + return ci; +} + +/* + * destroy: Destroys this rank handler. This routine is called + * when the handler is no longer needed - i.e. when the server + * dies. The class_handle was previously returned by create. + */ +static void destroy (ZebraHandle zh, void *class_handle) +{ + struct rank_class_info *ci = class_handle; + + logf (LOG_DEBUG, "rank-1 destroy"); + xfree (ci); +} + + +/* + * begin: Prepares beginning of "real" ranking. Called once for + * each result set. The returned handle is a "set handle" and + * will be used in each of the handlers below. 
+ */ +static void *begin (ZebraHandle zh, void *class_handle, RSET rset) +{ + struct rank_set_info *si = xmalloc (sizeof(*si)); + int i; + + logf (LOG_DEBUG, "rank-1 begin"); + si->no_entries = rset->no_rset_terms; + si->no_rank_entries = 0; + si->entries = xmalloc (sizeof(*si->entries)*si->no_entries); + for (i = 0; i < si->no_entries; i++) + { + int g = rset->rset_terms[i]->nn; + if (!strcmp (rset->rset_terms[i]->flags, "rank")) + { + si->entries[i].rank_flag = 1; + (si->no_rank_entries)++; + } + else + si->entries[i].rank_flag = 0; + si->entries[i].local_occur = 0; + si->entries[i].global_occur = g; + si->entries[i].global_inv = 32 - log2_int (g); + logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g)); + } + return si; +} + +/* + * end: Terminates ranking process. Called after a result set + * has been ranked. + */ +static void end (ZebraHandle zh, void *set_handle) +{ + struct rank_set_info *si = set_handle; + logf (LOG_DEBUG, "rank-1 end"); + xfree (si); +} + +/* + * add: Called for each word occurence in a result set. This routine + * should be as fast as possible. This routine should "incrementally" + * update the score. + */ +static void add (void *set_handle, int seqno, int term_index) +{ + struct rank_set_info *si = set_handle; + logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index); + si->last_pos = seqno; + si->entries[term_index].local_occur++; +} + +/* + * calc: Called for each document in a result. This handler should + * produce a score based on previous call(s) to the add handler. The + * score should be between 0 and 1000. If score cannot be obtained + * -1 should be returned. 
+ */ +static int calc (void *set_handle, int sysno) +{ + int i, lu, score = 0; + struct rank_set_info *si = set_handle; + + logf (LOG_DEBUG, "rank-1 calc sysno=%d", sysno); + + if (!si->no_rank_entries) + return -1; + for (i = 0; i < si->no_entries; i++) + if (si->entries[i].rank_flag && (lu = si->entries[i].local_occur)) + score += (2+log2_int (lu)) * si->entries[i].global_inv; + logf (LOG_DEBUG, " dividend=%d", 60*score); + logf (LOG_DEBUG, " divisor=%d", si->no_rank_entries * log2_int (4+si->last_pos)); + score = (60 * score)/(si->no_rank_entries * log2_int (4+si->last_pos)); + for (i = 0; i < si->no_entries; i++) + si->entries[i].local_occur = 0; + return score; +} + +/* + * Pseudo-meta code with sequence of calls as they occur in a + * server. Handlers are prefixed by --: + * + * server init + * -- create + * foreach search + * rank result set + * -- begin + * foreach record + * foreach word + * -- add + * -- calc + * -- end + * -- destroy + * server close + */ + +static struct rank_control rank_control = { + "rank-1", + create, + destroy, + begin, + end, + calc, + add, +}; + +struct rank_control *rank1_class = &rank_control; diff --git a/index/recindex.h b/index/recindex.h index 9285465..55658eb 100644 --- a/index/recindex.h +++ b/index/recindex.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recindex.h,v $ - * Revision 1.12 1998-01-12 15:04:08 adam + * Revision 1.13 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.12 1998/01/12 15:04:08 adam * The test option (-s) only uses read-lock (and not write lock). 
* * Revision 1.11 1997/09/17 12:19:16 adam @@ -83,6 +87,6 @@ enum { recInfo_delKeys, recInfo_databaseName, recInfo_storeData, - recInfo_offset + recInfo_attr }; diff --git a/index/recindxp.h b/index/recindxp.h index eaf7f86..8776726 100644 --- a/index/recindxp.h +++ b/index/recindxp.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recindxp.h,v $ - * Revision 1.3 1995-12-11 11:45:55 adam + * Revision 1.4 1998-03-05 08:45:12 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.3 1995/12/11 11:45:55 adam * Removed commented code. * * Revision 1.2 1995/12/11 09:12:51 adam @@ -35,7 +39,6 @@ struct records_info { char *index_fname; BFile index_BFile; - char *data_fname[REC_BLOCK_TYPES]; BFile data_BFile[REC_BLOCK_TYPES]; diff --git a/index/retrieve.c b/index/retrieve.c new file mode 100644 index 0000000..f138bbf --- /dev/null +++ b/index/retrieve.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) 1995-1998, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: retrieve.c,v $ + * Revision 1.1 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. 
+ * + */ + +#include +#include + +#include +#ifdef WINDOWS +#include +#include +#else +#include +#endif + +#include +#include "zserver.h" + +struct fetch_control { + int record_offset; + int record_int_pos; + char *record_int_buf; + int record_int_len; + int fd; +}; + +static int record_ext_read (void *fh, char *buf, size_t count) +{ + struct fetch_control *fc = fh; + return read (fc->fd, buf, count); +} + +static off_t record_ext_seek (void *fh, off_t offset) +{ + struct fetch_control *fc = fh; + return lseek (fc->fd, offset + fc->record_offset, SEEK_SET); +} + +static off_t record_ext_tell (void *fh) +{ + struct fetch_control *fc = fh; + return lseek (fc->fd, 0, SEEK_CUR) - fc->record_offset; +} + +static off_t record_int_seek (void *fh, off_t offset) +{ + struct fetch_control *fc = fh; + return (off_t) (fc->record_int_pos = offset); +} + +static off_t record_int_tell (void *fh) +{ + struct fetch_control *fc = fh; + return (off_t) fc->record_int_pos; +} + +static int record_int_read (void *fh, char *buf, size_t count) +{ + struct fetch_control *fc = fh; + int l = fc->record_int_len - fc->record_int_pos; + if (l <= 0) + return 0; + l = (l < count) ? 
l : count; + memcpy (buf, fc->record_int_buf + fc->record_int_pos, l); + fc->record_int_pos += l; + return l; +} + +int zebra_record_fetch (ZebraHandle zh, int sysno, int score, ODR stream, + oid_value input_format, Z_RecordComposition *comp, + oid_value *output_format, char **rec_bufp, + int *rec_lenp, char **basenamep) +{ + Record rec; + char *fname, *file_type, *basename; + RecType rt; + struct recRetrieveCtrl retrieveCtrl; + char subType[128]; + struct fetch_control fc; + RecordAttr *recordAttr; + + rec = rec_get (zh->records, sysno); + if (!rec) + { + logf (LOG_DEBUG, "rec_get fail on sysno=%d", sysno); + return 14; + } + recordAttr = rec_init_attr (zh->zei, rec); + + file_type = rec->info[recInfo_fileType]; + fname = rec->info[recInfo_filename]; + basename = rec->info[recInfo_databaseName]; + *basenamep = odr_malloc (stream, strlen(basename)+1); + strcpy (*basenamep, basename); + + if (!(rt = recType_byName (file_type, subType))) + { + logf (LOG_WARN, "Retrieve: Cannot handle type %s", file_type); + return 14; + } + logf (LOG_DEBUG, "retrieve localno=%d score=%d", sysno, score); + retrieveCtrl.fh = &fc; + fc.fd = -1; + if (rec->size[recInfo_storeData] > 0) + { + retrieveCtrl.readf = record_int_read; + retrieveCtrl.seekf = record_int_seek; + retrieveCtrl.tellf = record_int_tell; + fc.record_int_len = rec->size[recInfo_storeData]; + fc.record_int_buf = rec->info[recInfo_storeData]; + fc.record_int_pos = 0; + logf (LOG_DEBUG, "Internal retrieve. 
%d bytes", fc.record_int_len); + } + else + { + if ((fc.fd = open (fname, O_BINARY|O_RDONLY)) == -1) + { + logf (LOG_WARN|LOG_ERRNO, "Retrieve fail; missing file: %s", + fname); + rec_rm (&rec); + return 14; + } + fc.record_offset = recordAttr->recordOffset; + + retrieveCtrl.readf = record_ext_read; + retrieveCtrl.seekf = record_ext_seek; + retrieveCtrl.tellf = record_ext_tell; + + record_ext_seek (retrieveCtrl.fh, 0); + } + retrieveCtrl.subType = subType; + retrieveCtrl.localno = sysno; + retrieveCtrl.score = score; + retrieveCtrl.recordSize = recordAttr->recordSize; + retrieveCtrl.odr = stream; + retrieveCtrl.input_format = retrieveCtrl.output_format = input_format; + retrieveCtrl.comp = comp; + retrieveCtrl.diagnostic = 0; + retrieveCtrl.dh = zh->dh; + (*rt->retrieve)(&retrieveCtrl); + *output_format = retrieveCtrl.output_format; + *rec_bufp = retrieveCtrl.rec_buf; + *rec_lenp = retrieveCtrl.rec_len; + if (fc.fd != -1) + close (fc.fd); + rec_rm (&rec); + + return retrieveCtrl.diagnostic; +} diff --git a/index/trunc.c b/index/trunc.c index 7632b1e..ccaf1f2 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: trunc.c,v $ - * Revision 1.9 1998-01-12 15:04:09 adam + * Revision 1.10 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.9 1998/01/12 15:04:09 adam * The test option (-s) only uses read-lock (and not write lock). 
* * Revision 1.8 1997/10/31 12:34:27 adam @@ -148,7 +152,7 @@ static void heap_close (struct trunc_info *ti) xfree (ti); } -static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, +static RSET rset_trunc_r (ZebraHandle zi, ISAM_P *isam_p, int from, int to, int merge_chunk) { RSET result; @@ -158,12 +162,13 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, parms.key_size = sizeof(struct it_key); parms.temp_path = res_get (zi->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); - result_rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); + result_rsfd = rset_open (result, RSETF_WRITE); if (to - from > merge_chunk) { RSFD *rsfd; RSET *rset; + int term_index; int i, i_add = (to-from)/merge_chunk + 1; struct trunc_info *ti; int rscur = 0; @@ -185,8 +190,8 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, ti = heap_init (rscur, sizeof(struct it_key), key_compare_it); for (i = rscur; --i >= 0; ) { - rsfd[i] = rset_open (rset[i], RSETF_READ|RSETF_SORT_SYSNO); - if (rset_read (rset[i], rsfd[i], ti->tmpbuf)) + rsfd[i] = rset_open (rset[i], RSETF_READ); + if (rset_read (rset[i], rsfd[i], ti->tmpbuf, &term_index)) heap_insert (ti, ti->tmpbuf, i); else { @@ -202,7 +207,7 @@ static RSET rset_trunc_r (ZServerInfo *zi, ISAM_P *isam_p, int from, int to, while (1) { - if (!rset_read (rset[n], rsfd[n], ti->tmpbuf)) + if (!rset_read (rset[n], rsfd[n], ti->tmpbuf, &term_index)) { heap_delete (ti); rset_close (rset[n], rsfd[n]); @@ -353,7 +358,8 @@ static int isamc_trunc_cmp (const void *p1, const void *p2) return isc_block (i1) - isc_block (i2); } -RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no) +RSET rset_trunc (ZebraHandle zi, ISAM_P *isam_p, int no, + const char *term, int length, const char *flags) { logf (LOG_DEBUG, "rset_trunc no=%d", no); if (zi->isam) @@ -366,6 +372,7 @@ RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no) parms.pos = *isam_p; parms.is = zi->isam; 
+ parms.rset_term = rset_term_create (term, length, flags); return rset_create (rset_kind_isam, &parms); } qsort (isam_p, no, sizeof(*isam_p), isam_trunc_cmp); @@ -380,6 +387,7 @@ RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no) parms.pos = *isam_p; parms.is = zi->isamc; + parms.rset_term = rset_term_create (term, length, flags); return rset_create (rset_kind_isamc, &parms); } #if NEW_TRUNC @@ -387,13 +395,13 @@ RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no) { rset_m_or_parms parms; - logf (LOG_LOG, "new_trunc"); parms.key_size = sizeof(struct it_key); parms.cmp = key_compare_it; parms.isc = zi->isamc; parms.isam_positions = isam_p; parms.no_isam_positions = no; - parms.no_save_positions = 100; + parms.no_save_positions = 100000; + parms.rset_term = rset_term_create (term, length, flags); return rset_create (rset_kind_m_or, &parms); } #endif diff --git a/index/zebraapi.c b/index/zebraapi.c new file mode 100644 index 0000000..ce2a017 --- /dev/null +++ b/index/zebraapi.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 1995-1998, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: zebraapi.c,v $ + * Revision 1.1 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. 
+ * + */ + +#include +#ifdef WINDOWS +#include +#include +#else +#include +#endif + +#include "zserver.h" + +static int zebra_register_lock (ZebraHandle zh) +{ + time_t lastChange; + int state = zebra_server_lock_get_state(zh, &lastChange); + + switch (state) + { + case 'c': + state = 1; + break; + default: + state = 0; + } + zebra_server_lock (zh, state); +#if USE_TIMES + times (&zh->tms1); +#endif + if (zh->registerState == state) + { + if (zh->registerChange >= lastChange) + return 0; + logf (LOG_LOG, "Register completely updated since last access"); + } + else if (zh->registerState == -1) + logf (LOG_LOG, "Reading register using state %d pid=%ld", state, + (long) getpid()); + else + logf (LOG_LOG, "Register has changed state from %d to %d", + zh->registerState, state); + zh->registerChange = lastChange; + if (zh->records) + { + zebraExplain_close (zh->zei, 0); + dict_close (zh->dict); + sortIdx_close (zh->sortIdx); + if (zh->isam) + is_close (zh->isam); + if (zh->isamc) + isc_close (zh->isamc); + rec_close (&zh->records); + } + bf_cache (zh->bfs, state ? 
res_get (zh->res, "shadow") : NULL); + zh->registerState = state; + zh->records = rec_open (zh->bfs, 0); + if (!(zh->dict = dict_open (zh->bfs, FNAME_DICT, 40, 0))) + return -1; + if (!(zh->sortIdx = sortIdx_open (zh->bfs, 0))) + return -1; + zh->isam = NULL; + zh->isamc = NULL; + if (!res_get_match (zh->res, "isam", "i", NULL)) + { + if (!(zh->isamc = isc_open (zh->bfs, FNAME_ISAMC, + 0, key_isamc_m(zh->res)))) + return -1; + + } + else + { + if (!(zh->isam = is_open (zh->bfs, FNAME_ISAM, key_compare, 0, + sizeof (struct it_key), zh->res))) + return -1; + } + zh->zei = zebraExplain_open (zh->records, zh->dh, 0); + + return 0; +} + +static void zebra_register_unlock (ZebraHandle zh) +{ + static int waitSec = -1; + +#if USE_TIMES + times (&zh->tms2); + logf (LOG_LOG, "user/system: %ld/%ld", + (long) (zh->tms2.tms_utime - zh->tms1.tms_utime), + (long) (zh->tms2.tms_stime - zh->tms1.tms_stime)); +#endif + if (waitSec == -1) + { + char *s = res_get (zh->res, "debugRequestWait"); + if (s) + waitSec = atoi (s); + else + waitSec = 0; + } +#ifdef WINDOWS +#else + if (waitSec > 0) + sleep (waitSec); +#endif + if (zh->registerState != -1) + zebra_server_unlock (zh, zh->registerState); +} + +ZebraHandle zebra_open (const char *host, const char *configName) +{ + ZebraHandle zh = xmalloc (sizeof(*zh)); + + if (!(zh->res = res_open (configName))) + { + logf (LOG_WARN, "Failed to read resources `%s'", configName); + return NULL; + } + zebra_server_lock_init (zh); + zh->dh = data1_create (); + zh->bfs = bfs_create (res_get (zh->res, "register")); + bf_lockDir (zh->bfs, res_get (zh->res, "lockDir")); + data1_set_tabpath (zh->dh, res_get(zh->res, "profilePath")); + zh->sets = NULL; + zh->registerState = -1; /* trigger open of registers! 
*/ + zh->registerChange = 0; + + zh->records = NULL; + zh->registered_sets = NULL; + zh->zebra_maps = zebra_maps_open (zh->res); + zh->rank_classes = NULL; + + zebraRankInstall (zh, rank1_class); + return zh; +} + +void zebra_close (ZebraHandle zh) +{ + if (zh->records) + { + resultSetDestroy (zh); + zebraExplain_close (zh->zei, 0); + dict_close (zh->dict); + sortIdx_close (zh->sortIdx); + if (zh->isam) + is_close (zh->isam); + if (zh->isamc) + isc_close (zh->isamc); + rec_close (&zh->records); + zebra_register_unlock (zh); + } + zebra_maps_close (zh->zebra_maps); + zebraRankDestroy (zh); + bfs_destroy (zh->bfs); + data1_destroy (zh->dh); + zebra_server_lock_destroy (zh); + + res_close (zh->res); + xfree (zh); +} + +void zebra_search_rpn (ZebraHandle zh, ODR stream, + Z_RPNQuery *query, int num_bases, char **basenames, + const char *setname) +{ + zebra_register_lock (zh); + zh->errCode = 0; + zh->errString = NULL; + zh->hits = 0; + rpn_search (zh, stream, query, num_bases, basenames, setname); + zebra_register_unlock (zh); +} + +void zebra_records_retrieve (ZebraHandle zh, ODR stream, + const char *setname, Z_RecordComposition *comp, + oid_value input_format, int num_recs, + ZebraRetrievalRecord *recs) +{ + ZebraPosSet poset; + int i, *pos_array; + + pos_array = xmalloc (sizeof(*pos_array)); + for (i = 0; ierrCode = 13; + } + else + { + for (i = 0; ierrCode = 13; + logf (LOG_DEBUG, "Out of range. 
pos=%d", pos_array[i]); + } + else + { + zh->errCode = + zebra_record_fetch (zh, poset[i].sysno, poset[i].score, + stream, input_format, comp, + &recs[i].format, &recs[i].buf, + &recs[i].len, + &recs[i].base); + } + } + zebraPosSetDestroy (zh, poset, num_recs); + } + zebra_register_unlock (zh); + xfree (pos_array); +} + +void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **entries, + int *is_partial) +{ + zebra_register_lock (zh); + rpn_scan (zh, stream, zapt, attributeset, + num_bases, basenames, position, + num_entries, entries, is_partial); + zebra_register_unlock (zh); +} + +void zebra_sort (ZebraHandle zh, ODR stream, + int num_input_setnames, char **input_setnames, + char *output_setname, Z_SortKeySpecList *sort_sequence, + int *sort_status) +{ + zebra_register_lock (zh); + resultSetSort (zh, stream, num_input_setnames, input_setnames, + output_setname, sort_sequence, sort_status); + zebra_register_unlock (zh); +} + +void zebra_setDB (ZebraHandle zh, int num_bases, char **basenames) +{ + +} + +void zebra_setRecordType (ZebraHandle zh, const char *type) +{ + +} + +void zebra_setGroup (ZebraHandle zh, const char *group) +{ + +} + +void zebra_admin (ZebraHandle zh, const char *command) +{ + +} diff --git a/index/zinfo.c b/index/zinfo.c index d443863..c57e52b 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,10 +1,14 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.c,v $ - * Revision 1.6 1998-02-17 10:29:27 adam + * Revision 1.7 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.6 1998/02/17 10:29:27 adam * Moved towards 'automatic' EXPLAIN database. 
* * Revision 1.5 1997/10/27 14:33:05 adam @@ -31,8 +35,11 @@ #include #include +#include #include "zinfo.h" +#define ZINFO_DEBUG 0 + struct zebSUInfo { int set; int use; @@ -47,205 +54,367 @@ struct zebSUInfoB { struct zebDatabaseInfoB { struct zebSUInfoB *SUInfo; char *databaseName; - int sysno; - int readFlag; - int dirty; - struct zebDatabaseInfo info; + data1_node *data1_database; + int recordCount; /* records in db */ + int recordBytes; /* size of records */ + int sysno; /* sysno of database info */ + int readFlag; /* 1: read is needed when referenced; 0 if not */ + int dirty; /* 1: database is dirty: write is needed */ struct zebDatabaseInfoB *next; }; -struct zebTargetInfo { - int dictNum; +struct zebraExplainAttset { + char *name; + int ordinal; + struct zebraExplainAttset *next; +}; + +struct zebraExplainInfo { + int ordinalSU; + int runNumber; int dirty; Records records; + data1_handle dh; + struct zebraExplainAttset *attsets; + NMEM nmem; + data1_node *data1_target; struct zebDatabaseInfoB *databaseInfo; struct zebDatabaseInfoB *curDatabaseInfo; }; -void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag) +static data1_node *read_sgml_rec (data1_handle dh, NMEM nmem, Record rec) +{ + return data1_read_sgml (dh, nmem, rec->info[recInfo_storeData]); +} + +static data1_node *data1_search_tag (data1_handle dh, data1_node *n, + const char *tag) { - struct zebDatabaseInfoB *zdi, *zdi1; + logf (LOG_DEBUG, "data1_search_tag %s", tag); + for (; n; n = n->next) + if (n->which == DATA1N_tag && n->u.tag.tag && + !yaz_matchstr (tag, n->u.tag.tag)) + { + logf (LOG_DEBUG, " found"); + return n; + } + logf (LOG_DEBUG, " not found"); + return 0; +} + +static data1_node *data1_add_tag (data1_handle dh, data1_node *at, + const char *tag, NMEM nmem) +{ + data1_node *partag = get_parent_tag(dh, at); + data1_node *res = data1_mk_node (dh, nmem); + data1_element *e = NULL; + + res->parent = at; + res->which = DATA1N_tag; + res->u.tag.tag = data1_insert_string (dh, res, 
nmem, tag); + res->u.tag.node_selected = 0; + res->u.tag.make_variantlist = 0; + res->u.tag.no_data_requested = 0; + res->u.tag.get_bytes = -1; + + if (partag) + e = partag->u.tag.element; + res->u.tag.element = + data1_getelementbytagname (dh, at->root->u.root.absyn, + e, res->u.tag.tag); + res->root = at->root; + if (!at->child) + at->child = res; + else + { + assert (at->last_child); + at->last_child->next = res; + } + at->last_child = res; + return res; +} + +static data1_node *data1_make_tag (data1_handle dh, data1_node *at, + const char *tag, NMEM nmem) +{ + data1_node *node; + + node = data1_search_tag (dh, at->child, tag); + if (!node) + node = data1_add_tag (dh, at, tag, nmem); + else + node->child = node->last_child = NULL; + return node; +} + +static data1_node *data1_add_tagdata_int (data1_handle dh, data1_node *at, + const char *tag, int num, + NMEM nmem) +{ + data1_node *node_data; + node_data = data1_add_taggeddata (dh, at->root, at, tag, nmem); + if (!node_data) + return 0; + node_data->u.data.what = DATA1I_num; + node_data->u.data.data = node_data->lbuf; + sprintf (node_data->u.data.data, "%d", num); + node_data->u.data.len = strlen (node_data->u.data.data); + return node_data; +} + +static data1_node *data1_add_tagdata_text (data1_handle dh, data1_node *at, + const char *tag, const char *str, + NMEM nmem) +{ + data1_node *node_data; + + node_data = data1_add_taggeddata (dh, at->root, at, tag, nmem); + if (!node_data) + return 0; + node_data->u.data.what = DATA1I_text; + node_data->u.data.data = node_data->lbuf; + strcpy (node_data->u.data.data, str); + node_data->u.data.len = strlen (node_data->u.data.data); + return node_data; +} + +static void zebraExplain_writeDatabase (ZebraExplainInfo zei, + struct zebDatabaseInfoB *zdi); +static void zebraExplain_writeTarget (ZebraExplainInfo zei); + +void zebraExplain_close (ZebraExplainInfo zei, int writeFlag) +{ + struct zebDatabaseInfoB *zdi, *zdi_next; + + logf (LOG_DEBUG, "zebraExplain_close wr=%d", 
writeFlag); if (writeFlag) { - char p0[4096], *p = p0; - - memcpy (p, &zti->dictNum, sizeof(zti->dictNum)); - p += sizeof(zti->dictNum); - for (zdi = zti->databaseInfo; zdi; zdi=zdi->next) - { - if (zdi->dirty) - { - char q0[4096], *q = q0; - struct zebSUInfoB *zsui; - Record drec; - int no = 0; - - if (zdi->sysno) - drec = rec_get (zti->records, zdi->sysno); - else - { - drec = rec_new (zti->records); - - drec->info[recInfo_fileType] = - rec_strdup ("grs.explain.databaseInfo", - &drec->size[recInfo_fileType]); - - drec->info[recInfo_databaseName] = - rec_strdup ("IR-Explain-1", - &drec->size[recInfo_databaseName]); - zdi->sysno = drec->sysno; - } - assert (drec); - for (zsui = zdi->SUInfo; zsui; zsui=zsui->next) - no++; - memcpy (q, &zdi->info, sizeof(zdi->info)); - q += sizeof(zdi->info); - memcpy (q, &no, sizeof(no)); - q += sizeof(no); - for (zsui = zdi->SUInfo; zsui; zsui=zsui->next) - { - memcpy (q, &zsui->info, sizeof(zsui->info)); - q += sizeof(zsui->info); - } - xfree (drec->info[recInfo_storeData]); - drec->size[recInfo_storeData] = q-q0; - drec->info[recInfo_storeData] = xmalloc (drec->size[recInfo_storeData]); - memcpy (drec->info[recInfo_storeData], q0, drec->size[recInfo_storeData]); - rec_put (zti->records, &drec); - } - strcpy (p, zdi->databaseName); - p += strlen(p)+1; - memcpy (p, &zdi->sysno, sizeof(zdi->sysno)); - p += sizeof(zdi->sysno); - } - *p++ = '\0'; - if (zti->dirty) - { - Record grec = rec_get (zti->records, 1); - - assert (grec); - xfree (grec->info[recInfo_storeData]); - grec->size[recInfo_storeData] = p-p0; - grec->info[recInfo_storeData] = xmalloc (grec->size[recInfo_storeData]); - memcpy (grec->info[recInfo_storeData], p0, grec->size[recInfo_storeData]); - rec_put (zti->records, &grec); - } + /* write each database info record */ + for (zdi = zei->databaseInfo; zdi; zdi = zdi->next) + zebraExplain_writeDatabase (zei, zdi); + zebraExplain_writeTarget (zei); } - for (zdi = zti->databaseInfo; zdi; zdi = zdi1) + for (zdi = 
zei->databaseInfo; zdi; zdi = zdi_next) { - struct zebSUInfoB *zsui, *zsui1; + struct zebSUInfoB *zsui, *zsui_next; - zdi1 = zdi->next; - for (zsui = zdi->SUInfo; zsui; zsui = zsui1) + zdi_next = zdi->next; + for (zsui = zdi->SUInfo; zsui; zsui = zsui_next) { - zsui1 = zsui->next; + zsui_next = zsui->next; xfree (zsui); } - xfree (zdi->databaseName); xfree (zdi); } - xfree (zti); + nmem_destroy (zei->nmem); + xfree (zei); } -ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag) + +ZebraExplainInfo zebraExplain_open (Records records, data1_handle dh, + int writeFlag) { Record trec; - ZebTargetInfo *zti; - struct zebDatabaseInfoB **zdi; - - zti = xmalloc (sizeof(*zti)); - zti->dirty = 0; - zti->curDatabaseInfo = NULL; - zti->records = records; + ZebraExplainInfo zei; + struct zebDatabaseInfoB **zdip; - zdi = &zti->databaseInfo; - + logf (LOG_DEBUG, "zebraExplain_open wr=%d", writeFlag); + zei = xmalloc (sizeof(*zei)); + zei->dirty = 0; + zei->curDatabaseInfo = NULL; + zei->records = records; + zei->nmem = nmem_create (); + zei->dh = dh; + zei->attsets = NULL; + zdip = &zei->databaseInfo; trec = rec_get (records, 1); + if (trec) { - const char *p; + data1_node *node_tgtinfo, *node_zebra, *node_list, *np; - p = trec->info[recInfo_storeData]; + zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec); - memcpy (&zti->dictNum, p, sizeof(zti->dictNum)); - p += sizeof(zti->dictNum); - while (*p) - { - *zdi = xmalloc (sizeof(**zdi)); - (*zdi)->SUInfo = NULL; - (*zdi)->databaseName = xstrdup (p); - p += strlen(p)+1; - memcpy (&(*zdi)->sysno, p, sizeof((*zdi)->sysno)); - p += sizeof((*zdi)->sysno); - (*zdi)->readFlag = 1; - (*zdi)->dirty = 0; - zdi = &(*zdi)->next; - } - assert (p - trec->info[recInfo_storeData] == trec->size[recInfo_storeData]-1); +#if ZINFO_DEBUG + data1_pr_tree (zei->dh, zei->data1_target, stderr); +#endif + node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child, + "targetInfo"); + node_zebra = data1_search_tag (zei->dh, 
node_tgtinfo->child, + "zebraInfo"); + node_list = data1_search_tag (zei->dh, node_zebra->child, + "databaseList"); + for (np = node_list->child; np; np = np->next) + { + data1_node *node_name = NULL; + data1_node *node_id = NULL; + data1_node *np2; + if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "database")) + continue; + for (np2 = np->child; np2; np2 = np2->next) + { + if (np2->which != DATA1N_tag) + continue; + if (!strcmp (np2->u.tag.tag, "name")) + node_name = np2->child; + else if (!strcmp (np2->u.tag.tag, "id")) + node_id = np2->child; + } + assert (node_id && node_name); + + *zdip = xmalloc (sizeof(**zdip)); + + (*zdip)->readFlag = 1; + (*zdip)->dirty = 0; + (*zdip)->data1_database = NULL; + (*zdip)->recordCount = 0; + (*zdip)->recordBytes = 0; + (*zdip)->SUInfo = NULL; + + (*zdip)->databaseName = nmem_malloc (zei->nmem, + 1+node_name->u.data.len); + memcpy ((*zdip)->databaseName, node_name->u.data.data, + node_name->u.data.len); + (*zdip)->databaseName[node_name->u.data.len] = '\0'; + (*zdip)->sysno = atoi_n (node_id->u.data.data, + node_id->u.data.len); + zdip = &(*zdip)->next; + } + np = data1_search_tag (zei->dh, node_zebra->child, + "ordinalSU"); + np = np->child; + assert (np && np->which == DATA1N_data); + zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len); + + np = data1_search_tag (zei->dh, node_zebra->child, + "runNumber"); + np = np->child; + assert (np && np->which == DATA1N_data); + zei->runNumber = atoi_n (np->u.data.data, np->u.data.len); } else { - zti->dictNum = 1; - if (writeFlag) - { - trec = rec_new (records); + zei->ordinalSU = 1; + zei->runNumber = 0; + if (writeFlag) + { + char *sgml_buf; + int sgml_len; + zei->data1_target = + data1_read_sgml (zei->dh, zei->nmem, + "targetInfo\n" + "Zebra\n" + "1\n" + "1\n" + "Zebra\n" + "\n" ); + /* write now because we want to be sure about the sysno */ + trec = rec_new (records); trec->info[recInfo_fileType] = - rec_strdup ("grs.explain.targetInfo", - 
&trec->size[recInfo_fileType]); + rec_strdup ("grs.sgml", &trec->size[recInfo_fileType]); trec->info[recInfo_databaseName] = - rec_strdup ("IR-Explain-1", - &trec->size[recInfo_databaseName]); - trec->info[recInfo_databaseName] = xstrdup ("IR-Explain-1"); - trec->info[recInfo_storeData] = xmalloc (1+sizeof(zti->dictNum)); - memcpy (trec->info[recInfo_storeData], &zti->dictNum, sizeof(zti->dictNum)); - trec->info[recInfo_storeData][sizeof(zti->dictNum)] = '\0'; - trec->size[recInfo_storeData] = sizeof(zti->dictNum)+1; - rec_put (records, &trec); - } + rec_strdup ("IR-Explain-1", &trec->size[recInfo_databaseName]); + + sgml_buf = data1_nodetoidsgml(dh, zei->data1_target, 0, &sgml_len); + trec->info[recInfo_storeData] = xmalloc (sgml_len); + memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len); + trec->size[recInfo_storeData] = sgml_len; + + rec_put (records, &trec); + } } - *zdi = NULL; + *zdip = NULL; rec_rm (&trec); - return zti; + zebraExplain_newDatabase (zei, "IR-Explain-1"); + return zei; } -static void zebTargetInfo_readDatabase (ZebTargetInfo *zti, - struct zebDatabaseInfoB *zdi) + +static void zebraExplain_readDatabase (ZebraExplainInfo zei, + struct zebDatabaseInfoB *zdi) { - const char *p; - struct zebSUInfoB **zsuip = &zdi->SUInfo; - int i, no; Record rec; + data1_node *node_dbinfo, *node_zebra, *node_list, *np; + struct zebSUInfoB **zsuip = &zdi->SUInfo; + + assert (zdi->sysno); + rec = rec_get (zei->records, zdi->sysno); - rec = rec_get (zti->records, zdi->sysno); - assert (rec); - p = rec->info[recInfo_storeData]; - memcpy (&zdi->info, p, sizeof(zdi->info)); - p += sizeof(zdi->info); - memcpy (&no, p, sizeof(no)); - p += sizeof(no); - for (i = 0; idata1_database = read_sgml_rec (zei->dh, zei->nmem, rec); + + node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child, + "databaseInfo"); + + node_zebra = data1_search_tag (zei->dh, node_dbinfo->child, + "zebraInfo"); + np = data1_search_tag (zei->dh, node_dbinfo->child, + "recordBytes"); + 
if (np && np->child && np->child->which == DATA1N_data) + { + zdi->recordBytes = atoi_n (np->child->u.data.data, + np->child->u.data.len); + } + node_list = data1_search_tag (zei->dh, node_zebra->child, + "attrlist"); + for (np = node_list->child; np; np = np->next) { + data1_node *node_set = NULL; + data1_node *node_use = NULL; + data1_node *node_ordinal = NULL; + data1_node *np2; + if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "attr")) + continue; + for (np2 = np->child; np2; np2 = np2->next) + { + if (np2->which != DATA1N_tag || !np2->child || + np2->child->which != DATA1N_data) + continue; + if (!strcmp (np2->u.tag.tag, "set")) + node_set = np2->child; + else if (!strcmp (np2->u.tag.tag, "use")) + node_use = np2->child; + else if (!strcmp (np2->u.tag.tag, "ordinal")) + node_ordinal = np2->child; + } + assert (node_set && node_use && node_ordinal); + *zsuip = xmalloc (sizeof(**zsuip)); - memcpy (&(*zsuip)->info, p, sizeof((*zsuip)->info)); - p += sizeof((*zsuip)->info); + (*zsuip)->info.set = atoi_n (node_set->u.data.data, + node_set->u.data.len); + (*zsuip)->info.use = atoi_n (node_use->u.data.data, + node_use->u.data.len); + (*zsuip)->info.ordinal = atoi_n (node_ordinal->u.data.data, + node_ordinal->u.data.len); + logf (LOG_DEBUG, "set=%d use=%d ordinal=%d", + (*zsuip)->info.set, (*zsuip)->info.use, (*zsuip)->info.ordinal); zsuip = &(*zsuip)->next; } *zsuip = NULL; + + if ((np = data1_search_tag (zei->dh, node_dbinfo->child, + "recordCount")) && + (np = data1_search_tag (zei->dh, np->child, + "recordCountActual")) && + np->child->which == DATA1N_data) + { + zdi->recordCount = atoi_n (np->child->u.data.data, + np->child->u.data.len); + } zdi->readFlag = 0; rec_rm (&rec); } -int zebTargetInfo_curDatabase (ZebTargetInfo *zti, const char *database) +int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database) { struct zebDatabaseInfoB *zdi; - assert (zti); - if (zti->curDatabaseInfo && - !strcmp (zti->curDatabaseInfo->databaseName, database)) 
+ assert (zei); + if (zei->curDatabaseInfo && + !strcmp (zei->curDatabaseInfo->databaseName, database)) return 0; - for (zdi = zti->databaseInfo; zdi; zdi=zdi->next) + for (zdi = zei->databaseInfo; zdi; zdi=zdi->next) { if (!strcmp (zdi->databaseName, database)) break; @@ -253,78 +422,281 @@ int zebTargetInfo_curDatabase (ZebTargetInfo *zti, const char *database) if (!zdi) return -1; if (zdi->readFlag) - zebTargetInfo_readDatabase (zti, zdi); - zti->curDatabaseInfo = zdi; + zebraExplain_readDatabase (zei, zdi); + zei->curDatabaseInfo = zdi; return 0; } -int zebTargetInfo_newDatabase (ZebTargetInfo *zti, const char *database) +int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database) { struct zebDatabaseInfoB *zdi; + data1_node *node_dbinfo; - assert (zti); - for (zdi = zti->databaseInfo; zdi; zdi=zdi->next) + assert (zei); + for (zdi = zei->databaseInfo; zdi; zdi=zdi->next) { if (!strcmp (zdi->databaseName, database)) break; } if (zdi) return -1; + /* it's new really. make it */ zdi = xmalloc (sizeof(*zdi)); - zdi->next = zti->databaseInfo; - zti->databaseInfo = zdi; + zdi->next = zei->databaseInfo; + zei->databaseInfo = zdi; zdi->sysno = 0; + zdi->recordCount = 0; + zdi->recordBytes = 0; zdi->readFlag = 0; - zdi->databaseName = xstrdup (database); + zdi->databaseName = nmem_strdup (zei->nmem, database); zdi->SUInfo = NULL; + + assert (zei->dh); + assert (zei->nmem); + + zdi->data1_database = + data1_read_sgml (zei->dh, zei->nmem, + "databaseInfo\n" + "0\n" + "1\n" + "\n"); + + node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child, + "databaseInfo"); + assert (node_dbinfo); + + data1_add_tagdata_text (zei->dh, node_dbinfo, "name", + database, zei->nmem); + +#if ZINFO_DEBUG + data1_pr_tree (zei->dh, zdi->data1_database, stderr); +#endif zdi->dirty = 1; - zti->dirty = 1; - zti->curDatabaseInfo = zdi; + zei->dirty = 1; + zei->curDatabaseInfo = zdi; return 0; } -int zebTargetInfo_lookupSU (ZebTargetInfo *zti, int set, int use) +static void 
zebraExplain_writeDatabase (ZebraExplainInfo zei, + struct zebDatabaseInfoB *zdi) { + char *sgml_buf; + int sgml_len; + Record drec; + data1_node *node_dbinfo, *node_list, *node_count, *node_zebra; struct zebSUInfoB *zsui; + + if (!zdi->dirty) + return; + + if (zdi->sysno) + { + drec = rec_get (zei->records, zdi->sysno); + xfree (drec->info[recInfo_storeData]); + } + else + { + drec = rec_new (zei->records); + zdi->sysno = drec->sysno; + + drec->info[recInfo_fileType] = + rec_strdup ("grs.sgml", &drec->size[recInfo_fileType]); + drec->info[recInfo_databaseName] = + rec_strdup ("IR-Explain-1", + &drec->size[recInfo_databaseName]); + } + assert (zdi->data1_database); + node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child, + "databaseInfo"); + /* record count */ + node_count = data1_make_tag (zei->dh, node_dbinfo, + "recordCount", zei->nmem); + data1_add_tagdata_int (zei->dh, node_count, "recordCountActual", + zdi->recordCount, zei->nmem); + + /* zebra info (private) */ + node_zebra = data1_make_tag (zei->dh, node_dbinfo, + "zebraInfo", zei->nmem); + node_list = data1_make_tag (zei->dh, node_zebra, + "attrlist", zei->nmem); + for (zsui = zdi->SUInfo; zsui; zsui = zsui->next) + { + data1_node *node_attr; + node_attr = data1_add_tag (zei->dh, node_list, + "attr", zei->nmem); + data1_add_tagdata_int (zei->dh, node_attr, "set", + zsui->info.set, zei->nmem); + data1_add_tagdata_int (zei->dh, node_attr, "use", + zsui->info.use, zei->nmem); + data1_add_tagdata_int (zei->dh, node_attr, "ordinal", + zsui->info.ordinal, zei->nmem); + } + data1_add_tagdata_int (zei->dh, node_zebra, + "recordBytes", zdi->recordBytes, zei->nmem); + /* convert to "SGML" and write it */ +#if ZINFO_DEBUG + data1_pr_tree (zei->dh, zdi->data1_database, stderr); +#endif + sgml_buf = data1_nodetoidsgml(zei->dh, zdi->data1_database, + 0, &sgml_len); + drec->info[recInfo_storeData] = xmalloc (sgml_len); + memcpy (drec->info[recInfo_storeData], sgml_buf, sgml_len); + 
drec->size[recInfo_storeData] = sgml_len; + + rec_put (zei->records, &drec); +} + +static void trav_attset (data1_handle dh, ZebraExplainInfo zei, + data1_attset *p_this) +{ + struct zebraExplainAttset *p_reg = zei->attsets; + + if (!p_this) + return ; + while (p_reg) + { + if (!strcmp (p_this->name, p_reg->name)) + break; + p_reg = p_reg->next; + } + if (!p_this) + { + p_reg = nmem_malloc (zei->nmem, sizeof (*p_reg)); + p_reg->name = nmem_strdup (zei->nmem, p_this->name); + p_reg->ordinal = p_this->ordinal; + p_reg->next = zei->attsets; + zei->attsets = p_reg; + } + trav_attset (dh, zei, p_this->children); +} - assert (zti->curDatabaseInfo); - for (zsui = zti->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next) +static void trav_absyn (data1_handle dh, void *h, data1_absyn *a) +{ + logf (LOG_LOG, "absyn %s", a->name); + trav_attset (dh, (ZebraExplainInfo) h, a->attset); +} + +static void zebraExplain_writeTarget (ZebraExplainInfo zei) +{ + struct zebDatabaseInfoB *zdi; + data1_node *node_tgtinfo, *node_list, *node_zebra; + Record trec; + int sgml_len; + char *sgml_buf; + + if (!zei->dirty) + return; + + trec = rec_get (zei->records, 1); + xfree (trec->info[recInfo_storeData]); + + node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child, + "targetInfo"); + assert (node_tgtinfo); + + node_zebra = data1_make_tag (zei->dh, node_tgtinfo, + "zebraInfo", zei->nmem); + data1_add_tagdata_text (zei->dh, node_zebra, "version", + ZEBRAVER, zei->nmem); + node_list = data1_add_tag (zei->dh, node_zebra, + "databaseList", zei->nmem); + for (zdi = zei->databaseInfo; zdi; zdi = zdi->next) + { + data1_node *node_db; + node_db = data1_add_tag (zei->dh, node_list, + "database", zei->nmem); + data1_add_tagdata_text (zei->dh, node_db, "name", + zdi->databaseName, zei->nmem); + data1_add_tagdata_int (zei->dh, node_db, "id", + zdi->sysno, zei->nmem); + } + data1_add_tagdata_int (zei->dh, node_zebra, "ordinalSU", + zei->ordinalSU, zei->nmem); + + data1_add_tagdata_int (zei->dh, 
node_zebra, "runNumber", + zei->runNumber, zei->nmem); + + node_list = data1_add_tag (zei->dh, node_zebra, + "attsetList", zei->nmem); + /* convert to "SGML" and write it */ +#if ZINFO_DEBUG + data1_pr_tree (zei->dh, zei->data1_target, stderr); +#endif + sgml_buf = data1_nodetoidsgml(zei->dh, zei->data1_target, + 0, &sgml_len); + trec->info[recInfo_storeData] = xmalloc (sgml_len); + memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len); + trec->size[recInfo_storeData] = sgml_len; + + rec_put (zei->records, &trec); +} + +int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use) +{ + struct zebSUInfoB *zsui; + + assert (zei->curDatabaseInfo); + for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next) if (zsui->info.use == use && zsui->info.set == set) return zsui->info.ordinal; return -1; } -int zebTargetInfo_addSU (ZebTargetInfo *zti, int set, int use) +int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use) { struct zebSUInfoB *zsui; - assert (zti->curDatabaseInfo); - for (zsui = zti->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next) + assert (zei->curDatabaseInfo); + for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next) if (zsui->info.use == use && zsui->info.set == set) return -1; zsui = xmalloc (sizeof(*zsui)); - zsui->next = zti->curDatabaseInfo->SUInfo; - zti->curDatabaseInfo->SUInfo = zsui; - zti->curDatabaseInfo->dirty = 1; - zti->dirty = 1; + zsui->next = zei->curDatabaseInfo->SUInfo; + zei->curDatabaseInfo->SUInfo = zsui; + zei->curDatabaseInfo->dirty = 1; + zei->dirty = 1; zsui->info.set = set; zsui->info.use = use; - zsui->info.ordinal = (zti->dictNum)++; + zsui->info.ordinal = (zei->ordinalSU)++; return zsui->info.ordinal; } -ZebDatabaseInfo *zebTargetInfo_getDB (ZebTargetInfo *zti) +void zebraExplain_recordBytesIncrement (ZebraExplainInfo zei, int adjust_num) +{ + assert (zei->curDatabaseInfo); + + zei->curDatabaseInfo->recordBytes += adjust_num; + zei->curDatabaseInfo->dirty = 1; +} + +void 
zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num) { - assert (zti->curDatabaseInfo); + assert (zei->curDatabaseInfo); - return &zti->curDatabaseInfo->info; + zei->curDatabaseInfo->recordCount += adjust_num; + zei->curDatabaseInfo->dirty = 1; } -void zebTargetInfo_setDB (ZebTargetInfo *zti, ZebDatabaseInfo *zdi) +int zebraExplain_runNumberIncrement (ZebraExplainInfo zei, int adjust_num) { - assert (zti->curDatabaseInfo); + if (adjust_num) + zei->dirty = 1; + return zei->runNumber += adjust_num; +} - zti->curDatabaseInfo->dirty = 1; - memcpy (&zti->curDatabaseInfo->info, zdi, sizeof(*zdi)); +RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec) +{ + RecordAttr *recordAttr; + + if (rec->info[recInfo_attr]) + return (RecordAttr *) rec->info[recInfo_attr]; + recordAttr = xmalloc (sizeof(*recordAttr)); + rec->info[recInfo_attr] = (char *) recordAttr; + rec->size[recInfo_attr] = sizeof(*recordAttr); + + recordAttr->recordSize = 0; + recordAttr->recordOffset = 0; + recordAttr->runNumber = zei->runNumber; + return recordAttr; } diff --git a/index/zinfo.h b/index/zinfo.h index f707aee..991488b 100644 --- a/index/zinfo.h +++ b/index/zinfo.h @@ -1,29 +1,44 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.h,v $ - * Revision 1.2 1996-05-22 08:22:00 adam + * Revision 1.3 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.2 1996/05/22 08:22:00 adam * Added public ZebDatabaseInfo structure. * * Revision 1.1 1996/05/13 14:23:07 adam * Work on compaction of set/use bytes in dictionary. 
* */ +#ifndef ZINFO_H +#define ZINFO_H +#include #include "recindex.h" -typedef struct zebTargetInfo ZebTargetInfo; -typedef struct zebDatabaseInfo { - int noOfRecords; -} ZebDatabaseInfo; +typedef struct zebraExplainInfo *ZebraExplainInfo; +typedef struct zebDatabaseInfo ZebDatabaseInfo; +ZebraExplainInfo zebraExplain_open (Records records, data1_handle, + int writeFlag); +void zebraExplain_close (ZebraExplainInfo zei, int writeFlag); +int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database); +int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database); +int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use); +int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use); +void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num); +void zebraExplain_recordBytesIncrement (ZebraExplainInfo zei, int adjust_num); +int zebraExplain_runNumberIncrement (ZebraExplainInfo zei, int adjust_num); + +typedef struct { + int recordSize; + int recordOffset; + int runNumber; +} RecordAttr; +RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec); -ZebTargetInfo *zebTargetInfo_open (Records records, int writeFlag); -void zebTargetInfo_close (ZebTargetInfo *zti, int writeFlag); -int zebTargetInfo_curDatabase (ZebTargetInfo *zti, const char *database); -int zebTargetInfo_newDatabase (ZebTargetInfo *zti, const char *database); -int zebTargetInfo_lookupSU (ZebTargetInfo *zti, int set, int use); -int zebTargetInfo_addSU (ZebTargetInfo *zti, int set, int use); -ZebDatabaseInfo *zebTargetInfo_getDB (ZebTargetInfo *zti); -void zebTargetInfo_setDB (ZebTargetInfo *zti, ZebDatabaseInfo *zdi); +#endif diff --git a/index/zrpn.c b/index/zrpn.c index a5eac1e..daf07bd 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.74 1998-02-10 12:03:06 adam + * Revision 1.75 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. 
Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.74 1998/02/10 12:03:06 adam * Implemented Sort. * * Revision 1.73 1998/01/29 13:40:11 adam @@ -271,7 +275,6 @@ #include #include #include -#include struct rpn_char_map_info { ZebraMaps zm; @@ -284,12 +287,12 @@ static const char **rpn_char_map_handler (void *vp, const char **from, int len) return zebra_maps_input (p->zm, p->reg_type, from, len); } -static void rpn_char_map_prepare (ZServerInfo *zi, int reg_type, +static void rpn_char_map_prepare (ZebraHandle zh, int reg_type, struct rpn_char_map_info *map_info) { - map_info->zm = zi->zebra_maps; + map_info->zm = zh->zebra_maps; map_info->reg_type = reg_type; - dict_grep_cmap (zi->dict, map_info, rpn_char_map_handler); + dict_grep_cmap (zh->dict, map_info, rpn_char_map_handler); } typedef struct { @@ -359,13 +362,29 @@ struct grep_info { #ifdef TERM_COUNT int *term_no; #endif - ISAM_P *isam_p_buf; + ISAM_P *isam_p_buf; int isam_p_size; - int isam_p_indx; + int isam_p_indx; + ZebraHandle zh; + int reg_type; }; -static void add_isam_p (const char *info, struct grep_info *p) +static void term_untrans (ZebraHandle zh, int reg_type, + char *dst, const char *src) +{ + while (*src) + { + const char *cp = zebra_maps_output (zh->zebra_maps, reg_type, &src); + while (*cp) + *dst++ = *cp++; + } + *dst = '\0'; +} + +static void add_isam_p (const char *name, const char *info, + struct grep_info *p) { + char term_tmp[1024]; if (p->isam_p_indx == p->isam_p_size) { ISAM_P *new_isam_p_buf; @@ -397,12 +416,16 @@ static void add_isam_p (const char *info, struct grep_info *p) } assert (*info == sizeof(*p->isam_p_buf)); memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf)); + + term_untrans (p->zh, p->reg_type, term_tmp, name+2); + logf (LOG_DEBUG, "grep: %s", term_tmp); + (p->isam_p_indx)++; } static int grep_handle (char *name, const char *info, void *p) { - add_isam_p (info, p); + add_isam_p (name, info, p); return 
0; } @@ -430,11 +453,13 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src, } static int term_100 (ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split) + const char **src, char *dst, int space_split, + char *dst_term) { const char *s0, *s1; const char **map; int i = 0; + int j = 0; if (!term_pre (zebra_maps, reg_type, src, NULL, NULL)) return 0; @@ -449,20 +474,24 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type, { if (!isalnum (*s1) && *s1 != '-') dst[i++] = '\\'; + dst_term[j++] = *s1; dst[i++] = *s1++; } } dst[i] = '\0'; + dst_term[j] = '\0'; *src = s0; return i; } static int term_101 (ZebraMaps zebra_maps, int reg_type, - const char **src, char *dst, int space_split) + const char **src, char *dst, int space_split, + char *dst_term) { const char *s0, *s1; const char **map; int i = 0; + int j = 0; if (!term_pre (zebra_maps, reg_type, src, "#", "#")) return 0; @@ -473,7 +502,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type, { dst[i++] = '.'; dst[i++] = '*'; - s0++; + dst_term[j++] = *s0++; } else { @@ -485,20 +514,24 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type, { if (!isalnum (*s1)) dst[i++] = '\\'; + dst_term[j++] = *s1; dst[i++] = *s1++; } } } dst[i] = '\0'; + dst_term[j++] = '\0'; *src = s0; return i; } static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int *errors, int space_split) + char *dst, int *errors, int space_split, + char *dst_term) { int i = 0; + int j = 0; const char *s0, *s1; const char **map; @@ -516,7 +549,10 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, while (*s0) { if (strchr ("^\\()[].*+?|-", *s0)) + { + dst_term[j++] = *s0; dst[i++] = *s0++; + } else { s1 = s0; @@ -527,19 +563,22 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src, { if (!isalnum (*s1)) dst[i++] = '\\'; + dst_term[j++] = *s1; dst[i++] = *s1++; } } } dst[i] = '\0'; + dst_term[j] = '\0'; *src 
= s0; return i; } static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src, - char *dst, int space_split) + char *dst, int space_split, char *dst_term) { - return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split); + return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split, + dst_term); } /* gen_regular_rel - generate regular expression from relation @@ -651,13 +690,14 @@ static void gen_regular_rel (char *dst, int val, int islt) strcat (dst, "))"); } -static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static int relational_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, char *term_dict, oid_value attributeSet, struct grep_info *grep_info, int *max_pos, - int reg_type) + int reg_type, + char *term_dst) { AttrType relation; int relation_value; @@ -672,28 +712,32 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, switch (relation_value) { case 1: - if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, + term_dst)) return 0; term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <"); gen_regular_rel (term_tmp, term_value-1, 1); break; case 2: - if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, + term_dst)) return 0; term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation <="); gen_regular_rel (term_tmp, term_value, 1); break; case 4: - if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, + term_dst)) return 0; term_value = atoi (term_tmp); logf (LOG_DEBUG, "Relation >="); gen_regular_rel (term_tmp, term_value, 0); break; case 5: - if (!term_100 (zi->zebra_maps, reg_type, term_sub, term_tmp, 1)) + if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1, + term_dst)) return 0; term_value = atoi (term_tmp); logf 
(LOG_DEBUG, "Relation >"); @@ -703,7 +747,7 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 0; } logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp); - r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, max_pos, + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r); @@ -711,11 +755,12 @@ static int relational_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 1; } -static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static int field_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, oid_value attributeSet, struct grep_info *grep_info, int reg_type, int complete_flag, - int num_bases, char **basenames) + int num_bases, char **basenames, + char *term_dst) { char term_dict[2*IT_MAX_WORD+2]; int j, r, base_no; @@ -728,7 +773,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, struct rpn_char_map_info rcmi; int space_split = complete_flag ? 0 : 1; - rpn_char_map_prepare (zi, reg_type, &rcmi); + rpn_char_map_prepare (zh, reg_type, &rcmi); attr_init (&use, zapt, 1); use_value = attr_find (&use, &curAttributeSet); logf (LOG_DEBUG, "field_term, use value %d", use_value); @@ -746,20 +791,20 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, int max_pos, prefix_len = 0; termp = *term_sub; - if ((r=att_getentbyatt (zi, &attp, curAttributeSet, use_value))) + if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value))) { logf (LOG_DEBUG, "att_getentbyatt fail. 
set=%d use=%d r=%d", curAttributeSet, use_value, r); if (r == -1) - zi->errCode = 114; + zh->errCode = 114; else - zi->errCode = 121; + zh->errCode = 121; return -1; } - if (zebTargetInfo_curDatabase (zi->zti, basenames[base_no])) + if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) { - zi->errCode = 109; /* Database unavailable */ - zi->errString = basenames[base_no]; + zh->errCode = 109; /* Database unavailable */ + zh->errString = basenames[base_no]; return -1; } for (local_attr = attp.local_attributes; local_attr; @@ -767,7 +812,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, { int ord; - ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal, + ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal, local_attr->local); if (ord < 0) continue; @@ -780,7 +825,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, } if (!prefix_len) { - zi->errCode = 114; + zh->errCode = 114; return -1; } term_dict[prefix_len++] = ')'; @@ -788,8 +833,9 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, term_dict[prefix_len++] = reg_type; logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]); term_dict[prefix_len] = '\0'; - if (!relational_term (zi, zapt, &termp, term_dict, - attributeSet, grep_info, &max_pos, reg_type)) + if (!relational_term (zh, zapt, &termp, term_dict, + attributeSet, grep_info, &max_pos, reg_type, + term_dst)) { j = prefix_len; switch (truncation_value) @@ -797,47 +843,47 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, case -1: /* not specified */ case 100: /* do not truncate */ term_dict[j++] = '('; - if (!term_100 (zi->zebra_maps, reg_type, - &termp, term_dict + j, space_split)) + if (!term_100 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) 
logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r); break; case 1: /* right truncation */ term_dict[j++] = '('; - if (!term_100 (zi->zebra_maps, reg_type, - &termp, term_dict + j, space_split)) + if (!term_100 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ".*)"); - dict_lookup_grep (zi->dict, term_dict, 0, grep_info, + dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); break; case 2: /* left truncation */ case 3: /* left&right truncation */ - zi->errCode = 120; + zh->errCode = 120; return -1; case 101: /* process # in term */ term_dict[j++] = '('; - if (!term_101 (zi->zebra_maps, reg_type, - &termp, term_dict + j, space_split)) + if (!term_101 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); - r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r); break; case 102: /* Regexp-1 */ term_dict[j++] = '('; - if (!term_102 (zi->zebra_maps, reg_type, - &termp, term_dict + j, space_split)) + if (!term_102 (zh->zebra_maps, reg_type, + &termp, term_dict + j, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r); - r = dict_lookup_grep (zi->dict, term_dict, 0, grep_info, + r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos, 0, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d", @@ -846,12 +892,12 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, case 103: /* Regexp-2 */ r = 1; term_dict[j++] = '('; - if (!term_103 (zi->zebra_maps, reg_type, - &termp, term_dict + j, &r, space_split)) + if (!term_103 (zh->zebra_maps, reg_type, + &termp, term_dict + j, &r, space_split, term_dst)) return 0; strcat (term_dict, ")"); logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r); - r = 
dict_lookup_grep (zi->dict, term_dict, r, grep_info, + r = dict_lookup_grep (zh->dict, term_dict, r, grep_info, &max_pos, 2, grep_handle); if (r) logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d", @@ -865,7 +911,7 @@ static int field_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return 1; } -static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static void trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz) { size_t sizez; @@ -878,7 +924,7 @@ static void trans_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, termz[sizez] = '\0'; } -static void trans_scan_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz, int reg_type) { Z_Term *term = zapt->term; @@ -892,7 +938,7 @@ static void trans_scan_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, while ((len = (cp_end - cp)) > 0) { - map = zebra_maps_input (zi->zebra_maps, reg_type, &cp, len); + map = zebra_maps_input (zh->zebra_maps, reg_type, &cp, len); if (**map == *CHR_SPACE) space_map = *map; else @@ -908,69 +954,7 @@ static void trans_scan_term (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, termz[i] = '\0'; } -static RSET rpn_search_APT_relevance (ZServerInfo *zi, - Z_AttributesPlusTerm *zapt, - oid_value attributeSet, - int reg_type, int complete_flag, - int num_bases, char **basenames) -{ - rset_relevance_parms parms; - char termz[IT_MAX_WORD+1]; - const char *termp = termz; - struct grep_info grep_info; - RSET result; - int term_index = 0; - int r; - - parms.key_size = sizeof(struct it_key); - parms.max_rec = 200; - parms.cmp = key_compare_it; - parms.get_pos = key_get_pos; - parms.is = zi->isam; - parms.isc = zi->isamc; - parms.no_terms = 0; - parms.method = RSREL_METHOD_A; - - if (zapt->term->which != Z_Term_general) - { - zi->errCode = 124; - return NULL; - } - trans_term (zi, zapt, termz); - -#ifdef TERM_COUNT - grep_info.term_no = 0; -#endif - grep_info.isam_p_indx = 0; - 
grep_info.isam_p_size = 0; - grep_info.isam_p_buf = NULL; - while (1) - { - r = field_term (zi, zapt, &termp, attributeSet, &grep_info, - reg_type, complete_flag, num_bases, basenames); - if (r <= 0) - break; -#ifdef TERM_COUNT - for (; term_index < grep_info.isam_p_indx; term_index++) - grep_info.term_no[term_index] = parms.no_terms; - parms.no_terms++; -#endif - } - parms.term_no = grep_info.term_no; - parms.isam_positions = grep_info.isam_p_buf; - parms.no_isam_positions = grep_info.isam_p_indx; - if (grep_info.isam_p_indx > 0) - result = rset_create (rset_kind_relevance, &parms); - else - result = rset_create (rset_kind_null, NULL); -#ifdef TERM_COUNT - xfree(grep_info.term_no); -#endif - xfree (grep_info.isam_p_buf); - return result; -} - -static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2, +static RSET rpn_proximity (ZebraHandle zh, RSET rset1, RSET rset2, int ordered, int exclusion, int relation, int distance) { @@ -981,17 +965,18 @@ static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2, RSFD rsfd_result; RSET result; rset_temp_parms parms; + int term_index; - rsfd1 = rset_open (rset1, RSETF_READ|RSETF_SORT_SYSNO); - more1 = rset_read (rset1, rsfd1, &buf1); + rsfd1 = rset_open (rset1, RSETF_READ); + more1 = rset_read (rset1, rsfd1, &buf1, &term_index); - rsfd2 = rset_open (rset2, RSETF_READ|RSETF_SORT_SYSNO); - more2 = rset_read (rset2, rsfd2, &buf2); + rsfd2 = rset_open (rset2, RSETF_READ); + more2 = rset_read (rset2, rsfd2, &buf2, &term_index); parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zi->res, "setTmpDir"); + parms.temp_path = res_get (zh->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); - rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); + rsfd_result = rset_open (result, RSETF_WRITE); logf (LOG_DEBUG, "rpn_proximity excl=%d ord=%d rel=%d dis=%d", exclusion, ordered, relation, distance); @@ -999,9 +984,9 @@ static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET 
rset2, { int cmp = key_compare_it (&buf1, &buf2); if (cmp < -1) - more1 = rset_read (rset1, rsfd1, &buf1); + more1 = rset_read (rset1, rsfd1, &buf1, &term_index); else if (cmp > 1) - more2 = rset_read (rset2, rsfd2, &buf2); + more2 = rset_read (rset2, rsfd2, &buf2, &term_index); else { int sysno = buf1.sysno; @@ -1009,7 +994,7 @@ static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2, int n = 0; seqno[n++] = buf1.seqno; - while ((more1 = rset_read (rset1, rsfd1, &buf1)) && + while ((more1 = rset_read (rset1, rsfd1, &buf1, &term_index)) && sysno == buf1.sysno) if (n < 500) seqno[n++] = buf1.seqno; @@ -1051,7 +1036,7 @@ static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2, if (excl) rset_write (result, rsfd_result, &buf2); } - } while ((more2 = rset_read (rset2, rsfd2, &buf2)) && + } while ((more2 = rset_read (rset2, rsfd2, &buf2, &term_index)) && sysno == buf2.sysno); } } @@ -1061,15 +1046,18 @@ static RSET rpn_proximity (ZServerInfo *zi, RSET rset1, RSET rset2, return result; } -static RSET rpn_prox (ZServerInfo *zi, RSET *rset, int rset_no) +static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no) { int i; RSFD *rsfd; int *more; struct it_key **buf; - RSFD rsfd_result; RSET result; - rset_temp_parms parms; + char prox_term[1024]; + int length_prox_term = 0; + int min_nn = 10000000; + int term_index; + const char *flags = NULL; rsfd = xmalloc (sizeof(*rsfd)*rset_no); more = xmalloc (sizeof(*more)*rset_no); @@ -1077,112 +1065,140 @@ static RSET rpn_prox (ZServerInfo *zi, RSET *rset, int rset_no) for (i = 0; i= 0) - { - rset_close (rset[i], rsfd[i]); - xfree (buf[i]); - --i; - } - xfree (rsfd); - xfree (more); - xfree (buf); - return rset_create (rset_kind_null, NULL); - } + int j; + buf[i] = xmalloc (sizeof(**buf)); + rsfd[i] = rset_open (rset[i], RSETF_READ); + if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index))) + break; + for (j = 0; jno_rset_terms; j++) + { + const char *nflags = rset[i]->rset_terms[j]->flags; + 
char *term = rset[i]->rset_terms[j]->name; + int lterm = strlen(term); + if (length_prox_term) + prox_term[length_prox_term++] = ' '; + strcpy (prox_term + length_prox_term, term); + length_prox_term += lterm; + if (min_nn > rset[i]->rset_terms[j]->nn) + min_nn = rset[i]->rset_terms[j]->nn; + flags = nflags; + } } - parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zi->res, "setTmpDir"); - result = rset_create (rset_kind_temp, &parms); - rsfd_result = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); - - while (*more) + if (i != rset_no) { - for (i = 1; i 1) - { - more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]); - break; - } - else if (cmp == 1) - { - if (buf[i-1]->seqno+1 != buf[i]->seqno) - { - more[i-1] = rset_read (rset[i-1], rsfd[i-1], buf[i-1]); - break; - } - } - else - { - more[i] = rset_read (rset[i], rsfd[i], buf[i]); - break; - } - } - if (i == rset_no) - { - rset_write (result, rsfd_result, buf[0]); - more[0] = rset_read (*rset, *rsfd, *buf); - } + rset_null_parms parms; + + while (i >= 0) + { + rset_close (rset[i], rsfd[i]); + xfree (buf[i]); + --i; + } + parms.rset_term = rset_term_create (prox_term, -1, flags); + parms.rset_term->nn = 0; + result = rset_create (rset_kind_null, &parms); } - - for (i = 0; inn = min_nn; + parms.key_size = sizeof (struct it_key); + parms.temp_path = res_get (zh->res, "setTmpDir"); + result = rset_create (rset_kind_temp, &parms); + rsfd_result = rset_open (result, RSETF_WRITE); + + while (*more) + { + for (i = 1; i 1) + { + more[i-1] = rset_read (rset[i-1], rsfd[i-1], + buf[i-1], &term_index); + break; + } + else if (cmp == 1) + { + if (buf[i-1]->seqno+1 != buf[i]->seqno) + { + more[i-1] = rset_read (rset[i-1], rsfd[i-1], + buf[i-1], &term_index); + break; + } + } + else + { + more[i] = rset_read (rset[i], rsfd[i], buf[i], + &term_index); + break; + } + } + if (i == rset_no) + { + rset_write (result, rsfd_result, buf[0]); + more[0] = rset_read (*rset, *rsfd, *buf, &term_index); + } + } + + for 
(i = 0; iterm->which != Z_Term_general) - { - zi->errCode = 124; - return NULL; - } - trans_term (zi, zapt, termz); - #ifdef TERM_COUNT grep_info.term_no = 0; #endif grep_info.isam_p_size = 0; grep_info.isam_p_buf = NULL; + grep_info.zh = zh; + grep_info.reg_type = reg_type; while (1) { logf (LOG_DEBUG, "APT_phrase termp=%s", termp); grep_info.isam_p_indx = 0; - r = field_term (zi, zapt, &termp, attributeSet, &grep_info, - reg_type, complete_flag, num_bases, basenames); + r = field_term (zh, zapt, &termp, attributeSet, &grep_info, + reg_type, complete_flag, num_bases, basenames, + term_dst); if (r < 1) break; - rset[rset_no] = rset_trunc (zi, grep_info.isam_p_buf, - grep_info.isam_p_indx); + logf (LOG_DEBUG, "term: %s", term_dst); + rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf, + grep_info.isam_p_indx, term_dst, + strlen(term_dst), rank_type); assert (rset[rset_no]); if (++rset_no >= sizeof(rset)/sizeof(*rset)) break; @@ -1192,37 +1208,167 @@ static RSET rpn_search_APT_phrase (ZServerInfo *zi, #endif xfree (grep_info.isam_p_buf); if (rset_no == 0) - return rset_create (rset_kind_null, NULL); + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (term_dst, -1, rank_type); + return rset_create (rset_kind_null, &parms); + } else if (rset_no == 1) return (rset[0]); - result = rpn_prox (zi, rset, rset_no); + result = rpn_prox (zh, rset, rset_no); for (i = 0; i= sizeof(rset)/sizeof(*rset)) + break; + } +#ifdef TERM_COUNT + xfree(grep_info.term_no); +#endif + xfree (grep_info.isam_p_buf); + if (rset_no == 0) + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (term_dst, -1, rank_type); + return rset_create (rset_kind_null, &parms); + } + result = rset[0]; + for (i = 1; i= sizeof(rset)/sizeof(*rset)) + break; + } +#ifdef TERM_COUNT + xfree(grep_info.term_no); +#endif + xfree (grep_info.isam_p_buf); + if (rset_no == 0) + { + rset_null_parms parms; + + parms.rset_term = rset_term_create (term_dst, -1, rank_type); + return rset_create 
(rset_kind_null, &parms); + } + result = rset[0]; + for (i = 1; iterm->which != Z_Term_general) - { - zi->errCode = 124; - return NULL; - } + parms.rset_term = rset_term_create (termz, -1, rank_type); parms.key_size = sizeof (struct it_key); - parms.temp_path = res_get (zi->res, "setTmpDir"); + parms.temp_path = res_get (zh->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); - rsfd = rset_open (result, RSETF_WRITE|RSETF_SORT_SYSNO); - - trans_term (zi, zapt, termz); + rsfd = rset_open (result, RSETF_WRITE); key.sysno = atoi (termz); + key.seqno = 1; if (key.sysno <= 0) key.sysno = 1; rset_write (result, rsfd, &key); @@ -1230,50 +1376,59 @@ static RSET rpn_search_APT_local (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, return result; } -static RSET rpn_search_APT (ZServerInfo *zi, Z_AttributesPlusTerm *zapt, +static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, oid_value attributeSet, int num_bases, char **basenames) { - int reg_type; + unsigned reg_id; char *search_type = NULL; + char *rank_type = NULL; int complete_flag; + char termz[IT_MAX_WORD+1]; - zebra_maps_attr (zi->zebra_maps, zapt, ®_type, &search_type, - &complete_flag); + zebra_maps_attr (zh->zebra_maps, zapt, ®_id, &search_type, + &rank_type, &complete_flag); - logf (LOG_DEBUG, "reg_type=%c", reg_type); + logf (LOG_DEBUG, "reg_id=%c", reg_id); logf (LOG_DEBUG, "complete_flag=%d", complete_flag); logf (LOG_DEBUG, "search_type=%s", search_type); + logf (LOG_DEBUG, "rank_type=%s", rank_type); + + if (zapt->term->which != Z_Term_general) + { + zh->errCode = 124; + return NULL; + } + trans_term (zh, zapt, termz); + if (!strcmp (search_type, "phrase")) { - return rpn_search_APT_phrase (zi, zapt, attributeSet, - reg_type, complete_flag, + return rpn_search_APT_phrase (zh, zapt, termz, attributeSet, + reg_id, complete_flag, rank_type, num_bases, basenames); } - else if (!strcmp (search_type, "ranked")) + else if (!strcmp (search_type, "and-list")) + { + return 
rpn_search_APT_and_list (zh, zapt, termz, attributeSet, + reg_id, complete_flag, rank_type, + num_bases, basenames); + } + else if (!strcmp (search_type, "or-list")) { - return rpn_search_APT_relevance (zi, zapt, attributeSet, - reg_type, complete_flag, - num_bases, basenames); + return rpn_search_APT_or_list (zh, zapt, termz, attributeSet, + reg_id, complete_flag, rank_type, + num_bases, basenames); } else if (!strcmp (search_type, "local")) { - return rpn_search_APT_local (zi, zapt, attributeSet); + return rpn_search_APT_local (zh, zapt, termz, attributeSet, + rank_type); } - zi->errCode = 118; + zh->errCode = 118; return NULL; } -static RSET rpn_search_ref (ZServerInfo *zi, Z_ResultSetId *resultSetId) -{ - ZServerSet *s; - - if (!(s = resultSetGet (zi, resultSetId))) - return rset_create (rset_kind_null, NULL); - return s->rset; -} - -static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, +static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs, oid_value attributeSet, ODR stream, int num_bases, char **basenames) { @@ -1282,17 +1437,13 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, { Z_Operator *zop = zs->u.complex->roperator; rset_bool_parms bool_parms; - int soft = 0; - - bool_parms.rset_l = rpn_search_structure (zi, zs->u.complex->s1, + bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1, attributeSet, stream, num_bases, basenames); if (bool_parms.rset_l == NULL) return NULL; - if (rset_is_ranked(bool_parms.rset_l)) - soft = 1; - bool_parms.rset_r = rpn_search_structure (zi, zs->u.complex->s2, + bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2, attributeSet, stream, num_bases, basenames); if (bool_parms.rset_r == NULL) @@ -1300,37 +1451,35 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, rset_delete (bool_parms.rset_l); return NULL; } - if (rset_is_ranked(bool_parms.rset_r)) - soft = 1; bool_parms.key_size = sizeof(struct it_key); bool_parms.cmp = 
key_compare_it; switch (zop->which) { case Z_Operator_and: - r = rset_create (soft ? rset_kind_sand:rset_kind_and, &bool_parms); + r = rset_create (rset_kind_and, &bool_parms); break; case Z_Operator_or: - r = rset_create (soft ? rset_kind_sor:rset_kind_or, &bool_parms); + r = rset_create (rset_kind_or, &bool_parms); break; case Z_Operator_and_not: - r = rset_create (soft ? rset_kind_snot:rset_kind_not, &bool_parms); + r = rset_create (rset_kind_not, &bool_parms); break; case Z_Operator_prox: if (zop->u.prox->which != Z_ProxCode_known) { - zi->errCode = 132; + zh->errCode = 132; return NULL; } if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word) { char *val = odr_malloc (stream, 16); - zi->errCode = 132; - zi->errString = val; + zh->errCode = 132; + zh->errString = val; sprintf (val, "%d", *zop->u.prox->proximityUnitCode); return NULL; } - r = rpn_proximity (zi, bool_parms.rset_l, bool_parms.rset_r, + r = rpn_proximity (zh, bool_parms.rset_l, bool_parms.rset_r, *zop->u.prox->ordered, (!zop->u.prox->exclusion ? 
0 : *zop->u.prox->exclusion), @@ -1338,7 +1487,7 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, *zop->u.prox->distance); break; default: - zi->errCode = 110; + zh->errCode = 110; return NULL; } } @@ -1347,117 +1496,54 @@ static RSET rpn_search_structure (ZServerInfo *zi, Z_RPNStructure *zs, if (zs->u.simple->which == Z_Operand_APT) { logf (LOG_DEBUG, "rpn_search_APT"); - r = rpn_search_APT (zi, zs->u.simple->u.attributesPlusTerm, + r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm, attributeSet, num_bases, basenames); } else if (zs->u.simple->which == Z_Operand_resultSetId) { logf (LOG_DEBUG, "rpn_search_ref"); - r = rpn_search_ref (zi, zs->u.simple->u.resultSetId); + r = resultSetRef (zh, zs->u.simple->u.resultSetId); + if (!r) + r = rset_create (rset_kind_null, NULL); } else { - zi->errCode = 3; + zh->errCode = 3; return NULL; } } else { - zi->errCode = 3; + zh->errCode = 3; return NULL; } return r; } -void count_set_save (ZServerInfo *zi, RSET *r, int *count) -{ - int psysno = 0; - int kno = 0; - struct it_key key; - RSFD rfd, wfd; - RSET w; - rset_temp_parms parms; - int maxResultSetSize = atoi (res_get_def (zi->res, - "maxResultSetSize", "400")); - logf (LOG_DEBUG, "count_set_save"); - *count = 0; - parms.key_size = sizeof(struct it_key); - parms.temp_path = res_get (zi->res, "setTmpDir"); - w = rset_create (rset_kind_temp, &parms); - wfd = rset_open (w, RSETF_WRITE|RSETF_SORT_SYSNO); - rfd = rset_open (*r, RSETF_READ|RSETF_SORT_SYSNO); - while (rset_read (*r, rfd, &key)) - { - if (key.sysno != psysno) - { - if (*count < maxResultSetSize) - rset_write (w, wfd, &key); - (*count)++; - psysno = key.sysno; - } - kno++; - } - rset_close (*r, rfd); - rset_delete (*r); - rset_close (w, wfd); - *r = w; - logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *count); -} - -static void count_set (RSET r, int *count) +void rpn_search (ZebraHandle zh, ODR stream, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname) 
{ - int psysno = 0; - int kno = 0; - struct it_key key; - RSFD rfd; - - logf (LOG_DEBUG, "count_set"); - - *count = 0; - rfd = rset_open (r, RSETF_READ|RSETF_SORT_SYSNO); - while (rset_read (r, rfd, &key)) - { - if (key.sysno != psysno) - { - psysno = key.sysno; - (*count)++; - } - kno++; - } - rset_close (r, rfd); - logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *count); -} - -int rpn_search (ZServerInfo *zi, ODR stream, - Z_RPNQuery *rpn, int num_bases, char **basenames, - const char *setname, int *hits) -{ - int i; RSET rset; oident *attrset; oid_value attributeSet; zlog_rpn (rpn); - zi->errCode = 0; - zi->errString = NULL; + zh->errCode = 0; + zh->errString = NULL; + zh->hits = 0; attrset = oid_getentbyoid (rpn->attributeSetId); attributeSet = attrset->value; - rset = rpn_search_structure (zi, rpn->RPNStructure, attributeSet, stream, + rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet, stream, num_bases, basenames); if (!rset) - return zi->errCode; - if (rset_is_volatile(rset)) - count_set_save(zi, &rset, hits); - else if ((i = rset_hits (rset)) >= 0) - *hits = i; - else - count_set (rset, hits); - resultSetAdd (zi, setname, 1, rset); - if (zi->errCode) - logf (LOG_DEBUG, "search error: %d", zi->errCode); - return zi->errCode; + return; + + resultSetAdd (zh, setname, 1, rset, &zh->hits); + if (zh->errCode) + logf (LOG_DEBUG, "search error: %d", zh->errCode); } struct scan_info_entry { @@ -1492,27 +1578,47 @@ static int scan_handle (char *name, const char *info, int pos, void *client) return 0; } - -static void scan_term_untrans (ZServerInfo *zi, ODR stream, int reg_type, +static void scan_term_untrans (ZebraHandle zh, ODR stream, int reg_type, char **dstp, const char *src) -{ - char *dst = odr_malloc (stream, strlen(src)*2+1); - *dstp = dst; +{ + char term_dst[1024], **dst; + + term_untrans (zh, reg_type, term_dst, src); + + *dst = odr_malloc (stream, strlen(term_dst)+1); + strcpy (*dst, term_dst); +} - while (*src) +static void count_set 
(RSET r, int *count) +{ + int psysno = 0; + int kno = 0; + struct it_key key; + RSFD rfd; + int term_index; + + logf (LOG_DEBUG, "count_set"); + + *count = 0; + rfd = rset_open (r, RSETF_READ); + while (rset_read (r, rfd, &key, &term_index)) { - const char *cp = zebra_maps_output (zi->zebra_maps, reg_type, &src); - while (*cp) - *dst++ = *cp++; + if (key.sysno != psysno) + { + psysno = key.sysno; + (*count)++; + } + kno++; } - *dst = '\0'; + rset_close (r, rfd); + logf (LOG_DEBUG, "%d keys, %d records", kno, *count); } -int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, - oid_value attributeset, - int num_bases, char **basenames, - int *position, int *num_entries, struct scan_entry **list, - int *status) +void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **list, + int *is_partial) { int i; int pos = *position; @@ -1524,12 +1630,13 @@ int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, AttrType use; int use_value; struct scan_info *scan_info_array; - struct scan_entry *glist; + ZebraScanEntry *glist; int ords[32], ord_no = 0; int ptr[32]; - int reg_type; + unsigned reg_id; char *search_type = NULL; + char *rank_type = NULL; int complete_flag; if (attributeset == VAL_NONE) @@ -1541,11 +1648,11 @@ int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, attr_init (&use, zapt, 1); use_value = attr_find (&use, &attributeset); - if (zebra_maps_attr (zi->zebra_maps, zapt, ®_type, &search_type, - &complete_flag)) + if (zebra_maps_attr (zh->zebra_maps, zapt, ®_id, &search_type, + &rank_type, &complete_flag)) { - zi->errCode = 113; - return zi->errCode; + zh->errCode = 113; + return ; } if (use_value == -1) @@ -1556,33 +1663,37 @@ int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, attent attp; data1_local_attribute *local_attr; - if ((r=att_getentbyatt (zi, &attp, 
attributeset, use_value))) + if ((r=att_getentbyatt (zh, &attp, attributeset, use_value))) { logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d", attributeset, use_value); if (r == -1) - zi->errCode = 114; + zh->errCode = 114; else - zi->errCode = 121; + zh->errCode = 121; } - if (zebTargetInfo_curDatabase (zi->zti, basenames[base_no])) + if (zebraExplain_curDatabase (zh->zei, basenames[base_no])) { - zi->errString = basenames[base_no]; - return zi->errCode = 109; /* Database unavailable */ + zh->errString = basenames[base_no]; + zh->errCode = 109; /* Database unavailable */ + return; } for (local_attr = attp.local_attributes; local_attr && ord_no < 32; local_attr = local_attr->next) { int ord; - ord = zebTargetInfo_lookupSU (zi->zti, attp.attset_ordinal, - local_attr->local); + ord = zebraExplain_lookupSU (zh->zei, attp.attset_ordinal, + local_attr->local); if (ord > 0) ords[ord_no++] = ord; } } if (ord_no == 0) - return zi->errCode = 113; + { + zh->errCode = 113; + return; + } before = pos-1; after = 1+num-pos; scan_info_array = odr_malloc (stream, ord_no * sizeof(*scan_info_array)); @@ -1593,7 +1704,7 @@ int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, struct scan_info *scan_info = scan_info_array + i; struct rpn_char_map_info rcmi; - rpn_char_map_prepare (zi, reg_type, &rcmi); + rpn_char_map_prepare (zh, reg_id, &rcmi); scan_info->before = before; scan_info->after = after; @@ -1604,20 +1715,20 @@ int rpn_scan (ZServerInfo *zi, ODR stream, Z_AttributesPlusTerm *zapt, for (j = 0; j<before+after; j++) scan_info->list[j].term = NULL; termz[prefix_len++] = ords[i]; - termz[prefix_len++] = reg_type; + termz[prefix_len++] = reg_id; termz[prefix_len] = 0; strcpy (scan_info->prefix, termz); - trans_scan_term (zi, zapt, termz+prefix_len, reg_type); + trans_scan_term (zh, zapt, termz+prefix_len, reg_id); - dict_scan (zi->dict, termz, &before_tmp, &after_tmp, scan_info, + dict_scan (zh->dict, termz, &before_tmp, &after_tmp, scan_info, scan_handle); } glist = odr_malloc (stream,
(before+after)*sizeof(*glist)); for (i = 0; i < ord_no; i++) ptr[i] = before; - *status = BEND_SCAN_SUCCESS; + *is_partial = 0; for (i = 0; ierrCode) - logf (LOG_DEBUG, "scan error: %d", zi->errCode); - return zi->errCode; + if (zh->errCode) + logf (LOG_DEBUG, "scan error: %d", zh->errCode); } diff --git a/index/zserver.c b/index/zserver.c index 4c1ec26..a6ecfd3 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.c,v $ - * Revision 1.55 1998-02-10 12:03:06 adam + * Revision 1.56 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.55 1998/02/10 12:03:06 adam * Implemented Sort. * * Revision 1.54 1998/01/29 13:39:13 adam @@ -201,409 +205,148 @@ * Initial work on z39 server. * */ + #include #include +#include #ifdef WINDOWS #include #include #else #include #endif -#include #include -#include #include #include "zserver.h" -static int register_lock (ZServerInfo *zi) -{ - time_t lastChange; - int state = zebra_server_lock_get_state(zi, &lastChange); - - switch (state) - { - case 'c': - state = 1; - break; - default: - state = 0; - } - zebra_server_lock (zi, state); -#if USE_TIMES - times (&zi->tms1); -#endif - if (zi->registerState == state) - { - if (zi->registerChange >= lastChange) - return 0; - logf (LOG_LOG, "Register completely updated since last access"); - } - else if (zi->registerState == -1) - logf (LOG_LOG, "Reading register using state %d pid=%ld", state, - (long) getpid()); - else - logf (LOG_LOG, "Register has changed state from %d to %d", - zi->registerState, state); - zi->registerChange = lastChange; - if (zi->records) - { - zebTargetInfo_close (zi->zti, 0); - dict_close (zi->dict); - sortIdx_close (zi->sortIdx); - if (zi->isam) - is_close (zi->isam); - if (zi->isamc) - isc_close (zi->isamc); - rec_close (&zi->records); - } - bf_cache (zi->bfs, state ? 
res_get (zi->res, "shadow") : NULL); - zi->registerState = state; - zi->records = rec_open (zi->bfs, 0); - if (!(zi->dict = dict_open (zi->bfs, FNAME_DICT, 40, 0))) - return -1; - if (!(zi->sortIdx = sortIdx_open (zi->bfs, 0))) - return -1; - zi->isam = NULL; - zi->isamc = NULL; - if (!res_get_match (zi->res, "isam", "i", NULL)) - { - if (!(zi->isamc = isc_open (zi->bfs, FNAME_ISAMC, - 0, key_isamc_m(zi->res)))) - return -1; - - } - else - { - if (!(zi->isam = is_open (zi->bfs, FNAME_ISAM, key_compare, 0, - sizeof (struct it_key), zi->res))) - return -1; - } - zi->zti = zebTargetInfo_open (zi->records, 0); - - return 0; -} - -static void register_unlock (ZServerInfo *zi) -{ - static int waitSec = -1; - -#if USE_TIMES - times (&zi->tms2); - logf (LOG_LOG, "user/system: %ld/%ld", - (long) (zi->tms2.tms_utime - zi->tms1.tms_utime), - (long) (zi->tms2.tms_stime - zi->tms1.tms_stime)); -#endif - if (waitSec == -1) - { - char *s = res_get (zi->res, "debugRequestWait"); - if (s) - waitSec = atoi (s); - else - waitSec = 0; - } -#ifdef WINDOWS -#else - if (waitSec > 0) - sleep (waitSec); -#endif - if (zi->registerState != -1) - zebra_server_unlock (zi, zi->registerState); -} - static int bend_sort (void *handle, bend_sort_rr *rr); bend_initresult *bend_init (bend_initrequest *q) { bend_initresult *r = odr_malloc (q->stream, sizeof(*r)); - ZServerInfo *zi = xmalloc (sizeof(*zi)); + ZebraHandle zh; struct statserv_options_block *sob; r->errcode = 0; r->errstring = 0; - r->handle = zi; q->bend_sort = bend_sort; logf (LOG_DEBUG, "bend_init"); sob = statserv_getcontrol (); - logf (LOG_LOG, "Reading resources from %s", sob->configname); - if (!(zi->res = res_open (sob->configname))) + if (!(zh = zebra_open (NULL, sob->configname))) { - logf (LOG_FATAL, "Failed to read resources `%s'", sob->configname); + logf (LOG_FATAL, "Failed to open Zebra `%s'", sob->configname); r->errcode = 1; return r; } - zebra_server_lock_init (zi); - zi->dh = data1_create (); - zi->bfs = bfs_create 
(res_get (zi->res, "register")); - bf_lockDir (zi->bfs, res_get (zi->res, "lockDir")); - data1_set_tabpath (zi->dh, res_get(zi->res, "profilePath")); - zi->sets = NULL; - zi->registerState = -1; /* trigger open of registers! */ - zi->registerChange = 0; - - zi->records = NULL; - zi->registered_sets = NULL; - zi->zebra_maps = zebra_maps_open (res_get(zi->res, "profilePath"), - zi->res); + r->handle = zh; return r; } bend_searchresult *bend_search (void *handle, bend_searchrequest *q, int *fd) { - ZServerInfo *zi = handle; + ZebraHandle zh = handle; bend_searchresult *r = odr_malloc (q->stream, sizeof(*r)); - r->errcode = 0; - r->errstring = 0; r->hits = 0; - - register_lock (zi); - zi->errCode = 0; - zi->errString = NULL; - + r->errcode = 0; + r->errstring = NULL; + logf (LOG_LOG, "ResultSet '%s'", q->setname); switch (q->query->which) { case Z_Query_type_1: case Z_Query_type_101: - r->errcode = rpn_search (zi, q->stream, q->query->u.type_1, - q->num_bases, q->basenames, q->setname, - &r->hits); - r->errstring = zi->errString; + zebra_search_rpn (zh, q->stream, q->query->u.type_1, + q->num_bases, q->basenames, q->setname); + r->errcode = zh->errCode; + r->errstring = zh->errString; + r->hits = zh->hits; break; default: r->errcode = 107; } - register_unlock (zi); return r; } -struct fetch_control { - int record_offset; - int record_int_pos; - char *record_int_buf; - int record_int_len; - int fd; -}; - -static int record_ext_read (void *fh, char *buf, size_t count) -{ - struct fetch_control *fc = fh; - return read (fc->fd, buf, count); -} - -static off_t record_ext_seek (void *fh, off_t offset) -{ - struct fetch_control *fc = fh; - return lseek (fc->fd, offset + fc->record_offset, SEEK_SET); -} - -static off_t record_ext_tell (void *fh) -{ - struct fetch_control *fc = fh; - return lseek (fc->fd, 0, SEEK_CUR) - fc->record_offset; -} - -static off_t record_int_seek (void *fh, off_t offset) -{ - struct fetch_control *fc = fh; - return (off_t) (fc->record_int_pos = 
offset); -} - -static off_t record_int_tell (void *fh) -{ - struct fetch_control *fc = fh; - return (off_t) fc->record_int_pos; -} - -static int record_int_read (void *fh, char *buf, size_t count) -{ - struct fetch_control *fc = fh; - int l = fc->record_int_len - fc->record_int_pos; - if (l <= 0) - return 0; - l = (l < count) ? l : count; - memcpy (buf, fc->record_int_buf + fc->record_int_pos, l); - fc->record_int_pos += l; - return l; -} - -static int record_fetch (ZServerInfo *zi, int sysno, int score, ODR stream, - oid_value input_format, Z_RecordComposition *comp, - oid_value *output_format, char **rec_bufp, - int *rec_lenp, char **basenamep) -{ - Record rec; - char *fname, *file_type, *basename; - RecType rt; - struct recRetrieveCtrl retrieveCtrl; - char subType[128]; - struct fetch_control fc; - - rec = rec_get (zi->records, sysno); - if (!rec) - { - logf (LOG_DEBUG, "rec_get fail on sysno=%d", sysno); - return 14; - } - file_type = rec->info[recInfo_fileType]; - fname = rec->info[recInfo_filename]; - basename = rec->info[recInfo_databaseName]; - *basenamep = odr_malloc (stream, strlen(basename)+1); - strcpy (*basenamep, basename); - - if (!(rt = recType_byName (file_type, subType))) - { - logf (LOG_WARN, "Retrieve: Cannot handle type %s", file_type); - return 14; - } - logf (LOG_DEBUG, "retrieve localno=%d score=%d", sysno, score); - retrieveCtrl.fh = &fc; - fc.fd = -1; - if (rec->size[recInfo_storeData] > 0) - { - retrieveCtrl.readf = record_int_read; - retrieveCtrl.seekf = record_int_seek; - retrieveCtrl.tellf = record_int_tell; - fc.record_int_len = rec->size[recInfo_storeData]; - fc.record_int_buf = rec->info[recInfo_storeData]; - fc.record_int_pos = 0; - logf (LOG_DEBUG, "Internal retrieve. 
%d bytes", fc.record_int_len); - } - else - { - if ((fc.fd = open (fname, O_BINARY|O_RDONLY)) == -1) - { - logf (LOG_WARN|LOG_ERRNO, "Retrieve fail; missing file: %s", - fname); - rec_rm (&rec); - return 14; - } - memcpy (&fc.record_offset, rec->info[recInfo_offset], - sizeof(fc.record_offset)); - - retrieveCtrl.readf = record_ext_read; - retrieveCtrl.seekf = record_ext_seek; - retrieveCtrl.tellf = record_ext_tell; - - record_ext_seek (retrieveCtrl.fh, 0); - } - retrieveCtrl.subType = subType; - retrieveCtrl.localno = sysno; - retrieveCtrl.score = score; - retrieveCtrl.odr = stream; - retrieveCtrl.input_format = retrieveCtrl.output_format = input_format; - retrieveCtrl.comp = comp; - retrieveCtrl.diagnostic = 0; - retrieveCtrl.dh = zi->dh; - (*rt->retrieve)(&retrieveCtrl); - *output_format = retrieveCtrl.output_format; - *rec_bufp = retrieveCtrl.rec_buf; - *rec_lenp = retrieveCtrl.rec_len; - if (fc.fd != -1) - close (fc.fd); - rec_rm (&rec); - - return retrieveCtrl.diagnostic; -} - bend_fetchresult *bend_fetch (void *handle, bend_fetchrequest *q, int *num) { - ZServerInfo *zi = handle; + ZebraHandle zh = handle; bend_fetchresult *r = odr_malloc (q->stream, sizeof(*r)); - int positions[2]; - ZServerSetSysno *records; - - register_lock (zi); + ZebraRetrievalRecord retrievalRecord; - r->errstring = 0; - r->last_in_set = 0; - r->basename = "base"; - - zi->errCode = 0; + retrievalRecord.position = q->number; + + zebra_records_retrieve (zh, q->stream, q->setname, q->comp, + q->format, 1, &retrievalRecord); - positions[0] = q->number; - records = resultSetSysnoGet (zi, q->setname, 1, positions); - if (!records) + if (zh->errCode) { - logf (LOG_DEBUG, "resultSetRecordGet, error"); - r->errcode = 13; - register_unlock (zi); - return r; + r->errcode = zh->errCode; + r->errstring = zh->errString; } - if (!records[0].sysno) + else { - r->errcode = 13; - logf (LOG_DEBUG, "Out of range. 
pos=%d", q->number); - register_unlock (zi); - return r; + r->errcode = 0; + r->basename = retrievalRecord.base; + r->record = retrievalRecord.buf; + r->len = retrievalRecord.len; + r->format = retrievalRecord.format; } - r->errcode = record_fetch (zi, records[0].sysno, - records[0].score, q->stream, q->format, - q->comp, &r->format, &r->record, &r->len, - &r->basename); - resultSetSysnoDel (zi, records, 1); - register_unlock (zi); return r; } bend_deleteresult *bend_delete (void *handle, bend_deleterequest *q, int *num) { - ZServerInfo *zi = handle; - register_lock (zi); - register_unlock (zi); return 0; } bend_scanresult *bend_scan (void *handle, bend_scanrequest *q, int *num) { - ZServerInfo *zi = handle; + ZebraScanEntry *entries; + ZebraHandle zh = handle; bend_scanresult *r = odr_malloc (q->stream, sizeof(*r)); - int status; - - register_lock (zi); - zi->errCode = 0; - zi->errString = 0; - + int is_partial, i; + r->term_position = q->term_position; r->num_entries = q->num_entries; - r->errcode = rpn_scan (zi, q->stream, q->term, - q->attributeset, - q->num_bases, q->basenames, - &r->term_position, - &r->num_entries, &r->entries, &status); - r->errstring = zi->errString; - r->status = status; - register_unlock (zi); + + r->entries = odr_malloc (q->stream, sizeof(*r->entries) * q->num_entries); + zebra_scan (zh, q->stream, q->term, + q->attributeset, + q->num_bases, q->basenames, + &r->term_position, + &r->num_entries, &entries, &is_partial); + if (is_partial) + r->status = BEND_SCAN_PARTIAL; + else + r->status = BEND_SCAN_SUCCESS; + for (i = 0; i < r->num_entries; i++) + { + r->entries[i].term = entries[i].term; + r->entries[i].occurrences = entries[i].occurrences; + } + r->errcode = zh->errCode; + r->errstring = zh->errString; return r; } void bend_close (void *handle) { - ZServerInfo *zi = handle; + zebra_close ((ZebraHandle) handle); +} - if (zi->records) - { - resultSetDestroy (zi); - zebTargetInfo_close (zi->zti, 0); - dict_close (zi->dict); - 
sortIdx_close (zi->sortIdx); - if (zi->isam) - is_close (zi->isam); - if (zi->isamc) - isc_close (zi->isamc); - rec_close (&zi->records); - register_unlock (zi); - } - zebra_maps_close (zi->zebra_maps); - bfs_destroy (zi->bfs); - data1_destroy (zi->dh); - zebra_server_lock_destroy (zi); +int bend_sort (void *handle, bend_sort_rr *rr) +{ + ZebraHandle zh = handle; - res_close (zi->res); - xfree (zi); + zebra_sort (zh, rr->stream, rr->num_input_setnames, rr->input_setnames, + rr->output_setname, rr->sort_sequence, &rr->sort_status); + rr->errcode = zh->errCode; + rr->errstring = zh->errString; + return 0; } #ifndef WINDOWS @@ -625,20 +368,6 @@ static void pre_init (struct statserv_options_block *sob) } #endif -int bend_sort (void *handle, bend_sort_rr *rr) -{ - ZServerInfo *zi = handle; - -#if 1 - register_lock (zi); - - resultSetSort (zi, rr); - - register_unlock (zi); -#endif - return 0; -} - int main (int argc, char **argv) { struct statserv_options_block *sob; diff --git a/index/zserver.h b/index/zserver.h index ab1e985..dd93352 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zserver.h,v $ - * Revision 1.29 1998-02-10 12:03:06 adam + * Revision 1.30 1998-03-05 08:45:13 adam + * New result set model and modular ranking system. Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.29 1998/02/10 12:03:06 adam * Implemented Sort. 
* * Revision 1.28 1998/01/29 13:40:11 adam @@ -125,28 +129,30 @@ typedef struct { int sysno; int score; -} ZServerSetSysno; +} *ZebraPosSet; -typedef struct ZServerSet_ { - char *name; - RSET rset; - int size; - struct zset_sort_info *sort_info; - struct ZServerSet_ *next; -} ZServerSet; +typedef struct zebra_set *ZebraSet; + +typedef struct zebra_rank_class { + struct rank_control *control; + int init_flag; + void *class_handle; + struct zebra_rank_class *next; +} *ZebraRankClass; -typedef struct { +struct zebra_info { int registerState; /* 0 (no commit pages), 1 (use commit pages) */ time_t registerChange; - ZServerSet *sets; + ZebraSet sets; Dict dict; SortIdx sortIdx; ISAM isam; ISAMC isamc; Records records; int errCode; + int hits; char *errString; - ZebTargetInfo *zti; + ZebraExplainInfo zei; data1_handle dh; data1_attset *registered_sets; BFiles bfs; @@ -160,39 +166,68 @@ typedef struct { struct tms tms2; #endif ZebraMaps zebra_maps; -} ZServerInfo; + ZebraRankClass rank_classes; +}; + +typedef struct zebra_info *ZebraHandle; + +struct rank_control { + char *name; + void *(*create)(ZebraHandle zh); + void (*destroy)(ZebraHandle zh, void *class_handle); + void *(*begin)(ZebraHandle zh, void *class_handle, RSET rset); + void (*end)(ZebraHandle zh, void *set_handle); + int (*calc)(void *set_handle, int sysno); + void (*add)(void *set_handle, int seqno, int term_index); +}; + +void rpn_search (ZebraHandle zh, ODR stream, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname); + + +typedef struct { + int occurrences; + char *term; +} ZebraScanEntry; -int rpn_search (ZServerInfo *zi, ODR stream, - Z_RPNQuery *rpn, int num_bases, char **basenames, - const char *setname, int *hits); +void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **list, + int *is_partial); -int rpn_scan (ZServerInfo *zi, ODR stream, 
Z_AttributesPlusTerm *zapt, - oid_value attributeset, - int num_bases, char **basenames, - int *position, int *num_entries, struct scan_entry **list, - int *status); +RSET rset_trunc (ZebraHandle zh, ISAM_P *isam_p, int no, + const char *term, int length_term, const char *flags); -RSET rset_trunc (ZServerInfo *zi, ISAM_P *isam_p, int no); +ZebraSet resultSetAdd (ZebraHandle zh, const char *name, + int ov, RSET rset, int *hits); +ZebraSet resultSetGet (ZebraHandle zh, const char *name); +RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId); +void resultSetDestroy (ZebraHandle zh); -ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, - int ov, RSET rset); -ZServerSet *resultSetGet (ZServerInfo *zi, const char *name); -void resultSetDestroy (ZServerInfo *zi); +ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, + int num, int *positions); +void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num); -ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, - int num, int *positions); -void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num); +void resultSetSort (ZebraHandle zh, ODR stream, + int num_input_setnames, char **input_setnames, + char *output_setname, Z_SortKeySpecList *sort_sequence, + int *sort_status); -int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr); +void zebra_sort (ZebraHandle zh, ODR stream, + int num_input_setnames, char **input_setnames, + char *output_setname, Z_SortKeySpecList *sort_sequence, + int *sort_status); void zlog_rpn (Z_RPNQuery *rpn); void zlog_scan (Z_AttributesPlusTerm *zapt, oid_value ast); -int zebra_server_lock_init (ZServerInfo *zi); -int zebra_server_lock_destroy (ZServerInfo *zi); -int zebra_server_lock (ZServerInfo *zi, int lockCommit); -void zebra_server_unlock (ZServerInfo *zi, int commitPhase); -int zebra_server_lock_get_state (ZServerInfo *zi, time_t *timep); +int zebra_server_lock_init (ZebraHandle zh); +int zebra_server_lock_destroy 
(ZebraHandle zh); +int zebra_server_lock (ZebraHandle zh, int lockCommit); +void zebra_server_unlock (ZebraHandle zh, int commitPhase); +int zebra_server_lock_get_state (ZebraHandle zh, time_t *timep); typedef struct attent { @@ -200,4 +235,42 @@ typedef struct attent data1_local_attribute *local_attributes; } attent; -int att_getentbyatt(ZServerInfo *zi, attent *res, oid_value set, int att); +void zebraRankInstall (ZebraHandle zh, struct rank_control *ctrl); +ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name); +void zebraRankDestroy (ZebraHandle zh); + +int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att); + +extern struct rank_control *rank1_class; + +ZebraHandle zebra_open (const char *host, const char *configName); +void zebra_search_rpn (ZebraHandle zh, ODR stream, + Z_RPNQuery *query, int num_bases, char **basenames, + const char *setname); + +typedef struct { + int position; + char *buf; + int len; + oid_value format; + char *base; +} ZebraRetrievalRecord; + +void zebra_records_retrieve (ZebraHandle zh, ODR stream, + const char *setname, Z_RecordComposition *comp, + oid_value input_format, + int num_recs, ZebraRetrievalRecord *recs); + +int zebra_record_fetch (ZebraHandle zh, int sysno, int score, ODR stream, + oid_value input_format, Z_RecordComposition *comp, + oid_value *output_format, char **rec_bufp, + int *rec_lenp, char **basenamep); + +void zebra_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **list, + int *is_partial); + +void zebra_close (ZebraHandle zh); + diff --git a/index/zsets.c b/index/zsets.c index 88b4880..974e2de 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.14 1998-02-10 16:39:15 adam + * Revision 1.15 1998-03-05 08:45:14 adam + * New result set model and modular ranking system. 
Moved towards + * descent server API. System information stored as "SGML" records. + * + * Revision 1.14 1998/02/10 16:39:15 adam * Minor change. * * Revision 1.13 1998/02/10 12:03:06 adam @@ -67,8 +71,17 @@ #define SORT_IDX_ENTRYSIZE 64 #define ZSET_SORT_MAX_LEVEL 3 +struct zebra_set { + char *name; + RSET rset; + int size; + struct zset_sort_info *sort_info; + struct zebra_set *next; +}; + struct zset_sort_entry { int sysno; + int score; char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE]; }; @@ -78,80 +91,85 @@ struct zset_sort_info { struct zset_sort_entry **entries; }; -void resultSetSortReset (struct zset_sort_info **si) -{ - int i; - if (!*si) - return ; - for (i = 0; i<(*si)->num_entries; i++) - xfree ((*si)->entries[i]); - xfree ((*si)->entries); - xfree (*si); - *si = NULL; -} +void resultSetRank (ZebraHandle zh, struct zset_sort_info *si, + RSET rset, int *hits); -ZServerSet *resultSetAdd (ZServerInfo *zi, const char *name, int ov, RSET rset) +ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov, + RSET rset, int *hits) { - ZServerSet *s; + ZebraSet s; + int i; - for (s = zi->sets; s; s = s->next) + for (s = zh->sets; s; s = s->next) if (!strcmp (s->name, name)) { logf (LOG_DEBUG, "updating result set %s", name); if (!ov) return NULL; - resultSetSortReset (&s->sort_info); rset_delete (s->rset); s->rset = rset; + resultSetRank (zh, s->sort_info, rset, hits); return s; } logf (LOG_DEBUG, "adding result set %s", name); s = xmalloc (sizeof(*s)); - s->next = zi->sets; - zi->sets = s; + s->next = zh->sets; + zh->sets = s; s->name = xmalloc (strlen(name)+1); strcpy (s->name, name); s->rset = rset; - s->sort_info = NULL; + + s->sort_info = xmalloc (sizeof(*s->sort_info)); + s->sort_info->max_entries = 1000; + s->sort_info->entries = xmalloc (sizeof(*s->sort_info->entries) * + s->sort_info->max_entries); + for (i = 0; i < s->sort_info->max_entries; i++) + s->sort_info->entries[i] = xmalloc (sizeof(**s->sort_info->entries)); + resultSetRank (zh, 
s->sort_info, rset, hits); return s; } -ZServerSet *resultSetGet (ZServerInfo *zi, const char *name) +ZebraSet resultSetGet (ZebraHandle zh, const char *name) { - ZServerSet *s; + ZebraSet s; - for (s = zi->sets; s; s = s->next) + for (s = zh->sets; s; s = s->next) if (!strcmp (s->name, name)) return s; return NULL; } -void resultSetDestroy (ZServerInfo *zi) +void resultSetDestroy (ZebraHandle zh) { - ZServerSet *s, *s1; + ZebraSet s, s1; - for (s = zi->sets; s; s = s1) + for (s = zh->sets; s; s = s1) { + int i; s1 = s->next; - resultSetSortReset (&s->sort_info); + + for (i = 0; i < s->sort_info->max_entries; i++) + xfree (s->sort_info->entries[i]); + xfree (s->sort_info); + rset_delete (s->rset); xfree (s->name); xfree (s); } - zi->sets = NULL; + zh->sets = NULL; } -ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, - int num, int *positions) +ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, + int num, int *positions) { - ZServerSet *sset; - ZServerSetSysno *sr; + ZebraSet sset; + ZebraPosSet sr; RSET rset; int i; struct zset_sort_info *sort_info; - if (!(sset = resultSetGet (zi, name))) + if (!(sset = resultSetGet (zh, name))) return NULL; if (!(rset = sset->rset)) return NULL; @@ -173,6 +191,7 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, { logf (LOG_DEBUG, "got pos=%d (sorted)", position); sr[i].sysno = sort_info->entries[position-1]->sysno; + sr[i].score = sort_info->entries[position-1]->score; } } } @@ -187,6 +206,7 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, int position = 0; int num_i = 0; int psysno = 0; + int term_index; RSFD rfd; struct it_key key; @@ -194,8 +214,8 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, position = sort_info->num_entries; while (num_i < num && positions[num_i] < position) num_i++; - rfd = rset_open (rset, RSETF_READ|RSETF_SORT_RANK); - while (num_i < num && rset_read (rset, rfd, &key)) + rfd = rset_open (rset, 
RSETF_READ); + while (num_i < num && rset_read (rset, rfd, &key, &term_index)) { if (key.sysno != psysno) { @@ -215,7 +235,7 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, { sr[num_i].sysno = psysno; logf (LOG_DEBUG, "got pos=%d (unsorted)", position); - rset_score (rset, rfd, &sr[num_i].score); + sr[num_i].score = -1; num_i++; } } @@ -225,7 +245,7 @@ ZServerSetSysno *resultSetSysnoGet (ZServerInfo *zi, const char *name, return sr; } -void resultSetSysnoDel (ZServerInfo *zi, ZServerSetSysno *records, int num) +void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num) { xfree (records); } @@ -235,7 +255,7 @@ struct sortKey { int attrUse; }; -void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset, +void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, struct sortKey *criteria, int num_criteria, int sysno) { @@ -244,11 +264,11 @@ void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset, struct zset_sort_info *sort_info = sset->sort_info; int i, j; - sortIdx_sysno (zi->sortIdx, sysno); + sortIdx_sysno (zh->sortIdx, sysno); for (i = 0; i<num_criteria; i++) { - sortIdx_type (zi->sortIdx, criteria[i].attrUse); - sortIdx_read (zi->sortIdx, this_entry.buf[i]); + sortIdx_type (zh->sortIdx, criteria[i].attrUse); + sortIdx_read (zh->sortIdx, this_entry.buf[i]); } i = sort_info->num_entries; while (--i >= 0) @@ -261,14 +281,17 @@ void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset, if (rel) break; } - if (rel) + if (!rel) + break; + if (criteria[j].relation == 'D') + { + if (rel > 0) + break; + } + else if (criteria[j].relation == 'A') { - if (criteria[j].relation == 'D') - if (rel > 0) - break; - if (criteria[j].relation == 'A') - if (rel < 0) - break; + if (rel < 0) + break; } } j = sort_info->max_entries-1; @@ -288,47 +311,94 @@ void resultSetInsertSort (ZServerInfo *zi, ZServerSet *sset, for (i = 0; i<num_criteria; i++) memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE); new_entry->sysno = sysno; + new_entry->score = -1; } - -int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr) +
+void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info, + int sysno, int score, int relation) { - ZServerSet *sset; + struct zset_sort_entry *new_entry = NULL; + int i, j; + + i = sort_info->num_entries; + while (--i >= 0) + { + int rel = 0; + + rel = score - sort_info->entries[i]->score; + + if (relation == 'D') + { + if (rel >= 0) + break; + } + else if (relation == 'A') + { + if (rel <= 0) + break; + } + } + j = sort_info->max_entries-1; + if (i == j) + return; + ++i; + new_entry = sort_info->entries[j]; + while (j != i) + { + sort_info->entries[j] = sort_info->entries[j-1]; + --j; + } + sort_info->entries[j] = new_entry; + assert (new_entry); + if (sort_info->num_entries != sort_info->max_entries) + (sort_info->num_entries)++; + new_entry->sysno = sysno; + new_entry->score = score; +} + +void resultSetSort (ZebraHandle zh, ODR stream, + int num_input_setnames, char **input_setnames, + char *output_setname, Z_SortKeySpecList *sort_sequence, + int *sort_status) +{ + ZebraSet sset; RSET rset; int i, psysno = 0; struct it_key key; struct sortKey sort_criteria[3]; int num_criteria; + int term_index; RSFD rfd; - if (rr->num_input_setnames == 0) + if (num_input_setnames == 0) { - rr->errcode = 208; - return 0; + zh->errCode = 208; + return ; } - if (rr->num_input_setnames > 1) + if (num_input_setnames > 1) { - rr->errcode = 230; - return 0; + zh->errCode = 230; + return; } - sset = resultSetGet (zi, rr->input_setnames[0]); + sset = resultSetGet (zh, input_setnames[0]); if (!sset) { - rr->errcode = 30; - rr->errstring = rr->input_setnames[0]; - return 0; + zh->errCode = 30; + zh->errString = input_setnames[0]; + return; } if (!(rset = sset->rset)) { - rr->errcode = 30; - rr->errstring = rr->input_setnames[0]; - return 0; + zh->errCode = 30; + zh->errString = input_setnames[0]; + return; } - num_criteria = rr->sort_sequence->num_specs; + num_criteria = sort_sequence->num_specs; if (num_criteria > 3) num_criteria = 3; for (i = 0; i < num_criteria; 
i++) { - Z_SortKeySpec *sks = rr->sort_sequence->specs[i]; + Z_SortKeySpec *sks = sort_sequence->specs[i]; Z_SortKey *sk; if (*sks->sortRelation == Z_SortRelation_ascending) @@ -337,79 +407,173 @@ int resultSetSort (ZServerInfo *zi, bend_sort_rr *rr) sort_criteria[i].relation = 'D'; else { - rr->errcode = 214; - return 0; + zh->errCode = 214; + return; } if (sks->sortElement->which == Z_SortElement_databaseSpecific) { - rr->errcode = 210; - return 0; + zh->errCode = 210; + return; } else if (sks->sortElement->which != Z_SortElement_generic) { - rr->errcode = 237; - return 0; + zh->errCode = 237; + return; } sk = sks->sortElement->u.generic; switch (sk->which) { case Z_SortKey_sortField: logf (LOG_DEBUG, "Sort: key %d is of type sortField", i+1); - rr->errcode = 207; - return 0; + zh->errCode = 207; + return; case Z_SortKey_elementSpec: logf (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1); - return 0; + zh->errCode = 207; + return; case Z_SortKey_sortAttributes: logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1); sort_criteria[i].attrUse = - zebra_maps_sort (zi->zebra_maps, sk->u.sortAttributes); + zebra_maps_sort (zh->zebra_maps, sk->u.sortAttributes); logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse); if (sort_criteria[i].attrUse == -1) { - rr->errcode = 116; - return 0; + zh->errCode = 116; + return; } - if (sortIdx_type (zi->sortIdx, sort_criteria[i].attrUse)) + if (sortIdx_type (zh->sortIdx, sort_criteria[i].attrUse)) { - rr->errcode = 207; - return 0; + zh->errCode = 207; + return; } break; } } - if (strcmp (rr->output_setname, rr->input_setnames[0])) + if (strcmp (output_setname, input_setnames[0])) { + int hits; rset = rset_dup (rset); - sset = resultSetAdd (zi, rr->output_setname, 1, rset); + sset = resultSetAdd (zh, output_setname, 1, rset, &hits); } - resultSetSortReset (&sset->sort_info); - - sset->sort_info = xmalloc (sizeof(*sset->sort_info)); - sset->sort_info->max_entries = 100; sset->sort_info->num_entries = 0; - 
sset->sort_info->entries = xmalloc (sizeof(*sset->sort_info->entries) * - sset->sort_info->max_entries); - for (i = 0; i<sset->sort_info->max_entries; i++) - sset->sort_info->entries[i] = - xmalloc (sizeof(*sset->sort_info->entries[i])); - - rfd = rset_open (rset, RSETF_READ|RSETF_SORT_SYSNO); - while (rset_read (rset, rfd, &key)) + rfd = rset_open (rset, RSETF_READ); + while (rset_read (rset, rfd, &key, &term_index)) { if (key.sysno != psysno) { psysno = key.sysno; - resultSetInsertSort (zi, sset, + resultSetInsertSort (zh, sset, sort_criteria, num_criteria, psysno); } } rset_close (rset, rfd); - rr->errcode = 0; - rr->sort_status = Z_SortStatus_success; + zh->errCode = 0; + *sort_status = Z_SortStatus_success; - return 0; + return; +} + +RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId) +{ + ZebraSet s; + + if ((s = resultSetGet (zh, resultSetId))) + return s->rset; + return NULL; +} + +void resultSetRank (ZebraHandle zh, struct zset_sort_info *sort_info, + RSET rset, int *hits) +{ + int kno = 0; + struct it_key key; + RSFD rfd; + int term_index, i; + ZebraRankClass rank_class; + struct rank_control *rc; + + sort_info->num_entries = 0; + *hits = 0; + rfd = rset_open (rset, RSETF_READ); + + logf (LOG_DEBUG, "resultSetRank"); + for (i = 0; i < rset->no_rset_terms; i++) + logf (LOG_DEBUG, "term=\"%s\" cnt=%d type=%s", + rset->rset_terms[i]->name, + rset->rset_terms[i]->nn, + rset->rset_terms[i]->flags); + + rank_class = zebraRankLookup (zh, "rank-1"); + rc = rank_class->control; + + if (rset_read (rset, rfd, &key, &term_index)) + { + int psysno = key.sysno; + int score; + void *handle = + (*rc->begin) (zh, rank_class->class_handle, rset); + (*hits)++; + do + { + kno++; + if (key.sysno != psysno) + { + score = (*rc->calc) (handle, psysno); + + resultSetInsertRank (zh, sort_info, psysno, score, 'A'); + (*hits)++; + psysno = key.sysno; + } + (*rc->add) (handle, key.seqno, term_index); + } + while (rset_read (rset, rfd, &key, &term_index)); + score = (*rc->calc)
(handle, psysno); + resultSetInsertRank (zh, sort_info, psysno, score, 'A'); + (*rc->end) (zh, handle); + } + rset_close (rset, rfd); + logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, *hits); } +ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name) +{ + ZebraRankClass p = zh->rank_classes; + while (p && strcmp (p->control->name, name)) + p = p->next; + if (p && !p->init_flag) + { + if (p->control->create) + p->class_handle = (*p->control->create)(zh); + p->init_flag = 1; + } + return p; +} + +void zebraRankInstall (ZebraHandle zh, struct rank_control *ctrl) +{ + ZebraRankClass p = xmalloc (sizeof(*p)); + p->control = xmalloc (sizeof(*p->control)); + memcpy (p->control, ctrl, sizeof(*p->control)); + p->control->name = xstrdup (ctrl->name); + p->init_flag = 0; + p->next = zh->rank_classes; + zh->rank_classes = p; +} + +void zebraRankDestroy (ZebraHandle zh) +{ + ZebraRankClass p = zh->rank_classes; + while (p) + { + ZebraRankClass p_next = p->next; + if (p->init_flag && p->control->destroy) + (*p->control->destroy)(zh, p->class_handle); + xfree (p->control->name); + xfree (p->control); + xfree (p); + p = p_next; + } + zh->rank_classes = NULL; +}