From 77686142af94172d1887190ebd47aeb53f704057 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 15 Oct 2001 19:53:43 +0000 Subject: [PATCH] POSIX thread updates. First work on term sets. --- CHANGELOG | 3 + LICENSE.zmbol | 1 - configure.in | 4 +- include/zebra-lock.h | 8 +-- include/zebraver.h | 60 ++-------------- index/extract.c | 7 +- index/kcompare.c | 35 +++++++-- index/recindxp.h | 7 +- index/zebraapi.c | 31 +++++--- index/zinfo.c | 24 ++++++- index/zinfo.h | 8 ++- index/zrpn.c | 176 +++++++++++++++++++++++++++++++-------------- index/zserver.h | 196 +++++--------------------------------------------- index/zsets.c | 189 +++++++++++++++++++++++++++++++++--------------- util/zebra-lock.c | 36 +++++----- 15 files changed, 393 insertions(+), 392 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3295c9d..e903534 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +Added support for termsets - a result set of terms matching +a given query. + Added support for raw retrieval. Element Set Name R forces the text filter which returns the record in its original form. diff --git a/LICENSE.zmbol b/LICENSE.zmbol index 56a26b5..e69de29 100644 --- a/LICENSE.zmbol +++ b/LICENSE.zmbol @@ -1 +0,0 @@ -License here. diff --git a/configure.in b/configure.in index f70b492..6f82b86 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Zebra, Index Data Aps, 1994-2000 -dnl $Id: configure.in,v 1.28 2001-02-28 09:01:41 adam Exp $ +dnl $Id: configure.in,v 1.29 2001-10-15 19:53:43 adam Exp $ dnl AC_INIT(include/zebraver.h) AC_MSG_CHECKING(for package) @@ -77,7 +77,7 @@ else fi dnl dnl ------ times -AC_CHECK_HEADERS(sys/times.h pthread.h) +AC_CHECK_HEADERS(sys/times.h) dnl dnl ------- BZIP2 AC_CHECK_LIB(bz2,bzCompressInit) diff --git a/include/zebra-lock.h b/include/zebra-lock.h index 44de1f4..7aa47f9 100644 --- a/include/zebra-lock.h +++ b/include/zebra-lock.h @@ -5,7 +5,7 @@ #ifdef WIN32 #include #endif -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS #include #endif @@ -17,7 +17,7 @@ typedef struct { #ifdef WIN32 CRITICAL_SECTION mutex; #else -# if HAVE_PTHREAD_H +# if YAZ_POSIX_THREADS pthread_mutex_t mutex; # else int dummy; @@ -34,7 +34,7 @@ YAZ_EXPORT int zebra_mutex_unlock (Zebra_mutex *p); typedef struct { int readers_reading; int writers_writing; -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_t mutex; pthread_cond_t lock_free; #endif @@ -48,7 +48,7 @@ YAZ_EXPORT int zebra_lock_rdwr_runlock (Zebra_lock_rdwr *p); YAZ_EXPORT int zebra_lock_rdwr_wunlock (Zebra_lock_rdwr *p); typedef struct { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_t mutex; pthread_cond_t cond; #else diff --git a/include/zebraver.h b/include/zebraver.h index 43856df..3baa595 100644 --- a/include/zebraver.h +++ b/include/zebraver.h @@ -1,66 +1,14 @@ /* - * Copyright (C) 1994-2000, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: zebraver.h,v $ - * Revision 1.17 2000-11-08 14:06:18 adam - * Updated version. - * - * Revision 1.16 2000/02/10 10:19:47 adam - * Patch level 1. - * - * Revision 1.15 1999/12/01 13:30:30 adam - * Updated configure for Zmbol/Zebra dependent settings. - * - * Revision 1.14 1999/11/30 13:48:03 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.13 1999/02/02 14:50:48 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.12 1998/02/10 12:03:05 adam - * Implemented Sort. - * - * Revision 1.11 1998/01/29 13:33:04 adam - * 1.0b1. - * - * Revision 1.10 1997/02/12 20:45:21 adam - * Internal release. - * - * Revision 1.9 1996/11/08 11:08:03 adam - * New internal release. - * - * Revision 1.8 1996/10/18 12:38:22 adam - * New internal release. - * - * Revision 1.7 1996/06/06 13:30:12 quinn - * Work - * - * Revision 1.6 1996/05/16 15:31:07 quinn - * a7 - * - * Revision 1.5 1996/04/26 09:59:47 adam - * Added ZEBRADATE; date of this file. - * - * Revision 1.4 1996/04/24 13:36:22 quinn - * a6 - * - * Revision 1.3 1996/01/11 10:15:44 quinn - * Alfa 4 release. - * - * Revision 1.2 1995/12/06 16:05:50 adam - * New version. - * - * Revision 1.1 1995/12/05 11:24:51 adam - * New version. * + * $Id: zebraver.h,v 1.18 2001-10-15 19:53:43 adam Exp $ */ #ifndef ZEBRAVER -#define ZEBRAVER "1.1pre" +#define ZEBRAVER "1.1" #endif #ifndef ZEBRADATE -#define ZEBRADATE "$Date: 2000-11-08 14:06:18 $" +#define ZEBRADATE "$Date: 2001-10-15 19:53:43 $" #endif diff --git a/index/extract.c b/index/extract.c index 305cc18..86de3b2 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.108 2001-06-14 11:44:56 adam + * Revision 1.109 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.108 2001/06/14 11:44:56 adam * Bug fix: default storeKeys setting wasn't read when group was specified. * * Revision 1.107 2001/05/28 13:58:48 adam @@ -1003,7 +1006,7 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys) } #endif assert (ch > 0); - key_buf_used += key_SU_code (ch, ((char*)key_buf) + key_buf_used); + key_buf_used += key_SU_encode (ch, ((char*)key_buf) + key_buf_used); while (*src) ((char*)key_buf) [key_buf_used++] = *src++; diff --git a/index/kcompare.c b/index/kcompare.c index 647349a..da913f7 100644 --- a/index/kcompare.c +++ b/index/kcompare.c @@ -253,24 +253,51 @@ ISAMD_M key_isamd_m (Res res,ISAMD_M me) #endif -int key_SU_code (int ch, char *out) +int key_SU_encode (int ch, char *out) { int i; for (i = 0; ch; i++) { - if (ch > 63) - out[i] = 128 + (ch & 63); + if (ch >= 64) + out[i] = 65 + (ch & 63); else out[i] = 1 + ch; ch = ch >> 6; } return i; + /* in out + 0 1 + 1 2 + 63 64 + 64 65, 2 + 65 66, 2 + 127 128, 2 + 128 65, 3 + 191 128, 3 + 192 65, 4 + */ } +int key_SU_decode (int *ch, unsigned char *out) +{ + int len = 1; + int fact = 1; + *ch = 0; + for (len = 1; *out >= 65; len++, out++) + { + *ch += (*out - 65) * fact; + fact <<= 6; + } + *ch += (*out - 1) * fact; + return len; +} /* * $Log: kcompare.c,v $ - * Revision 1.35 1999-11-30 13:48:03 adam + * Revision 1.36 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.35 1999/11/30 13:48:03 adam * Improved installation. Updated for inclusion of YAZ header files. * * Revision 1.34 1999/07/14 13:21:34 heikki diff --git a/index/recindxp.h b/index/recindxp.h index 24d7259..0f902c8 100644 --- a/index/recindxp.h +++ b/index/recindxp.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recindxp.h,v $ - * Revision 1.9 2000-12-05 10:01:44 adam + * Revision 1.10 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.9 2000/12/05 10:01:44 adam * Fixed bug regarding user-defined attribute sets. * * Revision 1.8 2000/04/05 09:49:35 adam @@ -50,7 +53,7 @@ YAZ_BEGIN_CDECL #define REC_BLOCK_TYPES 2 #define REC_HEAD_MAGIC "recindex" -#define REC_VERSION 2 +#define REC_VERSION 3 struct records_info { int rw; diff --git a/index/zebraapi.c b/index/zebraapi.c index ec24290..3cd3a10 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -3,7 +3,10 @@ * All rights reserved. * * $Log: zebraapi.c,v $ - * Revision 1.43 2000-12-05 12:22:53 adam + * Revision 1.44 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.43 2000/12/05 12:22:53 adam * Termlist source implemented (so that we can index values of XML/SGML * attributes). * @@ -665,16 +668,15 @@ void zebra_records_retrieve (ZebraHandle zh, ODR stream, { for (i = 0; ierrCode = 13; - zh->errString = nmem_strdup (stream->mem, num_str); - break; + recs[i].errCode = 0; + recs[i].format = VAL_SUTRS; + recs[i].len = strlen(poset[i].term); + recs[i].buf = poset[i].term; + recs[i].base = poset[i].db; } - else + else if (poset[i].sysno) { recs[i].errCode = zebra_record_fetch (zh, poset[i].sysno, poset[i].score, @@ -684,6 +686,15 @@ void zebra_records_retrieve (ZebraHandle zh, ODR stream, &recs[i].base); recs[i].errString = NULL; } + else + { + char num_str[20]; + + sprintf (num_str, "%d", pos_array[i]); + zh->errCode = 13; + zh->errString = nmem_strdup (stream->mem, num_str); + break; + } } zebraPosSetDestroy (zh, poset, num_recs); } @@ -1345,7 +1356,7 @@ static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, #endif assert (ch > 0); zh->key_buf_used += - key_SU_code (ch,((char*)zh->key_buf) + zh->key_buf_used); + key_SU_encode (ch,((char*)zh->key_buf) + zh->key_buf_used); while (*src) ((char*)zh->key_buf) [(zh->key_buf_used)++] = *src++; diff --git a/index/zinfo.c b/index/zinfo.c index 7224583..0565a00 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.c,v $ - * Revision 1.21 2000-12-05 10:01:44 adam + * Revision 1.22 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.21 2000/12/05 10:01:44 adam * Fixed bug regarding user-defined attribute sets. * * Revision 1.20 2000/11/29 14:24:01 adam @@ -1372,6 +1375,25 @@ int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use) return -1; } +int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, + const char **db, int *set, int *use) +{ + struct zebDatabaseInfoB *zdb; + for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) + { + struct zebSUInfoB *zsui = zdb->attributeDetails->SUInfo; + for ( ;zsui; zsui = zsui->next) + if (zsui->info.ordinal == ord) + { + *db = zdb->databaseName; + *set = zsui->info.set; + *use = zsui->info.use; + return 0; + } + } + return -1; +} + zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei, zebAccessObject *op, Odr_oid *oid) diff --git a/index/zinfo.h b/index/zinfo.h index ec23d5a..c17271e 100644 --- a/index/zinfo.h +++ b/index/zinfo.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.h,v $ - * Revision 1.10 2000-05-15 12:56:37 adam + * Revision 1.11 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.10 2000/05/15 12:56:37 adam * Record offset of size off_t. * * Revision 1.9 2000/03/20 19:08:36 adam @@ -72,6 +75,9 @@ void zebraExplain_loadAttsets (data1_handle dh, Res res); void zebraExplain_flush (ZebraExplainInfo zei, int writeFlag, void *updateHandle); +int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, + const char **db, int *set, int *use); + typedef struct { int recordSize; off_t recordOffset; diff --git a/index/zrpn.c b/index/zrpn.c index 3a61c47..28508e3 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zrpn.c,v $ - * Revision 1.106 2001-04-11 07:58:13 adam + * Revision 1.107 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.106 2001/04/11 07:58:13 adam * Bug fix: multiple space mapped to one space when using complete subfield. * * Revision 1.105 2000/11/08 13:46:59 adam @@ -406,7 +409,8 @@ typedef struct { Z_AttributesPlusTerm *zapt; } AttrType; -static int attr_find (AttrType *src, oid_value *attributeSetP) +static int attr_find_ex (AttrType *src, oid_value *attributeSetP, + const char **string_value) { int num_attributes; @@ -440,19 +444,34 @@ static int attr_find (AttrType *src, oid_value *attributeSetP) return *element->value.numeric; break; case Z_AttributeValue_complex: - if (src->minor >= element->value.complex->num_list || - element->value.complex->list[src->minor]->which != + if (src->minor >= element->value.complex->num_list) + break; + if (element->value.complex->list[src->minor]->which == Z_StringOrNumeric_numeric) - break; - ++(src->minor); - if (element->attributeSet && attributeSetP) - { - oident *attrset; - - attrset = oid_getentbyoid (element->attributeSet); - *attributeSetP = attrset->value; - } - return *element->value.complex->list[src->minor-1]->u.numeric; + { + ++(src->minor); + if (element->attributeSet && attributeSetP) + { + oident *attrset; + + attrset = oid_getentbyoid (element->attributeSet); + *attributeSetP = attrset->value; + } + return + *element->value.complex->list[src->minor-1]->u.numeric; + } + else if (element->value.complex->list[src->minor]->which == + Z_StringOrNumeric_string) + { + if (!string_value) + break; + ++(src->minor); + *string_value = + element->value.complex->list[src->minor-1]->u.string; + return -2; + } + else + break; default: assert (0); } @@ -462,6 +481,11 @@ static int attr_find (AttrType *src, oid_value *attributeSetP) return -1; } +static int attr_find (AttrType *src, oid_value *attributeSetP) +{ + return attr_find_ex (src, attributeSetP, 0); +} + static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt, int type) { @@ -482,6 +506,7 @@ struct grep_info { int isam_p_indx; ZebraHandle zh; int reg_type; + ZebraSet termset; }; static void term_untrans (ZebraHandle zh, int reg_type, @@ -535,9 +560,24 @@ static void add_isam_p (const char *name, const char *info, assert (*info == sizeof(*p->isam_p_buf)); memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf)); -#if 0 - term_untrans (p->zh, p->reg_type, term_tmp, name+2); - logf (LOG_DEBUG, "grep: %s", term_tmp); +#if 1 + if (p->termset) + { + const char *db; + int set, use; + char term_tmp[512]; + int su_code = 0; + int len = key_SU_decode (&su_code, name); + + term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); + logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp); + zebraExplain_lookup_ord (p->zh->service->zei, + su_code, &db, &set, &use); + logf (LOG_LOG, "grep: set=%d use=%d db=%s", set, use, db); + + resultSetAddTerm (p->zh, p->termset, name[len], db, + set, use, term_tmp); + } #endif (p->isam_p_indx)++; } @@ -1203,7 +1243,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, else term_dict[prefix_len++] = '('; - ord_len = key_SU_code (ord, ord_buf); + ord_len = key_SU_encode (ord, ord_buf); for (i = 0; iterm_no = 0; +#endif + grep_info->isam_p_size = 0; + grep_info->isam_p_buf = NULL; + grep_info->zh = zh; + grep_info->reg_type = reg_type; + grep_info->termset = 0; + + attr_init (&termset, zapt, 8); + termset_value_numeric = + attr_find_ex (&termset, NULL, &termset_value_string); + if (termset_value_numeric != -1) + { + char resname[32]; + const char *termset_name = 0; + if (termset_value_numeric != -2) + { + + sprintf (resname, "%d", termset_value_numeric); + termset_name = resname; + } + else + termset_name = termset_value_string; + logf (LOG_LOG, "creating termset set %s", termset_name); + grep_info->termset = resultSetAdd (zh, termset_name, 1); + if (!grep_info->termset) + { + zh->errCode = 128; + zh->errString = nmem_strdup (stream, termset_name); + return -1; + } + } + return 0; +} + + static RSET rpn_search_APT_phrase (ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, @@ -1670,14 +1757,8 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh, char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); const char *termp = termz; -#ifdef TERM_COUNT - grep_info.term_no = 0; -#endif - grep_info.isam_p_size = 0; - grep_info.isam_p_buf = NULL; - grep_info.zh = zh; - grep_info.reg_type = reg_type; - + if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) + return 0; while (1) { logf (LOG_DEBUG, "APT_phrase termp=%s", termp); @@ -1729,14 +1810,9 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh, struct grep_info grep_info; char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); const char *termp = termz; -#ifdef TERM_COUNT - grep_info.term_no = 0; -#endif - grep_info.isam_p_size = 0; - grep_info.isam_p_buf = NULL; - grep_info.zh = zh; - grep_info.reg_type = reg_type; + if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) + return 0; while (1) { logf (LOG_DEBUG, "APT_or_list termp=%s", termp); @@ -1795,14 +1871,8 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh, char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type); const char *termp = termz; -#ifdef TERM_COUNT - grep_info.term_no = 0; -#endif - grep_info.isam_p_size = 0; - grep_info.isam_p_buf = NULL; - grep_info.zh = zh; - grep_info.reg_type = reg_type; - + if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream)) + return 0; while (1) { logf (LOG_DEBUG, "APT_and_list termp=%s", termp); @@ -1963,7 +2033,7 @@ static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, else term_dict[prefix_len++] = '('; - ord_len = key_SU_code (ord, ord_buf); + ord_len = key_SU_encode (ord, ord_buf); for (i = 0; iu.simple->u.resultSetId); if (!r) + { r = rset_create (rset_kind_null, NULL); + zh->errCode = 30; + zh->errString = + nmem_strdup (stream, zs->u.simple->u.resultSetId); + return 0; + } } else { zh->errCode = 3; - return NULL; + return 0; } } else { zh->errCode = 3; - return NULL; + return 0; } return r; } @@ -2600,7 +2670,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, for (j = 0; jlist[j].term = NULL; - prefix_len += key_SU_code (ords[i], termz + prefix_len); + prefix_len += key_SU_encode (ords[i], termz + prefix_len); termz[prefix_len++] = reg_id; termz[prefix_len] = 0; strcpy (scan_info->prefix, termz); diff --git a/index/zserver.h b/index/zserver.h index a1bee7e..d0c936b 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -1,189 +1,15 @@ /* - * Copyright (C) 1994-2000, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Log: zserver.h,v $ - * Revision 1.50 2000-11-29 14:24:01 adam - * Script configure uses yaz pthreads options. Added locking for - * zebra_register_{lock,unlock}. - * - * Revision 1.49 2000/05/18 12:01:36 adam - * System call times(2) used again. More 64-bit fixes. - * - * Revision 1.48 2000/04/05 09:49:35 adam - * On Unix, zebra/z'mbol uses automake. - * - * Revision 1.47 2000/03/20 19:08:36 adam - * Added remote record import using Z39.50 extended services and Segment - * Requests. - * - * Revision 1.46 2000/03/15 15:00:31 adam - * First work on threaded version. - * - * Revision 1.45 1999/11/30 13:48:04 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.44 1999/11/04 15:00:45 adam - * Implemented delete result set(s). - * - * Revision 1.43 1999/07/14 10:59:27 adam - * Changed functions isc_getmethod, isams_getmethod. - * Improved fatal error handling (such as missing EXPLAIN schema). - * - * Revision 1.42 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.41 1999/05/12 13:08:06 adam - * First version of ISAMS. - * - * Revision 1.40 1998/11/16 16:03:45 adam - * Moved loggin utilities to Yaz. Was implemented in file zlogs.c. - * - * Revision 1.39 1998/10/16 08:14:35 adam - * Updated record control system. - * - * Revision 1.38 1998/09/22 10:03:44 adam - * Changed result sets to be persistent in the sense that they can - * be re-searched if needed. - * Fixed memory leak in rsm_or. - * - * Revision 1.37 1998/09/02 13:53:22 adam - * Extra parameter decode added to search routines to implement - * persistent queries. - * - * Revision 1.36 1998/06/24 12:16:16 adam - * Support for relations on text operands. Open range support in - * DFA module (i.e. [-j], [g-]). - * - * Revision 1.35 1998/06/23 15:33:35 adam - * Added feature to specify sort criteria in query (type 7 specifies - * sort flags). - * - * Revision 1.34 1998/06/22 11:36:50 adam - * Added authentication check facility to zebra. - * - * Revision 1.33 1998/06/12 12:22:14 adam - * Work on Zebra API. - * - * Revision 1.32 1998/05/27 16:57:47 adam - * Zebra returns surrogate diagnostic for single records when - * appropriate. - * - * Revision 1.31 1998/05/20 10:12:23 adam - * Implemented automatic EXPLAIN database maintenance. - * Modified Zebra to work with ASN.1 compiled version of YAZ. - * - * Revision 1.30 1998/03/05 08:45:13 adam - * New result set model and modular ranking system. Moved towards - * descent server API. System information stored as "SGML" records. - * - * Revision 1.29 1998/02/10 12:03:06 adam - * Implemented Sort. - * - * Revision 1.28 1998/01/29 13:40:11 adam - * Better logging for scan service. - * - * Revision 1.27 1997/10/27 14:33:06 adam - * Moved towards generic character mapping depending on "structure" - * field in abstract syntax file. Fixed a few memory leaks. Fixed - * bug with negative integers when doing searches with relational - * operators. - * - * Revision 1.26 1997/09/29 12:41:35 adam - * Fixed bug regarding USE_TIMES var. - * - * Revision 1.25 1997/09/29 09:08:36 adam - * Revised locking system to be thread safe for the server. - * - * Revision 1.24 1997/09/17 12:19:19 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.23 1996/12/23 15:30:46 adam - * Work on truncation. - * Bug fix: result sets weren't deleted after server shut down. - * - * Revision 1.22 1996/11/04 14:07:49 adam - * Moved truncation code to trunc.c. - * - * Revision 1.21 1996/10/29 14:09:58 adam - * Use of cisam system - enabled if setting isamc is 1. - * - * Revision 1.20 1996/06/04 10:19:02 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.19 1996/05/14 11:34:01 adam - * Scan support in multiple registers/databases. - * - * Revision 1.18 1996/05/14 06:16:50 adam - * Compact use/set bytes used in search service. - * - * Revision 1.17 1995/12/08 16:22:57 adam - * Work on update while servers are running. Three lock files introduced. - * The servers reload their registers when necessary, but they don't - * reestablish result sets yet. - * - * Revision 1.16 1995/12/07 17:38:48 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.15 1995/11/21 15:29:13 adam - * Config file 'base' read by default by both indexer and server. - * - * Revision 1.14 1995/11/16 17:00:57 adam - * Better logging of rpn query. - * - * Revision 1.13 1995/11/16 15:34:56 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.12 1995/10/27 14:00:12 adam - * Implemented detection of database availability. - * - * Revision 1.11 1995/10/17 18:02:12 adam - * New feature: databases. Implemented as prefix to words in dictionary. - * - * Revision 1.10 1995/10/09 16:18:38 adam - * Function dict_lookup_grep got extra client data parameter. - * - * Revision 1.9 1995/10/06 14:38:01 adam - * New result set method: r_score. - * Local no (sysno) and score is transferred to retrieveCtrl. - * - * Revision 1.8 1995/10/06 13:52:06 adam - * Bug fixes. Handler may abort further scanning. - * - * Revision 1.7 1995/10/06 10:43:57 adam - * Scan added. 'occurrences' in scan entries not set yet. - * - * Revision 1.6 1995/09/28 09:19:48 adam - * xfree/xmalloc used everywhere. - * Extract/retrieve method seems to work for text records. - * - * Revision 1.5 1995/09/27 16:17:32 adam - * More work on retrieve. - * - * Revision 1.4 1995/09/14 11:53:28 adam - * First work on regular expressions/truncations. - * - * Revision 1.3 1995/09/08 08:53:23 adam - * Record buffer maintained in server_info. - * - * Revision 1.2 1995/09/06 16:11:19 adam - * Option: only one word key per file. - * - * Revision 1.1 1995/09/05 15:28:40 adam - * More work on search engine. - * + * $Id: zserver.h,v 1.51 2001-10-15 19:53:43 adam Exp $ */ #if HAVE_SYS_TIMES_H #include #endif -#if HAVE_PTHREADS_H -#include -#endif - #include #include @@ -196,6 +22,8 @@ YAZ_BEGIN_CDECL typedef struct { + char *term; + char *db; int sysno; int score; } *ZebraPosSet; @@ -291,6 +119,16 @@ struct rank_control { void (*add)(void *set_handle, int seqno, int term_index); }; +struct term_set_entry { + char *term; + struct term_set_entry *next; +}; + +struct term_set_list { + struct term_set_entry *first; + struct term_set_entry *last; +}; + RSET rpn_search (ZebraHandle zh, NMEM mem, Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname, ZebraSet sset); @@ -305,9 +143,13 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, RSET rset_trunc (ZebraHandle zh, ISAMS_P *isam_p, int no, const char *term, int length_term, const char *flags); +void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, + const char *db, int set, + int use, const char *term); ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov); ZebraSet resultSetGet (ZebraHandle zh, const char *name); -ZebraSet resultSetAddRPN (ZebraHandle zh, ODR stream, ODR decode, Z_RPNQuery *rpn, int num_bases, char **basenames, +ZebraSet resultSetAddRPN (ZebraHandle zh, ODR stream, ODR decode, + Z_RPNQuery *rpn, int num_bases, char **basenames, const char *setname); RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId); void resultSetDestroy (ZebraHandle zh, int num_names, char **names, diff --git a/index/zsets.c b/index/zsets.c index 7bb569d..f77258b 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.29 2001-01-22 10:42:56 adam + * Revision 1.30 2001-10-15 19:53:43 adam + * POSIX thread updates. First work on term sets. + * + * Revision 1.29 2001/01/22 10:42:56 adam * Added numerical sort. * * Revision 1.28 2000/07/07 12:49:20 adam @@ -119,6 +122,13 @@ #define SORT_IDX_ENTRYSIZE 64 #define ZSET_SORT_MAX_LEVEL 3 +struct zebra_set_term_entry { + int reg_type; + char *db; + int set; + int use; + char *term; +}; struct zebra_set { char *name; RSET rset; @@ -128,7 +138,10 @@ struct zebra_set { char **basenames; Z_RPNQuery *rpn; struct zset_sort_info *sort_info; + struct zebra_set_term_entry *term_entries; + int term_entries_max; struct zebra_set *next; + int locked; }; struct zset_sort_entry { @@ -157,6 +170,7 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zebraSet = resultSetAdd (zh, setname, 1); if (!zebraSet) return 0; + zebraSet->locked = 1; zebraSet->rpn = 0; zebraSet->num_bases = num_bases; zebraSet->basenames = basenames; @@ -169,9 +183,37 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output, zh->hits = zebraSet->hits; if (zebraSet->rset) zebraSet->rpn = rpn; + zebraSet->locked = 0; return zebraSet; } +void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, + const char *db, int set, + int use, const char *term) +{ + if (!s->nmem) + s->nmem = nmem_create (); + if (!s->term_entries) + { + int i; + s->term_entries_max = 1000; + s->term_entries = + nmem_malloc (s->nmem, s->term_entries_max * + sizeof(*s->term_entries)); + for (i = 0; i < s->term_entries_max; i++) + s->term_entries[i].term = 0; + } + if (s->hits < s->term_entries_max) + { + s->term_entries[s->hits].reg_type = reg_type; + s->term_entries[s->hits].db = nmem_strdup (s->nmem, db); + s->term_entries[s->hits].set = set; + s->term_entries[s->hits].use = use; + s->term_entries[s->hits].term = nmem_strdup (s->nmem, term); + } + (s->hits)++; +} + ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) { ZebraSet s; @@ -183,7 +225,7 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) if (s) { logf (LOG_DEBUG, "updating result set %s", name); - if (!ov) + if (!ov || s->locked) return NULL; if (s->rset) rset_delete (s->rset); @@ -211,8 +253,12 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) for (i = 0; i < s->sort_info->max_entries; i++) s->sort_info->entries[i] = s->sort_info->all_entries + i; } + s->locked = 0; + s->term_entries = 0; + s->hits = 0; s->rset = 0; - s->nmem = 0; + s->nmem = 0; + s->rpn = 0; return s; } @@ -223,14 +269,13 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name) for (s = zh->sets; s; s = s->next) if (!strcmp (s->name, name)) { - if (!s->rset && s->rpn) + if (!s->term_entries && !s->rset && s->rpn) { NMEM nmem = nmem_create (); s->rset = rpn_search (zh, nmem, s->rpn, s->num_bases, s->basenames, s->name, s); nmem_destroy (nmem); - } return s; } @@ -272,7 +317,8 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) if (s->nmem) nmem_destroy (s->nmem); - rset_delete (s->rset); + if (s->rset) + rset_delete (s->rset); xfree (s->name); xfree (s); } @@ -285,7 +331,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, int num, int *positions) { ZebraSet sset; - ZebraPosSet sr; + ZebraPosSet sr = 0; RSET rset; int i; struct zset_sort_info *sort_info; @@ -293,75 +339,100 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, if (!(sset = resultSetGet (zh, name))) return NULL; if (!(rset = sset->rset)) - return NULL; - sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); - for (i = 0; isort_info; - if (sort_info) - { - int position; - + if (!sset->term_entries) + return 0; + sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); for (i = 0; i 0 && position <= sort_info->num_entries) + int j; + struct zebra_set_term_entry *entry = sset->term_entries; + + sr[i].sysno = 0; + sr[i].score = -1; + sr[i].term = 0; + sr[i].db = 0; + + if (positions[i] <= sset->term_entries_max) { - logf (LOG_DEBUG, "got pos=%d (sorted)", position); - sr[i].sysno = sort_info->entries[position-1]->sysno; - sr[i].score = sort_info->entries[position-1]->score; + sr[i].term = sset->term_entries[positions[i]-1].term; + sr[i].db = sset->term_entries[positions[i]-1].db; } } } - /* did we really get all entries using sort ? */ - for (i = 0; isort_info; if (sort_info) - position = sort_info->num_entries; - while (num_i < num && positions[num_i] < position) - num_i++; - rfd = rset_open (rset, RSETF_READ); - while (num_i < num && rset_read (rset, rfd, &key, &term_index)) { - if (key.sysno != psysno) + int position; + + for (i = 0; i 0 && position <= sort_info->num_entries) { - /* determine we alreay have this in our set */ - for (i = sort_info->num_entries; --i >= 0; ) - if (psysno == sort_info->entries[i]->sysno) - break; - if (i >= 0) - continue; + logf (LOG_DEBUG, "got pos=%d (sorted)", position); + sr[i].sysno = sort_info->entries[position-1]->sysno; + sr[i].score = sort_info->entries[position-1]->score; } - position++; - assert (num_i < num); - if (position == positions[num_i]) + } + } + /* did we really get all entries using sort ? */ + for (i = 0; inum_entries; + while (num_i < num && positions[num_i] < position) + num_i++; + rfd = rset_open (rset, RSETF_READ); + while (num_i < num && rset_read (rset, rfd, &key, &term_index)) + { + if (key.sysno != psysno) { - sr[num_i].sysno = psysno; - logf (LOG_DEBUG, "got pos=%d (unsorted)", position); - sr[num_i].score = -1; - num_i++; + psysno = key.sysno; + if (sort_info) + { + /* determine we alreay have this in our set */ + for (i = sort_info->num_entries; --i >= 0; ) + if (psysno == sort_info->entries[i]->sysno) + break; + if (i >= 0) + continue; + } + position++; + assert (num_i < num); + if (position == positions[num_i]) + { + sr[num_i].sysno = psysno; + logf (LOG_DEBUG, "got pos=%d (unsorted)", position); + sr[num_i].score = -1; + num_i++; + } } } + rset_close (rset, rfd); } - rset_close (rset, rfd); } return sr; } diff --git a/util/zebra-lock.c b/util/zebra-lock.c index dfd8cea..7ed078e 100644 --- a/util/zebra-lock.c +++ b/util/zebra-lock.c @@ -7,12 +7,8 @@ int zebra_mutex_init (Zebra_mutex *p) { - if (p->state == 1) - { - fprintf (stderr, "zebra_mutex_init. state=%d\n", p->state); - } p->state = 1; -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_init (&p->mutex, 0); #endif #ifdef WIN32 @@ -28,7 +24,7 @@ int zebra_mutex_destroy (Zebra_mutex *p) { fprintf (stderr, "zebra_mutex_destroy. state = %d\n", p->state); } -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_destroy (&p->mutex); #endif #ifdef WIN32 @@ -43,7 +39,7 @@ int zebra_mutex_lock (Zebra_mutex *p) { fprintf (stderr, "zebra_mutex_lock. state = %d\n", p->state); } -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_lock (&p->mutex); #endif #ifdef WIN32 @@ -58,7 +54,7 @@ int zebra_mutex_unlock (Zebra_mutex *p) { fprintf (stderr, "zebra_mutex_unlock. state = %d\n", p->state); } -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_unlock (&p->mutex); #endif #ifdef WIN32 @@ -71,7 +67,7 @@ int zebra_lock_rdwr_init (Zebra_lock_rdwr *p) { p->readers_reading = 0; p->writers_writing = 0; -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_init (&p->mutex, 0); pthread_cond_init (&p->lock_free, 0); #endif @@ -82,7 +78,7 @@ int zebra_lock_rdwr_destroy (Zebra_lock_rdwr *p) { assert (p->readers_reading == 0); assert (p->writers_writing == 0); -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_destroy (&p->mutex); pthread_cond_destroy (&p->lock_free); #endif @@ -91,7 +87,7 @@ int zebra_lock_rdwr_destroy (Zebra_lock_rdwr *p) int zebra_lock_rdwr_rlock (Zebra_lock_rdwr *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_lock (& p->mutex); while (p->writers_writing) pthread_cond_wait (&p->lock_free, &p->mutex); @@ -103,7 +99,7 @@ int zebra_lock_rdwr_rlock (Zebra_lock_rdwr *p) int zebra_lock_rdwr_wlock (Zebra_lock_rdwr *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_lock (&p->mutex); while (p->writers_writing || p->readers_reading) pthread_cond_wait (&p->lock_free, &p->mutex); @@ -115,7 +111,7 @@ int zebra_lock_rdwr_wlock (Zebra_lock_rdwr *p) int zebra_lock_rdwr_runlock (Zebra_lock_rdwr *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_lock (&p->mutex); if (p->readers_reading == 0) { @@ -135,7 +131,7 @@ int zebra_lock_rdwr_runlock (Zebra_lock_rdwr *p) int zebra_lock_rdwr_wunlock (Zebra_lock_rdwr *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_mutex_lock (&p->mutex); if (p->writers_writing == 0) { @@ -154,7 +150,7 @@ int zebra_lock_rdwr_wunlock (Zebra_lock_rdwr *p) int zebra_mutex_cond_init (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_cond_init (&p->cond, 0); pthread_mutex_init (&p->mutex, 0); #endif @@ -163,7 +159,7 @@ int zebra_mutex_cond_init (Zebra_mutex_cond *p) int zebra_mutex_cond_destroy (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS pthread_cond_destroy (&p->cond); pthread_mutex_destroy (&p->mutex); #endif @@ -172,7 +168,7 @@ int zebra_mutex_cond_destroy (Zebra_mutex_cond *p) int zebra_mutex_cond_lock (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS return pthread_mutex_lock (&p->mutex); #else return 0; @@ -181,7 +177,7 @@ int zebra_mutex_cond_lock (Zebra_mutex_cond *p) int zebra_mutex_cond_unlock (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS return pthread_mutex_unlock (&p->mutex); #else return 0; @@ -190,7 +186,7 @@ int zebra_mutex_cond_unlock (Zebra_mutex_cond *p) int zebra_mutex_cond_wait (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS return pthread_cond_wait (&p->cond, &p->mutex); #else return 0; @@ -199,7 +195,7 @@ int zebra_mutex_cond_wait (Zebra_mutex_cond *p) int zebra_mutex_cond_signal (Zebra_mutex_cond *p) { -#if HAVE_PTHREAD_H +#if YAZ_POSIX_THREADS return pthread_cond_signal (&p->cond); #else return 0; -- 1.7.10.4