X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Findex.h;h=8a67b85ad00a27fc9f014a53da0bcc97f807a64f;hp=212b385bc2ae64adc1ef5f46ab77d7827ad650db;hb=2b99f08490d3bd763bb086693044a6db10c0bcf9;hpb=cdc04db6a4bcbf0630b5da7cfdf7195a88f0a9f9 diff --git a/index/index.h b/index/index.h index 212b385..8a67b85 100644 --- a/index/index.h +++ b/index/index.h @@ -1,149 +1,77 @@ -/* - * Copyright (C) 1994-1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: index.h,v $ - * Revision 1.36 1996-03-21 14:50:09 adam - * File update uses modify-time instead of change-time. - * - * Revision 1.35 1996/02/12 18:45:36 adam - * New fileVerboseFlag in record group control. - * - * Revision 1.34 1995/12/11 11:43:29 adam - * Locking based on fcntl instead of flock. - * Setting commitEnable removed. Command line option -n can be used to - * prevent commit if commit setting is defined in the configuration file. - * - * Revision 1.33 1995/12/08 16:22:53 adam - * Work on update while servers are running. Three lock files introduced. - * The servers reload their registers when necessary, but they don't - * reestablish result sets yet. - * - * Revision 1.32 1995/12/07 17:38:46 adam - * Work locking mechanisms for concurrent updates/commit. - * - * Revision 1.31 1995/12/06 12:41:22 adam - * New command 'stat' for the index program. - * Filenames can be read from stdin by specifying '-'. - * Bug fix/enhancement of the transformation from terms to regular - * expressons in the search engine. - * - * Revision 1.30 1995/12/05 11:25:02 adam - * Include of zebraver.h. - * - * Revision 1.29 1995/11/28 09:09:40 adam - * Zebra config renamed. - * Use setting 'recordId' to identify record now. - * Bug fix in recindex.c: rec_release_blocks was invokeded even - * though the blocks were already released. - * File traversal properly deletes records when needed. - * - * Revision 1.28 1995/11/27 13:58:53 adam - * New option -t. storeStore data implemented in server. - * - * Revision 1.27 1995/11/25 10:24:06 adam - * More record fields - they are enumerated now. - * New options: flagStoreData flagStoreKey. - * - * Revision 1.26 1995/11/22 17:19:17 adam - * Record management uses the bfile system. - * - * Revision 1.25 1995/11/21 15:29:12 adam - * Config file 'base' read by default by both indexer and server. - * - * Revision 1.24 1995/11/21 15:01:15 adam - * New general match criteria implemented. - * New feature: document groups. - * - * Revision 1.23 1995/11/20 16:59:45 adam - * New update method: the 'old' keys are saved for each records. - * - * Revision 1.22 1995/11/20 11:56:26 adam - * Work on new traversal. - * - * Revision 1.21 1995/11/16 15:34:55 adam - * Uses new record management system in both indexer and server. - * - * Revision 1.20 1995/11/15 14:46:18 adam - * Started work on better record management system. - * - * Revision 1.19 1995/10/27 14:00:11 adam - * Implemented detection of database availability. - * - * Revision 1.18 1995/10/17 18:02:08 adam - * New feature: databases. Implemented as prefix to words in dictionary. - * - * Revision 1.17 1995/10/13 16:01:49 adam - * Work on relations. - * - * Revision 1.16 1995/10/10 12:24:38 adam - * Temporary sort files are compressed. - * - * Revision 1.15 1995/10/04 16:57:19 adam - * Key input and merge sort in one pass. - * - * Revision 1.14 1995/09/29 14:01:40 adam - * Bug fixes. - * - * Revision 1.13 1995/09/28 14:22:56 adam - * Sort uses smaller temporary files. - * - * Revision 1.12 1995/09/28 12:10:32 adam - * Bug fixes. Field prefix used in queries. - * - * Revision 1.11 1995/09/27 12:22:28 adam - * More work on extract in record control. - * Field name is not in isam keys but in prefix in dictionary words. - * - * Revision 1.10 1995/09/14 07:48:23 adam - * Record control management. - * - * Revision 1.9 1995/09/11 13:09:33 adam - * More work on relevance feedback. - * - * Revision 1.8 1995/09/08 14:52:27 adam - * Minor changes. Dictionary is lower case now. - * - * Revision 1.7 1995/09/06 16:11:16 adam - * Option: only one word key per file. - * - * Revision 1.6 1995/09/05 15:28:39 adam - * More work on search engine. - * - * Revision 1.5 1995/09/04 12:33:42 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.4 1995/09/04 09:10:35 adam - * More work on index add/del/update. - * Merge sort implemented. - * Initial work on z39 server. - * - * Revision 1.3 1995/09/01 14:06:35 adam - * Split of work into more files. - * - * Revision 1.2 1995/09/01 10:30:24 adam - * More work on indexing. Not working yet. - * - * Revision 1.1 1995/08/31 14:50:24 adam - * New simple file index tool. - * - */ +/* $Id: index.h,v 1.112 2004-08-06 13:36:23 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +#ifndef INDEX_H +#define INDEX_H #include #include -#include +#include +#include +#include + +#if HAVE_SYS_TIMES_H +#include +#endif +#include + #include -#include +#include +#include +#include +#include +#include +#include "recindex.h" +#include "zebraapi.h" +#include "zinfo.h" +#include +#include + +YAZ_BEGIN_CDECL + +#define ISAM_DEFAULT "b" + +#define SU_SCHEME 1 #define IT_MAX_WORD 256 -#define IT_KEY_HAVE_SEQNO 1 -#define IT_KEY_HAVE_FIELD 0 +#define IT_KEY_NEW 1 + +#if IT_KEY_NEW + +#endif + +#if IT_KEY_NEW +#define IT_KEY_LEVEL_MAX 4 +struct it_key { + int len; + zint mem[IT_KEY_LEVEL_MAX]; +}; +#else struct it_key { int sysno; - int seqno; + int seqno; }; +#endif enum dirsKind { dirs_dir, dirs_file }; @@ -160,48 +88,52 @@ struct dirs_entry { time_t mtime; }; -struct recordGroup { - char *groupName; - char *databaseName; - char *path; - char *recordId; - char *recordType; - int flagStoreData; - int flagStoreKeys; - int fileVerboseFlag; -}; - +void getFnameTmp (Res res, char *fname, int no); -struct dirs_info *dirs_open (Dict dict, const char *rep); +struct dirs_info *dirs_open (Dict dict, const char *rep, int rw); +struct dirs_info *dirs_fopen (Dict dict, const char *path); struct dirs_entry *dirs_read (struct dirs_info *p); struct dirs_entry *dirs_last (struct dirs_info *p); void dirs_mkdir (struct dirs_info *p, const char *src, time_t mtime); void dirs_rmdir (struct dirs_info *p, const char *src); -void dirs_add (struct dirs_info *p, const char *src, int sysno, time_t mtime); +void dirs_add (struct dirs_info *p, const char *src, SYSNO sysno, time_t mtime); void dirs_del (struct dirs_info *p, const char *src); void dirs_free (struct dirs_info **pp); -struct dir_entry *dir_open (const char *rep); +struct dir_entry *dir_open (const char *rep, const char *base, + int follow_links); void dir_sort (struct dir_entry *e); void dir_free (struct dir_entry **e_p); -void repositoryUpdate (struct recordGroup *rGroup); -void repositoryAdd (struct recordGroup *rGroup); -void repositoryDelete (struct recordGroup *rGroup); +void repositoryUpdate (ZebraHandle zh, const char *path); +void repositoryAdd (ZebraHandle zh, const char *path); +void repositoryDelete (ZebraHandle zh, const char *path); +void repositoryShow (ZebraHandle zh, const char *path); -void key_open (int mem); -int key_close (void); -void key_write (int cmd, struct it_key *k, const char *str); +int key_open (ZebraHandle zh, int mem); +int key_close (ZebraHandle zh); int key_compare (const void *p1, const void *p2); +void key_init(struct it_key *k); +char *key_print_it (const void *p, char *buf); +int key_get_seq (const void *p); +int key_compare_it (const void *p1, const void *p2); int key_qsort_compare (const void *p1, const void *p2); void key_logdump (int mask, const void *p); -void key_input (const char *dict_fname, const char *isam_fname, - int nkeys, int cache); +void key_logdump_txt (int logmask, const void *p, const char *txt); +void inv_prstat (ZebraHandle zh); +void inv_compact (BFiles bfs); +void key_input (ZebraHandle zh, int nkeys, int cache, Res res); +ISAMS_M *key_isams_m (Res res, ISAMS_M *me); +ISAMC_M *key_isamc_m (Res res, ISAMC_M *me); int merge_sort (char **buf, int from, int to); +int key_SU_code (int ch, char *out); -#define TEMP_FNAME "keys%d.tmp" -#define FNAME_WORD_DICT "worddict" -#define FNAME_WORD_ISAM "wordisam" +#define FNAME_DICT "dict" +#define FNAME_ISAM "isam" +#define FNAME_ISAMC "isamc" +#define FNAME_ISAMS "isams" +#define FNAME_ISAMH "isamh" +#define FNAME_ISAMD "isamd" #define FNAME_CONFIG "zebra.cfg" #define GMATCH_DICT "gmatch" @@ -216,23 +148,354 @@ int index_char_cvt (int c); int index_word_prefix (char *string, int attset_ordinal, int local_attribute, const char *databaseName); -int fileExtract (SYSNO *sysno, const char *fname, - const struct recordGroup *rGroup, int deleteFlag); -void rec_prstat (void); - -void zebraLockPrefix (char *pathPrefix); - -void zebraIndexLockMsg (const char *str); -void zebraIndexUnlock (void); -void zebraIndexLock (int commitNow); -int zebraIndexWait (int commitPhase); +void zebraIndexLockMsg (ZebraHandle zh, const char *str); +void zebraIndexUnlock (ZebraHandle zh); +int zebraIndexLock (BFiles bfs, ZebraHandle zh, int commitNow, const char *rval); +int zebraIndexWait (ZebraHandle zh, int commitPhase); #define FNAME_MAIN_LOCK "zebraidx.LCK" #define FNAME_COMMIT_LOCK "zebracmt.LCK" #define FNAME_ORG_LOCK "zebraorg.LCK" #define FNAME_TOUCH_TIME "zebraidx.time" -int zebraLock (int fd, int wr); -int zebraLockNB (int fd, int wr); -int zebraUnlock (int fd); +typedef struct zebra_lock_info *ZebraLockHandle; +ZebraLockHandle zebra_lock_create(const char *dir, + const char *file, int excl_flag); +void zebra_lock_destroy (ZebraLockHandle h); +int zebra_lock (ZebraLockHandle h); +int zebra_lock_nb (ZebraLockHandle h); +int zebra_unlock (ZebraLockHandle h); +int zebra_lock_fd (ZebraLockHandle h); +void zebra_lock_prefix (Res res, char *dst); +char *zebra_mk_fname (const char *dir, const char *name); + +int zebra_lock_w (ZebraLockHandle h); +int zebra_lock_r (ZebraLockHandle h); + +void zebra_load_atts (data1_handle dh, Res res); + +int key_SU_decode (int *ch, const unsigned char *out); +int key_SU_encode (int ch, char *out); + +#define ENCODE_BUFLEN 768 +struct encode_info { + int sysno; /* previously written values for delta-compress */ + int seqno; + int cmd; + int prevsys; /* buffer for skipping insert/delete pairs */ + int prevseq; + int prevcmd; + int keylen; /* tells if we have an unwritten key in buf, and how long*/ +#if IT_KEY_NEW + void *encode_handle; +#endif + char buf[ENCODE_BUFLEN]; +}; + +void encode_key_init (struct encode_info *i); +char *encode_key_int (int d, char *bp); +void encode_key_write (char *k, struct encode_info *i, FILE *outf); +void encode_key_flush (struct encode_info *i, FILE *outf); + +typedef struct { + char *term; + char *db; + zint sysno; + int score; +} *ZebraPosSet; + +typedef struct zebra_set *ZebraSet; + +typedef struct zebra_rank_class { + struct rank_control *control; + int init_flag; + void *class_handle; + struct zebra_rank_class *next; +} *ZebraRankClass; + +struct recKeys { + int buf_used; + int buf_max; + char *buf; +#if IT_KEY_NEW + void *codec_handle; +#else + int prevSeqNo; + char prevAttrSet; + short prevAttrUse; +#endif +}; + +struct sortKeys { + int buf_used; + int buf_max; + char *buf; +}; + +struct zebra_register { + char *name; + + ISAMS isams; + ISAMC isamc; + ISAMB isamb; + Dict dict; + Dict matchDict; + SortIdx sortIdx; + int registerState; /* 0 (no commit pages), 1 (use commit pages) */ + time_t registerChange; + BFiles bfs; + Records records; + ZebraExplainInfo zei; + + char *server_path_prefix; + data1_handle dh; + ZebraMaps zebra_maps; + ZebraRankClass rank_classes; + RecTypes recTypes; + int seqno; + int last_val; + int stop_flag; + int active; /* 0=shutdown, 1=enabled and inactive, 2=activated */ + + struct recKeys keys; +#if 1 + struct sortKeys sortKeys; +#else + struct sortKey *sortKeys; +#endif + char **key_buf; + size_t ptr_top; + size_t ptr_i; + size_t key_buf_used; + int key_file_no; +}; + +struct zebra_service { + int stop_flag; + Res global_res; + char *configName; + struct zebra_session *sessions; + struct zebra_register *regs; + Zebra_mutex_cond session_lock; + Passwd_db passwd_db; + const char *path_root; +}; + + +struct zebra_session { + struct zebra_session *next; + struct zebra_service *service; + struct zebra_register *reg; + + char *xadmin_databaseName; + + char **basenames; + int num_basenames; + char *reg_name; + char *path_reg; + + ZebraLockHandle lock_normal; + ZebraLockHandle lock_shadow; + + int trans_no; + int trans_w_no; + + int destroyed; + ZebraSet sets; + Res res; + char *user_perm; + int errCode; + int hits; + char *errString; +#if HAVE_SYS_TIMES_H + struct tms tms1; + struct tms tms2; +#endif + int shadow_enable; + + int records_inserted; + int records_updated; + int records_deleted; + int records_processed; + char *record_encoding; + + yaz_iconv_t iconv_to_utf8; + yaz_iconv_t iconv_from_utf8; + + int m_follow_links; + const char *m_group; + const char *m_record_id; + const char *m_record_type; + int m_store_data; + int m_store_keys; + int m_explain_database; + int m_flag_rw; + int m_file_verbose_limit; +}; + +struct rank_control { + char *name; + void *(*create)(ZebraHandle zh); + void (*destroy)(struct zebra_register *reg, void *class_handle); + void *(*begin)(struct zebra_register *reg, void *class_handle, RSET rset); + /* ### Could add parameters to begin: + * char *index; // author, title, etc. + * int dbsize; // number of records in database + * int rssize; // number of records in result set (estimate?) + */ + void (*end)(struct zebra_register *reg, void *set_handle); + int (*calc)(void *set_handle, zint sysno); + void (*add)(void *set_handle, int seqno, int term_index); +}; + +struct term_set_entry { + char *term; + struct term_set_entry *next; +}; + +struct term_set_list { + struct term_set_entry *first; + struct term_set_entry *last; +}; + +RSET rpn_search (ZebraHandle zh, NMEM mem, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname, ZebraSet sset); + + +void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, + oid_value attributeset, + int num_bases, char **basenames, + int *position, int *num_entries, ZebraScanEntry **list, + int *is_partial, RSET limit_set, int return_zero); + +RSET rset_trunc (ZebraHandle zh, ISAMS_P *isam_p, int no, + const char *term, int length_term, const char *flags, + int preserve_position, int term_type); + +void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, + const char *db, int set, + int use, const char *term); +ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov); +ZebraSet resultSetGet (ZebraHandle zh, const char *name); +ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, + int num_bases, char **basenames, + const char *setname); +RSET resultSetRef (ZebraHandle zh, const char *resultSetId); +void resultSetDestroy (ZebraHandle zh, int num_names, char **names, + int *statuses); + + +ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, + int num, int *positions); +void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num); + +void resultSetSort (ZebraHandle zh, NMEM nmem, + int num_input_setnames, const char **input_setnames, + const char *output_setname, + Z_SortKeySpecList *sort_sequence, int *sort_status); +void resultSetSortSingle (ZebraHandle zh, NMEM nmem, + ZebraSet sset, RSET rset, + Z_SortKeySpecList *sort_sequence, int *sort_status); +void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset); +void resultSetInvalidate (ZebraHandle zh); + +int zebra_server_lock_init (ZebraService zh); +int zebra_server_lock_destroy (ZebraService zh); +int zebra_server_lock (ZebraService zh, int lockCommit); +void zebra_server_unlock (ZebraService zh, int commitPhase); +int zebra_server_lock_get_state (ZebraService zh, time_t *timep); + +typedef struct attent +{ + int attset_ordinal; + data1_local_attribute *local_attributes; +} attent; + +void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl); +ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name); +void zebraRankDestroy (struct zebra_register *reg); + +int att_getentbyatt(ZebraHandle zh, attent *res, oid_value set, int att, + const char *sattr); + +extern struct rank_control *rank1_class; +extern struct rank_control *rankzv_class; +extern struct rank_control *rankliv_class; + +int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, ODR stream, + oid_value input_format, Z_RecordComposition *comp, + oid_value *output_format, char **rec_bufp, + int *rec_lenp, char **basenamep); + +void extract_get_fname_tmp (ZebraHandle zh, char *fname, int no); + +void zebra_index_merge (ZebraHandle zh); + +int buffer_extract_record (ZebraHandle zh, + const char *buf, size_t buf_size, + int delete_flag, + int test_mode, + const char *recordType, + SYSNO *sysno, + const char *match_criteria, + const char *fname, + int force_update, + int allow_update); + +#if 0 +int extract_rec_in_mem (ZebraHandle zh, const char *recordType, + const char *buf, size_t buf_size, + const char *databaseName, int delete_flag, + int test_mode, int *sysno, + int store_keys, int store_data, + const char *match_criteria); +#endif +void extract_flushWriteKeys (ZebraHandle zh, int final); + +struct zebra_fetch_control { + off_t offset_end; + off_t record_offset; + off_t record_int_pos; + const char *record_int_buf; + int record_int_len; + int fd; +}; + +int zebra_record_ext_read (void *fh, char *buf, size_t count); +off_t zebra_record_ext_seek (void *fh, off_t offset); +off_t zebra_record_ext_tell (void *fh); +off_t zebra_record_int_seek (void *fh, off_t offset); +off_t zebra_record_int_tell (void *fh); +int zebra_record_int_read (void *fh, char *buf, size_t count); +void zebra_record_int_end (void *fh, off_t offset); + +void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, + int cmd, struct recKeys *reckeys); +void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, + int cmd, struct sortKeys *skp); +void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); +void extract_token_add (RecWord *p); +int explain_extract (void *handle, Record rec, data1_node *n); + +int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, + int deleteFlag); + +int zebra_begin_read (ZebraHandle zh); +int zebra_end_read (ZebraHandle zh); + +int zebra_file_stat (const char *file_name, struct stat *buf, + int follow_links); + +void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query); + +void *iscz1_start (); +void iscz1_reset (void *vp); +void iscz1_stop (void *p); +void iscz1_decode (void *vp, char **dst, const char **src); +void iscz1_encode (void *vp, char **dst, const char **src); + +YAZ_END_CDECL + +#endif