From f7a3769dede0071696bdcc13ae2ee1efe6d52d96 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 18 May 2006 12:03:05 +0000 Subject: [PATCH] Sort Keys are stored using the reckeys_t structure. Sort files are named with the ordinal value (index ID), rather than use-attribute. This patch eases the move towards string indexes everywhere. --- index/extract.c | 239 +++++++++++------------------------------------------- index/index.h | 22 +---- index/sortidx.c | 11 ++- index/zebraapi.c | 11 +-- index/zinfo.c | 7 +- index/zsets.c | 46 +++++++---- 6 files changed, 92 insertions(+), 244 deletions(-) diff --git a/index/extract.c b/index/extract.c index de81472..86f868b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.213 2006-05-17 17:46:45 adam Exp $ +/* $Id: extract.c,v 1.214 2006-05-18 12:03:05 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -428,11 +428,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* we are going to read from a file, so prepare the extraction */ zebra_rec_keys_reset(zh->reg->keys); -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - zh->reg->sortKeys.buf_used = 0; -#endif recordOffset = fi->file_moffset; extractCtrl.handle = zh; extractCtrl.offset = fi->file_moffset; @@ -515,6 +511,11 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, if (!matchStr) { yaz_log(YLOG_WARN, "Bad match criteria"); + + if (zebra_rec_keys_empty(zh->reg->keys)) + { + yaz_log(YLOG_WARN, "And no index keys"); + } return ZEBRA_FAIL; } } @@ -576,11 +577,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, } -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -590,11 +587,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); 
-#if NATTR zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); -#else - struct sortKeys sortKeys; -#endif rec = rec_get (zh->reg->records, *sysno); assert (rec); @@ -606,18 +599,11 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, rec->size[recInfo_delKeys], 0); -#if NATTR zebra_rec_keys_set_buf(sortKeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); extract_flushSortKeys (zh, *sysno, 0, sortKeys); -#else - sortKeys.buf_used = rec->size[recInfo_sortKeys]; - sortKeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, *sysno, 0, &sortKeys); -#endif - extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); /* old values */ if (deleteFlag) @@ -662,19 +648,13 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, fname, recordOffset, *sysno, match_str_to_print); } recordAttr->staticrank = extractCtrl.staticrank; -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); -#if NATTR zebra_rec_keys_close(sortKeys); -#endif } /* update file type */ xfree (rec->info[recInfo_fileType]); @@ -703,16 +683,9 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -937,12 +910,8 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, extractCtrl.fh = &fc; zebra_rec_keys_reset(zh->reg->keys); - -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - 
zh->reg->sortKeys.buf_used = 0; -#endif + if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) { if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], @@ -1074,15 +1043,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif - -#if 0 - print_rec_keys(zh, zh->reg->keys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -1091,12 +1052,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, { /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#if NATTR zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); -#else - struct sortKeys sortKeys; -#endif - if (!allow_update) { yaz_log (YLOG_LOG, "skipped %s %s %ld", @@ -1114,21 +1070,12 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); -#if NATTR zebra_rec_keys_set_buf(sortKeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); -#else - sortKeys.buf_used = rec->size[recInfo_sortKeys]; - sortKeys.buf = rec->info[recInfo_sortKeys]; -#endif -#if NATTR extract_flushSortKeys (zh, *sysno, 0, sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 0, &sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); if (delete_flag) @@ -1164,19 +1111,13 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); recordAttr->staticrank = extractCtrl.staticrank; -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); -#if NATTR zebra_rec_keys_close(sortKeys); -#endif } /* update file type */ xfree 
(rec->info[recInfo_fileType]); @@ -1204,16 +1145,9 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1283,12 +1217,8 @@ int explain_extract (void *handle, Record rec, data1_node *n) } zebra_rec_keys_reset(zh->reg->keys); - -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - zh->reg->sortKeys.buf_used = 0; -#endif + extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; @@ -1311,36 +1241,23 @@ int explain_extract (void *handle, Record rec, data1_node *n) { zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#if NATTR zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); -#else - struct sortKeys sortkeys; -#endif zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); zebra_rec_keys_close(delkeys); -#if NATTR + zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); extract_flushSortKeys (zh, rec->sysno, 0, sortkeys); zebra_rec_keys_close(sortkeys); -#else - sortkeys.buf_used = rec->size[recInfo_sortKeys]; - sortkeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys); -#endif } extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); -#if NATTR extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys); -#endif xfree (rec->info[recInfo_delKeys]); zebra_rec_keys_get_buf(zh->reg->keys, @@ -1348,16 
+1265,9 @@ int explain_extract (void *handle, Record rec, data1_node *n) &rec->size[recInfo_delKeys]); xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif return 0; } @@ -1755,8 +1665,7 @@ void extract_add_index_string(RecWord *p, const char *str, int length) zebra_rec_keys_write(zh->reg->keys, str, length, &key); } -#if NATTR -static void extract_add_sort_string (RecWord *p, const char *str, int length) +static void extract_add_sort_string(RecWord *p, const char *str, int length) { struct it_key key; @@ -1772,7 +1681,15 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) } else { +#if NATTR return; +#else + ch = zebraExplain_lookup_attr_su(zei, p->index_type, + VAL_IDXPATH, p->attrUse); + if (ch < 0) + ch = zebraExplain_add_attr_su(zei, p->index_type, + VAL_IDXPATH, p->attrUse); +#endif } key.len = 4; key.mem[0] = ch; @@ -1780,45 +1697,24 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) key.mem[2] = p->section_id; key.mem[3] = p->seqno; - zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); -} -#else -static void extract_add_sort_string (RecWord *p, const char *str, int length) -{ - ZebraHandle zh = p->extractCtrl->handle; - struct sortKeys *sk = &zh->reg->sortKeys; - int off = 0; - - while (off < sk->buf_used) +#if 0 + if (1) { - int set, use, slen; + char strz[80]; + int i; - off += key_SU_decode(&set, (unsigned char *) sk->buf + off); - off += key_SU_decode(&use, (unsigned char *) sk->buf + off); - off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); - off += slen; - if (p->attrSet == set && p->attrUse == use) - return; + strz[0] = 0; + for (i = 0; iattrSet, p->attrUse, p->record_id, 
p->section_id, p->seqno, + strz); } - assert (off == sk->buf_used); - - if (sk->buf_used + IT_MAX_WORD > sk->buf_max) - { - char *b; - - b = (char *) xmalloc (sk->buf_max += 128000); - if (sk->buf_used > 0) - memcpy (b, sk->buf, sk->buf_used); - xfree (sk->buf); - sk->buf = b; - } - off += key_SU_encode(p->attrSet, sk->buf + off); - off += key_SU_encode(p->attrUse, sk->buf + off); - off += key_SU_encode(length, sk->buf + off); - memcpy (sk->buf + off, str, length); - sk->buf_used = off + length; -} #endif + zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); +} void extract_add_string (RecWord *p, const char *string, int length) { @@ -1992,67 +1888,30 @@ void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid) zebraExplain_addSchema (zh->reg->zei, oid); } -#if NATTR -#error not done yet with zebra_rec_keys_t void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct recKeys *reckeys) + int cmd, zebra_rec_keys_t reckeys) { - SortIdx sortIdx = zh->reg->sortIdx; - void *decode_handle = iscz1_start(); - int off = 0; - int ch = 0; - - while (off < reckeys->buf_used) + if (zebra_rec_keys_rewind(reckeys)) { - const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; - - iscz1_decode(decode_handle, &dst, &src); - assert(key.len == 4); - - ch = (int) key.mem[0]; /* ordinal for field/use/attribute */ - - sortIdx_type(sortIdx, ch); - if (cmd == 1) - sortIdx_add(sortIdx, src, strlen(src)); - else - sortIdx_add(sortIdx, "", 1); - - src += strlen(src); - src++; - - off = src - reckeys->buf; - } - assert (off == reckeys->buf_used); - iscz1_stop(decode_handle); -} -#else -void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct sortKeys *sk) -{ - SortIdx sortIdx = zh->reg->sortIdx; - int off = 0; + SortIdx sortIdx = zh->reg->sortIdx; + size_t slen; + const char *str; + struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + sortIdx_sysno (sortIdx, sysno); - while (off < sk->buf_used) - { - int set, 
use, slen; - - off += key_SU_decode(&set, (unsigned char *) sk->buf + off); - off += key_SU_decode(&use, (unsigned char *) sk->buf + off); - off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); - - sortIdx_type(sortIdx, use); - if (cmd == 1) - sortIdx_add(sortIdx, sk->buf + off, slen); - else - sortIdx_add(sortIdx, "", 1); - off += slen; + while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ord = (int) key_in.mem[0]; + + sortIdx_type(sortIdx, ord); + if (cmd == 1) + sortIdx_add(sortIdx, str, slen); + else + sortIdx_add(sortIdx, "", 1); + } } } -#endif void encode_key_init (struct encode_info *i) { diff --git a/index/index.h b/index/index.h index 588f96c..6ffa33e 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.160 2006-05-10 08:13:21 adam Exp $ +/* $Id: index.h,v 1.161 2006-05-18 12:03:05 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -184,16 +184,6 @@ typedef struct zebra_rank_class { #include "reckeys.h" -#if NATTR - -#else -struct sortKeys { - int buf_used; - int buf_max; - char *buf; -}; -#endif - struct zebra_register { char *name; @@ -219,12 +209,7 @@ struct zebra_register { int stop_flag; zebra_rec_keys_t keys; - -#if NATTR zebra_rec_keys_t sortKeys; -#else - struct sortKeys sortKeys; -#endif char **key_buf; size_t ptr_top; size_t ptr_i; @@ -446,13 +431,8 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, zebra_rec_keys_t reckeys, zint staticrank); -#if NATTR void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, zebra_rec_keys_t skp); -#else -void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct sortKeys *skp); -#endif void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); void extract_token_add (RecWord *p); int explain_extract (void *handle, Record rec, data1_node *n); diff --git a/index/sortidx.c b/index/sortidx.c index 413e251..6cddaf4 100644 --- 
a/index/sortidx.c +++ b/index/sortidx.c @@ -1,5 +1,5 @@ -/* $Id: sortidx.c,v 1.16 2006-05-10 08:13:22 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: sortidx.c,v 1.17 2006-05-18 12:03:05 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -21,7 +21,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA */ - +#include #include #include @@ -140,7 +140,10 @@ void sortIdx_add (SortIdx si, const char *buf, int len) void sortIdx_read (SortIdx si, char *buf) { - int r = bf_read (si->current_file->bf, si->sysno+1, 0, 0, buf); + int r; + + assert(si->current_file); + r = bf_read (si->current_file->bf, si->sysno+1, 0, 0, buf); if (!r) memset (buf, 0, SORT_IDX_ENTRYSIZE); } diff --git a/index/zebraapi.c b/index/zebraapi.c index c40bce8..ff4902f 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.217 2006-05-10 08:13:23 adam Exp $ +/* $Id: zebraapi.c,v 1.218 2006-05-18 12:03:05 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -368,12 +368,7 @@ struct zebra_register *zebra_register_open(ZebraService zs, const char *name, reg->keys = zebra_rec_keys_open(); -#if NATTR reg->sortKeys = zebra_rec_keys_open(); -#else - reg->sortKeys.buf = 0; - reg->sortKeys.buf_max = 0; -#endif reg->records = 0; reg->dict = 0; @@ -551,11 +546,7 @@ static void zebra_register_close(ZebraService zs, struct zebra_register *reg) data1_destroy (reg->dh); zebra_rec_keys_close(reg->keys); -#if NATTR zebra_rec_keys_close(reg->sortKeys); -#else - xfree(reg->sortKeys.buf); -#endif xfree(reg->key_buf); xfree(reg->name); diff --git a/index/zinfo.c b/index/zinfo.c index 3324b2c..f19a068 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,4 +1,4 @@ -/* $Id: zinfo.c,v 1.62 2006-05-11 10:15:33 adam Exp $ +/* $Id: zinfo.c,v 1.63 2006-05-18 12:03:05 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -1438,8 +1438,13 @@ int zebraExplain_lookup_attr_su_any_index(ZebraExplainInfo zei, int set, int use) { struct 
zebSUInfoB *zsui; + int ord; assert (zei->curDatabaseInfo); + + ord = zebraExplain_lookup_attr_su(zei, 'w', set, use); + if (ord != -1) + return ord; for (zsui = zei->curDatabaseInfo->attributeDetails->SUInfo; zsui; zsui=zsui->next) if (zsui->info.which == ZEB_SU_SET_USE && diff --git a/index/zsets.c b/index/zsets.c index a223a7a..65e5272 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,5 +1,5 @@ -/* $Id: zsets.c,v 1.101 2006-05-10 08:13:26 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: zsets.c,v 1.102 2006-05-18 12:03:05 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -538,7 +538,11 @@ void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records, struct sortKeyInfo { int relation; +#if 0 int attrUse; +#else + int ord; +#endif int numerical; }; @@ -554,7 +558,7 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, sortIdx_sysno (zh->reg->sortIdx, sysno); for (i = 0; ireg->sortIdx, criteria[i].attrUse); + sortIdx_type (zh->reg->sortIdx, criteria[i].ord); sortIdx_read (zh->reg->sortIdx, this_entry.buf[i]); } i = sort_info->num_entries; @@ -849,22 +853,28 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, return ZEBRA_FAIL; case Z_SortKey_sortAttributes: yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1); - sort_criteria[i].attrUse = - zebra_maps_sort (zh->reg->zebra_maps, - sk->u.sortAttributes, - &sort_criteria[i].numerical); - yaz_log(log_level_sort, "use value = %d", sort_criteria[i].attrUse); - if (sort_criteria[i].attrUse == -1) - { - zebra_setError( - zh, YAZ_BIB1_USE_ATTRIBUTE_REQUIRED_BUT_NOT_SUPPLIED, 0); - return ZEBRA_FAIL; - } - if (sortIdx_type (zh->reg->sortIdx, sort_criteria[i].attrUse)) + if (1) { - zebra_setError( - zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0); - return ZEBRA_FAIL; + int ord; + int use = zebra_maps_sort(zh->reg->zebra_maps, + sk->u.sortAttributes, + &sort_criteria[i].numerical); + yaz_log(log_level_sort, "use value = %d", use); + if (use 
== -1) + { + zebra_setError( + zh, YAZ_BIB1_USE_ATTRIBUTE_REQUIRED_BUT_NOT_SUPPLIED, 0); + return ZEBRA_FAIL; + } + ord = zebraExplain_lookup_attr_su_any_index(zh->reg->zei, + VAL_IDXPATH, use); + if (ord == -1) + { + zebra_setError( + zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0); + return ZEBRA_FAIL; + } + sort_criteria[i].ord = ord; } break; } -- 1.7.10.4