From: Adam Dickmeiss Date: Wed, 5 Nov 2008 15:13:39 +0000 (+0100) Subject: Make section_id part of multi-value sort (one sort chunk / section). X-Git-Tag: v2.0.34~7 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=6277df6076edfef81e676d4aab7bd1f212bf739f;hp=e3f2abdb7863901fc997d5535ff512520edcbd0f Make section_id part of multi-value sort (one sort chunk / section). --- diff --git a/include/sortidx.h b/include/sortidx.h index 806806f..07e60d1 100644 --- a/include/sortidx.h +++ b/include/sortidx.h @@ -64,27 +64,30 @@ void zebra_sort_sysno(zebra_sort_index_t si, zint sysno); /** \brief adds multi-map content to sort file \param si sort index handle + \param section_id section of key \param w one or more 0-terminated strings (thus an array) zebra_sort_type and zebra_sort_sysno must be called prior to this */ -void zebra_sort_add(zebra_sort_index_t si, WRBUF w); +void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF w); /** \brief delete sort entry \param si sort index handle + \param section_id section of sort key to be deleted zebra_sort_type and zebra_sort_sysno must be called prior to this */ -void zebra_sort_delete(zebra_sort_index_t si); +void zebra_sort_delete(zebra_sort_index_t si, zint section_id); /** \brief reads sort entry \param si sort index handle + \param section_id output section ID (may be NULL and it will not be set) \param w resulting buffer \retval 0 could not be read \retval 1 could be read (found) */ -int zebra_sort_read(zebra_sort_index_t si, WRBUF w); +int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w); YAZ_END_CDECL diff --git a/index/extract.c b/index/extract.c index 32b92ec..82544f5 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1874,6 +1874,7 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, struct sort_add_ent *next; WRBUF wrbuf; zint sysno; + zint section_id; }; struct sort_add_ent *sort_ent_list = 0; @@ -1881,10 +1882,12 @@ void 
extract_flush_sort_keys(ZebraHandle zh, zint sysno, { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); zint filter_sysno = key_in.mem[1]; + zint section_id = key_in.mem[2]; struct sort_add_ent **e = &sort_ent_list; - while (*e && (*e)->ord != ord) - e = &(*e)->next; + for (; *e; e = &(*e)->next) + if ((*e)->ord == ord && section_id == (*e)->section_id) + break; if (!*e) { *e = nmem_malloc(nmem, sizeof(**e)); @@ -1893,6 +1896,7 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, (*e)->ord = ord; (*e)->cmd = cmd; (*e)->sysno = filter_sysno ? filter_sysno : sysno; + (*e)->section_id = section_id; } wrbuf_write((*e)->wrbuf, str, slen); @@ -1911,9 +1915,9 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, } zebra_sort_type(si, e->ord); if (e->cmd == 1) - zebra_sort_add(si, e->wrbuf); + zebra_sort_add(si, e->section_id, e->wrbuf); else - zebra_sort_delete(si); + zebra_sort_delete(si, e->section_id); wrbuf_destroy(e->wrbuf); } } diff --git a/index/retrieve.c b/index/retrieve.c index eb0b65f..c41a926 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -269,7 +269,7 @@ static int sort_fetch( } zebra_sort_type(zh->reg->sort_index, ord); zebra_sort_sysno(zh->reg->sort_index, fi->sysno); - zebra_sort_read(zh->reg->sort_index, wrbuf_str); + zebra_sort_read(zh->reg->sort_index, 0, wrbuf_str); while (off != wrbuf_len(wrbuf_str)) { @@ -781,7 +781,7 @@ static int perform_facet_sort(ZebraHandle zh, int no_ord, int *ord_array, zebra_sort_type(zh->reg->sort_index, ord_array[ord_i]); wrbuf_rewind(w); - if (zebra_sort_read(zh->reg->sort_index, w)) + if (zebra_sort_read(zh->reg->sort_index, 0, w)) { zebra_strmap_t sm = map_array[ord_i]; int off = 0; diff --git a/index/sortidx.c b/index/sortidx.c index 5490139..82f7e7f 100644 --- a/index/sortidx.c +++ b/index/sortidx.c @@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA struct sort_term { zint sysno; + zint section_id; zint length; char term[SORT_MAX_MULTI]; }; @@ -46,8 +47,8 @@ static 
void sort_term_log_item(int level, const void *b, const char *txt) memcpy(&a1, b, sizeof(a1)); - yaz_log(level, "%s " ZINT_FORMAT " %.*s", txt, a1.sysno, - (int) a1.length-1, a1.term); + yaz_log(level, "%s " ZINT_FORMAT " " ZINT_FORMAT " %.*s", txt, a1.sysno, + a1.section_id, (int) a1.length-1, a1.term); } static int sort_term_compare(const void *a, const void *b) @@ -61,6 +62,11 @@ static int sort_term_compare(const void *a, const void *b) return 1; else if (a1.sysno < b1.sysno) return -1; + if (a1.section_id > b1.section_id) + return 1; + else if (a1.section_id < b1.section_id) + return -1; + return 0; } @@ -88,7 +94,8 @@ static void sort_term_encode2(void *p, char **dst, const char **src) memcpy(&a1, *src, sizeof(a1)); *src += sizeof(a1); - zebra_zint_encode(dst, a1.sysno); /* encode record id */ + zebra_zint_encode(dst, a1.sysno); + zebra_zint_encode(dst, a1.section_id); zebra_zint_encode(dst, a1.length); /* encode length */ memcpy(*dst, a1.term, a1.length); *dst += a1.length; @@ -100,6 +107,7 @@ static void sort_term_decode1(void *p, char **dst, const char **src) size_t slen; zebra_zint_decode(src, &a1.sysno); + a1.section_id = 0; strcpy(a1.term, *src); slen = 1 + strlen(a1.term); @@ -115,6 +123,7 @@ static void sort_term_decode2(void *p, char **dst, const char **src) struct sort_term a1; zebra_zint_decode(src, &a1.sysno); + zebra_zint_decode(src, &a1.section_id); zebra_zint_decode(src, &a1.length); memcpy(a1.term, *src, a1.length); @@ -340,7 +349,7 @@ void zebra_sort_sysno(zebra_sort_index_t si, zint sysno) } -void zebra_sort_delete(zebra_sort_index_t si) +void zebra_sort_delete(zebra_sort_index_t si, zint section_id) { struct sortFile *sf = si->current_file; @@ -361,6 +370,7 @@ void zebra_sort_delete(zebra_sort_index_t si) ISAMC_I isamc_i; s.st.sysno = si->sysno; + s.st.section_id = section_id; s.st.length = 0; s.st.term[0] = '\0'; @@ -376,7 +386,7 @@ void zebra_sort_delete(zebra_sort_index_t si) } } -void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf) 
+void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf) { struct sortFile *sf = si->current_file; int len; @@ -414,6 +424,7 @@ void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf) memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; s.st.sysno = si->sysno; + s.st.section_id = 0; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -438,6 +449,7 @@ void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf) memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; s.st.sysno = si->sysno; + s.st.section_id = section_id; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -451,7 +463,7 @@ void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf) } -int zebra_sort_read(zebra_sort_index_t si, WRBUF w) +int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w) { int r; struct sortFile *sf = si->current_file; @@ -483,12 +495,15 @@ int zebra_sort_read(zebra_sort_index_t si, WRBUF w) struct sort_term st, st_untilbuf; st_untilbuf.sysno = si->sysno; + st_untilbuf.section_id = 0; st_untilbuf.length = 0; st_untilbuf.term[0] = '\0'; r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); if (r && st.sysno == si->sysno) { wrbuf_write(w, st.term, st.length); + if (section_id) + *section_id = st.section_id; return 1; } } diff --git a/index/zsets.c b/index/zsets.c index b81dce6..8a3f66a 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -576,7 +576,7 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, criteria[i].ord[database_no]); zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]); wrbuf_rewind(w); - if (zebra_sort_read(zh->reg->sort_index, w)) + if (zebra_sort_read(zh->reg->sort_index, 0, w)) { /* consider each sort entry and take lowest/highest one of the one as sorting key depending on whether sort is diff --git a/test/api/test_sortidx.c b/test/api/test_sortidx.c index 961e945..89c9e72 100644 --- a/test/api/test_sortidx.c +++ b/test/api/test_sortidx.c @@ -21,12 +21,13 @@ Foundation, Inc., 51 Franklin St, Fifth 
Floor, Boston, MA 02110-1301 USA #include #include "testlib.h" -static void sort_add_cstr(zebra_sort_index_t si, const char *str) +static void sort_add_cstr(zebra_sort_index_t si, const char *str, + zint section_id) { WRBUF w = wrbuf_alloc(); wrbuf_puts(w, str); wrbuf_putc(w, '\0'); - zebra_sort_add(si, w); + zebra_sort_add(si, section_id, w); wrbuf_destroy(w); } @@ -39,38 +40,38 @@ static void tst1(zebra_sort_index_t si) zebra_sort_type(si, my_type); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 0); - sort_add_cstr(si, "abcde1"); + sort_add_cstr(si, "abcde1", 0); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 1); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 1); YAZ_CHECK(!strcmp(wrbuf_cstr(w), "abcde1")); zebra_sort_sysno(si, sysno+1); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 0); zebra_sort_sysno(si, sysno-1); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 0); zebra_sort_sysno(si, sysno); - zebra_sort_delete(si); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); + zebra_sort_delete(si, 0); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 0); zebra_sort_type(si, my_type); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 0); wrbuf_rewind(w); - sort_add_cstr(si, "abcde1"); + sort_add_cstr(si, "abcde1", 0); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w), 1); + YAZ_CHECK_EQ(zebra_sort_read(si, 0, w), 1); YAZ_CHECK(!strcmp(wrbuf_cstr(w), "abcde1")); zebra_sort_sysno(si, sysno); - zebra_sort_delete(si); + zebra_sort_delete(si, 0); wrbuf_destroy(w); } @@ -85,22 +86,25 @@ static void tst2(zebra_sort_index_t si) for (sysno = 1; sysno < 50; sysno++) { + zint input_section_id = 12345; + zint output_section_id = 0; WRBUF w1 = wrbuf_alloc(); WRBUF w2 = wrbuf_alloc(); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w2), 0); + 
YAZ_CHECK_EQ(zebra_sort_read(si, 0, w2), 0); for (i = 0; i < 600; i++) /* 600 * 6 < max size =4K */ wrbuf_write(w1, "12345", 6); - zebra_sort_add(si, w1); + zebra_sort_add(si, input_section_id, w1); zebra_sort_sysno(si, sysno); - YAZ_CHECK_EQ(zebra_sort_read(si, w2), 1); + YAZ_CHECK_EQ(zebra_sort_read(si, &output_section_id, w2), 1); YAZ_CHECK_EQ(wrbuf_len(w1), wrbuf_len(w2)); YAZ_CHECK(!memcmp(wrbuf_buf(w1), wrbuf_buf(w2), wrbuf_len(w2))); + YAZ_CHECK_EQ(input_section_id, output_section_id); wrbuf_destroy(w1); wrbuf_destroy(w2); }