From: Adam Dickmeiss Date: Tue, 9 Sep 2008 12:24:40 +0000 (+0200) Subject: Functional multi-value sort + tests X-Git-Tag: v2.0.34~28 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=af102b1fb451ba27bfa7343528c4240b3ab3a80b Functional multi-value sort + tests --- diff --git a/include/sortidx.h b/include/sortidx.h index 5800382..54aec54 100644 --- a/include/sortidx.h +++ b/include/sortidx.h @@ -38,11 +38,6 @@ typedef struct zebra_sort_index *zebra_sort_index_t; #define ZEBRA_SORT_TYPE_ISAMB 2 #define ZEBRA_SORT_TYPE_MULTI 3 -struct zebra_sort_ent { - int num; - WRBUF wrbuf; -}; - /** \brief creates sort handle \param bfs block files handle \param write_flag (0=read-only, 1=write and read) @@ -67,23 +62,13 @@ int zebra_sort_type(zebra_sort_index_t si, int type); */ void zebra_sort_sysno(zebra_sort_index_t si, zint sysno); -/** \brief adds content to sort file - \param si sort index handle - \param buf buffer content - \param len length - - zebra_sort_type and zebra_sort_sysno must be called prior to this -*/ -void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len); - - /** \brief adds multi-map content to sort file \param si sort index handle - \param ent multi-map value + \param w one or more 0-terminated strings (thus an array) zebra_sort_type and zebra_sort_sysno must be called prior to this */ -void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent); +void zebra_sort_add(zebra_sort_index_t si, WRBUF w); /** \brief delete sort entry diff --git a/index/extract.c b/index/extract.c index 0d648a6..e092a9b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1867,20 +1867,16 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, const char *str; struct it_key key_in; -#define USE_SORT_ENT 1 -#if USE_SORT_ENT NMEM nmem = nmem_create(); struct sort_add_ent { int ord; int cmd; struct sort_add_ent *next; - struct zebra_sort_ent sort_ent; + WRBUF wrbuf; }; struct sort_add_ent *sort_ent_list = 0; 
-#endif zebra_sort_sysno(si, sysno); -#if USE_SORT_ENT while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); @@ -1892,15 +1888,13 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, { *e = nmem_malloc(nmem, sizeof(**e)); (*e)->next = 0; - (*e)->sort_ent.wrbuf = wrbuf_alloc(); - (*e)->sort_ent.num = 0; + (*e)->wrbuf = wrbuf_alloc(); (*e)->ord = ord; (*e)->cmd = cmd; } - wrbuf_write((*e)->sort_ent.wrbuf, str, slen); - wrbuf_putc((*e)->sort_ent.wrbuf, '\0'); - (*e)->sort_ent.num++; + wrbuf_write((*e)->wrbuf, str, slen); + wrbuf_putc((*e)->wrbuf, '\0'); } if (sort_ent_list) { @@ -1909,25 +1903,13 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, { zebra_sort_type(si, e->ord); if (e->cmd == 1) - zebra_sort_add_ent(si, &e->sort_ent); + zebra_sort_add(si, e->wrbuf); else zebra_sort_delete(si); - wrbuf_destroy(e->sort_ent.wrbuf); + wrbuf_destroy(e->wrbuf); } } nmem_destroy(nmem); -#else - while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) - { - int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - - zebra_sort_type(si, ord); - if (cmd == 1) - zebra_sort_add(si, str, slen); - else - zebra_sort_delete(si); - } -#endif } } diff --git a/index/sortidx.c b/index/sortidx.c index ef15209..a5d5c7b 100644 --- a/index/sortidx.c +++ b/index/sortidx.c @@ -47,7 +47,7 @@ static void sort_term_log_item(int level, const void *b, const char *txt) memcpy(&a1, b, sizeof(a1)); yaz_log(level, "%s " ZINT_FORMAT " %.*s", txt, a1.sysno, - (int) a1.length, a1.term); + (int) a1.length-1, a1.term); } static int sort_term_compare(const void *a, const void *b) @@ -102,8 +102,8 @@ static void sort_term_decode1(void *p, char **dst, const char **src) zebra_zint_decode(src, &a1.sysno); strcpy(a1.term, *src); - slen = strlen(a1.term); - *src += slen + 1; + slen = 1 + strlen(a1.term); + *src += slen; a1.length = slen; memcpy(*dst, &a1, sizeof(a1)); @@ -348,7 +348,8 @@ void zebra_sort_delete(zebra_sort_index_t si) switch(si->type) { case 
ZEBRA_SORT_TYPE_FLAT: - zebra_sort_add(si, "", 0); + memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE); + bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); break; case ZEBRA_SORT_TYPE_ISAMB: case ZEBRA_SORT_TYPE_MULTI: @@ -374,7 +375,7 @@ void zebra_sort_delete(zebra_sort_index_t si) } } -void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) +void zebra_sort_add(zebra_sort_index_t si, WRBUF wrbuf) { struct sortFile *sf = si->current_file; int len; @@ -385,11 +386,11 @@ void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) { case ZEBRA_SORT_TYPE_FLAT: /* take first entry from wrbuf - itself is 0-terminated */ - len = strlen(wrbuf_buf(ent->wrbuf)); + len = strlen(wrbuf_buf(wrbuf)); if (len > SORT_IDX_ENTRYSIZE) len = SORT_IDX_ENTRYSIZE; - memcpy(si->entry_buf, wrbuf_buf(ent->wrbuf), len); + memcpy(si->entry_buf, wrbuf_buf(wrbuf), len); if (len < SORT_IDX_ENTRYSIZE-len) memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); @@ -397,88 +398,21 @@ void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) case ZEBRA_SORT_TYPE_ISAMB: assert(sf->u.isamb); - assert(sf->no_inserted == 0); if (sf->no_inserted == 0) { struct sort_term_stream s; ISAMC_I isamc_i; /* take first entry from wrbuf - itself is 0-terminated */ - len = strlen(wrbuf_buf(ent->wrbuf)); - s.st.sysno = si->sysno; - if (len >= SORT_MAX_TERM) - len = SORT_MAX_TERM-1; - memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); - s.st.term[len] = '\0'; + len = wrbuf_len(wrbuf); + if (len > SORT_MAX_TERM) + { + len = SORT_MAX_TERM; + wrbuf_buf(wrbuf)[len-1] = '\0'; + } + memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; - s.no = 1; - s.insert_flag = 1; - isamc_i.clientData = &s; - isamc_i.read_item = sort_term_code_read; - - isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); - sf->no_inserted++; - } - break; - case ZEBRA_SORT_TYPE_MULTI: - assert(sf->u.isamb); - if (sf->no_inserted == 0) - { - struct 
sort_term_stream s; - ISAMC_I isamc_i; - len = wrbuf_len(ent->wrbuf); - s.st.sysno = si->sysno; - if (len >= SORT_MAX_MULTI) - len = SORT_MAX_MULTI-1; - memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); - s.st.length = len; - s.no = 1; - s.insert_flag = 1; - isamc_i.clientData = &s; - isamc_i.read_item = sort_term_code_read; - - isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); - sf->no_inserted++; - } - break; - } -} - -void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) -{ - struct sortFile *sf = si->current_file; - - if (!sf || !sf->u.bf) - return; - switch(si->type) - { - case ZEBRA_SORT_TYPE_FLAT: - if (len > SORT_IDX_ENTRYSIZE) - { - len = SORT_IDX_ENTRYSIZE; - memcpy(si->entry_buf, buf, len); - } - else - { - memcpy(si->entry_buf, buf, len); - memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); - } - bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); - break; - case ZEBRA_SORT_TYPE_ISAMB: - assert(sf->u.isamb); - if (sf->no_inserted == 0) - { - struct sort_term_stream s; - ISAMC_I isamc_i; - - s.st.sysno = si->sysno; - if (len >= SORT_MAX_TERM) - len = SORT_MAX_TERM-1; - memcpy(s.st.term, buf, len); - s.st.term[len] = '\0'; - s.st.length = len; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -494,12 +428,15 @@ void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) { struct sort_term_stream s; ISAMC_I isamc_i; - - s.st.sysno = si->sysno; - if (len >= SORT_MAX_MULTI) - len = SORT_MAX_MULTI-1; - memcpy(s.st.term, buf, len); + len = wrbuf_len(wrbuf); + if (len > SORT_MAX_MULTI) + { + len = SORT_MAX_MULTI; + wrbuf_buf(wrbuf)[len-1] = '\0'; + } + memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; + s.st.sysno = si->sysno; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -512,6 +449,7 @@ void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) } } + int zebra_sort_read(zebra_sort_index_t si, WRBUF w) { int r; @@ -526,43 +464,37 @@ int zebra_sort_read(zebra_sort_index_t si, WRBUF w) case 
ZEBRA_SORT_TYPE_FLAT: r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf); if (r && *tbuf) + { wrbuf_puts(w, tbuf); - else - return 0; + wrbuf_putc(w, '\0'); + return 1; + } break; case ZEBRA_SORT_TYPE_ISAMB: case ZEBRA_SORT_TYPE_MULTI: - if (!sf->isam_p) - return 0; - else + if (sf->isam_p) { - struct sort_term st, st_untilbuf; if (!sf->isam_pp) sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1); - if (!sf->isam_pp) - return 0; - - st_untilbuf.sysno = si->sysno; - st_untilbuf.length = 0; - st_untilbuf.term[0] = '\0'; - r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); - if (!r) - return 0; - if (r) + if (sf->isam_pp) { - if (st.sysno != si->sysno) + struct sort_term st, st_untilbuf; + + st_untilbuf.sysno = si->sysno; + st_untilbuf.length = 0; + st_untilbuf.term[0] = '\0'; + r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); + if (r && st.sysno == si->sysno) { - yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for " - ZINT_FORMAT, st.sysno, si->sysno); - return 0; + wrbuf_write(w, st.term, st.length); + return 1; } - wrbuf_write(w, st.term, st.length); } } break; } - return 1; + return 0; } /* * Local variables: diff --git a/index/zsets.c b/index/zsets.c index a906705..5187cf4 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -576,10 +576,27 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, criteria[i].ord[database_no]); zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]); wrbuf_rewind(w); - zebra_sort_read(zh->reg->sort_index, w); - memcpy(this_entry_buf, wrbuf_buf(w), - (wrbuf_len(w) >= SORT_IDX_ENTRYSIZE) ? 
- SORT_IDX_ENTRYSIZE : wrbuf_len(w)); + if (zebra_sort_read(zh->reg->sort_index, w)) + { + int off = 0; + while (off != wrbuf_len(w)) + { + assert(off < wrbuf_len(w)); + if (off == 0) + strcpy(this_entry_buf, wrbuf_buf(w)); + else if (criteria[i].relation == 'A') + { + if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0) + strcpy(this_entry_buf, wrbuf_buf(w)+off); + } + else if (criteria[i].relation == 'D') + { + if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0) + strcpy(this_entry_buf, wrbuf_buf(w)+off); + } + off += 1 + strlen(wrbuf_buf(w)+off); + } + } } else { diff --git a/test/api/Makefile.am b/test/api/Makefile.am index 862bcee..345120f 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -7,7 +7,8 @@ check_PROGRAMS = test_start_stop test_result_sets \ test_rank test_private_attset \ test_scan test_create_databases test_resources test_update_record \ test_zebra_fork test_special_elements test_icu_indexing \ - test_safari test_sort1 test_sort2 test_sort3 test_sortidx + test_safari test_sort1 test_sort2 test_sort3 \ + test_sortidx TESTS = $(check_PROGRAMS) @@ -15,9 +16,9 @@ EXTRA_DIST=zebra.cfg test_trunc.cfg test_private_attset.cfg \ private_attset.att private_attset.abs test_search.abs \ test_zebra_fork.cfg \ test_icu_indexing.cfg test_icu_indexing.idx \ - test_safari.cfg test_sort3.cfg \ test_sort1.cfg test_sort1.idx test_sort1.chr sort1.abs \ - test_sort2.cfg test_sort2.idx test_sort2.chr sort2.abs + test_sort2.cfg test_sort2.idx test_sort2.chr sort2.abs \ + test_safari.cfg test_sort3.cfg noinst_LIBRARIES = libtestlib.a @@ -42,6 +43,7 @@ test_sortindex_SOURCES = test_sortindex.c test_safari_SOURCES = test_safari.c test_sort1_SOURCES = test_sort1.c test_sort2_SOURCES = test_sort2.c +test_sort3_SOURCES = test_sort3.c test_sortidx_SOURCES = test_sortidx.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) diff --git a/test/api/test_sort3.c b/test/api/test_sort3.c index 5ad4930..aabf73d 100644 --- a/test/api/test_sort3.c +++ b/test/api/test_sort3.c @@ 
-18,35 +18,88 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** \file - \brief test sortindex + \brief sort using various sortindex types */ #include #include "testlib.h" const char *myrec[] = { - "\nMy title\n\n", - "\nMy x title\n\n", - "\nMy title x\n\n" , - 0} ; + /* 2 */ + "\n" + " My title\n" + " X\n" + "\n", + + /* 3 */ + "\n" + " My x title\n" + " B\n" + "\n", + + /* 4 */ + "\n" + " My title x\n" + " A\n" + "\n" , + 0} ; -static void tst(int argc, char **argv) +static void tst_sortindex(int argc, char **argv, const char *type) { zint ids[5]; + Res res = res_open(0, 0); ZebraService zs = tl_start_up("test_sort3.cfg", argc, argv); - ZebraHandle zh = zebra_open(zs, 0); + ZebraHandle zh; + + res_set(res, "sortindex", type); + + zh = zebra_open(zs, res); YAZ_CHECK(tl_init_data(zh, myrec)); - ids[0] = 2; - ids[1] = 4; - ids[2] = 3; + if (strcmp(type, "m")) + { + /* i, f only takes first title into consideration */ + ids[0] = 2; + ids[1] = 4; + ids[2] = 3; + } + else + { + /* m takes all titles into consideration */ + ids[0] = 4; + ids[1] = 3; + ids[2] = 2; + } YAZ_CHECK(tl_sort(zh, "@or @attr 1=4 title @attr 7=1 @attr 1=4 0", 3, ids)); + if (strcmp(type, "m")) + { + /* i, f only takes first title into consideration */ + ids[0] = 3; + ids[1] = 4; + ids[2] = 2; + } + else + { + /* m takes all titles into consideration */ + ids[0] = 2; + ids[1] = 3; + ids[2] = 4; + } + YAZ_CHECK(tl_sort(zh, "@or @attr 1=4 title @attr 7=2 @attr 1=4 0", 3, ids)); + YAZ_CHECK(tl_close_down(zh, zs)); } +static void tst(int argc, char **argv) +{ + tst_sortindex(argc, argv, "i"); + tst_sortindex(argc, argv, "f"); + tst_sortindex(argc, argv, "m"); +} + TL_MAIN /* diff --git a/test/api/test_sort3.cfg b/test/api/test_sort3.cfg index 4c43a0d..703a24e 100644 --- a/test/api/test_sort3.cfg +++ b/test/api/test_sort3.cfg @@ -7,4 +7,3 @@ recordType: grs.sgml isam: b -sortindex: i \ No newline at end of file diff --git a/test/api/test_sortidx.c 
b/test/api/test_sortidx.c index ebd03ca..961e945 100644 --- a/test/api/test_sortidx.c +++ b/test/api/test_sortidx.c @@ -21,6 +21,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "testlib.h" +static void sort_add_cstr(zebra_sort_index_t si, const char *str) +{ + WRBUF w = wrbuf_alloc(); + wrbuf_puts(w, str); + wrbuf_putc(w, '\0'); + zebra_sort_add(si, w); + wrbuf_destroy(w); +} + static void tst1(zebra_sort_index_t si) { zint sysno = 12; /* just some sysno */ @@ -32,7 +41,7 @@ static void tst1(zebra_sort_index_t si) zebra_sort_sysno(si, sysno); YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); - zebra_sort_add(si, "abcde1", 6); + sort_add_cstr(si, "abcde1"); zebra_sort_sysno(si, sysno); YAZ_CHECK_EQ(zebra_sort_read(si, w), 1); @@ -54,7 +63,7 @@ static void tst1(zebra_sort_index_t si) YAZ_CHECK_EQ(zebra_sort_read(si, w), 0); wrbuf_rewind(w); - zebra_sort_add(si, "abcde1", 6); + sort_add_cstr(si, "abcde1"); zebra_sort_sysno(si, sysno); YAZ_CHECK_EQ(zebra_sort_read(si, w), 1); @@ -84,7 +93,7 @@ static void tst2(zebra_sort_index_t si) for (i = 0; i < 600; i++) /* 600 * 6 < max size =4K */ wrbuf_write(w1, "12345", 6); - zebra_sort_add(si, wrbuf_buf(w1), wrbuf_len(w1)); + zebra_sort_add(si, w1); zebra_sort_sysno(si, sysno);