From: Adam Dickmeiss Date: Tue, 9 Sep 2008 09:08:11 +0000 (+0200) Subject: More work on multi-map for sort X-Git-Tag: v2.0.34~30 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=40869f1460c8b3804904ec207b18c5607f82de6e More work on multi-map for sort Added a new function zebra_sort_add_ent to eventually replace zebra_sort_add. --- diff --git a/include/sortidx.h b/include/sortidx.h index f944ba7..5800382 100644 --- a/include/sortidx.h +++ b/include/sortidx.h @@ -38,6 +38,10 @@ typedef struct zebra_sort_index *zebra_sort_index_t; #define ZEBRA_SORT_TYPE_ISAMB 2 #define ZEBRA_SORT_TYPE_MULTI 3 +struct zebra_sort_ent { + int num; + WRBUF wrbuf; +}; /** \brief creates sort handle \param bfs block files handle @@ -73,6 +77,15 @@ void zebra_sort_sysno(zebra_sort_index_t si, zint sysno); void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len); +/** \brief adds multi-map content to sort file + \param si sort index handle + \param ent multi-map value + + zebra_sort_type and zebra_sort_sysno must be called prior to this +*/ +void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent); + + /** \brief delete sort entry \param si sort index handle diff --git a/index/extract.c b/index/extract.c index 89f072a..0d648a6 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1867,8 +1867,56 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, const char *str; struct it_key key_in; +#define USE_SORT_ENT 1 +#if USE_SORT_ENT + NMEM nmem = nmem_create(); + struct sort_add_ent { + int ord; + int cmd; + struct sort_add_ent *next; + struct zebra_sort_ent sort_ent; + }; + struct sort_add_ent *sort_ent_list = 0; +#endif zebra_sort_sysno(si, sysno); +#if USE_SORT_ENT + while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + + struct sort_add_ent **e = &sort_ent_list; + while (*e && (*e)->ord != ord) + e = &(*e)->next; + if (!*e) + { + *e = nmem_malloc(nmem, 
sizeof(**e)); + (*e)->next = 0; + (*e)->sort_ent.wrbuf = wrbuf_alloc(); + (*e)->sort_ent.num = 0; + (*e)->ord = ord; + (*e)->cmd = cmd; + } + + wrbuf_write((*e)->sort_ent.wrbuf, str, slen); + wrbuf_putc((*e)->sort_ent.wrbuf, '\0'); + (*e)->sort_ent.num++; + } + if (sort_ent_list) + { + struct sort_add_ent *e = sort_ent_list; + for (; e; e = e->next) + { + zebra_sort_type(si, e->ord); + if (e->cmd == 1) + zebra_sort_add_ent(si, &e->sort_ent); + else + zebra_sort_delete(si); + wrbuf_destroy(e->sort_ent.wrbuf); + } + } + nmem_destroy(nmem); +#else while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); @@ -1879,6 +1927,7 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, else zebra_sort_delete(si); } +#endif } } diff --git a/index/reckeys.c b/index/reckeys.c index dec3e05..adea244 100644 --- a/index/reckeys.c +++ b/index/reckeys.c @@ -27,31 +27,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -#define NEW 0 - -#if NEW -struct zebra_rec_word_entry { - char *buf; - size_t len; - int ord; - int max_seq; - struct zebra_rec_word_entry *next; - struct zebra_rec_key_entry *keys; - struct zebra_rec_key_entry **last_key; -}; - -struct zebra_rec_key_entry { - struct it_key key; - struct zebra_rec_key_entry *next; -}; -#else struct zebra_rec_key_entry { char *buf; size_t len; struct it_key key; struct zebra_rec_key_entry *next; }; -#endif struct zebra_rec_keys_t_ { size_t buf_used; @@ -65,28 +46,10 @@ struct zebra_rec_keys_t_ { NMEM nmem; size_t hash_size; -#if NEW - struct zebra_rec_word_entry **entries; -#else struct zebra_rec_key_entry **entries; -#endif }; -#if NEW -struct zebra_rec_word_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, - const char *buf, - size_t len, - int ord) -{ - int i; - unsigned h = ord; - - for (i = 0; ientries[h % (unsigned) p->hash_size]; -} -#else struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, const char *buf, size_t 
len, @@ -105,7 +68,6 @@ struct zebra_rec_key_entry **zebra_rec_keys_mk_hash(zebra_rec_keys_t p, #endif return &p->entries[h % (unsigned) p->hash_size]; } -#endif static void init_hash(zebra_rec_keys_t p) { @@ -190,65 +152,6 @@ void zebra_rec_keys_close(zebra_rec_keys_t p) xfree(p); } -#if NEW -void zebra_rec_keys_write(zebra_rec_keys_t keys, - const char *str, size_t slen, - const struct it_key *key) -{ - char *dst; - const char *src = (char*) key; - - struct zebra_rec_word_entry **wep; - struct zebra_rec_key_entry **kep; - int ord = key->mem[0]; - int seq = key->mem[key->len-1]; - - assert(keys->owner_of_buffer); - - wep = zebra_rec_keys_mk_hash(keys, str, slen, ord); - - while (*wep) - { - struct zebra_rec_word_entry *e = *wep; - if (ord == e->ord && slen == e->len && !memcmp(str, e->buf, slen)) - break; - wep = &(*wep)->next; - } - - if (!*wep) - { - *wep = nmem_malloc(keys->nmem, sizeof(**wep)); - (*wep)->buf = nmem_malloc(keys->nmem, slen); - memcpy((*wep)->buf, str, slen); - (*wep)->len = slen; - (*wep)->ord = ord; - (*wep)->next = 0; - (*wep)->keys = 0; - (*wep)->max_seq = 0; - (*wep)->last_key = &(*wep)->keys; - } - if (seq > (*wep)->max_seq) - kep = (*wep)->last_key; - else - { - kep = &(*wep)->keys; - while (*kep) - { - if (!key_compare(key, &(*kep)->key)) - return; - kep = &(*kep)->next; - } - } - *kep = nmem_malloc(keys->nmem, sizeof(**kep)); - (*kep)->next = 0; - (*wep)->last_key = &(*kep)->next; - memcpy(&(*kep)->key, key, sizeof(*key)); - if (seq > (*wep)->max_seq) - { - (*wep)->max_seq = seq; - } -} -#else int zebra_rec_keys_add_hash(zebra_rec_keys_t keys, const char *str, size_t slen, const struct it_key *key) @@ -320,7 +223,6 @@ void zebra_rec_keys_write(zebra_rec_keys_t keys, *dst++ = '\0'; keys->buf_used = dst - keys->buf; } -#endif void zebra_rec_keys_reset(zebra_rec_keys_t keys) { @@ -337,42 +239,6 @@ int zebra_rec_keys_rewind(zebra_rec_keys_t keys) assert(keys); iscz1_reset(keys->decode_handle); -#if NEW - if (keys->buf_used == 0) - { - 
- size_t i; - for (i = 0; i<keys->hash_size; i++) - { - struct zebra_rec_word_entry *we = keys->entries[i]; - for (; we; we = we->next) - { - struct zebra_rec_key_entry *ke = we->keys; - for (; ke; ke = ke->next) - { - const char *src = (char*) &ke->key; - char *dst; - if (keys->buf_used+1024 > keys->buf_max) - { - char *b = (char *) xmalloc (keys->buf_max += 128000); - if (keys->buf_used > 0) - memcpy (b, keys->buf, keys->buf_used); - xfree (keys->buf); - keys->buf = b; - } - - dst = keys->buf + keys->buf_used; - - iscz1_encode(keys->encode_handle, &dst, &src); - - memcpy (dst, we->buf, we->len); - dst += we->len; - *dst++ = '\0'; - keys->buf_used = dst - keys->buf; - } - } - } - } -#endif keys->fetch_offset = 0; if (keys->buf_used == 0) diff --git a/index/sortidx.c b/index/sortidx.c index 65bff6b..ef15209 100644 --- a/index/sortidx.c +++ b/index/sortidx.c @@ -374,6 +374,77 @@ void zebra_sort_delete(zebra_sort_index_t si) } } +void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) +{ + struct sortFile *sf = si->current_file; + int len; + + if (!sf || !sf->u.bf) + return; + switch(si->type) + { + case ZEBRA_SORT_TYPE_FLAT: + /* take first entry from wrbuf - itself is 0-terminated */ + len = strlen(wrbuf_buf(ent->wrbuf)); + if (len > SORT_IDX_ENTRYSIZE) + len = SORT_IDX_ENTRYSIZE; + + memcpy(si->entry_buf, wrbuf_buf(ent->wrbuf), len); + if (len < SORT_IDX_ENTRYSIZE) /* zero-fill the entire unused tail so the written record carries no stale bytes from si->entry_buf */ + memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); + bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); + break; + case ZEBRA_SORT_TYPE_ISAMB: + assert(sf->u.isamb); + + assert(sf->no_inserted == 0); + if (sf->no_inserted == 0) + { + struct sort_term_stream s; + ISAMC_I isamc_i; + /* take first entry from wrbuf - itself is 0-terminated */ + len = strlen(wrbuf_buf(ent->wrbuf)); + + s.st.sysno = si->sysno; + if (len >= SORT_MAX_TERM) + len = SORT_MAX_TERM-1; + memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); + s.st.term[len] = '\0'; + s.st.length = len; + s.no = 1; + s.insert_flag = 1; 
+ isamc_i.clientData = &s; + isamc_i.read_item = sort_term_code_read; + + isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); + sf->no_inserted++; + } + break; + case ZEBRA_SORT_TYPE_MULTI: + assert(sf->u.isamb); + if (sf->no_inserted == 0) + { + struct sort_term_stream s; + ISAMC_I isamc_i; + len = wrbuf_len(ent->wrbuf); /* all values, each followed by its own '\0' separator */ + + s.st.sysno = si->sysno; + if (len >= SORT_MAX_MULTI) + len = SORT_MAX_MULTI-1; + memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); + s.st.length = len; + s.no = 1; + s.insert_flag = 1; + isamc_i.clientData = &s; + isamc_i.read_item = sort_term_code_read; + + isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); + sf->no_inserted++; + } + break; + } +} + void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) { struct sortFile *sf = si->current_file;