X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fsortidx.c;h=82f7e7fc04945550529f7290029f05e86365aca4;hb=7598c76f1a4989a91003bd4fbd90f30a7c7255ef;hp=ef15209b47031ece827a1a1159b4d303c3369de8;hpb=40869f1460c8b3804904ec207b18c5607f82de6e;p=idzebra-moved-to-github.git diff --git a/index/sortidx.c b/index/sortidx.c index ef15209..82f7e7f 100644 --- a/index/sortidx.c +++ b/index/sortidx.c @@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA struct sort_term { zint sysno; + zint section_id; zint length; char term[SORT_MAX_MULTI]; }; @@ -46,8 +47,8 @@ static void sort_term_log_item(int level, const void *b, const char *txt) memcpy(&a1, b, sizeof(a1)); - yaz_log(level, "%s " ZINT_FORMAT " %.*s", txt, a1.sysno, - (int) a1.length, a1.term); + yaz_log(level, "%s " ZINT_FORMAT " " ZINT_FORMAT " %.*s", txt, a1.sysno, + a1.section_id, (int) a1.length-1, a1.term); } static int sort_term_compare(const void *a, const void *b) @@ -61,6 +62,11 @@ static int sort_term_compare(const void *a, const void *b) return 1; else if (a1.sysno < b1.sysno) return -1; + if (a1.section_id > b1.section_id) + return 1; + else if (a1.section_id < b1.section_id) + return -1; + return 0; } @@ -88,7 +94,8 @@ static void sort_term_encode2(void *p, char **dst, const char **src) memcpy(&a1, *src, sizeof(a1)); *src += sizeof(a1); - zebra_zint_encode(dst, a1.sysno); /* encode record id */ + zebra_zint_encode(dst, a1.sysno); + zebra_zint_encode(dst, a1.section_id); zebra_zint_encode(dst, a1.length); /* encode length */ memcpy(*dst, a1.term, a1.length); *dst += a1.length; @@ -100,10 +107,11 @@ static void sort_term_decode1(void *p, char **dst, const char **src) size_t slen; zebra_zint_decode(src, &a1.sysno); + a1.section_id = 0; strcpy(a1.term, *src); - slen = strlen(a1.term); - *src += slen + 1; + slen = 1 + strlen(a1.term); + *src += slen; a1.length = slen; memcpy(*dst, &a1, sizeof(a1)); @@ -115,6 +123,7 @@ static void sort_term_decode2(void *p, char **dst, const char **src) struct sort_term a1; zebra_zint_decode(src, &a1.sysno); + zebra_zint_decode(src, &a1.section_id); zebra_zint_decode(src, &a1.length); memcpy(a1.term, *src, a1.length); @@ -315,31 +324,32 @@ int zebra_sort_type(zebra_sort_index_t si, int id) return 0; } +static void zebra_sortf_rewind(struct sortFile *sf) +{ + if (sf->isam_pp) + isamb_pp_close(sf->isam_pp); + sf->isam_pp = 0; + sf->no_inserted = 0; + sf->no_deleted = 0; +} + void zebra_sort_sysno(zebra_sort_index_t si, zint sysno) { - struct sortFile *sf = si->current_file; zint new_sysno = rec_sysno_to_int(sysno); + struct sortFile *sf; for (sf = si->files; sf; sf = sf->next) { if (sf->no_inserted || sf->no_deleted) - { - isamb_pp_close(sf->isam_pp); - sf->isam_pp = 0; - } - else if (sf->isam_pp && new_sysno < si->sysno && sf->isam_pp) - { - isamb_pp_close(sf->isam_pp); - sf->isam_pp = 0; - } - sf->no_inserted = 0; - sf->no_deleted = 0; + zebra_sortf_rewind(sf); + else if (sf->isam_pp && new_sysno <= si->sysno) + zebra_sortf_rewind(sf); } si->sysno = new_sysno; } -void zebra_sort_delete(zebra_sort_index_t si) +void zebra_sort_delete(zebra_sort_index_t si, zint section_id) { struct sortFile *sf = si->current_file; @@ -348,7 +358,8 @@ void zebra_sort_delete(zebra_sort_index_t si) switch(si->type) { case ZEBRA_SORT_TYPE_FLAT: - zebra_sort_add(si, "", 0); + memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE); + bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); break; case ZEBRA_SORT_TYPE_ISAMB: case ZEBRA_SORT_TYPE_MULTI: @@ -359,6 +370,7 @@ void zebra_sort_delete(zebra_sort_index_t si) ISAMC_I isamc_i; s.st.sysno = si->sysno; + s.st.section_id = section_id; s.st.length = 0; s.st.term[0] = '\0'; @@ -374,7 +386,7 @@ void zebra_sort_delete(zebra_sort_index_t si) } } -void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) +void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf) { struct sortFile *sf = si->current_file; int len; @@ -385,11 +397,11 @@ void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) { case ZEBRA_SORT_TYPE_FLAT: /* take first entry from wrbuf - itself is 0-terminated */ - len = strlen(wrbuf_buf(ent->wrbuf)); + len = strlen(wrbuf_buf(wrbuf)); if (len > SORT_IDX_ENTRYSIZE) len = SORT_IDX_ENTRYSIZE; - memcpy(si->entry_buf, wrbuf_buf(ent->wrbuf), len); + memcpy(si->entry_buf, wrbuf_buf(wrbuf), len); if (len < SORT_IDX_ENTRYSIZE-len) memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); @@ -397,88 +409,22 @@ void zebra_sort_add_ent(zebra_sort_index_t si, struct zebra_sort_ent *ent) case ZEBRA_SORT_TYPE_ISAMB: assert(sf->u.isamb); - assert(sf->no_inserted == 0); if (sf->no_inserted == 0) { struct sort_term_stream s; ISAMC_I isamc_i; /* take first entry from wrbuf - itself is 0-terminated */ - len = strlen(wrbuf_buf(ent->wrbuf)); - s.st.sysno = si->sysno; - if (len >= SORT_MAX_TERM) - len = SORT_MAX_TERM-1; - memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); - s.st.term[len] = '\0'; - s.st.length = len; - s.no = 1; - s.insert_flag = 1; - isamc_i.clientData = &s; - isamc_i.read_item = sort_term_code_read; - - isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); - sf->no_inserted++; - } - break; - case ZEBRA_SORT_TYPE_MULTI: - assert(sf->u.isamb); - if (sf->no_inserted == 0) - { - struct sort_term_stream s; - ISAMC_I isamc_i; - len = wrbuf_len(ent->wrbuf); - - s.st.sysno = si->sysno; - if (len >= SORT_MAX_MULTI) - len = SORT_MAX_MULTI-1; - memcpy(s.st.term, wrbuf_buf(ent->wrbuf), len); + len = wrbuf_len(wrbuf); + if (len > SORT_MAX_TERM) + { + len = SORT_MAX_TERM; + wrbuf_buf(wrbuf)[len-1] = '\0'; + } + memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; - s.no = 1; - s.insert_flag = 1; - isamc_i.clientData = &s; - isamc_i.read_item = sort_term_code_read; - - isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i); - sf->no_inserted++; - } - break; - } -} - -void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) -{ - struct sortFile *sf = si->current_file; - - if (!sf || !sf->u.bf) - return; - switch(si->type) - { - case ZEBRA_SORT_TYPE_FLAT: - if (len > SORT_IDX_ENTRYSIZE) - { - len = SORT_IDX_ENTRYSIZE; - memcpy(si->entry_buf, buf, len); - } - else - { - memcpy(si->entry_buf, buf, len); - memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len); - } - bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf); - break; - case ZEBRA_SORT_TYPE_ISAMB: - assert(sf->u.isamb); - if (sf->no_inserted == 0) - { - struct sort_term_stream s; - ISAMC_I isamc_i; - s.st.sysno = si->sysno; - if (len >= SORT_MAX_TERM) - len = SORT_MAX_TERM-1; - memcpy(s.st.term, buf, len); - s.st.term[len] = '\0'; - s.st.length = len; + s.st.section_id = 0; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -494,12 +440,16 @@ void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) { struct sort_term_stream s; ISAMC_I isamc_i; - - s.st.sysno = si->sysno; - if (len >= SORT_MAX_MULTI) - len = SORT_MAX_MULTI-1; - memcpy(s.st.term, buf, len); + len = wrbuf_len(wrbuf); + if (len > SORT_MAX_MULTI) + { + len = SORT_MAX_MULTI; + wrbuf_buf(wrbuf)[len-1] = '\0'; + } + memcpy(s.st.term, wrbuf_buf(wrbuf), len); s.st.length = len; + s.st.sysno = si->sysno; + s.st.section_id = section_id; s.no = 1; s.insert_flag = 1; isamc_i.clientData = &s; @@ -512,7 +462,8 @@ void zebra_sort_add(zebra_sort_index_t si, const char *buf, int len) } } -int zebra_sort_read(zebra_sort_index_t si, WRBUF w) + +int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w) { int r; struct sortFile *sf = si->current_file; @@ -526,43 +477,40 @@ int zebra_sort_read(zebra_sort_index_t si, WRBUF w) case ZEBRA_SORT_TYPE_FLAT: r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf); if (r && *tbuf) + { wrbuf_puts(w, tbuf); - else - return 0; + wrbuf_putc(w, '\0'); + return 1; + } break; case ZEBRA_SORT_TYPE_ISAMB: case ZEBRA_SORT_TYPE_MULTI: - if (!sf->isam_p) - return 0; - else + if (sf->isam_p) { - struct sort_term st, st_untilbuf; if (!sf->isam_pp) sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1); - if (!sf->isam_pp) - return 0; - - st_untilbuf.sysno = si->sysno; - st_untilbuf.length = 0; - st_untilbuf.term[0] = '\0'; - r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); - if (!r) - return 0; - if (r) + if (sf->isam_pp) { - if (st.sysno != si->sysno) + struct sort_term st, st_untilbuf; + + st_untilbuf.sysno = si->sysno; + st_untilbuf.section_id = 0; + st_untilbuf.length = 0; + st_untilbuf.term[0] = '\0'; + r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf); + if (r && st.sysno == si->sysno) { - yaz_log(YLOG_LOG, "Received sysno=" ZINT_FORMAT " looking for " - ZINT_FORMAT, st.sysno, si->sysno); - return 0; + wrbuf_write(w, st.term, st.length); + if (section_id) + *section_id = st.section_id; + return 1; } - wrbuf_write(w, st.term, st.length); } } break; } - return 1; + return 0; } /* * Local variables: