X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=isamb%2Fisamb.c;h=151960e6079081eb1ef756d4bfa5001f7d1805b3;hb=e075935572b3ef0a246aaefde50efadbfa12e5c4;hp=ab657d74b82c80b4f7b1630cce8e90ee6bcc1ea0;hpb=b09c1d0aa414cb43fb667be54a29b3cc7279ea85;p=idzebra-moved-to-github.git diff --git a/isamb/isamb.c b/isamb/isamb.c index ab657d7..151960e 100644 --- a/isamb/isamb.c +++ b/isamb/isamb.c @@ -1,4 +1,4 @@ -/* $Id: isamb.c,v 1.72 2005-03-05 09:19:15 adam Exp $ +/* $Id: isamb.c,v 1.82 2006-05-10 08:13:26 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -72,7 +72,7 @@ struct ISAMB_head { #define ISAMB_PTR_CODEC 1 struct ISAMB_cache_entry { - ISAMB_P pos; + ISAM_P pos; unsigned char *buf; int dirty; int hits; @@ -102,7 +102,7 @@ struct ISAMB_s { }; struct ISAMB_block { - ISAMB_P pos; + ISAM_P pos; int cat; int size; int leaf; @@ -119,7 +119,7 @@ struct ISAMB_block { struct ISAMB_PP_s { ISAMB isamb; - ISAMB_P pos; + ISAM_P pos; int level; int maxlevel; /* total depth */ zint total_size; @@ -141,10 +141,10 @@ static void encode_ptr(char **dst, zint pos) while (pos > 127) { - *bp++ = 128 | (pos & 127); + *bp++ = (unsigned char) (128 | (pos & 127)); pos = pos >> 7; } - *bp++ = pos; + *bp++ = (unsigned char) pos; *dst = (char *) bp; } #else @@ -181,7 +181,7 @@ static void decode_ptr(const char **src, zint *pos) #endif ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, - int cache) + int cache) { ISAMB isamb = xmalloc(sizeof(*isamb)); int i, b_size = ISAMB_MIN_SIZE; @@ -196,7 +196,7 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, isamb->skipped_numbers = 0; isamb->returned_numbers = 0; for (i = 0; iskipped_nodes[i] = isamb->accessed_nodes[i] = 0; + isamb->skipped_nodes[i] = isamb->accessed_nodes[i] = 0; assert(cache == 0); isamb->file = xmalloc(sizeof(*isamb->file) * isamb->no_cat); @@ -266,9 +266,9 @@ ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, decode_ptr(&src, &isamb->file[i].head.first_block); decode_ptr(&src, &isamb->file[i].head.last_block); decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_size = zint_tmp; + isamb->file[i].head.block_size = (int) zint_tmp; decode_ptr(&src, &zint_tmp); - isamb->file[i].head.block_max = zint_tmp; + isamb->file[i].head.block_max = (int) zint_tmp; decode_ptr(&src, &isamb->file[i].head.free_list); } assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset); @@ -299,7 +299,7 @@ static void flush_blocks (ISAMB b, int cat) } } -static int cache_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr) +static int cache_block (ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr) { int cat = (int) (pos&CAT_MASK); int off = (int) (((pos/CAT_MAX) & @@ -432,7 +432,7 @@ void isamb_close (ISAMB isamb) * Reserve 5 bytes for large block sizes. 1 for small ones .. Number of items. We can thus have at most 2^40 nodes. */ -static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) +static struct ISAMB_block *open_block(ISAMB b, ISAM_P pos) { int cat = (int) (pos&CAT_MASK); const char *src; @@ -456,7 +456,7 @@ static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) abort(); } } - p->bytes = p->buf + offset; + p->bytes = (char *)p->buf + offset; p->leaf = p->buf[0]; p->size = (p->buf[1] + 256 * p->buf[2]) - offset; if (p->size < 0) @@ -465,7 +465,7 @@ static struct ISAMB_block *open_block(ISAMB b, ISAMC_P pos) p->size, pos); } assert (p->size >= 0); - src = p->buf + 3; + src = (char*) p->buf + 3; decode_ptr(&src, &p->no_items); p->offset = 0; @@ -509,7 +509,7 @@ struct ISAMB_block *new_block (ISAMB b, int leaf, int cat) p->cat = cat; b->file[cat].head_dirty = 1; memset (p->buf, 0, b->file[cat].head.block_size); - p->bytes = p->buf + b->file[cat].head.block_offset; + p->bytes = (char*)p->buf + b->file[cat].head.block_offset; p->leaf = leaf; p->size = 0; p->dirty = 1; @@ -544,7 +544,7 @@ static void check_block (ISAMB b, struct ISAMB_block *p) char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; void *c1 = (*b->method->codec.start)(); decode_ptr(&src, &pos); @@ -592,7 +592,7 @@ void close_block(ISAMB b, struct ISAMB_block *p) { int offset = b->file[p->cat].head.block_offset; int size = p->size + offset; - char *dst = p->buf + 3; + char *dst = (char*)p->buf + 3; assert (p->size >= 0); /* memset becuase encode_ptr usually does not write all bytes */ @@ -628,7 +628,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, char *startp = p->bytes; const char *src = startp; char *endp = p->bytes + p->size; - ISAMB_P pos; + ISAM_P pos; struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0; char sub_item[DST_ITEM_MAX]; int sub_size; @@ -732,11 +732,12 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, p->size = dst - dst_buf; assert (p->size >= 0); - if (p->size <= b->file[p->cat].head.block_max) { /* it fits OK in this block */ memcpy (startp, dst_buf, dst - dst_buf); + + close_block(b, sub_p2); } else { @@ -754,6 +755,9 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, src = dst_buf; endp = dst; + p->dirty = 1; + close_block(b, sub_p2); + half = src + b->file[p->cat].head.block_size/2; decode_ptr(&src, &pos); @@ -807,8 +811,7 @@ int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item, (*sp)->no_items = p->no_items - no_items_first_half; p->no_items = no_items_first_half; } - p->dirty = 1; - close_block(b, sub_p2); + p->dirty = 1; } close_block(b, sub_p1); (*b->method->codec.stop)(c1); @@ -839,6 +842,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int cut_item_size = 0; int no_items = 0; /* number of items (total) */ int no_items_1 = 0; /* number of items (first half) */ + int inserted_dst_bytes = 0; if (p && p->size) { @@ -852,6 +856,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { const char *dst_item = 0; /* resulting item to be inserted */ char *lookahead_next; + char *dst_0 = dst; int d = -1; if (lookahead_item) @@ -877,7 +882,6 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, else dst_item = file_item_buf; - if (!*lookahead_mode && d == 0) { /* it's a deletion and they match so there is nothing to be @@ -914,7 +918,8 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, { /* we must move the lookahead pointer */ - if (dst > maxp) + inserted_dst_bytes += (dst - dst_0); + if (inserted_dst_bytes >= quater) /* no more room. Mark lookahead as "gone".. */ lookahead_item = 0; else @@ -966,17 +971,19 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, } } } - maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; + /* this loop runs when we are "appending" to a leaf page. That is either it's empty (new) or all file items have been read in previous loop */ + + maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater; while (lookahead_item) { char *dst_item; const char *src = lookahead_item; char *dst_0 = dst; - /* compare lookahead with max item */ + /* if we have a lookahead item, we stop if we exceed the value of it */ if (max_item && (*b->method->compare_item)(max_item, lookahead_item) <= 0) { @@ -1054,6 +1061,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, /* first half */ p->size = half1 - dst_buf; + assert(p->size <= b->file[p->cat].head.block_max); memcpy (p->bytes, dst_buf, half1 - dst_buf); p->no_items = no_items_1; @@ -1069,6 +1077,7 @@ int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, memcpy (first_dst, half2, dst - half2); (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2); + assert((*sp2)->size <= b->file[p->cat].head.block_max); (*sp2)->no_items = no_items - no_items_1; (*sp2)->dirty = 1; p->dirty = 1; @@ -1102,7 +1111,7 @@ int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item, sub_size, max_item); } -int isamb_unlink (ISAMB b, ISAMC_P pos) +int isamb_unlink (ISAMB b, ISAM_P pos) { struct ISAMB_block *p1; @@ -1143,7 +1152,7 @@ int isamb_unlink (ISAMB b, ISAMC_P pos) return 0; } -ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) +void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream) { char item_buf[DST_ITEM_MAX]; char *item_ptr; @@ -1160,7 +1169,8 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); } - return 1; + *pos = 1; + return; } item_ptr = item_buf; more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode); @@ -1170,8 +1180,8 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) char sub_item[DST_ITEM_MAX]; int sub_size; - if (pos) - p = open_block(b, pos); + if (*pos) + p = open_block(b, *pos); more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp, sub_item, &sub_size, 0); if (sp) @@ -1197,7 +1207,7 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) p2->size = dst - p2->bytes; p2->no_items = p->no_items + sp->no_items; - pos = p2->pos; /* return new super page */ + *pos = p2->pos; /* return new super page */ close_block(b, sp); close_block(b, p2); #if INT_ENCODE @@ -1206,7 +1216,7 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) } else { - pos = p->pos; /* return current one (again) */ + *pos = p->pos; /* return current one (again) */ } if (p->no_items == 0) must_delete = 1; @@ -1216,13 +1226,12 @@ ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream) } if (must_delete) { - isamb_unlink(b, pos); - return 0; + isamb_unlink(b, *pos); + *pos = 0; } - return pos; } -ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAMB_P pos, int *level, int scope) +ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope) { ISAMB_PP pp = xmalloc(sizeof(*pp)); int i; @@ -1264,12 +1273,12 @@ ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAMB_P pos, int *level, int scope) return pp; } -ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope) +ISAMB_PP isamb_pp_open (ISAMB isamb, ISAM_P pos, int scope) { return isamb_pp_open_x(isamb, pos, 0, scope); } -void isamb_pp_close_x(ISAMB_PP pp, int *size, int *blocks) +void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks) { int i; if (!pp) @@ -1312,7 +1321,7 @@ void isamb_pp_close (ISAMB_PP pp) } /* simple recursive dumper .. */ -static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), +static void isamb_dump_r (ISAMB b, ISAM_P pos, void (*pr)(const char *str), int level) { char buf[1024]; @@ -1341,7 +1350,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), else { const char *src = p->bytes + p->offset; - ISAMB_P sub; + ISAM_P sub; decode_ptr(&src, &sub); p->offset = src - (char*) p->bytes; @@ -1374,7 +1383,7 @@ static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str), } } -void isamb_dump(ISAMB b, ISAMB_P pos, void (*pr)(const char *str)) +void isamb_dump(ISAMB b, ISAM_P pos, void (*pr)(const char *str)) { isamb_dump_r(b, pos, pr, 0); } @@ -1508,7 +1517,7 @@ static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf } } /* forward_on_leaf */ -static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos) +static int isamb_pp_climb_level(ISAMB_PP pp, ISAM_P *pos) { /* climbs higher in the tree, until finds a level with data left */ /* returns the node to (consider to) descend to in *pos) */ struct ISAMB_block *p = pp->block[pp->level]; @@ -1648,7 +1657,7 @@ static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf) } /* forward_unode */ -static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, +static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAM_P pos, const void *untilbuf) { /* climbs down the tree, from pos, to the leftmost leaf */ struct ISAMB_block *p = pp->block[pp->level]; @@ -1688,7 +1697,7 @@ static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, static int isamb_pp_find_next_leaf(ISAMB_PP pp) { /* finds the next leaf by climbing up and down */ - ISAMB_P pos; + ISAM_P pos; if (!isamb_pp_climb_level(pp, &pos)) return 0; isamb_pp_descend_to_leaf(pp, pos, 0); @@ -1697,7 +1706,7 @@ static int isamb_pp_find_next_leaf(ISAMB_PP pp) static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf) { /* climbs up and descends to a leaf where values >= *untilbuf are found */ - ISAMB_P pos; + ISAM_P pos; #if ISAMB_DEBUG struct ISAMB_block *p = pp->block[pp->level]; yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc starting " @@ -1793,12 +1802,12 @@ int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf) void isamb_pp_pos(ISAMB_PP pp, double *current, double *total) { /* return an estimate of the current position and of the total number of */ /* occureences in the isam tree, based on the current leaf */ - struct ISAMB_block *p = pp->block[pp->level]; assert(total); assert(current); - assert(p->leaf); + + /* if end-of-stream PP may not be leaf */ - *total = pp->block[0]->no_items; + *total = (double) (pp->block[0]->no_items); *current = (double) pp->returned_numbers; #if ISAMB_DEBUG yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn=" @@ -1817,7 +1826,7 @@ int isamb_pp_forward2(ISAMB_PP pp, void *buf, const void *untilb) again: while (p->offset == p->size) { - ISAMB_P pos; + ISAM_P pos; #if INT_ENCODE const char *src_0; void *c1; @@ -1938,3 +1947,11 @@ again: } return 1; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +