-/*
- * Copyright (c) 1996-1998, Index Data.
- * See the file LICENSE for details.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: merge.c,v $
- * Revision 1.7 1998-03-11 11:18:18 adam
- * Changed the isc_merge to take into account the mfill (minimum-fill).
- *
- * Revision 1.6 1998/03/06 13:54:03 adam
- * Fixed two nasty bugs in isc_merge.
- *
- * Revision 1.5 1997/02/12 20:42:43 adam
- * Bug fix: during isc_merge operations, some pages weren't marked dirty
- * even though they should be. At this point the merge operation marks
- * a page dirty if the previous page changed at all. A better approach is
- * to mark it dirty if the last key written changed in previous page.
- *
- * Revision 1.4 1996/11/08 11:15:31 adam
- * Number of keys in chain are stored in first block and the function
- * to retrieve this information, isc_pp_num is implemented.
- *
- * Revision 1.3 1996/11/04 14:08:59 adam
- * Optimized free block usage.
- *
- * Revision 1.2 1996/11/01 13:36:46 adam
- * New element, max_blocks_mem, that control how many blocks of max size
- * to store in memory during isc_merge.
- * Function isc_merge now ignores delete/update of identical keys and
- * the proper blocks are then non-dirty and not written in flush_blocks.
- *
- * Revision 1.1 1996/11/01 08:59:15 adam
- * First version of isc_merge that supports update/delete.
- *
- */
+/* $Id: merge.c,v 1.21 2002-08-02 19:26:56 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
-
-#include <log.h>
+#include <yaz/log.h>
#include "isamc-p.h"
struct isc_merge_block {
int dirty; /* block is different from that on file */
};
+static void opt_blocks (ISAMC is, struct isc_merge_block *mb, int ptr,
+ int last)
+{
+ int i, no_dirty = 0;
+ for (i = 0; i<ptr; i++)
+ if (mb[i].dirty)
+ no_dirty++;
+ if (no_dirty*4 < ptr*3)
+ return;
+ /* bubble-sort it */
+ for (i = 0; i<ptr; i++)
+ {
+ int tmp, j, j_min = -1;
+ for (j = i; j<ptr; j++)
+ {
+ if (j_min < 0 || mb[j_min].block > mb[j].block)
+ j_min = j;
+ }
+ assert (j_min >= 0);
+ tmp = mb[j_min].block;
+ mb[j_min].block = mb[i].block;
+ mb[i].block = tmp;
+ mb[i].dirty = 1;
+ }
+ if (!last)
+ mb[i].dirty = 1;
+}
+
static void flush_blocks (ISAMC is, struct isc_merge_block *mb, int ptr,
char *r_buf, int *firstpos, int cat, int last,
int *numkeys)
for (i = 0; i<ptr; i++)
{
- unsigned short ssize;
- char *src;
-
/* consider this block number */
if (!mb[i].block)
{
mb[i+1].dirty = 1;
mb[i].dirty = 1;
}
+ }
+
+ for (i = 0; i<ptr; i++)
+ {
+ char *src;
+ ISAMC_BLOCK_SIZE ssize = mb[i+1].offset - mb[i].offset;
- ssize = mb[i+1].offset - mb[i].offset;
assert (ssize);
/* skip rest if not dirty */
ISAMC_PP pp;
char f_item[128], *f_item_ptr;
int f_more;
+ int last_dirty = 0;
+ int debug = is->method->debug;
struct isc_merge_block mb[200];
f_more = 1;
cat = pp->cat;
- if (is->method->debug > 1)
+ if (debug > 1)
logf (LOG_LOG, "isc: isc_merge begin %d %d", cat, pp->pos);
/* read first item from i */
/* the resulting output block is too small/empty. Delete
the original (if any)
*/
- if (is->method->debug > 3)
+ if (debug > 3)
logf (LOG_LOG, "isc: release A");
if (mb[ptr].block)
isc_release_block (is, pp->cat, mb[ptr].block);
mb[ptr].block = pp->pos;
- mb[ptr].dirty = 1;
+ if (!mb[ptr].dirty)
+ mb[ptr].dirty = 1;
if (ptr > 0)
mb[ptr-1].dirty = 1;
}
{
/* indicate new boundary based on the original file */
mb[++ptr].block = pp->pos;
- mb[ptr].dirty = (mb[ptr-1].dirty > 1) ? 1 : 0;
+ mb[ptr].dirty = last_dirty;
mb[ptr].offset = r_offset;
- if (is->method->debug > 3)
+ if (debug > 3)
logf (LOG_LOG, "isc: bound ptr=%d,offset=%d",
ptr, r_offset);
if (cat==is->max_cat && ptr >= is->method->max_blocks_mem)
/* We are dealing with block(s) of max size. Block(s)
except 1 will be flushed.
*/
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: flush A %d sections", ptr);
flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat,
0, &pp->numKeys);
}
border = get_border (is, mb, ptr, cat, firstpos);
}
+ last_dirty = 0;
if (!f_more)
cmp = -1;
else if (!i_more)
{
/* no! delete the item */
r_item = NULL;
+ last_dirty = 1;
mb[ptr].dirty = 2;
}
}
}
memcpy (r_item, i_item, i_item_ptr - i_item);
mb[ptr].dirty = 2;
+ last_dirty = 1;
/* move i */
i_item_ptr = i_item;
i_more = (*data->read_item)(data->clientData, &i_item_ptr,
if (border < new_offset && border >= r_offset)
{
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: border %d %d", ptr, border);
/* Max size of current block category reached ...
make new virtual block entry */
except one will be flushed. Note: the block(s) are
surely not the last one(s).
*/
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: flush B %d sections", ptr-1);
flush_blocks (is, mb, ptr-1, r_buf, &firstpos, cat,
0, &pp->numKeys);
cat++;
mb[j].dirty = 1;
mb[j].block = 0;
+ mb[ptr].offset = r_offset;
for (i = 1; i < ptr; i++)
{
int border = is->method->filecat[cat].ifill -
ISAMC_BLOCK_OFFSET_1 + mb[j].offset;
- if (is->method->debug > 3)
+ if (debug > 3)
logf (LOG_LOG, "isc: remap %d border=%d", i, border);
if (mb[i+1].offset > border && mb[i].offset <= border)
{
- if (is->method->debug > 3)
+ if (debug > 3)
logf (LOG_LOG, "isc: to %d %d", j, mb[i].offset);
mb[++j].dirty = 1;
mb[j].block = 0;
mb[j].offset = mb[i].offset;
}
}
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: remap from %d to %d sections to cat %d",
ptr, j, cat);
ptr = j;
border = get_border (is, mb, ptr, cat, firstpos);
+ if (debug > 3)
+ logf (LOG_LOG, "isc: border=%d r_offset=%d", border, r_offset);
}
}
if (mb[ptr].offset < r_offset)
{ /* empty output. Release last block if any */
if (cat == pp->cat && mb[ptr].block)
{
- if (is->method->debug > 3)
+ if (debug > 3)
logf (LOG_LOG, "isc: release C");
isc_release_block (is, pp->cat, mb[ptr].block);
mb[ptr].block = 0;
}
}
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: flush C, %d sections", ptr);
if (firstpos)
has changed */
if (numKeys != isc_pp_num (pp))
{
- if (is->method->debug > 2)
+ if (debug > 2)
logf (LOG_LOG, "isc: patch num keys firstpos=%d num=%d",
firstpos, numKeys);
bf_write (is->files[cat].bf, firstpos, ISAMC_BLOCK_OFFSET_N,
(*is->method->code_stop)(ISAMC_ENCODE, r_clientData);
if (!firstpos)
cat = 0;
- if (is->method->debug > 1)
+ if (debug > 1)
logf (LOG_LOG, "isc: isc_merge return %d %d", cat, firstpos);
isc_pp_close (pp);
return cat + firstpos * 8;
}
+/*
+ * $Log: merge.c,v $
+ * Revision 1.21 2002-08-02 19:26:56 adam
+ * Towards GPL
+ *
+ * Revision 1.20 1999/11/30 13:48:04 adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.19 1999/07/14 12:12:07 heikki
+ * Large-block isam-h (may not work too well... Abandoning for isam-d)
+ *
+ * Revision 1.17 1999/07/13 14:22:17 heikki
+ * Better allocation strategy in isamh_merge
+ *
+ * Revision 1.16 1999/07/08 14:23:27 heikki
+ * Fixed a bug in isamh_pp_read and cleaned up a bit
+ *
+ * Revision 1.15 1999/07/07 09:36:04 heikki
+ * Fixed an assertion in isamh
+ *
+ * Revision 1.13 1999/07/06 09:37:05 heikki
+ * Working on isamh - not ready yet.
+ *
+ * Revision 1.12 1999/06/30 15:03:55 heikki
+ * first take on isamh, the append-only isam structure
+ *
+ * Revision 1.11 1999/05/26 07:49:14 adam
+ * C++ compilation.
+ *
+ * Revision 1.10 1998/03/19 12:22:09 adam
+ * Minor change.
+ *
+ * Revision 1.9 1998/03/19 10:04:38 adam
+ * Minor changes.
+ *
+ * Revision 1.8 1998/03/18 09:23:55 adam
+ * Blocks are stored in chunks on free list - up to factor 2 in speed.
+ * Fixed bug that could occur in block category rearrangemen.
+ *
+ * Revision 1.7 1998/03/11 11:18:18 adam
+ * Changed the isc_merge to take into account the mfill (minimum-fill).
+ *
+ * Revision 1.6 1998/03/06 13:54:03 adam
+ * Fixed two nasty bugs in isc_merge.
+ *
+ * Revision 1.5 1997/02/12 20:42:43 adam
+ * Bug fix: during isc_merge operations, some pages weren't marked dirty
+ * even though they should be. At this point the merge operation marks
+ * a page dirty if the previous page changed at all. A better approach is
+ * to mark it dirty if the last key written changed in previous page.
+ *
+ * Revision 1.4 1996/11/08 11:15:31 adam
+ * Number of keys in chain are stored in first block and the function
+ * to retrieve this information, isc_pp_num is implemented.
+ *
+ * Revision 1.3 1996/11/04 14:08:59 adam
+ * Optimized free block usage.
+ *
+ * Revision 1.2 1996/11/01 13:36:46 adam
+ * New element, max_blocks_mem, that control how many blocks of max size
+ * to store in memory during isc_merge.
+ * Function isc_merge now ignores delete/update of identical keys and
+ * the proper blocks are then non-dirty and not written in flush_blocks.
+ *
+ * Revision 1.1 1996/11/01 08:59:15 adam
+ * First version of isc_merge that supports update/delete.
+ *
+ */
+
+