2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
6 * Isamh - append-only isam
/* Forward declarations of file-local helpers that are defined further
   down in this file. */
25 static void flush_block (ISAMH is, int cat);
26 static void release_fc (ISAMH is, int cat);
27 static void init_fc (ISAMH is, int cat);
/* Tested by the `#if ISAMH_FREELIST_CHUNK` that precedes the first set of
   flush_block / alloc_block / release_block definitions.
   NOTE(review): a second set of same-named definitions follows later,
   presumably the #else branch - the #else line itself is not visible in
   this listing. */
29 #define ISAMH_FREELIST_CHUNK 1
/* isamh_getmethod: allocate an ISAMH_M method structure with xmalloc and
   fill in default values.  Visible here: compare_item is cleared to NULL
   and max_blocks_mem is set to 10.
   NOTE(review): the entries of def_cat, the remaining field assignments
   and the return statement are not visible in this listing. */
33 ISAMH_M isamh_getmethod (void)
/* Static table of default block categories; per the column comment below,
   each entry is a block size and a key limit. */
35 static struct ISAMH_filecat_s def_cat[] = {
37 /* blocksz, max keys before switching size */
48 /* assume about 2 bytes per pointer, when compressed. The head uses */
49 /* 16 bytes, and other blocks use 8 for header info... If you want 3 */
50 /* blocks of 32 bytes, say max 16+24+24 = 64 keys */
53 ISAMH_M m = (ISAMH_M) xmalloc (sizeof(*m));
61 m->compare_item = NULL;
65 m->max_blocks_mem = 10;
/* isamh_open: build an ISAMH handle on top of a set of block files.
   - bfs, name: each file i < is->no_files is opened through bf_open under
     the name `name` followed by the letter 'A' + i.
   - method: copied into storage owned by the handle.
   For every file the header is read from block 0 and the per-file
   allocation state and statistics counters are reset.
   NOTE(review): short lines (braces, local declarations, the use of
   writeflag, the assignment of is->no_files, the return statement) are
   not visible in this listing. */
71 ISAMH isamh_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method)
74 ISAMH_filecat filecat;
78 is = (ISAMH) xmalloc (sizeof(*is));
/* Give the handle a private copy of the caller's method structure. */
80 is->method = (ISAMH_M) xmalloc (sizeof(*is->method));
81 memcpy (is->method, method, sizeof(*method));
82 filecat = is->method->filecat;
85 /* determine number of block categories */
86 if (is->method->debug)
87 logf (LOG_LOG, "isc: bsize ifill mfill mblocks");
/* Walk the category table, remembering the largest block size seen.  The
   loop ends after the entry whose mblocks field is 0 has been handled. */
90 if (is->method->debug)
91 logf (LOG_LOG, "isc:%6d %6d",
92 filecat[i].bsize, filecat[i].mblocks);
93 if (max_buf_size < filecat[i].bsize)
94 max_buf_size = filecat[i].bsize;
95 } while (filecat[i++].mblocks);
99 /* max_buf_size is the largest buffer to be used during merge */
/* Raise max_buf_size to the next multiple of filecat[i].bsize above its
   current value, then make sure it covers at least 1 + max_blocks_mem
   blocks of that size. */
100 max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;
101 if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)
102 max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;
105 if (is->method->debug)
106 logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size);
108 assert (is->no_files > 0);
/* One ISAMH_file slot per block file. */
109 is->files = (ISAMH_file) xmalloc (sizeof(*is->files)*is->no_files);
/* Work buffers: each is max_buf_size plus 256 spare bytes, zero-filled. */
113 is->merge_buf = (char *) xmalloc (max_buf_size+256);
114 memset (is->merge_buf, 0, max_buf_size+256);
116 is->startblock = (char *) xmalloc (max_buf_size+256);
117 memset (is->startblock, 0, max_buf_size+256);
118 is->lastblock = (char *) xmalloc (max_buf_size+256);
119 memset (is->lastblock, 0, max_buf_size+256);
120 /* The spare 256 bytes should not be needed! */
/* NOTE(review): the condition that selects between the allocations above
   and the NULL assignment below is not visible in this listing. */
124 is->startblock = is->lastblock = NULL;
126 for (i = 0; i<is->no_files; i++)
/* File name is `name` plus one letter: 'A' for file 0, 'B' for file 1, ...
   NOTE(review): the size of fname is not visible here - confirm that it
   can hold name plus two bytes. */
130 sprintf (fname, "%s%c", name, i+'A');
131 is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
133 is->files[i].head_is_dirty = 0;
/* Read the stored header from block 0.  The two assignments that follow
   the test give lastblock and freelist their initial values (1 and 0). */
134 if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMH_head),
137 is->files[i].head.lastblock = 1;
138 is->files[i].head.freelist = 0;
/* Free-list buffer state: alloc_buf is one block large; alloc_entries_max
   is the number of ints that fit in a block, minus one. */
140 is->files[i].alloc_entries_num = 0;
141 is->files[i].alloc_entries_max =
142 is->method->filecat[i].bsize / sizeof(int) - 1;
143 is->files[i].alloc_buf = (char *)
144 xmalloc (is->method->filecat[i].bsize);
/* Reset the statistics counters that isamh_close logs. */
145 is->files[i].no_writes = 0;
146 is->files[i].no_reads = 0;
147 is->files[i].no_skip_writes = 0;
148 is->files[i].no_allocated = 0;
149 is->files[i].no_released = 0;
150 is->files[i].no_remap = 0;
151 is->files[i].no_forward = 0;
152 is->files[i].no_backward = 0;
153 is->files[i].sum_forward = 0;
154 is->files[i].sum_backward = 0;
155 is->files[i].no_next = 0;
156 is->files[i].no_prev = 0;
/* isamh_block_used: for a valid file index `type`, return that file's
   head.lastblock minus one.
   NOTE(review): the statement executed when `type` is out of range is not
   visible in this listing. */
163 int isamh_block_used (ISAMH is, int type)
165 if (type < 0 || type >= is->no_files)
167 return is->files[type].head.lastblock-1;
/* isamh_block_size: for a valid file index `type`, return the block size
   (bsize) of that category from the method's filecat table.
   NOTE(review): the statement executed when `type` is out of range is not
   visible in this listing. */
170 int isamh_block_size (ISAMH is, int type)
172 ISAMH_filecat filecat = is->method->filecat;
173 if (type < 0 || type >= is->no_files)
175 return filecat[type].bsize;
/* isamh_close: shut down an ISAMH handle.  For each file the header is
   written back to block 0 when head_is_dirty is set, the fc_list is freed
   and the block file is closed; the startblock and lastblock buffers are
   freed afterwards.  When method->debug is set, per-file statistics are
   logged along the way.
   NOTE(review): short lines (braces, calls between the visible ones, the
   release of the remaining allocations, the return statement) are not
   visible in this listing. */
178 int isamh_close (ISAMH is)
182 if (is->method->debug)
184 logf (LOG_LOG, "isc: next forw mid-f prev backw mid-b");
/* One line per file: next/forward counts and mean forward distance, then
   prev/backward counts and mean backward distance.  Each mean is computed
   only when its count is non-zero. */
185 for (i = 0; i<is->no_files; i++)
186 logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",
187 is->files[i].no_next,
188 is->files[i].no_forward,
189 is->files[i].no_forward ?
190 (double) is->files[i].sum_forward/is->files[i].no_forward
192 is->files[i].no_prev,
193 is->files[i].no_backward,
194 is->files[i].no_backward ?
195 (double) is->files[i].sum_backward/is->files[i].no_backward
198 if (is->method->debug)
199 logf (LOG_LOG, "isc: writes reads skipped alloc released remap");
200 for (i = 0; i<is->no_files; i++)
203 assert (is->files[i].bf);
/* Persist the in-memory header only if it was modified. */
204 if (is->files[i].head_is_dirty)
205 bf_write (is->files[i].bf, 0, 0, sizeof(ISAMH_head),
207 if (is->method->debug)
208 logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",
209 is->files[i].no_writes,
210 is->files[i].no_reads,
211 is->files[i].no_skip_writes,
212 is->files[i].no_allocated,
213 is->files[i].no_released,
214 is->files[i].no_remap);
215 xfree (is->files[i].fc_list);
217 bf_close (is->files[i].bf);
220 xfree (is->startblock);
221 xfree (is->lastblock);
/* isamh_read_block: bump the read counter of file `cat`, then read block
   `pos` of that file into dst with bf_read and return bf_read's result.
   NOTE(review): the two zero arguments presumably mean "offset 0, whole
   block" - confirm against the bf_read declaration. */
227 int isamh_read_block (ISAMH is, int cat, int pos, char *dst)
229 ++(is->files[cat].no_reads);
230 return bf_read (is->files[cat].bf, pos, 0, 0, dst);
/* isamh_write_block: bump the write counter of file `cat`, log the write
   when method->debug is above 2, then write src to block `pos` of that
   file with bf_write and return bf_write's result. */
233 int isamh_write_block (ISAMH is, int cat, int pos, char *src)
235 ++(is->files[cat].no_writes);
236 if (is->method->debug > 2)
237 logf (LOG_LOG, "isc: write_block %d %d", cat, pos);
238 return bf_write (is->files[cat].bf, pos, 0, 0, src);
/* isamh_write_dblock: put a block header in front of the data at src and
   write the block.
   src is moved back by ISAMH_BLOCK_OFFSET_N bytes before the header is
   stored, so the caller must own that many writable bytes ahead of src.
   The header is nextpos (an int) followed by size, where size is
   offset + ISAMH_BLOCK_OFFSET_N.  Returns isamh_write_block's result. */
241 int isamh_write_dblock (ISAMH is, int cat, int pos, char *src,
242 int nextpos, int offset)
244 ISAMH_BLOCK_SIZE size = offset + ISAMH_BLOCK_OFFSET_N;
245 if (is->method->debug > 2)
246 logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d",
247 (int) size, nextpos);
/* Step back to the start of the block so the header precedes the data. */
248 src -= ISAMH_BLOCK_OFFSET_N;
249 memcpy (src, &nextpos, sizeof(int));
250 memcpy (src + sizeof(int), &size, sizeof(size));
251 return isamh_write_block (is, cat, pos, src);
/* First set of free-list helpers, compiled when ISAMH_FREELIST_CHUNK is
   non-zero. */
254 #if ISAMH_FREELIST_CHUNK
/* flush_block: write the buffered free-list chunk of file `cat` back to
   disk.  When a free-list head block exists and alloc_buf holds entries,
   the entry count is stored in the first int of alloc_buf, the buffer is
   written to the head block and the in-memory count is reset to 0.
   NOTE(review): braces are not visible in this listing, so the exact
   extent of the conditional body is inferred. */
255 static void flush_block (ISAMH is, int cat)
257 char *abuf = is->files[cat].alloc_buf;
258 int block = is->files[cat].head.freelist;
259 if (block && is->files[cat].alloc_entries_num)
261 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
262 bf_write (is->files[cat].bf, block, 0, 0, abuf);
263 is->files[cat].alloc_entries_num = 0;
/* alloc_block (chunked free list): obtain a block number in file `cat`.
   Layout of a free-list chunk as used below: first int is the number of
   entries, second int is the next chunk's block number, entries follow.
   NOTE(review): the if/else structure, some continuation lines and the
   return statement are not visible in this listing; the comments below
   describe only the visible statements. */
268 static int alloc_block (ISAMH is, int cat)
270 int block = is->files[cat].head.freelist;
271 char *abuf = is->files[cat].alloc_buf;
273 (is->files[cat].no_allocated)++;
/* Without a free list the file grows: take head.lastblock and advance it. */
277 block = (is->files[cat].head.lastblock)++; /* no free list */
278 is->files[cat].head_is_dirty = 1;
/* Buffer empty: load the free-list head chunk and its entry count. */
282 if (!is->files[cat].alloc_entries_num) /* read first time */
284 bf_read (is->files[cat].bf, block, 0, 0, abuf);
285 memcpy (&is->files[cat].alloc_entries_num, abuf,
286 sizeof(is->files[cat].alloc_entries_num));
287 assert (is->files[cat].alloc_entries_num > 0);
289 /* have some free blocks now */
290 assert (is->files[cat].alloc_entries_num > 0);
291 is->files[cat].alloc_entries_num--;
/* Chunk used up: the free-list head moves to the block number stored in
   the chunk's second int, and that chunk (if any) is loaded. */
292 if (!is->files[cat].alloc_entries_num) /* last one in block? */
294 memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
296 is->files[cat].head_is_dirty = 1;
298 if (is->files[cat].head.freelist)
300 bf_read (is->files[cat].bf, is->files[cat].head.freelist,
302 memcpy (&is->files[cat].alloc_entries_num, abuf,
303 sizeof(is->files[cat].alloc_entries_num));
304 assert (is->files[cat].alloc_entries_num);
/* Fetch the block number held in the entry slot indexed by the
   (already decremented) entry count. */
308 memcpy (&block, abuf + sizeof(int) + sizeof(int) *
309 is->files[cat].alloc_entries_num, sizeof(int));
/* release_block (chunked free list): return block `pos` of file `cat` to
   the free list kept in alloc_buf.
   NOTE(review): braces, an else line and a continuation line are not
   visible in this listing; the comments below describe only the visible
   statements. */
314 static void release_block (ISAMH is, int cat, int pos)
316 char *abuf = is->files[cat].alloc_buf;
317 int block = is->files[cat].head.freelist;
319 (is->files[cat].no_released)++;
/* A free-list head exists but nothing is buffered: load that chunk and
   its entry count. */
321 if (block && !is->files[cat].alloc_entries_num) /* must read block */
323 bf_read (is->files[cat].bf, block, 0, 0, abuf);
324 memcpy (&is->files[cat].alloc_entries_num, abuf,
325 sizeof(is->files[cat].alloc_entries_num));
326 assert (is->files[cat].alloc_entries_num > 0);
328 assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);
/* Chunk full: store its count in the first int, write it out and start
   with an empty buffer. */
329 if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
332 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
333 bf_write (is->files[cat].bf, block, 0, 0, abuf);
334 is->files[cat].alloc_entries_num = 0;
/* Empty buffer: the old head block number goes into the second int of
   the buffer and `pos` becomes the new free-list head. */
336 if (!is->files[cat].alloc_entries_num) /* make new buffer? */
338 memcpy (abuf + sizeof(int), &block, sizeof(int));
339 is->files[cat].head.freelist = pos;
340 is->files[cat].head_is_dirty = 1;
/* Store into the entry slot indexed by the current count.
   NOTE(review): the source operand is on a line not visible here;
   presumably it is pos. */
344 memcpy (abuf + sizeof(int) +
345 is->files[cat].alloc_entries_num*sizeof(int),
348 is->files[cat].alloc_entries_num++;
/* flush_block (second variant): only the fetch of the file's alloc_buf
   pointer is visible.
   NOTE(review): the rest of the body, and the preprocessor line that
   separates this variant from the chunked one, are not visible in this
   listing. */
351 static void flush_block (ISAMH is, int cat)
353 char *abuf = is->files[cat].alloc_buf;
/* alloc_block (second variant): obtain a block number in file `cat`.
   When head.freelist is non-zero, that block is taken and the first int
   stored in it becomes the new head.freelist; the other visible path takes
   head.lastblock and advances it.  The header is marked dirty and the
   allocation counter is bumped in either case.
   NOTE(review): the declaration of block, the else line and the return
   statement are not visible in this listing. */
357 static int alloc_block (ISAMH is, int cat)
360 char buf[sizeof(int)];
362 is->files[cat].head_is_dirty = 1;
363 (is->files[cat].no_allocated)++;
364 if ((block = is->files[cat].head.freelist))
366 bf_read (is->files[cat].bf, block, 0, sizeof(int), buf);
367 memcpy (&is->files[cat].head.freelist, buf, sizeof(int));
370 block = (is->files[cat].head.lastblock)++;
/* release_block (second variant): push block `pos` of file `cat` onto the
   free list.  The previous head.freelist value is written into the first
   sizeof(int) bytes of block `pos`, and `pos` becomes the new head.  The
   header is marked dirty and the release counter is bumped. */
374 static void release_block (ISAMH is, int cat, int pos)
376 char buf[sizeof(int)];
378 (is->files[cat].no_released)++;
379 is->files[cat].head_is_dirty = 1;
380 memcpy (buf, &is->files[cat].head.freelist, sizeof(int));
381 is->files[cat].head.freelist = pos;
382 bf_write (is->files[cat].bf, pos, 0, sizeof(int), buf);
/* isamh_alloc_block: allocate a block in file `cat`.
   When the file has an fc_list, its slots are scanned for a non-zero
   block number nb; nb qualifies when no block has been picked yet or when
   it is lower than the one picked, and the qualifying slot is cleared.
   alloc_block supplies the block on the other visible path.
   NOTE(review): local declarations, the statements that record nb, the
   condition guarding the alloc_block call and the return statement are
   not visible in this listing. */
386 int isamh_alloc_block (ISAMH is, int cat)
390 if (is->files[cat].fc_list)
393 for (j = 0; j < is->files[cat].fc_max; j++)
394 if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
396 is->files[cat].fc_list[j] = 0;
402 block = alloc_block (is, cat);
403 if (is->method->debug > 3)
404 logf (LOG_LOG, "isc: alloc_block in cat %d: %d", cat, block);
/* isamh_release_block: give block `pos` of file `cat` back.
   When the file has an fc_list, `pos` is stored in a slot whose value is
   zero; release_block is the other visible path.
   NOTE(review): braces and the statement that follows the slot assignment
   are not visible in this listing, so how the two paths are separated
   cannot be confirmed from here. */
408 void isamh_release_block (ISAMH is, int cat, int pos)
410 if (is->method->debug > 3)
411 logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos);
412 if (is->files[cat].fc_list)
415 for (j = 0; j<is->files[cat].fc_max; j++)
416 if (!is->files[cat].fc_list[j])
418 is->files[cat].fc_list[j] = pos;
422 release_block (is, cat, pos);
/* init_fc: create the fc_list of file `cat`: fc_max is set to j, an array
   of j ints is allocated with xmalloc, and entries are set to 0.
   NOTE(review): the declaration and value of j and the loop header around
   the last statement are not visible in this listing. */
425 static void init_fc (ISAMH is, int cat)
429 is->files[cat].fc_max = j;
430 is->files[cat].fc_list = (int *)
431 xmalloc (sizeof(*is->files[0].fc_list) * j);
433 is->files[cat].fc_list[j] = 0;
/* release_fc: hand the block numbers held in the fc_list of file `cat` to
   release_block.  Each non-zero entry b is released and its slot cleared.
   NOTE(review): the loop header that steps j (initialised to fc_max) is
   not visible in this listing. */
436 static void release_fc (ISAMH is, int cat)
438 int b, j = is->files[cat].fc_max;
441 if ((b = is->files[cat].fc_list[j]))
443 release_block (is, cat, b);
444 is->files[cat].fc_list[j] = 0;
/* isamh_pp_close: close a position handle.  The visible statement calls
   the method's code_stop callback with ISAMH_DECODE and the handle's
   decodeClientData.
   NOTE(review): the rest of the body (including how `is` is obtained and
   any release of pp's storage) is not visible in this listing. */
448 void isamh_pp_close (ISAMH_PP pp)
452 (*is->method->code_stop)(ISAMH_DECODE, pp->decodeClientData);
/* isamh_pp_open: create a position handle for reading at ipos.
   The category and block number come from ipos through isamh_type and
   isamh_block.  A buffer of one block of that category is allocated, the
   decoder is started through code_start, and the block header fields
   next, size, numKeys and lastblock are unpacked in that order from the
   start of the buffer.  pp->offset then equals ISAMH_BLOCK_OFFSET_1.
   NOTE(review): other field initialisations, any condition around the
   block read, and the return statement are not visible in this listing. */
457 ISAMH_PP isamh_pp_open (ISAMH is, ISAMH_P ipos)
459 ISAMH_PP pp = (ISAMH_PP) xmalloc (sizeof(*pp));
462 pp->cat = isamh_type(ipos);
463 pp->pos = isamh_block(ipos);
465 src = pp->buf = (char *) xmalloc (is->method->filecat[pp->cat].bsize);
471 pp->decodeClientData = (*is->method->code_start)(ISAMH_DECODE);
479 isamh_read_block (is, pp->cat, pp->pos, src);
/* Unpack the first-block header, advancing src past each field. */
480 memcpy (&pp->next, src, sizeof(pp->next));
481 src += sizeof(pp->next);
482 memcpy (&pp->size, src, sizeof(pp->size));
483 src += sizeof(pp->size);
484 memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
485 src += sizeof(pp->numKeys);
486 memcpy (&pp->lastblock, src, sizeof(pp->lastblock));
487 src += sizeof(pp->lastblock);
/* A block must never name itself as its successor. */
488 assert (pp->next != pp->pos);
489 pp->offset = src - pp->buf;
490 assert (pp->offset == ISAMH_BLOCK_OFFSET_1);
491 if (is->method->debug > 2)
492 logf (LOG_LOG, "isamh_pp_open sz=%d c=%d p=%d n=%d",
493 pp->size, pp->cat, pp->pos, isamh_block(pp->next));
/* isamh_buildfirstblock: pack the first-block header from pp into the
   buffer: next, size, numKeys and lastblock, in that order.  The final
   assert checks that the header ends ISAMH_BLOCK_OFFSET_1 bytes into
   pp->buf.
   NOTE(review): the declaration of dst and some arguments of the debug
   log call are not visible in this listing. */
500 void isamh_buildfirstblock(ISAMH_PP pp){
/* A block must never name itself as its successor. */
503 assert(pp->next != pp->pos);
504 memcpy(dst, &pp->next, sizeof(pp->next) );
505 dst += sizeof(pp->next);
506 memcpy(dst, &pp->size,sizeof(pp->size));
507 dst += sizeof(pp->size);
508 memcpy(dst, &pp->numKeys, sizeof(pp->numKeys));
509 dst += sizeof(pp->numKeys);
510 memcpy(dst, &pp->lastblock, sizeof(pp->lastblock));
511 dst += sizeof(pp->lastblock);
512 assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_1);
513 if (pp->is->method->debug > 2)
514 logf (LOG_LOG, "isamh: first: sz=%d p=%d/%d>%d/%d>%d/%d nk=%d",
517 isamh_block(pp->next), isamh_type(pp->next),
518 isamh_block(pp->lastblock), isamh_type(pp->lastblock),
/* isamh_buildlaterblock: pack the header of a non-first block from pp
   into the buffer: next followed by size.  The final assert checks that
   the header ends ISAMH_BLOCK_OFFSET_N bytes into pp->buf.
   NOTE(review): the declaration of dst and some arguments of the debug
   log call are not visible in this listing. */
522 void isamh_buildlaterblock(ISAMH_PP pp){
/* A block must never name itself as its successor. */
525 assert(pp->next != pp->pos);
526 memcpy(dst, &pp->next, sizeof(pp->next) );
527 dst += sizeof(pp->next);
528 memcpy(dst, &pp->size,sizeof(pp->size));
529 dst += sizeof(pp->size);
530 assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_N);
531 if (pp->is->method->debug > 2)
532 logf (LOG_LOG, "isamh: l8r: sz=%d p=%d/%d>%d/%d",
535 isamh_block(pp->next), isamh_type(pp->next) );
/* isamh_pp_read: read the next item at pp into the caller's buffer buf.
   Thin wrapper: passes the address of the local buf pointer to
   isamh_read_item and returns its result.  isamh_read_item receives a
   char ** and may advance that local copy; the caller's own pointer is
   not affected. */
540 /* returns non-zero if item could be read; 0 otherwise */
541 int isamh_pp_read (ISAMH_PP pp, void *buf)
543 return isamh_read_item (pp, (char **) &buf);
/* isamh_read_item: decode the next item at pp into *dst through the
   method's code_item callback.  When the read offset has reached the size
   of the current block, the following block is loaded first: jump
   statistics are updated, the block is read and its next/size header is
   unpacked.
   NOTE(review): short lines (braces, else lines, the end-of-file test,
   the update of pp->pos and src, the condition around the release call,
   the non-zero return statements) are not visible in this listing. */
546 /* read one item from file - decode and store it in *dst.
549 1 if item could be read ok and NO boundary
550 2 if item could be read ok and boundary */
551 int isamh_read_item (ISAMH_PP pp, char **dst)
554 char *src = pp->buf + pp->offset;
/* Current block exhausted? */
556 if (pp->offset >= pp->size)
561 return 0; /* end of file */
/* Statistics: classify the move to the next block as directly adjacent
   (no_next / no_prev) or as a longer forward / backward jump, summing the
   distance of the longer jumps. */
563 if (pp->next > pp->pos)
565 if (pp->next == pp->pos + 1)
566 is->files[pp->cat].no_next++;
569 is->files[pp->cat].no_forward++;
570 is->files[pp->cat].sum_forward += pp->next - pp->pos;
575 if (pp->next + 1 == pp->pos)
576 is->files[pp->cat].no_prev++;
579 is->files[pp->cat].no_backward++;
580 is->files[pp->cat].sum_backward += pp->pos - pp->next;
583 /* out new block position */
586 /* read block and save 'next' and 'size' entry */
587 isamh_read_block (is, pp->cat, pp->pos, src);
588 memcpy (&pp->next, src, sizeof(pp->next));
589 src += sizeof(pp->next);
590 memcpy (&pp->size, src, sizeof(pp->size));
591 src += sizeof(pp->size);
592 /* assume block is non-empty */
593 assert (src - pp->buf == ISAMH_BLOCK_OFFSET_N);
594 assert (pp->next != pp->pos);
/* NOTE(review): the condition guarding this release is not visible. */
596 isamh_release_block (is, pp->cat, pp->pos);
/* Decode the first item of the freshly loaded block; the callback
   advances src, from which the new offset is derived. */
597 (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src);
598 pp->offset = src - pp->buf;
599 if (is->method->debug > 2)
600 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
601 pp->size, pp->cat, pp->pos, pp->next);
/* Decode an item from the block already in the buffer. */
604 (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src);
605 pp->offset = src - pp->buf;
609 int isamh_pp_num (ISAMH_PP pp)
616 * Revision 1.3 1999-07-06 16:30:20 heikki
617 * IsamH starts to work - at least it builds indexes. Can not search yet...
619 * Revision 1.2 1999/07/06 09:37:05 heikki
620 * Working on isamh - not ready yet.
622 * Revision 1.1 1999/06/30 15:04:54 heikki
623 * Copied from isamc.c, slowly starting to simplify...