X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=bfile%2Fcfile.c;h=60dc4d2533add6c6153416eae3277622fd275ad6;hp=ee11f0fed50bac8d880918352ac3eedb0747af9d;hb=05b9b8ed020c5bfa48a913d6a2e2b50ddf1bab8e;hpb=d75b9accf5a28bd5d8ffd70bbb33b3e8e009d079 diff --git a/bfile/cfile.c b/bfile/cfile.c index ee11f0f..60dc4d2 100644 --- a/bfile/cfile.c +++ b/bfile/cfile.c @@ -1,129 +1,152 @@ -/* - * Copyright (C) 1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: cfile.c,v $ - * Revision 1.1 1995-11-30 08:33:11 adam - * Started work on commit facility. - * - */ +/* $Id: cfile.c,v 1.32 2004-12-08 12:23:08 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ #include -#include -#include -#include +#include +#include -#include +#include +#include "mfile.h" #include "cfile.h" -static int hash_write (CFile cf, const void *buf, size_t bytes) +static int write_head (CFile cf) { - int r; + int left = cf->head.hash_size * sizeof(zint); + int bno = 1; + const char *tab = (char*) cf->array; - r = write (cf->hash_fd, buf, bytes); - if (r == bytes) - return bytes; - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "write in commit hash file"); - else - logf (LOG_FATAL, "write in commit hash file. " - "%d bytes instead of %d bytes", r, bytes); - exit (1); + if (!tab) + return 0; + while (left >= (int) HASH_BSIZE) + { + mf_write (cf->hash_mf, bno++, 0, 0, tab); + tab += HASH_BSIZE; + left -= HASH_BSIZE; + } + if (left > 0) + mf_write (cf->hash_mf, bno, 0, left, tab); return 0; } -static int hash_read (CFile cf, void *buf, size_t bytes) +static int read_head (CFile cf) { - int r; + int left = cf->head.hash_size * sizeof(zint); + int bno = 1; + char *tab = (char*) cf->array; - r = read (cf->hash_fd, buf, bytes); - if (r == bytes) - return bytes; - if (r == -1) - logf (LOG_FATAL|LOG_ERRNO, "read in commit hash file"); - else - logf (LOG_FATAL, "read in commit hash file. " - "%d bytes instead of %d bytes", r, bytes); - abort (); + if (!tab) + return 0; + while (left >= (int) HASH_BSIZE) + { + mf_read (cf->hash_mf, bno++, 0, 0, tab); + tab += HASH_BSIZE; + left -= HASH_BSIZE; + } + if (left > 0) + mf_read (cf->hash_mf, bno, 0, left, tab); return 0; } -CFile cf_open (MFile mf, const char *cname, const char *fname, + +CFile cf_open (MFile mf, MFile_area area, const char *fname, int block_size, int wflag, int *firstp) { - char path[256]; - int r, i; - CFile cf = xmalloc (sizeof(*cf)); + char path[1024]; + int i; + CFile cf = (CFile) xmalloc (sizeof(*cf)); int hash_bytes; - cf->mf = mf; - sprintf (path, "%s.%s.b", cname, fname); - if ((cf->block_fd = - open (path, wflag ? O_RDWR|O_CREAT : O_RDONLY, 0666)) < 0) + cf->rmf = mf; + yaz_log (YLOG_DEBUG, "cf: open %s %s", cf->rmf->name, wflag ? "rdwr" : "rd"); + sprintf (path, "%s-b", fname); + if (!(cf->block_mf = mf_open (area, path, block_size, wflag))) { - logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "Failed to open %s", path); exit (1); } - sprintf (path, "%s.%s.h", cname, fname); - if ((cf->hash_fd = - open (path, wflag ? O_RDWR|O_CREAT : O_RDONLY, 0666)) < 0) + sprintf (path, "%s-i", fname); + if (!(cf->hash_mf = mf_open (area, path, HASH_BSIZE, wflag))) { - logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "Failed to open %s", path); exit (1); } - r = read (cf->hash_fd, &cf->head, sizeof(cf->head)); - if (r != sizeof(cf->head)) + assert (firstp); + if (!mf_read (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head) || + !cf->head.state) { *firstp = 1; - if (r == -1) - { - logf (LOG_FATAL|LOG_ERRNO, "read head of %s", path); - exit (1); - } - if (r != 0) - { - logf (LOG_FATAL, "illegal head of %s", path); - exit (1); - } + cf->head.state = 1; cf->head.block_size = block_size; - cf->head.hash_size = 401; - hash_bytes = cf->head.hash_size * sizeof(int); - cf->head.next_bucket = + cf->head.hash_size = 199; + hash_bytes = cf->head.hash_size * sizeof(zint); + cf->head.flat_bucket = cf->head.next_bucket = cf->head.first_bucket = (hash_bytes+sizeof(cf->head))/HASH_BSIZE + 2; cf->head.next_block = 1; if (wflag) - hash_write (cf, &cf->head, sizeof(cf->head)); - cf->array = xmalloc (hash_bytes); + mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head); + cf->array = (zint *) xmalloc (hash_bytes); for (i = 0; ihead.hash_size; i++) cf->array[i] = 0; if (wflag) - hash_write (cf, cf->array, hash_bytes); + write_head (cf); } else { *firstp = 0; assert (cf->head.block_size == block_size); - assert (cf->head.hash_size > 2 && cf->head.hash_size < 200000); - hash_bytes = cf->head.hash_size * sizeof(int); + assert (cf->head.hash_size > 2); + hash_bytes = cf->head.hash_size * sizeof(zint); assert (cf->head.next_bucket > 0); - cf->array = xmalloc (hash_bytes); - hash_read (cf, cf->array, hash_bytes); + assert (cf->head.next_block > 0); + if (cf->head.state == 1) + cf->array = (zint *) xmalloc (hash_bytes); + else + cf->array = NULL; + read_head (cf); + } + if (cf->head.state == 1) + { + cf->parray = (struct CFile_hash_bucket **) + xmalloc (cf->head.hash_size * sizeof(*cf->parray)); + for (i = 0; ihead.hash_size; i++) + cf->parray[i] = NULL; } - cf->parray = xmalloc (cf->head.hash_size * sizeof(*cf->parray)); - for (i = 0; ihead.hash_size; i++) - cf->parray[i] = NULL; + else + cf->parray = NULL; cf->bucket_lru_front = cf->bucket_lru_back = NULL; cf->bucket_in_memory = 0; - cf->max_bucket_in_memory = 200; + cf->max_bucket_in_memory = 100; cf->dirty = 0; - cf->iobuf = xmalloc (cf->head.block_size); + cf->iobuf = (char *) xmalloc (cf->head.block_size); + memset (cf->iobuf, 0, cf->head.block_size); + cf->no_hits = 0; + cf->no_miss = 0; + zebra_mutex_init (&cf->mutex); return cf; } -static int cf_hash (CFile cf, int no) +static int cf_hash (CFile cf, zint no) { - return (no>>3) % cf->head.hash_size; + return (int) (((no >> 3) % cf->head.hash_size)); } static void release_bucket (CFile cf, struct CFile_hash_bucket *p) @@ -157,19 +180,14 @@ static void flush_bucket (CFile cf, int no_to_flush) break; if (p->dirty) { - if (lseek (cf->hash_fd, p->ph.this_bucket*HASH_BSIZE, SEEK_SET) < 0) - { - logf (LOG_FATAL|LOG_ERRNO, "lseek in flush_bucket"); - exit (1); - } - hash_write (cf, &p->ph, HASH_BSIZE); + mf_write (cf->hash_mf, p->ph.this_bucket, 0, 0, &p->ph); cf->dirty = 1; } release_bucket (cf, p); } } -static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno) +static struct CFile_hash_bucket *alloc_bucket (CFile cf, zint block_no, int hno) { struct CFile_hash_bucket *p, **pp; @@ -177,7 +195,7 @@ static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno) flush_bucket (cf, 1); assert (cf->bucket_in_memory < cf->max_bucket_in_memory); ++(cf->bucket_in_memory); - p = xmalloc (sizeof(*p)); + p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p)); p->lru_next = NULL; p->lru_prev = cf->bucket_lru_front; @@ -196,60 +214,94 @@ static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno) return p; } -static struct CFile_hash_bucket *get_bucket (CFile cf, int block_no, int hno) +static struct CFile_hash_bucket *get_bucket (CFile cf, zint block_no, int hno) { struct CFile_hash_bucket *p; p = alloc_bucket (cf, block_no, hno); - - if (lseek (cf->hash_fd, block_no * HASH_BSIZE, SEEK_SET) < 0) + if (!mf_read (cf->hash_mf, block_no, 0, 0, &p->ph)) { - logf (LOG_FATAL|LOG_ERRNO, "lseek in get_bucket"); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "read get_bucket"); exit (1); } - hash_read (cf, &p->ph, HASH_BSIZE); assert (p->ph.this_bucket == block_no); p->dirty = 0; return p; } -static struct CFile_hash_bucket *new_bucket (CFile cf, int *block_no, int hno) +static struct CFile_hash_bucket *new_bucket (CFile cf, zint *block_nop, int hno) { struct CFile_hash_bucket *p; int i; + zint block_no; - *block_no = cf->head.next_bucket++; - p = alloc_bucket (cf, *block_no, hno); + block_no = *block_nop = cf->head.next_bucket++; + p = alloc_bucket (cf, block_no, hno); for (i = 0; iph.vno[i] = 0; + p->ph.no[i] = 0; + } p->ph.next_bucket = 0; - p->ph.this_bucket = *block_no; + p->ph.this_bucket = block_no; p->dirty = 1; return p; } -int cf_lookup (CFile cf, int no) +static zint cf_lookup_flat (CFile cf, zint no) +{ + zint hno = (no*sizeof(zint))/HASH_BSIZE; + int off = (int) ((no*sizeof(zint)) - hno*HASH_BSIZE); + zint vno = 0; + + mf_read (cf->hash_mf, hno+cf->head.next_bucket, off, sizeof(zint), &vno); + return vno; +} + +static zint cf_lookup_hash (CFile cf, zint no) { int hno = cf_hash (cf, no); struct CFile_hash_bucket *hb; - int block_no, i; + zint block_no; + int i; - logf (LOG_LOG, "cf_lookup pass 1"); for (hb = cf->parray[hno]; hb; hb = hb->h_next) { - logf (LOG_LOG, "bucket_no=%d", hb->ph.this_bucket); for (i = 0; iph.vno[i]; i++) if (hb->ph.no[i] == no) + { + (cf->no_hits)++; return hb->ph.vno[i]; + } } - logf (LOG_LOG, "cf_lookup pass 2"); for (block_no = cf->array[hno]; block_no; block_no = hb->ph.next_bucket) { - logf (LOG_LOG, "bucket_no=%d", block_no); for (hb = cf->parray[hno]; hb; hb = hb->h_next) + { if (hb->ph.this_bucket == block_no) - continue; + break; + } + if (hb) + continue; +#if 0 + /* extra check ... */ + for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next) + { + if (hb->ph.this_bucket == block_no) + { + yaz_log (YLOG_FATAL, "Found hash bucket on other chain (1)"); + abort (); + } + for (i = 0; iph.vno[i]; i++) + if (hb->ph.no[i] == no) + { + yaz_log (YLOG_FATAL, "Found hash bucket on other chain (2)"); + abort (); + } + } +#endif + (cf->no_miss)++; hb = get_bucket (cf, block_no, hno); for (i = 0; iph.vno[i]; i++) if (hb->ph.no[i] == no) @@ -258,19 +310,81 @@ int cf_lookup (CFile cf, int no) return 0; } -int cf_new (CFile cf, int no) +static void cf_write_flat (CFile cf, zint no, zint vno) +{ + zint hno = (no*sizeof(zint))/HASH_BSIZE; + int off = (int) ((no*sizeof(zint)) - hno*HASH_BSIZE); + + hno += cf->head.next_bucket; + if (hno >= cf->head.flat_bucket) + cf->head.flat_bucket = hno+1; + cf->dirty = 1; + mf_write (cf->hash_mf, hno, off, sizeof(zint), &vno); +} + +static void cf_moveto_flat (CFile cf) +{ + struct CFile_hash_bucket *p; + int j; + zint i; + + yaz_log (YLOG_DEBUG, "cf: Moving to flat shadow: %s", cf->rmf->name); + yaz_log (YLOG_DEBUG, "cf: hits=%d miss=%d bucket_in_memory=" ZINT_FORMAT " total=" + ZINT_FORMAT, + cf->no_hits, cf->no_miss, cf->bucket_in_memory, + cf->head.next_bucket - cf->head.first_bucket); + assert (cf->head.state == 1); + flush_bucket (cf, -1); + assert (cf->bucket_in_memory == 0); + p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p)); + for (i = cf->head.first_bucket; i < cf->head.next_bucket; i++) + { + if (!mf_read (cf->hash_mf, i, 0, 0, &p->ph)) + { + yaz_log (YLOG_FATAL|YLOG_ERRNO, "read bucket moveto flat"); + exit (1); + } + for (j = 0; j < HASH_BUCKET && p->ph.vno[j]; j++) + cf_write_flat (cf, p->ph.no[j], p->ph.vno[j]); + } + xfree (p); + xfree (cf->array); + cf->array = NULL; + xfree (cf->parray); + cf->parray = NULL; + cf->head.state = 2; + cf->dirty = 1; +} + +static zint cf_lookup (CFile cf, zint no) +{ + if (cf->head.state > 1) + return cf_lookup_flat (cf, no); + return cf_lookup_hash (cf, no); +} + +static zint cf_new_flat (CFile cf, zint no) +{ + zint vno = (cf->head.next_block)++; + + cf_write_flat (cf, no, vno); + return vno; +} + +static zint cf_new_hash (CFile cf, zint no) { int hno = cf_hash (cf, no); struct CFile_hash_bucket *hbprev = NULL, *hb = cf->parray[hno]; - int *bucketpp = &cf->array[hno]; + zint *bucketpp = &cf->array[hno]; int i; - int vno = (cf->head.next_block)++; - + zint vno = (cf->head.next_block)++; + for (hb = cf->parray[hno]; hb; hb = hb->h_next) if (!hb->ph.vno[HASH_BUCKET-1]) for (i = 0; iph.vno[i]) { + (cf->no_hits)++; hb->ph.no[i] = no; hb->ph.vno[i] = vno; hb->dirty = 1; @@ -284,8 +398,23 @@ int cf_new (CFile cf, int no) { bucketpp = &hb->ph.next_bucket; hbprev = hb; - continue; + break; } + if (hb) + continue; + +#if 0 + /* extra check ... */ + for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next) + { + if (hb->ph.this_bucket == *bucketpp) + { + yaz_log (YLOG_FATAL, "Found hash bucket on other chain"); + abort (); + } + } +#endif + (cf->no_miss)++; hb = get_bucket (cf, *bucketpp, hno); assert (hb); for (i = 0; ihead.state > 1) + return cf_new_flat (cf, no); + if (cf->no_miss*2 > cf->no_hits) + { + cf_moveto_flat (cf); + assert (cf->head.state > 1); + return cf_new_flat (cf, no); + } + return cf_new_hash (cf, no); +} + + +int cf_read (CFile cf, zint no, int offset, int nbytes, void *buf) { - int block, r; + zint block; assert (cf); - logf (LOG_LOG, "cf_read no=%d, offset=%d, num=%d", no, offset, num); + zebra_mutex_lock (&cf->mutex); if (!(block = cf_lookup (cf, no))) - return -1; - if (lseek (cf->block_fd, cf->head.block_size * block + offset, - SEEK_SET) < 0) { - logf (LOG_FATAL|LOG_ERRNO, "cf_read, lseek no=%d, block=%d", - no, block); - exit (1); + zebra_mutex_unlock (&cf->mutex); + return -1; } - r = read (cf->block_fd, buf, num ? num : cf->head.block_size); - if (r != cf->head.block_size) + zebra_mutex_unlock (&cf->mutex); + if (!mf_read (cf->block_mf, block, offset, nbytes, buf)) { - logf (LOG_FATAL|LOG_ERRNO, "cf_read, read no=%d, block=%d", - no, block); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "cf_read no=" ZINT_FORMAT " block=" ZINT_FORMAT, no, block); exit (1); } return 1; } -int cf_write (CFile cf, int no, int offset, int num, const void *buf) +int cf_write (CFile cf, zint no, int offset, int nbytes, const void *buf) { - int block, r; + zint block; assert (cf); - - logf (LOG_LOG, "cf_write no=%d, offset=%d, num=%d", no, offset, num); + zebra_mutex_lock (&cf->mutex); if (!(block = cf_lookup (cf, no))) { block = cf_new (cf, no); - if (offset || num) + if (offset || nbytes) { - mf_read (cf->mf, no, 0, 0, cf->iobuf); - memcpy (cf->iobuf + offset, buf, num); + mf_read (cf->rmf, no, 0, 0, cf->iobuf); + memcpy (cf->iobuf + offset, buf, nbytes); buf = cf->iobuf; offset = 0; - num = 0; + nbytes = 0; } } - if (lseek (cf->block_fd, cf->head.block_size * block + offset, - SEEK_SET) < 0) + zebra_mutex_unlock (&cf->mutex); + if (mf_write (cf->block_mf, block, offset, nbytes, buf)) { - logf (LOG_FATAL|LOG_ERRNO, "cf_write, lseek no=%d, block=%d", - no, block); - exit (1); - } - r = write (cf->block_fd, buf, num ? num : cf->head.block_size); - if (r != cf->head.block_size) - { - logf (LOG_FATAL|LOG_ERRNO, "cf_write, read no=%d, block=%d", - no, block); + yaz_log (YLOG_FATAL|YLOG_ERRNO, "cf_write no=" ZINT_FORMAT + " block=" ZINT_FORMAT, no, block); exit (1); } return 0; @@ -370,31 +500,22 @@ int cf_write (CFile cf, int no, int offset, int num, const void *buf) int cf_close (CFile cf) { + yaz_log (YLOG_DEBUG, "cf: close hits=%d miss=%d bucket_in_memory=" ZINT_FORMAT + " total=" ZINT_FORMAT, + cf->no_hits, cf->no_miss, cf->bucket_in_memory, + cf->head.next_bucket - cf->head.first_bucket); flush_bucket (cf, -1); if (cf->dirty) { - int hash_bytes = cf->head.hash_size * sizeof(int); - if (lseek (cf->hash_fd, 0L, SEEK_SET) < 0) - { - logf (LOG_FATAL|LOG_ERRNO, "seek in hash fd"); - exit (1); - } - hash_write (cf, &cf->head, sizeof(cf->head)); - hash_write (cf, cf->array, hash_bytes); - } - if (close (cf->hash_fd) < 0) - { - logf (LOG_FATAL|LOG_ERRNO, "close hash fd"); - exit (1); - } - if (close (cf->block_fd) < 0) - { - logf (LOG_FATAL|LOG_ERRNO, "close block fd"); - exit (1); + mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head); + write_head (cf); } + mf_close (cf->hash_mf); + mf_close (cf->block_mf); xfree (cf->array); xfree (cf->parray); xfree (cf->iobuf); + zebra_mutex_destroy (&cf->mutex); xfree (cf); return 0; }