X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=bfile%2Fcfile.c;h=60dc4d2533add6c6153416eae3277622fd275ad6;hp=ee11f0fed50bac8d880918352ac3eedb0747af9d;hb=05b9b8ed020c5bfa48a913d6a2e2b50ddf1bab8e;hpb=d75b9accf5a28bd5d8ffd70bbb33b3e8e009d079

diff --git a/bfile/cfile.c b/bfile/cfile.c
index ee11f0f..60dc4d2 100644
--- a/bfile/cfile.c
+++ b/bfile/cfile.c
@@ -1,129 +1,152 @@
-/*
- * Copyright (C) 1995, Index Data I/S 
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: cfile.c,v $
- * Revision 1.1  1995-11-30 08:33:11  adam
- * Started work on commit facility.
- *
- */
+/* $Id: cfile.c,v 1.32 2004-12-08 12:23:08 adam Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+   Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
 
 #include <assert.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <alexutil.h>
+#include <stdlib.h>
+#include <string.h>
 
-#include <mfile.h>
+#include <zebrautl.h>
+#include "mfile.h"
 #include "cfile.h"
 
-static int hash_write (CFile cf, const void *buf, size_t bytes)
+static int write_head (CFile cf) /* store the in-memory table cf->array in cf->hash_mf from block 1 onwards; always returns 0 */
 {
-    int r;
+    int left = cf->head.hash_size * sizeof(zint); /* table bytes not yet written */
+    int bno = 1; /* block 0 is where cf_open and cf_close write cf->head */
+    const char *tab = (char*) cf->array; /* byte cursor into the table */
 
-    r = write (cf->hash_fd, buf, bytes);
-    if (r == bytes)
-        return bytes;
-    if (r == -1)
-        logf (LOG_FATAL|LOG_ERRNO, "write in commit hash file");
-    else
-        logf (LOG_FATAL, "write in commit hash file. "
-                     "%d bytes instead of %d bytes", r, bytes);
-    exit (1);
+    if (!tab) /* cf->array is NULL when head.state != 1 (see cf_open, cf_moveto_flat): nothing to write */
+        return 0;
+    while (left >= (int) HASH_BSIZE) /* one HASH_BSIZE chunk per block */
+    {
+        mf_write (cf->hash_mf, bno++, 0, 0, tab); /* nbytes 0: presumably "whole block" -- confirm against mf_write */
+        tab += HASH_BSIZE;
+        left -= HASH_BSIZE;
+    }
+    if (left > 0) /* tail shorter than a block: write only 'left' bytes */
+        mf_write (cf->hash_mf, bno, 0, left, tab);
     return 0;
 }
 
-static int hash_read (CFile cf, void *buf, size_t bytes)
+static int read_head (CFile cf) /* load the table cf->array from cf->hash_mf, block 1 onwards (mirror of write_head); always returns 0 */
 {
-    int r;
+    int left = cf->head.hash_size * sizeof(zint); /* table bytes still to read */
+    int bno = 1; /* block 0 holds cf->head, read separately in cf_open */
+    char *tab = (char*) cf->array; /* byte cursor into the destination table */
 
-    r = read (cf->hash_fd, buf, bytes);
-    if (r == bytes)
-        return bytes;
-    if (r == -1)
-        logf (LOG_FATAL|LOG_ERRNO, "read in commit hash file");
-    else
-        logf (LOG_FATAL, "read in commit hash file. "
-                     "%d bytes instead of %d bytes", r, bytes);
-    abort ();
+    if (!tab) /* cf_open sets cf->array to NULL when head.state != 1: nothing to load */
+        return 0;
+    while (left >= (int) HASH_BSIZE) /* one HASH_BSIZE chunk per block */
+    {
+        mf_read (cf->hash_mf, bno++, 0, 0, tab); /* return value not checked; nbytes 0 presumably means "whole block" -- confirm */
+        tab += HASH_BSIZE;
+        left -= HASH_BSIZE;
+    }
+    if (left > 0) /* tail shorter than a block: read only 'left' bytes */
+        mf_read (cf->hash_mf, bno, 0, left, tab);
     return 0;
 }
 
-CFile cf_open (MFile mf, const char *cname, const char *fname,
+
+CFile cf_open (MFile mf, MFile_area area, const char *fname, /* open "<fname>-b" and "<fname>-i" in area and return a new CFile for mf; exits if either open fails */
                int block_size, int wflag, int *firstp)
 {
-    char path[256];
-    int r, i;
-    CFile cf = xmalloc (sizeof(*cf));
+    char path[1024]; /* NOTE(review): filled by unbounded sprintf below; fname is assumed to fit */
+    int i;
+    CFile cf = (CFile) xmalloc (sizeof(*cf)); /* released by cf_close */
     int hash_bytes;
    
-    cf->mf = mf; 
-    sprintf (path, "%s.%s.b", cname, fname);
-    if ((cf->block_fd = 
-        open (path, wflag ? O_RDWR|O_CREAT : O_RDONLY, 0666)) < 0)
+    cf->rmf = mf;  /* the file being shadowed; cf_write reads original blocks from it */
+    yaz_log (YLOG_DEBUG, "cf: open %s %s", cf->rmf->name, wflag ? "rdwr" : "rd");
+    sprintf (path, "%s-b", fname); /* "-b": file of block_size blocks read/written by cf_read and cf_write */
+    if (!(cf->block_mf = mf_open (area, path, block_size, wflag)))
     {
-        logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "Failed to open %s", path);
         exit (1);
     }
-    sprintf (path, "%s.%s.h", cname, fname);
-    if ((cf->hash_fd = 
-        open (path, wflag ? O_RDWR|O_CREAT : O_RDONLY, 0666)) < 0)
+    sprintf (path, "%s-i", fname); /* "-i": HASH_BSIZE-block file holding cf->head, the table and the buckets */
+    if (!(cf->hash_mf = mf_open (area, path, HASH_BSIZE, wflag)))
     {
-        logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "Failed to open %s", path);
         exit (1);
     }
-    r = read (cf->hash_fd, &cf->head, sizeof(cf->head));
-    if (r != sizeof(cf->head))
+    assert (firstp); /* mandatory: *firstp is written on both branches below */
+    if (!mf_read (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head) || /* header lives in block 0 */
+        !cf->head.state) /* no header read, or state 0: build a fresh header */
     {
         *firstp = 1;
-        if (r == -1)
-        {
-            logf (LOG_FATAL|LOG_ERRNO, "read head of %s", path);
-            exit (1);
-        }
-        if (r != 0)
-        {
-            logf (LOG_FATAL, "illegal head of %s", path);
-            exit (1);
-        }
+        cf->head.state = 1; /* 1 = hashed state; cf_moveto_flat later sets 2 */
         cf->head.block_size = block_size;
-        cf->head.hash_size = 401;
-        hash_bytes = cf->head.hash_size * sizeof(int);
-        cf->head.next_bucket =
+        cf->head.hash_size = 199; /* number of hash chains in a new file */
+        hash_bytes = cf->head.hash_size * sizeof(zint); /* table size; also places the first bucket past header and table */
+        cf->head.flat_bucket = cf->head.next_bucket = cf->head.first_bucket = 
             (hash_bytes+sizeof(cf->head))/HASH_BSIZE + 2;
         cf->head.next_block = 1;
         if (wflag)
-            hash_write (cf, &cf->head, sizeof(cf->head));
-        cf->array = xmalloc (hash_bytes);
+            mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head); /* persist the new header in block 0 */
+        cf->array = (zint *) xmalloc (hash_bytes); /* per-chain first bucket number; 0 = empty chain */
         for (i = 0; i<cf->head.hash_size; i++)
             cf->array[i] = 0;
         if (wflag)
-            hash_write (cf, cf->array, hash_bytes);
+            write_head (cf); /* persist the zeroed table after the header */
     }
     else
     {
         *firstp = 0;
         assert (cf->head.block_size == block_size);
-        assert (cf->head.hash_size > 2 && cf->head.hash_size < 200000);
-        hash_bytes = cf->head.hash_size * sizeof(int);
+        assert (cf->head.hash_size > 2);
+        hash_bytes = cf->head.hash_size * sizeof(zint);
         assert (cf->head.next_bucket > 0);
-        cf->array = xmalloc (hash_bytes);
-        hash_read (cf, cf->array, hash_bytes);
+        assert (cf->head.next_block > 0);
+        if (cf->head.state == 1) /* the in-memory table exists only in hashed state */
+            cf->array = (zint *) xmalloc (hash_bytes);
+        else
+            cf->array = NULL;
+        read_head (cf); /* returns immediately when cf->array is NULL */
+    }
+    if (cf->head.state == 1) /* per-chain lists of cached buckets, likewise hashed state only */
+    {
+        cf->parray = (struct CFile_hash_bucket **)
+	    xmalloc (cf->head.hash_size * sizeof(*cf->parray));
+        for (i = 0; i<cf->head.hash_size; i++)
+            cf->parray[i] = NULL;
     }
-    cf->parray = xmalloc (cf->head.hash_size * sizeof(*cf->parray));
-    for (i = 0; i<cf->head.hash_size; i++)
-        cf->parray[i] = NULL;
+    else
+        cf->parray = NULL;
     cf->bucket_lru_front = cf->bucket_lru_back = NULL;
     cf->bucket_in_memory = 0;
-    cf->max_bucket_in_memory = 200;
+    cf->max_bucket_in_memory = 100; /* cache limit that alloc_bucket asserts against */
     cf->dirty = 0;
-    cf->iobuf = xmalloc (cf->head.block_size);
+    cf->iobuf = (char *) xmalloc (cf->head.block_size); /* one-block scratch buffer used by cf_write */
+    memset (cf->iobuf, 0, cf->head.block_size);
+    cf->no_hits = 0; /* hit/miss counters: cf_new compares them to decide on cf_moveto_flat */
+    cf->no_miss = 0;
+    zebra_mutex_init (&cf->mutex); /* taken by cf_read and cf_write */
     return cf;
 }
 
-static int cf_hash (CFile cf, int no)
+static int cf_hash (CFile cf, zint no) /* chain index for block number no; used to index cf->array and cf->parray */
 {
-    return (no>>3) % cf->head.hash_size;
+    return (int) (((no >> 3) % cf->head.hash_size)); /* >> 3: numbers differing only in their low three bits share a chain */
 }
 
 static void release_bucket (CFile cf, struct CFile_hash_bucket *p)
@@ -157,19 +180,14 @@ static void flush_bucket (CFile cf, int no_to_flush)
             break;
         if (p->dirty)
         {
-            if (lseek (cf->hash_fd, p->ph.this_bucket*HASH_BSIZE, SEEK_SET) < 0)
-            {
-                logf (LOG_FATAL|LOG_ERRNO, "lseek in flush_bucket");
-                exit (1);
-            }
-            hash_write (cf, &p->ph, HASH_BSIZE);
+            mf_write (cf->hash_mf, p->ph.this_bucket, 0, 0, &p->ph);
             cf->dirty = 1;
         }
         release_bucket (cf, p);
     }
 }
 
-static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno)
+static struct CFile_hash_bucket *alloc_bucket (CFile cf, zint block_no, int hno)
 {
     struct CFile_hash_bucket *p, **pp;
 
@@ -177,7 +195,7 @@ static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno)
         flush_bucket (cf, 1);
     assert (cf->bucket_in_memory < cf->max_bucket_in_memory);
     ++(cf->bucket_in_memory);
-    p = xmalloc (sizeof(*p));
+    p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p));
 
     p->lru_next = NULL;
     p->lru_prev = cf->bucket_lru_front;
@@ -196,60 +214,94 @@ static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno)
     return p;
 }
 
-static struct CFile_hash_bucket *get_bucket (CFile cf, int block_no, int hno)
+static struct CFile_hash_bucket *get_bucket (CFile cf, zint block_no, int hno) /* allocate a bucket via alloc_bucket and fill it from block block_no of cf->hash_mf; exits on read failure */
 {
     struct CFile_hash_bucket *p;
 
     p = alloc_bucket (cf, block_no, hno);
-
-    if (lseek (cf->hash_fd, block_no * HASH_BSIZE, SEEK_SET) < 0)
+    if (!mf_read (cf->hash_mf, block_no, 0, 0, &p->ph)) /* offset 0 / nbytes 0: presumably the whole block -- confirm against mf_read */
     {
-        logf (LOG_FATAL|LOG_ERRNO, "lseek in get_bucket");
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "read get_bucket"); /* a 0 return from mf_read is fatal here */
         exit (1);
     }
-    hash_read (cf, &p->ph, HASH_BSIZE);
     assert (p->ph.this_bucket == block_no);
     p->dirty = 0;
     return p;
 }
 
-static struct CFile_hash_bucket *new_bucket (CFile cf, int *block_no, int hno)
+static struct CFile_hash_bucket *new_bucket (CFile cf, zint *block_nop, int hno) /* create an empty, dirty bucket on a newly numbered block; the number is also stored in *block_nop */
 {
     struct CFile_hash_bucket *p;
     int i;
+    zint block_no; /* local copy of the number handed out below */
 
-    *block_no = cf->head.next_bucket++;
-    p = alloc_bucket (cf, *block_no, hno);
+    block_no = *block_nop = cf->head.next_bucket++; /* take the next bucket number and advance the counter */
+    p = alloc_bucket (cf, block_no, hno);
 
     for (i = 0; i<HASH_BUCKET; i++)
+    {
         p->ph.vno[i] = 0;
+        p->ph.no[i] = 0; /* clear the key slot as well as the value slot */
+    }
     p->ph.next_bucket = 0;
-    p->ph.this_bucket = *block_no;
+    p->ph.this_bucket = block_no; /* get_bucket asserts this matches the block it reads */
     p->dirty = 1;
     return p;
 }
 
-int cf_lookup (CFile cf, int no)
+static zint cf_lookup_flat (CFile cf, zint no) /* flat-state lookup: value stored for block no, or 0 */
+{
+    zint mapped = 0; /* returned unchanged if mf_read stores nothing */
+    zint blk = (no*sizeof(zint))/HASH_BSIZE; /* block of the flat table containing entry no */
+    int pos = (int) ((no*sizeof(zint)) - blk*HASH_BSIZE); /* byte offset of the entry inside that block */
+    blk += cf->head.next_bucket; /* the flat table starts at block head.next_bucket */
+    mf_read (cf->hash_mf, blk, pos, sizeof(zint), &mapped);
+    return mapped;
+}
+
+static zint cf_lookup_hash (CFile cf, zint no)
 {
     int hno = cf_hash (cf, no);
     struct CFile_hash_bucket *hb;
-    int block_no, i;
+    zint block_no;
+    int i;
 
-    logf (LOG_LOG, "cf_lookup pass 1");
     for (hb = cf->parray[hno]; hb; hb = hb->h_next)
     {
-        logf (LOG_LOG, "bucket_no=%d", hb->ph.this_bucket);
         for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
             if (hb->ph.no[i] == no)
+            {
+                (cf->no_hits)++;
                 return hb->ph.vno[i];
+            }
     }
-    logf (LOG_LOG, "cf_lookup pass 2");
     for (block_no = cf->array[hno]; block_no; block_no = hb->ph.next_bucket)
     {
-        logf (LOG_LOG, "bucket_no=%d", block_no);
         for (hb = cf->parray[hno]; hb; hb = hb->h_next)
+        {
             if (hb->ph.this_bucket == block_no)
-                continue;
+                break;
+        }
+        if (hb)
+            continue;
+#if 0
+        /* extra check ... */
+        for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)
+        {
+            if (hb->ph.this_bucket == block_no)
+            {
+                yaz_log (YLOG_FATAL, "Found hash bucket on other chain (1)");
+                abort ();
+            }
+            for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
+                if (hb->ph.no[i] == no)
+                {
+                    yaz_log (YLOG_FATAL, "Found hash bucket on other chain (2)");
+                    abort ();
+                }
+        }
+#endif
+        (cf->no_miss)++;
         hb = get_bucket (cf, block_no, hno);
         for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
             if (hb->ph.no[i] == no)
@@ -258,19 +310,81 @@ int cf_lookup (CFile cf, int no)
     return 0;
 }
 
-int cf_new (CFile cf, int no)
+static void cf_write_flat (CFile cf, zint no, zint vno) /* flat table: store vno as the entry for block no */
+{
+    zint blk = (no*sizeof(zint))/HASH_BSIZE; /* block of the flat table containing entry no */
+    int pos = (int) ((no*sizeof(zint)) - blk*HASH_BSIZE); /* byte offset of the entry inside that block */
+
+    blk += cf->head.next_bucket; /* the flat table starts at block head.next_bucket */
+    cf->dirty = 1;
+    if (cf->head.flat_bucket <= blk) /* keep flat_bucket one past the highest block written */
+        cf->head.flat_bucket = blk+1;
+    mf_write (cf->hash_mf, blk, pos, sizeof(zint), &vno);
+}
+
+static void cf_moveto_flat (CFile cf) /* copy every bucket entry into the flat table and switch head.state from 1 to 2 */
+{
+    struct CFile_hash_bucket *scratch; /* holds one on-disk bucket at a time */
+    zint bucket_no;
+    int slot;
+
+    yaz_log (YLOG_DEBUG, "cf: Moving to flat shadow: %s", cf->rmf->name);
+    yaz_log (YLOG_DEBUG,
+          "cf: hits=%d miss=%d bucket_in_memory=" ZINT_FORMAT " total=" ZINT_FORMAT,
+          cf->no_hits, cf->no_miss, cf->bucket_in_memory,
+          cf->head.next_bucket - cf->head.first_bucket);
+    assert (cf->head.state == 1);
+    flush_bucket (cf, -1); /* the assert below expects this to empty the bucket cache */
+    assert (cf->bucket_in_memory == 0);
+    scratch = (struct CFile_hash_bucket *) xmalloc (sizeof(*scratch));
+    for (bucket_no = cf->head.first_bucket; bucket_no < cf->head.next_bucket; bucket_no++)
+    {
+        if (!mf_read (cf->hash_mf, bucket_no, 0, 0, &scratch->ph))
+        {
+            yaz_log (YLOG_FATAL|YLOG_ERRNO, "read bucket moveto flat");
+            exit (1);
+        }
+        for (slot = 0; slot < HASH_BUCKET && scratch->ph.vno[slot]; slot++) /* a zero vno ends the used slots */
+            cf_write_flat (cf, scratch->ph.no[slot], scratch->ph.vno[slot]);
+    }
+    xfree (scratch);
+    xfree (cf->parray); /* hashed-state tables are not used once state is 2 */
+    cf->parray = NULL;
+    xfree (cf->array);
+    cf->array = NULL;
+    cf->dirty = 1;
+    cf->head.state = 2;
+}
+
+static zint cf_lookup (CFile cf, zint no) /* value mapped to block no, or 0; dispatches on head.state */
+{
+    if (cf->head.state <= 1) /* hashed state */
+        return cf_lookup_hash (cf, no);
+    return cf_lookup_flat (cf, no); /* state > 1: flat table */
+}
+
+static zint cf_new_flat (CFile cf, zint no) /* flat state: map block no to the next free shadow block and return it */
+{
+    zint fresh = cf->head.next_block; /* number handed out by this call */
+    ++(cf->head.next_block);
+    cf_write_flat (cf, no, fresh);
+    return fresh;
+}
+
+static zint cf_new_hash (CFile cf, zint no)
 {
     int hno = cf_hash (cf, no);
     struct CFile_hash_bucket *hbprev = NULL, *hb = cf->parray[hno];
-    int *bucketpp = &cf->array[hno];
+    zint *bucketpp = &cf->array[hno]; 
     int i;
-    int vno = (cf->head.next_block)++;
-    
+    zint vno = (cf->head.next_block)++;
+  
     for (hb = cf->parray[hno]; hb; hb = hb->h_next)
         if (!hb->ph.vno[HASH_BUCKET-1])
             for (i = 0; i<HASH_BUCKET; i++)
                 if (!hb->ph.vno[i])
                 {
+                    (cf->no_hits)++;
                     hb->ph.no[i] = no;
                     hb->ph.vno[i] = vno;
                     hb->dirty = 1;
@@ -284,8 +398,23 @@ int cf_new (CFile cf, int no)
             {
                 bucketpp = &hb->ph.next_bucket;
                 hbprev = hb;
-                continue;
+                break;
             }
+        if (hb)
+            continue;
+
+#if 0
+        /* extra check ... */
+        for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)
+        {
+            if (hb->ph.this_bucket == *bucketpp)
+            {
+                yaz_log (YLOG_FATAL, "Found hash bucket on other chain");
+                abort ();
+            }
+        }
+#endif
+        (cf->no_miss)++;
         hb = get_bucket (cf, *bucketpp, hno);
         assert (hb);
         for (i = 0; i<HASH_BUCKET; i++)
@@ -307,62 +436,63 @@ int cf_new (CFile cf, int no)
     return vno;
 }
 
-int cf_read (CFile cf, int no, int offset, int num, void *buf)
+zint cf_new (CFile cf, zint no) /* map block no to a new shadow block and return its number */
+{
+    if (cf->head.state <= 1)
+    {
+        if (cf->no_hits >= cf->no_miss*2) /* misses still at most half the hits: stay hashed */
+            return cf_new_hash (cf, no);
+        cf_moveto_flat (cf);
+        assert (cf->head.state > 1);
+    }
+    /* flat state, either already or as of the move just above */
+    return cf_new_flat (cf, no);
+}
+
+
+int cf_read (CFile cf, zint no, int offset, int nbytes, void *buf) /* read from the shadow copy of block no into buf; returns 1, or -1 when no has no mapping; exits on read failure */
 {
-    int block, r;
+    zint block; /* block number in cf->block_mf; cf_lookup yields 0 for "not mapped" */
     
     assert (cf);
-    logf (LOG_LOG, "cf_read no=%d, offset=%d, num=%d", no, offset, num);
+    zebra_mutex_lock (&cf->mutex); /* the lookup runs under cf->mutex */
     if (!(block = cf_lookup (cf, no)))
-        return -1;
-    if (lseek (cf->block_fd, cf->head.block_size * block + offset,
-         SEEK_SET) < 0)
     {
-        logf (LOG_FATAL|LOG_ERRNO, "cf_read, lseek no=%d, block=%d",
-              no, block);
-        exit (1);
+	zebra_mutex_unlock (&cf->mutex);
+        return -1; /* not mapped: nothing is copied into buf */
     }
-    r = read (cf->block_fd, buf, num ? num : cf->head.block_size);
-    if (r != cf->head.block_size)
+    zebra_mutex_unlock (&cf->mutex); /* released before the block I/O; buf is caller-owned */
+    if (!mf_read (cf->block_mf, block, offset, nbytes, buf)) /* a 0 return from mf_read is fatal here */
     {
-        logf (LOG_FATAL|LOG_ERRNO, "cf_read, read no=%d, block=%d",
-              no, block);
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "cf_read no=" ZINT_FORMAT " block=" ZINT_FORMAT, no, block);
         exit (1);
     }
     return 1;
 }
 
-int cf_write (CFile cf, int no, int offset, int num, const void *buf)
+int cf_write (CFile cf, zint no, int offset, int nbytes, const void *buf) /* write buf to the shadow copy of block no, mapping a new shadow block first if none exists; returns 0, exits on write failure */
 {
-    int block, r;
+    zint block; /* block number in cf->block_mf */
 
     assert (cf);
-
-    logf (LOG_LOG, "cf_write no=%d, offset=%d, num=%d", no, offset, num);
+    zebra_mutex_lock (&cf->mutex); /* held until after mf_write: buf may be re-pointed at the shared cf->iobuf below */
     if (!(block = cf_lookup (cf, no)))
     {
         block = cf_new (cf, no);
-        if (offset || num)
+        if (offset || nbytes) /* partial write to a fresh block: merge with the block as read from cf->rmf */
         {
-            mf_read (cf->mf, no, 0, 0, cf->iobuf);
-            memcpy (cf->iobuf + offset, buf, num);
+            mf_read (cf->rmf, no, 0, 0, cf->iobuf); /* return value not checked */
+            memcpy (cf->iobuf + offset, buf, nbytes);
             buf = cf->iobuf;
             offset = 0;
-            num = 0;
+            nbytes = 0; /* offset 0 / nbytes 0: presumably "write the whole block" -- confirm against mf_write */
         }
     }
-    if (lseek (cf->block_fd, cf->head.block_size * block + offset,
-               SEEK_SET) < 0)
+    if (mf_write (cf->block_mf, block, offset, nbytes, buf)) /* a non-zero return is treated as failure */
     {
-        logf (LOG_FATAL|LOG_ERRNO, "cf_write, lseek no=%d, block=%d",
-              no, block);
-        exit (1);
-    }
-    r = write (cf->block_fd, buf, num ? num : cf->head.block_size);
-    if (r != cf->head.block_size)
-    {
-        logf (LOG_FATAL|LOG_ERRNO, "cf_write, read no=%d, block=%d",
-              no, block);
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "cf_write no=" ZINT_FORMAT
+	      " block=" ZINT_FORMAT, no, block);
         exit (1);
     }
+    zebra_mutex_unlock (&cf->mutex); /* only now is cf->iobuf free for another writer */
     return 0;
@@ -370,31 +500,22 @@ int cf_write (CFile cf, int no, int offset, int num, const void *buf)
 
 int cf_close (CFile cf)
 {
+    yaz_log (YLOG_DEBUG, "cf: close hits=%d miss=%d bucket_in_memory=" ZINT_FORMAT
+	  " total=" ZINT_FORMAT,
+          cf->no_hits, cf->no_miss, cf->bucket_in_memory,
+          cf->head.next_bucket - cf->head.first_bucket); /* cf_close: flush, write back header and table if dirty, close both files, free cf; always returns 0 */
     flush_bucket (cf, -1);
     if (cf->dirty)
     {
-        int hash_bytes = cf->head.hash_size * sizeof(int);
-        if (lseek (cf->hash_fd, 0L, SEEK_SET) < 0)
-        {
-            logf (LOG_FATAL|LOG_ERRNO, "seek in hash fd");
-            exit (1);
-        }
-        hash_write (cf, &cf->head, sizeof(cf->head));
-        hash_write (cf, cf->array, hash_bytes);
-    }
-    if (close (cf->hash_fd) < 0)
-    {
-        logf (LOG_FATAL|LOG_ERRNO, "close hash fd");
-        exit (1);
-    }
-    if (close (cf->block_fd) < 0)
-    {
-        logf (LOG_FATAL|LOG_ERRNO, "close block fd");
-        exit (1);
+        mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head); /* header goes to block 0 */
+        write_head (cf); /* table follows from block 1; does nothing when cf->array is NULL */
     }
+    mf_close (cf->hash_mf); /* return values of mf_close are not checked */
+    mf_close (cf->block_mf);
     xfree (cf->array);
     xfree (cf->parray);
     xfree (cf->iobuf);
+    zebra_mutex_destroy (&cf->mutex); /* pairs with zebra_mutex_init in cf_open */
     xfree (cf);
     return 0;
 }